drivers/gpu/drm/drm_mipi_dsi.c | 44 ++++++++++++++++++++++++++++++++---------- include/drm/drm_mipi_dsi.h | 6 ++++++ 2 files changed, 40 insertions(+), 10 deletions(-)
From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com>
When a bridge driver uses devm_mipi_dsi_device_register_full() or
devm_mipi_dsi_attach(), the resource management is moved to devres,
which releases the resource automatically when the bridge driver is
unbound.
However, if the DSI host goes away first, the host unregistration code
will automatically detach and unregister any DSI peripherals, without
notifying the devres about it. So when the bridge driver later is
unbound, the resources are released a second time, leading to crash.
Fix this by recording the device that was used when calling the above
mentioned functions into the struct mipi_dsi_device, and when the
unregister or detach is called, remove the devres action if needed.
Signed-off-by: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com>
---
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
---
drivers/gpu/drm/drm_mipi_dsi.c | 44 ++++++++++++++++++++++++++++++++----------
include/drm/drm_mipi_dsi.h | 6 ++++++
2 files changed, 40 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 795001bb7ff1..a78c4b6cae70 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -237,13 +237,15 @@ mipi_dsi_device_register_full(struct mipi_dsi_host *host,
}
EXPORT_SYMBOL(mipi_dsi_device_register_full);
+static void mipi_dsi_do_device_unregister(struct mipi_dsi_device *dsi, bool devres);
+
/**
* mipi_dsi_device_unregister - unregister MIPI DSI device
* @dsi: DSI peripheral device
*/
void mipi_dsi_device_unregister(struct mipi_dsi_device *dsi)
{
- device_unregister(&dsi->dev);
+ mipi_dsi_do_device_unregister(dsi, false);
}
EXPORT_SYMBOL(mipi_dsi_device_unregister);
@@ -251,7 +253,15 @@ static void devm_mipi_dsi_device_unregister(void *arg)
{
struct mipi_dsi_device *dsi = arg;
- mipi_dsi_device_unregister(dsi);
+ mipi_dsi_do_device_unregister(dsi, true);
+}
+
+static void mipi_dsi_do_device_unregister(struct mipi_dsi_device *dsi, bool devres)
+{
+ if (!devres && dsi->devres_register_dev)
+ devm_remove_action(dsi->devres_register_dev, devm_mipi_dsi_device_unregister, dsi);
+
+ device_unregister(&dsi->dev);
}
/**
@@ -289,6 +299,8 @@ devm_mipi_dsi_device_register_full(struct device *dev,
if (ret)
return ERR_PTR(ret);
+ dsi->devres_register_dev = dev;
+
return dsi;
}
EXPORT_SYMBOL_GPL(devm_mipi_dsi_device_register_full);
@@ -386,17 +398,35 @@ int mipi_dsi_attach(struct mipi_dsi_device *dsi)
}
EXPORT_SYMBOL(mipi_dsi_attach);
+static int mipi_dsi_do_detach(struct mipi_dsi_device *dsi, bool devres);
+
/**
* mipi_dsi_detach - detach a DSI device from its DSI host
* @dsi: DSI peripheral
*/
int mipi_dsi_detach(struct mipi_dsi_device *dsi)
+{
+ return mipi_dsi_do_detach(dsi, false);
+}
+EXPORT_SYMBOL(mipi_dsi_detach);
+
+static void devm_mipi_dsi_detach(void *arg)
+{
+ struct mipi_dsi_device *dsi = arg;
+
+ mipi_dsi_do_detach(dsi, true);
+}
+
+static int mipi_dsi_do_detach(struct mipi_dsi_device *dsi, bool devres)
{
const struct mipi_dsi_host_ops *ops = dsi->host->ops;
if (WARN_ON(!dsi->attached))
return -EINVAL;
+ if (!devres && dsi->devres_attach_dev)
+ devm_remove_action(dsi->devres_attach_dev, devm_mipi_dsi_detach, dsi);
+
if (!ops || !ops->detach)
return -ENOSYS;
@@ -404,14 +434,6 @@ int mipi_dsi_detach(struct mipi_dsi_device *dsi)
return ops->detach(dsi->host, dsi);
}
-EXPORT_SYMBOL(mipi_dsi_detach);
-
-static void devm_mipi_dsi_detach(void *arg)
-{
- struct mipi_dsi_device *dsi = arg;
-
- mipi_dsi_detach(dsi);
-}
/**
* devm_mipi_dsi_attach - Attach a MIPI-DSI device to its DSI Host
@@ -437,6 +459,8 @@ int devm_mipi_dsi_attach(struct device *dev,
if (ret)
return ret;
+ dsi->devres_attach_dev = dev;
+
return 0;
}
EXPORT_SYMBOL_GPL(devm_mipi_dsi_attach);
diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index 82b1cc434ea3..f68aee6813db 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -181,6 +181,9 @@ struct mipi_dsi_device_info {
* be set to the real limits of the hardware, zero is only accepted for
* legacy drivers
* @dsc: panel/bridge DSC pps payload to be sent
+ * devres_register_dev: device that was used with
+ * devm_mipi_dsi_device_register_full() or NULL
+ * devres_attach_dev: device that was used with devm_mipi_dsi_attach() or NULL
*/
struct mipi_dsi_device {
struct mipi_dsi_host *host;
@@ -195,6 +198,9 @@ struct mipi_dsi_device {
unsigned long hs_rate;
unsigned long lp_rate;
struct drm_dsc_config *dsc;
+
+ struct device *devres_register_dev;
+ struct device *devres_attach_dev;
};
#define MIPI_DSI_MODULE_PREFIX "mipi-dsi:"
---
base-commit: 6ba59ff4227927d3a8530fc2973b80e94b54d58f
change-id: 20240619-dsi-devres-fix-8d55852b406a
Best regards,
--
Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Hi, On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > devm_mipi_dsi_attach(), the resource management is moved to devres, > which releases the resource automatically when the bridge driver is > unbound. > > However, if the DSI host goes away first, the host unregistration code > will automatically detach and unregister any DSI peripherals, without > notifying the devres about it. So when the bridge driver later is > unbound, the resources are released a second time, leading to crash. That's super surprising. mipi_dsi_device_unregister calls device_unregister, which calls device_del, which in turn calls devres_release_all. If that doesn't work like that, then it's what needs to be fixed, and not worked around in the MIPI-DSI bus. Maxime
On 26/06/2024 11:49, Maxime Ripard wrote: > Hi, > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: >> From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> >> >> When a bridge driver uses devm_mipi_dsi_device_register_full() or >> devm_mipi_dsi_attach(), the resource management is moved to devres, >> which releases the resource automatically when the bridge driver is >> unbound. >> >> However, if the DSI host goes away first, the host unregistration code >> will automatically detach and unregister any DSI peripherals, without >> notifying the devres about it. So when the bridge driver later is >> unbound, the resources are released a second time, leading to crash. > > That's super surprising. mipi_dsi_device_unregister calls > device_unregister, which calls device_del, which in turn calls > devres_release_all. Hmm, right. > If that doesn't work like that, then it's what needs to be fixed, and > not worked around in the MIPI-DSI bus. Well, something causes a crash for both the device register/unregister case and the attach/detach case, and the call stacks and debug prints showed a double unregister/detach... I need to dig up the board and check again why the devres_release_all() in device_del() doesn't solve this. But I can probably only get back to this in August, so it's perhaps best to ignore this patch for now. However, the attach/detach case is still valid? I see no devres calls in the detach paths. Tomi
On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: > On 26/06/2024 11:49, Maxime Ripard wrote: > > Hi, > > > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > > > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > > > > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > > > devm_mipi_dsi_attach(), the resource management is moved to devres, > > > which releases the resource automatically when the bridge driver is > > > unbound. > > > > > > However, if the DSI host goes away first, the host unregistration code > > > will automatically detach and unregister any DSI peripherals, without > > > notifying the devres about it. So when the bridge driver later is > > > unbound, the resources are released a second time, leading to crash. > > > > That's super surprising. mipi_dsi_device_unregister calls > > device_unregister, which calls device_del, which in turn calls > > devres_release_all. > > Hmm, right. > > > If that doesn't work like that, then it's what needs to be fixed, and > > not worked around in the MIPI-DSI bus. > > Well, something causes a crash for both the device register/unregister case > and the attach/detach case, and the call stacks and debug prints showed a > double unregister/detach... > > I need to dig up the board and check again why the devres_release_all() in > device_del() doesn't solve this. But I can probably only get back to this in > August, so it's perhaps best to ignore this patch for now. > > However, the attach/detach case is still valid? I see no devres calls in the > detach paths. I'm not sure what you mean by the attach/detach case. Do you expect device resources allocated in attach to be freed when detach run? Maxime
On 26/06/2024 18:07, Maxime Ripard wrote: > On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: >> On 26/06/2024 11:49, Maxime Ripard wrote: >>> Hi, >>> >>> On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: >>>> From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> >>>> >>>> When a bridge driver uses devm_mipi_dsi_device_register_full() or >>>> devm_mipi_dsi_attach(), the resource management is moved to devres, >>>> which releases the resource automatically when the bridge driver is >>>> unbound. >>>> >>>> However, if the DSI host goes away first, the host unregistration code >>>> will automatically detach and unregister any DSI peripherals, without >>>> notifying the devres about it. So when the bridge driver later is >>>> unbound, the resources are released a second time, leading to crash. >>> >>> That's super surprising. mipi_dsi_device_unregister calls >>> device_unregister, which calls device_del, which in turn calls >>> devres_release_all. >> >> Hmm, right. >> >>> If that doesn't work like that, then it's what needs to be fixed, and >>> not worked around in the MIPI-DSI bus. >> >> Well, something causes a crash for both the device register/unregister case >> and the attach/detach case, and the call stacks and debug prints showed a >> double unregister/detach... >> >> I need to dig up the board and check again why the devres_release_all() in >> device_del() doesn't solve this. But I can probably only get back to this in >> August, so it's perhaps best to ignore this patch for now. >> >> However, the attach/detach case is still valid? I see no devres calls in the >> detach paths. > > I'm not sure what you mean by the attach/detach case. Do you expect > device resources allocated in attach to be freed when detach run? Ah, never mind, the devres_release_all() would of course deal with that too. However, I just realized/remembered why it crashes. devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a device which is used for the devres. This device is probably always the bridge device. So when the bridge device goes away, so do those resources. The mipi_dsi_device_unregister() call deals with a DSI device, which was created in devm_mipi_dsi_device_register_full(). Unregistering that DSI device, which does happen when the DSI host is removed, does not affect the devres of the bridge. So, unloading the DSI host driver causes mipi_dsi_device_unregister() and mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and unloading the bridge driver causes them to be called again via devres. Tomi
Hi Tomi, On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: > On 26/06/2024 18:07, Maxime Ripard wrote: > > On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: > > > On 26/06/2024 11:49, Maxime Ripard wrote: > > > > Hi, > > > > > > > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > > > > > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > > > > > > > > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > > > > > devm_mipi_dsi_attach(), the resource management is moved to devres, > > > > > which releases the resource automatically when the bridge driver is > > > > > unbound. > > > > > > > > > > However, if the DSI host goes away first, the host unregistration code > > > > > will automatically detach and unregister any DSI peripherals, without > > > > > notifying the devres about it. So when the bridge driver later is > > > > > unbound, the resources are released a second time, leading to crash. > > > > > > > > That's super surprising. mipi_dsi_device_unregister calls > > > > device_unregister, which calls device_del, which in turn calls > > > > devres_release_all. > > > > > > Hmm, right. > > > > > > > If that doesn't work like that, then it's what needs to be fixed, and > > > > not worked around in the MIPI-DSI bus. > > > > > > Well, something causes a crash for both the device register/unregister case > > > and the attach/detach case, and the call stacks and debug prints showed a > > > double unregister/detach... > > > > > > I need to dig up the board and check again why the devres_release_all() in > > > device_del() doesn't solve this. But I can probably only get back to this in > > > August, so it's perhaps best to ignore this patch for now. > > > > > > However, the attach/detach case is still valid? I see no devres calls in the > > > detach paths. > > > > I'm not sure what you mean by the attach/detach case. Do you expect > > device resources allocated in attach to be freed when detach run? > > Ah, never mind, the devres_release_all() would of course deal with that too. > > However, I just realized/remembered why it crashes. > > devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a > device which is used for the devres. This device is probably always the > bridge device. So when the bridge device goes away, so do those resources. > > The mipi_dsi_device_unregister() call deals with a DSI device, which was > created in devm_mipi_dsi_device_register_full(). Unregistering that DSI > device, which does happen when the DSI host is removed, does not affect the > devres of the bridge. > > So, unloading the DSI host driver causes mipi_dsi_device_unregister() and > mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and > unloading the bridge driver causes them to be called again via devres. Sorry, that's one of the things I don't quite get. Both functions are exclusively(?) called from I2C bridges, so the device passed there should be a i2c_client instance, and thus the MIPI-DSI host going away will not remove those i2c devices, only the MIPI-DSI ones, right? So if we remove the host, the MIPI-DSI device will be detached and removed through the path you were explaing with the i2c client lingering around. And if we remove the I2C device, then devm will kick in and will detach and remove the MIPI-DSI device. Or is it the other way around? That if you remove the host, the device is properly detached and removed, but there's still the devm actions lingering around in the i2c device with pointers to the mipi_dsi_device that was first created, but since destroyed? And thus, if the i2c device ever goes away, we get a use-after-free? Maxime
On 02/07/2024 14:43, Maxime Ripard wrote: > Hi Tomi, > > On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: >> On 26/06/2024 18:07, Maxime Ripard wrote: >>> On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: >>>> On 26/06/2024 11:49, Maxime Ripard wrote: >>>>> Hi, >>>>> >>>>> On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: >>>>>> From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> >>>>>> >>>>>> When a bridge driver uses devm_mipi_dsi_device_register_full() or >>>>>> devm_mipi_dsi_attach(), the resource management is moved to devres, >>>>>> which releases the resource automatically when the bridge driver is >>>>>> unbound. >>>>>> >>>>>> However, if the DSI host goes away first, the host unregistration code >>>>>> will automatically detach and unregister any DSI peripherals, without >>>>>> notifying the devres about it. So when the bridge driver later is >>>>>> unbound, the resources are released a second time, leading to crash. >>>>> >>>>> That's super surprising. mipi_dsi_device_unregister calls >>>>> device_unregister, which calls device_del, which in turn calls >>>>> devres_release_all. >>>> >>>> Hmm, right. >>>> >>>>> If that doesn't work like that, then it's what needs to be fixed, and >>>>> not worked around in the MIPI-DSI bus. >>>> >>>> Well, something causes a crash for both the device register/unregister case >>>> and the attach/detach case, and the call stacks and debug prints showed a >>>> double unregister/detach... >>>> >>>> I need to dig up the board and check again why the devres_release_all() in >>>> device_del() doesn't solve this. But I can probably only get back to this in >>>> August, so it's perhaps best to ignore this patch for now. >>>> >>>> However, the attach/detach case is still valid? I see no devres calls in the >>>> detach paths. >>> >>> I'm not sure what you mean by the attach/detach case. Do you expect >>> device resources allocated in attach to be freed when detach run? >> >> Ah, never mind, the devres_release_all() would of course deal with that too. >> >> However, I just realized/remembered why it crashes. >> >> devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a >> device which is used for the devres. This device is probably always the >> bridge device. So when the bridge device goes away, so do those resources. >> >> The mipi_dsi_device_unregister() call deals with a DSI device, which was >> created in devm_mipi_dsi_device_register_full(). Unregistering that DSI >> device, which does happen when the DSI host is removed, does not affect the >> devres of the bridge. >> >> So, unloading the DSI host driver causes mipi_dsi_device_unregister() and >> mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and >> unloading the bridge driver causes them to be called again via devres. > > Sorry, that's one of the things I don't quite get. Both functions are > exclusively(?) called from I2C bridges, so the device passed there > should be a i2c_client instance, and thus the MIPI-DSI host going away > will not remove those i2c devices, only the MIPI-DSI ones, right? Yes. > So if we remove the host, the MIPI-DSI device will be detached and > removed through the path you were explaing with the i2c client lingering > around. And if we remove the I2C device, then devm will kick in and will > detach and remove the MIPI-DSI device. Right. > Or is it the other way around? That if you remove the host, the device > is properly detached and removed, but there's still the devm actions > lingering around in the i2c device with pointers to the mipi_dsi_device > that was first created, but since destroyed? > > And thus, if the i2c device ever goes away, we get a use-after-free? Hmm, I'm not sure I understand what you mean here... Aren't you describing the same thing in both of these cases? In any case, to expand the description a bit, module unloading is quite fragile. I do get a crash if I first unload the i2c bridge module, and only then go and unload the other ones in the DRM pipeline. But I think module unloading will very easily crash, whatever the DRM drivers being used are, so it's not related to this particular issue. In my view, the unload sequence that should be supported (for development purposes, not for production) is to start the unload from the display controller module, which tears down the DRM pipeline, and going from there towards the panels/connectors. Of course, it would be very nice if the module unloading worked perfectly, but afaics fixing all that's related to module unloading would be a multi-year project... So, I just want to keep the sequence I described above working, which allows using modules while doing driver development. Tomi
On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: > On 02/07/2024 14:43, Maxime Ripard wrote: > > Hi Tomi, > > > > On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: > > > On 26/06/2024 18:07, Maxime Ripard wrote: > > > > On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: > > > > > On 26/06/2024 11:49, Maxime Ripard wrote: > > > > > > Hi, > > > > > > > > > > > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > > > > > > > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > > > > > > > > > > > > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > > > > > > > devm_mipi_dsi_attach(), the resource management is moved to devres, > > > > > > > which releases the resource automatically when the bridge driver is > > > > > > > unbound. > > > > > > > > > > > > > > However, if the DSI host goes away first, the host unregistration code > > > > > > > will automatically detach and unregister any DSI peripherals, without > > > > > > > notifying the devres about it. So when the bridge driver later is > > > > > > > unbound, the resources are released a second time, leading to crash. > > > > > > > > > > > > That's super surprising. mipi_dsi_device_unregister calls > > > > > > device_unregister, which calls device_del, which in turn calls > > > > > > devres_release_all. > > > > > > > > > > Hmm, right. > > > > > > > > > > > If that doesn't work like that, then it's what needs to be fixed, and > > > > > > not worked around in the MIPI-DSI bus. > > > > > > > > > > Well, something causes a crash for both the device register/unregister case > > > > > and the attach/detach case, and the call stacks and debug prints showed a > > > > > double unregister/detach... > > > > > > > > > > I need to dig up the board and check again why the devres_release_all() in > > > > > device_del() doesn't solve this. But I can probably only get back to this in > > > > > August, so it's perhaps best to ignore this patch for now. > > > > > > > > > > However, the attach/detach case is still valid? I see no devres calls in the > > > > > detach paths. > > > > > > > > I'm not sure what you mean by the attach/detach case. Do you expect > > > > device resources allocated in attach to be freed when detach run? > > > > > > Ah, never mind, the devres_release_all() would of course deal with that too. > > > > > > However, I just realized/remembered why it crashes. > > > > > > devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a > > > device which is used for the devres. This device is probably always the > > > bridge device. So when the bridge device goes away, so do those resources. > > > > > > The mipi_dsi_device_unregister() call deals with a DSI device, which was > > > created in devm_mipi_dsi_device_register_full(). Unregistering that DSI > > > device, which does happen when the DSI host is removed, does not affect the > > > devres of the bridge. > > > > > > So, unloading the DSI host driver causes mipi_dsi_device_unregister() and > > > mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and > > > unloading the bridge driver causes them to be called again via devres. > > > > Sorry, that's one of the things I don't quite get. Both functions are > > exclusively(?) called from I2C bridges, so the device passed there > > should be a i2c_client instance, and thus the MIPI-DSI host going away > > will not remove those i2c devices, only the MIPI-DSI ones, right? > > Yes. > > > So if we remove the host, the MIPI-DSI device will be detached and > > removed through the path you were explaing with the i2c client lingering > > around. And if we remove the I2C device, then devm will kick in and will > > detach and remove the MIPI-DSI device. > > Right. > > > Or is it the other way around? That if you remove the host, the device > > is properly detached and removed, but there's still the devm actions > > lingering around in the i2c device with pointers to the mipi_dsi_device > > that was first created, but since destroyed? > > > > And thus, if the i2c device ever goes away, we get a use-after-free? > > Hmm, I'm not sure I understand what you mean here... Aren't you describing > the same thing in both of these cases? > > In any case, to expand the description a bit, module unloading is quite > fragile. I do get a crash if I first unload the i2c bridge module, and only > then go and unload the other ones in the DRM pipeline. But I think module > unloading will very easily crash, whatever the DRM drivers being used are, > so it's not related to this particular issue. > > In my view, the unload sequence that should be supported (for development > purposes, not for production) is to start the unload from the display > controller module, which tears down the DRM pipeline, and going from there > towards the panels/connectors. > > Of course, it would be very nice if the module unloading worked perfectly, > but afaics fixing all that's related to module unloading would be a > multi-year project... So, I just want to keep the sequence I described above > working, which allows using modules while doing driver development. FTR, I'm all for supporting module unloading. The discussion above was about what is broken exactly, so we can come up with a good solution. Maxime
Hi, On 25/07/2024 14:28, Maxime Ripard wrote: > On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: >> On 02/07/2024 14:43, Maxime Ripard wrote: >>> Hi Tomi, >>> >>> On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: >>>> On 26/06/2024 18:07, Maxime Ripard wrote: >>>>> On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: >>>>>> On 26/06/2024 11:49, Maxime Ripard wrote: >>>>>>> Hi, >>>>>>> >>>>>>> On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: >>>>>>>> From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> >>>>>>>> >>>>>>>> When a bridge driver uses devm_mipi_dsi_device_register_full() or >>>>>>>> devm_mipi_dsi_attach(), the resource management is moved to devres, >>>>>>>> which releases the resource automatically when the bridge driver is >>>>>>>> unbound. >>>>>>>> >>>>>>>> However, if the DSI host goes away first, the host unregistration code >>>>>>>> will automatically detach and unregister any DSI peripherals, without >>>>>>>> notifying the devres about it. So when the bridge driver later is >>>>>>>> unbound, the resources are released a second time, leading to crash. >>>>>>> >>>>>>> That's super surprising. mipi_dsi_device_unregister calls >>>>>>> device_unregister, which calls device_del, which in turn calls >>>>>>> devres_release_all. >>>>>> >>>>>> Hmm, right. >>>>>> >>>>>>> If that doesn't work like that, then it's what needs to be fixed, and >>>>>>> not worked around in the MIPI-DSI bus. >>>>>> >>>>>> Well, something causes a crash for both the device register/unregister case >>>>>> and the attach/detach case, and the call stacks and debug prints showed a >>>>>> double unregister/detach... >>>>>> >>>>>> I need to dig up the board and check again why the devres_release_all() in >>>>>> device_del() doesn't solve this. But I can probably only get back to this in >>>>>> August, so it's perhaps best to ignore this patch for now. >>>>>> >>>>>> However, the attach/detach case is still valid? I see no devres calls in the >>>>>> detach paths. >>>>> >>>>> I'm not sure what you mean by the attach/detach case. Do you expect >>>>> device resources allocated in attach to be freed when detach run? >>>> >>>> Ah, never mind, the devres_release_all() would of course deal with that too. >>>> >>>> However, I just realized/remembered why it crashes. >>>> >>>> devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a >>>> device which is used for the devres. This device is probably always the >>>> bridge device. So when the bridge device goes away, so do those resources. >>>> >>>> The mipi_dsi_device_unregister() call deals with a DSI device, which was >>>> created in devm_mipi_dsi_device_register_full(). Unregistering that DSI >>>> device, which does happen when the DSI host is removed, does not affect the >>>> devres of the bridge. >>>> >>>> So, unloading the DSI host driver causes mipi_dsi_device_unregister() and >>>> mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and >>>> unloading the bridge driver causes them to be called again via devres. >>> >>> Sorry, that's one of the things I don't quite get. Both functions are >>> exclusively(?) called from I2C bridges, so the device passed there >>> should be a i2c_client instance, and thus the MIPI-DSI host going away >>> will not remove those i2c devices, only the MIPI-DSI ones, right? >> >> Yes. >> >>> So if we remove the host, the MIPI-DSI device will be detached and >>> removed through the path you were explaing with the i2c client lingering >>> around. And if we remove the I2C device, then devm will kick in and will >>> detach and remove the MIPI-DSI device. >> >> Right. >> >>> Or is it the other way around? That if you remove the host, the device >>> is properly detached and removed, but there's still the devm actions >>> lingering around in the i2c device with pointers to the mipi_dsi_device >>> that was first created, but since destroyed? >>> >>> And thus, if the i2c device ever goes away, we get a use-after-free? >> >> Hmm, I'm not sure I understand what you mean here... Aren't you describing >> the same thing in both of these cases? >> >> In any case, to expand the description a bit, module unloading is quite >> fragile. I do get a crash if I first unload the i2c bridge module, and only >> then go and unload the other ones in the DRM pipeline. But I think module >> unloading will very easily crash, whatever the DRM drivers being used are, >> so it's not related to this particular issue. >> >> In my view, the unload sequence that should be supported (for development >> purposes, not for production) is to start the unload from the display >> controller module, which tears down the DRM pipeline, and going from there >> towards the panels/connectors. >> >> Of course, it would be very nice if the module unloading worked perfectly, >> but afaics fixing all that's related to module unloading would be a >> multi-year project... So, I just want to keep the sequence I described above >> working, which allows using modules while doing driver development. > > FTR, I'm all for supporting module unloading. The discussion above was > about what is broken exactly, so we can come up with a good solution. Does that mean that you're ok with the patch, or that something should be improved? Tomi
Hi, On Wed, Aug 07, 2024 at 03:19:23PM GMT, Tomi Valkeinen wrote: > On 25/07/2024 14:28, Maxime Ripard wrote: > > On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: > > > On 02/07/2024 14:43, Maxime Ripard wrote: > > > > Hi Tomi, > > > > > > > > On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: > > > > > On 26/06/2024 18:07, Maxime Ripard wrote: > > > > > > On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: > > > > > > > On 26/06/2024 11:49, Maxime Ripard wrote: > > > > > > > > Hi, > > > > > > > > > > > > > > > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > > > > > > > > > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > > > > > > > > > > > > > > > > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > > > > > > > > > devm_mipi_dsi_attach(), the resource management is moved to devres, > > > > > > > > > which releases the resource automatically when the bridge driver is > > > > > > > > > unbound. > > > > > > > > > > > > > > > > > > However, if the DSI host goes away first, the host unregistration code > > > > > > > > > will automatically detach and unregister any DSI peripherals, without > > > > > > > > > notifying the devres about it. So when the bridge driver later is > > > > > > > > > unbound, the resources are released a second time, leading to crash. > > > > > > > > > > > > > > > > That's super surprising. mipi_dsi_device_unregister calls > > > > > > > > device_unregister, which calls device_del, which in turn calls > > > > > > > > devres_release_all. > > > > > > > > > > > > > > Hmm, right. > > > > > > > > > > > > > > > If that doesn't work like that, then it's what needs to be fixed, and > > > > > > > > not worked around in the MIPI-DSI bus. > > > > > > > > > > > > > > Well, something causes a crash for both the device register/unregister case > > > > > > > and the attach/detach case, and the call stacks and debug prints showed a > > > > > > > double unregister/detach... > > > > > > > > > > > > > > I need to dig up the board and check again why the devres_release_all() in > > > > > > > device_del() doesn't solve this. But I can probably only get back to this in > > > > > > > August, so it's perhaps best to ignore this patch for now. > > > > > > > > > > > > > > However, the attach/detach case is still valid? I see no devres calls in the > > > > > > > detach paths. > > > > > > > > > > > > I'm not sure what you mean by the attach/detach case. Do you expect > > > > > > device resources allocated in attach to be freed when detach run? > > > > > > > > > > Ah, never mind, the devres_release_all() would of course deal with that too. > > > > > > > > > > However, I just realized/remembered why it crashes. > > > > > > > > > > devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a > > > > > device which is used for the devres. This device is probably always the > > > > > bridge device. So when the bridge device goes away, so do those resources. > > > > > > > > > > The mipi_dsi_device_unregister() call deals with a DSI device, which was > > > > > created in devm_mipi_dsi_device_register_full(). Unregistering that DSI > > > > > device, which does happen when the DSI host is removed, does not affect the > > > > > devres of the bridge. > > > > > > > > > > So, unloading the DSI host driver causes mipi_dsi_device_unregister() and > > > > > mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and > > > > > unloading the bridge driver causes them to be called again via devres. > > > > > > > > Sorry, that's one of the things I don't quite get. Both functions are > > > > exclusively(?) called from I2C bridges, so the device passed there > > > > should be a i2c_client instance, and thus the MIPI-DSI host going away > > > > will not remove those i2c devices, only the MIPI-DSI ones, right? > > > > > > Yes. > > > > > > > So if we remove the host, the MIPI-DSI device will be detached and > > > > removed through the path you were explaing with the i2c client lingering > > > > around. And if we remove the I2C device, then devm will kick in and will > > > > detach and remove the MIPI-DSI device. > > > > > > Right. > > > > > > > Or is it the other way around? That if you remove the host, the device > > > > is properly detached and removed, but there's still the devm actions > > > > lingering around in the i2c device with pointers to the mipi_dsi_device > > > > that was first created, but since destroyed? > > > > > > > > And thus, if the i2c device ever goes away, we get a use-after-free? > > > > > > Hmm, I'm not sure I understand what you mean here... Aren't you describing > > > the same thing in both of these cases? > > > > > > In any case, to expand the description a bit, module unloading is quite > > > fragile. I do get a crash if I first unload the i2c bridge module, and only > > > then go and unload the other ones in the DRM pipeline. But I think module > > > unloading will very easily crash, whatever the DRM drivers being used are, > > > so it's not related to this particular issue. > > > > > > In my view, the unload sequence that should be supported (for development > > > purposes, not for production) is to start the unload from the display > > > controller module, which tears down the DRM pipeline, and going from there > > > towards the panels/connectors. > > > > > > Of course, it would be very nice if the module unloading worked perfectly, > > > but afaics fixing all that's related to module unloading would be a > > > multi-year project... So, I just want to keep the sequence I described above > > > working, which allows using modules while doing driver development. > > > > FTR, I'm all for supporting module unloading. The discussion above was > > about what is broken exactly, so we can come up with a good solution. > > Does that mean that you're ok with the patch, or that something should be > improved? No, I meant that at the very least the commit log needs to be updated to reflect what is actually going on, because at least my understanding of it doesn't match what actually happens. We want a solution to the problem you're facing, but it's not clear to me what the problem is exactly at this point, so it's hard to review a solution. Maxime
On Mon, Sep 02, 2024 at 11:26:11AM +0200, Maxime Ripard wrote: > Hi, > > On Wed, Aug 07, 2024 at 03:19:23PM GMT, Tomi Valkeinen wrote: > > On 25/07/2024 14:28, Maxime Ripard wrote: > > > On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: > > > > On 02/07/2024 14:43, Maxime Ripard wrote: > > > > > Hi Tomi, > > > > > > > > > > On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: > > > > > > On 26/06/2024 18:07, Maxime Ripard wrote: > > > > > > > On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: > > > > > > > > On 26/06/2024 11:49, Maxime Ripard wrote: > > > > > > > > > Hi, > > > > > > > > > > > > > > > > > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > > > > > > > > > > > > > > > > > > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > > > > > > > > > > devm_mipi_dsi_attach(), the resource management is moved to devres, > > > > > > > > > > which releases the resource automatically when the bridge driver is > > > > > > > > > > unbound. > > > > > > > > > > > > > > > > > > > > However, if the DSI host goes away first, the host unregistration code > > > > > > > > > > will automatically detach and unregister any DSI peripherals, without > > > > > > > > > > notifying the devres about it. So when the bridge driver later is > > > > > > > > > > unbound, the resources are released a second time, leading to crash. > > > > > > > > > > > > > > > > > > That's super surprising. mipi_dsi_device_unregister calls > > > > > > > > > device_unregister, which calls device_del, which in turn calls > > > > > > > > > devres_release_all. > > > > > > > > > > > > > > > > Hmm, right. > > > > > > > > > > > > > > > > > If that doesn't work like that, then it's what needs to be fixed, and > > > > > > > > > not worked around in the MIPI-DSI bus. > > > > > > > > > > > > > > > > Well, something causes a crash for both the device register/unregister case > > > > > > > > and the attach/detach case, and the call stacks and debug prints showed a > > > > > > > > double unregister/detach... > > > > > > > > > > > > > > > > I need to dig up the board and check again why the devres_release_all() in > > > > > > > > device_del() doesn't solve this. But I can probably only get back to this in > > > > > > > > August, so it's perhaps best to ignore this patch for now. > > > > > > > > > > > > > > > > However, the attach/detach case is still valid? I see no devres calls in the > > > > > > > > detach paths. > > > > > > > > > > > > > > I'm not sure what you mean by the attach/detach case. Do you expect > > > > > > > device resources allocated in attach to be freed when detach run? > > > > > > > > > > > > Ah, never mind, the devres_release_all() would of course deal with that too. > > > > > > > > > > > > However, I just realized/remembered why it crashes. > > > > > > > > > > > > devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a > > > > > > device which is used for the devres. This device is probably always the > > > > > > bridge device. So when the bridge device goes away, so do those resources. > > > > > > > > > > > > The mipi_dsi_device_unregister() call deals with a DSI device, which was > > > > > > created in devm_mipi_dsi_device_register_full(). Unregistering that DSI > > > > > > device, which does happen when the DSI host is removed, does not affect the > > > > > > devres of the bridge. > > > > > > > > > > > > So, unloading the DSI host driver causes mipi_dsi_device_unregister() and > > > > > > mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and > > > > > > unloading the bridge driver causes them to be called again via devres. > > > > > > > > > > Sorry, that's one of the things I don't quite get. Both functions are > > > > > exclusively(?) called from I2C bridges, so the device passed there > > > > > should be a i2c_client instance, and thus the MIPI-DSI host going away > > > > > will not remove those i2c devices, only the MIPI-DSI ones, right? > > > > > > > > Yes. > > > > > > > > > So if we remove the host, the MIPI-DSI device will be detached and > > > > > removed through the path you were explaing with the i2c client lingering > > > > > around. And if we remove the I2C device, then devm will kick in and will > > > > > detach and remove the MIPI-DSI device. > > > > > > > > Right. > > > > > > > > > Or is it the other way around? That if you remove the host, the device > > > > > is properly detached and removed, but there's still the devm actions > > > > > lingering around in the i2c device with pointers to the mipi_dsi_device > > > > > that was first created, but since destroyed? > > > > > > > > > > And thus, if the i2c device ever goes away, we get a use-after-free? > > > > > > > > Hmm, I'm not sure I understand what you mean here... Aren't you describing > > > > the same thing in both of these cases? > > > > > > > > In any case, to expand the description a bit, module unloading is quite > > > > fragile. I do get a crash if I first unload the i2c bridge module, and only > > > > then go and unload the other ones in the DRM pipeline. But I think module > > > > unloading will very easily crash, whatever the DRM drivers being used are, > > > > so it's not related to this particular issue. > > > > > > > > In my view, the unload sequence that should be supported (for development > > > > purposes, not for production) is to start the unload from the display > > > > controller module, which tears down the DRM pipeline, and going from there > > > > towards the panels/connectors. > > > > > > > > Of course, it would be very nice if the module unloading worked perfectly, > > > > but afaics fixing all that's related to module unloading would be a > > > > multi-year project... So, I just want to keep the sequence I described above > > > > working, which allows using modules while doing driver development. > > > > > > FTR, I'm all for supporting module unloading. The discussion above was > > > about what is broken exactly, so we can come up with a good solution. > > > > Does that mean that you're ok with the patch, or that something should be > > improved? > > No, I meant that at the very least the commit log needs to be updated to > reflect what is actually going on, because at least my understanding of > it doesn't match what actually happens. > > We want a solution to the problem you're facing, but it's not clear to > me what the problem is exactly at this point, so it's hard to review a > solution. So I haven't looked at the full thing, but I think the proper fix is to make both detach and unregister cope with being called multiple times. I think devm_ here is a red herring, the underlying issues is that we can unregister/detach from two sides: - when the host dsi goes away - when individual dsi devices on a given host go away So there needs to be book-keeping and locking to make sure no matter which order things disappear, we don't try to unregister/detach a dsi device twice. Now whether that device model is correct is maybe a different question, but I think it's entirely orthogonal issue too. Or maybe I'm just wrong ... Cheers, Sima -- Simona Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Hi, On 02/09/2024 13:50, Daniel Vetter wrote: > On Mon, Sep 02, 2024 at 11:26:11AM +0200, Maxime Ripard wrote: >> Hi, >> >> On Wed, Aug 07, 2024 at 03:19:23PM GMT, Tomi Valkeinen wrote: >>> On 25/07/2024 14:28, Maxime Ripard wrote: >>>> On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: >>>>> On 02/07/2024 14:43, Maxime Ripard wrote: >>>>>> Hi Tomi, >>>>>> >>>>>> On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: >>>>>>> On 26/06/2024 18:07, Maxime Ripard wrote: >>>>>>>> On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: >>>>>>>>> On 26/06/2024 11:49, Maxime Ripard wrote: >>>>>>>>>> Hi, >>>>>>>>>> >>>>>>>>>> On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: >>>>>>>>>>> From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> >>>>>>>>>>> >>>>>>>>>>> When a bridge driver uses devm_mipi_dsi_device_register_full() or >>>>>>>>>>> devm_mipi_dsi_attach(), the resource management is moved to devres, >>>>>>>>>>> which releases the resource automatically when the bridge driver is >>>>>>>>>>> unbound. >>>>>>>>>>> >>>>>>>>>>> However, if the DSI host goes away first, the host unregistration code >>>>>>>>>>> will automatically detach and unregister any DSI peripherals, without >>>>>>>>>>> notifying the devres about it. So when the bridge driver later is >>>>>>>>>>> unbound, the resources are released a second time, leading to crash. >>>>>>>>>> >>>>>>>>>> That's super surprising. mipi_dsi_device_unregister calls >>>>>>>>>> device_unregister, which calls device_del, which in turn calls >>>>>>>>>> devres_release_all. >>>>>>>>> >>>>>>>>> Hmm, right. >>>>>>>>> >>>>>>>>>> If that doesn't work like that, then it's what needs to be fixed, and >>>>>>>>>> not worked around in the MIPI-DSI bus. >>>>>>>>> >>>>>>>>> Well, something causes a crash for both the device register/unregister case >>>>>>>>> and the attach/detach case, and the call stacks and debug prints showed a >>>>>>>>> double unregister/detach... >>>>>>>>> >>>>>>>>> I need to dig up the board and check again why the devres_release_all() in >>>>>>>>> device_del() doesn't solve this. But I can probably only get back to this in >>>>>>>>> August, so it's perhaps best to ignore this patch for now. >>>>>>>>> >>>>>>>>> However, the attach/detach case is still valid? I see no devres calls in the >>>>>>>>> detach paths. >>>>>>>> >>>>>>>> I'm not sure what you mean by the attach/detach case. Do you expect >>>>>>>> device resources allocated in attach to be freed when detach run? >>>>>>> >>>>>>> Ah, never mind, the devres_release_all() would of course deal with that too. >>>>>>> >>>>>>> However, I just realized/remembered why it crashes. >>>>>>> >>>>>>> devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a >>>>>>> device which is used for the devres. This device is probably always the >>>>>>> bridge device. So when the bridge device goes away, so do those resources. >>>>>>> >>>>>>> The mipi_dsi_device_unregister() call deals with a DSI device, which was >>>>>>> created in devm_mipi_dsi_device_register_full(). Unregistering that DSI >>>>>>> device, which does happen when the DSI host is removed, does not affect the >>>>>>> devres of the bridge. >>>>>>> >>>>>>> So, unloading the DSI host driver causes mipi_dsi_device_unregister() and >>>>>>> mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and >>>>>>> unloading the bridge driver causes them to be called again via devres. >>>>>> >>>>>> Sorry, that's one of the things I don't quite get. Both functions are >>>>>> exclusively(?) called from I2C bridges, so the device passed there >>>>>> should be a i2c_client instance, and thus the MIPI-DSI host going away >>>>>> will not remove those i2c devices, only the MIPI-DSI ones, right? >>>>> >>>>> Yes. >>>>> >>>>>> So if we remove the host, the MIPI-DSI device will be detached and >>>>>> removed through the path you were explaing with the i2c client lingering >>>>>> around. And if we remove the I2C device, then devm will kick in and will >>>>>> detach and remove the MIPI-DSI device. >>>>> >>>>> Right. >>>>> >>>>>> Or is it the other way around? That if you remove the host, the device >>>>>> is properly detached and removed, but there's still the devm actions >>>>>> lingering around in the i2c device with pointers to the mipi_dsi_device >>>>>> that was first created, but since destroyed? >>>>>> >>>>>> And thus, if the i2c device ever goes away, we get a use-after-free? >>>>> >>>>> Hmm, I'm not sure I understand what you mean here... Aren't you describing >>>>> the same thing in both of these cases? >>>>> >>>>> In any case, to expand the description a bit, module unloading is quite >>>>> fragile. I do get a crash if I first unload the i2c bridge module, and only >>>>> then go and unload the other ones in the DRM pipeline. But I think module >>>>> unloading will very easily crash, whatever the DRM drivers being used are, >>>>> so it's not related to this particular issue. >>>>> >>>>> In my view, the unload sequence that should be supported (for development >>>>> purposes, not for production) is to start the unload from the display >>>>> controller module, which tears down the DRM pipeline, and going from there >>>>> towards the panels/connectors. >>>>> >>>>> Of course, it would be very nice if the module unloading worked perfectly, >>>>> but afaics fixing all that's related to module unloading would be a >>>>> multi-year project... So, I just want to keep the sequence I described above >>>>> working, which allows using modules while doing driver development. >>>> >>>> FTR, I'm all for supporting module unloading. The discussion above was >>>> about what is broken exactly, so we can come up with a good solution. >>> >>> Does that mean that you're ok with the patch, or that something should be >>> improved? >> >> No, I meant that at the very least the commit log needs to be updated to >> reflect what is actually going on, because at least my understanding of >> it doesn't match what actually happens. >> >> We want a solution to the problem you're facing, but it's not clear to >> me what the problem is exactly at this point, so it's hard to review a >> solution. > > So I haven't looked at the full thing, but I think the proper fix is to > make both detach and unregister cope with being called multiple times. I > think devm_ here is a red herring, the underlying issues is that we can > unregister/detach from two sides: > > - when the host dsi goes away > - when individual dsi devices on a given host go away > > So there needs to be book-keeping and locking to make sure no matter which > order things disappear, we don't try to unregister/detach a dsi device > twice. I think that is what my patch does (for devm_). Some vocabulary first: dsi peripheral device - The device that represents the DSI peripheral. It is a bridge or a panel, and (usually) an i2c or platform device. dsi peripheral driver - The driver handling the dsi peripheral device. dsi device - Runtime created device instance that represents the DSI peripheral. So in my case we have the i2c bridge device, and a dsi device is created for it in the setup code. dsi controller device - A device that has a DSI bus (usually a platform or i2c device, I would guess). dsi controller driver - A driver for the dsi controller device. Creates the dsi host. dsi host - represents the DSI host side, owned by the dsi controller driver. When a dsi peripheral driver uses devm_mipi_dsi_device_register_full() or devm_mipi_dsi_attach(), the dsi device is created and attached to the dsi host. When the dsi peripheral device-driver is unbound, devres will call unregister and detach are called automatically. This works fine. But when the device-driver for the dsi controller is unbound, the dsi controller driver will unregister the dsi host, and the unregistration will also unregister and detach the dsi device. But the devres is not told about that. So when the dsi peripheral is later unbound, its devres will again unregister and detach. To fix that this patch uses devm_remove_action() to remove the devres action when the host side goes away first. Now, after writing the above, I realized that all this won't help with the non-devm versions: the host side has unregistered and detached the dsi device, but if the dsi peripheral driver calls mipi_dsi_detach() or mipi_dsi_device_unregister(), it will again crash. Handling the attach/detach should be quite easy, and in fact the code already handles it, but it uses WARN_ON() there so that has to go. But attach/detach will crash anyway if the dsi device has already been freed, which happens when the dsi controller driver calls mipi_dsi_device_unregister(). So... The dsi peripheral driver should keep a reference to the dsi device, with get_device()? And then do a put_device() after calling mipi_dsi_device_unregister()? But we don't free the dsi device, it has essentially been disabled without telling the dsi peripheral driver about it, which might cause problems. I don't know... This doesn't sound correct to me. Probably my patch is just new wrong on top of old wrong. Or maybe I don't quite grasp how this works. Oh, I now realized/remembered that we can also have "real" dsi devices, when the dsi peripheral is a child of the dsi bus device, and controlled via DSI commands (i.e. not via i2c or memory mapped registers). Perhaps all/some of this confusion in the code comes from the use of dsi device for both "real" dsi devices and as a "dsi client", created by i2c/platform devices. With a "real" dsi device things work fine, as the driver only attaches and detaches, and the device (un)registration is handled by the dsi host. Well, this turned out to be a bit of a rambling email... I don't have a clear solution in mind. Tomi
On Mon, Sep 02, 2024 at 03:31:28PM GMT, Tomi Valkeinen wrote: > Hi, > > On 02/09/2024 13:50, Daniel Vetter wrote: > > On Mon, Sep 02, 2024 at 11:26:11AM +0200, Maxime Ripard wrote: > > > Hi, > > > > > > On Wed, Aug 07, 2024 at 03:19:23PM GMT, Tomi Valkeinen wrote: > > > > On 25/07/2024 14:28, Maxime Ripard wrote: > > > > > On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: > > > > > > On 02/07/2024 14:43, Maxime Ripard wrote: > > > > > > > Hi Tomi, > > > > > > > > > > > > > > On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: > > > > > > > > On 26/06/2024 18:07, Maxime Ripard wrote: > > > > > > > > > On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > On 26/06/2024 11:49, Maxime Ripard wrote: > > > > > > > > > > > Hi, > > > > > > > > > > > > > > > > > > > > > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > > > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > > > > > > > > > > > > > > > > > > > > > > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > > > > > > > > > > > > devm_mipi_dsi_attach(), the resource management is moved to devres, > > > > > > > > > > > > which releases the resource automatically when the bridge driver is > > > > > > > > > > > > unbound. > > > > > > > > > > > > > > > > > > > > > > > > However, if the DSI host goes away first, the host unregistration code > > > > > > > > > > > > will automatically detach and unregister any DSI peripherals, without > > > > > > > > > > > > notifying the devres about it. So when the bridge driver later is > > > > > > > > > > > > unbound, the resources are released a second time, leading to crash. > > > > > > > > > > > > > > > > > > > > > > That's super surprising. mipi_dsi_device_unregister calls > > > > > > > > > > > device_unregister, which calls device_del, which in turn calls > > > > > > > > > > > devres_release_all. > > > > > > > > > > > > > > > > > > > > Hmm, right. > > > > > > > > > > > > > > > > > > > > > If that doesn't work like that, then it's what needs to be fixed, and > > > > > > > > > > > not worked around in the MIPI-DSI bus. > > > > > > > > > > > > > > > > > > > > Well, something causes a crash for both the device register/unregister case > > > > > > > > > > and the attach/detach case, and the call stacks and debug prints showed a > > > > > > > > > > double unregister/detach... > > > > > > > > > > > > > > > > > > > > I need to dig up the board and check again why the devres_release_all() in > > > > > > > > > > device_del() doesn't solve this. But I can probably only get back to this in > > > > > > > > > > August, so it's perhaps best to ignore this patch for now. > > > > > > > > > > > > > > > > > > > > However, the attach/detach case is still valid? I see no devres calls in the > > > > > > > > > > detach paths. > > > > > > > > > > > > > > > > > > I'm not sure what you mean by the attach/detach case. Do you expect > > > > > > > > > device resources allocated in attach to be freed when detach run? > > > > > > > > > > > > > > > > Ah, never mind, the devres_release_all() would of course deal with that too. > > > > > > > > > > > > > > > > However, I just realized/remembered why it crashes. > > > > > > > > > > > > > > > > devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a > > > > > > > > device which is used for the devres. This device is probably always the > > > > > > > > bridge device. So when the bridge device goes away, so do those resources. > > > > > > > > > > > > > > > > The mipi_dsi_device_unregister() call deals with a DSI device, which was > > > > > > > > created in devm_mipi_dsi_device_register_full(). Unregistering that DSI > > > > > > > > device, which does happen when the DSI host is removed, does not affect the > > > > > > > > devres of the bridge. > > > > > > > > > > > > > > > > So, unloading the DSI host driver causes mipi_dsi_device_unregister() and > > > > > > > > mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and > > > > > > > > unloading the bridge driver causes them to be called again via devres. > > > > > > > > > > > > > > Sorry, that's one of the things I don't quite get. Both functions are > > > > > > > exclusively(?) called from I2C bridges, so the device passed there > > > > > > > should be a i2c_client instance, and thus the MIPI-DSI host going away > > > > > > > will not remove those i2c devices, only the MIPI-DSI ones, right? > > > > > > > > > > > > Yes. > > > > > > > > > > > > > So if we remove the host, the MIPI-DSI device will be detached and > > > > > > > removed through the path you were explaing with the i2c client lingering > > > > > > > around. And if we remove the I2C device, then devm will kick in and will > > > > > > > detach and remove the MIPI-DSI device. > > > > > > > > > > > > Right. > > > > > > > > > > > > > Or is it the other way around? That if you remove the host, the device > > > > > > > is properly detached and removed, but there's still the devm actions > > > > > > > lingering around in the i2c device with pointers to the mipi_dsi_device > > > > > > > that was first created, but since destroyed? > > > > > > > > > > > > > > And thus, if the i2c device ever goes away, we get a use-after-free? > > > > > > > > > > > > Hmm, I'm not sure I understand what you mean here... Aren't you describing > > > > > > the same thing in both of these cases? > > > > > > > > > > > > In any case, to expand the description a bit, module unloading is quite > > > > > > fragile. I do get a crash if I first unload the i2c bridge module, and only > > > > > > then go and unload the other ones in the DRM pipeline. But I think module > > > > > > unloading will very easily crash, whatever the DRM drivers being used are, > > > > > > so it's not related to this particular issue. > > > > > > > > > > > > In my view, the unload sequence that should be supported (for development > > > > > > purposes, not for production) is to start the unload from the display > > > > > > controller module, which tears down the DRM pipeline, and going from there > > > > > > towards the panels/connectors. > > > > > > > > > > > > Of course, it would be very nice if the module unloading worked perfectly, > > > > > > but afaics fixing all that's related to module unloading would be a > > > > > > multi-year project... So, I just want to keep the sequence I described above > > > > > > working, which allows using modules while doing driver development. > > > > > > > > > > FTR, I'm all for supporting module unloading. The discussion above was > > > > > about what is broken exactly, so we can come up with a good solution. > > > > > > > > Does that mean that you're ok with the patch, or that something should be > > > > improved? > > > > > > No, I meant that at the very least the commit log needs to be updated to > > > reflect what is actually going on, because at least my understanding of > > > it doesn't match what actually happens. > > > > > > We want a solution to the problem you're facing, but it's not clear to > > > me what the problem is exactly at this point, so it's hard to review a > > > solution. > > > > So I haven't looked at the full thing, but I think the proper fix is to > > make both detach and unregister cope with being called multiple times. I > > think devm_ here is a red herring, the underlying issues is that we can > > unregister/detach from two sides: > > > > - when the host dsi goes away > > - when individual dsi devices on a given host go away > > > > So there needs to be book-keeping and locking to make sure no matter which > > order things disappear, we don't try to unregister/detach a dsi device > > twice. > > I think that is what my patch does (for devm_). > > Some vocabulary first: > > dsi peripheral device - The device that represents the DSI peripheral. It is > a bridge or a panel, and (usually) an i2c or platform device. > > dsi peripheral driver - The driver handling the dsi peripheral device. > > dsi device - Runtime created device instance that represents the DSI > peripheral. So in my case we have the i2c bridge device, and a dsi device is > created for it in the setup code. > > dsi controller device - A device that has a DSI bus (usually a platform or > i2c device, I would guess). > > dsi controller driver - A driver for the dsi controller device. Creates the > dsi host. > > dsi host - represents the DSI host side, owned by the dsi controller driver. > > When a dsi peripheral driver uses devm_mipi_dsi_device_register_full() or > devm_mipi_dsi_attach(), the dsi device is created and attached to the dsi > host. When the dsi peripheral device-driver is unbound, devres will call > unregister and detach are called automatically. This works fine. > > But when the device-driver for the dsi controller is unbound, the dsi > controller driver will unregister the dsi host, I assume that you're talking about: https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L357 ? > and the unregistration will also unregister and detach the dsi device. And https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L346 ? > But the devres is not told about that. If my assumptions are correct, device_unregister() will definitely clean up the devres resources on that device: https://elixir.bootlin.com/linux/v6.10.7/source/drivers/base/core.c#L3886 > So when the dsi peripheral is later unbound, its devres will again > unregister and detach. I guess in this case, only the device resource tied to the i2c client device (so dsi device? in your nomenclature) will run. Or is it that: https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L250 Gets tied to the i2c client device, but the host being removed has free'd that device already? > To fix that this patch uses devm_remove_action() to remove the devres > action when the host side goes away first. > > Now, after writing the above, I realized that all this won't help with the > non-devm versions: the host side has unregistered and detached the dsi > device, but if the dsi peripheral driver calls mipi_dsi_detach() or > mipi_dsi_device_unregister(), it will again crash. > > Handling the attach/detach should be quite easy, and in fact the code > already handles it, but it uses WARN_ON() there so that has to go. But > attach/detach will crash anyway if the dsi device has already been freed, > which happens when the dsi controller driver calls > mipi_dsi_device_unregister(). > > So... The dsi peripheral driver should keep a reference to the dsi device, > with get_device()? And then do a put_device() after calling > mipi_dsi_device_unregister()? > > But we don't free the dsi device, it has essentially been disabled without > telling the dsi peripheral driver about it, which might cause problems. Yeah, and the host pointer would be lingering as well. > I don't know... This doesn't sound correct to me. Probably my patch is just > new wrong on top of old wrong. Or maybe I don't quite grasp how this works. I think we can fix some of them by storing the "parent" device of mipi_dsi_device (ie, the i2c client device) that the devm action is registered against, and removing the action in mipi_dsi_remove_device_fn. And marking non-devm variants as deprecated in favor of the devm one. Maxime
Hi, On 03/09/2024 14:56, Maxime Ripard wrote: > On Mon, Sep 02, 2024 at 03:31:28PM GMT, Tomi Valkeinen wrote: >> Hi, >> >> On 02/09/2024 13:50, Daniel Vetter wrote: >>> On Mon, Sep 02, 2024 at 11:26:11AM +0200, Maxime Ripard wrote: >>>> Hi, >>>> >>>> On Wed, Aug 07, 2024 at 03:19:23PM GMT, Tomi Valkeinen wrote: >>>>> On 25/07/2024 14:28, Maxime Ripard wrote: >>>>>> On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: >>>>>>> On 02/07/2024 14:43, Maxime Ripard wrote: >>>>>>>> Hi Tomi, >>>>>>>> >>>>>>>> On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: >>>>>>>>> On 26/06/2024 18:07, Maxime Ripard wrote: >>>>>>>>>> On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: >>>>>>>>>>> On 26/06/2024 11:49, Maxime Ripard wrote: >>>>>>>>>>>> Hi, >>>>>>>>>>>> >>>>>>>>>>>> On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: >>>>>>>>>>>>> From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> >>>>>>>>>>>>> >>>>>>>>>>>>> When a bridge driver uses devm_mipi_dsi_device_register_full() or >>>>>>>>>>>>> devm_mipi_dsi_attach(), the resource management is moved to devres, >>>>>>>>>>>>> which releases the resource automatically when the bridge driver is >>>>>>>>>>>>> unbound. >>>>>>>>>>>>> >>>>>>>>>>>>> However, if the DSI host goes away first, the host unregistration code >>>>>>>>>>>>> will automatically detach and unregister any DSI peripherals, without >>>>>>>>>>>>> notifying the devres about it. So when the bridge driver later is >>>>>>>>>>>>> unbound, the resources are released a second time, leading to crash. >>>>>>>>>>>> >>>>>>>>>>>> That's super surprising. mipi_dsi_device_unregister calls >>>>>>>>>>>> device_unregister, which calls device_del, which in turn calls >>>>>>>>>>>> devres_release_all. >>>>>>>>>>> >>>>>>>>>>> Hmm, right. >>>>>>>>>>> >>>>>>>>>>>> If that doesn't work like that, then it's what needs to be fixed, and >>>>>>>>>>>> not worked around in the MIPI-DSI bus. >>>>>>>>>>> >>>>>>>>>>> Well, something causes a crash for both the device register/unregister case >>>>>>>>>>> and the attach/detach case, and the call stacks and debug prints showed a >>>>>>>>>>> double unregister/detach... >>>>>>>>>>> >>>>>>>>>>> I need to dig up the board and check again why the devres_release_all() in >>>>>>>>>>> device_del() doesn't solve this. But I can probably only get back to this in >>>>>>>>>>> August, so it's perhaps best to ignore this patch for now. >>>>>>>>>>> >>>>>>>>>>> However, the attach/detach case is still valid? I see no devres calls in the >>>>>>>>>>> detach paths. >>>>>>>>>> >>>>>>>>>> I'm not sure what you mean by the attach/detach case. Do you expect >>>>>>>>>> device resources allocated in attach to be freed when detach run? >>>>>>>>> >>>>>>>>> Ah, never mind, the devres_release_all() would of course deal with that too. >>>>>>>>> >>>>>>>>> However, I just realized/remembered why it crashes. >>>>>>>>> >>>>>>>>> devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a >>>>>>>>> device which is used for the devres. This device is probably always the >>>>>>>>> bridge device. So when the bridge device goes away, so do those resources. >>>>>>>>> >>>>>>>>> The mipi_dsi_device_unregister() call deals with a DSI device, which was >>>>>>>>> created in devm_mipi_dsi_device_register_full(). Unregistering that DSI >>>>>>>>> device, which does happen when the DSI host is removed, does not affect the >>>>>>>>> devres of the bridge. >>>>>>>>> >>>>>>>>> So, unloading the DSI host driver causes mipi_dsi_device_unregister() and >>>>>>>>> mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and >>>>>>>>> unloading the bridge driver causes them to be called again via devres. >>>>>>>> >>>>>>>> Sorry, that's one of the things I don't quite get. Both functions are >>>>>>>> exclusively(?) called from I2C bridges, so the device passed there >>>>>>>> should be a i2c_client instance, and thus the MIPI-DSI host going away >>>>>>>> will not remove those i2c devices, only the MIPI-DSI ones, right? >>>>>>> >>>>>>> Yes. >>>>>>> >>>>>>>> So if we remove the host, the MIPI-DSI device will be detached and >>>>>>>> removed through the path you were explaing with the i2c client lingering >>>>>>>> around. And if we remove the I2C device, then devm will kick in and will >>>>>>>> detach and remove the MIPI-DSI device. >>>>>>> >>>>>>> Right. >>>>>>> >>>>>>>> Or is it the other way around? That if you remove the host, the device >>>>>>>> is properly detached and removed, but there's still the devm actions >>>>>>>> lingering around in the i2c device with pointers to the mipi_dsi_device >>>>>>>> that was first created, but since destroyed? >>>>>>>> >>>>>>>> And thus, if the i2c device ever goes away, we get a use-after-free? >>>>>>> >>>>>>> Hmm, I'm not sure I understand what you mean here... Aren't you describing >>>>>>> the same thing in both of these cases? >>>>>>> >>>>>>> In any case, to expand the description a bit, module unloading is quite >>>>>>> fragile. I do get a crash if I first unload the i2c bridge module, and only >>>>>>> then go and unload the other ones in the DRM pipeline. But I think module >>>>>>> unloading will very easily crash, whatever the DRM drivers being used are, >>>>>>> so it's not related to this particular issue. >>>>>>> >>>>>>> In my view, the unload sequence that should be supported (for development >>>>>>> purposes, not for production) is to start the unload from the display >>>>>>> controller module, which tears down the DRM pipeline, and going from there >>>>>>> towards the panels/connectors. >>>>>>> >>>>>>> Of course, it would be very nice if the module unloading worked perfectly, >>>>>>> but afaics fixing all that's related to module unloading would be a >>>>>>> multi-year project... So, I just want to keep the sequence I described above >>>>>>> working, which allows using modules while doing driver development. >>>>>> >>>>>> FTR, I'm all for supporting module unloading. The discussion above was >>>>>> about what is broken exactly, so we can come up with a good solution. >>>>> >>>>> Does that mean that you're ok with the patch, or that something should be >>>>> improved? >>>> >>>> No, I meant that at the very least the commit log needs to be updated to >>>> reflect what is actually going on, because at least my understanding of >>>> it doesn't match what actually happens. >>>> >>>> We want a solution to the problem you're facing, but it's not clear to >>>> me what the problem is exactly at this point, so it's hard to review a >>>> solution. >>> >>> So I haven't looked at the full thing, but I think the proper fix is to >>> make both detach and unregister cope with being called multiple times. I >>> think devm_ here is a red herring, the underlying issues is that we can >>> unregister/detach from two sides: >>> >>> - when the host dsi goes away >>> - when individual dsi devices on a given host go away >>> >>> So there needs to be book-keeping and locking to make sure no matter which >>> order things disappear, we don't try to unregister/detach a dsi device >>> twice. >> >> I think that is what my patch does (for devm_). >> >> Some vocabulary first: >> >> dsi peripheral device - The device that represents the DSI peripheral. It is >> a bridge or a panel, and (usually) an i2c or platform device. >> >> dsi peripheral driver - The driver handling the dsi peripheral device. >> >> dsi device - Runtime created device instance that represents the DSI >> peripheral. So in my case we have the i2c bridge device, and a dsi device is >> created for it in the setup code. >> >> dsi controller device - A device that has a DSI bus (usually a platform or >> i2c device, I would guess). >> >> dsi controller driver - A driver for the dsi controller device. Creates the >> dsi host. >> >> dsi host - represents the DSI host side, owned by the dsi controller driver. >> >> When a dsi peripheral driver uses devm_mipi_dsi_device_register_full() or >> devm_mipi_dsi_attach(), the dsi device is created and attached to the dsi >> host. When the dsi peripheral device-driver is unbound, devres will call >> unregister and detach are called automatically. This works fine. >> >> But when the device-driver for the dsi controller is unbound, the dsi >> controller driver will unregister the dsi host, > > I assume that you're talking about: > https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L357 ? Yes. >> and the unregistration will also unregister and detach the dsi device. > > And https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L346 ? And yes. >> But the devres is not told about that. > > If my assumptions are correct, device_unregister() will definitely clean > up the devres resources on that device: Yes, and not. Devres cleans up the resources on "that" device, where that device is the dsi_device. But that is _not_ the one where we registered the resources. > https://elixir.bootlin.com/linux/v6.10.7/source/drivers/base/core.c#L3886 > >> So when the dsi peripheral is later unbound, its devres will again >> unregister and detach. > > I guess in this case, only the device resource tied to the i2c client > device (so dsi device? in your nomenclature) will run. No, the i2c client device is the "dsi peripheral device". Say, a DSI video mode panel that is controlled via i2c. Or ti-sn65dsi86.c bridge (that one actually uses a auxiliary_device so it's a bit more complex). > Or is it that: > https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L250 > > Gets tied to the i2c client device, but the host being removed has > free'd that device already? Yes. The devm_mipi_dsi_* functions register the resources (in this case, the dsi_device itself and the dsi attach) to the i2c client device's devres. >> To fix that this patch uses devm_remove_action() to remove the devres >> action when the host side goes away first. >> >> Now, after writing the above, I realized that all this won't help with the >> non-devm versions: the host side has unregistered and detached the dsi >> device, but if the dsi peripheral driver calls mipi_dsi_detach() or >> mipi_dsi_device_unregister(), it will again crash. >> >> Handling the attach/detach should be quite easy, and in fact the code >> already handles it, but it uses WARN_ON() there so that has to go. But >> attach/detach will crash anyway if the dsi device has already been freed, >> which happens when the dsi controller driver calls >> mipi_dsi_device_unregister(). >> >> So... The dsi peripheral driver should keep a reference to the dsi device, >> with get_device()? And then do a put_device() after calling >> mipi_dsi_device_unregister()? >> >> But we don't free the dsi device, it has essentially been disabled without >> telling the dsi peripheral driver about it, which might cause problems. > > Yeah, and the host pointer would be lingering as well. > >> I don't know... This doesn't sound correct to me. Probably my patch is just >> new wrong on top of old wrong. Or maybe I don't quite grasp how this works. > > I think we can fix some of them by storing the "parent" device of > mipi_dsi_device (ie, the i2c client device) that the devm action is > registered against, and removing the action in > mipi_dsi_remove_device_fn. That is what my patch does. But, as Sima replied, there's much more to this. I'll try to look at this at some point, but, unfortunately, no customer so far (as far as my memory serves) has ever been interested in module unloading or unbinding the devices, so... not very high in the todo list =). Tomi
On Tue, Sep 03, 2024 at 04:05:06PM +0300, Tomi Valkeinen wrote: > Hi, > > On 03/09/2024 14:56, Maxime Ripard wrote: > > On Mon, Sep 02, 2024 at 03:31:28PM GMT, Tomi Valkeinen wrote: > > > Hi, > > > > > > On 02/09/2024 13:50, Daniel Vetter wrote: > > > > On Mon, Sep 02, 2024 at 11:26:11AM +0200, Maxime Ripard wrote: > > > > > Hi, > > > > > > > > > > On Wed, Aug 07, 2024 at 03:19:23PM GMT, Tomi Valkeinen wrote: > > > > > > On 25/07/2024 14:28, Maxime Ripard wrote: > > > > > > > On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: > > > > > > > > On 02/07/2024 14:43, Maxime Ripard wrote: > > > > > > > > > Hi Tomi, > > > > > > > > > > > > > > > > > > On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > On 26/06/2024 18:07, Maxime Ripard wrote: > > > > > > > > > > > On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > > > On 26/06/2024 11:49, Maxime Ripard wrote: > > > > > > > > > > > > > Hi, > > > > > > > > > > > > > > > > > > > > > > > > > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > > > > > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > > > > > > > > > > > > > > > > > > > > > > > > > > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > > > > > > > > > > > > > > devm_mipi_dsi_attach(), the resource management is moved to devres, > > > > > > > > > > > > > > which releases the resource automatically when the bridge driver is > > > > > > > > > > > > > > unbound. > > > > > > > > > > > > > > > > > > > > > > > > > > > > However, if the DSI host goes away first, the host unregistration code > > > > > > > > > > > > > > will automatically detach and unregister any DSI peripherals, without > > > > > > > > > > > > > > notifying the devres about it. So when the bridge driver later is > > > > > > > > > > > > > > unbound, the resources are released a second time, leading to crash. > > > > > > > > > > > > > > > > > > > > > > > > > > That's super surprising. mipi_dsi_device_unregister calls > > > > > > > > > > > > > device_unregister, which calls device_del, which in turn calls > > > > > > > > > > > > > devres_release_all. > > > > > > > > > > > > > > > > > > > > > > > > Hmm, right. > > > > > > > > > > > > > > > > > > > > > > > > > If that doesn't work like that, then it's what needs to be fixed, and > > > > > > > > > > > > > not worked around in the MIPI-DSI bus. > > > > > > > > > > > > > > > > > > > > > > > > Well, something causes a crash for both the device register/unregister case > > > > > > > > > > > > and the attach/detach case, and the call stacks and debug prints showed a > > > > > > > > > > > > double unregister/detach... > > > > > > > > > > > > > > > > > > > > > > > > I need to dig up the board and check again why the devres_release_all() in > > > > > > > > > > > > device_del() doesn't solve this. But I can probably only get back to this in > > > > > > > > > > > > August, so it's perhaps best to ignore this patch for now. > > > > > > > > > > > > > > > > > > > > > > > > However, the attach/detach case is still valid? I see no devres calls in the > > > > > > > > > > > > detach paths. > > > > > > > > > > > > > > > > > > > > > > I'm not sure what you mean by the attach/detach case. Do you expect > > > > > > > > > > > device resources allocated in attach to be freed when detach run? > > > > > > > > > > > > > > > > > > > > Ah, never mind, the devres_release_all() would of course deal with that too. > > > > > > > > > > > > > > > > > > > > However, I just realized/remembered why it crashes. > > > > > > > > > > > > > > > > > > > > devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a > > > > > > > > > > device which is used for the devres. This device is probably always the > > > > > > > > > > bridge device. So when the bridge device goes away, so do those resources. > > > > > > > > > > > > > > > > > > > > The mipi_dsi_device_unregister() call deals with a DSI device, which was > > > > > > > > > > created in devm_mipi_dsi_device_register_full(). Unregistering that DSI > > > > > > > > > > device, which does happen when the DSI host is removed, does not affect the > > > > > > > > > > devres of the bridge. > > > > > > > > > > > > > > > > > > > > So, unloading the DSI host driver causes mipi_dsi_device_unregister() and > > > > > > > > > > mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and > > > > > > > > > > unloading the bridge driver causes them to be called again via devres. > > > > > > > > > > > > > > > > > > Sorry, that's one of the things I don't quite get. Both functions are > > > > > > > > > exclusively(?) called from I2C bridges, so the device passed there > > > > > > > > > should be a i2c_client instance, and thus the MIPI-DSI host going away > > > > > > > > > will not remove those i2c devices, only the MIPI-DSI ones, right? > > > > > > > > > > > > > > > > Yes. > > > > > > > > > > > > > > > > > So if we remove the host, the MIPI-DSI device will be detached and > > > > > > > > > removed through the path you were explaing with the i2c client lingering > > > > > > > > > around. And if we remove the I2C device, then devm will kick in and will > > > > > > > > > detach and remove the MIPI-DSI device. > > > > > > > > > > > > > > > > Right. > > > > > > > > > > > > > > > > > Or is it the other way around? That if you remove the host, the device > > > > > > > > > is properly detached and removed, but there's still the devm actions > > > > > > > > > lingering around in the i2c device with pointers to the mipi_dsi_device > > > > > > > > > that was first created, but since destroyed? > > > > > > > > > > > > > > > > > > And thus, if the i2c device ever goes away, we get a use-after-free? > > > > > > > > > > > > > > > > Hmm, I'm not sure I understand what you mean here... Aren't you describing > > > > > > > > the same thing in both of these cases? > > > > > > > > > > > > > > > > In any case, to expand the description a bit, module unloading is quite > > > > > > > > fragile. I do get a crash if I first unload the i2c bridge module, and only > > > > > > > > then go and unload the other ones in the DRM pipeline. But I think module > > > > > > > > unloading will very easily crash, whatever the DRM drivers being used are, > > > > > > > > so it's not related to this particular issue. > > > > > > > > > > > > > > > > In my view, the unload sequence that should be supported (for development > > > > > > > > purposes, not for production) is to start the unload from the display > > > > > > > > controller module, which tears down the DRM pipeline, and going from there > > > > > > > > towards the panels/connectors. > > > > > > > > > > > > > > > > Of course, it would be very nice if the module unloading worked perfectly, > > > > > > > > but afaics fixing all that's related to module unloading would be a > > > > > > > > multi-year project... So, I just want to keep the sequence I described above > > > > > > > > working, which allows using modules while doing driver development. > > > > > > > > > > > > > > FTR, I'm all for supporting module unloading. The discussion above was > > > > > > > about what is broken exactly, so we can come up with a good solution. > > > > > > > > > > > > Does that mean that you're ok with the patch, or that something should be > > > > > > improved? > > > > > > > > > > No, I meant that at the very least the commit log needs to be updated to > > > > > reflect what is actually going on, because at least my understanding of > > > > > it doesn't match what actually happens. > > > > > > > > > > We want a solution to the problem you're facing, but it's not clear to > > > > > me what the problem is exactly at this point, so it's hard to review a > > > > > solution. > > > > > > > > So I haven't looked at the full thing, but I think the proper fix is to > > > > make both detach and unregister cope with being called multiple times. I > > > > think devm_ here is a red herring, the underlying issues is that we can > > > > unregister/detach from two sides: > > > > > > > > - when the host dsi goes away > > > > - when individual dsi devices on a given host go away > > > > > > > > So there needs to be book-keeping and locking to make sure no matter which > > > > order things disappear, we don't try to unregister/detach a dsi device > > > > twice. > > > > > > I think that is what my patch does (for devm_). > > > > > > Some vocabulary first: > > > > > > dsi peripheral device - The device that represents the DSI peripheral. It is > > > a bridge or a panel, and (usually) an i2c or platform device. > > > > > > dsi peripheral driver - The driver handling the dsi peripheral device. > > > > > > dsi device - Runtime created device instance that represents the DSI > > > peripheral. So in my case we have the i2c bridge device, and a dsi device is > > > created for it in the setup code. > > > > > > dsi controller device - A device that has a DSI bus (usually a platform or > > > i2c device, I would guess). > > > > > > dsi controller driver - A driver for the dsi controller device. Creates the > > > dsi host. > > > > > > dsi host - represents the DSI host side, owned by the dsi controller driver. > > > > > > When a dsi peripheral driver uses devm_mipi_dsi_device_register_full() or > > > devm_mipi_dsi_attach(), the dsi device is created and attached to the dsi > > > host. When the dsi peripheral device-driver is unbound, devres will call > > > unregister and detach are called automatically. This works fine. > > > > > > But when the device-driver for the dsi controller is unbound, the dsi > > > controller driver will unregister the dsi host, > > > > I assume that you're talking about: > > https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L357 ? > > Yes. > > > > and the unregistration will also unregister and detach the dsi device. > > > > And https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L346 ? > > And yes. > > > > But the devres is not told about that. > > > > If my assumptions are correct, device_unregister() will definitely clean > > up the devres resources on that device: > > Yes, and not. Devres cleans up the resources on "that" device, where that > device is the dsi_device. But that is _not_ the one where we registered the > resources. > > > https://elixir.bootlin.com/linux/v6.10.7/source/drivers/base/core.c#L3886 > > > > > So when the dsi peripheral is later unbound, its devres will again > > > unregister and detach. > > > > I guess in this case, only the device resource tied to the i2c client > > device (so dsi device? in your nomenclature) will run. > > No, the i2c client device is the "dsi peripheral device". Say, a DSI video > mode panel that is controlled via i2c. Or ti-sn65dsi86.c bridge (that one > actually uses a auxiliary_device so it's a bit more complex). > > > Or is it that: > > https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L250 > > > > Gets tied to the i2c client device, but the host being removed has > > free'd that device already? > > Yes. The devm_mipi_dsi_* functions register the resources (in this case, the > dsi_device itself and the dsi attach) to the i2c client device's devres. > > > > To fix that this patch uses devm_remove_action() to remove the devres > > > action when the host side goes away first. > > > > > > Now, after writing the above, I realized that all this won't help with the > > > non-devm versions: the host side has unregistered and detached the dsi > > > device, but if the dsi peripheral driver calls mipi_dsi_detach() or > > > mipi_dsi_device_unregister(), it will again crash. > > > > > > Handling the attach/detach should be quite easy, and in fact the code > > > already handles it, but it uses WARN_ON() there so that has to go. But > > > attach/detach will crash anyway if the dsi device has already been freed, > > > which happens when the dsi controller driver calls > > > mipi_dsi_device_unregister(). > > > > > > So... The dsi peripheral driver should keep a reference to the dsi device, > > > with get_device()? And then do a put_device() after calling > > > mipi_dsi_device_unregister()? > > > > > > But we don't free the dsi device, it has essentially been disabled without > > > telling the dsi peripheral driver about it, which might cause problems. > > > > Yeah, and the host pointer would be lingering as well. > > > > > I don't know... This doesn't sound correct to me. Probably my patch is just > > > new wrong on top of old wrong. Or maybe I don't quite grasp how this works. > > > > I think we can fix some of them by storing the "parent" device of > > mipi_dsi_device (ie, the i2c client device) that the devm action is > > registered against, and removing the action in > > mipi_dsi_remove_device_fn. > > That is what my patch does. > > But, as Sima replied, there's much more to this. I'll try to look at this at > some point, but, unfortunately, no customer so far (as far as my memory > serves) has ever been interested in module unloading or unbinding the > devices, so... not very high in the todo list =). I think the proper fix isn't too bad. The changes in the dsi code should be fairly small, and for the refcounting fix you only have to add a call to mipi_dsi_put() in all the non-dsi drivers. With that prep work you can then tackle the conversion to a proper device/driver model driver for each non-dsi driver individually, and as needed. And at least the drivers I looked at are practically there already, so for an individual case I don't think it's a horrible amount of work to fix this properly. Still more than your original patch in this thread though. -Sima -- Simona Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
On Tue, Sep 03, 2024 at 01:56:45PM +0200, Maxime Ripard wrote: > On Mon, Sep 02, 2024 at 03:31:28PM GMT, Tomi Valkeinen wrote: > > Hi, > > > > On 02/09/2024 13:50, Daniel Vetter wrote: > > > On Mon, Sep 02, 2024 at 11:26:11AM +0200, Maxime Ripard wrote: > > > > Hi, > > > > > > > > On Wed, Aug 07, 2024 at 03:19:23PM GMT, Tomi Valkeinen wrote: > > > > > On 25/07/2024 14:28, Maxime Ripard wrote: > > > > > > On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: > > > > > > > On 02/07/2024 14:43, Maxime Ripard wrote: > > > > > > > > Hi Tomi, > > > > > > > > > > > > > > > > On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: > > > > > > > > > On 26/06/2024 18:07, Maxime Ripard wrote: > > > > > > > > > > On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > > On 26/06/2024 11:49, Maxime Ripard wrote: > > > > > > > > > > > > Hi, > > > > > > > > > > > > > > > > > > > > > > > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > > > > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > > > > > > > > > > > > > > > > > > > > > > > > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > > > > > > > > > > > > > devm_mipi_dsi_attach(), the resource management is moved to devres, > > > > > > > > > > > > > which releases the resource automatically when the bridge driver is > > > > > > > > > > > > > unbound. > > > > > > > > > > > > > > > > > > > > > > > > > > However, if the DSI host goes away first, the host unregistration code > > > > > > > > > > > > > will automatically detach and unregister any DSI peripherals, without > > > > > > > > > > > > > notifying the devres about it. So when the bridge driver later is > > > > > > > > > > > > > unbound, the resources are released a second time, leading to crash. > > > > > > > > > > > > > > > > > > > > > > > > That's super surprising. mipi_dsi_device_unregister calls > > > > > > > > > > > > device_unregister, which calls device_del, which in turn calls > > > > > > > > > > > > devres_release_all. > > > > > > > > > > > > > > > > > > > > > > Hmm, right. > > > > > > > > > > > > > > > > > > > > > > > If that doesn't work like that, then it's what needs to be fixed, and > > > > > > > > > > > > not worked around in the MIPI-DSI bus. > > > > > > > > > > > > > > > > > > > > > > Well, something causes a crash for both the device register/unregister case > > > > > > > > > > > and the attach/detach case, and the call stacks and debug prints showed a > > > > > > > > > > > double unregister/detach... > > > > > > > > > > > > > > > > > > > > > > I need to dig up the board and check again why the devres_release_all() in > > > > > > > > > > > device_del() doesn't solve this. But I can probably only get back to this in > > > > > > > > > > > August, so it's perhaps best to ignore this patch for now. > > > > > > > > > > > > > > > > > > > > > > However, the attach/detach case is still valid? I see no devres calls in the > > > > > > > > > > > detach paths. > > > > > > > > > > > > > > > > > > > > I'm not sure what you mean by the attach/detach case. Do you expect > > > > > > > > > > device resources allocated in attach to be freed when detach run? > > > > > > > > > > > > > > > > > > Ah, never mind, the devres_release_all() would of course deal with that too. > > > > > > > > > > > > > > > > > > However, I just realized/remembered why it crashes. > > > > > > > > > > > > > > > > > > devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a > > > > > > > > > device which is used for the devres. This device is probably always the > > > > > > > > > bridge device. So when the bridge device goes away, so do those resources. > > > > > > > > > > > > > > > > > > The mipi_dsi_device_unregister() call deals with a DSI device, which was > > > > > > > > > created in devm_mipi_dsi_device_register_full(). Unregistering that DSI > > > > > > > > > device, which does happen when the DSI host is removed, does not affect the > > > > > > > > > devres of the bridge. > > > > > > > > > > > > > > > > > > So, unloading the DSI host driver causes mipi_dsi_device_unregister() and > > > > > > > > > mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and > > > > > > > > > unloading the bridge driver causes them to be called again via devres. > > > > > > > > > > > > > > > > Sorry, that's one of the things I don't quite get. Both functions are > > > > > > > > exclusively(?) called from I2C bridges, so the device passed there > > > > > > > > should be a i2c_client instance, and thus the MIPI-DSI host going away > > > > > > > > will not remove those i2c devices, only the MIPI-DSI ones, right? > > > > > > > > > > > > > > Yes. > > > > > > > > > > > > > > > So if we remove the host, the MIPI-DSI device will be detached and > > > > > > > > removed through the path you were explaing with the i2c client lingering > > > > > > > > around. And if we remove the I2C device, then devm will kick in and will > > > > > > > > detach and remove the MIPI-DSI device. > > > > > > > > > > > > > > Right. > > > > > > > > > > > > > > > Or is it the other way around? That if you remove the host, the device > > > > > > > > is properly detached and removed, but there's still the devm actions > > > > > > > > lingering around in the i2c device with pointers to the mipi_dsi_device > > > > > > > > that was first created, but since destroyed? > > > > > > > > > > > > > > > > And thus, if the i2c device ever goes away, we get a use-after-free? > > > > > > > > > > > > > > Hmm, I'm not sure I understand what you mean here... Aren't you describing > > > > > > > the same thing in both of these cases? > > > > > > > > > > > > > > In any case, to expand the description a bit, module unloading is quite > > > > > > > fragile. I do get a crash if I first unload the i2c bridge module, and only > > > > > > > then go and unload the other ones in the DRM pipeline. But I think module > > > > > > > unloading will very easily crash, whatever the DRM drivers being used are, > > > > > > > so it's not related to this particular issue. > > > > > > > > > > > > > > In my view, the unload sequence that should be supported (for development > > > > > > > purposes, not for production) is to start the unload from the display > > > > > > > controller module, which tears down the DRM pipeline, and going from there > > > > > > > towards the panels/connectors. > > > > > > > > > > > > > > Of course, it would be very nice if the module unloading worked perfectly, > > > > > > > but afaics fixing all that's related to module unloading would be a > > > > > > > multi-year project... So, I just want to keep the sequence I described above > > > > > > > working, which allows using modules while doing driver development. > > > > > > > > > > > > FTR, I'm all for supporting module unloading. The discussion above was > > > > > > about what is broken exactly, so we can come up with a good solution. > > > > > > > > > > Does that mean that you're ok with the patch, or that something should be > > > > > improved? > > > > > > > > No, I meant that at the very least the commit log needs to be updated to > > > > reflect what is actually going on, because at least my understanding of > > > > it doesn't match what actually happens. > > > > > > > > We want a solution to the problem you're facing, but it's not clear to > > > > me what the problem is exactly at this point, so it's hard to review a > > > > solution. > > > > > > So I haven't looked at the full thing, but I think the proper fix is to > > > make both detach and unregister cope with being called multiple times. I > > > think devm_ here is a red herring, the underlying issues is that we can > > > unregister/detach from two sides: > > > > > > - when the host dsi goes away > > > - when individual dsi devices on a given host go away > > > > > > So there needs to be book-keeping and locking to make sure no matter which > > > order things disappear, we don't try to unregister/detach a dsi device > > > twice. > > > > I think that is what my patch does (for devm_). > > > > Some vocabulary first: > > > > dsi peripheral device - The device that represents the DSI peripheral. It is > > a bridge or a panel, and (usually) an i2c or platform device. > > > > dsi peripheral driver - The driver handling the dsi peripheral device. > > > > dsi device - Runtime created device instance that represents the DSI > > peripheral. So in my case we have the i2c bridge device, and a dsi device is > > created for it in the setup code. > > > > dsi controller device - A device that has a DSI bus (usually a platform or > > i2c device, I would guess). > > > > dsi controller driver - A driver for the dsi controller device. Creates the > > dsi host. > > > > dsi host - represents the DSI host side, owned by the dsi controller driver. > > > > When a dsi peripheral driver uses devm_mipi_dsi_device_register_full() or > > devm_mipi_dsi_attach(), the dsi device is created and attached to the dsi > > host. When the dsi peripheral device-driver is unbound, devres will call > > unregister and detach are called automatically. This works fine. > > > > But when the device-driver for the dsi controller is unbound, the dsi > > controller driver will unregister the dsi host, > > I assume that you're talking about: > https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L357 ? > > > and the unregistration will also unregister and detach the dsi device. > > And https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L346 ? > > > But the devres is not told about that. > > If my assumptions are correct, device_unregister() will definitely clean > up the devres resources on that device: > > https://elixir.bootlin.com/linux/v6.10.7/source/drivers/base/core.c#L3886 > > > So when the dsi peripheral is later unbound, its devres will again > > unregister and detach. > > I guess in this case, only the device resource tied to the i2c client > device (so dsi device? in your nomenclature) will run. > > Or is it that: > https://elixir.bootlin.com/linux/v6.10.7/source/drivers/gpu/drm/drm_mipi_dsi.c#L250 > > Gets tied to the i2c client device, but the host being removed has > free'd that device already? > > > To fix that this patch uses devm_remove_action() to remove the devres > > action when the host side goes away first. > > > > Now, after writing the above, I realized that all this won't help with the > > non-devm versions: the host side has unregistered and detached the dsi > > device, but if the dsi peripheral driver calls mipi_dsi_detach() or > > mipi_dsi_device_unregister(), it will again crash. > > > > Handling the attach/detach should be quite easy, and in fact the code > > already handles it, but it uses WARN_ON() there so that has to go. But > > attach/detach will crash anyway if the dsi device has already been freed, > > which happens when the dsi controller driver calls > > mipi_dsi_device_unregister(). > > > > So... The dsi peripheral driver should keep a reference to the dsi device, > > with get_device()? And then do a put_device() after calling > > mipi_dsi_device_unregister()? > > > > But we don't free the dsi device, it has essentially been disabled without > > telling the dsi peripheral driver about it, which might cause problems. > > Yeah, and the host pointer would be lingering as well. > > > I don't know... This doesn't sound correct to me. Probably my patch is just > > new wrong on top of old wrong. Or maybe I don't quite grasp how this works. > > I think we can fix some of them by storing the "parent" device of > mipi_dsi_device (ie, the i2c client device) that the devm action is > registered against, and removing the action in > mipi_dsi_remove_device_fn. > > And marking non-devm variants as deprecated in favor of the devm one. I think focusing too much on the devm automatic cleanup is missing the point. For the non-dsi drivers that don't go through a full mipi_dsi_driver with probe/remove the lifetime is fundamentally busted. If the host is independent of the dsi device, then that just blows up as Tomi points out in another reply, because the non-dsi driver has no idea that everything just disappeared and will happily continue to use it. The only exception is if the same driver (e.g. pci driver) registers both the dsi device and host, and then also unregisters them both (and hopefully in the right order). Everywhere else I think we just need to bite the bullet and use the full driver/device model, it has all these issues sorted out. And once we have that, there's also no confusion about automatic devm cleanup of stuff I think. -Sima -- Simona Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
On Mon, Sep 02, 2024 at 03:31:28PM +0300, Tomi Valkeinen wrote: > Hi, > > On 02/09/2024 13:50, Daniel Vetter wrote: > > On Mon, Sep 02, 2024 at 11:26:11AM +0200, Maxime Ripard wrote: > > > Hi, > > > > > > On Wed, Aug 07, 2024 at 03:19:23PM GMT, Tomi Valkeinen wrote: > > > > On 25/07/2024 14:28, Maxime Ripard wrote: > > > > > On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: > > > > > > On 02/07/2024 14:43, Maxime Ripard wrote: > > > > > > > Hi Tomi, > > > > > > > > > > > > > > On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: > > > > > > > > On 26/06/2024 18:07, Maxime Ripard wrote: > > > > > > > > > On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > On 26/06/2024 11:49, Maxime Ripard wrote: > > > > > > > > > > > Hi, > > > > > > > > > > > > > > > > > > > > > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > > > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > > > > > > > > > > > > > > > > > > > > > > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > > > > > > > > > > > > devm_mipi_dsi_attach(), the resource management is moved to devres, > > > > > > > > > > > > which releases the resource automatically when the bridge driver is > > > > > > > > > > > > unbound. > > > > > > > > > > > > > > > > > > > > > > > > However, if the DSI host goes away first, the host unregistration code > > > > > > > > > > > > will automatically detach and unregister any DSI peripherals, without > > > > > > > > > > > > notifying the devres about it. So when the bridge driver later is > > > > > > > > > > > > unbound, the resources are released a second time, leading to crash. > > > > > > > > > > > > > > > > > > > > > > That's super surprising. mipi_dsi_device_unregister calls > > > > > > > > > > > device_unregister, which calls device_del, which in turn calls > > > > > > > > > > > devres_release_all. > > > > > > > > > > > > > > > > > > > > Hmm, right. > > > > > > > > > > > > > > > > > > > > > If that doesn't work like that, then it's what needs to be fixed, and > > > > > > > > > > > not worked around in the MIPI-DSI bus. > > > > > > > > > > > > > > > > > > > > Well, something causes a crash for both the device register/unregister case > > > > > > > > > > and the attach/detach case, and the call stacks and debug prints showed a > > > > > > > > > > double unregister/detach... > > > > > > > > > > > > > > > > > > > > I need to dig up the board and check again why the devres_release_all() in > > > > > > > > > > device_del() doesn't solve this. But I can probably only get back to this in > > > > > > > > > > August, so it's perhaps best to ignore this patch for now. > > > > > > > > > > > > > > > > > > > > However, the attach/detach case is still valid? I see no devres calls in the > > > > > > > > > > detach paths. > > > > > > > > > > > > > > > > > > I'm not sure what you mean by the attach/detach case. Do you expect > > > > > > > > > device resources allocated in attach to be freed when detach run? > > > > > > > > > > > > > > > > Ah, never mind, the devres_release_all() would of course deal with that too. > > > > > > > > > > > > > > > > However, I just realized/remembered why it crashes. > > > > > > > > > > > > > > > > devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a > > > > > > > > device which is used for the devres. This device is probably always the > > > > > > > > bridge device. So when the bridge device goes away, so do those resources. > > > > > > > > > > > > > > > > The mipi_dsi_device_unregister() call deals with a DSI device, which was > > > > > > > > created in devm_mipi_dsi_device_register_full(). Unregistering that DSI > > > > > > > > device, which does happen when the DSI host is removed, does not affect the > > > > > > > > devres of the bridge. > > > > > > > > > > > > > > > > So, unloading the DSI host driver causes mipi_dsi_device_unregister() and > > > > > > > > mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and > > > > > > > > unloading the bridge driver causes them to be called again via devres. > > > > > > > > > > > > > > Sorry, that's one of the things I don't quite get. Both functions are > > > > > > > exclusively(?) called from I2C bridges, so the device passed there > > > > > > > should be a i2c_client instance, and thus the MIPI-DSI host going away > > > > > > > will not remove those i2c devices, only the MIPI-DSI ones, right? > > > > > > > > > > > > Yes. > > > > > > > > > > > > > So if we remove the host, the MIPI-DSI device will be detached and > > > > > > > removed through the path you were explaing with the i2c client lingering > > > > > > > around. And if we remove the I2C device, then devm will kick in and will > > > > > > > detach and remove the MIPI-DSI device. > > > > > > > > > > > > Right. > > > > > > > > > > > > > Or is it the other way around? That if you remove the host, the device > > > > > > > is properly detached and removed, but there's still the devm actions > > > > > > > lingering around in the i2c device with pointers to the mipi_dsi_device > > > > > > > that was first created, but since destroyed? > > > > > > > > > > > > > > And thus, if the i2c device ever goes away, we get a use-after-free? > > > > > > > > > > > > Hmm, I'm not sure I understand what you mean here... Aren't you describing > > > > > > the same thing in both of these cases? > > > > > > > > > > > > In any case, to expand the description a bit, module unloading is quite > > > > > > fragile. I do get a crash if I first unload the i2c bridge module, and only > > > > > > then go and unload the other ones in the DRM pipeline. But I think module > > > > > > unloading will very easily crash, whatever the DRM drivers being used are, > > > > > > so it's not related to this particular issue. > > > > > > > > > > > > In my view, the unload sequence that should be supported (for development > > > > > > purposes, not for production) is to start the unload from the display > > > > > > controller module, which tears down the DRM pipeline, and going from there > > > > > > towards the panels/connectors. > > > > > > > > > > > > Of course, it would be very nice if the module unloading worked perfectly, > > > > > > but afaics fixing all that's related to module unloading would be a > > > > > > multi-year project... So, I just want to keep the sequence I described above > > > > > > working, which allows using modules while doing driver development. > > > > > > > > > > FTR, I'm all for supporting module unloading. The discussion above was > > > > > about what is broken exactly, so we can come up with a good solution. > > > > > > > > Does that mean that you're ok with the patch, or that something should be > > > > improved? > > > > > > No, I meant that at the very least the commit log needs to be updated to > > > reflect what is actually going on, because at least my understanding of > > > it doesn't match what actually happens. > > > > > > We want a solution to the problem you're facing, but it's not clear to > > > me what the problem is exactly at this point, so it's hard to review a > > > solution. > > > > So I haven't looked at the full thing, but I think the proper fix is to > > make both detach and unregister cope with being called multiple times. I > > think devm_ here is a red herring, the underlying issues is that we can > > unregister/detach from two sides: > > > > - when the host dsi goes away > > - when individual dsi devices on a given host go away > > > > So there needs to be book-keeping and locking to make sure no matter which > > order things disappear, we don't try to unregister/detach a dsi device > > twice. > > I think that is what my patch does (for devm_). Yep, except I think you should just do it for everyone, not just for the special case where one of the calls is done through devm. > Some vocabulary first: > > dsi peripheral device - The device that represents the DSI peripheral. It is > a bridge or a panel, and (usually) an i2c or platform device. > > dsi peripheral driver - The driver handling the dsi peripheral device. > > dsi device - Runtime created device instance that represents the DSI > peripheral. So in my case we have the i2c bridge device, and a dsi device is > created for it in the setup code. > > dsi controller device - A device that has a DSI bus (usually a platform or > i2c device, I would guess). > > dsi controller driver - A driver for the dsi controller device. Creates the > dsi host. > > dsi host - represents the DSI host side, owned by the dsi controller driver. > > When a dsi peripheral driver uses devm_mipi_dsi_device_register_full() or > devm_mipi_dsi_attach(), the dsi device is created and attached to the dsi > host. When the dsi peripheral device-driver is unbound, devres will call > unregister and detach are called automatically. This works fine. > > But when the device-driver for the dsi controller is unbound, the dsi > controller driver will unregister the dsi host, and the unregistration will > also unregister and detach the dsi device. But the devres is not told about > that. So when the dsi peripheral is later unbound, its devres will again > unregister and detach. > > To fix that this patch uses devm_remove_action() to remove the devres action > when the host side goes away first. > > Now, after writing the above, I realized that all this won't help with the > non-devm versions: the host side has unregistered and detached the dsi > device, but if the dsi peripheral driver calls mipi_dsi_detach() or > mipi_dsi_device_unregister(), it will again crash. > > Handling the attach/detach should be quite easy, and in fact the code > already handles it, but it uses WARN_ON() there so that has to go. But > attach/detach will crash anyway if the dsi device has already been freed, > which happens when the dsi controller driver calls > mipi_dsi_device_unregister(). Hm I thought we have a full struct device, so refcounted, and also with struct device unregister should be separate from the final kfree when the last reference drops away. Hence I thought this should just work. We might need to grab a reference in attach/detach to sort this out? > So... The dsi peripheral driver should keep a reference to the dsi device, > with get_device()? And then do a put_device() after calling > mipi_dsi_device_unregister()? Yeah I think so. > But we don't free the dsi device, it has essentially been disabled without > telling the dsi peripheral driver about it, which might cause problems. > > I don't know... This doesn't sound correct to me. Probably my patch is just > new wrong on top of old wrong. Or maybe I don't quite grasp how this works. _unregister calls ->detach, so I think we're fine. Essentially what we're doing here is hand-rolling a device/driver bind model, instead of using the one from the core driver model. But all the pieces are there: - register/unregister: Adding/removing a device, after unregister the physical device cannot be accessed anymore (the host driver might be gone), but the struct device will stick around. - attach/detach: binding/unbinding a driver to a dsi peripheral > Oh, I now realized/remembered that we can also have "real" dsi devices, when > the dsi peripheral is a child of the dsi bus device, and controlled via DSI > commands (i.e. not via i2c or memory mapped registers). Perhaps all/some of > this confusion in the code comes from the use of dsi device for both "real" > dsi devices and as a "dsi client", created by i2c/platform devices. I think for more complicated topology device links should help. Of course the dev links will not know about our hand-rolled driver binding, so maybe we need to fix that to make it work perfectly. But I don't think that's the case here. > With a "real" dsi device things work fine, as the driver only attaches and > detaches, and the device (un)registration is handled by the dsi host. > > Well, this turned out to be a bit of a rambling email... I don't have a > clear solution in mind. I think aside from maybe a missing device_get/put for attach/detach we have all the pieces, and it just needs a little bit of polish? We're almost 90% at a clean device driver model implementation, adding the last few bits is I think all we need? Or is there a catch? Cheers, Sima -- Simona Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
On 03/09/2024 10:40, Simona Vetter wrote: > On Mon, Sep 02, 2024 at 03:31:28PM +0300, Tomi Valkeinen wrote: >> Hi, >> >> On 02/09/2024 13:50, Daniel Vetter wrote: >>> On Mon, Sep 02, 2024 at 11:26:11AM +0200, Maxime Ripard wrote: >>>> Hi, >>>> >>>> On Wed, Aug 07, 2024 at 03:19:23PM GMT, Tomi Valkeinen wrote: >>>>> On 25/07/2024 14:28, Maxime Ripard wrote: >>>>>> On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: >>>>>>> On 02/07/2024 14:43, Maxime Ripard wrote: >>>>>>>> Hi Tomi, >>>>>>>> >>>>>>>> On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: >>>>>>>>> On 26/06/2024 18:07, Maxime Ripard wrote: >>>>>>>>>> On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: >>>>>>>>>>> On 26/06/2024 11:49, Maxime Ripard wrote: >>>>>>>>>>>> Hi, >>>>>>>>>>>> >>>>>>>>>>>> On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: >>>>>>>>>>>>> From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> >>>>>>>>>>>>> >>>>>>>>>>>>> When a bridge driver uses devm_mipi_dsi_device_register_full() or >>>>>>>>>>>>> devm_mipi_dsi_attach(), the resource management is moved to devres, >>>>>>>>>>>>> which releases the resource automatically when the bridge driver is >>>>>>>>>>>>> unbound. >>>>>>>>>>>>> >>>>>>>>>>>>> However, if the DSI host goes away first, the host unregistration code >>>>>>>>>>>>> will automatically detach and unregister any DSI peripherals, without >>>>>>>>>>>>> notifying the devres about it. So when the bridge driver later is >>>>>>>>>>>>> unbound, the resources are released a second time, leading to crash. >>>>>>>>>>>> >>>>>>>>>>>> That's super surprising. mipi_dsi_device_unregister calls >>>>>>>>>>>> device_unregister, which calls device_del, which in turn calls >>>>>>>>>>>> devres_release_all. >>>>>>>>>>> >>>>>>>>>>> Hmm, right. >>>>>>>>>>> >>>>>>>>>>>> If that doesn't work like that, then it's what needs to be fixed, and >>>>>>>>>>>> not worked around in the MIPI-DSI bus. >>>>>>>>>>> >>>>>>>>>>> Well, something causes a crash for both the device register/unregister case >>>>>>>>>>> and the attach/detach case, and the call stacks and debug prints showed a >>>>>>>>>>> double unregister/detach... >>>>>>>>>>> >>>>>>>>>>> I need to dig up the board and check again why the devres_release_all() in >>>>>>>>>>> device_del() doesn't solve this. But I can probably only get back to this in >>>>>>>>>>> August, so it's perhaps best to ignore this patch for now. >>>>>>>>>>> >>>>>>>>>>> However, the attach/detach case is still valid? I see no devres calls in the >>>>>>>>>>> detach paths. >>>>>>>>>> >>>>>>>>>> I'm not sure what you mean by the attach/detach case. Do you expect >>>>>>>>>> device resources allocated in attach to be freed when detach run? >>>>>>>>> >>>>>>>>> Ah, never mind, the devres_release_all() would of course deal with that too. >>>>>>>>> >>>>>>>>> However, I just realized/remembered why it crashes. >>>>>>>>> >>>>>>>>> devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a >>>>>>>>> device which is used for the devres. This device is probably always the >>>>>>>>> bridge device. So when the bridge device goes away, so do those resources. >>>>>>>>> >>>>>>>>> The mipi_dsi_device_unregister() call deals with a DSI device, which was >>>>>>>>> created in devm_mipi_dsi_device_register_full(). Unregistering that DSI >>>>>>>>> device, which does happen when the DSI host is removed, does not affect the >>>>>>>>> devres of the bridge. >>>>>>>>> >>>>>>>>> So, unloading the DSI host driver causes mipi_dsi_device_unregister() and >>>>>>>>> mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and >>>>>>>>> unloading the bridge driver causes them to be called again via devres. >>>>>>>> >>>>>>>> Sorry, that's one of the things I don't quite get. Both functions are >>>>>>>> exclusively(?) called from I2C bridges, so the device passed there >>>>>>>> should be a i2c_client instance, and thus the MIPI-DSI host going away >>>>>>>> will not remove those i2c devices, only the MIPI-DSI ones, right? >>>>>>> >>>>>>> Yes. >>>>>>> >>>>>>>> So if we remove the host, the MIPI-DSI device will be detached and >>>>>>>> removed through the path you were explaing with the i2c client lingering >>>>>>>> around. And if we remove the I2C device, then devm will kick in and will >>>>>>>> detach and remove the MIPI-DSI device. >>>>>>> >>>>>>> Right. >>>>>>> >>>>>>>> Or is it the other way around? That if you remove the host, the device >>>>>>>> is properly detached and removed, but there's still the devm actions >>>>>>>> lingering around in the i2c device with pointers to the mipi_dsi_device >>>>>>>> that was first created, but since destroyed? >>>>>>>> >>>>>>>> And thus, if the i2c device ever goes away, we get a use-after-free? >>>>>>> >>>>>>> Hmm, I'm not sure I understand what you mean here... Aren't you describing >>>>>>> the same thing in both of these cases? >>>>>>> >>>>>>> In any case, to expand the description a bit, module unloading is quite >>>>>>> fragile. I do get a crash if I first unload the i2c bridge module, and only >>>>>>> then go and unload the other ones in the DRM pipeline. But I think module >>>>>>> unloading will very easily crash, whatever the DRM drivers being used are, >>>>>>> so it's not related to this particular issue. >>>>>>> >>>>>>> In my view, the unload sequence that should be supported (for development >>>>>>> purposes, not for production) is to start the unload from the display >>>>>>> controller module, which tears down the DRM pipeline, and going from there >>>>>>> towards the panels/connectors. >>>>>>> >>>>>>> Of course, it would be very nice if the module unloading worked perfectly, >>>>>>> but afaics fixing all that's related to module unloading would be a >>>>>>> multi-year project... So, I just want to keep the sequence I described above >>>>>>> working, which allows using modules while doing driver development. >>>>>> >>>>>> FTR, I'm all for supporting module unloading. The discussion above was >>>>>> about what is broken exactly, so we can come up with a good solution. >>>>> >>>>> Does that mean that you're ok with the patch, or that something should be >>>>> improved? >>>> >>>> No, I meant that at the very least the commit log needs to be updated to >>>> reflect what is actually going on, because at least my understanding of >>>> it doesn't match what actually happens. >>>> >>>> We want a solution to the problem you're facing, but it's not clear to >>>> me what the problem is exactly at this point, so it's hard to review a >>>> solution. >>> >>> So I haven't looked at the full thing, but I think the proper fix is to >>> make both detach and unregister cope with being called multiple times. I >>> think devm_ here is a red herring, the underlying issues is that we can >>> unregister/detach from two sides: >>> >>> - when the host dsi goes away >>> - when individual dsi devices on a given host go away >>> >>> So there needs to be book-keeping and locking to make sure no matter which >>> order things disappear, we don't try to unregister/detach a dsi device >>> twice. >> >> I think that is what my patch does (for devm_). > > Yep, except I think you should just do it for everyone, not just for the > special case where one of the calls is done through devm. > >> Some vocabulary first: >> >> dsi peripheral device - The device that represents the DSI peripheral. It is >> a bridge or a panel, and (usually) an i2c or platform device. >> >> dsi peripheral driver - The driver handling the dsi peripheral device. >> >> dsi device - Runtime created device instance that represents the DSI >> peripheral. So in my case we have the i2c bridge device, and a dsi device is >> created for it in the setup code. >> >> dsi controller device - A device that has a DSI bus (usually a platform or >> i2c device, I would guess). >> >> dsi controller driver - A driver for the dsi controller device. Creates the >> dsi host. >> >> dsi host - represents the DSI host side, owned by the dsi controller driver. >> >> When a dsi peripheral driver uses devm_mipi_dsi_device_register_full() or >> devm_mipi_dsi_attach(), the dsi device is created and attached to the dsi >> host. When the dsi peripheral device-driver is unbound, devres will call >> unregister and detach are called automatically. This works fine. >> >> But when the device-driver for the dsi controller is unbound, the dsi >> controller driver will unregister the dsi host, and the unregistration will >> also unregister and detach the dsi device. But the devres is not told about >> that. So when the dsi peripheral is later unbound, its devres will again >> unregister and detach. >> >> To fix that this patch uses devm_remove_action() to remove the devres action >> when the host side goes away first. >> >> Now, after writing the above, I realized that all this won't help with the >> non-devm versions: the host side has unregistered and detached the dsi >> device, but if the dsi peripheral driver calls mipi_dsi_detach() or >> mipi_dsi_device_unregister(), it will again crash. >> >> Handling the attach/detach should be quite easy, and in fact the code >> already handles it, but it uses WARN_ON() there so that has to go. But >> attach/detach will crash anyway if the dsi device has already been freed, >> which happens when the dsi controller driver calls >> mipi_dsi_device_unregister(). > > Hm I thought we have a full struct device, so refcounted, and also with > struct device unregister should be separate from the final kfree when the > last reference drops away. Hence I thought this should just work. > > We might need to grab a reference in attach/detach to sort this out? I think there's a bit more to it. A non-dsi-device bridge driver would do: (devm_)mipi_dsi_device_register_full() ... (devm_)mipi_dsi_attach() The DSI host side could unregister and free the dsi_device right after the call to mipi_dsi_device_register_full(), and mipi_dsi_attach() would probably just crash. If I understand this correctly, the main issue is that the bridge driver doesn't own an exclusive reference to the dsi_device, even if it looks like it does, but rather both the dsi host and the bridge driver share the same reference. So, we could get an extra reference, so that each side has its own. But I think there's more. The bridge driver will call mipi_dsi_device_unregister() (manually or via devres), and that does device_unregister(). However, the dsi host will also call mipi_dsi_device_unregister() when tearing down, which would result in another device_unregister(). The above is not a problem if the bridge does away first, as then the dsi bus won't contain the dsi_device anymore, and the dsi host will not unregister it. But if it's the other way around, the dsi host will do device_unregister, and later the bridge will do it too as it thinks it owns the dsi_device. I presume that can be solved by tracking whether we have unregistered the dsi_device or not. However, what would happen if the bridge driver calls any of the other mipi_dsi_* functions with the dsi_device that has already been unregistered by the dsi host? Nothing good, probably. So all those functions should start to fail graciously when the dsi_device has been unregistered. Or the bridge driver should somehow get a notification about the unregistration so that it knows not to call those functions. I have to say I feel a bit uncertain about the whole ownership model here. Tomi
On Tue, Sep 03, 2024 at 11:27:23AM +0300, Tomi Valkeinen wrote: > On 03/09/2024 10:40, Simona Vetter wrote: > > On Mon, Sep 02, 2024 at 03:31:28PM +0300, Tomi Valkeinen wrote: > > > Hi, > > > > > > On 02/09/2024 13:50, Daniel Vetter wrote: > > > > On Mon, Sep 02, 2024 at 11:26:11AM +0200, Maxime Ripard wrote: > > > > > Hi, > > > > > > > > > > On Wed, Aug 07, 2024 at 03:19:23PM GMT, Tomi Valkeinen wrote: > > > > > > On 25/07/2024 14:28, Maxime Ripard wrote: > > > > > > > On Mon, Jul 15, 2024 at 11:32:34AM GMT, Tomi Valkeinen wrote: > > > > > > > > On 02/07/2024 14:43, Maxime Ripard wrote: > > > > > > > > > Hi Tomi, > > > > > > > > > > > > > > > > > > On Wed, Jun 26, 2024 at 06:53:40PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > On 26/06/2024 18:07, Maxime Ripard wrote: > > > > > > > > > > > On Wed, Jun 26, 2024 at 12:55:39PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > > > On 26/06/2024 11:49, Maxime Ripard wrote: > > > > > > > > > > > > > Hi, > > > > > > > > > > > > > > > > > > > > > > > > > > On Wed, Jun 19, 2024 at 12:07:48PM GMT, Tomi Valkeinen wrote: > > > > > > > > > > > > > > From: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com> > > > > > > > > > > > > > > > > > > > > > > > > > > > > When a bridge driver uses devm_mipi_dsi_device_register_full() or > > > > > > > > > > > > > > devm_mipi_dsi_attach(), the resource management is moved to devres, > > > > > > > > > > > > > > which releases the resource automatically when the bridge driver is > > > > > > > > > > > > > > unbound. > > > > > > > > > > > > > > > > > > > > > > > > > > > > However, if the DSI host goes away first, the host unregistration code > > > > > > > > > > > > > > will automatically detach and unregister any DSI peripherals, without > > > > > > > > > > > > > > notifying the devres about it. So when the bridge driver later is > > > > > > > > > > > > > > unbound, the resources are released a second time, leading to crash. > > > > > > > > > > > > > > > > > > > > > > > > > > That's super surprising. mipi_dsi_device_unregister calls > > > > > > > > > > > > > device_unregister, which calls device_del, which in turn calls > > > > > > > > > > > > > devres_release_all. > > > > > > > > > > > > > > > > > > > > > > > > Hmm, right. > > > > > > > > > > > > > > > > > > > > > > > > > If that doesn't work like that, then it's what needs to be fixed, and > > > > > > > > > > > > > not worked around in the MIPI-DSI bus. > > > > > > > > > > > > > > > > > > > > > > > > Well, something causes a crash for both the device register/unregister case > > > > > > > > > > > > and the attach/detach case, and the call stacks and debug prints showed a > > > > > > > > > > > > double unregister/detach... > > > > > > > > > > > > > > > > > > > > > > > > I need to dig up the board and check again why the devres_release_all() in > > > > > > > > > > > > device_del() doesn't solve this. But I can probably only get back to this in > > > > > > > > > > > > August, so it's perhaps best to ignore this patch for now. > > > > > > > > > > > > > > > > > > > > > > > > However, the attach/detach case is still valid? I see no devres calls in the > > > > > > > > > > > > detach paths. > > > > > > > > > > > > > > > > > > > > > > I'm not sure what you mean by the attach/detach case. Do you expect > > > > > > > > > > > device resources allocated in attach to be freed when detach run? > > > > > > > > > > > > > > > > > > > > Ah, never mind, the devres_release_all() would of course deal with that too. > > > > > > > > > > > > > > > > > > > > However, I just realized/remembered why it crashes. > > > > > > > > > > > > > > > > > > > > devm_mipi_dsi_device_register_full() and devm_mipi_dsi_attach() are given a > > > > > > > > > > device which is used for the devres. This device is probably always the > > > > > > > > > > bridge device. So when the bridge device goes away, so do those resources. > > > > > > > > > > > > > > > > > > > > The mipi_dsi_device_unregister() call deals with a DSI device, which was > > > > > > > > > > created in devm_mipi_dsi_device_register_full(). Unregistering that DSI > > > > > > > > > > device, which does happen when the DSI host is removed, does not affect the > > > > > > > > > > devres of the bridge. > > > > > > > > > > > > > > > > > > > > So, unloading the DSI host driver causes mipi_dsi_device_unregister() and > > > > > > > > > > mipi_dsi_detach() to be called (as part of mipi_dsi_host_unregister()), and > > > > > > > > > > unloading the bridge driver causes them to be called again via devres. > > > > > > > > > > > > > > > > > > Sorry, that's one of the things I don't quite get. Both functions are > > > > > > > > > exclusively(?) called from I2C bridges, so the device passed there > > > > > > > > > should be a i2c_client instance, and thus the MIPI-DSI host going away > > > > > > > > > will not remove those i2c devices, only the MIPI-DSI ones, right? > > > > > > > > > > > > > > > > Yes. > > > > > > > > > > > > > > > > > So if we remove the host, the MIPI-DSI device will be detached and > > > > > > > > > removed through the path you were explaing with the i2c client lingering > > > > > > > > > around. And if we remove the I2C device, then devm will kick in and will > > > > > > > > > detach and remove the MIPI-DSI device. > > > > > > > > > > > > > > > > Right. > > > > > > > > > > > > > > > > > Or is it the other way around? That if you remove the host, the device > > > > > > > > > is properly detached and removed, but there's still the devm actions > > > > > > > > > lingering around in the i2c device with pointers to the mipi_dsi_device > > > > > > > > > that was first created, but since destroyed? > > > > > > > > > > > > > > > > > > And thus, if the i2c device ever goes away, we get a use-after-free? > > > > > > > > > > > > > > > > Hmm, I'm not sure I understand what you mean here... Aren't you describing > > > > > > > > the same thing in both of these cases? > > > > > > > > > > > > > > > > In any case, to expand the description a bit, module unloading is quite > > > > > > > > fragile. I do get a crash if I first unload the i2c bridge module, and only > > > > > > > > then go and unload the other ones in the DRM pipeline. But I think module > > > > > > > > unloading will very easily crash, whatever the DRM drivers being used are, > > > > > > > > so it's not related to this particular issue. > > > > > > > > > > > > > > > > In my view, the unload sequence that should be supported (for development > > > > > > > > purposes, not for production) is to start the unload from the display > > > > > > > > controller module, which tears down the DRM pipeline, and going from there > > > > > > > > towards the panels/connectors. > > > > > > > > > > > > > > > > Of course, it would be very nice if the module unloading worked perfectly, > > > > > > > > but afaics fixing all that's related to module unloading would be a > > > > > > > > multi-year project... So, I just want to keep the sequence I described above > > > > > > > > working, which allows using modules while doing driver development. > > > > > > > > > > > > > > FTR, I'm all for supporting module unloading. The discussion above was > > > > > > > about what is broken exactly, so we can come up with a good solution. > > > > > > > > > > > > Does that mean that you're ok with the patch, or that something should be > > > > > > improved? > > > > > > > > > > No, I meant that at the very least the commit log needs to be updated to > > > > > reflect what is actually going on, because at least my understanding of > > > > > it doesn't match what actually happens. > > > > > > > > > > We want a solution to the problem you're facing, but it's not clear to > > > > > me what the problem is exactly at this point, so it's hard to review a > > > > > solution. > > > > > > > > So I haven't looked at the full thing, but I think the proper fix is to > > > > make both detach and unregister cope with being called multiple times. I > > > > think devm_ here is a red herring, the underlying issues is that we can > > > > unregister/detach from two sides: > > > > > > > > - when the host dsi goes away > > > > - when individual dsi devices on a given host go away > > > > > > > > So there needs to be book-keeping and locking to make sure no matter which > > > > order things disappear, we don't try to unregister/detach a dsi device > > > > twice. > > > > > > I think that is what my patch does (for devm_). > > > > Yep, except I think you should just do it for everyone, not just for the > > special case where one of the calls is done through devm. > > > > > Some vocabulary first: > > > > > > dsi peripheral device - The device that represents the DSI peripheral. It is > > > a bridge or a panel, and (usually) an i2c or platform device. > > > > > > dsi peripheral driver - The driver handling the dsi peripheral device. > > > > > > dsi device - Runtime created device instance that represents the DSI > > > peripheral. So in my case we have the i2c bridge device, and a dsi device is > > > created for it in the setup code. > > > > > > dsi controller device - A device that has a DSI bus (usually a platform or > > > i2c device, I would guess). > > > > > > dsi controller driver - A driver for the dsi controller device. Creates the > > > dsi host. > > > > > > dsi host - represents the DSI host side, owned by the dsi controller driver. > > > > > > When a dsi peripheral driver uses devm_mipi_dsi_device_register_full() or > > > devm_mipi_dsi_attach(), the dsi device is created and attached to the dsi > > > host. When the dsi peripheral device-driver is unbound, devres will call > > > unregister and detach are called automatically. This works fine. > > > > > > But when the device-driver for the dsi controller is unbound, the dsi > > > controller driver will unregister the dsi host, and the unregistration will > > > also unregister and detach the dsi device. But the devres is not told about > > > that. So when the dsi peripheral is later unbound, its devres will again > > > unregister and detach. > > > > > > To fix that this patch uses devm_remove_action() to remove the devres action > > > when the host side goes away first. > > > > > > Now, after writing the above, I realized that all this won't help with the > > > non-devm versions: the host side has unregistered and detached the dsi > > > device, but if the dsi peripheral driver calls mipi_dsi_detach() or > > > mipi_dsi_device_unregister(), it will again crash. > > > > > > Handling the attach/detach should be quite easy, and in fact the code > > > already handles it, but it uses WARN_ON() there so that has to go. But > > > attach/detach will crash anyway if the dsi device has already been freed, > > > which happens when the dsi controller driver calls > > > mipi_dsi_device_unregister(). > > > > Hm I thought we have a full struct device, so refcounted, and also with > > struct device unregister should be separate from the final kfree when the > > last reference drops away. Hence I thought this should just work. > > > > We might need to grab a reference in attach/detach to sort this out? > > I think there's a bit more to it. A non-dsi-device bridge driver would do: > > (devm_)mipi_dsi_device_register_full() > ... > (devm_)mipi_dsi_attach() > > The DSI host side could unregister and free the dsi_device right after the > call to mipi_dsi_device_register_full(), and mipi_dsi_attach() would > probably just crash. > > If I understand this correctly, the main issue is that the bridge driver > doesn't own an exclusive reference to the dsi_device, even if it looks like > it does, but rather both the dsi host and the bridge driver share the same > reference. So, we could get an extra reference, so that each side has its > own. > > But I think there's more. The bridge driver will call > mipi_dsi_device_unregister() (manually or via devres), and that does > device_unregister(). However, the dsi host will also call > mipi_dsi_device_unregister() when tearing down, which would result in > another device_unregister(). > > The above is not a problem if the bridge does away first, as then the dsi > bus won't contain the dsi_device anymore, and the dsi host will not > unregister it. But if it's the other way around, the dsi host will do > device_unregister, and later the bridge will do it too as it thinks it owns > the dsi_device. > > I presume that can be solved by tracking whether we have unregistered the > dsi_device or not. However, what would happen if the bridge driver calls any > of the other mipi_dsi_* functions with the dsi_device that has already been > unregistered by the dsi host? Nothing good, probably. So all those functions > should start to fail graciously when the dsi_device has been unregistered. > Or the bridge driver should somehow get a notification about the > unregistration so that it knows not to call those functions. Ah yeah I got confused, I thought attach/detach was for the dsi_device, not the dsi_host. But it's all ok, because mipi_dsi_driver is a full-blown driver, so if we unregister the device all the drivers will be unbound, which means they should first detach and stop using the mipi_dsi_device. If they continue to do that, they're busted. Where things fall apart is for the non-dsi drivers which call mipi_dsi_attach directly, after having called mipi_dsi_device_register_full, bypassing the driver model. Those just blow up, and I don't think you can fix that without using the driver model properly. With that you should have all the pieces: - No matter who calls unregister, the driver gets unbound and can clean up the mess. After the unregister call there should be nothing attached anymore, so we can make that a WARN_ON. - mipi_dsi_device_register_full needs to be changed to not transfer the reference, meaning using device_register instead of device_add. This way the non-dsi drivers retain a reference of their own. - the non-dsi driver in it's non-dsi remove hook needs to call mipi_dsi_unregister and then mipi_dsi_put (which is just a device_put). - we need some locking around mipi_dsi_unregister to make sure it's not called twice and cannot race between a non-dsi driver and a bridge driver. > I have to say I feel a bit uncertain about the whole ownership model here. Yeah I misunderstood a few things too, but I think now it's clearer for me. Essentially unless you have a guarantee that both the host and dsi-device will be removed in the same order always (by both being instantiated from one probe hook like with pci drivers) you have to use a full dsi_driver even for non-dsi cases. Otherwise you won't get the ->remove hook and things will blow up. Also the manual call to mipi_dsi_detach in mipi_dsi_remove_device_fn is a bug and should be replaced by a WARN_ON(dsi->attached) after we've called mipi_dsi_device_unregister: - For the case of a native driver we've called that driver's ->remove hook as part of unregister, which should have called mipi_dsi_detach. If it didn't, it's a bug in that drivers ->remove hook Aside: we should have a devm_mipi_dsi_attach to make this easier, which also checks that you only call this on a mipi_dsi_device and nothing else (because anything else is a bug). - for the case of a non-dsi driver that also registers the host (the only case which does not result in lifetime bugs on removal) that non-dsi driver's remove function should make sure it's nuking everything in the right order. So first explicit dsi_device unregister, then explicit dsi_host_unregister. Otherwise it will at least look buggy. It's a bit of work, but I think this should be solid and we should be able to get there in fairly small steps. Cheers, Sima -- Simona Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
© 2016 - 2026 Red Hat, Inc.