[PATCH v5 4/4] drm: bridge: ti-sn65dsi83: Add error recovery mechanism

Herve Codina posted 4 patches 1 year ago
There is a newer version of this series
[PATCH v5 4/4] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Herve Codina 1 year ago
In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
from errors by itself. A full restart of the bridge is needed in those
cases to have the bridge output LVDS signals again.

Also, during tests, cases were observed where reading the status of the
bridge was not even possible. Indeed, in those cases, the bridge stops
to acknowledge I2C transactions. Only a full reset of the bridge (power
off/on) brings back the bridge to a functional state.

The TI SN65DSI83 has some error detection capabilities. Introduce an
error recovery mechanism based on this detection.

The errors detected are signaled through an interrupt. On system where
this interrupt is not available, the driver uses a polling monitoring
fallback to check for errors. When an error is present or when reading
the bridge status leads to an I2C failure, the recovery process is
launched.

Restarting the bridge needs to redo the initialization sequence. This
initialization sequence has to be done with the DSI data lanes driven in
LP11 state. In order to do that, the recovery process resets the whole
output path (i.e the path from the encoder to the connector) where the
bridge is located.

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
---
 drivers/gpu/drm/bridge/ti-sn65dsi83.c | 131 ++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
index 336380114eea..26a050b13997 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
@@ -35,9 +35,12 @@
 #include <linux/of_graph.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
+#include <drm/drm_drv.h> /* DRM_MODESET_LOCK_ALL_BEGIN() needs drm_drv_uses_atomic_modeset() */
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
@@ -159,6 +162,9 @@ struct sn65dsi83 {
 	bool				lvds_dual_link_even_odd_swap;
 	int				lvds_vod_swing_conf[2];
 	int				lvds_term_conf[2];
+	int				irq;
+	struct delayed_work		monitor_work;
+	struct work_struct		reset_work;
 };
 
 static const struct regmap_range sn65dsi83_readable_ranges[] = {
@@ -363,6 +369,95 @@ static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx)
 	return dsi_div - 1;
 }
 
+static int sn65dsi83_reset_pipe(struct sn65dsi83 *sn65dsi83)
+{
+	struct drm_device *dev = sn65dsi83->bridge.dev;
+	struct drm_modeset_acquire_ctx ctx;
+	int err;
+
+	/*
+	 * Reset active outputs of the related CRTC.
+	 *
+	 * This way, drm core will reconfigure each components in the CRTC
+	 * outputs path. In our case, this will force the previous component to
+	 * go back in LP11 mode and so allow the reconfiguration of SN64DSI83
+	 * bridge.
+	 *
+	 * Keep the lock during the whole operation to be atomic.
+	 */
+
+	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
+
+	err = drm_atomic_helper_reset_crtc(sn65dsi83->bridge.encoder->crtc, &ctx);
+
+	DRM_MODESET_LOCK_ALL_END(dev, ctx, err);
+
+	return err;
+}
+
+static void sn65dsi83_reset_work(struct work_struct *ws)
+{
+	struct sn65dsi83 *ctx = container_of(ws, struct sn65dsi83, reset_work);
+	int ret;
+
+	dev_warn(ctx->dev, "reset the pipe\n");
+
+	/* Reset the pipe */
+	ret = sn65dsi83_reset_pipe(ctx);
+	if (ret) {
+		dev_err(ctx->dev, "reset pipe failed %pe\n", ERR_PTR(ret));
+		return;
+	}
+	if (ctx->irq)
+		enable_irq(ctx->irq);
+}
+
+static void sn65dsi83_handle_errors(struct sn65dsi83 *ctx)
+{
+	unsigned int irq_stat;
+	int ret;
+
+	/*
+	 * Schedule a reset in case of:
+	 *  - the bridge doesn't answer
+	 *  - the bridge signals an error
+	 */
+
+	ret = regmap_read(ctx->regmap, REG_IRQ_STAT, &irq_stat);
+	if (ret || irq_stat) {
+		/*
+		 * IRQ acknowledged is not always possible (the bridge can be in
+		 * a state where it doesn't answer anymore). To prevent an
+		 * interrupt storm, disable interrupt. The interrupt will be
+		 * after the reset.
+		 */
+		if (ctx->irq)
+			disable_irq_nosync(ctx->irq);
+
+		schedule_work(&ctx->reset_work);
+	}
+}
+
+static void sn65dsi83_monitor_work(struct work_struct *work)
+{
+	struct sn65dsi83 *ctx = container_of(to_delayed_work(work),
+					     struct sn65dsi83, monitor_work);
+
+	sn65dsi83_handle_errors(ctx);
+
+	schedule_delayed_work(&ctx->monitor_work, msecs_to_jiffies(1000));
+}
+
+static void sn65dsi83_monitor_start(struct sn65dsi83 *ctx)
+{
+	schedule_delayed_work(&ctx->monitor_work, msecs_to_jiffies(1000));
+}
+
+static void sn65dsi83_monitor_stop(struct sn65dsi83 *ctx)
+{
+	cancel_delayed_work_sync(&ctx->monitor_work);
+}
+
 static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge,
 					struct drm_bridge_state *old_bridge_state)
 {
@@ -549,6 +644,15 @@ static void sn65dsi83_atomic_enable(struct drm_bridge *bridge,
 	regmap_read(ctx->regmap, REG_IRQ_STAT, &pval);
 	if (pval)
 		dev_err(ctx->dev, "Unexpected link status 0x%02x\n", pval);
+
+	if (ctx->irq) {
+		/* Enable irq to detect errors */
+		regmap_write(ctx->regmap, REG_IRQ_GLOBAL, REG_IRQ_GLOBAL_IRQ_EN);
+		regmap_write(ctx->regmap, REG_IRQ_EN, 0xff);
+	} else {
+		/* Use the polling task */
+		sn65dsi83_monitor_start(ctx);
+	}
 }
 
 static void sn65dsi83_atomic_disable(struct drm_bridge *bridge,
@@ -557,6 +661,15 @@ static void sn65dsi83_atomic_disable(struct drm_bridge *bridge,
 	struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge);
 	int ret;
 
+	if (ctx->irq) {
+		/* Disable irq */
+		regmap_write(ctx->regmap, REG_IRQ_EN, 0x0);
+		regmap_write(ctx->regmap, REG_IRQ_GLOBAL, 0x0);
+	} else {
+		/* Stop the polling task */
+		sn65dsi83_monitor_stop(ctx);
+	}
+
 	/* Put the chip in reset, pull EN line low, and assure 10ms reset low timing. */
 	gpiod_set_value_cansleep(ctx->enable_gpio, 0);
 	usleep_range(10000, 11000);
@@ -806,6 +919,14 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx)
 	return 0;
 }
 
+static irqreturn_t sn65dsi83_irq(int irq, void *data)
+{
+	struct sn65dsi83 *ctx = data;
+
+	sn65dsi83_handle_errors(ctx);
+	return IRQ_HANDLED;
+}
+
 static int sn65dsi83_probe(struct i2c_client *client)
 {
 	const struct i2c_device_id *id = i2c_client_get_device_id(client);
@@ -819,6 +940,8 @@ static int sn65dsi83_probe(struct i2c_client *client)
 		return -ENOMEM;
 
 	ctx->dev = dev;
+	INIT_WORK(&ctx->reset_work, sn65dsi83_reset_work);
+	INIT_DELAYED_WORK(&ctx->monitor_work, sn65dsi83_monitor_work);
 
 	if (dev->of_node) {
 		model = (enum sn65dsi83_model)(uintptr_t)
@@ -843,6 +966,14 @@ static int sn65dsi83_probe(struct i2c_client *client)
 	if (IS_ERR(ctx->regmap))
 		return dev_err_probe(dev, PTR_ERR(ctx->regmap), "failed to get regmap\n");
 
+	if (client->irq) {
+		ctx->irq = client->irq;
+		ret = devm_request_threaded_irq(ctx->dev, ctx->irq, NULL, sn65dsi83_irq,
+						IRQF_ONESHOT, dev_name(ctx->dev), ctx);
+		if (ret)
+			return dev_err_probe(dev, ret, "failed to request irq\n");
+	}
+
 	dev_set_drvdata(dev, ctx);
 	i2c_set_clientdata(client, ctx);
 
-- 
2.47.1
Re: [PATCH v5 4/4] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Alexander Stein 1 year ago
Hi,

Am Montag, 3. Februar 2025, 17:16:06 CET schrieb Herve Codina:
> In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
> from errors by itself. A full restart of the bridge is needed in those
> cases to have the bridge output LVDS signals again.
> 
> Also, during tests, cases were observed where reading the status of the
> bridge was not even possible. Indeed, in those cases, the bridge stops
> to acknowledge I2C transactions. Only a full reset of the bridge (power
> off/on) brings back the bridge to a functional state.
> 
> The TI SN65DSI83 has some error detection capabilities. Introduce an
> error recovery mechanism based on this detection.
> 
> The errors detected are signaled through an interrupt. On system where
> this interrupt is not available, the driver uses a polling monitoring
> fallback to check for errors. When an error is present or when reading
> the bridge status leads to an I2C failure, the recovery process is
> launched.
> 
> Restarting the bridge needs to redo the initialization sequence. This
> initialization sequence has to be done with the DSI data lanes driven in
> LP11 state. In order to do that, the recovery process resets the whole
> output path (i.e the path from the encoder to the connector) where the
> bridge is located.
> 
> Signed-off-by: Herve Codina <herve.codina@bootlin.com>
> ---

With interrupt configured I got the following stack trace upon
reboot/poweroff:

[   91.317264] sn65dsi83 2-002d: reset the pipe
[   91.344093] Unable to handle ke
** replaying previous printk message **
[   91.344093] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
[   91.344107] Mem abort info:
[   91.344111]   ESR = 0x0000000096000004
[   91.344115]   EC = 0x25: DABT (current EL), IL = 32 bits
[   91.344120]   SET = 0, FnV = 0
[   91.344122]   EA = 0, S1PTW = 0
[   91.344125]   FSC = 0x04: level 0 translation fault
[   91.344128] Data abort info:
[   91.344130]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
[   91.344133]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[   91.344136]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[   91.344141] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000102eec000
[   91.344145] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
[   91.344155] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
[   91.420670] Modules linked in: bluetooth 8021q garp mrp stp llc hid_generic hantro_vpu snd_soc_fsl_asoc_card snd_soc_fsl_sai snd_soc_imx_audmux snd_soc_t
lv320aic32x4_i2c imx_pcm_dma v4l2_vp9 snd_soc_simple_card_utils snd_soc_tlv320aic32x4 snd_soc_fsl_utils v4l2_h264 v4l2_jpeg snd_soc_core videobuf2_dma_conti
g v4l2_mem2mem videobuf2_memops videobuf2_v4l2 snd_pcm_dmaengine videobuf2_common imx8m_ddrc phy_generic snd_pcm ci_hdrc_imx ti_sn65dsi83 ci_hdrc clk_renesa
s_pcie mxsfb snd_timer samsung_dsim usbmisc_imx pwm_imx27 snd soundcore spi_imx imx_sdma imx8mm_thermal panel_simple gpio_aggregator pwm_beeper cfg80211 rfk
ill fuse ipv6
[   91.476291] CPU: 0 UID: 0 PID: 83 Comm: kworker/0:3 Not tainted 6.14.0-rc1-next-20250205+ #2906 da26d4e444ec7c54f82096af1ee2c91e843e9303
[   91.488555] Hardware name: TQ-Systems GmbH i.MX8MM TQMa8MxML on MBa8Mx (DT)
[   91.495517] Workqueue: events sn65dsi83_reset_work [ti_sn65dsi83]
[   91.501623] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[   91.508588] pc : drm_atomic_helper_reset_crtc+0x18/0x100
[   91.513906] lr : sn65dsi83_reset_work+0x80/0x118 [ti_sn65dsi83]
[   91.519832] sp : ffff80008271bcc0
[   91.523146] x29: ffff80008271bcc0 x28: 0000000000000000 x27: 0000000000000000
[   91.530288] x26: ffff0000c0d6e340 x25: 0000000000000000 x24: ffff0000c0022005
[   91.537432] x23: ffff0000c4cb4248 x22: ffff0000ff736e40 x21: ffff80008271bcf8
[   91.544576] x20: 0000000000000000 x19: 0000000000000000 x18: 0000000000000000
[   91.551719] x17: 000000040044ffff x16: 0000000000000000 x15: 00000015425993c0
[   91.558860] x14: 0000000000000001 x13: ffff0000c0ece800 x12: 0000000000000020
[   91.566004] x11: ffff0000c0ece800 x10: 0000000000000af0 x9 : ffff80008271ba70
[   91.573147] x8 : ffff0000c0ecf2d0 x7 : ffff0000c0ece800 x6 : dead000000000122
[   91.580291] x5 : 0000000000000000 x4 : ffff0000c0ece780 x3 : 0000000000000000
[   91.587436] x2 : ffff0000c4cb40b8 x1 : ffff80008271bcf8 x0 : 0000000000000000
[   91.594580] Call trace:
[   91.597027]  drm_atomic_helper_reset_crtc+0x18/0x100 (P)
[   91.602346]  sn65dsi83_reset_work+0x80/0x118 [ti_sn65dsi83 d96dc31a1413a92a7c205a475a2357ddacaa4a3b]
[   91.611485]  process_one_work+0x14c/0x3e0
[   91.615503]  worker_thread+0x2d0/0x3f0
[   91.619257]  kthread+0x110/0x1e0
[   91.622488]  ret_from_fork+0x10/0x20
[   91.626073] Code: a90153f3 aa0003f4 f90013f5 aa0103f5 (f9400000) 
[   91.632167] ---[ end trace 0000000000000000 ]---

Running with workqueue does not raise this error.

Best regards,
Alexander

>  drivers/gpu/drm/bridge/ti-sn65dsi83.c | 131 ++++++++++++++++++++++++++
>  1 file changed, 131 insertions(+)
> 
> diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> index 336380114eea..26a050b13997 100644
> --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> @@ -35,9 +35,12 @@
>  #include <linux/of_graph.h>
>  #include <linux/regmap.h>
>  #include <linux/regulator/consumer.h>
> +#include <linux/timer.h>
> +#include <linux/workqueue.h>
>  
>  #include <drm/drm_atomic_helper.h>
>  #include <drm/drm_bridge.h>
> +#include <drm/drm_drv.h> /* DRM_MODESET_LOCK_ALL_BEGIN() needs drm_drv_uses_atomic_modeset() */
>  #include <drm/drm_mipi_dsi.h>
>  #include <drm/drm_of.h>
>  #include <drm/drm_panel.h>
> @@ -159,6 +162,9 @@ struct sn65dsi83 {
>  	bool				lvds_dual_link_even_odd_swap;
>  	int				lvds_vod_swing_conf[2];
>  	int				lvds_term_conf[2];
> +	int				irq;
> +	struct delayed_work		monitor_work;
> +	struct work_struct		reset_work;
>  };
>  
>  static const struct regmap_range sn65dsi83_readable_ranges[] = {
> @@ -363,6 +369,95 @@ static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx)
>  	return dsi_div - 1;
>  }
>  
> +static int sn65dsi83_reset_pipe(struct sn65dsi83 *sn65dsi83)
> +{
> +	struct drm_device *dev = sn65dsi83->bridge.dev;
> +	struct drm_modeset_acquire_ctx ctx;
> +	int err;
> +
> +	/*
> +	 * Reset active outputs of the related CRTC.
> +	 *
> +	 * This way, drm core will reconfigure each components in the CRTC
> +	 * outputs path. In our case, this will force the previous component to
> +	 * go back in LP11 mode and so allow the reconfiguration of SN64DSI83
> +	 * bridge.
> +	 *
> +	 * Keep the lock during the whole operation to be atomic.
> +	 */
> +
> +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> +
> +	err = drm_atomic_helper_reset_crtc(sn65dsi83->bridge.encoder->crtc, &ctx);
> +
> +	DRM_MODESET_LOCK_ALL_END(dev, ctx, err);
> +
> +	return err;
> +}
> +
> +static void sn65dsi83_reset_work(struct work_struct *ws)
> +{
> +	struct sn65dsi83 *ctx = container_of(ws, struct sn65dsi83, reset_work);
> +	int ret;
> +
> +	dev_warn(ctx->dev, "reset the pipe\n");
> +
> +	/* Reset the pipe */
> +	ret = sn65dsi83_reset_pipe(ctx);
> +	if (ret) {
> +		dev_err(ctx->dev, "reset pipe failed %pe\n", ERR_PTR(ret));
> +		return;
> +	}
> +	if (ctx->irq)
> +		enable_irq(ctx->irq);
> +}
> +
> +static void sn65dsi83_handle_errors(struct sn65dsi83 *ctx)
> +{
> +	unsigned int irq_stat;
> +	int ret;
> +
> +	/*
> +	 * Schedule a reset in case of:
> +	 *  - the bridge doesn't answer
> +	 *  - the bridge signals an error
> +	 */
> +
> +	ret = regmap_read(ctx->regmap, REG_IRQ_STAT, &irq_stat);
> +	if (ret || irq_stat) {
> +		/*
> +		 * IRQ acknowledged is not always possible (the bridge can be in
> +		 * a state where it doesn't answer anymore). To prevent an
> +		 * interrupt storm, disable interrupt. The interrupt will be
> +		 * after the reset.
> +		 */
> +		if (ctx->irq)
> +			disable_irq_nosync(ctx->irq);
> +
> +		schedule_work(&ctx->reset_work);
> +	}
> +}
> +
> +static void sn65dsi83_monitor_work(struct work_struct *work)
> +{
> +	struct sn65dsi83 *ctx = container_of(to_delayed_work(work),
> +					     struct sn65dsi83, monitor_work);
> +
> +	sn65dsi83_handle_errors(ctx);
> +
> +	schedule_delayed_work(&ctx->monitor_work, msecs_to_jiffies(1000));
> +}
> +
> +static void sn65dsi83_monitor_start(struct sn65dsi83 *ctx)
> +{
> +	schedule_delayed_work(&ctx->monitor_work, msecs_to_jiffies(1000));
> +}
> +
> +static void sn65dsi83_monitor_stop(struct sn65dsi83 *ctx)
> +{
> +	cancel_delayed_work_sync(&ctx->monitor_work);
> +}
> +
>  static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge,
>  					struct drm_bridge_state *old_bridge_state)
>  {
> @@ -549,6 +644,15 @@ static void sn65dsi83_atomic_enable(struct drm_bridge *bridge,
>  	regmap_read(ctx->regmap, REG_IRQ_STAT, &pval);
>  	if (pval)
>  		dev_err(ctx->dev, "Unexpected link status 0x%02x\n", pval);
> +
> +	if (ctx->irq) {
> +		/* Enable irq to detect errors */
> +		regmap_write(ctx->regmap, REG_IRQ_GLOBAL, REG_IRQ_GLOBAL_IRQ_EN);
> +		regmap_write(ctx->regmap, REG_IRQ_EN, 0xff);
> +	} else {
> +		/* Use the polling task */
> +		sn65dsi83_monitor_start(ctx);
> +	}
>  }
>  
>  static void sn65dsi83_atomic_disable(struct drm_bridge *bridge,
> @@ -557,6 +661,15 @@ static void sn65dsi83_atomic_disable(struct drm_bridge *bridge,
>  	struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge);
>  	int ret;
>  
> +	if (ctx->irq) {
> +		/* Disable irq */
> +		regmap_write(ctx->regmap, REG_IRQ_EN, 0x0);
> +		regmap_write(ctx->regmap, REG_IRQ_GLOBAL, 0x0);
> +	} else {
> +		/* Stop the polling task */
> +		sn65dsi83_monitor_stop(ctx);
> +	}
> +
>  	/* Put the chip in reset, pull EN line low, and assure 10ms reset low timing. */
>  	gpiod_set_value_cansleep(ctx->enable_gpio, 0);
>  	usleep_range(10000, 11000);
> @@ -806,6 +919,14 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx)
>  	return 0;
>  }
>  
> +static irqreturn_t sn65dsi83_irq(int irq, void *data)
> +{
> +	struct sn65dsi83 *ctx = data;
> +
> +	sn65dsi83_handle_errors(ctx);
> +	return IRQ_HANDLED;
> +}
> +
>  static int sn65dsi83_probe(struct i2c_client *client)
>  {
>  	const struct i2c_device_id *id = i2c_client_get_device_id(client);
> @@ -819,6 +940,8 @@ static int sn65dsi83_probe(struct i2c_client *client)
>  		return -ENOMEM;
>  
>  	ctx->dev = dev;
> +	INIT_WORK(&ctx->reset_work, sn65dsi83_reset_work);
> +	INIT_DELAYED_WORK(&ctx->monitor_work, sn65dsi83_monitor_work);
>  
>  	if (dev->of_node) {
>  		model = (enum sn65dsi83_model)(uintptr_t)
> @@ -843,6 +966,14 @@ static int sn65dsi83_probe(struct i2c_client *client)
>  	if (IS_ERR(ctx->regmap))
>  		return dev_err_probe(dev, PTR_ERR(ctx->regmap), "failed to get regmap\n");
>  
> +	if (client->irq) {
> +		ctx->irq = client->irq;
> +		ret = devm_request_threaded_irq(ctx->dev, ctx->irq, NULL, sn65dsi83_irq,
> +						IRQF_ONESHOT, dev_name(ctx->dev), ctx);
> +		if (ret)
> +			return dev_err_probe(dev, ret, "failed to request irq\n");
> +	}
> +
>  	dev_set_drvdata(dev, ctx);
>  	i2c_set_clientdata(client, ctx);
>  
> 


-- 
TQ-Systems GmbH | Mühlstraße 2, Gut Delling | 82229 Seefeld, Germany
Amtsgericht München, HRB 105018
Geschäftsführer: Detlef Schneider, Rüdiger Stahl, Stefan Schneider
http://www.tq-group.com/
Re: [PATCH v5 4/4] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Herve Codina 1 year ago
Hi Alexander,

On Thu, 06 Feb 2025 15:38:42 +0100
Alexander Stein <alexander.stein@ew.tq-group.com> wrote:

...
> With interrupt configured I got the following stack trace upon
> reboot/poweroff:
> 
> [   91.317264] sn65dsi83 2-002d: reset the pipe
> [   91.344093] Unable to handle ke
> ** replaying previous printk message **
> [   91.344093] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
...

Ouch :(

One question to help me investigating:
Do you have the issue at init/probe or when you start to display graphics?

Best regards,
Hervé
Re: [PATCH v5 4/4] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Alexander Stein 1 year ago
Hi Herve,

Am Donnerstag, 6. Februar 2025, 16:20:48 CET schrieb Herve Codina:
> Hi Alexander,
> 
> On Thu, 06 Feb 2025 15:38:42 +0100
> Alexander Stein <alexander.stein@ew.tq-group.com> wrote:
> 
> ...
> > With interrupt configured I got the following stack trace upon
> > reboot/poweroff:
> > 
> > [   91.317264] sn65dsi83 2-002d: reset the pipe
> > [   91.344093] Unable to handle ke
> > ** replaying previous printk message **
> > [   91.344093] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
> ...
> 
> Ouch :(
> 
> One question to help me investigating:
> Do you have the issue at init/probe or when you start to display graphics?

This is during shutdown/poweroff. I assume that regmap_reg() in
sn65dsi83_handle_errors() fails and because of that reset_work is scheduled.

Best regards,
Alexander
-- 
TQ-Systems GmbH | Mühlstraße 2, Gut Delling | 82229 Seefeld, Germany
Amtsgericht München, HRB 105018
Geschäftsführer: Detlef Schneider, Rüdiger Stahl, Stefan Schneider
http://www.tq-group.com/
Re: [PATCH v5 4/4] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Herve Codina 1 year ago
Hi Alexander,

On Thu, 06 Feb 2025 16:39:09 +0100
Alexander Stein <alexander.stein@ew.tq-group.com> wrote:

> Hi Herve,
> 
> Am Donnerstag, 6. Februar 2025, 16:20:48 CET schrieb Herve Codina:
> > Hi Alexander,
> > 
> > On Thu, 06 Feb 2025 15:38:42 +0100
> > Alexander Stein <alexander.stein@ew.tq-group.com> wrote:
> > 
> > ...  
> > > With interrupt configured I got the following stack trace upon
> > > reboot/poweroff:
> > > 
> > > [   91.317264] sn65dsi83 2-002d: reset the pipe
> > > [   91.344093] Unable to handle ke
> > > ** replaying previous printk message **
> > > [   91.344093] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000  
> > ...
> > 
> > Ouch :(
> > 
> > One question to help me investigating:
> > Do you have the issue at init/probe or when you start to display graphics?  
> 
> This is during shutdown/poweroff. I assume that regmap_reg() in
> sn65dsi83_handle_errors() fails and because of that reset_work is scheduled.
> 

Found the issue.

Can you give me following information so that I can validate what I
understood:
  - Is your interrupt line connected directly to the SoC?
  - Is there any pullup/pulldown on your interrupt line?
  - In your devicetree what is the configuration used for this interrupt in
    terms of level or edge.

Anyway, I will send a fix in the next iteration.

Best regards,
Hervé
Re: [PATCH v5 4/4] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Alexander Stein 12 months ago
Hi Herve,

Am Freitag, 7. Februar 2025, 19:08:16 CET schrieb Herve Codina:
> Hi Alexander,
> 
> On Thu, 06 Feb 2025 16:39:09 +0100
> Alexander Stein <alexander.stein@ew.tq-group.com> wrote:
> 
> > Hi Herve,
> > 
> > Am Donnerstag, 6. Februar 2025, 16:20:48 CET schrieb Herve Codina:
> > > Hi Alexander,
> > > 
> > > On Thu, 06 Feb 2025 15:38:42 +0100
> > > Alexander Stein <alexander.stein@ew.tq-group.com> wrote:
> > > 
> > > ...  
> > > > With interrupt configured I got the following stack trace upon
> > > > reboot/poweroff:
> > > > 
> > > > [   91.317264] sn65dsi83 2-002d: reset the pipe
> > > > [   91.344093] Unable to handle ke
> > > > ** replaying previous printk message **
> > > > [   91.344093] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000  
> > > ...
> > > 
> > > Ouch :(
> > > 
> > > One question to help me investigating:
> > > Do you have the issue at init/probe or when you start to display graphics?  
> > 
> > This is during shutdown/poweroff. I assume that regmap_reg() in
> > sn65dsi83_handle_errors() fails and because of that reset_work is scheduled.
> > 
> 
> Found the issue.
> 
> Can you give me following information so that I can validate what I
> understood:
>   - Is your interrupt line connected directly to the SoC?

No, unfortunately not. It's connected to a GPIO expander, which in turn
delivers an IRQ to the SoC. In between the bridge and the expander is a
buffer for voltage switch (1V8 -> 3V3).

>   - Is there any pullup/pulldown on your interrupt line?

As far a I can tell, there is no pullup/pulldown on the IRQ line to
the buffer.

>   - In your devicetree what is the configuration used for this interrupt in
>     terms of level or edge.

As this line is connected to an expander (pca9555) only edge triggered
interrupts are supported.

> Anyway, I will send a fix in the next iteration.

Thanks for digging into this.
Best regards,
Alexander
-- 
TQ-Systems GmbH | Mühlstraße 2, Gut Delling | 82229 Seefeld, Germany
Amtsgericht München, HRB 105018
Geschäftsführer: Detlef Schneider, Rüdiger Stahl, Stefan Schneider
http://www.tq-group.com/
Re: [PATCH v5 4/4] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Herve Codina 12 months ago
Hi Alexander,

On Mon, 10 Feb 2025 11:42:09 +0100
Alexander Stein <alexander.stein@ew.tq-group.com> wrote:

> Hi Herve,
> 
> Am Freitag, 7. Februar 2025, 19:08:16 CET schrieb Herve Codina:
> > Hi Alexander,
> > 
> > On Thu, 06 Feb 2025 16:39:09 +0100
> > Alexander Stein <alexander.stein@ew.tq-group.com> wrote:
> >   
> > > Hi Herve,
> > > 
> > > Am Donnerstag, 6. Februar 2025, 16:20:48 CET schrieb Herve Codina:  
> > > > Hi Alexander,
> > > > 
> > > > On Thu, 06 Feb 2025 15:38:42 +0100
> > > > Alexander Stein <alexander.stein@ew.tq-group.com> wrote:
> > > > 
> > > > ...    
> > > > > With interrupt configured I got the following stack trace upon
> > > > > reboot/poweroff:
> > > > > 
> > > > > [   91.317264] sn65dsi83 2-002d: reset the pipe
> > > > > [   91.344093] Unable to handle ke
> > > > > ** replaying previous printk message **
> > > > > [   91.344093] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000    
> > > > ...
> > > > 
> > > > Ouch :(
> > > > 
> > > > One question to help me investigating:
> > > > Do you have the issue at init/probe or when you start to display graphics?    
> > > 
> > > This is during shutdown/poweroff. I assume that regmap_reg() in
> > > sn65dsi83_handle_errors() fails and because of that reset_work is scheduled.
> > >   
> > 
> > Found the issue.
> > 
> > Can you give me following information so that I can validate what I
> > understood:
> >   - Is your interrupt line connected directly to the SoC?  
> 
> No, unfortunately not. It's connected to a GPIO expander, which in turn
> delivers an IRQ to the SoC. In between the bridge and the expander is a
> buffer for voltage switch (1V8 -> 3V3).
> 
> >   - Is there any pullup/pulldown on your interrupt line?  
> 
> As far a I can tell, there is no pullup/pulldown on the IRQ line to
> the buffer.
> 
> >   - In your devicetree what is the configuration used for this interrupt in
> >     terms of level or edge.  
> 
> As this line is connected to an expander (pca9555) only edge triggered
> interrupts are supported.
> 
> > Anyway, I will send a fix in the next iteration.  
> 

Thanks for all those details.

I've just sent a new iteration:
  https://lore.kernel.org/all/20250210132620.42263-1-herve.codina@bootlin.com/

Can you test it on your side?

Best regards,
Hervé
Re: [PATCH v5 4/4] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Maxime Ripard 1 year ago
On Mon, 3 Feb 2025 17:16:06 +0100, Herve Codina wrote:
> In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
> from errors by itself. A full restart of the bridge is needed in those
> cases to have the bridge output LVDS signals again.
> 
> Also, during tests, cases were observed where reading the status of the
> 
> [ ... ]

Reviewed-by: Maxime Ripard <mripard@kernel.org>

Thanks!
Maxime