[PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism

Herve Codina posted 2 patches 1 month ago
[PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Herve Codina 1 month ago
In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
from errors by itself. A full restart of the bridge is needed in those
cases to have the bridge output LVDS signals again.

The TI SN65DSI83 has some error detection capabilities. Introduce an
error recovery mechanism based on this detection.

The errors detected are signaled through an interrupt. On system where
this interrupt is not available, the driver uses a polling monitoring
fallback to check for errors. When an error is present, the recovery
process is launched.

Restarting the bridge needs to redo the initialization sequence. This
initialization sequence has to be done with the DSI data lanes driven in
LP11 state. In order to do that, the recovery process resets the entire
pipeline.

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
---
 drivers/gpu/drm/bridge/ti-sn65dsi83.c | 128 ++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
index 96e829163d87..22975b60e80f 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
@@ -35,9 +35,12 @@
 #include <linux/of_graph.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
+#include <drm/drm_drv.h> /* DRM_MODESET_LOCK_ALL_BEGIN() need drm_drv_uses_atomic_modeset() */
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
@@ -147,6 +150,9 @@ struct sn65dsi83 {
 	struct regulator		*vcc;
 	bool				lvds_dual_link;
 	bool				lvds_dual_link_even_odd_swap;
+	bool				use_irq;
+	struct delayed_work		monitor_work;
+	struct work_struct		reset_work;
 };
 
 static const struct regmap_range sn65dsi83_readable_ranges[] = {
@@ -321,6 +327,92 @@ static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx)
 	return dsi_div - 1;
 }
 
+static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
+{
+	struct drm_device *dev = sn65dsi83->bridge.dev;
+	struct drm_modeset_acquire_ctx ctx;
+	struct drm_atomic_state *state;
+	int err;
+
+	/* Use operation done in drm_atomic_helper_suspend() followed by
+	 * operation done in drm_atomic_helper_resume() but without releasing
+	 * the lock between suspend()/resume()
+	 */
+
+	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
+
+	state = drm_atomic_helper_duplicate_state(dev, &ctx);
+	if (IS_ERR(state)) {
+		err = PTR_ERR(state);
+		goto unlock;
+	}
+
+	err = drm_atomic_helper_disable_all(dev, &ctx);
+	if (err < 0)
+		goto unlock;
+
+	drm_mode_config_reset(dev);
+
+	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);
+
+unlock:
+	DRM_MODESET_LOCK_ALL_END(dev, ctx, err);
+	if (!IS_ERR(state))
+		drm_atomic_state_put(state);
+	return err;
+}
+
+static void sn65dsi83_reset_work(struct work_struct *ws)
+{
+	struct sn65dsi83 *ctx = container_of(ws, struct sn65dsi83, reset_work);
+	int ret;
+
+	dev_warn(ctx->dev, "reset pipeline\n");
+
+	/* Reset the pipeline */
+	ret = sn65dsi83_reset_pipeline(ctx);
+	if (ret) {
+		dev_err(ctx->dev, "reset pipeline failed %pe\n", ERR_PTR(ret));
+		return;
+	}
+}
+
+static void sn65dsi83_handle_errors(struct sn65dsi83 *ctx)
+{
+	unsigned int irq_stat;
+	int ret;
+
+	/*
+	 * Schedule a reset in case of:
+	 *  - the bridge doesn't answer
+	 *  - the bridge signals an error
+	 */
+
+	ret = regmap_read(ctx->regmap, REG_IRQ_STAT, &irq_stat);
+	if (ret || irq_stat)
+		schedule_work(&ctx->reset_work);
+}
+
+static void sn65dsi83_monitor_work(struct work_struct *work)
+{
+	struct sn65dsi83 *ctx = container_of(to_delayed_work(work),
+					     struct sn65dsi83, monitor_work);
+
+	sn65dsi83_handle_errors(ctx);
+
+	schedule_delayed_work(&ctx->monitor_work, msecs_to_jiffies(1000));
+}
+
+static void sn65dsi83_monitor_start(struct sn65dsi83 *ctx)
+{
+	schedule_delayed_work(&ctx->monitor_work, msecs_to_jiffies(1000));
+}
+
+static void sn65dsi83_monitor_stop(struct sn65dsi83 *ctx)
+{
+	cancel_delayed_work_sync(&ctx->monitor_work);
+}
+
 static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge,
 					struct drm_bridge_state *old_bridge_state)
 {
@@ -509,6 +601,15 @@ static void sn65dsi83_atomic_enable(struct drm_bridge *bridge,
 	regmap_read(ctx->regmap, REG_IRQ_STAT, &pval);
 	if (pval)
 		dev_err(ctx->dev, "Unexpected link status 0x%02x\n", pval);
+
+	if (ctx->use_irq) {
+		/* Enable irq to detect errors */
+		regmap_write(ctx->regmap, REG_IRQ_GLOBAL, REG_IRQ_GLOBAL_IRQ_EN);
+		regmap_write(ctx->regmap, REG_IRQ_EN, 0xff);
+	} else {
+		/* Use the polling task */
+		sn65dsi83_monitor_start(ctx);
+	}
 }
 
 static void sn65dsi83_atomic_disable(struct drm_bridge *bridge,
@@ -517,6 +618,15 @@ static void sn65dsi83_atomic_disable(struct drm_bridge *bridge,
 	struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge);
 	int ret;
 
+	if (ctx->use_irq) {
+		/* Disable irq */
+		regmap_write(ctx->regmap, REG_IRQ_EN, 0x0);
+		regmap_write(ctx->regmap, REG_IRQ_GLOBAL, 0x0);
+	} else {
+		/* Stop the polling task */
+		sn65dsi83_monitor_stop(ctx);
+	}
+
 	/* Put the chip in reset, pull EN line low, and assure 10ms reset low timing. */
 	gpiod_set_value_cansleep(ctx->enable_gpio, 0);
 	usleep_range(10000, 11000);
@@ -673,6 +783,14 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx)
 	return 0;
 }
 
+static irqreturn_t sn65dsi83_irq(int irq, void *data)
+{
+	struct sn65dsi83 *ctx = data;
+
+	sn65dsi83_handle_errors(ctx);
+	return IRQ_HANDLED;
+}
+
 static int sn65dsi83_probe(struct i2c_client *client)
 {
 	const struct i2c_device_id *id = i2c_client_get_device_id(client);
@@ -686,6 +804,8 @@ static int sn65dsi83_probe(struct i2c_client *client)
 		return -ENOMEM;
 
 	ctx->dev = dev;
+	INIT_WORK(&ctx->reset_work, sn65dsi83_reset_work);
+	INIT_DELAYED_WORK(&ctx->monitor_work, sn65dsi83_monitor_work);
 
 	if (dev->of_node) {
 		model = (enum sn65dsi83_model)(uintptr_t)
@@ -710,6 +830,14 @@ static int sn65dsi83_probe(struct i2c_client *client)
 	if (IS_ERR(ctx->regmap))
 		return dev_err_probe(dev, PTR_ERR(ctx->regmap), "failed to get regmap\n");
 
+	if (client->irq) {
+		ret = devm_request_threaded_irq(ctx->dev, client->irq, NULL, sn65dsi83_irq,
+						IRQF_ONESHOT, dev_name(ctx->dev), ctx);
+		if (ret)
+			return dev_err_probe(dev, ret, "failed to request irq\n");
+		ctx->use_irq = true;
+	}
+
 	dev_set_drvdata(dev, ctx);
 	i2c_set_clientdata(client, ctx);
 
-- 
2.46.2
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Dan Carpenter 4 weeks ago
Hi Herve,

kernel test robot noticed the following build warnings:

https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Herve-Codina/dt-bindings-display-bridge-sn65dsi83-Add-interrupt/20241024-175758
base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git for-next
patch link:    https://lore.kernel.org/r/20241024095539.1637280-3-herve.codina%40bootlin.com
patch subject: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
config: csky-randconfig-r073-20241026 (https://download.01.org/0day-ci/archive/20241026/202410262052.CRR7XezU-lkp@intel.com/config)
compiler: csky-linux-gcc (GCC) 14.1.0

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202410262052.CRR7XezU-lkp@intel.com/

smatch warnings:
drivers/gpu/drm/bridge/ti-sn65dsi83.c:360 sn65dsi83_reset_pipeline() error: uninitialized symbol 'state'.

vim +/state +360 drivers/gpu/drm/bridge/ti-sn65dsi83.c

caeb909b9ed830 Herve Codina 2024-10-24  330  static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
caeb909b9ed830 Herve Codina 2024-10-24  331  {
caeb909b9ed830 Herve Codina 2024-10-24  332  	struct drm_device *dev = sn65dsi83->bridge.dev;
caeb909b9ed830 Herve Codina 2024-10-24  333  	struct drm_modeset_acquire_ctx ctx;
caeb909b9ed830 Herve Codina 2024-10-24  334  	struct drm_atomic_state *state;

Almost everyone has their compiler set to zero out stack variables these days.
You should set this to struct drm_atomic_state *state = ERR_PTR(-EINVAL);.

caeb909b9ed830 Herve Codina 2024-10-24  335  	int err;
caeb909b9ed830 Herve Codina 2024-10-24  336  
caeb909b9ed830 Herve Codina 2024-10-24  337  	/* Use operation done in drm_atomic_helper_suspend() followed by
caeb909b9ed830 Herve Codina 2024-10-24  338  	 * operation done in drm_atomic_helper_resume() but without releasing
caeb909b9ed830 Herve Codina 2024-10-24  339  	 * the lock between suspend()/resume()
caeb909b9ed830 Herve Codina 2024-10-24  340  	 */
caeb909b9ed830 Herve Codina 2024-10-24  341  
caeb909b9ed830 Herve Codina 2024-10-24  342  	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);

This macro has a goto in it.

caeb909b9ed830 Herve Codina 2024-10-24  343  
caeb909b9ed830 Herve Codina 2024-10-24  344  	state = drm_atomic_helper_duplicate_state(dev, &ctx);
caeb909b9ed830 Herve Codina 2024-10-24  345  	if (IS_ERR(state)) {
caeb909b9ed830 Herve Codina 2024-10-24  346  		err = PTR_ERR(state);
caeb909b9ed830 Herve Codina 2024-10-24  347  		goto unlock;
caeb909b9ed830 Herve Codina 2024-10-24  348  	}
caeb909b9ed830 Herve Codina 2024-10-24  349  
caeb909b9ed830 Herve Codina 2024-10-24  350  	err = drm_atomic_helper_disable_all(dev, &ctx);
caeb909b9ed830 Herve Codina 2024-10-24  351  	if (err < 0)
caeb909b9ed830 Herve Codina 2024-10-24  352  		goto unlock;
caeb909b9ed830 Herve Codina 2024-10-24  353  
caeb909b9ed830 Herve Codina 2024-10-24  354  	drm_mode_config_reset(dev);
caeb909b9ed830 Herve Codina 2024-10-24  355  
caeb909b9ed830 Herve Codina 2024-10-24  356  	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);
caeb909b9ed830 Herve Codina 2024-10-24  357  
caeb909b9ed830 Herve Codina 2024-10-24  358  unlock:
caeb909b9ed830 Herve Codina 2024-10-24  359  	DRM_MODESET_LOCK_ALL_END(dev, ctx, err);
caeb909b9ed830 Herve Codina 2024-10-24 @360  	if (!IS_ERR(state))
caeb909b9ed830 Herve Codina 2024-10-24  361  		drm_atomic_state_put(state);

Calling drm_atomic_state_put(NULL) leads to an Oops.

caeb909b9ed830 Herve Codina 2024-10-24  362  	return err;
caeb909b9ed830 Herve Codina 2024-10-24  363  }

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Laurent Pinchart 4 weeks, 1 day ago
On Thu, Oct 24, 2024 at 11:55:38AM +0200, Herve Codina wrote:
> In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
> from errors by itself. A full restart of the bridge is needed in those
> cases to have the bridge output LVDS signals again.
> 
> The TI SN65DSI83 has some error detection capabilities. Introduce an
> error recovery mechanism based on this detection.
> 
> The errors detected are signaled through an interrupt. On system where
> this interrupt is not available, the driver uses a polling monitoring
> fallback to check for errors. When an error is present, the recovery
> process is launched.
> 
> Restarting the bridge needs to redo the initialization sequence. This
> initialization sequence has to be done with the DSI data lanes driven in
> LP11 state. In order to do that, the recovery process resets the entire
> pipeline.
> 
> Signed-off-by: Herve Codina <herve.codina@bootlin.com>
> ---
>  drivers/gpu/drm/bridge/ti-sn65dsi83.c | 128 ++++++++++++++++++++++++++
>  1 file changed, 128 insertions(+)
> 
> diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> index 96e829163d87..22975b60e80f 100644
> --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> @@ -35,9 +35,12 @@
>  #include <linux/of_graph.h>
>  #include <linux/regmap.h>
>  #include <linux/regulator/consumer.h>
> +#include <linux/timer.h>
> +#include <linux/workqueue.h>
>  
>  #include <drm/drm_atomic_helper.h>
>  #include <drm/drm_bridge.h>
> +#include <drm/drm_drv.h> /* DRM_MODESET_LOCK_ALL_BEGIN() need drm_drv_uses_atomic_modeset() */
>  #include <drm/drm_mipi_dsi.h>
>  #include <drm/drm_of.h>
>  #include <drm/drm_panel.h>
> @@ -147,6 +150,9 @@ struct sn65dsi83 {
>  	struct regulator		*vcc;
>  	bool				lvds_dual_link;
>  	bool				lvds_dual_link_even_odd_swap;
> +	bool				use_irq;
> +	struct delayed_work		monitor_work;
> +	struct work_struct		reset_work;
>  };
>  
>  static const struct regmap_range sn65dsi83_readable_ranges[] = {
> @@ -321,6 +327,92 @@ static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx)
>  	return dsi_div - 1;
>  }
>  
> +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> +{
> +	struct drm_device *dev = sn65dsi83->bridge.dev;
> +	struct drm_modeset_acquire_ctx ctx;
> +	struct drm_atomic_state *state;
> +	int err;
> +
> +	/* Use operation done in drm_atomic_helper_suspend() followed by
> +	 * operation done in drm_atomic_helper_resume() but without releasing
> +	 * the lock between suspend()/resume()
> +	 */
> +
> +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> +
> +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> +	if (IS_ERR(state)) {
> +		err = PTR_ERR(state);
> +		goto unlock;
> +	}
> +
> +	err = drm_atomic_helper_disable_all(dev, &ctx);
> +	if (err < 0)
> +		goto unlock;
> +
> +	drm_mode_config_reset(dev);
> +
> +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);

Committing a full atomic state from a bridge driver in an asynchronous
way seems quite uncharted territory, and it worries me. It's also a very
heavyweight, you disable all outputs here, instead of focussing on the
output connected to the bridge. Can you either implement something more
local, resetting the bridge only, or create a core helper to handle this
kind of situation, on a per-output basis ?

> +
> +unlock:
> +	DRM_MODESET_LOCK_ALL_END(dev, ctx, err);
> +	if (!IS_ERR(state))
> +		drm_atomic_state_put(state);
> +	return err;
> +}
> +
> +static void sn65dsi83_reset_work(struct work_struct *ws)
> +{
> +	struct sn65dsi83 *ctx = container_of(ws, struct sn65dsi83, reset_work);
> +	int ret;
> +
> +	dev_warn(ctx->dev, "reset pipeline\n");
> +
> +	/* Reset the pipeline */
> +	ret = sn65dsi83_reset_pipeline(ctx);
> +	if (ret) {
> +		dev_err(ctx->dev, "reset pipeline failed %pe\n", ERR_PTR(ret));
> +		return;
> +	}
> +}
> +
> +static void sn65dsi83_handle_errors(struct sn65dsi83 *ctx)
> +{
> +	unsigned int irq_stat;
> +	int ret;
> +
> +	/*
> +	 * Schedule a reset in case of:
> +	 *  - the bridge doesn't answer
> +	 *  - the bridge signals an error
> +	 */
> +
> +	ret = regmap_read(ctx->regmap, REG_IRQ_STAT, &irq_stat);
> +	if (ret || irq_stat)
> +		schedule_work(&ctx->reset_work);
> +}
> +
> +static void sn65dsi83_monitor_work(struct work_struct *work)
> +{
> +	struct sn65dsi83 *ctx = container_of(to_delayed_work(work),
> +					     struct sn65dsi83, monitor_work);
> +
> +	sn65dsi83_handle_errors(ctx);
> +
> +	schedule_delayed_work(&ctx->monitor_work, msecs_to_jiffies(1000));
> +}
> +
> +static void sn65dsi83_monitor_start(struct sn65dsi83 *ctx)
> +{
> +	schedule_delayed_work(&ctx->monitor_work, msecs_to_jiffies(1000));
> +}
> +
> +static void sn65dsi83_monitor_stop(struct sn65dsi83 *ctx)
> +{
> +	cancel_delayed_work_sync(&ctx->monitor_work);
> +}
> +
>  static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge,
>  					struct drm_bridge_state *old_bridge_state)
>  {
> @@ -509,6 +601,15 @@ static void sn65dsi83_atomic_enable(struct drm_bridge *bridge,
>  	regmap_read(ctx->regmap, REG_IRQ_STAT, &pval);
>  	if (pval)
>  		dev_err(ctx->dev, "Unexpected link status 0x%02x\n", pval);
> +
> +	if (ctx->use_irq) {
> +		/* Enable irq to detect errors */
> +		regmap_write(ctx->regmap, REG_IRQ_GLOBAL, REG_IRQ_GLOBAL_IRQ_EN);
> +		regmap_write(ctx->regmap, REG_IRQ_EN, 0xff);
> +	} else {
> +		/* Use the polling task */
> +		sn65dsi83_monitor_start(ctx);
> +	}
>  }
>  
>  static void sn65dsi83_atomic_disable(struct drm_bridge *bridge,
> @@ -517,6 +618,15 @@ static void sn65dsi83_atomic_disable(struct drm_bridge *bridge,
>  	struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge);
>  	int ret;
>  
> +	if (ctx->use_irq) {
> +		/* Disable irq */
> +		regmap_write(ctx->regmap, REG_IRQ_EN, 0x0);
> +		regmap_write(ctx->regmap, REG_IRQ_GLOBAL, 0x0);
> +	} else {
> +		/* Stop the polling task */
> +		sn65dsi83_monitor_stop(ctx);
> +	}
> +
>  	/* Put the chip in reset, pull EN line low, and assure 10ms reset low timing. */
>  	gpiod_set_value_cansleep(ctx->enable_gpio, 0);
>  	usleep_range(10000, 11000);
> @@ -673,6 +783,14 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx)
>  	return 0;
>  }
>  
> +static irqreturn_t sn65dsi83_irq(int irq, void *data)
> +{
> +	struct sn65dsi83 *ctx = data;
> +
> +	sn65dsi83_handle_errors(ctx);
> +	return IRQ_HANDLED;
> +}
> +
>  static int sn65dsi83_probe(struct i2c_client *client)
>  {
>  	const struct i2c_device_id *id = i2c_client_get_device_id(client);
> @@ -686,6 +804,8 @@ static int sn65dsi83_probe(struct i2c_client *client)
>  		return -ENOMEM;
>  
>  	ctx->dev = dev;
> +	INIT_WORK(&ctx->reset_work, sn65dsi83_reset_work);
> +	INIT_DELAYED_WORK(&ctx->monitor_work, sn65dsi83_monitor_work);
>  
>  	if (dev->of_node) {
>  		model = (enum sn65dsi83_model)(uintptr_t)
> @@ -710,6 +830,14 @@ static int sn65dsi83_probe(struct i2c_client *client)
>  	if (IS_ERR(ctx->regmap))
>  		return dev_err_probe(dev, PTR_ERR(ctx->regmap), "failed to get regmap\n");
>  
> +	if (client->irq) {
> +		ret = devm_request_threaded_irq(ctx->dev, client->irq, NULL, sn65dsi83_irq,
> +						IRQF_ONESHOT, dev_name(ctx->dev), ctx);
> +		if (ret)
> +			return dev_err_probe(dev, ret, "failed to request irq\n");
> +		ctx->use_irq = true;
> +	}
> +
>  	dev_set_drvdata(dev, ctx);
>  	i2c_set_clientdata(client, ctx);
>  

-- 
Regards,

Laurent Pinchart
Re:Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Andy Yan 3 weeks, 6 days ago
Hi all,

At 2024-10-28 00:23:50, "Laurent Pinchart" <laurent.pinchart@ideasonboard.com> wrote:
>On Thu, Oct 24, 2024 at 11:55:38AM +0200, Herve Codina wrote:
>> In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
>> from errors by itself. A full restart of the bridge is needed in those
>> cases to have the bridge output LVDS signals again.
>> 
>> The TI SN65DSI83 has some error detection capabilities. Introduce an
>> error recovery mechanism based on this detection.
>> 
>> The errors detected are signaled through an interrupt. On system where
>> this interrupt is not available, the driver uses a polling monitoring
>> fallback to check for errors. When an error is present, the recovery
>> process is launched.
>> 
>> Restarting the bridge needs to redo the initialization sequence. This
>> initialization sequence has to be done with the DSI data lanes driven in
>> LP11 state. In order to do that, the recovery process resets the entire
>> pipeline.
>> 
>> Signed-off-by: Herve Codina <herve.codina@bootlin.com>
>> ---
>>  drivers/gpu/drm/bridge/ti-sn65dsi83.c | 128 ++++++++++++++++++++++++++
>>  1 file changed, 128 insertions(+)
>> 
>> diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
>> index 96e829163d87..22975b60e80f 100644
>> --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
>> +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
>> @@ -35,9 +35,12 @@
>>  #include <linux/of_graph.h>
>>  #include <linux/regmap.h>
>>  #include <linux/regulator/consumer.h>
>> +#include <linux/timer.h>
>> +#include <linux/workqueue.h>
>>  
>>  #include <drm/drm_atomic_helper.h>
>>  #include <drm/drm_bridge.h>
>> +#include <drm/drm_drv.h> /* DRM_MODESET_LOCK_ALL_BEGIN() need drm_drv_uses_atomic_modeset() */
>>  #include <drm/drm_mipi_dsi.h>
>>  #include <drm/drm_of.h>
>>  #include <drm/drm_panel.h>
>> @@ -147,6 +150,9 @@ struct sn65dsi83 {
>>  	struct regulator		*vcc;
>>  	bool				lvds_dual_link;
>>  	bool				lvds_dual_link_even_odd_swap;
>> +	bool				use_irq;
>> +	struct delayed_work		monitor_work;
>> +	struct work_struct		reset_work;
>>  };
>>  
>>  static const struct regmap_range sn65dsi83_readable_ranges[] = {
>> @@ -321,6 +327,92 @@ static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx)
>>  	return dsi_div - 1;
>>  }
>>  
>> +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
>> +{
>> +	struct drm_device *dev = sn65dsi83->bridge.dev;
>> +	struct drm_modeset_acquire_ctx ctx;
>> +	struct drm_atomic_state *state;
>> +	int err;
>> +
>> +	/* Use operation done in drm_atomic_helper_suspend() followed by
>> +	 * operation done in drm_atomic_helper_resume() but without releasing
>> +	 * the lock between suspend()/resume()
>> +	 */
>> +
>> +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
>> +
>> +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
>> +	if (IS_ERR(state)) {
>> +		err = PTR_ERR(state);
>> +		goto unlock;
>> +	}
>> +
>> +	err = drm_atomic_helper_disable_all(dev, &ctx);
>> +	if (err < 0)
>> +		goto unlock;
>> +
>> +	drm_mode_config_reset(dev);
>> +
>> +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);
>
>Committing a full atomic state from a bridge driver in an asynchronous
>way seems quite uncharted territory, and it worries me. It's also a very
>heavyweight, you disable all outputs here, instead of focussing on the
>output connected to the bridge. Can you either implement something more
>local, resetting the bridge only, or create a core helper to handle this
>kind of situation, on a per-output basis ?

If we could simulate a hotplug(disconnected then connected) event to user space and
let userspace do the disable/enable of the output pipeline,  would things be simpler?


>
>> +
>> +unlock:
>> +	DRM_MODESET_LOCK_ALL_END(dev, ctx, err);
>> +	if (!IS_ERR(state))
>> +		drm_atomic_state_put(state);
>> +	return err;
>> +}
>> +
>> +static void sn65dsi83_reset_work(struct work_struct *ws)
>> +{
>> +	struct sn65dsi83 *ctx = container_of(ws, struct sn65dsi83, reset_work);
>> +	int ret;
>> +
>> +	dev_warn(ctx->dev, "reset pipeline\n");
>> +
>> +	/* Reset the pipeline */
>> +	ret = sn65dsi83_reset_pipeline(ctx);
>> +	if (ret) {
>> +		dev_err(ctx->dev, "reset pipeline failed %pe\n", ERR_PTR(ret));
>> +		return;
>> +	}
>> +}
>> +
>> +static void sn65dsi83_handle_errors(struct sn65dsi83 *ctx)
>> +{
>> +	unsigned int irq_stat;
>> +	int ret;
>> +
>> +	/*
>> +	 * Schedule a reset in case of:
>> +	 *  - the bridge doesn't answer
>> +	 *  - the bridge signals an error
>> +	 */
>> +
>> +	ret = regmap_read(ctx->regmap, REG_IRQ_STAT, &irq_stat);
>> +	if (ret || irq_stat)
>> +		schedule_work(&ctx->reset_work);
>> +}
>> +
>> +static void sn65dsi83_monitor_work(struct work_struct *work)
>> +{
>> +	struct sn65dsi83 *ctx = container_of(to_delayed_work(work),
>> +					     struct sn65dsi83, monitor_work);
>> +
>> +	sn65dsi83_handle_errors(ctx);
>> +
>> +	schedule_delayed_work(&ctx->monitor_work, msecs_to_jiffies(1000));
>> +}
>> +
>> +static void sn65dsi83_monitor_start(struct sn65dsi83 *ctx)
>> +{
>> +	schedule_delayed_work(&ctx->monitor_work, msecs_to_jiffies(1000));
>> +}
>> +
>> +static void sn65dsi83_monitor_stop(struct sn65dsi83 *ctx)
>> +{
>> +	cancel_delayed_work_sync(&ctx->monitor_work);
>> +}
>> +
>>  static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge,
>>  					struct drm_bridge_state *old_bridge_state)
>>  {
>> @@ -509,6 +601,15 @@ static void sn65dsi83_atomic_enable(struct drm_bridge *bridge,
>>  	regmap_read(ctx->regmap, REG_IRQ_STAT, &pval);
>>  	if (pval)
>>  		dev_err(ctx->dev, "Unexpected link status 0x%02x\n", pval);
>> +
>> +	if (ctx->use_irq) {
>> +		/* Enable irq to detect errors */
>> +		regmap_write(ctx->regmap, REG_IRQ_GLOBAL, REG_IRQ_GLOBAL_IRQ_EN);
>> +		regmap_write(ctx->regmap, REG_IRQ_EN, 0xff);
>> +	} else {
>> +		/* Use the polling task */
>> +		sn65dsi83_monitor_start(ctx);
>> +	}
>>  }
>>  
>>  static void sn65dsi83_atomic_disable(struct drm_bridge *bridge,
>> @@ -517,6 +618,15 @@ static void sn65dsi83_atomic_disable(struct drm_bridge *bridge,
>>  	struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge);
>>  	int ret;
>>  
>> +	if (ctx->use_irq) {
>> +		/* Disable irq */
>> +		regmap_write(ctx->regmap, REG_IRQ_EN, 0x0);
>> +		regmap_write(ctx->regmap, REG_IRQ_GLOBAL, 0x0);
>> +	} else {
>> +		/* Stop the polling task */
>> +		sn65dsi83_monitor_stop(ctx);
>> +	}
>> +
>>  	/* Put the chip in reset, pull EN line low, and assure 10ms reset low timing. */
>>  	gpiod_set_value_cansleep(ctx->enable_gpio, 0);
>>  	usleep_range(10000, 11000);
>> @@ -673,6 +783,14 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx)
>>  	return 0;
>>  }
>>  
>> +static irqreturn_t sn65dsi83_irq(int irq, void *data)
>> +{
>> +	struct sn65dsi83 *ctx = data;
>> +
>> +	sn65dsi83_handle_errors(ctx);
>> +	return IRQ_HANDLED;
>> +}
>> +
>>  static int sn65dsi83_probe(struct i2c_client *client)
>>  {
>>  	const struct i2c_device_id *id = i2c_client_get_device_id(client);
>> @@ -686,6 +804,8 @@ static int sn65dsi83_probe(struct i2c_client *client)
>>  		return -ENOMEM;
>>  
>>  	ctx->dev = dev;
>> +	INIT_WORK(&ctx->reset_work, sn65dsi83_reset_work);
>> +	INIT_DELAYED_WORK(&ctx->monitor_work, sn65dsi83_monitor_work);
>>  
>>  	if (dev->of_node) {
>>  		model = (enum sn65dsi83_model)(uintptr_t)
>> @@ -710,6 +830,14 @@ static int sn65dsi83_probe(struct i2c_client *client)
>>  	if (IS_ERR(ctx->regmap))
>>  		return dev_err_probe(dev, PTR_ERR(ctx->regmap), "failed to get regmap\n");
>>  
>> +	if (client->irq) {
>> +		ret = devm_request_threaded_irq(ctx->dev, client->irq, NULL, sn65dsi83_irq,
>> +						IRQF_ONESHOT, dev_name(ctx->dev), ctx);
>> +		if (ret)
>> +			return dev_err_probe(dev, ret, "failed to request irq\n");
>> +		ctx->use_irq = true;
>> +	}
>> +
>>  	dev_set_drvdata(dev, ctx);
>>  	i2c_set_clientdata(client, ctx);
>>  
>
>-- 
>Regards,
>
>Laurent Pinchart
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Maxime Ripard 3 weeks, 6 days ago
On Tue, Oct 29, 2024 at 04:02:33PM +0800, Andy Yan wrote:
> At 2024-10-28 00:23:50, "Laurent Pinchart" <laurent.pinchart@ideasonboard.com> wrote:
> >On Thu, Oct 24, 2024 at 11:55:38AM +0200, Herve Codina wrote:
> >> In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
> >> from errors by itself. A full restart of the bridge is needed in those
> >> cases to have the bridge output LVDS signals again.
> >> 
> >> The TI SN65DSI83 has some error detection capabilities. Introduce an
> >> error recovery mechanism based on this detection.
> >> 
> >> The errors detected are signaled through an interrupt. On system where
> >> this interrupt is not available, the driver uses a polling monitoring
> >> fallback to check for errors. When an error is present, the recovery
> >> process is launched.
> >> 
> >> Restarting the bridge needs to redo the initialization sequence. This
> >> initialization sequence has to be done with the DSI data lanes driven in
> >> LP11 state. In order to do that, the recovery process resets the entire
> >> pipeline.
> >> 
> >> Signed-off-by: Herve Codina <herve.codina@bootlin.com>
> >> ---
> >>  drivers/gpu/drm/bridge/ti-sn65dsi83.c | 128 ++++++++++++++++++++++++++
> >>  1 file changed, 128 insertions(+)
> >> 
> >> diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> >> index 96e829163d87..22975b60e80f 100644
> >> --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> >> +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> >> @@ -35,9 +35,12 @@
> >>  #include <linux/of_graph.h>
> >>  #include <linux/regmap.h>
> >>  #include <linux/regulator/consumer.h>
> >> +#include <linux/timer.h>
> >> +#include <linux/workqueue.h>
> >>  
> >>  #include <drm/drm_atomic_helper.h>
> >>  #include <drm/drm_bridge.h>
> >> +#include <drm/drm_drv.h> /* DRM_MODESET_LOCK_ALL_BEGIN() need drm_drv_uses_atomic_modeset() */
> >>  #include <drm/drm_mipi_dsi.h>
> >>  #include <drm/drm_of.h>
> >>  #include <drm/drm_panel.h>
> >> @@ -147,6 +150,9 @@ struct sn65dsi83 {
> >>  	struct regulator		*vcc;
> >>  	bool				lvds_dual_link;
> >>  	bool				lvds_dual_link_even_odd_swap;
> >> +	bool				use_irq;
> >> +	struct delayed_work		monitor_work;
> >> +	struct work_struct		reset_work;
> >>  };
> >>  
> >>  static const struct regmap_range sn65dsi83_readable_ranges[] = {
> >> @@ -321,6 +327,92 @@ static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx)
> >>  	return dsi_div - 1;
> >>  }
> >>  
> >> +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> >> +{
> >> +	struct drm_device *dev = sn65dsi83->bridge.dev;
> >> +	struct drm_modeset_acquire_ctx ctx;
> >> +	struct drm_atomic_state *state;
> >> +	int err;
> >> +
> >> +	/* Use operation done in drm_atomic_helper_suspend() followed by
> >> +	 * operation done in drm_atomic_helper_resume() but without releasing
> >> +	 * the lock between suspend()/resume()
> >> +	 */
> >> +
> >> +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> >> +
> >> +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> >> +	if (IS_ERR(state)) {
> >> +		err = PTR_ERR(state);
> >> +		goto unlock;
> >> +	}
> >> +
> >> +	err = drm_atomic_helper_disable_all(dev, &ctx);
> >> +	if (err < 0)
> >> +		goto unlock;
> >> +
> >> +	drm_mode_config_reset(dev);
> >> +
> >> +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);
> >
> >Committing a full atomic state from a bridge driver in an asynchronous
> >way seems quite uncharted territory, and it worries me. It's also a very
> >heavyweight, you disable all outputs here, instead of focussing on the
> >output connected to the bridge. Can you either implement something more
> >local, resetting the bridge only, or create a core helper to handle this
> >kind of situation, on a per-output basis ?
> 
> If we could simulate a hotplug(disconnected then connected) event to
> user space and let userspace do the disable/enable of the output
> pipeline, would things be simpler?

No, because you can't expect the userspace to handle that event in the
first place.

Maxime
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Maxime Ripard 4 weeks ago
On Sun, Oct 27, 2024 at 06:23:50PM +0200, Laurent Pinchart wrote:
> On Thu, Oct 24, 2024 at 11:55:38AM +0200, Herve Codina wrote:
> > In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
> > from errors by itself. A full restart of the bridge is needed in those
> > cases to have the bridge output LVDS signals again.
> > 
> > The TI SN65DSI83 has some error detection capabilities. Introduce an
> > error recovery mechanism based on this detection.
> > 
> > The errors detected are signaled through an interrupt. On system where
> > this interrupt is not available, the driver uses a polling monitoring
> > fallback to check for errors. When an error is present, the recovery
> > process is launched.
> > 
> > Restarting the bridge needs to redo the initialization sequence. This
> > initialization sequence has to be done with the DSI data lanes driven in
> > LP11 state. In order to do that, the recovery process resets the entire
> > pipeline.
> > 
> > Signed-off-by: Herve Codina <herve.codina@bootlin.com>
> > ---
> >  drivers/gpu/drm/bridge/ti-sn65dsi83.c | 128 ++++++++++++++++++++++++++
> >  1 file changed, 128 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> > index 96e829163d87..22975b60e80f 100644
> > --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> > +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> > @@ -35,9 +35,12 @@
> >  #include <linux/of_graph.h>
> >  #include <linux/regmap.h>
> >  #include <linux/regulator/consumer.h>
> > +#include <linux/timer.h>
> > +#include <linux/workqueue.h>
> >  
> >  #include <drm/drm_atomic_helper.h>
> >  #include <drm/drm_bridge.h>
> > +#include <drm/drm_drv.h> /* DRM_MODESET_LOCK_ALL_BEGIN() need drm_drv_uses_atomic_modeset() */
> >  #include <drm/drm_mipi_dsi.h>
> >  #include <drm/drm_of.h>
> >  #include <drm/drm_panel.h>
> > @@ -147,6 +150,9 @@ struct sn65dsi83 {
> >  	struct regulator		*vcc;
> >  	bool				lvds_dual_link;
> >  	bool				lvds_dual_link_even_odd_swap;
> > +	bool				use_irq;
> > +	struct delayed_work		monitor_work;
> > +	struct work_struct		reset_work;
> >  };
> >  
> >  static const struct regmap_range sn65dsi83_readable_ranges[] = {
> > @@ -321,6 +327,92 @@ static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx)
> >  	return dsi_div - 1;
> >  }
> >  
> > +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> > +{
> > +	struct drm_device *dev = sn65dsi83->bridge.dev;
> > +	struct drm_modeset_acquire_ctx ctx;
> > +	struct drm_atomic_state *state;
> > +	int err;
> > +
> > +	/* Use operation done in drm_atomic_helper_suspend() followed by
> > +	 * operation done in drm_atomic_helper_resume() but without releasing
> > +	 * the lock between suspend()/resume()
> > +	 */
> > +
> > +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> > +
> > +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> > +	if (IS_ERR(state)) {
> > +		err = PTR_ERR(state);
> > +		goto unlock;
> > +	}
> > +
> > +	err = drm_atomic_helper_disable_all(dev, &ctx);
> > +	if (err < 0)
> > +		goto unlock;
> > +
> > +	drm_mode_config_reset(dev);
> > +
> > +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);
> 
> Committing a full atomic state from a bridge driver in an asynchronous
> way seems quite uncharted territory, and it worries me. It's also a very
> heavyweight, you disable all outputs here, instead of focussing on the
> output connected to the bridge. Can you either implement something more
> local, resetting the bridge only, or create a core helper to handle this
> kind of situation, on a per-output basis ?

I think you can't just shut down the bridge and restart it, since some
require particular power sequences that will only occur if you also shut
down the upstream controller.

So I think we'd need to tear down the CRTC, connector and everything
in between.

I do agree that it needs to be a generic helper though. In fact, it
looks awfully similar to what vc4 and i915 are doing in reset_pipe and
and intel_modeset_commit_pipes, respectively. It looks like a good
opportunity to make it a helper.

Maxime
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Herve Codina 4 weeks ago
Hi Laurent,

On Sun, 27 Oct 2024 18:23:50 +0200
Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote:

[...]
> > +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> > +{
> > +	struct drm_device *dev = sn65dsi83->bridge.dev;
> > +	struct drm_modeset_acquire_ctx ctx;
> > +	struct drm_atomic_state *state;
> > +	int err;
> > +
> > +	/* Use operation done in drm_atomic_helper_suspend() followed by
> > +	 * operation done in drm_atomic_helper_resume() but without releasing
> > +	 * the lock between suspend()/resume()
> > +	 */
> > +
> > +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> > +
> > +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> > +	if (IS_ERR(state)) {
> > +		err = PTR_ERR(state);
> > +		goto unlock;
> > +	}
> > +
> > +	err = drm_atomic_helper_disable_all(dev, &ctx);
> > +	if (err < 0)
> > +		goto unlock;
> > +
> > +	drm_mode_config_reset(dev);
> > +
> > +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);  
> 
> Committing a full atomic state from a bridge driver in an asynchronous
> way seems quite uncharted territory, and it worries me. It's also a very
> heavyweight, you disable all outputs here, instead of focussing on the
> output connected to the bridge. Can you either implement something more
> local, resetting the bridge only, or create a core helper to handle this
> kind of situation, on a per-output basis ?

A full restart of the bridge (power off/on) is needed and so we need to
redo the initialization sequence. This initialization sequence has to be
done with the DSI data lanes (bridge inputs) driven in LP11 state and so
without any video stream. Only focussing on bridge outputs will not be
sufficient. That's why I brought the pipeline down and restarted it.

Of course, I can copy/paste sn65dsi83_reset_pipeline() to a core helper
function. Is drm_atomic_helper_reset_all() could be a good candidate?

Best regards,
Hervé
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Laurent Pinchart 4 weeks ago
On Mon, Oct 28, 2024 at 09:13:31AM +0100, Herve Codina wrote:
> Hi Laurent,
> 
> On Sun, 27 Oct 2024 18:23:50 +0200
> Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote:
> 
> [...]
> > > +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> > > +{
> > > +	struct drm_device *dev = sn65dsi83->bridge.dev;
> > > +	struct drm_modeset_acquire_ctx ctx;
> > > +	struct drm_atomic_state *state;
> > > +	int err;
> > > +
> > > +	/* Use operation done in drm_atomic_helper_suspend() followed by
> > > +	 * operation done in drm_atomic_helper_resume() but without releasing
> > > +	 * the lock between suspend()/resume()
> > > +	 */
> > > +
> > > +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> > > +
> > > +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> > > +	if (IS_ERR(state)) {
> > > +		err = PTR_ERR(state);
> > > +		goto unlock;
> > > +	}
> > > +
> > > +	err = drm_atomic_helper_disable_all(dev, &ctx);
> > > +	if (err < 0)
> > > +		goto unlock;
> > > +
> > > +	drm_mode_config_reset(dev);
> > > +
> > > +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);  
> > 
> > Committing a full atomic state from a bridge driver in an asynchronous
> > way seems quite uncharted territory, and it worries me. It's also a very
> > heavyweight, you disable all outputs here, instead of focussing on the
> > output connected to the bridge. Can you either implement something more
> > local, resetting the bridge only, or create a core helper to handle this
> > kind of situation, on a per-output basis ?
> 
> A full restart of the bridge (power off/on) is needed and so we need to
> redo the initialization sequence. This initialization sequence has to be
> done with the DSI data lanes (bridge inputs) driven in LP11 state and so
> without any video stream. Only focussing on bridge outputs will not be
> sufficient. That's why I brought the pipeline down and restarted it.

Fair point.

> Of course, I can copy/paste sn65dsi83_reset_pipeline() to a core helper
> function. Is drm_atomic_helper_reset_all() could be a good candidate?

The helper should operate on a single output, unrelated outputs should
not be affected.

-- 
Regards,

Laurent Pinchart
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Maxime Ripard 4 weeks ago
On Mon, Oct 28, 2024 at 01:28:57PM +0200, Laurent Pinchart wrote:
> On Mon, Oct 28, 2024 at 09:13:31AM +0100, Herve Codina wrote:
> > Hi Laurent,
> > 
> > On Sun, 27 Oct 2024 18:23:50 +0200
> > Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote:
> > 
> > [...]
> > > > +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> > > > +{
> > > > +	struct drm_device *dev = sn65dsi83->bridge.dev;
> > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > +	struct drm_atomic_state *state;
> > > > +	int err;
> > > > +
> > > > +	/* Use operation done in drm_atomic_helper_suspend() followed by
> > > > +	 * operation done in drm_atomic_helper_resume() but without releasing
> > > > +	 * the lock between suspend()/resume()
> > > > +	 */
> > > > +
> > > > +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> > > > +
> > > > +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> > > > +	if (IS_ERR(state)) {
> > > > +		err = PTR_ERR(state);
> > > > +		goto unlock;
> > > > +	}
> > > > +
> > > > +	err = drm_atomic_helper_disable_all(dev, &ctx);
> > > > +	if (err < 0)
> > > > +		goto unlock;
> > > > +
> > > > +	drm_mode_config_reset(dev);
> > > > +
> > > > +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);  
> > > 
> > > Committing a full atomic state from a bridge driver in an asynchronous
> > > way seems quite uncharted territory, and it worries me. It's also a very
> > > heavyweight, you disable all outputs here, instead of focussing on the
> > > output connected to the bridge. Can you either implement something more
> > > local, resetting the bridge only, or create a core helper to handle this
> > > kind of situation, on a per-output basis ?
> > 
> > A full restart of the bridge (power off/on) is needed and so we need to
> > redo the initialization sequence. This initialization sequence has to be
> > done with the DSI data lanes (bridge inputs) driven in LP11 state and so
> > without any video stream. Only focussing on bridge outputs will not be
> > sufficient. That's why I brought the pipeline down and restarted it.
> 
> Fair point.
> 
> > Of course, I can copy/paste sn65dsi83_reset_pipeline() to a core helper
> > function. Is drm_atomic_helper_reset_all() could be a good candidate?
> 
> The helper should operate on a single output, unrelated outputs should
> not be affected.

Also, you don't want to reset anything, you just want the last commit to
be replayed.

Maxime
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Laurent Pinchart 4 weeks ago
On Mon, Oct 28, 2024 at 01:21:45PM +0100, Maxime Ripard wrote:
> On Mon, Oct 28, 2024 at 01:28:57PM +0200, Laurent Pinchart wrote:
> > On Mon, Oct 28, 2024 at 09:13:31AM +0100, Herve Codina wrote:
> > > On Sun, 27 Oct 2024 18:23:50 +0200 Laurent Pinchart wrote:
> > > 
> > > [...]
> > > > > +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> > > > > +{
> > > > > +	struct drm_device *dev = sn65dsi83->bridge.dev;
> > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > +	struct drm_atomic_state *state;
> > > > > +	int err;
> > > > > +
> > > > > +	/* Use operation done in drm_atomic_helper_suspend() followed by
> > > > > +	 * operation done in drm_atomic_helper_resume() but without releasing
> > > > > +	 * the lock between suspend()/resume()
> > > > > +	 */
> > > > > +
> > > > > +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> > > > > +
> > > > > +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> > > > > +	if (IS_ERR(state)) {
> > > > > +		err = PTR_ERR(state);
> > > > > +		goto unlock;
> > > > > +	}
> > > > > +
> > > > > +	err = drm_atomic_helper_disable_all(dev, &ctx);
> > > > > +	if (err < 0)
> > > > > +		goto unlock;
> > > > > +
> > > > > +	drm_mode_config_reset(dev);
> > > > > +
> > > > > +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);  
> > > > 
> > > > Committing a full atomic state from a bridge driver in an asynchronous
> > > > way seems quite uncharted territory, and it worries me. It's also a very
> > > > heavyweight, you disable all outputs here, instead of focussing on the
> > > > output connected to the bridge. Can you either implement something more
> > > > local, resetting the bridge only, or create a core helper to handle this
> > > > kind of situation, on a per-output basis ?
> > > 
> > > A full restart of the bridge (power off/on) is needed and so we need to
> > > redo the initialization sequence. This initialization sequence has to be
> > > done with the DSI data lanes (bridge inputs) driven in LP11 state and so
> > > without any video stream. Only focussing on bridge outputs will not be
> > > sufficient. That's why I brought the pipeline down and restarted it.
> > 
> > Fair point.
> > 
> > > Of course, I can copy/paste sn65dsi83_reset_pipeline() to a core helper
> > > function. Is drm_atomic_helper_reset_all() could be a good candidate?
> > 
> > The helper should operate on a single output, unrelated outputs should
> > not be affected.
> 
> Also, you don't want to reset anything, you just want the last commit to
> be replayed.

I'm not sure about that. If the last commit is just a page flip, that
won't help, will it ?

-- 
Regards,

Laurent Pinchart
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Maxime Ripard 4 weeks ago
On Mon, Oct 28, 2024 at 03:28:58PM +0200, Laurent Pinchart wrote:
> On Mon, Oct 28, 2024 at 01:21:45PM +0100, Maxime Ripard wrote:
> > On Mon, Oct 28, 2024 at 01:28:57PM +0200, Laurent Pinchart wrote:
> > > On Mon, Oct 28, 2024 at 09:13:31AM +0100, Herve Codina wrote:
> > > > On Sun, 27 Oct 2024 18:23:50 +0200 Laurent Pinchart wrote:
> > > > 
> > > > [...]
> > > > > > +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> > > > > > +{
> > > > > > +	struct drm_device *dev = sn65dsi83->bridge.dev;
> > > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > > +	struct drm_atomic_state *state;
> > > > > > +	int err;
> > > > > > +
> > > > > > +	/* Use operation done in drm_atomic_helper_suspend() followed by
> > > > > > +	 * operation done in drm_atomic_helper_resume() but without releasing
> > > > > > +	 * the lock between suspend()/resume()
> > > > > > +	 */
> > > > > > +
> > > > > > +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> > > > > > +
> > > > > > +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> > > > > > +	if (IS_ERR(state)) {
> > > > > > +		err = PTR_ERR(state);
> > > > > > +		goto unlock;
> > > > > > +	}
> > > > > > +
> > > > > > +	err = drm_atomic_helper_disable_all(dev, &ctx);
> > > > > > +	if (err < 0)
> > > > > > +		goto unlock;
> > > > > > +
> > > > > > +	drm_mode_config_reset(dev);
> > > > > > +
> > > > > > +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);  
> > > > > 
> > > > > Committing a full atomic state from a bridge driver in an asynchronous
> > > > > way seems quite uncharted territory, and it worries me. It's also a very
> > > > > heavyweight, you disable all outputs here, instead of focussing on the
> > > > > output connected to the bridge. Can you either implement something more
> > > > > local, resetting the bridge only, or create a core helper to handle this
> > > > > kind of situation, on a per-output basis ?
> > > > 
> > > > A full restart of the bridge (power off/on) is needed and so we need to
> > > > redo the initialization sequence. This initialization sequence has to be
> > > > done with the DSI data lanes (bridge inputs) driven in LP11 state and so
> > > > without any video stream. Only focussing on bridge outputs will not be
> > > > sufficient. That's why I brought the pipeline down and restarted it.
> > > 
> > > Fair point.
> > > 
> > > > Of course, I can copy/paste sn65dsi83_reset_pipeline() to a core helper
> > > > function. Is drm_atomic_helper_reset_all() could be a good candidate?
> > > 
> > > The helper should operate on a single output, unrelated outputs should
> > > not be affected.
> > 
> > Also, you don't want to reset anything, you just want the last commit to
> > be replayed.
> 
> I'm not sure about that. If the last commit is just a page flip, that
> won't help, will it ?

The alternative would be that you start anew with a blank state, which
effectively drops every configuration that has been done by userspace.
This is terrible.

And a page flip wouldn't have affected the connector and
connector->state would still be to the last state that affected it, so
it would work.

Maxime
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Laurent Pinchart 4 weeks ago
On Mon, Oct 28, 2024 at 02:55:47PM +0100, Maxime Ripard wrote:
> On Mon, Oct 28, 2024 at 03:28:58PM +0200, Laurent Pinchart wrote:
> > On Mon, Oct 28, 2024 at 01:21:45PM +0100, Maxime Ripard wrote:
> > > On Mon, Oct 28, 2024 at 01:28:57PM +0200, Laurent Pinchart wrote:
> > > > On Mon, Oct 28, 2024 at 09:13:31AM +0100, Herve Codina wrote:
> > > > > On Sun, 27 Oct 2024 18:23:50 +0200 Laurent Pinchart wrote:
> > > > > 
> > > > > [...]
> > > > > > > +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> > > > > > > +{
> > > > > > > +	struct drm_device *dev = sn65dsi83->bridge.dev;
> > > > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > > > +	struct drm_atomic_state *state;
> > > > > > > +	int err;
> > > > > > > +
> > > > > > > +	/* Use operation done in drm_atomic_helper_suspend() followed by
> > > > > > > +	 * operation done in drm_atomic_helper_resume() but without releasing
> > > > > > > +	 * the lock between suspend()/resume()
> > > > > > > +	 */
> > > > > > > +
> > > > > > > +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> > > > > > > +
> > > > > > > +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> > > > > > > +	if (IS_ERR(state)) {
> > > > > > > +		err = PTR_ERR(state);
> > > > > > > +		goto unlock;
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	err = drm_atomic_helper_disable_all(dev, &ctx);
> > > > > > > +	if (err < 0)
> > > > > > > +		goto unlock;
> > > > > > > +
> > > > > > > +	drm_mode_config_reset(dev);
> > > > > > > +
> > > > > > > +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);  
> > > > > > 
> > > > > > Committing a full atomic state from a bridge driver in an asynchronous
> > > > > > way seems quite uncharted territory, and it worries me. It's also a very
> > > > > > heavyweight, you disable all outputs here, instead of focussing on the
> > > > > > output connected to the bridge. Can you either implement something more
> > > > > > local, resetting the bridge only, or create a core helper to handle this
> > > > > > kind of situation, on a per-output basis ?
> > > > > 
> > > > > A full restart of the bridge (power off/on) is needed and so we need to
> > > > > redo the initialization sequence. This initialization sequence has to be
> > > > > done with the DSI data lanes (bridge inputs) driven in LP11 state and so
> > > > > without any video stream. Only focussing on bridge outputs will not be
> > > > > sufficient. That's why I brought the pipeline down and restarted it.
> > > > 
> > > > Fair point.
> > > > 
> > > > > Of course, I can copy/paste sn65dsi83_reset_pipeline() to a core helper
> > > > > function. Is drm_atomic_helper_reset_all() could be a good candidate?
> > > > 
> > > > The helper should operate on a single output, unrelated outputs should
> > > > not be affected.
> > > 
> > > Also, you don't want to reset anything, you just want the last commit to
> > > be replayed.
> > 
> > I'm not sure about that. If the last commit is just a page flip, that
> > won't help, will it ?
> 
> The alternative would be that you start anew with a blank state, which
> effectively drops every configuration that has been done by userspace.
> This is terrible.
> 
> And a page flip wouldn't have affected the connector and
> connector->state would still be to the last state that affected it, so
> it would work.

Ah right, you didn't mean replaying the last commit then, but first
disabling the output and then restoring the current state ? That should
work.

-- 
Regards,

Laurent Pinchart
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Herve Codina 2 weeks, 6 days ago
Hi Maxime, Laurent,

On Mon, 28 Oct 2024 16:09:13 +0200
Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote:

> On Mon, Oct 28, 2024 at 02:55:47PM +0100, Maxime Ripard wrote:
> > On Mon, Oct 28, 2024 at 03:28:58PM +0200, Laurent Pinchart wrote:  
> > > On Mon, Oct 28, 2024 at 01:21:45PM +0100, Maxime Ripard wrote:  
> > > > On Mon, Oct 28, 2024 at 01:28:57PM +0200, Laurent Pinchart wrote:  
> > > > > On Mon, Oct 28, 2024 at 09:13:31AM +0100, Herve Codina wrote:  
> > > > > > On Sun, 27 Oct 2024 18:23:50 +0200 Laurent Pinchart wrote:
> > > > > > 
> > > > > > [...]  
> > > > > > > > +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> > > > > > > > +{
> > > > > > > > +	struct drm_device *dev = sn65dsi83->bridge.dev;
> > > > > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > > > > +	struct drm_atomic_state *state;
> > > > > > > > +	int err;
> > > > > > > > +
> > > > > > > > +	/* Use operation done in drm_atomic_helper_suspend() followed by
> > > > > > > > +	 * operation done in drm_atomic_helper_resume() but without releasing
> > > > > > > > +	 * the lock between suspend()/resume()
> > > > > > > > +	 */
> > > > > > > > +
> > > > > > > > +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> > > > > > > > +
> > > > > > > > +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> > > > > > > > +	if (IS_ERR(state)) {
> > > > > > > > +		err = PTR_ERR(state);
> > > > > > > > +		goto unlock;
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > > +	err = drm_atomic_helper_disable_all(dev, &ctx);
> > > > > > > > +	if (err < 0)
> > > > > > > > +		goto unlock;
> > > > > > > > +
> > > > > > > > +	drm_mode_config_reset(dev);
> > > > > > > > +
> > > > > > > > +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);    
> > > > > > > 
> > > > > > > Committing a full atomic state from a bridge driver in an asynchronous
> > > > > > > way seems quite uncharted territory, and it worries me. It's also a very
> > > > > > > heavyweight, you disable all outputs here, instead of focussing on the
> > > > > > > output connected to the bridge. Can you either implement something more
> > > > > > > local, resetting the bridge only, or create a core helper to handle this
> > > > > > > kind of situation, on a per-output basis ?  
> > > > > > 
> > > > > > A full restart of the bridge (power off/on) is needed and so we need to
> > > > > > redo the initialization sequence. This initialization sequence has to be
> > > > > > done with the DSI data lanes (bridge inputs) driven in LP11 state and so
> > > > > > without any video stream. Only focussing on bridge outputs will not be
> > > > > > sufficient. That's why I brought the pipeline down and restarted it.  
> > > > > 
> > > > > Fair point.
> > > > >   
> > > > > > Of course, I can copy/paste sn65dsi83_reset_pipeline() to a core helper
> > > > > > function. Is drm_atomic_helper_reset_all() could be a good candidate?  
> > > > > 
> > > > > The helper should operate on a single output, unrelated outputs should
> > > > > not be affected.  
> > > > 
> > > > Also, you don't want to reset anything, you just want the last commit to
> > > > be replayed.  
> > > 
> > > I'm not sure about that. If the last commit is just a page flip, that
> > > won't help, will it ?  
> > 
> > The alternative would be that you start anew with a blank state, which
> > effectively drops every configuration that has been done by userspace.
> > This is terrible.
> > 
> > And a page flip wouldn't have affected the connector and
> > connector->state would still be to the last state that affected it, so
> > it would work.  
> 
> Ah right, you didn't mean replaying the last commit then, but first
> disabling the output and then restoring the current state ? That should
> work.
> 

Thanks for the feedback.

If I understand correctly, I should try to disable the output.
What is the 'output' exactly, the connector?
How can I disable it? Can you give me some pointers?

Further more, is disabling the "output" disable the whole path where the
bridge is located?
I mean, I need to power off/on the bridge and re-init it with its input DSI
lines in LP11.

Best regards,
Hervé
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Maxime Ripard 2 weeks, 6 days ago
On Tue, Nov 05, 2024 at 09:15:03AM +0100, Herve Codina wrote:
> Hi Maxime, Laurent,
> 
> On Mon, 28 Oct 2024 16:09:13 +0200
> Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote:
> 
> > On Mon, Oct 28, 2024 at 02:55:47PM +0100, Maxime Ripard wrote:
> > > On Mon, Oct 28, 2024 at 03:28:58PM +0200, Laurent Pinchart wrote:  
> > > > On Mon, Oct 28, 2024 at 01:21:45PM +0100, Maxime Ripard wrote:  
> > > > > On Mon, Oct 28, 2024 at 01:28:57PM +0200, Laurent Pinchart wrote:  
> > > > > > On Mon, Oct 28, 2024 at 09:13:31AM +0100, Herve Codina wrote:  
> > > > > > > On Sun, 27 Oct 2024 18:23:50 +0200 Laurent Pinchart wrote:
> > > > > > > 
> > > > > > > [...]  
> > > > > > > > > +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> > > > > > > > > +{
> > > > > > > > > +	struct drm_device *dev = sn65dsi83->bridge.dev;
> > > > > > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > > > > > +	struct drm_atomic_state *state;
> > > > > > > > > +	int err;
> > > > > > > > > +
> > > > > > > > > +	/* Use operation done in drm_atomic_helper_suspend() followed by
> > > > > > > > > +	 * operation done in drm_atomic_helper_resume() but without releasing
> > > > > > > > > +	 * the lock between suspend()/resume()
> > > > > > > > > +	 */
> > > > > > > > > +
> > > > > > > > > +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> > > > > > > > > +
> > > > > > > > > +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> > > > > > > > > +	if (IS_ERR(state)) {
> > > > > > > > > +		err = PTR_ERR(state);
> > > > > > > > > +		goto unlock;
> > > > > > > > > +	}
> > > > > > > > > +
> > > > > > > > > +	err = drm_atomic_helper_disable_all(dev, &ctx);
> > > > > > > > > +	if (err < 0)
> > > > > > > > > +		goto unlock;
> > > > > > > > > +
> > > > > > > > > +	drm_mode_config_reset(dev);
> > > > > > > > > +
> > > > > > > > > +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);    
> > > > > > > > 
> > > > > > > > Committing a full atomic state from a bridge driver in an asynchronous
> > > > > > > > way seems quite uncharted territory, and it worries me. It's also a very
> > > > > > > > heavyweight, you disable all outputs here, instead of focussing on the
> > > > > > > > output connected to the bridge. Can you either implement something more
> > > > > > > > local, resetting the bridge only, or create a core helper to handle this
> > > > > > > > kind of situation, on a per-output basis ?  
> > > > > > > 
> > > > > > > A full restart of the bridge (power off/on) is needed and so we need to
> > > > > > > redo the initialization sequence. This initialization sequence has to be
> > > > > > > done with the DSI data lanes (bridge inputs) driven in LP11 state and so
> > > > > > > without any video stream. Only focussing on bridge outputs will not be
> > > > > > > sufficient. That's why I brought the pipeline down and restarted it.  
> > > > > > 
> > > > > > Fair point.
> > > > > >   
> > > > > > > Of course, I can copy/paste sn65dsi83_reset_pipeline() to a core helper
> > > > > > > function. Is drm_atomic_helper_reset_all() could be a good candidate?  
> > > > > > 
> > > > > > The helper should operate on a single output, unrelated outputs should
> > > > > > not be affected.  
> > > > > 
> > > > > Also, you don't want to reset anything, you just want the last commit to
> > > > > be replayed.  
> > > > 
> > > > I'm not sure about that. If the last commit is just a page flip, that
> > > > won't help, will it ?  
> > > 
> > > The alternative would be that you start anew with a blank state, which
> > > effectively drops every configuration that has been done by userspace.
> > > This is terrible.
> > > 
> > > And a page flip wouldn't have affected the connector and
> > > connector->state would still be to the last state that affected it, so
> > > it would work.  
> > 
> > Ah right, you didn't mean replaying the last commit then, but first
> > disabling the output and then restoring the current state ? That should
> > work.
> > 
> 
> Thanks for the feedback.
> 
> If I understand correctly, I should try to disable the output.
> What is the 'output' exactly, the connector?

At the very least, the encoder, connector and everything in between. And
maybe the CRTC.

> How can I disable it? Can you give me some pointers?

See my mail here:
https://lore.kernel.org/all/20241028-thankful-boar-of-camouflage-3de96c@houat/

> Further more, is disabling the "output" disable the whole path where the
> bridge is located?

Not the whole path, but most of it, yeah.

> I mean, I need to power off/on the bridge and re-init it with its input DSI
> lines in LP11.

Right, and that might work with that bridge in particular, but it's
definitely not generic.

Maxime
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Laurent Pinchart 2 weeks, 6 days ago
On Tue, Nov 05, 2024 at 09:15:03AM +0100, Herve Codina wrote:
> On Mon, 28 Oct 2024 16:09:13 +0200 Laurent Pinchart wrote:
> > On Mon, Oct 28, 2024 at 02:55:47PM +0100, Maxime Ripard wrote:
> > > On Mon, Oct 28, 2024 at 03:28:58PM +0200, Laurent Pinchart wrote:  
> > > > On Mon, Oct 28, 2024 at 01:21:45PM +0100, Maxime Ripard wrote:  
> > > > > On Mon, Oct 28, 2024 at 01:28:57PM +0200, Laurent Pinchart wrote:  
> > > > > > On Mon, Oct 28, 2024 at 09:13:31AM +0100, Herve Codina wrote:  
> > > > > > > On Sun, 27 Oct 2024 18:23:50 +0200 Laurent Pinchart wrote:
> > > > > > > 
> > > > > > > [...]  
> > > > > > > > > +static int sn65dsi83_reset_pipeline(struct sn65dsi83 *sn65dsi83)
> > > > > > > > > +{
> > > > > > > > > +	struct drm_device *dev = sn65dsi83->bridge.dev;
> > > > > > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > > > > > +	struct drm_atomic_state *state;
> > > > > > > > > +	int err;
> > > > > > > > > +
> > > > > > > > > +	/* Use operation done in drm_atomic_helper_suspend() followed by
> > > > > > > > > +	 * operation done in drm_atomic_helper_resume() but without releasing
> > > > > > > > > +	 * the lock between suspend()/resume()
> > > > > > > > > +	 */
> > > > > > > > > +
> > > > > > > > > +	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err);
> > > > > > > > > +
> > > > > > > > > +	state = drm_atomic_helper_duplicate_state(dev, &ctx);
> > > > > > > > > +	if (IS_ERR(state)) {
> > > > > > > > > +		err = PTR_ERR(state);
> > > > > > > > > +		goto unlock;
> > > > > > > > > +	}
> > > > > > > > > +
> > > > > > > > > +	err = drm_atomic_helper_disable_all(dev, &ctx);
> > > > > > > > > +	if (err < 0)
> > > > > > > > > +		goto unlock;
> > > > > > > > > +
> > > > > > > > > +	drm_mode_config_reset(dev);
> > > > > > > > > +
> > > > > > > > > +	err = drm_atomic_helper_commit_duplicated_state(state, &ctx);    
> > > > > > > > 
> > > > > > > > Committing a full atomic state from a bridge driver in an asynchronous
> > > > > > > > way seems quite uncharted territory, and it worries me. It's also a very
> > > > > > > > heavyweight, you disable all outputs here, instead of focussing on the
> > > > > > > > output connected to the bridge. Can you either implement something more
> > > > > > > > local, resetting the bridge only, or create a core helper to handle this
> > > > > > > > kind of situation, on a per-output basis ?  
> > > > > > > 
> > > > > > > A full restart of the bridge (power off/on) is needed and so we need to
> > > > > > > redo the initialization sequence. This initialization sequence has to be
> > > > > > > done with the DSI data lanes (bridge inputs) driven in LP11 state and so
> > > > > > > without any video stream. Only focussing on bridge outputs will not be
> > > > > > > sufficient. That's why I brought the pipeline down and restarted it.  
> > > > > > 
> > > > > > Fair point.
> > > > > >   
> > > > > > > Of course, I can copy/paste sn65dsi83_reset_pipeline() to a core helper
> > > > > > > function. Is drm_atomic_helper_reset_all() could be a good candidate?  
> > > > > > 
> > > > > > The helper should operate on a single output, unrelated outputs should
> > > > > > not be affected.  
> > > > > 
> > > > > Also, you don't want to reset anything, you just want the last commit to
> > > > > be replayed.  
> > > > 
> > > > I'm not sure about that. If the last commit is just a page flip, that
> > > > won't help, will it ?  
> > > 
> > > The alternative would be that you start anew with a blank state, which
> > > effectively drops every configuration that has been done by userspace.
> > > This is terrible.
> > > 
> > > And a page flip wouldn't have affected the connector and
> > > connector->state would still be to the last state that affected it, so
> > > it would work.  
> > 
> > Ah right, you didn't mean replaying the last commit then, but first
> > disabling the output and then restoring the current state ? That should
> > work.
> 
> Thanks for the feedback.
> 
> If I understand correctly, I should try to disable the output.
> What is the 'output' exactly, the connector?

Yes, the output maps to the connector.

> How can I disable it? Can you give me some pointers?

By creating a commit that disables it :-) Conceptually that's about
setting the same properties you would from userspace. Maybe look at
drm_atomic_helper_disable_all() to see if you can make a version that
operates on a single output.

> Further more, is disabling the "output" disable the whole path where the
> bridge is located?

It should yes.

> I mean, I need to power off/on the bridge and re-init it with its input DSI
> lines in LP11.

-- 
Regards,

Laurent Pinchart
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Marek Vasut 1 month ago
On 10/24/24 11:55 AM, Herve Codina wrote:
> In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
> from errors by itself. A full restart of the bridge is needed in those
> cases to have the bridge output LVDS signals again.

I have seen the bridge being flaky sometimes, do you have any more 
details of what is going on when this irrecoverable error occurs ?

> The TI SN65DSI83 has some error detection capabilities. Introduce an
> error recovery mechanism based on this detection.
> 
> The errors detected are signaled through an interrupt. On system where
> this interrupt is not available, the driver uses a polling monitoring
> fallback to check for errors. When an error is present, the recovery
> process is launched.
> 
> Restarting the bridge needs to redo the initialization sequence. This
> initialization sequence has to be done with the DSI data lanes driven in
> LP11 state. In order to do that, the recovery process resets the entire
> pipeline.

+CC Michael regarding the LP11 part , I think there was some development 
to switch lanes into LP11 mode on request ?

[...]
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Herve Codina 4 weeks ago
Hi Marek,

On Sat, 26 Oct 2024 00:53:51 +0200
Marek Vasut <marex@denx.de> wrote:

> On 10/24/24 11:55 AM, Herve Codina wrote:
> > In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
> > from errors by itself. A full restart of the bridge is needed in those
> > cases to have the bridge output LVDS signals again.  
> 
> I have seen the bridge being flaky sometimes, do you have any more 
> details of what is going on when this irrecoverable error occurs ?

The panel attached to the bridge goes and stays black. That's the behavior.
A full reset brings the panel back displaying frames.

Best regards,
Hervé
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Marek Vasut 4 weeks ago
On 10/28/24 9:02 AM, Herve Codina wrote:
> Hi Marek,

Hi,

> On Sat, 26 Oct 2024 00:53:51 +0200
> Marek Vasut <marex@denx.de> wrote:
> 
>> On 10/24/24 11:55 AM, Herve Codina wrote:
>>> In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
>>> from errors by itself. A full restart of the bridge is needed in those
>>> cases to have the bridge output LVDS signals again.
>>
>> I have seen the bridge being flaky sometimes, do you have any more
>> details of what is going on when this irrecoverable error occurs ?
> 
> The panel attached to the bridge goes and stays black. That's the behavior.
> A full reset brings the panel back displaying frames.
Is there some noticeable change in 0xe0/0xe1/0xe5 registers, esp. 0xe5, 
do they indicate the error occurred somehow ?
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Herve Codina 4 weeks ago
Hi Marek,

On Mon, 28 Oct 2024 12:47:14 +0100
Marek Vasut <marex@denx.de> wrote:

> On 10/28/24 9:02 AM, Herve Codina wrote:
> > Hi Marek,  
> 
> Hi,
> 
> > On Sat, 26 Oct 2024 00:53:51 +0200
> > Marek Vasut <marex@denx.de> wrote:
> >   
> >> On 10/24/24 11:55 AM, Herve Codina wrote:  
> >>> In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
> >>> from errors by itself. A full restart of the bridge is needed in those
> >>> cases to have the bridge output LVDS signals again.  
> >>
> >> I have seen the bridge being flaky sometimes, do you have any more
> >> details of what is going on when this irrecoverable error occurs ?  
> > 
> > The panel attached to the bridge goes and stays black. That's the behavior.
> > A full reset brings the panel back displaying frames.  
> Is there some noticeable change in 0xe0/0xe1/0xe5 registers, esp. 0xe5, 
> do they indicate the error occurred somehow ?

0xe5 register can signal any DSI errors (depending on when the ESD affects
the DSI bus) even PLL unlock bit was observed set but we didn't see any
relationship between the bits set in 0xe5 register and the recoverable or
unrecoverable behavior.

Also, in some cases, reading the register was not even possible (i2c
transaction nacked).

Best regards,
Hervé
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Marek Vasut 4 weeks ago
On 10/28/24 2:52 PM, Herve Codina wrote:
> Hi Marek,

Hi,

>>> On Sat, 26 Oct 2024 00:53:51 +0200
>>> Marek Vasut <marex@denx.de> wrote:
>>>    
>>>> On 10/24/24 11:55 AM, Herve Codina wrote:
>>>>> In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
>>>>> from errors by itself. A full restart of the bridge is needed in those
>>>>> cases to have the bridge output LVDS signals again.
>>>>
>>>> I have seen the bridge being flaky sometimes, do you have any more
>>>> details of what is going on when this irrecoverable error occurs ?
>>>
>>> The panel attached to the bridge goes and stays black. That's the behavior.
>>> A full reset brings the panel back displaying frames.
>> Is there some noticeable change in 0xe0/0xe1/0xe5 registers, esp. 0xe5,
>> do they indicate the error occurred somehow ?
> 
> 0xe5 register can signal any DSI errors (depending on when the ESD affects
> the DSI bus) even PLL unlock bit was observed set but we didn't see any
> relationship between the bits set in 0xe5 register and the recoverable or
> unrecoverable behavior.
> 
> Also, in some cases, reading the register was not even possible (i2c
> transaction nacked).
Oh, wow, I haven't seen that one before. But this is really useful 
information, can you please add it into the commit message for V2 ?

Thank you
Re: [PATCH 2/2] drm: bridge: ti-sn65dsi83: Add error recovery mechanism
Posted by Herve Codina 4 weeks ago
On Mon, 28 Oct 2024 15:47:25 +0100
Marek Vasut <marex@denx.de> wrote:

> On 10/28/24 2:52 PM, Herve Codina wrote:
> > Hi Marek,  
> 
> Hi,
> 
> >>> On Sat, 26 Oct 2024 00:53:51 +0200
> >>> Marek Vasut <marex@denx.de> wrote:
> >>>      
> >>>> On 10/24/24 11:55 AM, Herve Codina wrote:  
> >>>>> In some cases observed during ESD tests, the TI SN65DSI83 cannot recover
> >>>>> from errors by itself. A full restart of the bridge is needed in those
> >>>>> cases to have the bridge output LVDS signals again.  
> >>>>
> >>>> I have seen the bridge being flaky sometimes, do you have any more
> >>>> details of what is going on when this irrecoverable error occurs ?  
> >>>
> >>> The panel attached to the bridge goes and stays black. That's the behavior.
> >>> A full reset brings the panel back displaying frames.  
> >> Is there some noticeable change in 0xe0/0xe1/0xe5 registers, esp. 0xe5,
> >> do they indicate the error occurred somehow ?  
> > 
> > 0xe5 register can signal any DSI errors (depending on when the ESD affects
> > the DSI bus) even PLL unlock bit was observed set but we didn't see any
> > relationship between the bits set in 0xe5 register and the recoverable or
> > unrecoverable behavior.
> > 
> > Also, in some cases, reading the register was not even possible (i2c
> > transaction nacked).  
> Oh, wow, I haven't seen that one before. But this is really useful 
> information, can you please add it into the commit message for V2 ?

Yes, I will add this information in v2.

Best regards,
Hervé


-- 
Hervé Codina, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com