ufs: core: complete wl runtime resume after SCSI EH

[PATCH v1] ufs: core: complete wl runtime resume after SCSI EH

Posted by Hongjie Fang 1 week, 6 days ago

A PM START STOP sent from the UFS well-known LU resume path can race with
SCSI EH.

The "wl resume" task flow is:
  __ufshcd_wl_resume()
    ufshcd_set_dev_pwr_mode(UFS_ACTIVE_PWR_MODE)
      ufshcd_execute_start_stop()
        scsi_execute_cmd()
          blk_execute_rq           <-- wait
          scsi_check_passthrough() <-- may retry START STOP

If the first START STOP time out, SCSI EH may already recover the link and
reset the device before scsi_execute_cmd() returns:
  scsi_timeout()
    scsi_eh_scmd_add()
      scsi_error_handler()
        scsi_unjam_host()
          scsi_eh_ready_devs()
            scsi_eh_host_reset()
              ufshcd_eh_host_reset_handler()
                if (hba->pm_op_in_progress)
                  ufshcd_link_recovery()
                    ufshcd_device_reset()
                    ufshcd_host_reset_and_restore()
          ...
          scsi_eh_flush_done_q()   <-- wakeup "wl resume" task
        ...                        <-- host still in SHOST_RECOVERY
        scsi_restart_operations()

A later passthrough retry can then run while the host is still in
SHOST_RECOVERY and hit the SCMD_FAIL_IF_RECOVERING path:
  scsi_queue_rq()
    if (scsi_host_in_recovery(shost) &&
        cmd->flags & SCMD_FAIL_IF_RECOVERING)
      return BLK_STS_OFFLINE

That retry completes with DID_ERROR or DID_NO_CONNECT even though EH may
already have restored the device to an operational ACTIVE state.

In this case __ufshcd_wl_resume() can return -EIO even though recovery
has already restored the device to an operational ACTIVE state. This is
especially harmful for runtime PM because callback errors other than
-EAGAIN and -EBUSY are latched by the PM core as runtime_error.

After a runtime-PM START STOP returns -EIO, wait for any ongoing SCSI EH
round to finish and treat the resume as successful if recovery has
already restored:

  - an online device
  - UFSHCD_STATE_OPERATIONAL
  - an active link
  - cached ACTIVE device power mode

Fixes: b294ff3e3449 ("scsi: ufs: core: Enable power management for wlun")
Signed-off-by: Hongjie Fang <hongjiefang@asrmicro.com>
---
 drivers/ufs/core/ufshcd.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index c3f08957d179..e7517cf23f06 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -10368,6 +10368,22 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 }
 
 #ifdef CONFIG_PM
+static int ufshcd_wl_resume_pm_recovered(struct ufs_hba *hba)
+{
+	int ret = 0;
+	struct scsi_device *sdp = hba->ufs_device_wlun;
+
+	if (!sdp || !scsi_block_when_processing_errors(sdp))
+		return 0;
+
+	if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL &&
+	    ufshcd_is_link_active(hba) &&
+	    ufshcd_is_ufs_dev_active(hba))
+		ret = 1;
+
+	return ret;
+}
+
 static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 {
 	int ret;
@@ -10422,6 +10438,9 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 
 	if (!ufshcd_is_ufs_dev_active(hba)) {
 		ret = ufshcd_set_dev_pwr_mode(hba, UFS_ACTIVE_PWR_MODE);
+		if (pm_op == UFS_RUNTIME_PM && ret == -EIO &&
+		    ufshcd_wl_resume_pm_recovered(hba))
+			ret = 0;
 		if (ret)
 			goto set_old_link_state;
 		ufshcd_set_timestamp_attr(hba);
-- 
2.25.1

Re: [PATCH v1] ufs: core: complete wl runtime resume after SCSI EH

Posted by Bart Van Assche 1 week, 6 days ago

On 5/26/26 4:49 AM, Hongjie Fang wrote:
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index c3f08957d179..e7517cf23f06 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -10368,6 +10368,22 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
>   }
>   
>   #ifdef CONFIG_PM
> +static int ufshcd_wl_resume_pm_recovered(struct ufs_hba *hba)
> +{
> +	int ret = 0;
> +	struct scsi_device *sdp = hba->ufs_device_wlun;
> +
> +	if (!sdp || !scsi_block_when_processing_errors(sdp))
> +		return 0;
> +
> +	if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL &&
> +	    ufshcd_is_link_active(hba) &&
> +	    ufshcd_is_ufs_dev_active(hba))
> +		ret = 1;
> +
> +	return ret;
> +}
> +
>   static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
>   {
>   	int ret;
> @@ -10422,6 +10438,9 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
>   
>   	if (!ufshcd_is_ufs_dev_active(hba)) {
>   		ret = ufshcd_set_dev_pwr_mode(hba, UFS_ACTIVE_PWR_MODE);
> +		if (pm_op == UFS_RUNTIME_PM && ret == -EIO &&
> +		    ufshcd_wl_resume_pm_recovered(hba))
> +			ret = 0;
>   		if (ret)
>   			goto set_old_link_state;
>   		ufshcd_set_timestamp_attr(hba);

This change increases the complexity of the UFS driver too much. Please 
consider building a solution for this issue on top of the (untested)
patch below. The patch below prevents that the SCSI EH is activated if a
START STOP UNIT command times out:

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 9e0336098e26..4be5453efb34 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -9491,7 +9491,7 @@ static enum scsi_timeout_action 
ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
  {
  	struct ufs_hba *hba = shost_priv(scmd->device->host);

-	if (!hba->system_suspending) {
+	if (!hba->pm_op_in_progress) {
  		/* Activate the error handler in the SCSI core. */
  		return SCSI_EH_NOT_HANDLED;
  	}
@@ -10543,7 +10543,6 @@ static int ufshcd_wl_suspend(struct device *dev)

  	hba = shost_priv(sdev->host);
  	down(&hba->host_sem);
-	hba->system_suspending = true;

  	if (pm_runtime_suspended(dev))
  		goto out;
@@ -10585,7 +10584,6 @@ static int ufshcd_wl_resume(struct device *dev)
  		hba->curr_dev_pwr_mode, hba->uic_link_state);
  	if (!ret)
  		hba->is_sys_suspended = false;
-	hba->system_suspending = false;
  	up(&hba->host_sem);
  	return ret;
  }
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 3eaae082329c..248d0a5bef40 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1029,8 +1029,6 @@ enum ufshcd_mcq_opr {
   * @caps: bitmask with information about UFS controller capabilities
   * @devfreq: frequency scaling information owned by the devfreq core
   * @clk_scaling: frequency scaling information owned by the UFS driver
- * @system_suspending: system suspend has been started and system 
resume has
- *	not yet finished.
   * @is_sys_suspended: UFS device has been suspended because of system 
suspend
   * @urgent_bkops_lvl: keeps track of urgent bkops level for device
   * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
@@ -1206,7 +1204,6 @@ struct ufs_hba {

  	struct devfreq *devfreq;
  	struct ufs_clk_scaling clk_scaling;
-	bool system_suspending;
  	bool is_sys_suspended;

  	enum bkops_status urgent_bkops_lvl;

Thanks,

Bart.

RE: [PATCH v1] ufs: core: complete wl runtime resume after SCSI EH

Posted by Fang Hongjie(方洪杰) 1 week, 5 days ago

> From: Bart Van Assche [mailto:bvanassche@acm.org]
> Sent: Wednesday, May 27, 2026 4:47 AM
> To: Fang Hongjie(方洪杰) <hongjiefang@asrmicro.com>;
> alim.akhtar@samsung.com; avri.altman@wdc.com;
> James.Bottomley@HansenPartnership.com; martin.petersen@oracle.com;
> peter.wang@mediatek.com; beanhuo@micron.com
> Cc: linux-scsi@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v1] ufs: core: complete wl runtime resume after SCSI EH
> 
> On 5/26/26 4:49 AM, Hongjie Fang wrote:
> > diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> > index c3f08957d179..e7517cf23f06 100644
> > --- a/drivers/ufs/core/ufshcd.c
> > +++ b/drivers/ufs/core/ufshcd.c
> > @@ -10368,6 +10368,22 @@ static int __ufshcd_wl_suspend(struct
> ufs_hba *hba, enum ufs_pm_op pm_op)
> >   }
> >
> >   #ifdef CONFIG_PM
> > +static int ufshcd_wl_resume_pm_recovered(struct ufs_hba *hba)
> > +{
> > +	int ret = 0;
> > +	struct scsi_device *sdp = hba->ufs_device_wlun;
> > +
> > +	if (!sdp || !scsi_block_when_processing_errors(sdp))
> > +		return 0;
> > +
> > +	if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL &&
> > +	    ufshcd_is_link_active(hba) &&
> > +	    ufshcd_is_ufs_dev_active(hba))
> > +		ret = 1;
> > +
> > +	return ret;
> > +}
> > +
> >   static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op
> pm_op)
> >   {
> >   	int ret;
> > @@ -10422,6 +10438,9 @@ static int __ufshcd_wl_resume(struct ufs_hba
> *hba, enum ufs_pm_op pm_op)
> >
> >   	if (!ufshcd_is_ufs_dev_active(hba)) {
> >   		ret = ufshcd_set_dev_pwr_mode(hba,
> UFS_ACTIVE_PWR_MODE);
> > +		if (pm_op == UFS_RUNTIME_PM && ret == -EIO &&
> > +		    ufshcd_wl_resume_pm_recovered(hba))
> > +			ret = 0;
> >   		if (ret)
> >   			goto set_old_link_state;
> >   		ufshcd_set_timestamp_attr(hba);
> 
> This change increases the complexity of the UFS driver too much. Please
> consider building a solution for this issue on top of the (untested)
> patch below. The patch below prevents that the SCSI EH is activated if a
> START STOP UNIT command times out:
> 
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index 9e0336098e26..4be5453efb34 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -9491,7 +9491,7 @@ static enum scsi_timeout_action
> ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
>   {
>   	struct ufs_hba *hba = shost_priv(scmd->device->host);
> 
> -	if (!hba->system_suspending) {
> +	if (!hba->pm_op_in_progress) {
>   		/* Activate the error handler in the SCSI core. */
>   		return SCSI_EH_NOT_HANDLED;
>   	}

Thanks for the suggestion. I agree that preventing the PM START STOP UNIT
timeout from entering the regular SCSI EH path is cleaner than handling
the race after scsi_execute_cmd() returns.

I looked closer at the direct ufshcd_link_recovery() approach from
ufshcd_eh_timed_out(). There is one detail that I think needs to be
handled.

For the legacy single-doorbell path, force_compl=true currently
still calls ufshcd_transfer_req_compl(), which only completes requests for
which the doorbell bit has already been cleared:
  completed_reqs = ~tr_doorbell & hba->outstanding_reqs;

So if the timed-out SSU is still marked in both hba->outstanding_reqs and
the transfer request doorbell after ufshcd_hba_stop(), it will not be
completed by ufshcd_complete_requests(hba, true). Returning
SCSI_EH_RESET_TIMER in that state would only restart the request timer and
would not wake the blk_execute_rq() waiter.



> @@ -10543,7 +10543,6 @@ static int ufshcd_wl_suspend(struct device *dev)
> 
>   	hba = shost_priv(sdev->host);
>   	down(&hba->host_sem);
> -	hba->system_suspending = true;
> 
>   	if (pm_runtime_suspended(dev))
>   		goto out;
> @@ -10585,7 +10584,6 @@ static int ufshcd_wl_resume(struct device *dev)
>   		hba->curr_dev_pwr_mode, hba->uic_link_state);
>   	if (!ret)
>   		hba->is_sys_suspended = false;
> -	hba->system_suspending = false;
>   	up(&hba->host_sem);
>   	return ret;
>   }
> diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
> index 3eaae082329c..248d0a5bef40 100644
> --- a/include/ufs/ufshcd.h
> +++ b/include/ufs/ufshcd.h
> @@ -1029,8 +1029,6 @@ enum ufshcd_mcq_opr {
>    * @caps: bitmask with information about UFS controller capabilities
>    * @devfreq: frequency scaling information owned by the devfreq core
>    * @clk_scaling: frequency scaling information owned by the UFS driver
> - * @system_suspending: system suspend has been started and system
> resume has
> - *	not yet finished.
>    * @is_sys_suspended: UFS device has been suspended because of system
> suspend
>    * @urgent_bkops_lvl: keeps track of urgent bkops level for device
>    * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
> @@ -1206,7 +1204,6 @@ struct ufs_hba {
> 
>   	struct devfreq *devfreq;
>   	struct ufs_clk_scaling clk_scaling;
> -	bool system_suspending;
>   	bool is_sys_suspended;
> 
>   	enum bkops_status urgent_bkops_lvl;
> 
> Thanks,
> 
> Bart.


Best.

Re: [PATCH v1] ufs: core: complete wl runtime resume after SCSI EH

Posted by Bart Van Assche 1 week, 5 days ago

On 5/27/26 4:23 AM, Fang Hongjie(方洪杰) wrote:
> I looked closer at the direct ufshcd_link_recovery() approach from
> ufshcd_eh_timed_out(). There is one detail that I think needs to be
> handled.
> 
> For the legacy single-doorbell path, force_compl=true currently
> still calls ufshcd_transfer_req_compl(), which only completes requests for
> which the doorbell bit has already been cleared:
>    completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
> 
> So if the timed-out SSU is still marked in both hba->outstanding_reqs and
> the transfer request doorbell after ufshcd_hba_stop(), it will not be
> completed by ufshcd_complete_requests(hba, true). Returning
> SCSI_EH_RESET_TIMER in that state would only restart the request timer and
> would not wake the blk_execute_rq() waiter.

Feel free to add force_compl support to the legacy single doorbell code
path.

Thanks,

Bart.