drivers/scsi/scsi_error.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-)
shost->eh_noresume is currently consulted twice in one error handling
iteration: once before scsi_autopm_get_host() and once again before
scsi_autopm_put_host().
That is racy when a PM-triggered error path flips shost->eh_noresume while
the SCSI EH thread is still running.
The problem flow looks like this:
PM path
ufshcd_set_dev_pwr_mode()
shost->eh_noresume = 1
ufshcd_execute_start_stop <-- trigger EH
...
shost->eh_noresume = 0
EH path
scsi_error_handler()
if (!shost->eh_noresume)
scsi_autopm_get_host() <-- skipped
...
if (!shost->eh_noresume)
scsi_autopm_put_host() <-- executed later
In that case one EH iteration can skip autoresume on entry and still drop a
runtime PM reference on exit. That leaves an unmatched runtime PM put and
can trigger a runtime PM usage count underflow.
Fix this by snapshotting shost->eh_noresume once at the beginning of each
EH iteration and by calling scsi_autopm_put_host() only if the same
iteration successfully acquired a runtime PM reference through the
scsi_autopm_get_host().
Fixes: ae0751ffc77e ("[SCSI] add flag to skip the runtime PM calls on the host")
Signed-off-by: Hongjie Fang <hongjiefang@asrmicro.com>
---
drivers/scsi/scsi_error.c | 21 ++++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 147127fb4db9..d83bfa24f184 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -2342,6 +2342,8 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
int scsi_error_handler(void *data)
{
struct Scsi_Host *shost = data;
+ bool autopm_get;
+ bool skip_autopm;
/*
* We use TASK_INTERRUPTIBLE so that the thread is not
@@ -2383,12 +2385,17 @@ int scsi_error_handler(void *data)
* what we need to do to get it up and online again (if we can).
* If we fail, we end up taking the thing offline.
*/
- if (!shost->eh_noresume && scsi_autopm_get_host(shost) != 0) {
- SCSI_LOG_ERROR_RECOVERY(1,
- shost_printk(KERN_ERR, shost,
- "scsi_eh_%d: unable to autoresume\n",
- shost->host_no));
- continue;
+ autopm_get = false;
+ skip_autopm = shost->eh_noresume;
+ if (!skip_autopm) {
+ if (scsi_autopm_get_host(shost) != 0) {
+ SCSI_LOG_ERROR_RECOVERY(1,
+ shost_printk(KERN_ERR, shost,
+ "scsi_eh_%d: unable to autoresume\n",
+ shost->host_no));
+ continue;
+ }
+ autopm_get = true;
}
if (shost->transportt->eh_strategy_handler)
@@ -2407,7 +2414,7 @@ int scsi_error_handler(void *data)
* which are still online.
*/
scsi_restart_operations(shost);
- if (!shost->eh_noresume)
+ if (autopm_get)
scsi_autopm_put_host(shost);
}
__set_current_state(TASK_RUNNING);
--
2.25.1
On Sat, May 23, 2026 at 11:34:38AM +0800, Hongjie Fang wrote:
> shost->eh_noresume is currently consulted twice in one error handling
> iteration: once before scsi_autopm_get_host() and once again before
> scsi_autopm_put_host().
>
> That is racy when a PM-triggered error path flips shost->eh_noresume while
> the SCSI EH thread is still running.
>
> The problem flow looks like this:
> PM path
> ufshcd_set_dev_pwr_mode()
> shost->eh_noresume = 1
> ufshcd_execute_start_stop <-- trigger EH
> ...
> shost->eh_noresume = 0
>
> EH path
> scsi_error_handler()
> if (!shost->eh_noresume)
> scsi_autopm_get_host() <-- skipped
> ...
> if (!shost->eh_noresume)
> scsi_autopm_put_host() <-- executed later
>
> In that case one EH iteration can skip autoresume on entry and still drop a
> runtime PM reference on exit. That leaves an unmatched runtime PM put and
> can trigger a runtime PM usage count underflow.
>
> Fix this by snapshotting shost->eh_noresume once at the beginning of each
> EH iteration and by calling scsi_autopm_put_host() only if the same
> iteration successfully acquired a runtime PM reference through the
> scsi_autopm_get_host().
>
> Fixes: ae0751ffc77e ("[SCSI] add flag to skip the runtime PM calls on the host")
> Signed-off-by: Hongjie Fang <hongjiefang@asrmicro.com>
> ---
> drivers/scsi/scsi_error.c | 21 ++++++++++++++-------
> 1 file changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 147127fb4db9..d83bfa24f184 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -2342,6 +2342,8 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
> int scsi_error_handler(void *data)
> {
> struct Scsi_Host *shost = data;
> + bool autopm_get;
> + bool skip_autopm;
>
> /*
> * We use TASK_INTERRUPTIBLE so that the thread is not
> @@ -2383,12 +2385,17 @@ int scsi_error_handler(void *data)
> * what we need to do to get it up and online again (if we can).
> * If we fail, we end up taking the thing offline.
> */
> - if (!shost->eh_noresume && scsi_autopm_get_host(shost) != 0) {
> - SCSI_LOG_ERROR_RECOVERY(1,
> - shost_printk(KERN_ERR, shost,
> - "scsi_eh_%d: unable to autoresume\n",
> - shost->host_no));
> - continue;
> + autopm_get = false;
> + skip_autopm = shost->eh_noresume;
> + if (!skip_autopm) {
Since this is the only place you use the skip_autopm variable, you may
as well not introduce it at all. Just test shost->eh_noresume directly.
Alan Stern
> + if (scsi_autopm_get_host(shost) != 0) {
> + SCSI_LOG_ERROR_RECOVERY(1,
> + shost_printk(KERN_ERR, shost,
> + "scsi_eh_%d: unable to autoresume\n",
> + shost->host_no));
> + continue;
> + }
> + autopm_get = true;
> }
>
> if (shost->transportt->eh_strategy_handler)
> @@ -2407,7 +2414,7 @@ int scsi_error_handler(void *data)
> * which are still online.
> */
> scsi_restart_operations(shost);
> - if (!shost->eh_noresume)
> + if (autopm_get)
> scsi_autopm_put_host(shost);
> }
> __set_current_state(TASK_RUNNING);
> --
> 2.25.1
>
© 2016 - 2026 Red Hat, Inc.