drivers/ufs/core/ufshcd.c | 14 ++++++++++++++ include/ufs/ufshcd.h | 13 +++++++++++++ 2 files changed, 27 insertions(+)
From: zhanghui <zhanghui31@xiaomi.com>
If the SSU CMD completion flow in UFS resume and the CMD timeout flow occur
simultaneously, the timestamp attribute command will be sent to the device
during the UFS resume flow, while the UFS controller performs a reset in
the timeout error handling flow.
In this scenario, a bus timeout will occur because the UFS resume flow
will attempt to access the UFS host controller registers while the UFS
controller is in a reset state or power cycle.
Call trace:
arm64_serror_panic+0x6c/0x94
do_serror+0xbc/0xc8
el1h_64_error_handler+0x34/0x48
el1h_64_error+0x68/0x6c
_raw_spin_unlock+0x18/0x44
ufshcd_send_command+0x1c0/0x380
ufshcd_exec_dev_cmd+0x21c/0x29c
ufshcd_set_timestamp_attr+0x78/0xdc
__ufshcd_wl_resume+0x228/0x48c
ufshcd_wl_resume+0x5c/0x1b4
scsi_bus_resume+0x58/0xa0
dpm_run_callback+0x6c/0x23c
__device_resume+0x298/0x46c
async_resume+0x24/0x3c
async_run_entry_fn+0x44/0x150
process_scheduled_works+0x254/0x4f4
worker_thread+0x244/0x334
kthread+0x124/0x1f0
ret_from_fork+0x10/0x20
This patch fixes the issue by adding a check of the UFS controller
register states before sending the device command.
Signed-off-by: zhanghui <zhanghui31@xiaomi.com>
---
drivers/ufs/core/ufshcd.c | 14 ++++++++++++++
include/ufs/ufshcd.h | 13 +++++++++++++
2 files changed, 27 insertions(+)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 5e3c67e96956..e5e3e0277d43 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3291,6 +3291,8 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
struct ufshcd_lrb *lrbp = &hba->lrb[tag];
int err;
+ if (hba->ufshcd_reg_state == UFSHCD_REG_RESET)
+ return -EBUSY;
/* Protects use of hba->reserved_slot. */
lockdep_assert_held(&hba->dev_cmd.lock);
@@ -4857,6 +4859,7 @@ static int ufshcd_hba_execute_hce(struct ufs_hba *hba)
int ufshcd_hba_enable(struct ufs_hba *hba)
{
int ret;
+ unsigned long flags;
if (hba->quirks & UFSHCI_QUIRK_BROKEN_HCE) {
ufshcd_set_link_off(hba);
@@ -4881,6 +4884,13 @@ int ufshcd_hba_enable(struct ufs_hba *hba)
ret = ufshcd_hba_execute_hce(hba);
}
+ spin_lock_irqsave(hba->host->host_lock, flags);
+ if (ret)
+ hba->ufshcd_reg_state = UFSHCD_REG_RESET;
+ else
+ hba->ufshcd_reg_state = UFSHCD_REG_OPERATIONAL;
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
+
return ret;
}
EXPORT_SYMBOL_GPL(ufshcd_hba_enable);
@@ -7693,7 +7703,11 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
{
int err;
+ unsigned long flags;
+ spin_lock_irqsave(hba->host->host_lock, flags);
+ hba->ufshcd_reg_state = UFSHCD_REG_RESET;
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
/*
* Stop the host controller and complete the requests
* cleared by h/w
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index cac0cdb9a916..e5af4c2114ce 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -523,6 +523,18 @@ enum ufshcd_state {
UFSHCD_STATE_ERROR,
};
+/**
+ * enum ufshcd_state - UFS host controller state
+ * @UFSHCD_REG_OPERATIONAL: UFS host controller is enabled, the host controller
+ * register can be accessed.
+ * @UFSHCD_REG_RESET: UFS host controller registers will be reset, can't access
+ * any ufs host controller register.
+ */
+enum ufshcd_reg_state {
+ UFSHCD_REG_OPERATIONAL,
+ UFSHCD_REG_RESET,
+};
+
enum ufshcd_quirks {
/* Interrupt aggregation support is broken */
UFSHCD_QUIRK_BROKEN_INTR_AGGR = 1 << 0,
@@ -1020,6 +1032,7 @@ struct ufs_hba {
struct completion *uic_async_done;
enum ufshcd_state ufshcd_state;
+ enum ufshcd_reg_state ufshcd_reg_state;
u32 eh_flags;
u32 intr_mask;
u16 ee_ctrl_mask;
--
2.43.0
On Tue, 2024-08-13 at 21:47 +0800, ZhangHui wrote:
> From: zhanghui <zhanghui31@xiaomi.com>
>
> If the SSU CMD completion flow in UFS resume and the CMD timeout flow
> occur
> simultaneously, the timestamp attribute command will be sent to the
> device
>
Hi Zhanghui,
If the timeout command is SSU?
In resume flow, if SSU command timeout, ufshcd_eh_host_reset_handler
invoke ufshcd_link_recovery only, not schedule eh work?
Thanks.
Peter
> during the UFS resume flow, while the UFS controller performs a reset
> in
> the timeout error handling flow.
>
> In this scenario, a bus timeout will occur because the UFS resume
> flow
> will attempt to access the UFS host controller registers while the
> UFS
> controller is in a reset state or power cycle.
>
> Call trace:
> arm64_serror_panic+0x6c/0x94
> do_serror+0xbc/0xc8
> el1h_64_error_handler+0x34/0x48
> el1h_64_error+0x68/0x6c
> _raw_spin_unlock+0x18/0x44
> ufshcd_send_command+0x1c0/0x380
> ufshcd_exec_dev_cmd+0x21c/0x29c
> ufshcd_set_timestamp_attr+0x78/0xdc
> __ufshcd_wl_resume+0x228/0x48c
> ufshcd_wl_resume+0x5c/0x1b4
> scsi_bus_resume+0x58/0xa0
> dpm_run_callback+0x6c/0x23c
> __device_resume+0x298/0x46c
> async_resume+0x24/0x3c
> async_run_entry_fn+0x44/0x150
> process_scheduled_works+0x254/0x4f4
> worker_thread+0x244/0x334
> kthread+0x124/0x1f0
> ret_from_fork+0x10/0x20
>
> This patch fixes the issue by adding a check of the UFS controller
> register states before sending the device command.
>
> Signed-off-by: zhanghui <zhanghui31@xiaomi.com>
> ---
> drivers/ufs/core/ufshcd.c | 14 ++++++++++++++
> include/ufs/ufshcd.h | 13 +++++++++++++
> 2 files changed, 27 insertions(+)
>
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index 5e3c67e96956..e5e3e0277d43 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -3291,6 +3291,8 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba
> *hba,
> struct ufshcd_lrb *lrbp = &hba->lrb[tag];
> int err;
>
> + if (hba->ufshcd_reg_state == UFSHCD_REG_RESET)
> + return -EBUSY;
> /* Protects use of hba->reserved_slot. */
> lockdep_assert_held(&hba->dev_cmd.lock);
>
> @@ -4857,6 +4859,7 @@ static int ufshcd_hba_execute_hce(struct
> ufs_hba *hba)
> int ufshcd_hba_enable(struct ufs_hba *hba)
> {
> int ret;
> + unsigned long flags;
>
> if (hba->quirks & UFSHCI_QUIRK_BROKEN_HCE) {
> ufshcd_set_link_off(hba);
> @@ -4881,6 +4884,13 @@ int ufshcd_hba_enable(struct ufs_hba *hba)
> ret = ufshcd_hba_execute_hce(hba);
> }
>
> + spin_lock_irqsave(hba->host->host_lock, flags);
> + if (ret)
> + hba->ufshcd_reg_state = UFSHCD_REG_RESET;
> + else
> + hba->ufshcd_reg_state = UFSHCD_REG_OPERATIONAL;
> + spin_unlock_irqrestore(hba->host->host_lock, flags);
> +
> return ret;
> }
> EXPORT_SYMBOL_GPL(ufshcd_hba_enable);
> @@ -7693,7 +7703,11 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
> static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
> {
> int err;
> + unsigned long flags;
>
> + spin_lock_irqsave(hba->host->host_lock, flags);
> + hba->ufshcd_reg_state = UFSHCD_REG_RESET;
> + spin_unlock_irqrestore(hba->host->host_lock, flags);
> /*
> * Stop the host controller and complete the requests
> * cleared by h/w
> diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
> index cac0cdb9a916..e5af4c2114ce 100644
> --- a/include/ufs/ufshcd.h
> +++ b/include/ufs/ufshcd.h
> @@ -523,6 +523,18 @@ enum ufshcd_state {
> UFSHCD_STATE_ERROR,
> };
>
> +/**
> + * enum ufshcd_state - UFS host controller state
> + * @UFSHCD_REG_OPERATIONAL: UFS host controller is enabled, the host
> controller
> + * register can be accessed.
> + * @UFSHCD_REG_RESET: UFS host controller registers will be reset,
> can't access
> + * any ufs host controller register.
> + */
> +enum ufshcd_reg_state {
> + UFSHCD_REG_OPERATIONAL,
> + UFSHCD_REG_RESET,
> +};
> +
> enum ufshcd_quirks {
> /* Interrupt aggregation support is broken */
> UFSHCD_QUIRK_BROKEN_INTR_AGGR = 1 << 0,
> @@ -1020,6 +1032,7 @@ struct ufs_hba {
> struct completion *uic_async_done;
>
> enum ufshcd_state ufshcd_state;
> + enum ufshcd_reg_state ufshcd_reg_state;
> u32 eh_flags;
> u32 intr_mask;
> u16 ee_ctrl_mask;
On 2024/8/14 14:39, Peter Wang (王信友) wrote: > On Tue, 2024-08-13 at 21:47 +0800, ZhangHui wrote: >> From: zhanghui <zhanghui31@xiaomi.com> >> >> If the SSU CMD completion flow in UFS resume and the CMD timeout flow >> occur >> simultaneously, the timestamp attribute command will be sent to the >> device >> > Hi Zhanghui, > > If the timeout command is SSU? > In resume flow, if SSU command timeout, ufshcd_eh_host_reset_handler > invoke ufshcd_link_recovery only, not schedule eh work? > > > Thanks. > Peter hi Peter G, ufshcd_link_recovery calls ufshcd_host_reset_and_restore to power cycle and reset the host controller. Thanks Zhanghui
On 8/13/24 6:47 AM, ZhangHui wrote: > diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c > index 5e3c67e96956..e5e3e0277d43 100644 > --- a/drivers/ufs/core/ufshcd.c > +++ b/drivers/ufs/core/ufshcd.c > @@ -3291,6 +3291,8 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, > struct ufshcd_lrb *lrbp = &hba->lrb[tag]; > int err; > > + if (hba->ufshcd_reg_state == UFSHCD_REG_RESET) > + return -EBUSY; > /* Protects use of hba->reserved_slot. */ > lockdep_assert_held(&hba->dev_cmd.lock); Does this change make ufshcd_exec_dev_cmd() unpredictable - it succeeds if the controller is in the normal state and fails if error recovery is ongoing? If so, which code paths does this affect and/or break? Additionally, I think the above check is racy. hba->ufshcd_reg_state may change after the above code checked it and before ufshcd_exec_dev_cmd() has finished. Wouldn't it be better to make code that shouldn't be executed while the error handler is ongoing wait until error handling has finished? Thanks, Bart.
On 2024/8/14 9:41, Bart Van Assche wrote: > On 8/13/24 6:47 AM, ZhangHui wrote: >> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c >> index 5e3c67e96956..e5e3e0277d43 100644 >> --- a/drivers/ufs/core/ufshcd.c >> +++ b/drivers/ufs/core/ufshcd.c >> @@ -3291,6 +3291,8 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba >> *hba, >> struct ufshcd_lrb *lrbp = &hba->lrb[tag]; >> int err; >> >> + if (hba->ufshcd_reg_state == UFSHCD_REG_RESET) >> + return -EBUSY; >> /* Protects use of hba->reserved_slot. */ >> lockdep_assert_held(&hba->dev_cmd.lock); > > Does this change make ufshcd_exec_dev_cmd() unpredictable - it succeeds > if the controller is in the normal state and fails if error recovery > is ongoing? If so, which code paths does this affect and/or break? > > Additionally, I think the above check is racy. hba->ufshcd_reg_state may > change after the above code checked it and before ufshcd_exec_dev_cmd() > has finished. Wouldn't it be better to make code that shouldn't be > executed while the error handler is ongoing wait until error handling > has finished? > > Thanks, > > Bart. > hi Bart, 1. If the host needs to send a dev command, the HBA must be enabled. We have set the ufshcd_reg_state to operational in the ufshcd_hba_enable, so it is not unpredictable. 2. That's a good question, but I think it makes sense to block dev cmd while ufs is doing a reset. Thanks Zhanghui
© 2016 - 2026 Red Hat, Inc.