[PATCH v3 13/21] nvme-fc: Use CCR to recover controller that hits an error

Mohamed Khalfella posted 21 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH v3 13/21] nvme-fc: Use CCR to recover controller that hits an error
Posted by Mohamed Khalfella 1 month, 2 weeks ago
An alive nvme controller that hits an error now will move to FENCING
state instead of RESETTING state. ctrl->fencing_work attempts CCR to
terminate inflight IOs. Regardless of the success or failure of CCR
operation the controller is transitioned to RESETTING state to continue
error recovery process.

Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
---
 drivers/nvme/host/fc.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e6ffaa19aba4..6ebabfb7e76d 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -166,6 +166,7 @@ struct nvme_fc_ctrl {
 	struct blk_mq_tag_set	admin_tag_set;
 	struct blk_mq_tag_set	tag_set;
 
+	struct work_struct	fencing_work;
 	struct work_struct	ioerr_work;
 	struct delayed_work	connect_work;
 
@@ -1868,6 +1869,24 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 	}
 }
 
+static void nvme_fc_fencing_work(struct work_struct *work)
+{
+	struct nvme_fc_ctrl *fc_ctrl =
+			container_of(work, struct nvme_fc_ctrl, fencing_work);
+	struct nvme_ctrl *ctrl = &fc_ctrl->ctrl;
+	unsigned long rem;
+
+	rem = nvme_fence_ctrl(ctrl);
+	if (rem) {
+		dev_info(ctrl->device,
+			 "CCR failed, skipping time-based recovery\n");
+	}
+
+	nvme_change_ctrl_state(ctrl, NVME_CTRL_FENCED);
+	if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+		queue_work(nvme_reset_wq, &fc_ctrl->ioerr_work);
+}
+
 static void
 nvme_fc_ctrl_ioerr_work(struct work_struct *work)
 {
@@ -1889,6 +1908,7 @@ nvme_fc_ctrl_ioerr_work(struct work_struct *work)
 		return;
 	}
 
+	flush_work(&ctrl->fencing_work);
 	nvme_fc_error_recovery(ctrl);
 }
 
@@ -1915,6 +1935,14 @@ static void nvme_fc_start_ioerr_recovery(struct nvme_fc_ctrl *ctrl,
 {
 	enum nvme_ctrl_state state;
 
+	if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_FENCING)) {
+		dev_warn(ctrl->ctrl.device,
+			 "NVME-FC{%d}: starting controller fencing %s\n",
+			 ctrl->cnum, errmsg);
+		queue_work(nvme_wq, &ctrl->fencing_work);
+		return;
+	}
+
 	if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) {
 		dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: starting error recovery %s\n",
 			 ctrl->cnum, errmsg);
@@ -3322,6 +3350,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 	struct nvme_fc_ctrl *ctrl =
 		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
 
+	flush_work(&ctrl->fencing_work);
 	nvme_stop_ctrl(&ctrl->ctrl);
 
 	/* will block will waiting for io to terminate */
@@ -3497,6 +3526,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
+	INIT_WORK(&ctrl->fencing_work, nvme_fc_fencing_work);
 	INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
 	spin_lock_init(&ctrl->lock);
 
-- 
2.52.0
Re: [PATCH v3 13/21] nvme-fc: Use CCR to recover controller that hits an error
Posted by James Smart 1 month ago
On 2/13/2026 8:25 PM, Mohamed Khalfella wrote:
> An alive nvme controller that hits an error now will move to FENCING
> state instead of RESETTING state. ctrl->fencing_work attempts CCR to
> terminate inflight IOs. Regardless of the success or failure of CCR
> operation the controller is transitioned to RESETTING state to continue
> error recovery process.
> 
> Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
> ---
>   drivers/nvme/host/fc.c | 30 ++++++++++++++++++++++++++++++
>   1 file changed, 30 insertions(+)
> 
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> index e6ffaa19aba4..6ebabfb7e76d 100644
> --- a/drivers/nvme/host/fc.c
> +++ b/drivers/nvme/host/fc.c
> @@ -166,6 +166,7 @@ struct nvme_fc_ctrl {
>   	struct blk_mq_tag_set	admin_tag_set;
>   	struct blk_mq_tag_set	tag_set;
>   
> +	struct work_struct	fencing_work;
>   	struct work_struct	ioerr_work;
>   	struct delayed_work	connect_work;
>   
> @@ -1868,6 +1869,24 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
>   	}
>   }
>   
> +static void nvme_fc_fencing_work(struct work_struct *work)
> +{
> +	struct nvme_fc_ctrl *fc_ctrl =
> +			container_of(work, struct nvme_fc_ctrl, fencing_work);
> +	struct nvme_ctrl *ctrl = &fc_ctrl->ctrl;
> +	unsigned long rem;
> +
> +	rem = nvme_fence_ctrl(ctrl);
> +	if (rem) {
> +		dev_info(ctrl->device,
> +			 "CCR failed, skipping time-based recovery\n");
> +	}
> +
> +	nvme_change_ctrl_state(ctrl, NVME_CTRL_FENCED);
> +	if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
> +		queue_work(nvme_reset_wq, &fc_ctrl->ioerr_work);

catch the rework of prior patch

> +}
> +
>   static void
>   nvme_fc_ctrl_ioerr_work(struct work_struct *work)
>   {
> @@ -1889,6 +1908,7 @@ nvme_fc_ctrl_ioerr_work(struct work_struct *work)
>   		return;
>   	}
>   
> +	flush_work(&ctrl->fencing_work);
>   	nvme_fc_error_recovery(ctrl);
>   }
>   
> @@ -1915,6 +1935,14 @@ static void nvme_fc_start_ioerr_recovery(struct nvme_fc_ctrl *ctrl,
>   {
>   	enum nvme_ctrl_state state;
>   
 From prior patch - the CONNECTING logic should be here....

> +	if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_FENCING)) {
> +		dev_warn(ctrl->ctrl.device,
> +			 "NVME-FC{%d}: starting controller fencing %s\n",
> +			 ctrl->cnum, errmsg);
> +		queue_work(nvme_wq, &ctrl->fencing_work);
> +		return;
> +	}
> +
>   	if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) {
>   		dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: starting error recovery %s\n",
>   			 ctrl->cnum, errmsg);
> @@ -3322,6 +3350,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
>   	struct nvme_fc_ctrl *ctrl =
>   		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
>   
> +	flush_work(&ctrl->fencing_work);
>   	nvme_stop_ctrl(&ctrl->ctrl);
>   
>   	/* will block will waiting for io to terminate */
> @@ -3497,6 +3526,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
>   
>   	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
>   	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
> +	INIT_WORK(&ctrl->fencing_work, nvme_fc_fencing_work);
>   	INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
>   	spin_lock_init(&ctrl->lock);
>   

There is a little rework needed to get in sync with my comment on the prior
patch, but otherwise what is here is fine.

What bothers me in this process is that there are certainly conditions
where there is no connectivity loss, and FC can send things such as
the ABTS or a Disconnect LS that can inform the controller to start
terminating. It's odd that we skip this step and go directly to the CCR
reset to terminate the controller. We should have been able to continue
to send the things that start to directly tear down the controller, which
can be happening in parallel with the CCR.

-- james
Re: [PATCH v3 13/21] nvme-fc: Use CCR to recover controller that hits an error
Posted by Mohamed Khalfella 1 week ago
On Fri 2026-02-27 17:03:55 -0800, James Smart wrote:
> On 2/13/2026 8:25 PM, Mohamed Khalfella wrote:
> > An alive nvme controller that hits an error now will move to FENCING
> > state instead of RESETTING state. ctrl->fencing_work attempts CCR to
> > terminate inflight IOs. Regardless of the success or failure of CCR
> > operation the controller is transitioned to RESETTING state to continue
> > error recovery process.
> > 
> > Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
> > ---
> >   drivers/nvme/host/fc.c | 30 ++++++++++++++++++++++++++++++
> >   1 file changed, 30 insertions(+)
> > 
> > diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> > index e6ffaa19aba4..6ebabfb7e76d 100644
> > --- a/drivers/nvme/host/fc.c
> > +++ b/drivers/nvme/host/fc.c
> > @@ -166,6 +166,7 @@ struct nvme_fc_ctrl {
> >   	struct blk_mq_tag_set	admin_tag_set;
> >   	struct blk_mq_tag_set	tag_set;
> >   
> > +	struct work_struct	fencing_work;
> >   	struct work_struct	ioerr_work;
> >   	struct delayed_work	connect_work;
> >   
> > @@ -1868,6 +1869,24 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
> >   	}
> >   }
> >   
> > +static void nvme_fc_fencing_work(struct work_struct *work)
> > +{
> > +	struct nvme_fc_ctrl *fc_ctrl =
> > +			container_of(work, struct nvme_fc_ctrl, fencing_work);
> > +	struct nvme_ctrl *ctrl = &fc_ctrl->ctrl;
> > +	unsigned long rem;
> > +
> > +	rem = nvme_fence_ctrl(ctrl);
> > +	if (rem) {
> > +		dev_info(ctrl->device,
> > +			 "CCR failed, skipping time-based recovery\n");
> > +	}
> > +
> > +	nvme_change_ctrl_state(ctrl, NVME_CTRL_FENCED);
> > +	if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
> > +		queue_work(nvme_reset_wq, &fc_ctrl->ioerr_work);
> 
> catch the rework of prior patch

I ended up not dropping ctrl->ioerr_work. There are situations where we
need the error recovery work to run on a separate thread.

> 
> > +}
> > +
> >   static void
> >   nvme_fc_ctrl_ioerr_work(struct work_struct *work)
> >   {
> > @@ -1889,6 +1908,7 @@ nvme_fc_ctrl_ioerr_work(struct work_struct *work)
> >   		return;
> >   	}
> >   
> > +	flush_work(&ctrl->fencing_work);
> >   	nvme_fc_error_recovery(ctrl);
> >   }
> >   
> > @@ -1915,6 +1935,14 @@ static void nvme_fc_start_ioerr_recovery(struct nvme_fc_ctrl *ctrl,
> >   {
> >   	enum nvme_ctrl_state state;
> >   
>  From prior patch - the CONNECTING logic should be here....

Yes, it is here. The check for CONNECTING state is at the top of
nvme_fc_start_ioerr_recovery(). 

> 
> > +	if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_FENCING)) {
> > +		dev_warn(ctrl->ctrl.device,
> > +			 "NVME-FC{%d}: starting controller fencing %s\n",
> > +			 ctrl->cnum, errmsg);
> > +		queue_work(nvme_wq, &ctrl->fencing_work);
> > +		return;
> > +	}
> > +
> >   	if (nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) {
> >   		dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: starting error recovery %s\n",
> >   			 ctrl->cnum, errmsg);
> > @@ -3322,6 +3350,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
> >   	struct nvme_fc_ctrl *ctrl =
> >   		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
> >   
> > +	flush_work(&ctrl->fencing_work);
> >   	nvme_stop_ctrl(&ctrl->ctrl);
> >   
> >   	/* will block will waiting for io to terminate */
> > @@ -3497,6 +3526,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
> >   
> >   	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
> >   	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
> > +	INIT_WORK(&ctrl->fencing_work, nvme_fc_fencing_work);
> >   	INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
> >   	spin_lock_init(&ctrl->lock);
> >   
> 
> there is a little to be in sync with my comment on the prior patch, but 
> otherwise what is here is fine.
> 
> What bothers me in this process is - there are certainly conditions 
> where there is not connectivity loss  where FC can send things such as 
> the ABTS or a Disconnect LS that can inform the controller to start 
> terminating. Its odd that we skip this step and go directly to the CCR 
> reset to terminate the controller.  We should have been able to continue 
> to send the things that start to directly tear down the controller which 
> can be happening in parallel with the CCR.

Depending on how the target is implemented, ABTS or a Disconnect LS does not
guarantee that inflight IOs are terminated. The main point of CCR is to
terminate inflight IOs, making it safe to retry failed IOs.