FENCING is a new controller state that a LIVE controller enters when an
error is encountered. While in FENCING state, inflight IOs that time out
are not canceled because they should be held until either CCR succeeds
or time-based recovery completes. While the queues remain alive, requests
are not allowed to be sent in this state and the controller cannot be
reset or deleted. This is intentional because resetting or deleting the
controller results in canceling inflight IOs.
FENCED is a short-term state the controller enters before it is reset.
It exists only to prevent manual resets from happening while the
controller is in FENCING state.
Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
---
drivers/nvme/host/core.c | 25 +++++++++++++++++++++++--
drivers/nvme/host/nvme.h | 4 ++++
drivers/nvme/host/sysfs.c | 2 ++
3 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8961d612ccb0..3e1e02822dd4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -574,10 +574,29 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
break;
}
break;
+ case NVME_CTRL_FENCING:
+ switch (old_state) {
+ case NVME_CTRL_LIVE:
+ changed = true;
+ fallthrough;
+ default:
+ break;
+ }
+ break;
+ case NVME_CTRL_FENCED:
+ switch (old_state) {
+ case NVME_CTRL_FENCING:
+ changed = true;
+ fallthrough;
+ default:
+ break;
+ }
+ break;
case NVME_CTRL_RESETTING:
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
+ case NVME_CTRL_FENCED:
changed = true;
fallthrough;
default:
@@ -760,6 +779,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
if (state != NVME_CTRL_DELETING_NOIO &&
state != NVME_CTRL_DELETING &&
+ state != NVME_CTRL_FENCING &&
state != NVME_CTRL_DEAD &&
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
@@ -802,10 +822,11 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
req->cmd->fabrics.fctype == nvme_fabrics_type_auth_receive))
return true;
break;
- default:
- break;
+ case NVME_CTRL_FENCING:
case NVME_CTRL_DEAD:
return false;
+ default:
+ break;
}
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9dd9f179ad88..00866bbc66f3 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -251,6 +251,8 @@ static inline u16 nvme_req_qid(struct request *req)
enum nvme_ctrl_state {
NVME_CTRL_NEW,
NVME_CTRL_LIVE,
+ NVME_CTRL_FENCING,
+ NVME_CTRL_FENCED,
NVME_CTRL_RESETTING,
NVME_CTRL_CONNECTING,
NVME_CTRL_DELETING,
@@ -777,6 +779,8 @@ static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
switch (nvme_ctrl_state(ctrl)) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
+ case NVME_CTRL_FENCING:
+ case NVME_CTRL_FENCED:
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
return false;
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index f81bbb6ec768..4ec9dfeb736e 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -443,6 +443,8 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
+ [NVME_CTRL_FENCING] = "fencing",
+ [NVME_CTRL_FENCED] = "fenced",
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_CONNECTING] = "connecting",
[NVME_CTRL_DELETING] = "deleting",
--
2.52.0
On 1/30/26 23:34, Mohamed Khalfella wrote:
> FENCING is a new controller state that a LIVE controller enter when an
> error is encountered. While in FENCING state inflight IOs that timeout
> are not canceled because they should be held until either CCR succeeds
> or time-based recovery completes. While the queues remain alive requests
> are not allowed to be sent in this state and the controller can not be
> reset of deleted. This is intentional because resetting or deleting the
> controller results in canceling inflight IOs.
>
> FENCED is a short-term state the controller enters before it is reset.
> It exists only to prevent manual resets to happen while controller is
> in FENCING state.
>
> Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
> ---
> drivers/nvme/host/core.c | 25 +++++++++++++++++++++++--
> drivers/nvme/host/nvme.h | 4 ++++
> drivers/nvme/host/sysfs.c | 2 ++
> 3 files changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 8961d612ccb0..3e1e02822dd4 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -574,10 +574,29 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
> break;
> }
> break;
> + case NVME_CTRL_FENCING:
> + switch (old_state) {
> + case NVME_CTRL_LIVE:
> + changed = true;
> + fallthrough;
> + default:
> + break;
> + }
> + break;
> + case NVME_CTRL_FENCED:
> + switch (old_state) {
> + case NVME_CTRL_FENCING:
> + changed = true;
> + fallthrough;
> + default:
> + break;
> + }
> + break;
> case NVME_CTRL_RESETTING:
> switch (old_state) {
> case NVME_CTRL_NEW:
> case NVME_CTRL_LIVE:
> + case NVME_CTRL_FENCED:
> changed = true;
> fallthrough;
> default:
> @@ -760,6 +779,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
>
> if (state != NVME_CTRL_DELETING_NOIO &&
> state != NVME_CTRL_DELETING &&
> + state != NVME_CTRL_FENCING &&
Shouldn't 'FENCED' be in here, too?
> state != NVME_CTRL_DEAD &&
> !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
> !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
> @@ -802,10 +822,11 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
> req->cmd->fabrics.fctype == nvme_fabrics_type_auth_receive))
> return true;
> break;
> - default:
> - break;
> + case NVME_CTRL_FENCING:
Similar here.
> case NVME_CTRL_DEAD:
> return false;
> + default:
> + break;
> }
> }
>
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 9dd9f179ad88..00866bbc66f3 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -251,6 +251,8 @@ static inline u16 nvme_req_qid(struct request *req)
> enum nvme_ctrl_state {
> NVME_CTRL_NEW,
> NVME_CTRL_LIVE,
> + NVME_CTRL_FENCING,
> + NVME_CTRL_FENCED,
> NVME_CTRL_RESETTING,
> NVME_CTRL_CONNECTING,
> NVME_CTRL_DELETING,
> @@ -777,6 +779,8 @@ static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
> switch (nvme_ctrl_state(ctrl)) {
> case NVME_CTRL_NEW:
> case NVME_CTRL_LIVE:
> + case NVME_CTRL_FENCING:
> + case NVME_CTRL_FENCED:
> case NVME_CTRL_RESETTING:
> case NVME_CTRL_CONNECTING:
> return false;
> diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
> index f81bbb6ec768..4ec9dfeb736e 100644
> --- a/drivers/nvme/host/sysfs.c
> +++ b/drivers/nvme/host/sysfs.c
> @@ -443,6 +443,8 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
> static const char *const state_name[] = {
> [NVME_CTRL_NEW] = "new",
> [NVME_CTRL_LIVE] = "live",
> + [NVME_CTRL_FENCING] = "fencing",
> + [NVME_CTRL_FENCED] = "fenced",
> [NVME_CTRL_RESETTING] = "resetting",
> [NVME_CTRL_CONNECTING] = "connecting",
> [NVME_CTRL_DELETING] = "deleting",
You need to modify nvme-tcp.c:nvme_tcp_timeout() too, as this checks
'just' for 'LIVE' state and will abort/terminate commands when in
FENCING. Similar argument for nvme-rdma.c. And nvme-fc.c also needs an
audit to ensure it works correctly.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
On Tue 2026-02-03 06:07:35 +0100, Hannes Reinecke wrote:
> On 1/30/26 23:34, Mohamed Khalfella wrote:
> > FENCING is a new controller state that a LIVE controller enter when an
> > error is encountered. While in FENCING state inflight IOs that timeout
> > are not canceled because they should be held until either CCR succeeds
> > or time-based recovery completes. While the queues remain alive requests
> > are not allowed to be sent in this state and the controller can not be
> > reset of deleted. This is intentional because resetting or deleting the
> > controller results in canceling inflight IOs.
> >
> > FENCED is a short-term state the controller enters before it is reset.
> > It exists only to prevent manual resets to happen while controller is
> > in FENCING state.
> >
> > Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
> > ---
> > drivers/nvme/host/core.c | 25 +++++++++++++++++++++++--
> > drivers/nvme/host/nvme.h | 4 ++++
> > drivers/nvme/host/sysfs.c | 2 ++
> > 3 files changed, 29 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 8961d612ccb0..3e1e02822dd4 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -574,10 +574,29 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
> > break;
> > }
> > break;
> > + case NVME_CTRL_FENCING:
> > + switch (old_state) {
> > + case NVME_CTRL_LIVE:
> > + changed = true;
> > + fallthrough;
> > + default:
> > + break;
> > + }
> > + break;
> > + case NVME_CTRL_FENCED:
> > + switch (old_state) {
> > + case NVME_CTRL_FENCING:
> > + changed = true;
> > + fallthrough;
> > + default:
> > + break;
> > + }
> > + break;
> > case NVME_CTRL_RESETTING:
> > switch (old_state) {
> > case NVME_CTRL_NEW:
> > case NVME_CTRL_LIVE:
> > + case NVME_CTRL_FENCED:
> > changed = true;
> > fallthrough;
> > default:
> > @@ -760,6 +779,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
> >
> > if (state != NVME_CTRL_DELETING_NOIO &&
> > state != NVME_CTRL_DELETING &&
> > + state != NVME_CTRL_FENCING &&
>
> Shouldn't 'FENCED' be in here, too?
Agreed. Will add FENCED to the two places.
>
> > state != NVME_CTRL_DEAD &&
> > !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
> > !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
> > @@ -802,10 +822,11 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
> > req->cmd->fabrics.fctype == nvme_fabrics_type_auth_receive))
> > return true;
> > break;
> > - default:
> > - break;
> > + case NVME_CTRL_FENCING:
>
> Similar here.
>
> > case NVME_CTRL_DEAD:
> > return false;
> > + default:
> > + break;
> > }
> > }
> >
> > diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> > index 9dd9f179ad88..00866bbc66f3 100644
> > --- a/drivers/nvme/host/nvme.h
> > +++ b/drivers/nvme/host/nvme.h
> > @@ -251,6 +251,8 @@ static inline u16 nvme_req_qid(struct request *req)
> > enum nvme_ctrl_state {
> > NVME_CTRL_NEW,
> > NVME_CTRL_LIVE,
> > + NVME_CTRL_FENCING,
> > + NVME_CTRL_FENCED,
> > NVME_CTRL_RESETTING,
> > NVME_CTRL_CONNECTING,
> > NVME_CTRL_DELETING,
> > @@ -777,6 +779,8 @@ static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
> > switch (nvme_ctrl_state(ctrl)) {
> > case NVME_CTRL_NEW:
> > case NVME_CTRL_LIVE:
> > + case NVME_CTRL_FENCING:
> > + case NVME_CTRL_FENCED:
> > case NVME_CTRL_RESETTING:
> > case NVME_CTRL_CONNECTING:
> > return false;
> > diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
> > index f81bbb6ec768..4ec9dfeb736e 100644
> > --- a/drivers/nvme/host/sysfs.c
> > +++ b/drivers/nvme/host/sysfs.c
> > @@ -443,6 +443,8 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
> > static const char *const state_name[] = {
> > [NVME_CTRL_NEW] = "new",
> > [NVME_CTRL_LIVE] = "live",
> > + [NVME_CTRL_FENCING] = "fencing",
> > + [NVME_CTRL_FENCED] = "fenced",
> > [NVME_CTRL_RESETTING] = "resetting",
> > [NVME_CTRL_CONNECTING] = "connecting",
> > [NVME_CTRL_DELETING] = "deleting",
>
> You need to modify nvme-tcp.c:nvme_tcp_timeout() too, as this checks
> 'just' for 'LIVE' state and will abort/terminate commands when in
> FENCING. Similar argument for nvme-rdma.c. And nvme-fc.c also needs an
> audit to ensure it works correctly.
Exactly. The changes to nvme-tcp, nvme-rdma, and nvme-fc are in
transport-specific patches. For tcp and rdma, the timeout callback
handler has been modified to do what you mentioned.
For nvme-fc, nvme_fc_start_ioerr_recovery() does nothing if the
controller is in FENCING state.
© 2016 - 2026 Red Hat, Inc.