When the ctrl is not in LIVE state, a hardware queue can be in the
INACTIVE state due to CPU hotplug offlining operations. In this case,
the driver will freeze and quiesce the request queue and doesn't expect
new requests entering via queue_rq. Though such a request would fail
eventually anyway, shortcut it and fail it earlier.
Check if a request is targeted at an inactive hardware queue and, if so,
use nvme_failover_req to hand it back to the block layer.
Signed-off-by: Daniel Wagner <wagi@kernel.org>
---
drivers/nvme/host/core.c | 55 ++++++++++++++++++++++++++++++++++++++++++-
drivers/nvme/host/multipath.c | 43 ---------------------------------
drivers/nvme/host/nvme.h | 1 -
3 files changed, 54 insertions(+), 45 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f5ebcaa2f859..e84df1a2d321 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -454,6 +454,51 @@ void nvme_end_req(struct request *req)
blk_mq_end_request(req, status);
}
+static void nvme_failover_req(struct request *req)
+{
+ struct nvme_ns *ns = req->q->queuedata;
+ u16 status = nvme_req(req)->status & NVME_SCT_SC_MASK;
+ unsigned long flags;
+ struct bio *bio;
+
+ if (nvme_ns_head_multipath(ns->head))
+ nvme_mpath_clear_current_path(ns);
+
+ /*
+ * If we got back an ANA error, we know the controller is alive but not
+ * ready to serve this namespace. Kick of a re-read of the ANA
+ * information page, and just try any other available path for now.
+ */
+ if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
+ set_bit(NVME_NS_ANA_PENDING, &ns->flags);
+ queue_work(nvme_wq, &ns->ctrl->ana_work);
+ }
+
+ spin_lock_irqsave(&ns->head->requeue_lock, flags);
+ for (bio = req->bio; bio; bio = bio->bi_next) {
+ if (nvme_ns_head_multipath(ns->head))
+ bio_set_dev(bio, ns->head->disk->part0);
+ if (bio->bi_opf & REQ_POLLED) {
+ bio->bi_opf &= ~REQ_POLLED;
+ bio->bi_cookie = BLK_QC_T_NONE;
+ }
+ /*
+ * The alternate request queue that we may end up submitting
+ * the bio to may be frozen temporarily, in this case REQ_NOWAIT
+ * will fail the I/O immediately with EAGAIN to the issuer.
+ * We are not in the issuer context which cannot block. Clear
+ * the flag to avoid spurious EAGAIN I/O failures.
+ */
+ bio->bi_opf &= ~REQ_NOWAIT;
+ }
+ blk_steal_bios(&ns->head->requeue_list, req);
+ spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
+
+ nvme_req(req)->status = 0;
+ nvme_end_req(req);
+ kblockd_schedule_work(&ns->head->requeue_work);
+}
+
void nvme_complete_rq(struct request *req)
{
struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
@@ -762,8 +807,13 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DEAD &&
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
- !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
+ !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) {
+ if (test_bit(BLK_MQ_S_INACTIVE, &rq->mq_hctx->state)) {
+ nvme_failover_req(rq);
+ return BLK_STS_OK;
+ }
return BLK_STS_RESOURCE;
+ }
if (!(rq->rq_flags & RQF_DONTPREP))
nvme_clear_nvme_request(rq);
@@ -809,6 +859,9 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
}
}
+ if (test_bit(BLK_MQ_S_INACTIVE, &rq->mq_hctx->state))
+ return false;
+
return queue_live;
}
EXPORT_SYMBOL_GPL(__nvme_check_ready);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 174027d1cc19..cce3a23f6de5 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -134,49 +134,6 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
blk_freeze_queue_start(h->disk->queue);
}
-void nvme_failover_req(struct request *req)
-{
- struct nvme_ns *ns = req->q->queuedata;
- u16 status = nvme_req(req)->status & NVME_SCT_SC_MASK;
- unsigned long flags;
- struct bio *bio;
-
- nvme_mpath_clear_current_path(ns);
-
- /*
- * If we got back an ANA error, we know the controller is alive but not
- * ready to serve this namespace. Kick of a re-read of the ANA
- * information page, and just try any other available path for now.
- */
- if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
- set_bit(NVME_NS_ANA_PENDING, &ns->flags);
- queue_work(nvme_wq, &ns->ctrl->ana_work);
- }
-
- spin_lock_irqsave(&ns->head->requeue_lock, flags);
- for (bio = req->bio; bio; bio = bio->bi_next) {
- bio_set_dev(bio, ns->head->disk->part0);
- if (bio->bi_opf & REQ_POLLED) {
- bio->bi_opf &= ~REQ_POLLED;
- bio->bi_cookie = BLK_QC_T_NONE;
- }
- /*
- * The alternate request queue that we may end up submitting
- * the bio to may be frozen temporarily, in this case REQ_NOWAIT
- * will fail the I/O immediately with EAGAIN to the issuer.
- * We are not in the issuer context which cannot block. Clear
- * the flag to avoid spurious EAGAIN I/O failures.
- */
- bio->bi_opf &= ~REQ_NOWAIT;
- }
- blk_steal_bios(&ns->head->requeue_list, req);
- spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
-
- nvme_req(req)->status = 0;
- nvme_end_req(req);
- kblockd_schedule_work(&ns->head->requeue_work);
-}
-
void nvme_mpath_start_request(struct request *rq)
{
struct nvme_ns *ns = rq->q->queuedata;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9a5f28c5103c..dbd063413da9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -967,7 +967,6 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
-void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns);
--
2.53.0
Hi Daniel,
kernel test robot noticed the following build errors:
[auto build test ERROR on 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f]
url: https://github.com/intel-lab-lkp/linux/commits/Daniel-Wagner/nvme-failover-requests-for-inactive-hctx/20260226-224213
base: 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f
patch link: https://lore.kernel.org/r/20260226-revert-cpu-read-lock-v1-1-eb005072566e%40kernel.org
patch subject: [PATCH 1/3] nvme: failover requests for inactive hctx
config: x86_64-randconfig-r071-20260227 (https://download.01.org/0day-ci/archive/20260227/202602270720.cugNS3m1-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
smatch version: v0.5.0-8994-gd50c5a4c
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260227/202602270720.cugNS3m1-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202602270720.cugNS3m1-lkp@intel.com/
All errors (new ones prefixed by >>):
>> drivers/nvme/host/core.c:457:13: error: redefinition of 'nvme_failover_req'
457 | static void nvme_failover_req(struct request *req)
| ^~~~~~~~~~~~~~~~~
In file included from drivers/nvme/host/core.c:27:
drivers/nvme/host/nvme.h:1020:20: note: previous definition of 'nvme_failover_req' with type 'void(struct request *)'
1020 | static inline void nvme_failover_req(struct request *req)
| ^~~~~~~~~~~~~~~~~
drivers/nvme/host/core.c: In function 'nvme_failover_req':
>> drivers/nvme/host/core.c:472:50: error: 'struct nvme_ctrl' has no member named 'ana_log_buf'
472 | if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
| ^~
>> drivers/nvme/host/core.c:474:48: error: 'struct nvme_ctrl' has no member named 'ana_work'; did you mean 'ka_work'?
474 | queue_work(nvme_wq, &ns->ctrl->ana_work);
| ^~~~~~~~
| ka_work
In file included from include/linux/sched.h:37,
from include/linux/ratelimit.h:6,
from include/linux/dev_printk.h:16,
from include/linux/device.h:15,
from include/linux/async.h:14,
from drivers/nvme/host/core.c:7:
>> drivers/nvme/host/core.c:477:36: error: 'struct nvme_ns_head' has no member named 'requeue_lock'
477 | spin_lock_irqsave(&ns->head->requeue_lock, flags);
| ^~
include/linux/spinlock.h:244:48: note: in definition of macro 'raw_spin_lock_irqsave'
244 | flags = _raw_spin_lock_irqsave(lock); \
| ^~~~
drivers/nvme/host/core.c:477:9: note: in expansion of macro 'spin_lock_irqsave'
477 | spin_lock_irqsave(&ns->head->requeue_lock, flags);
| ^~~~~~~~~~~~~~~~~
>> drivers/nvme/host/core.c:494:33: error: 'struct nvme_ns_head' has no member named 'requeue_list'
494 | blk_steal_bios(&ns->head->requeue_list, req);
| ^~
drivers/nvme/host/core.c:495:41: error: 'struct nvme_ns_head' has no member named 'requeue_lock'
495 | spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
| ^~
>> drivers/nvme/host/core.c:499:40: error: 'struct nvme_ns_head' has no member named 'requeue_work'
499 | kblockd_schedule_work(&ns->head->requeue_work);
| ^~
vim +/nvme_failover_req +457 drivers/nvme/host/core.c
456
> 457 static void nvme_failover_req(struct request *req)
458 {
459 struct nvme_ns *ns = req->q->queuedata;
460 u16 status = nvme_req(req)->status & NVME_SCT_SC_MASK;
461 unsigned long flags;
462 struct bio *bio;
463
464 if (nvme_ns_head_multipath(ns->head))
465 nvme_mpath_clear_current_path(ns);
466
467 /*
468 * If we got back an ANA error, we know the controller is alive but not
469 * ready to serve this namespace. Kick of a re-read of the ANA
470 * information page, and just try any other available path for now.
471 */
> 472 if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
473 set_bit(NVME_NS_ANA_PENDING, &ns->flags);
> 474 queue_work(nvme_wq, &ns->ctrl->ana_work);
475 }
476
> 477 spin_lock_irqsave(&ns->head->requeue_lock, flags);
478 for (bio = req->bio; bio; bio = bio->bi_next) {
479 if (nvme_ns_head_multipath(ns->head))
480 bio_set_dev(bio, ns->head->disk->part0);
481 if (bio->bi_opf & REQ_POLLED) {
482 bio->bi_opf &= ~REQ_POLLED;
483 bio->bi_cookie = BLK_QC_T_NONE;
484 }
485 /*
486 * The alternate request queue that we may end up submitting
487 * the bio to may be frozen temporarily, in this case REQ_NOWAIT
488 * will fail the I/O immediately with EAGAIN to the issuer.
489 * We are not in the issuer context which cannot block. Clear
490 * the flag to avoid spurious EAGAIN I/O failures.
491 */
492 bio->bi_opf &= ~REQ_NOWAIT;
493 }
> 494 blk_steal_bios(&ns->head->requeue_list, req);
495 spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
496
497 nvme_req(req)->status = 0;
498 nvme_end_req(req);
> 499 kblockd_schedule_work(&ns->head->requeue_work);
500 }
501
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Daniel,
kernel test robot noticed the following build errors:
[auto build test ERROR on 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f]
url: https://github.com/intel-lab-lkp/linux/commits/Daniel-Wagner/nvme-failover-requests-for-inactive-hctx/20260226-224213
base: 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f
patch link: https://lore.kernel.org/r/20260226-revert-cpu-read-lock-v1-1-eb005072566e%40kernel.org
patch subject: [PATCH 1/3] nvme: failover requests for inactive hctx
config: riscv-defconfig (https://download.01.org/0day-ci/archive/20260227/202602270348.j0MMNhUj-lkp@intel.com/config)
compiler: clang version 23.0.0git (https://github.com/llvm/llvm-project 9a109fbb6e184ec9bcce10615949f598f4c974a9)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260227/202602270348.j0MMNhUj-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202602270348.j0MMNhUj-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/nvme/host/core.c:457:13: error: redefinition of 'nvme_failover_req'
457 | static void nvme_failover_req(struct request *req)
| ^
drivers/nvme/host/nvme.h:1020:20: note: previous definition is here
1020 | static inline void nvme_failover_req(struct request *req)
| ^
>> drivers/nvme/host/core.c:472:45: error: no member named 'ana_log_buf' in 'struct nvme_ctrl'
472 | if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
| ~~~~~~~~ ^
>> drivers/nvme/host/core.c:474:34: error: no member named 'ana_work' in 'struct nvme_ctrl'
474 | queue_work(nvme_wq, &ns->ctrl->ana_work);
| ~~~~~~~~ ^
>> drivers/nvme/host/core.c:477:31: error: no member named 'requeue_lock' in 'struct nvme_ns_head'
477 | spin_lock_irqsave(&ns->head->requeue_lock, flags);
| ~~~~~~~~ ^
include/linux/spinlock.h:376:39: note: expanded from macro 'spin_lock_irqsave'
376 | raw_spin_lock_irqsave(spinlock_check(lock), flags); \
| ^~~~
include/linux/spinlock.h:244:34: note: expanded from macro 'raw_spin_lock_irqsave'
244 | flags = _raw_spin_lock_irqsave(lock); \
| ^~~~
>> drivers/nvme/host/core.c:494:28: error: no member named 'requeue_list' in 'struct nvme_ns_head'
494 | blk_steal_bios(&ns->head->requeue_list, req);
| ~~~~~~~~ ^
drivers/nvme/host/core.c:495:36: error: no member named 'requeue_lock' in 'struct nvme_ns_head'
495 | spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
| ~~~~~~~~ ^
>> drivers/nvme/host/core.c:499:35: error: no member named 'requeue_work' in 'struct nvme_ns_head'
499 | kblockd_schedule_work(&ns->head->requeue_work);
| ~~~~~~~~ ^
7 errors generated.
vim +472 drivers/nvme/host/core.c
456
457 static void nvme_failover_req(struct request *req)
458 {
459 struct nvme_ns *ns = req->q->queuedata;
460 u16 status = nvme_req(req)->status & NVME_SCT_SC_MASK;
461 unsigned long flags;
462 struct bio *bio;
463
464 if (nvme_ns_head_multipath(ns->head))
465 nvme_mpath_clear_current_path(ns);
466
467 /*
468 * If we got back an ANA error, we know the controller is alive but not
469 * ready to serve this namespace. Kick of a re-read of the ANA
470 * information page, and just try any other available path for now.
471 */
> 472 if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
473 set_bit(NVME_NS_ANA_PENDING, &ns->flags);
> 474 queue_work(nvme_wq, &ns->ctrl->ana_work);
475 }
476
> 477 spin_lock_irqsave(&ns->head->requeue_lock, flags);
478 for (bio = req->bio; bio; bio = bio->bi_next) {
479 if (nvme_ns_head_multipath(ns->head))
480 bio_set_dev(bio, ns->head->disk->part0);
481 if (bio->bi_opf & REQ_POLLED) {
482 bio->bi_opf &= ~REQ_POLLED;
483 bio->bi_cookie = BLK_QC_T_NONE;
484 }
485 /*
486 * The alternate request queue that we may end up submitting
487 * the bio to may be frozen temporarily, in this case REQ_NOWAIT
488 * will fail the I/O immediately with EAGAIN to the issuer.
489 * We are not in the issuer context which cannot block. Clear
490 * the flag to avoid spurious EAGAIN I/O failures.
491 */
492 bio->bi_opf &= ~REQ_NOWAIT;
493 }
> 494 blk_steal_bios(&ns->head->requeue_list, req);
495 spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
496
497 nvme_req(req)->status = 0;
498 nvme_end_req(req);
> 499 kblockd_schedule_work(&ns->head->requeue_work);
500 }
501
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2026 Red Hat, Inc.