[PATCH v4 03/15] nvmet: Implement CCR nvme command

Mohamed Khalfella posted 15 patches 5 days, 13 hours ago
[PATCH v4 03/15] nvmet: Implement CCR nvme command
Posted by Mohamed Khalfella 5 days, 13 hours ago
Defined by TP8028 Rapid Path Failure Recovery, the CCR (Cross-Controller
Reset) command is an NVMe command issued by the initiator to the source
controller to reset the impacted controller. Implement the CCR command
for the Linux NVMe target.

Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
---
 drivers/nvme/target/admin-cmd.c | 74 ++++++++++++++++++++++++++++++++
 drivers/nvme/target/core.c      | 76 +++++++++++++++++++++++++++++++++
 drivers/nvme/target/nvmet.h     | 13 ++++++
 include/linux/nvme.h            | 23 ++++++++++
 4 files changed, 186 insertions(+)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index ec09e30eca18..0a37c0eeebb5 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -376,7 +376,9 @@ static void nvmet_get_cmd_effects_admin(struct nvmet_ctrl *ctrl,
 	log->acs[nvme_admin_get_features] =
 	log->acs[nvme_admin_async_event] =
 	log->acs[nvme_admin_keep_alive] =
+	log->acs[nvme_admin_cross_ctrl_reset] =
 		cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
+
 }
 
 static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
@@ -1613,6 +1615,75 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
 	nvmet_req_complete(req, status);
 }
 
+void nvmet_execute_cross_ctrl_reset(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ictrl, *sctrl = req->sq->ctrl;
+	struct nvme_command *cmd = req->cmd;
+	struct nvmet_ccr *ccr, *new_ccr;
+	int ccr_active, ccr_total;
+	u16 cntlid, status = NVME_SC_SUCCESS;
+
+	cntlid = le16_to_cpu(cmd->ccr.icid);
+	if (sctrl->cntlid == cntlid) {
+		req->error_loc =
+			offsetof(struct nvme_cross_ctrl_reset_cmd, icid);
+		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+		goto out;
+	}
+
+	/* Find and get impacted controller */
+	ictrl = nvmet_ctrl_find_get_ccr(sctrl->subsys, sctrl->hostnqn,
+					cmd->ccr.ciu, cntlid,
+					le64_to_cpu(cmd->ccr.cirn));
+	if (!ictrl) {
+		/* Immediate Reset Successful */
+		nvmet_set_result(req, 1);
+		status = NVME_SC_SUCCESS;
+		goto out;
+	}
+
+	ccr_total = ccr_active = 0;
+	mutex_lock(&sctrl->lock);
+	list_for_each_entry(ccr, &sctrl->ccr_list, entry) {
+		if (ccr->ctrl == ictrl) {
+			status = NVME_SC_CCR_IN_PROGRESS | NVME_STATUS_DNR;
+			goto out_unlock;
+		}
+
+		ccr_total++;
+		if (ccr->ctrl)
+			ccr_active++;
+	}
+
+	if (ccr_active >= NVMF_CCR_LIMIT) {
+		status = NVME_SC_CCR_LIMIT_EXCEEDED;
+		goto out_unlock;
+	}
+	if (ccr_total >= NVMF_CCR_PER_PAGE) {
+		status = NVME_SC_CCR_LOGPAGE_FULL;
+		goto out_unlock;
+	}
+
+	new_ccr = kmalloc_obj(*new_ccr, GFP_KERNEL);
+	if (!new_ccr) {
+		status = NVME_SC_INTERNAL;
+		goto out_unlock;
+	}
+
+	new_ccr->ciu = cmd->ccr.ciu;
+	new_ccr->icid = cntlid;
+	new_ccr->ctrl = ictrl;
+	list_add_tail(&new_ccr->entry, &sctrl->ccr_list);
+
+out_unlock:
+	mutex_unlock(&sctrl->lock);
+	if (status == NVME_SC_SUCCESS)
+		nvmet_ctrl_fatal_error(ictrl);
+	nvmet_ctrl_put(ictrl);
+out:
+	nvmet_req_complete(req, status);
+}
+
 u32 nvmet_admin_cmd_data_len(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
@@ -1690,6 +1761,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 	case nvme_admin_keep_alive:
 		req->execute = nvmet_execute_keep_alive;
 		return 0;
+	case nvme_admin_cross_ctrl_reset:
+		req->execute = nvmet_execute_cross_ctrl_reset;
+		return 0;
 	default:
 		return nvmet_report_invalid_opcode(req);
 	}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index e8b945a01f35..2e0c31d82bad 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -117,6 +117,20 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
 	return 0;
 }
 
+void nvmet_ctrl_cleanup_ccrs(struct nvmet_ctrl *ctrl, bool all)
+{
+	struct nvmet_ccr *ccr, *tmp;
+
+	lockdep_assert_held(&ctrl->lock);
+
+	list_for_each_entry_safe(ccr, tmp, &ctrl->ccr_list, entry) {
+		if (all || ccr->ctrl == NULL) {
+			list_del(&ccr->entry);
+			kfree(ccr);
+		}
+	}
+}
+
 static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
 {
 	struct nvmet_ns *cur;
@@ -1399,6 +1413,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 	if (!nvmet_is_disc_subsys(ctrl->subsys)) {
 		ctrl->ciu = ((u8)(ctrl->ciu + 1)) ? : 1;
 		ctrl->cirn = get_random_u64();
+		nvmet_ctrl_cleanup_ccrs(ctrl, false);
 	}
 	ctrl->csts = NVME_CSTS_RDY;
 
@@ -1504,6 +1519,35 @@ struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
 	return ctrl;
 }
 
+struct nvmet_ctrl *nvmet_ctrl_find_get_ccr(struct nvmet_subsys *subsys,
+					   const char *hostnqn, u8 ciu,
+					   u16 cntlid, u64 cirn)
+{
+	struct nvmet_ctrl *ctrl, *ictrl = NULL;
+	bool found = false;
+
+	mutex_lock(&subsys->lock);
+	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+		if (ctrl->cntlid != cntlid)
+			continue;
+
+		/* Avoid racing with a controller that is becoming ready */
+		mutex_lock(&ctrl->lock);
+		if (ctrl->ciu == ciu && ctrl->cirn == cirn)
+			found = true;
+		mutex_unlock(&ctrl->lock);
+
+		if (found) {
+			if (kref_get_unless_zero(&ctrl->ref))
+				ictrl = ctrl;
+			break;
+		}
+	};
+	mutex_unlock(&subsys->lock);
+
+	return ictrl;
+}
+
 u16 nvmet_check_ctrl_status(struct nvmet_req *req)
 {
 	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
@@ -1629,6 +1673,7 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
 		subsys->clear_ids = 1;
 #endif
 
+	INIT_LIST_HEAD(&ctrl->ccr_list);
 	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
 	INIT_LIST_HEAD(&ctrl->async_events);
 	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
@@ -1739,12 +1784,43 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
 }
 EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);
 
+static void nvmet_ctrl_complete_pending_ccr(struct nvmet_ctrl *ctrl)
+{
+	struct nvmet_subsys *subsys = ctrl->subsys;
+	struct nvmet_ctrl *sctrl;
+	struct nvmet_ccr *ccr;
+
+	lockdep_assert_held(&subsys->lock);
+
+	/* Cleanup all CCRs issued by ctrl as source controller */
+	mutex_lock(&ctrl->lock);
+	nvmet_ctrl_cleanup_ccrs(ctrl, true);
+	mutex_unlock(&ctrl->lock);
+
+	/*
+	 * Find all CCRs targeting ctrl as impacted controller and
+	 * set ccr->ctrl to NULL. This tells the source controller
+	 * that CCR completed successfully.
+	 */
+	list_for_each_entry(sctrl, &subsys->ctrls, subsys_entry) {
+		mutex_lock(&sctrl->lock);
+		list_for_each_entry(ccr, &sctrl->ccr_list, entry) {
+			if (ccr->ctrl == ctrl) {
+				ccr->ctrl = NULL;
+				break;
+			}
+		}
+		mutex_unlock(&sctrl->lock);
+	}
+}
+
 static void nvmet_ctrl_free(struct kref *ref)
 {
 	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
 	struct nvmet_subsys *subsys = ctrl->subsys;
 
 	mutex_lock(&subsys->lock);
+	nvmet_ctrl_complete_pending_ccr(ctrl);
 	nvmet_ctrl_destroy_pr(ctrl);
 	nvmet_release_p2p_ns_map(ctrl);
 	list_del(&ctrl->subsys_entry);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 2181ac45ae7f..b9eb044ded19 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -268,6 +268,7 @@ struct nvmet_ctrl {
 	u32			kato;
 	u64			cirn;
 
+	struct list_head	ccr_list;
 	struct nvmet_port	*port;
 
 	u32			aen_enabled;
@@ -314,6 +315,13 @@ struct nvmet_ctrl {
 	struct nvmet_pr_log_mgr pr_log_mgr;
 };
 
+struct nvmet_ccr {
+	struct nvmet_ctrl	*ctrl;
+	struct list_head	entry;
+	u16			icid;
+	u8			ciu;
+};
+
 struct nvmet_subsys {
 	enum nvme_subsys_type	type;
 
@@ -577,6 +585,7 @@ void nvmet_req_free_sgls(struct nvmet_req *req);
 void nvmet_execute_set_features(struct nvmet_req *req);
 void nvmet_execute_get_features(struct nvmet_req *req);
 void nvmet_execute_keep_alive(struct nvmet_req *req);
+void nvmet_execute_cross_ctrl_reset(struct nvmet_req *req);
 
 u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create);
 u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create);
@@ -619,6 +628,10 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args);
 struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
 				       const char *hostnqn, u16 cntlid,
 				       struct nvmet_req *req);
+struct nvmet_ctrl *nvmet_ctrl_find_get_ccr(struct nvmet_subsys *subsys,
+					   const char *hostnqn, u8 ciu,
+					   u16 cntlid, u64 cirn);
+void nvmet_ctrl_cleanup_ccrs(struct nvmet_ctrl *ctrl, bool all);
 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
 u16 nvmet_check_ctrl_status(struct nvmet_req *req);
 ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 7746b6d30349..bd3b3f2a5377 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -22,6 +22,7 @@
 #define NVMF_TSAS_SIZE		256
 
 #define NVMF_CCR_LIMIT		4
+#define NVMF_CCR_PER_PAGE	511
 
 #define NVME_DISC_SUBSYS_NAME	"nqn.2014-08.org.nvmexpress.discovery"
 
@@ -1222,6 +1223,22 @@ struct nvme_zone_mgmt_recv_cmd {
 	__le32			cdw14[2];
 };
 
+struct nvme_cross_ctrl_reset_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__le64			rsvd2[2];
+	union nvme_data_ptr	dptr;
+	__le16			icid;
+	__u8			ciu;
+	__u8			rsvd10;
+	__le32			cdw11;
+	__le64			cirn;
+	__le32			cdw14;
+	__le32			cdw15;
+};
+
 struct nvme_io_mgmt_recv_cmd {
 	__u8			opcode;
 	__u8			flags;
@@ -1320,6 +1337,7 @@ enum nvme_admin_opcode {
 	nvme_admin_virtual_mgmt		= 0x1c,
 	nvme_admin_nvme_mi_send		= 0x1d,
 	nvme_admin_nvme_mi_recv		= 0x1e,
+	nvme_admin_cross_ctrl_reset	= 0x38,
 	nvme_admin_dbbuf		= 0x7C,
 	nvme_admin_format_nvm		= 0x80,
 	nvme_admin_security_send	= 0x81,
@@ -1353,6 +1371,7 @@ enum nvme_admin_opcode {
 		nvme_admin_opcode_name(nvme_admin_virtual_mgmt),	\
 		nvme_admin_opcode_name(nvme_admin_nvme_mi_send),	\
 		nvme_admin_opcode_name(nvme_admin_nvme_mi_recv),	\
+		nvme_admin_opcode_name(nvme_admin_cross_ctrl_reset),	\
 		nvme_admin_opcode_name(nvme_admin_dbbuf),		\
 		nvme_admin_opcode_name(nvme_admin_format_nvm),		\
 		nvme_admin_opcode_name(nvme_admin_security_send),	\
@@ -2006,6 +2025,7 @@ struct nvme_command {
 		struct nvme_dbbuf dbbuf;
 		struct nvme_directive_cmd directive;
 		struct nvme_io_mgmt_recv_cmd imr;
+		struct nvme_cross_ctrl_reset_cmd ccr;
 	};
 };
 
@@ -2170,6 +2190,9 @@ enum {
 	NVME_SC_PMR_SAN_PROHIBITED	= 0x123,
 	NVME_SC_ANA_GROUP_ID_INVALID	= 0x124,
 	NVME_SC_ANA_ATTACH_FAILED	= 0x125,
+	NVME_SC_CCR_IN_PROGRESS		= 0x13f,
+	NVME_SC_CCR_LOGPAGE_FULL	= 0x140,
+	NVME_SC_CCR_LIMIT_EXCEEDED	= 0x141,
 
 	/*
 	 * I/O Command Set Specific - NVM commands:
-- 
2.52.0
Re: [PATCH v4 03/15] nvmet: Implement CCR nvme command
Posted by Hannes Reinecke 3 days, 3 hours ago
On 3/28/26 01:43, Mohamed Khalfella wrote:
> Defined by TP8028 Rapid Path Failure Recovery, CCR (Cross-Controller
> Reset) command is an nvme command issued to source controller by
> initiator to reset impacted controller. Implement CCR command for linux
> nvme target.
> 
> Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
> ---
>   drivers/nvme/target/admin-cmd.c | 74 ++++++++++++++++++++++++++++++++
>   drivers/nvme/target/core.c      | 76 +++++++++++++++++++++++++++++++++
>   drivers/nvme/target/nvmet.h     | 13 ++++++
>   include/linux/nvme.h            | 23 ++++++++++
>   4 files changed, 186 insertions(+)
> 
> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
> index ec09e30eca18..0a37c0eeebb5 100644
> --- a/drivers/nvme/target/admin-cmd.c
> +++ b/drivers/nvme/target/admin-cmd.c
> @@ -376,7 +376,9 @@ static void nvmet_get_cmd_effects_admin(struct nvmet_ctrl *ctrl,
>   	log->acs[nvme_admin_get_features] =
>   	log->acs[nvme_admin_async_event] =
>   	log->acs[nvme_admin_keep_alive] =
> +	log->acs[nvme_admin_cross_ctrl_reset] =
>   		cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
> +
>   }
>   
>   static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
> @@ -1613,6 +1615,75 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
>   	nvmet_req_complete(req, status);
>   }
>   
> +void nvmet_execute_cross_ctrl_reset(struct nvmet_req *req)
> +{
> +	struct nvmet_ctrl *ictrl, *sctrl = req->sq->ctrl;
> +	struct nvme_command *cmd = req->cmd;
> +	struct nvmet_ccr *ccr, *new_ccr;
> +	int ccr_active, ccr_total;
> +	u16 cntlid, status = NVME_SC_SUCCESS;
> +
> +	cntlid = le16_to_cpu(cmd->ccr.icid);
> +	if (sctrl->cntlid == cntlid) {
> +		req->error_loc =
> +			offsetof(struct nvme_cross_ctrl_reset_cmd, icid);
> +		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
> +		goto out;
> +	}
> +
> +	/* Find and get impacted controller */
> +	ictrl = nvmet_ctrl_find_get_ccr(sctrl->subsys, sctrl->hostnqn,
> +					cmd->ccr.ciu, cntlid,
> +					le64_to_cpu(cmd->ccr.cirn));
> +	if (!ictrl) {
> +		/* Immediate Reset Successful */
> +		nvmet_set_result(req, 1);
> +		status = NVME_SC_SUCCESS;
> +		goto out;
> +	}
> +
> +	ccr_total = ccr_active = 0;
> +	mutex_lock(&sctrl->lock);
> +	list_for_each_entry(ccr, &sctrl->ccr_list, entry) {
> +		if (ccr->ctrl == ictrl) {
> +			status = NVME_SC_CCR_IN_PROGRESS | NVME_STATUS_DNR;
> +			goto out_unlock;
> +		}
> +
> +		ccr_total++;
> +		if (ccr->ctrl)
> +			ccr_active++;
> +	}
> +
> +	if (ccr_active >= NVMF_CCR_LIMIT) {
> +		status = NVME_SC_CCR_LIMIT_EXCEEDED;
> +		goto out_unlock;
> +	}
> +	if (ccr_total >= NVMF_CCR_PER_PAGE) {
> +		status = NVME_SC_CCR_LOGPAGE_FULL;
> +		goto out_unlock;
> +	}
> +
> +	new_ccr = kmalloc_obj(*new_ccr, GFP_KERNEL);
> +	if (!new_ccr) {
> +		status = NVME_SC_INTERNAL;
> +		goto out_unlock;
> +	}
> +
> +	new_ccr->ciu = cmd->ccr.ciu;
> +	new_ccr->icid = cntlid;
> +	new_ccr->ctrl = ictrl;
> +	list_add_tail(&new_ccr->entry, &sctrl->ccr_list);
> +
> +out_unlock:
> +	mutex_unlock(&sctrl->lock);
> +	if (status == NVME_SC_SUCCESS)
> +		nvmet_ctrl_fatal_error(ictrl);
> +	nvmet_ctrl_put(ictrl);
> +out:
> +	nvmet_req_complete(req, status);
> +}
> +
>   u32 nvmet_admin_cmd_data_len(struct nvmet_req *req)
>   {
>   	struct nvme_command *cmd = req->cmd;
> @@ -1690,6 +1761,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
>   	case nvme_admin_keep_alive:
>   		req->execute = nvmet_execute_keep_alive;
>   		return 0;
> +	case nvme_admin_cross_ctrl_reset:
> +		req->execute = nvmet_execute_cross_ctrl_reset;
> +		return 0;
>   	default:
>   		return nvmet_report_invalid_opcode(req);
>   	}
> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> index e8b945a01f35..2e0c31d82bad 100644
> --- a/drivers/nvme/target/core.c
> +++ b/drivers/nvme/target/core.c
> @@ -117,6 +117,20 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
>   	return 0;
>   }
>   
> +void nvmet_ctrl_cleanup_ccrs(struct nvmet_ctrl *ctrl, bool all)
> +{
> +	struct nvmet_ccr *ccr, *tmp;
> +
> +	lockdep_assert_held(&ctrl->lock);
> +
> +	list_for_each_entry_safe(ccr, tmp, &ctrl->ccr_list, entry) {
> +		if (all || ccr->ctrl == NULL) {
> +			list_del(&ccr->entry);
> +			kfree(ccr);
> +		}
> +	}
> +}
> +
>   static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
>   {
>   	struct nvmet_ns *cur;
> @@ -1399,6 +1413,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
>   	if (!nvmet_is_disc_subsys(ctrl->subsys)) {
>   		ctrl->ciu = ((u8)(ctrl->ciu + 1)) ? : 1;
>   		ctrl->cirn = get_random_u64();
> +		nvmet_ctrl_cleanup_ccrs(ctrl, false);
>   	}
>   	ctrl->csts = NVME_CSTS_RDY;
>   
> @@ -1504,6 +1519,35 @@ struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
>   	return ctrl;
>   }
>   
> +struct nvmet_ctrl *nvmet_ctrl_find_get_ccr(struct nvmet_subsys *subsys,
> +					   const char *hostnqn, u8 ciu,
> +					   u16 cntlid, u64 cirn)
> +{
> +	struct nvmet_ctrl *ctrl, *ictrl = NULL;
> +	bool found = false;
> +
> +	mutex_lock(&subsys->lock);
> +	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
> +		if (ctrl->cntlid != cntlid)
> +			continue;
> +
> +		/* Avoid racing with a controller that is becoming ready */
> +		mutex_lock(&ctrl->lock);
> +		if (ctrl->ciu == ciu && ctrl->cirn == cirn)
> +			found = true;
> +		mutex_unlock(&ctrl->lock);
> +
> +		if (found) {
> +			if (kref_get_unless_zero(&ctrl->ref))
> +				ictrl = ctrl;
> +			break;
> +		}
> +	};
> +	mutex_unlock(&subsys->lock);
> +
> +	return ictrl;
> +}
> +
>   u16 nvmet_check_ctrl_status(struct nvmet_req *req)
>   {
>   	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
> @@ -1629,6 +1673,7 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
>   		subsys->clear_ids = 1;
>   #endif
>   
> +	INIT_LIST_HEAD(&ctrl->ccr_list);
>   	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
>   	INIT_LIST_HEAD(&ctrl->async_events);
>   	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
> @@ -1739,12 +1784,43 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
>   }
>   EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);
>   
> +static void nvmet_ctrl_complete_pending_ccr(struct nvmet_ctrl *ctrl)
> +{
> +	struct nvmet_subsys *subsys = ctrl->subsys;
> +	struct nvmet_ctrl *sctrl;
> +	struct nvmet_ccr *ccr;
> +
> +	lockdep_assert_held(&subsys->lock);
> +
> +	/* Cleanup all CCRs issued by ctrl as source controller */
> +	mutex_lock(&ctrl->lock);
> +	nvmet_ctrl_cleanup_ccrs(ctrl, true);
> +	mutex_unlock(&ctrl->lock);
> +
> +	/*
> +	 * Find all CCRs targeting ctrl as impacted controller and
> +	 * set ccr->ctrl to NULL. This tells the source controller
> +	 * that CCR completed successfully.
> +	 */
> +	list_for_each_entry(sctrl, &subsys->ctrls, subsys_entry) {
> +		mutex_lock(&sctrl->lock);
> +		list_for_each_entry(ccr, &sctrl->ccr_list, entry) {
> +			if (ccr->ctrl == ctrl) {
> +				ccr->ctrl = NULL;
> +				break;
> +			}
> +		}
> +		mutex_unlock(&sctrl->lock);
> +	}
> +}
> +

Do I see this correct that with this implementation a CCR is only 
complete once the controller resets? IOW the CCR has to wait for
the controller to be reset, but it does not invoke a controller reset
itself?

Is that intended?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Re: [PATCH v4 03/15] nvmet: Implement CCR nvme command
Posted by Mohamed Khalfella 1 day, 21 hours ago
On Mon 2026-03-30 12:45:57 +0200, Hannes Reinecke wrote:
> On 3/28/26 01:43, Mohamed Khalfella wrote:
> > Defined by TP8028 Rapid Path Failure Recovery, CCR (Cross-Controller
> > Reset) command is an nvme command issued to source controller by
> > initiator to reset impacted controller. Implement CCR command for linux
> > nvme target.
> > 
> > Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
> > ---
> >   drivers/nvme/target/admin-cmd.c | 74 ++++++++++++++++++++++++++++++++
> >   drivers/nvme/target/core.c      | 76 +++++++++++++++++++++++++++++++++
> >   drivers/nvme/target/nvmet.h     | 13 ++++++
> >   include/linux/nvme.h            | 23 ++++++++++
> >   4 files changed, 186 insertions(+)
> > 
> > diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
> > index ec09e30eca18..0a37c0eeebb5 100644
> > --- a/drivers/nvme/target/admin-cmd.c
> > +++ b/drivers/nvme/target/admin-cmd.c
> > @@ -376,7 +376,9 @@ static void nvmet_get_cmd_effects_admin(struct nvmet_ctrl *ctrl,
> >   	log->acs[nvme_admin_get_features] =
> >   	log->acs[nvme_admin_async_event] =
> >   	log->acs[nvme_admin_keep_alive] =
> > +	log->acs[nvme_admin_cross_ctrl_reset] =
> >   		cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
> > +
> >   }
> >   
> >   static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
> > @@ -1613,6 +1615,75 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
> >   	nvmet_req_complete(req, status);
> >   }
> >   
> > +void nvmet_execute_cross_ctrl_reset(struct nvmet_req *req)
> > +{
> > +	struct nvmet_ctrl *ictrl, *sctrl = req->sq->ctrl;
> > +	struct nvme_command *cmd = req->cmd;
> > +	struct nvmet_ccr *ccr, *new_ccr;
> > +	int ccr_active, ccr_total;
> > +	u16 cntlid, status = NVME_SC_SUCCESS;
> > +
> > +	cntlid = le16_to_cpu(cmd->ccr.icid);
> > +	if (sctrl->cntlid == cntlid) {
> > +		req->error_loc =
> > +			offsetof(struct nvme_cross_ctrl_reset_cmd, icid);
> > +		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
> > +		goto out;
> > +	}
> > +
> > +	/* Find and get impacted controller */
> > +	ictrl = nvmet_ctrl_find_get_ccr(sctrl->subsys, sctrl->hostnqn,
> > +					cmd->ccr.ciu, cntlid,
> > +					le64_to_cpu(cmd->ccr.cirn));
> > +	if (!ictrl) {
> > +		/* Immediate Reset Successful */
> > +		nvmet_set_result(req, 1);
> > +		status = NVME_SC_SUCCESS;
> > +		goto out;
> > +	}
> > +
> > +	ccr_total = ccr_active = 0;
> > +	mutex_lock(&sctrl->lock);
> > +	list_for_each_entry(ccr, &sctrl->ccr_list, entry) {
> > +		if (ccr->ctrl == ictrl) {
> > +			status = NVME_SC_CCR_IN_PROGRESS | NVME_STATUS_DNR;
> > +			goto out_unlock;
> > +		}
> > +
> > +		ccr_total++;
> > +		if (ccr->ctrl)
> > +			ccr_active++;
> > +	}
> > +
> > +	if (ccr_active >= NVMF_CCR_LIMIT) {
> > +		status = NVME_SC_CCR_LIMIT_EXCEEDED;
> > +		goto out_unlock;
> > +	}
> > +	if (ccr_total >= NVMF_CCR_PER_PAGE) {
> > +		status = NVME_SC_CCR_LOGPAGE_FULL;
> > +		goto out_unlock;
> > +	}
> > +
> > +	new_ccr = kmalloc_obj(*new_ccr, GFP_KERNEL);
> > +	if (!new_ccr) {
> > +		status = NVME_SC_INTERNAL;
> > +		goto out_unlock;
> > +	}
> > +
> > +	new_ccr->ciu = cmd->ccr.ciu;
> > +	new_ccr->icid = cntlid;
> > +	new_ccr->ctrl = ictrl;
> > +	list_add_tail(&new_ccr->entry, &sctrl->ccr_list);
> > +
> > +out_unlock:
> > +	mutex_unlock(&sctrl->lock);
> > +	if (status == NVME_SC_SUCCESS)
> > +		nvmet_ctrl_fatal_error(ictrl);
> > +	nvmet_ctrl_put(ictrl);
> > +out:
> > +	nvmet_req_complete(req, status);
> > +}
> > +
> >   u32 nvmet_admin_cmd_data_len(struct nvmet_req *req)
> >   {
> >   	struct nvme_command *cmd = req->cmd;
> > @@ -1690,6 +1761,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
> >   	case nvme_admin_keep_alive:
> >   		req->execute = nvmet_execute_keep_alive;
> >   		return 0;
> > +	case nvme_admin_cross_ctrl_reset:
> > +		req->execute = nvmet_execute_cross_ctrl_reset;
> > +		return 0;
> >   	default:
> >   		return nvmet_report_invalid_opcode(req);
> >   	}
> > diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> > index e8b945a01f35..2e0c31d82bad 100644
> > --- a/drivers/nvme/target/core.c
> > +++ b/drivers/nvme/target/core.c
> > @@ -117,6 +117,20 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
> >   	return 0;
> >   }
> >   
> > +void nvmet_ctrl_cleanup_ccrs(struct nvmet_ctrl *ctrl, bool all)
> > +{
> > +	struct nvmet_ccr *ccr, *tmp;
> > +
> > +	lockdep_assert_held(&ctrl->lock);
> > +
> > +	list_for_each_entry_safe(ccr, tmp, &ctrl->ccr_list, entry) {
> > +		if (all || ccr->ctrl == NULL) {
> > +			list_del(&ccr->entry);
> > +			kfree(ccr);
> > +		}
> > +	}
> > +}
> > +
> >   static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
> >   {
> >   	struct nvmet_ns *cur;
> > @@ -1399,6 +1413,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
> >   	if (!nvmet_is_disc_subsys(ctrl->subsys)) {
> >   		ctrl->ciu = ((u8)(ctrl->ciu + 1)) ? : 1;
> >   		ctrl->cirn = get_random_u64();
> > +		nvmet_ctrl_cleanup_ccrs(ctrl, false);
> >   	}
> >   	ctrl->csts = NVME_CSTS_RDY;
> >   
> > @@ -1504,6 +1519,35 @@ struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
> >   	return ctrl;
> >   }
> >   
> > +struct nvmet_ctrl *nvmet_ctrl_find_get_ccr(struct nvmet_subsys *subsys,
> > +					   const char *hostnqn, u8 ciu,
> > +					   u16 cntlid, u64 cirn)
> > +{
> > +	struct nvmet_ctrl *ctrl, *ictrl = NULL;
> > +	bool found = false;
> > +
> > +	mutex_lock(&subsys->lock);
> > +	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
> > +		if (ctrl->cntlid != cntlid)
> > +			continue;
> > +
> > +		/* Avoid racing with a controller that is becoming ready */
> > +		mutex_lock(&ctrl->lock);
> > +		if (ctrl->ciu == ciu && ctrl->cirn == cirn)
> > +			found = true;
> > +		mutex_unlock(&ctrl->lock);
> > +
> > +		if (found) {
> > +			if (kref_get_unless_zero(&ctrl->ref))
> > +				ictrl = ctrl;
> > +			break;
> > +		}
> > +	};
> > +	mutex_unlock(&subsys->lock);
> > +
> > +	return ictrl;
> > +}
> > +
> >   u16 nvmet_check_ctrl_status(struct nvmet_req *req)
> >   {
> >   	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
> > @@ -1629,6 +1673,7 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
> >   		subsys->clear_ids = 1;
> >   #endif
> >   
> > +	INIT_LIST_HEAD(&ctrl->ccr_list);
> >   	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
> >   	INIT_LIST_HEAD(&ctrl->async_events);
> >   	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
> > @@ -1739,12 +1784,43 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
> >   }
> >   EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);
> >   
> > +static void nvmet_ctrl_complete_pending_ccr(struct nvmet_ctrl *ctrl)
> > +{
> > +	struct nvmet_subsys *subsys = ctrl->subsys;
> > +	struct nvmet_ctrl *sctrl;
> > +	struct nvmet_ccr *ccr;
> > +
> > +	lockdep_assert_held(&subsys->lock);
> > +
> > +	/* Cleanup all CCRs issued by ctrl as source controller */
> > +	mutex_lock(&ctrl->lock);
> > +	nvmet_ctrl_cleanup_ccrs(ctrl, true);
> > +	mutex_unlock(&ctrl->lock);
> > +
> > +	/*
> > +	 * Find all CCRs targeting ctrl as impacted controller and
> > +	 * set ccr->ctrl to NULL. This tells the source controller
> > +	 * that CCR completed successfully.
> > +	 */
> > +	list_for_each_entry(sctrl, &subsys->ctrls, subsys_entry) {
> > +		mutex_lock(&sctrl->lock);
> > +		list_for_each_entry(ccr, &sctrl->ccr_list, entry) {
> > +			if (ccr->ctrl == ctrl) {
> > +				ccr->ctrl = NULL;
> > +				break;
> > +			}
> > +		}
> > +		mutex_unlock(&sctrl->lock);
> > +	}
> > +}
> > +
> 
> Do I see this correct that with this implementation a CCR is only 
> complete once the controller resets? IOW the CCR has to wait for
> the controller to be reset, but it does not invoke a controller reset
> itself?
> 
> Is that intended?

nvmet_execute_cross_ctrl_reset() calls nvmet_ctrl_fatal_error() to cause
the impacted controller to fail. The CCR is completed when the impacted
controller exits.