[PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec

Felix Yan posted 1 patch 2 years, 3 months ago
drivers/nvme/host/core.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
[PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec
Posted by Felix Yan 2 years, 3 months ago
NVME 2.0 spec section 3.1.3 suggests that "Software should not rely on
0h being returned". Here we should safeguard timeout reads when CRTO is 0 and
fall back to the old NVME 1.4 compatible field.

Fixes 4TB SSD initialization issues with MAXIO MAP1602 controller, including
Lexar NM790, AIGO P7000Z, Fanxiang S790, Acer Predator GM7, etc.

----------
nvme nvme1: Device not ready; aborting initialisation, CSTS=0x0
----------

Signed-off-by: Felix Yan <felixonmars@archlinux.org>
---
 drivers/nvme/host/core.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f3a01b79148c..8ec28b1016ca 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2255,11 +2255,17 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 			return ret;
 		}
 
-		if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
-			ctrl->ctrl_config |= NVME_CC_CRIME;
-			timeout = NVME_CRTO_CRIMT(crto);
+		if (crto == 0) {
+			timeout = NVME_CAP_TIMEOUT(ctrl->cap);
+			dev_warn(ctrl->device, "Ignoring bogus CRTO (0), falling back to NVME_CAP_TIMEOUT (%u)\n",
+				timeout);
 		} else {
-			timeout = NVME_CRTO_CRWMT(crto);
+			if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
+				ctrl->ctrl_config |= NVME_CC_CRIME;
+				timeout = NVME_CRTO_CRIMT(crto);
+			} else {
+				timeout = NVME_CRTO_CRWMT(crto);
+			}
 		}
 	} else {
 		timeout = NVME_CAP_TIMEOUT(ctrl->cap);
-- 
2.42.0
Re: [PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec
Posted by Keith Busch 2 years, 3 months ago
On Fri, Sep 08, 2023 at 06:54:42PM +0300, Felix Yan wrote:
> NVME 2.0 spec section 3.1.3 suggests that "Software should not rely on
> 0h being returned". Here we should safeguard timeout reads when CRTO is 0 and
> fallback to the old NVME 1.4 compatible field.
> 
> Fixes 4TB SSD initialization issues with MAXIO MAP1602 controller, including
> Lexar NM790, AIGO P7000Z, Fanxiang S790, Acer Predator GM7, etc.
> 
> ----------
> nvme nvme1: Device not ready; aborting initialisation, CSTS=0x0
> ----------
> 
> Signed-off-by: Felix Yan <felixonmars@archlinux.org>
> ---
>  drivers/nvme/host/core.c | 14 ++++++++++----
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index f3a01b79148c..8ec28b1016ca 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2255,11 +2255,17 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
>  			return ret;
>  		}
>  
> -		if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
> -			ctrl->ctrl_config |= NVME_CC_CRIME;
> -			timeout = NVME_CRTO_CRIMT(crto);
> +		if (crto == 0) {
> +			timeout = NVME_CAP_TIMEOUT(ctrl->cap);
> +			dev_warn(ctrl->device, "Ignoring bogus CRTO (0), falling back to NVME_CAP_TIMEOUT (%u)\n",
> +				timeout);
>  		} else {
> -			timeout = NVME_CRTO_CRWMT(crto);
> +			if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
> +				ctrl->ctrl_config |= NVME_CC_CRIME;
> +				timeout = NVME_CRTO_CRIMT(crto);
> +			} else {
> +				timeout = NVME_CRTO_CRWMT(crto);
> +			}
>  		}
>  	} else {
>  		timeout = NVME_CAP_TIMEOUT(ctrl->cap);

What do you think about this change instead? We don't need to print a
warning on every device reset, but we should probably add a comment
explaining why this is happening.

---
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 37b6fa7466620..b4577a860e677 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2245,6 +2245,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 	else
 		ctrl->ctrl_config = NVME_CC_CSS_NVM;
 
+	timeout = NVME_CAP_TIMEOUT(ctrl->cap);
 	if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
 		u32 crto;
 
@@ -2257,12 +2258,15 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 
 		if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
 			ctrl->ctrl_config |= NVME_CC_CRIME;
-			timeout = NVME_CRTO_CRIMT(crto);
+			/*
+			 * CRIMT should always be greater or equal to CAP.TO,
+			 * but some devices are known to get this wrong. Use
+			 * the larger of the two values.
+			 */
+			timeout = max(timeout, NVME_CRTO_CRIMT(crto));
 		} else {
 			timeout = NVME_CRTO_CRWMT(crto);
 		}
-	} else {
-		timeout = NVME_CAP_TIMEOUT(ctrl->cap);
 	}
 
 	ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
--
Re: [PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec
Posted by Felix Yan 2 years, 3 months ago
On 9/12/23 02:00, Keith Busch wrote:
> What do you think about this change instead? We don't need to print a
> warning on every device reset, but we should probably add a comment
> explaining why this is happening.
> 
> ---
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 37b6fa7466620..b4577a860e677 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2245,6 +2245,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
>   	else
>   		ctrl->ctrl_config = NVME_CC_CSS_NVM;
>   
> +	timeout = NVME_CAP_TIMEOUT(ctrl->cap);
>   	if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
>   		u32 crto;
>   
> @@ -2257,12 +2258,15 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
>   
>   		if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
>   			ctrl->ctrl_config |= NVME_CC_CRIME;
> -			timeout = NVME_CRTO_CRIMT(crto);
> +			/*
> +			 * CRIMT should always be greater or equal to CAP.TO,
> +			 * but some devices are known to get this wrong. Use
> +			 * the larger of the two values.
> +			 */
> +			timeout = max(timeout, NVME_CRTO_CRIMT(crto));
>   		} else {
>   			timeout = NVME_CRTO_CRWMT(crto);
>   		}
> -	} else {
> -		timeout = NVME_CAP_TIMEOUT(ctrl->cap);
>   	}
>   
>   	ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;

I'm fine either way.

Should we also apply the same max() on the NVME_CRTO_CRIMT branch 
though? The spec actually says the same thing (Timeout should be FFh) 
for that too.

-- 
Regards,
Felix Yan

Re: [PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec
Posted by Keith Busch 2 years, 3 months ago
On Tue, Sep 12, 2023 at 09:26:19AM +0300, Felix Yan wrote:
> 
> Should we also apply the same max() on the NVME_CRTO_CRIMT branch though?
> The spec actually says the same thing (Timeout should be FFh) for that too.

The spec is weird here: the CAP.TO value depends on the CC setting, but
we read and cache CAP.TO before setting CC, so TO is always associated
to CRWMT. We'll need to refresh the CAP value after the initial CC
write, but before final CC.EN.

---
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 37b6fa7466620..4adc0b2f12f1e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2245,25 +2245,8 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 	else
 		ctrl->ctrl_config = NVME_CC_CSS_NVM;
 
-	if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
-		u32 crto;
-
-		ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
-		if (ret) {
-			dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
-				ret);
-			return ret;
-		}
-
-		if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
-			ctrl->ctrl_config |= NVME_CC_CRIME;
-			timeout = NVME_CRTO_CRIMT(crto);
-		} else {
-			timeout = NVME_CRTO_CRWMT(crto);
-		}
-	} else {
-		timeout = NVME_CAP_TIMEOUT(ctrl->cap);
-	}
+	if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS)
+		ctrl->ctrl_config |= NVME_CC_CRIME;
 
 	ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
 	ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
@@ -2277,6 +2260,33 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 	if (ret)
 		return ret;
 
+	/* CAP value may change after initial CC write */
+	ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
+	if (ret)
+		return ret;
+
+	timeout = NVME_CAP_TIMEOUT(ctrl->cap);
+	if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
+		u32 crto;
+
+		ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
+		if (ret) {
+			dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
+				ret);
+			return ret;
+		}
+
+		/*
+		 * CRTO should always be greater or equal to CAP.TO, but some
+		 * devices are known to get this wrong. Use the larger of the
+		 * two values.
+		 */
+		if (ctrl->ctrl_config & NVME_CC_CRIME)
+			timeout = max(timeout, NVME_CRTO_CRIMT(crto));
+		else
+			timeout = max(timeout, NVME_CRTO_CRWMT(crto));
+	}
+
 	ctrl->ctrl_config |= NVME_CC_ENABLE;
 	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
 	if (ret)
--
Re: [PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec
Posted by Felix Yan 2 years, 3 months ago
On 9/12/23 18:44, Keith Busch wrote:
> On Tue, Sep 12, 2023 at 09:26:19AM +0300, Felix Yan wrote:
>>
>> Should we also apply the same max() on the NVME_CRTO_CRIMT branch though?
>> The spec actually says the same thing (Timeout should be FFh) for that too.
> 
> The spec is weird here: the CAP.TO value depends on the CC setting, but
> we read and cache CAP.TO before setting CC, so TO is always associated
> to CRWMT. We'll need to refresh the CAP value after the initial CC
> write, but before final CC.EN.
> 
> ---
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 37b6fa7466620..4adc0b2f12f1e 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2245,25 +2245,8 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
>   	else
>   		ctrl->ctrl_config = NVME_CC_CSS_NVM;
>   
> -	if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
> -		u32 crto;
> -
> -		ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
> -		if (ret) {
> -			dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
> -				ret);
> -			return ret;
> -		}
> -
> -		if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
> -			ctrl->ctrl_config |= NVME_CC_CRIME;
> -			timeout = NVME_CRTO_CRIMT(crto);
> -		} else {
> -			timeout = NVME_CRTO_CRWMT(crto);
> -		}
> -	} else {
> -		timeout = NVME_CAP_TIMEOUT(ctrl->cap);
> -	}
> +	if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS)
> +		ctrl->ctrl_config |= NVME_CC_CRIME;
>   
>   	ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
>   	ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
> @@ -2277,6 +2260,33 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
>   	if (ret)
>   		return ret;
>   
> +	/* CAP value may change after initial CC write */
> +	ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
> +	if (ret)
> +		return ret;
> +
> +	timeout = NVME_CAP_TIMEOUT(ctrl->cap);
> +	if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
> +		u32 crto;
> +
> +		ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
> +		if (ret) {
> +			dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
> +				ret);
> +			return ret;
> +		}
> +
> +		/*
> +		 * CRTO should always be greater or equal to CAP.TO, but some
> +		 * devices are known to get this wrong. Use the larger of the
> +		 * two values.
> +		 */
> +		if (ctrl->ctrl_config & NVME_CC_CRIME)
> +			timeout = max(timeout, NVME_CRTO_CRIMT(crto));
> +		else
> +			timeout = max(timeout, NVME_CRTO_CRWMT(crto));
> +	}
> +
>   	ctrl->ctrl_config |= NVME_CC_ENABLE;
>   	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
>   	if (ret)

Thanks for getting into the details. This looks great to me.

I have also tried it on top of 6.5.2 kernel and verified that it works 
as expected.

-- 
Regards,
Felix Yan

Re: [PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec
Posted by Keith Busch 2 years, 3 months ago
On Mon, Sep 11, 2023 at 04:00:42PM -0700, Keith Busch wrote:
> @@ -2257,12 +2258,15 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
>  
>  		if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
>  			ctrl->ctrl_config |= NVME_CC_CRIME;
> -			timeout = NVME_CRTO_CRIMT(crto);
> +			/*
> +			 * CRIMT should always be greater or equal to CAP.TO,
> +			 * but some devices are known to get this wrong. Use
> +			 * the larger of the two values.
> +			 */
> +			timeout = max(timeout, NVME_CRTO_CRIMT(crto));
>  		} else {
>  			timeout = NVME_CRTO_CRWMT(crto);
>  		}

Er... please pretend I added the "max()" handling in the 'else' case
instead of the CRIMS case.

-- >8 --
@@ -2259,10 +2260,13 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 			ctrl->ctrl_config |= NVME_CC_CRIME;
 			timeout = NVME_CRTO_CRIMT(crto);
 		} else {
-			timeout = NVME_CRTO_CRWMT(crto);
+			/*
+			 * CRWMT should always be greater or equal to CAP.TO,
+			 * but some devices are known to get this wrong. Use
+			 * the larger of the two values.
+			 */
+			timeout = max(timeout, NVME_CRTO_CRWMT(crto));
 		}
Re: [PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec
Posted by Keith Busch 2 years, 3 months ago
On Fri, Sep 08, 2023 at 06:54:42PM +0300, Felix Yan wrote:
> NVME 2.0 spec section 3.1.3 suggests that "Software should not rely on
> 0h being returned". Here we should safeguard timeout reads when CRTO is 0 and
> fallback to the old NVME 1.4 compatible field.

Not sure I follow what you're saying here. We're not really relying on
CRTO being 0. It was a non-zero capability bit that told the driver to
use CRTO, and 0 is potentially a valid value a controller could report.

> Fixes 4TB SSD initialization issues with MAXIO MAP1602 controller, including
> Lexar NM790, AIGO P7000Z, Fanxiang S790, Acer Predator GM7, etc.

This patch makes more sense, thanks for getting to the bottom of it.

So the device reports CRWMS capability. The host is supposed to use the
CRTO.CRWMT in that case, and 0 could be legit. But spec also says CAP.TO
must match CRTO.CRWMT if it's less than 0xff. This obviously doesn't, so
your patch looks like a reasonable fallback to me. Maybe always just set
timeout to the bigger of the two values since CRWMT isn't reliable if
it's ever smaller than CAP.TO.

	timeout = max(NVME_CRTO_CRWMT(crto), NVME_CAP_TIMEOUT(ctrl->cap));

I'll add the Cc: stable when applying so they are sure to pick this up.
I'll just wait until next Monday to apply in case there are any other
reviewer comments.

> Signed-off-by: Felix Yan <felixonmars@archlinux.org>
> ---
>  drivers/nvme/host/core.c | 14 ++++++++++----
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index f3a01b79148c..8ec28b1016ca 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2255,11 +2255,17 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
>  			return ret;
>  		}
>  
> -		if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
> -			ctrl->ctrl_config |= NVME_CC_CRIME;
> -			timeout = NVME_CRTO_CRIMT(crto);
> +		if (crto == 0) {
> +			timeout = NVME_CAP_TIMEOUT(ctrl->cap);
> +			dev_warn(ctrl->device, "Ignoring bogus CRTO (0), falling back to NVME_CAP_TIMEOUT (%u)\n",
> +				timeout);
>  		} else {
> -			timeout = NVME_CRTO_CRWMT(crto);
> +			if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
> +				ctrl->ctrl_config |= NVME_CC_CRIME;
> +				timeout = NVME_CRTO_CRIMT(crto);
> +			} else {
> +				timeout = NVME_CRTO_CRWMT(crto);
> +			}
>  		}
>  	} else {
>  		timeout = NVME_CAP_TIMEOUT(ctrl->cap);
> -- 
> 2.42.0
>
Re: [PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec
Posted by Felix Yan 2 years, 3 months ago
On 9/8/23 19:51, Keith Busch wrote:
> On Fri, Sep 08, 2023 at 06:54:42PM +0300, Felix Yan wrote:
>> NVME 2.0 spec section 3.1.3 suggests that "Software should not rely on
>> 0h being returned". Here we should safeguard timeout reads when CRTO is 0 and
>> fallback to the old NVME 1.4 compatible field.
> 
> Not sure I follow what you're saying here. We're not really relying on
> CRTO being 0. It was a non-zero capability bit that told the driver to
> use CRTO, and 0 is potentially a valid value a controller could report.

Sorry, I actually meant the opposite: If the whole CRTO field is zeroed, 
we probably shouldn't rely on it, as the spec says.

I understand that 0 could be a valid value, the spec _may_ be a little 
bit unclear here then, if I understand it correctly.

-- 
Regards,
Felix Yan