[PATCH v8 10/16] cxl/pci: Add log message if RAS registers are not mapped

Terry Bowman posted 16 patches 8 months, 3 weeks ago
There is a newer version of this series
[PATCH v8 10/16] cxl/pci: Add log message if RAS registers are not mapped
Posted by Terry Bowman 8 months, 3 weeks ago
The CXL RAS handlers do not currently log if the RAS registers are
unmapped. This is needed in order to help debug CXL error handling. Update
the CXL driver to log a warning message if the RAS register block is
unmapped during RAS error handling.

Also, refactor the __cxl_handle_cor_ras() function's check for status.
Change it to be consistent with the same status check in
__cxl_handle_ras().

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
 drivers/cxl/core/pci.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 1cf1ab4d9160..4770810b2138 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -656,15 +656,18 @@ static void __cxl_handle_cor_ras(struct device *dev,
 	void __iomem *addr;
 	u32 status;
 
-	if (!ras_base)
+	if (!ras_base) {
+		dev_warn_once(dev, "CXL RAS register block is not mapped");
 		return;
+	}
 
 	addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
 	status = readl(addr);
-	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
-		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
-		trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status);
-	}
+	if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
+		return;
+	writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+
+	trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status);
 }
 
 static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
@@ -700,8 +703,10 @@ static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base)
 	u32 status;
 	u32 fe;
 
-	if (!ras_base)
+	if (!ras_base) {
+		dev_warn_once(dev, "CXL RAS register block is not mapped");
 		return false;
+	}
 
 	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
 	status = readl(addr);
-- 
2.34.1
Re: [PATCH v8 10/16] cxl/pci: Add log message if RAS registers are not mapped
Posted by Jonathan Cameron 7 months, 3 weeks ago
On Wed, 26 Mar 2025 20:47:11 -0500
Terry Bowman <terry.bowman@amd.com> wrote:

> The CXL RAS handlers do not currently log if the RAS registers are
> unmapped. This is needed in order to help debug CXL error handling. Update
> the CXL driver to log a warning message if the RAS register block is
> unmapped during RAS error handling.
> 
> Also, refactor the __cxl_handle_cor_ras() functions check for status.
> Change it to be consistent with the same status check in
> __cxl_handle_cor_ras().

Not keen on an 'also' bit in here.  Seems entirely separable
into its own patch.

Two trivial one-thing patches seem better than one slightly larger one.
Actual changes seem fine to me so feel free to add
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
to resulting pair of patches.

> 
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> ---
>  drivers/cxl/core/pci.c | 17 +++++++++++------
>  1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 1cf1ab4d9160..4770810b2138 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -656,15 +656,18 @@ static void __cxl_handle_cor_ras(struct device *dev,
>  	void __iomem *addr;
>  	u32 status;
>  
> -	if (!ras_base)
> +	if (!ras_base) {
> +		dev_warn_once(dev, "CXL RAS register block is not mapped");
>  		return;
> +	}
>  
>  	addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
>  	status = readl(addr);
> -	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
> -		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> -		trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status);
> -	}
> +	if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
> +		return;
> +	writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> +
> +	trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status);
>  }
>  
>  static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
> @@ -700,8 +703,10 @@ static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base)
>  	u32 status;
>  	u32 fe;
>  
> -	if (!ras_base)
> +	if (!ras_base) {
> +		dev_warn_once(dev, "CXL RAS register block is not mapped");
>  		return false;
> +	}
>  
>  	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
>  	status = readl(addr);
Re: [PATCH v8 10/16] cxl/pci: Add log message if RAS registers are not mapped
Posted by Bowman, Terry 7 months, 3 weeks ago

On 4/23/2025 11:41 AM, Jonathan Cameron wrote:
> On Wed, 26 Mar 2025 20:47:11 -0500
> Terry Bowman <terry.bowman@amd.com> wrote:
>
>> The CXL RAS handlers do not currently log if the RAS registers are
>> unmapped. This is needed in order to help debug CXL error handling. Update
>> the CXL driver to log a warning message if the RAS register block is
>> unmapped during RAS error handling.
>>
>> Also, refactor the __cxl_handle_cor_ras() functions check for status.
>> Change it to be consistent with the same status check in
>> __cxl_handle_cor_ras().
> Not keen on an 'also' bit in here.  Seems entirely separable
> into its own patch.
>
> Two trivial one thing patches seems better than one slightly larger one.
> Actual changes seem fine to me so feel free to add
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> to resulting pair of patches.

Hi Jonathan,

I will split the patch as you recommend.

-Terry

>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>> ---
>>  drivers/cxl/core/pci.c | 17 +++++++++++------
>>  1 file changed, 11 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>> index 1cf1ab4d9160..4770810b2138 100644
>> --- a/drivers/cxl/core/pci.c
>> +++ b/drivers/cxl/core/pci.c
>> @@ -656,15 +656,18 @@ static void __cxl_handle_cor_ras(struct device *dev,
>>  	void __iomem *addr;
>>  	u32 status;
>>  
>> -	if (!ras_base)
>> +	if (!ras_base) {
>> +		dev_warn_once(dev, "CXL RAS register block is not mapped");
>>  		return;
>> +	}
>>  
>>  	addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
>>  	status = readl(addr);
>> -	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
>> -		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
>> -		trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status);
>> -	}
>> +	if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
>> +		return;
>> +	writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
>> +
>> +	trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status);
>>  }
>>  
>>  static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
>> @@ -700,8 +703,10 @@ static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base)
>>  	u32 status;
>>  	u32 fe;
>>  
>> -	if (!ras_base)
>> +	if (!ras_base) {
>> +		dev_warn_once(dev, "CXL RAS register block is not mapped");
>>  		return false;
>> +	}
>>  
>>  	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
>>  	status = readl(addr);