__cxl_handle_cor_ras() is missing logic to leave the function early in the
case there is no RAS error. Update __cxl_handle_cor_ras() to exit early in
the case where no RAS errors are detected after applying the mask.
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
drivers/cxl/core/pci.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 0f4c07fd64a5..f5f87c2c3fd5 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -677,10 +677,11 @@ static void __cxl_handle_cor_ras(struct device *dev, u64 serial,
addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
status = readl(addr);
- if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
- writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
- trace_cxl_aer_correctable_error(dev, serial, status);
- }
+ if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
+ return;
+ writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+
+ trace_cxl_aer_correctable_error(dev, serial, status);
}
static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
--
2.34.1
On Tue, 3 Jun 2025 12:22:34 -0500
Terry Bowman <terry.bowman@amd.com> wrote:
> __cxl_handle_cor_ras() is missing logic to leave the function early in the
> case there is no RAS error. Update __cxl_handle_cor_ras() to exit early in
> the case there is no RAS errors detected after applying the mask.
I'm all for this as a sensible cleanup, but the word 'missing' kind
of suggests a bug to me, whereas I don't see one.
Perhaps reword?
>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> ---
> drivers/cxl/core/pci.c | 9 +++++----
> 1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 0f4c07fd64a5..f5f87c2c3fd5 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -677,10 +677,11 @@ static void __cxl_handle_cor_ras(struct device *dev, u64 serial,
>
> addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
> status = readl(addr);
> - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
> - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> - trace_cxl_aer_correctable_error(dev, serial, status);
> - }
> + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
> + return;
> + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> +
> + trace_cxl_aer_correctable_error(dev, serial, status);
> }
>
> static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
On 6/12/2025 11:46 AM, Jonathan Cameron wrote:
> On Tue, 3 Jun 2025 12:22:34 -0500
> Terry Bowman <terry.bowman@amd.com> wrote:
>
>> __cxl_handle_cor_ras() is missing logic to leave the function early in the
>> case there is no RAS error. Update __cxl_handle_cor_ras() to exit early in
>> the case there is no RAS errors detected after applying the mask.
>
> I'm all for this as sensible cleanup, but the 'missing' kind
> of suggest a bug to me whereas I don't see one.
> Perhaps reword?
>
Yes, I will reword the commit message.
-Terry
>>
>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>> ---
>> drivers/cxl/core/pci.c | 9 +++++----
>> 1 file changed, 5 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>> index 0f4c07fd64a5..f5f87c2c3fd5 100644
>> --- a/drivers/cxl/core/pci.c
>> +++ b/drivers/cxl/core/pci.c
>> @@ -677,10 +677,11 @@ static void __cxl_handle_cor_ras(struct device *dev, u64 serial,
>>
>> addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
>> status = readl(addr);
>> - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
>> - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
>> - trace_cxl_aer_correctable_error(dev, serial, status);
>> - }
>> + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
>> + return;
>> + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
>> +
>> + trace_cxl_aer_correctable_error(dev, serial, status);
>> }
>>
>> static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
>
On 6/3/25 10:22 AM, Terry Bowman wrote:
> __cxl_handle_cor_ras() is missing logic to leave the function early in the
> case there is no RAS error. Update __cxl_handle_cor_ras() to exit early in
> the case there is no RAS errors detected after applying the mask.
This change is small enough that I would just fold it into the patch that introduces this function.
DJ
>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> ---
> drivers/cxl/core/pci.c | 9 +++++----
> 1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 0f4c07fd64a5..f5f87c2c3fd5 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -677,10 +677,11 @@ static void __cxl_handle_cor_ras(struct device *dev, u64 serial,
>
> addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
> status = readl(addr);
> - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
> - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> - trace_cxl_aer_correctable_error(dev, serial, status);
> - }
> + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
> + return;
> + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> +
> + trace_cxl_aer_correctable_error(dev, serial, status);
> }
>
> static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
On 6/6/2025 3:30 PM, Dave Jiang wrote:
>
> On 6/3/25 10:22 AM, Terry Bowman wrote:
>> __cxl_handle_cor_ras() is missing logic to leave the function early in the
>> case there is no RAS error. Update __cxl_handle_cor_ras() to exit early in
>> the case there is no RAS errors detected after applying the mask.
> This change is small enough that I would just fold it into the patch that introduces this function.
>
> DJ
I agree. The problem is it was already present before this series. This is a 'fix'. I had this change in:
[PATCH v9 09/16] cxl/pci: Log message if RAS registers are unmapped
but was asked to move it out because it appeared as an unrelated miscellaneous patch.
Terry
>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>> ---
>> drivers/cxl/core/pci.c | 9 +++++----
>> 1 file changed, 5 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>> index 0f4c07fd64a5..f5f87c2c3fd5 100644
>> --- a/drivers/cxl/core/pci.c
>> +++ b/drivers/cxl/core/pci.c
>> @@ -677,10 +677,11 @@ static void __cxl_handle_cor_ras(struct device *dev, u64 serial,
>>
>> addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
>> status = readl(addr);
>> - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
>> - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
>> - trace_cxl_aer_correctable_error(dev, serial, status);
>> - }
>> + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
>> + return;
>> + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
>> +
>> + trace_cxl_aer_correctable_error(dev, serial, status);
>> }
>>
>> static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
On 6/6/25 1:55 PM, Bowman, Terry wrote:
>
>
> On 6/6/2025 3:30 PM, Dave Jiang wrote:
>>
>> On 6/3/25 10:22 AM, Terry Bowman wrote:
>>> __cxl_handle_cor_ras() is missing logic to leave the function early in the
>>> case there is no RAS error. Update __cxl_handle_cor_ras() to exit early in
>>> the case there is no RAS errors detected after applying the mask.
>> This change is small enough that I would just fold it into the patch that introduces this function.
>>
>> DJ
> I agree. The problem is it was already present before this series. This is a 'fix'. I had this change in:
> [PATCH v9 09/16] cxl/pci: Log message if RAS registers are unmapped
> but was asked to move out because it appeared as an unrelated miscellaneous patch.
Ok. Then
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
>
> Terry
>>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>> ---
>>> drivers/cxl/core/pci.c | 9 +++++----
>>> 1 file changed, 5 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>>> index 0f4c07fd64a5..f5f87c2c3fd5 100644
>>> --- a/drivers/cxl/core/pci.c
>>> +++ b/drivers/cxl/core/pci.c
>>> @@ -677,10 +677,11 @@ static void __cxl_handle_cor_ras(struct device *dev, u64 serial,
>>>
>>> addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
>>> status = readl(addr);
>>> - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
>>> - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
>>> - trace_cxl_aer_correctable_error(dev, serial, status);
>>> - }
>>> + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
>>> + return;
>>> + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
>>> +
>>> + trace_cxl_aer_correctable_error(dev, serial, status);
>>> }
>>>
>>> static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
>
On 6/3/25 10:22 AM, Terry Bowman wrote:
> __cxl_handle_cor_ras() is missing logic to leave the function early in the
> case there is no RAS error. Update __cxl_handle_cor_ras() to exit early in
> the case there is no RAS errors detected after applying the mask.
>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> ---
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> drivers/cxl/core/pci.c | 9 +++++----
> 1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 0f4c07fd64a5..f5f87c2c3fd5 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -677,10 +677,11 @@ static void __cxl_handle_cor_ras(struct device *dev, u64 serial,
>
> addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
> status = readl(addr);
> - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
> - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> - trace_cxl_aer_correctable_error(dev, serial, status);
> - }
> + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
> + return;
> + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> +
> + trace_cxl_aer_correctable_error(dev, serial, status);
> }
>
> static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer
© 2016 - 2025 Red Hat, Inc.