[PATCH v15 6/9] cxl: Update error handlers to support CXL Port protocol errors

Terry Bowman posted 9 patches 4 days, 6 hours ago
[PATCH v15 6/9] cxl: Update error handlers to support CXL Port protocol errors
Posted by Terry Bowman 4 days, 6 hours ago
CXL Protocol errors are logged for Endpoints in cxl_handle_ras() and
cxl_handle_cor_ras(). The same is missing for CXL Port devices. The CXL
Port logging function is already present but needs a call added from
the handlers.

Update cxl_handle_ras() and cxl_handle_cor_ras() to call the CXL Port
trace logging function.

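Also, log an error in both functions when 'ras_base' is NULL. The existing
CXL Port trace events are used for devices that are not CXL memdevs, selected
with is_cxl_memdev(). Finally, change the return type of cxl_handle_ras()
from bool to pci_ers_result_t.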

Signed-off-by: Terry Bowman <terry.bowman@amd.com>

---

Changes in v14 -> v15:
- New commit
---
 drivers/cxl/core/core.h | 10 ++++++----
 drivers/cxl/core/ras.c  | 30 ++++++++++++++++++++++--------
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 92aea110817d..3b232e991b12 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -6,6 +6,7 @@
 
 #include <cxl/mailbox.h>
 #include <linux/rwsem.h>
+#include <linux/pci.h>
 
 extern const struct device_type cxl_nvdimm_bridge_type;
 extern const struct device_type cxl_nvdimm_type;
@@ -155,7 +156,8 @@ static inline struct device *dport_to_host(struct cxl_dport *dport)
 #ifdef CONFIG_CXL_RAS
 int cxl_ras_init(void);
 void cxl_ras_exit(void);
-bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base);
+pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial,
+				void __iomem *ras_base);
 void cxl_handle_cor_ras(struct device *dev, u64 serial,
 			void __iomem *ras_base);
 void cxl_dport_map_rch_aer(struct cxl_dport *dport);
@@ -168,10 +170,10 @@ static inline int cxl_ras_init(void)
 	return 0;
 }
 static inline void cxl_ras_exit(void) { }
-static inline bool cxl_handle_ras(struct device *dev, u64 serial,
-				  void __iomem *ras_base)
+static inline pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial,
+					      void __iomem *ras_base)
 {
-	return false;
+	return PCI_ERS_RESULT_NONE;
 }
 static inline void cxl_handle_cor_ras(struct device *dev, u64 serial,
 				      void __iomem *ras_base) { }
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 0216dafa6118..970ff3df442c 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -285,15 +285,22 @@ void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base)
 	void __iomem *addr;
 	u32 status;
 
-	if (!ras_base)
+	if (!ras_base) {
+		pr_err_ratelimited("%s: CXL RAS registers aren't mapped\n",
+				   dev_name(dev));
 		return;
+	}
 
 	addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
 	status = readl(addr);
-	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
-		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+	if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
+		return;
+
+	writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+	if (is_cxl_memdev(dev))
 		trace_cxl_aer_correctable_error(dev, status, serial);
-	}
+	else
+		trace_cxl_port_aer_correctable_error(dev, status);
 }
 
 /* CXL spec rev3.0 8.2.4.16.1 */
@@ -317,15 +324,19 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
  * Log the state of the RAS status registers and prepare them to log the
  * next error status. Return 1 if reset needed.
  */
-bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base)
+pci_ers_result_t
+cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base)
 {
 	u32 hl[CXL_HEADERLOG_SIZE_U32];
 	void __iomem *addr;
 	u32 status;
 	u32 fe;
 
-	if (!ras_base)
+	if (!ras_base) {
+		pr_err_ratelimited("%s: CXL RAS registers aren't mapped\n",
+				   dev_name(dev));
 		return false;
+	}
 
 	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
 	status = readl(addr);
@@ -344,10 +355,13 @@ bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base)
 	}
 
 	header_log_copy(ras_base, hl);
-	trace_cxl_aer_uncorrectable_error(dev, status, fe, hl, serial);
+	if (is_cxl_memdev(dev))
+		trace_cxl_aer_uncorrectable_error(dev, status, fe, hl, serial);
+	else
+		trace_cxl_port_aer_uncorrectable_error(dev, status, fe, hl);
 	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
 
-	return true;
+	return PCI_ERS_RESULT_PANIC;
 }
 
 static void cxl_port_cor_error_detected(struct device *dev)
-- 
2.34.1
Re: [PATCH v15 6/9] cxl: Update error handlers to support CXL Port protocol errors
Posted by Jonathan Cameron 3 days, 17 hours ago
On Mon, 2 Feb 2026 20:52:41 -0600
Terry Bowman <terry.bowman@amd.com> wrote:

> CXL Protocol errors are logged for Endpoints in cxl_handle_ras() and
> cxl_handle_cor_ras(). The same is missing for CXL Port devices. The CXL
> Port logging function is already present but needs a call added from
> the handlers.
> 
> Update cxl_handle_ras() and cxl_handle_cor_ras() to call the CXL Port
> trace logging function.
> 
> Also, add log messages in the case 'ras_base' is NULL. And, add calls to
> the existing CXL Port tracing in the same functions.
> 
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
The return value was already wrongly documented for cxl_handle_ras().
This patch makes that comment inaccurate in a different way, particularly as you
still return a bool value from a function that now returns pci_ers_result_t.
> 
> ---
> 
> Changes in v14 -> v15:
> - New commit
> ---
>  drivers/cxl/core/core.h | 10 ++++++----
>  drivers/cxl/core/ras.c  | 30 ++++++++++++++++++++++--------
>  2 files changed, 28 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 92aea110817d..3b232e991b12 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h

> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
> index 0216dafa6118..970ff3df442c 100644
> --- a/drivers/cxl/core/ras.c
> +++ b/drivers/cxl/core/ras.c

>  /* CXL spec rev3.0 8.2.4.16.1 */
> @@ -317,15 +324,19 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
>   * Log the state of the RAS status registers and prepare them to log the
>   * next error status. Return 1 if reset needed.

It didn't return 1 previously (it returned a bool), and it still doesn't now
(it returns a pci_ers_result_t). So the comment needs an update.

>   */
> -bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base)
> +pci_ers_result_t
> +cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base)
>  {
>  	u32 hl[CXL_HEADERLOG_SIZE_U32];
>  	void __iomem *addr;
>  	u32 status;
>  	u32 fe;
>  
> -	if (!ras_base)
> +	if (!ras_base) {
> +		pr_err_ratelimited("%s: CXL RAS registers aren't mapped\n",
> +				   dev_name(dev));
>  		return false;

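Returning false as a pci_ers_result_t? This should be a PCI_ERS_RESULT_* value
(presumably PCI_ERS_RESULT_NONE, matching the !CONFIG_CXL_RAS stub in core.h).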

> +	}
>  
>  	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
>  	status = readl(addr);
> @@ -344,10 +355,13 @@ bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base)
>  	}
>  
>  	header_log_copy(ras_base, hl);
> -	trace_cxl_aer_uncorrectable_error(dev, status, fe, hl, serial);
> +	if (is_cxl_memdev(dev))
> +		trace_cxl_aer_uncorrectable_error(dev, status, fe, hl, serial);
> +	else
> +		trace_cxl_port_aer_uncorrectable_error(dev, status, fe, hl);
>  	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
>  
> -	return true;
> +	return PCI_ERS_RESULT_PANIC;
>  }
>  
>  static void cxl_port_cor_error_detected(struct device *dev)