[PATCH v16 07/10] cxl: Update error handlers to support CXL Port devices

Terry Bowman posted 10 patches 7 hours ago
[PATCH v16 07/10] cxl: Update error handlers to support CXL Port devices
Posted by Terry Bowman 7 hours ago
CXL protocol error trace logging is performed for Endpoints in
cxl_handle_ras() and cxl_handle_cor_ras(). Trace logging support for CXL
Port devices is missing.

CXL Endpoint trace logging uses a different trace routine than CXL Port
device trace logging. Use is_cxl_memdev() to determine whether the device
is a CXL Endpoint or one of the CXL Port devices.

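Update cxl_handle_ras() and cxl_handle_cor_ras() to call the CXL Port trace
logging functions when the device is not a CXL Endpoint. Change the
cxl_handle_ras() return type from bool to pci_ers_result_t: it now returns
PCI_ERS_RESULT_PANIC when an uncorrectable error was logged and
PCI_ERS_RESULT_NONE otherwise.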

Log a rate-limited error message in both handlers when ras_base is NULL,
i.e. when the CXL RAS registers are not mapped.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>

---

Changes in v15 -> v16:
- New commit
---
 drivers/cxl/core/core.h | 10 ++++++----
 drivers/cxl/core/ras.c  | 36 +++++++++++++++++++++++++-----------
 2 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 76d2593e68c6..984cc37be186 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -6,6 +6,7 @@
 
 #include <cxl/mailbox.h>
 #include <linux/rwsem.h>
+#include <linux/pci.h>
 
 extern const struct device_type cxl_nvdimm_bridge_type;
 extern const struct device_type cxl_nvdimm_type;
@@ -181,7 +182,8 @@ static inline struct device *dport_to_host(struct cxl_dport *dport)
 #ifdef CONFIG_CXL_RAS
 int cxl_ras_init(void);
 void cxl_ras_exit(void);
-bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base);
+pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial,
+				void __iomem *ras_base);
 void cxl_handle_cor_ras(struct device *dev, u64 serial,
 			void __iomem *ras_base);
 void cxl_dport_map_rch_aer(struct cxl_dport *dport);
@@ -195,10 +197,10 @@ static inline int cxl_ras_init(void)
 	return 0;
 }
 static inline void cxl_ras_exit(void) { }
-static inline bool cxl_handle_ras(struct device *dev, u64 serial,
-				  void __iomem *ras_base)
+static inline pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial,
+					      void __iomem *ras_base)
 {
-	return false;
+	return PCI_ERS_RESULT_NONE;
 }
 static inline void cxl_handle_cor_ras(struct device *dev, u64 serial,
 				      void __iomem *ras_base) { }
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 48d3ef7cbb92..254144d19764 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -291,15 +291,22 @@ void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base)
 	void __iomem *addr;
 	u32 status;
 
-	if (!ras_base)
+	if (!ras_base) {
+		pr_err_ratelimited("%s: CXL RAS registers aren't mapped\n",
+				   dev_name(dev));
 		return;
+	}
 
 	addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
 	status = readl(addr);
-	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
-		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+	if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
+		return;
+
+	writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+	if (is_cxl_memdev(dev))
 		trace_cxl_aer_correctable_error(dev, status, serial);
-	}
+	else
+		trace_cxl_port_aer_correctable_error(dev, status);
 }
 
 /* CXL spec rev3.0 8.2.4.16.1 */
@@ -321,22 +328,26 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
 
 /*
  * Log the state of the RAS status registers and prepare them to log the
- * next error status. Return 1 if reset needed.
+ * next error status. Return PCI_ERS_RESULT_PANIC if reset needed.
  */
-bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base)
+pci_ers_result_t
+cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base)
 {
 	u32 hl[CXL_HEADERLOG_SIZE_U32];
 	void __iomem *addr;
 	u32 status;
 	u32 fe;
 
-	if (!ras_base)
-		return false;
+	if (!ras_base) {
+		pr_err_ratelimited("%s: CXL RAS registers aren't mapped\n",
+				   dev_name(dev));
+		return PCI_ERS_RESULT_NONE;
+	}
 
 	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
 	status = readl(addr);
 	if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
-		return false;
+		return PCI_ERS_RESULT_NONE;
 
 	/* If multiple errors, log header points to first error from ctrl reg */
 	if (hweight32(status) > 1) {
@@ -350,10 +361,13 @@ bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base)
 	}
 
 	header_log_copy(ras_base, hl);
-	trace_cxl_aer_uncorrectable_error(dev, status, fe, hl, serial);
+	if (is_cxl_memdev(dev))
+		trace_cxl_aer_uncorrectable_error(dev, status, fe, hl, serial);
+	else
+		trace_cxl_port_aer_uncorrectable_error(dev, status, fe, hl);
 	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
 
-	return true;
+	return PCI_ERS_RESULT_PANIC;
 }
 
 void cxl_cor_error_detected(struct pci_dev *pdev)
-- 
2.34.1