[RFC PATCH v3] cxl/core: Work around CXL Port PM Init failure when ACS SV enabled

Fabio M. De Francesco posted 1 patch 1 day, 12 hours ago
drivers/cxl/core/pci.c        | 171 ++++++++++++++++++++++++++++++++++
drivers/cxl/core/port.c       |  19 ++++
drivers/cxl/cxlpci.h          |   3 +
include/uapi/linux/pci_regs.h |   2 +
4 files changed, 195 insertions(+)
[RFC PATCH v3] cxl/core: Work around CXL Port PM Init failure when ACS SV enabled
Posted by Fabio M. De Francesco 1 day, 12 hours ago
Compute Express Link (CXL) Specification Revision 4.0, Version 1.0, Section
8.1.5.1 - CXL Port Extension Status, Implementation Note, describes a
scenario where a CXL downstream port may fail PM initialization:

"Certain conditions such as Link Down, Secondary Bus Reset, or Downstream
Port Containment reset the Downstream Component’s bus number. If the
Component generates the CREDIT_RTN IP2PM message with Requester Bus=0, the
Downstream Port may reject the IP2PM message if software has enabled ACS
Source Validation. In this scenario, Power Management initialization may
fail to complete and another Secondary Bus Reset alone will not facilitate
recovery".

Implement the recommended workaround for this scenario, which involves
saving and disabling the ACS Source Validation and Bus Master Enable bits,
issuing a secondary bus reset, waiting for PM init to complete, and then
restoring those bits.

This is an RFC because it checks for PM init failures during port enumeration,
where an SBR might not be welcome. Maybe other call sites are more
appropriate for this workaround?

Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
---

Changes for v3: 
        - match the naming, capitalization, and indentation of two 
          definitions with the others in pci_regs.h (Bjorn)
            
Changes for v2: 
        - change subject label from the mistaken PCI (relic of an old 
          solution which was later discarded) to cxl/core
        - restore the storage class of sbr_masked() to static (another
          relic of the old solution)

 drivers/cxl/core/pci.c        | 171 ++++++++++++++++++++++++++++++++++
 drivers/cxl/core/port.c       |  19 ++++
 drivers/cxl/cxlpci.h          |   3 +
 include/uapi/linux/pci_regs.h |   2 +
 4 files changed, 195 insertions(+)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index f96ce884a213..1e75bd35b0f6 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -869,3 +869,174 @@ int cxl_port_get_possible_dports(struct cxl_port *port)
 
 	return ctx.count;
 }
+
+struct pci_saved_flags {
+	u16 pci_command;	/* PCI_COMMAND as read before BME was cleared */
+	u16 acs_ctrl;		/* ACS Control as read before SV was cleared */
+};
+
+/*
+ * pci_disable_acs_bme - disable ACS Source Validation and BME
+ * @pdev: downstream port
+ * @saved: pointer to struct pci_saved_flags, receives the register values
+ *
+ * Record the ACS Control and PCI_COMMAND registers in @saved, then clear
+ * ACS Source Validation and Bus Master Enable (BME) if they were set.
+ * Without an ACS capability nothing is changed and @saved is zeroed.
+ */
+static void pci_disable_acs_bme(struct pci_dev *pdev,
+				struct pci_saved_flags *saved)
+{
+	int pos = pdev->acs_cap;
+
+	/* Start from zero so the restore path never sees stack garbage */
+	saved->pci_command = 0;
+	saved->acs_ctrl = 0;
+
+	if (!pos)
+		return;
+
+	/* Write back only when SV is set; avoids a needless config write */
+	pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &saved->acs_ctrl);
+	if (saved->acs_ctrl & PCI_ACS_SV)
+		pci_write_config_word(pdev, pos + PCI_ACS_CTRL,
+				      saved->acs_ctrl & ~PCI_ACS_SV);
+
+	pci_read_config_word(pdev, PCI_COMMAND, &saved->pci_command);
+	if (saved->pci_command & PCI_COMMAND_MASTER)
+		pci_clear_master(pdev);
+}
+
+/*
+ * pci_enable_acs_bme - enable ACS Source Validation and BME
+ * @pdev: downstream port
+ * @saved: pointer to struct pci_saved_flags holding the values to restore
+ *
+ * Restore each bit only if @saved shows it was set before the disable.
+ */
+static void pci_enable_acs_bme(struct pci_dev *pdev,
+			       struct pci_saved_flags *saved)
+{
+	int pos = pdev->acs_cap;
+	u16 ctrl;
+
+	if (!pos)
+		return;
+
+	if (saved->acs_ctrl & PCI_ACS_SV) {
+		/* Re-read so only the SV bit differs from the live value */
+		pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);
+		pci_write_config_word(pdev, pos + PCI_ACS_CTRL,
+				      ctrl | PCI_ACS_SV);
+	}
+
+	/* No PCI_COMMAND read needed: the decision comes from @saved */
+	if (saved->pci_command & PCI_COMMAND_MASTER)
+		pci_set_master(pdev);
+}
+
+/**
+ * cxl_port_pm_init_is_complete - report a downstream port's PM init state
+ * @pdev: downstream port
+ *
+ * Look up the CXL Extensions DVSEC for Ports on @pdev and test the Port
+ * Power Management Initialization Complete bit of its
+ * PCI_DVSEC_CXL_PORT_EXT_STATUS register.
+ *
+ * Return: true when the bit is set; false when it is clear or when @pdev
+ * has no such DVSEC.
+ */
+bool cxl_port_pm_init_is_complete(struct pci_dev *pdev)
+{
+	u16 ext_status;
+	u16 port_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+						   PCI_DVSEC_CXL_PORT);
+
+	/* No CXL Extensions DVSEC for Ports: nothing to report as complete */
+	if (!port_dvsec)
+		return false;
+
+	pci_read_config_word(pdev,
+			     port_dvsec + PCI_DVSEC_CXL_PORT_EXT_STATUS,
+			     &ext_status);
+
+	if (FIELD_GET(PCI_DVSEC_CXL_PORT_EXT_STATUS_PM_INIT_COMP, ext_status))
+		return true;
+	return false;
+}
+
+/* Clear the Unmask SBR bit in the CXL Port Control register of @pdev */
+static void mask_sbr(struct pci_dev *pdev)
+{
+	int port_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+						   PCI_DVSEC_CXL_PORT);
+	u16 ctl;
+
+	/* Without the CXL port DVSEC there is no control register to touch */
+	if (!port_dvsec)
+		return;
+
+	pci_read_config_word(pdev, port_dvsec + PCI_DVSEC_CXL_PORT_CTL, &ctl);
+	ctl &= ~PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR;
+	pci_write_config_word(pdev, port_dvsec + PCI_DVSEC_CXL_PORT_CTL, ctl);
+}
+
+/* Set the Unmask SBR bit in the CXL Port Control register of @pdev */
+static void unmask_sbr(struct pci_dev *pdev)
+{
+	int port_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+						   PCI_DVSEC_CXL_PORT);
+	u16 ctl;
+
+	/* Without the CXL port DVSEC there is no control register to touch */
+	if (!port_dvsec)
+		return;
+
+	pci_read_config_word(pdev, port_dvsec + PCI_DVSEC_CXL_PORT_CTL, &ctl);
+	ctl |= PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR;
+	pci_write_config_word(pdev, port_dvsec + PCI_DVSEC_CXL_PORT_CTL, ctl);
+}
+
+/**
+ * cxl_port_retry_failed_pm_init - retry a downstream port's failed PM init
+ * @pdev: downstream port
+ *
+ * Workaround from CXL r4.0 Section 8.1.5.1, Implementation Note: clear ACS
+ * Source Validation and Bus Master Enable, unmask SBR, reset the bus, wait
+ * for PM Init Complete, then mask SBR and restore the saved bits.
+ *
+ * Return: 0 once PM init has completed, the pci_reset_bus() error if the
+ * reset failed, -ETIMEDOUT if PM init is still incomplete after the wait,
+ * or -EINVAL if the parent of @pdev is not a CXL port.
+ */
+int cxl_port_retry_failed_pm_init(struct pci_dev *pdev)
+{
+	struct pci_saved_flags saved;
+	int waited_ms, ret;
+
+	/* NOTE(review): parent of a PCI device is rarely a cxl_port; confirm */
+	if (!is_cxl_port(pdev->dev.parent))
+		return -EINVAL;
+
+	pci_disable_acs_bme(pdev, &saved);
+	unmask_sbr(pdev);
+
+	/* SBR. NOTE(review): confirm the bus below @pdev is the one reset */
+	ret = pci_reset_bus(pdev);
+	if (ret)
+		dev_dbg(&pdev->dev, "Bus reset failed: %d\n", ret);
+
+	/* Spec allows up to 100 ms; stop polling as soon as the bit is set */
+	for (waited_ms = 0; !ret && waited_ms < 100; waited_ms += 10) {
+		if (cxl_port_pm_init_is_complete(pdev))
+			break;
+		msleep(10);
+	}
+	if (!ret && !cxl_port_pm_init_is_complete(pdev))
+		ret = -ETIMEDOUT;
+
+	mask_sbr(pdev);
+	pci_enable_acs_bme(pdev, &saved);
+
+	return ret;
+}
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index b69c2529744c..62fabcdf0faf 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1829,12 +1829,31 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
 retry:
 	for (iter = dev; iter; iter = grandparent(iter)) {
 		struct device *dport_dev = grandparent(iter);
+		struct pci_dev *dport_pdev = to_pci_dev(dport_dev);
 		struct device *uport_dev;
 		struct cxl_dport *dport;
 
 		if (is_cxl_host_bridge(dport_dev))
 			return 0;
 
+		/*
+		 * Check the downstream port's PM init status, and if it has
+		 * failed retry PM init according to CXL Spec. 4.0 Sect. 8.1.5.1
+		 * - Implementation Note
+		 */
+		if (!cxl_port_pm_init_is_complete(dport_pdev)) {
+			dev_dbg(&cxlmd->dev,
+				"PM init failed for %s, retrying PM init\n",
+				dev_name(dport_dev));
+
+			cxl_port_retry_failed_pm_init(dport_pdev);
+
+			if (!cxl_port_pm_init_is_complete(dport_pdev))
+				dev_dbg(&cxlmd->dev,
+					"PM init failed retry for %s\n",
+					dev_name(dport_dev));
+		}
+
 		uport_dev = dport_dev->parent;
 		if (!uport_dev) {
 			dev_warn(dev, "at %s no parent for dport: %s\n",
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 0cf64218aa16..0458096be2c4 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -101,4 +101,7 @@ static inline void devm_cxl_port_ras_setup(struct cxl_port *port)
 }
 #endif
 
+bool cxl_port_pm_init_is_complete(struct pci_dev *pdev);
+int cxl_port_retry_failed_pm_init(struct pci_dev *pdev);
+
 #endif /* __CXL_PCI_H__ */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index ec1c54b5a310..6e0b81b2351b 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1369,6 +1369,8 @@
 
 /* CXL r4.0, 8.1.5: Extensions DVSEC for Ports */
 #define PCI_DVSEC_CXL_PORT				3
+#define  PCI_DVSEC_CXL_PORT_EXT_STATUS			0x0A	/* CXL Port Extension Status */
+#define   PCI_DVSEC_CXL_PORT_EXT_STATUS_PM_INIT_COMP	_BITUL(0) /* PM Init Complete */
 #define  PCI_DVSEC_CXL_PORT_CTL				0x0c
 #define   PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR		0x00000001
 
-- 
2.53.0