[PATCH v2 1/5] cxl/pci: Add RCH downstream port AER and RAS register discovery

Terry Bowman posted 5 patches 2 years, 10 months ago
There is a newer version of this series
[PATCH v2 1/5] cxl/pci: Add RCH downstream port AER and RAS register discovery
Posted by Terry Bowman 2 years, 10 months ago
Restricted CXL host (RCH) downstream port AER information is not currently
logged while in the error state. One problem preventing existing PCIe AER
functions from logging errors is that the AER registers are not accessible.
The CXL driver requires changes to find the RCH downstream port AER
registers for the purpose of error logging.

RCH downstream ports are not enumerated during a PCI bus scan and are
instead discovered using system firmware, ACPI in this case.[1] The
downstream port is implemented as a Root Complex Register Block (RCRB).
The RCRB is a 4k memory block containing PCIe registers based on the PCIe
root port.[2] The RCRB includes AER extended capability registers used for
reporting errors. Note, the RCH's AER Capability is located in the RCRB
memory space instead of PCI configuration space, thus its register access
is different. Existing kernel PCIe AER functions cannot be used to manage
the downstream port AER capabilities because the port was not enumerated
during the PCI scan and the registers are not accessible through PCI
config space.

Discover the RCH downstream port AER extended capability registers. This
requires using MMIO accesses to search for the AER extended capability in
the RCRB register space.

[1] CXL 3.0 Spec, 9.11.2 - System Firmware View of CXL 1.1 Hierarchy
[2] CXL 3.0 Spec, 8.2.1.1 - RCH Downstream Port RCRB

Co-developed-by: Robert Richter <rrichter@amd.com>
Signed-off-by: Robert Richter <rrichter@amd.com>
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
 drivers/cxl/core/regs.c | 93 +++++++++++++++++++++++++++++++++++------
 drivers/cxl/cxl.h       |  5 +++
 drivers/cxl/mem.c       | 41 ++++++++++++------
 3 files changed, 115 insertions(+), 24 deletions(-)

diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 1476a0299c9b..108a349d8101 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -332,10 +332,36 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
 }
 EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
 
+static void __iomem *cxl_map_reg(struct device *dev, struct cxl_register_map *map,
+				 char *name)
+{
+	/* Reserve the range, then map it into map->base; NULL on failure. */
+	if (!request_mem_region(map->resource, map->max_size, name))
+		return NULL;
+
+	map->base = ioremap(map->resource, map->max_size);
+	if (!map->base) {
+		release_mem_region(map->resource, map->max_size);
+		return NULL;
+	}
+
+	return map->base;
+}
+
+static void cxl_unmap_reg(struct device *dev, struct cxl_register_map *map)
+{
+	iounmap(map->base);	/* undo the ioremap() held in map->base */
+	release_mem_region(map->resource, map->max_size); /* @dev is not used */
+}
+
 resource_size_t cxl_rcrb_to_component(struct device *dev,
 				      resource_size_t rcrb,
 				      enum cxl_rcrb which)
 {
+	struct cxl_register_map map = {
+		.resource = rcrb,
+		.max_size = SZ_4K
+	};
 	resource_size_t component_reg_phys;
 	void __iomem *addr;
 	u32 bar0, bar1;
@@ -343,7 +369,10 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
 	u32 id;
 
 	if (which == CXL_RCRB_UPSTREAM)
-		rcrb += SZ_4K;
+		map.resource += SZ_4K;
+
+	if (!cxl_map_reg(dev, &map, "CXL RCRB"))
+		return CXL_RESOURCE_NONE;
 
 	/*
 	 * RCRB's BAR[0..1] point to component block containing CXL
@@ -351,21 +380,12 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
 	 * the PCI Base spec here, esp. 64 bit extraction and memory
 	 * ranges alignment (6.0, 7.5.1.2.1).
 	 */
-	if (!request_mem_region(rcrb, SZ_4K, "CXL RCRB"))
-		return CXL_RESOURCE_NONE;
-	addr = ioremap(rcrb, SZ_4K);
-	if (!addr) {
-		dev_err(dev, "Failed to map region %pr\n", addr);
-		release_mem_region(rcrb, SZ_4K);
-		return CXL_RESOURCE_NONE;
-	}
-
+	addr = map.base;
 	id = readl(addr + PCI_VENDOR_ID);
 	cmd = readw(addr + PCI_COMMAND);
 	bar0 = readl(addr + PCI_BASE_ADDRESS_0);
 	bar1 = readl(addr + PCI_BASE_ADDRESS_1);
-	iounmap(addr);
-	release_mem_region(rcrb, SZ_4K);
+	cxl_unmap_reg(dev, &map);
 
 	/*
 	 * Sanity check, see CXL 3.0 Figure 9-8 CXL Device that Does Not
@@ -396,3 +416,52 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
 	return component_reg_phys;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_rcrb_to_component, CXL);
+
+u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb)
+{
+	struct cxl_register_map map = {
+		.resource = rcrb,
+		.max_size = SZ_4K,
+	};
+	u32 cap_hdr;
+	u16 offset = 0;	/* the walk starts at the first dword of the RCRB */
+
+	if (!cxl_map_reg(dev, &map, "CXL RCRB"))
+		return 0;	/* the RCRB could not be mapped */
+
+	cap_hdr = readl(map.base + offset);
+	while (PCI_EXT_CAP_ID(cap_hdr) != PCI_EXT_CAP_ID_ERR) {
+		/* NOTE(review): no iteration bound; confirm bogus reads end the walk */
+		offset = PCI_EXT_CAP_NEXT(cap_hdr);
+		if (!offset) {	/* a zero next pointer ends the list */
+			cxl_unmap_reg(dev, &map);
+			return 0;
+		}
+		cap_hdr = readl(map.base + offset);
+	}
+
+	dev_dbg(dev, "found AER extended capability (0x%x)\n", offset);
+	cxl_unmap_reg(dev, &map);
+
+	return offset;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_rcrb_to_aer, CXL);
+
+u16 cxl_component_to_ras(struct device *dev, resource_size_t component_reg_phys)
+{
+	struct cxl_register_map map = {
+		.resource = component_reg_phys,
+		.max_size = CXL_COMPONENT_REG_BLOCK_SIZE,
+	};
+
+	if (!cxl_map_reg(dev, &map, "component"))
+		return 0;	/* the component block could not be mapped */
+
+	cxl_probe_component_regs(dev, map.base, &map.component_map);
+	cxl_unmap_reg(dev, &map);	/* only map.component_map is read below */
+	if (!map.component_map.ras.valid)
+		return 0;	/* the probe recorded no valid RAS entry */
+
+	return map.component_map.ras.offset;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_component_to_ras, CXL);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index d853a0238ad7..9fd7df48ce99 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -270,6 +270,9 @@ enum cxl_rcrb {
 resource_size_t cxl_rcrb_to_component(struct device *dev,
 				      resource_size_t rcrb,
 				      enum cxl_rcrb which);
+u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb);
+u16 cxl_component_to_ras(struct device *dev,
+			 resource_size_t component_reg_phys);
 
 #define CXL_RESOURCE_NONE ((resource_size_t) -1)
 #define CXL_TARGET_STRLEN 20
@@ -601,6 +604,8 @@ struct cxl_dport {
 	int port_id;
 	resource_size_t component_reg_phys;
 	resource_size_t rcrb;
+	u16 aer_cap;
+	u16 ras_cap;
 	bool rch;
 	struct cxl_port *port;
 };
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 39c4b54f0715..12e8e8ebaac0 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -45,13 +45,38 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data)
 	return 0;
 }
 
+static void cxl_rcrb_setup(struct cxl_dev_state *cxlds,
+			   struct cxl_dport *parent_dport)
+{
+	struct cxl_memdev *cxlmd  = cxlds->cxlmd;
+
+	if (!parent_dport->rch)
+		return;	/* only RCH dports are set up here */
+
+	/*
+	 * The component registers for an RCD might come from the
+	 * host-bridge RCRB if they are not already mapped via the
+	 * typical register locator mechanism.
+	 */
+	if (cxlds->component_reg_phys == CXL_RESOURCE_NONE)
+		cxlds->component_reg_phys = cxl_rcrb_to_component(
+			&cxlmd->dev, parent_dport->rcrb, CXL_RCRB_UPSTREAM);
+
+	/* RCH AER is required (CXL 3.0 Spec, Table 8-12); 0 means not found. */
+	parent_dport->aer_cap = cxl_rcrb_to_aer(parent_dport->dport,
+						parent_dport->rcrb);
+
+	/* RCH RAS is required (CXL 3.0 Spec, Table 8-22); 0 means not found. */
+	parent_dport->ras_cap = cxl_component_to_ras(parent_dport->dport,
+						     parent_dport->component_reg_phys);
+}
+
 static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
 				 struct cxl_dport *parent_dport)
 {
 	struct cxl_port *parent_port = parent_dport->port;
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_port *endpoint, *iter, *down;
-	resource_size_t component_reg_phys;
 	int rc;
 
 	/*
@@ -66,17 +91,9 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
 		ep->next = down;
 	}
 
-	/*
-	 * The component registers for an RCD might come from the
-	 * host-bridge RCRB if they are not already mapped via the
-	 * typical register locator mechanism.
-	 */
-	if (parent_dport->rch && cxlds->component_reg_phys == CXL_RESOURCE_NONE)
-		component_reg_phys = cxl_rcrb_to_component(
-			&cxlmd->dev, parent_dport->rcrb, CXL_RCRB_UPSTREAM);
-	else
-		component_reg_phys = cxlds->component_reg_phys;
-	endpoint = devm_cxl_add_port(host, &cxlmd->dev, component_reg_phys,
+	cxl_rcrb_setup(cxlds, parent_dport);
+
+	endpoint = devm_cxl_add_port(host, &cxlmd->dev, cxlds->component_reg_phys,
 				     parent_dport);
 	if (IS_ERR(endpoint))
 		return PTR_ERR(endpoint);
-- 
2.34.1
Re: [PATCH v2 1/5] cxl/pci: Add RCH downstream port AER and RAS register discovery
Posted by kernel test robot 2 years, 10 months ago
Hi Terry,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on pci/next]
[also build test WARNING on pci/for-linus efi/next cxl/next cxl/pending linus/master v6.3-rc3 next-20230324]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Terry-Bowman/cxl-pci-Add-RCH-downstream-port-AER-and-RAS-register-discovery/20230324-054044
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git next
patch link:    https://lore.kernel.org/r/20230323213808.398039-2-terry.bowman%40amd.com
patch subject: [PATCH v2 1/5] cxl/pci: Add RCH downstream port AER and RAS register discovery
config: parisc-randconfig-s043-20230322 (https://download.01.org/0day-ci/archive/20230324/202303241632.HTxJ0yfj-lkp@intel.com/config)
compiler: hppa-linux-gcc (GCC) 12.1.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.4-39-gce1a6720-dirty
        # https://github.com/intel-lab-lkp/linux/commit/7cbc5c7357504af79c820ad7d0e9369b4a580a65
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Terry-Bowman/cxl-pci-Add-RCH-downstream-port-AER-and-RAS-register-discovery/20230324-054044
        git checkout 7cbc5c7357504af79c820ad7d0e9369b4a580a65
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=parisc olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=parisc SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303241632.HTxJ0yfj-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> drivers/cxl/core/regs.c:340:24: sparse: sparse: Using plain integer as NULL pointer
   drivers/cxl/core/regs.c:345:24: sparse: sparse: Using plain integer as NULL pointer

vim +340 drivers/cxl/core/regs.c

   338	
   339		if (!request_mem_region(map->resource, map->max_size, name))
 > 340			return 0;
   341	
   342		map->base = ioremap(map->resource, map->max_size);
   343		if (!map->base) {
   344			release_mem_region(map->resource, map->max_size);
   345			return 0;
   346		}
   347	
   348		return map->base;
   349	}
   350	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests
Re: [PATCH v2 1/5] cxl/pci: Add RCH downstream port AER and RAS register discovery
Posted by Terry Bowman 2 years, 10 months ago

On 3/24/23 03:53, kernel test robot wrote:

> If you fix the issue, kindly add following tag where applicable
> | Reported-by: kernel test robot <lkp@intel.com>
> | Link: https://lore.kernel.org/oe-kbuild-all/202303241632.HTxJ0yfj-lkp@intel.com/
> 
> sparse warnings: (new ones prefixed by >>)
>>> drivers/cxl/core/regs.c:340:24: sparse: sparse: Using plain integer as NULL pointer
>    drivers/cxl/core/regs.c:345:24: sparse: sparse: Using plain integer as NULL pointer
> 
> vim +340 drivers/cxl/core/regs.c
> 
>    338	
>    339		if (!request_mem_region(map->resource, map->max_size, name))
>  > 340			return 0;
>    341	
>    342		map->base = ioremap(map->resource, map->max_size);
>    343		if (!map->base) {
>    344			release_mem_region(map->resource, map->max_size);
>    345			return 0;
>    346		}
>    347	
>    348		return map->base;
>    349	}
>    350	
> 

Yes, I will change the 0 return value to use NULL instead.

Regards,
Terry