[PATCH v2 RESEND] EDAC/bluefield: Use Arm SMC for EMI access on BlueField-2

David Thompson posted 1 patch 1 month, 4 weeks ago
There is a newer version of this series
drivers/edac/bluefield_edac.c | 174 ++++++++++++++++++++++++++++++----
1 file changed, 155 insertions(+), 19 deletions(-)
[PATCH v2 RESEND] EDAC/bluefield: Use Arm SMC for EMI access on BlueField-2
Posted by David Thompson 1 month, 4 weeks ago
The BlueField EDAC driver supports the first generation BlueField-1 SoC,
but not the second generation BlueField-2 SoC.  The BlueField-2 SoC is
different in that only secure accesses are allowed to the External Memory
Interface (EMI) register block. On BlueField-2, all read/write accesses
from Linux to EMI registers are routed via Arm Secure Monitor Call (SMC)
through Arm Trusted Firmware (ATF), which runs at EL3 privileged state.

On BlueField-1, EMI registers are mapped and accessed directly. In order
to support BlueField-2, the driver's read and write access methods must
be extended with additional logic to include secure access to the EMI
registers via SMCs.

The driver's probe routine must check the ACPI table for presence of
the "sec_reg_block" property and ensure the minimum required SMC service
version is present before enabling the BlueField-2 secure access methods.
The "sec_reg_block" property is only present in BlueField-2 ACPI table,
not the BlueField-1 ACPI table.

Also, the bluefield_edac driver needs two coding style fixes: one fix
addresses an issue raised by checkpatch, and the other fix provides
consistency in declaration of #defines.

Signed-off-by: David Thompson <davthompson@nvidia.com>
Reviewed-by: Shravan Kumar Ramani <shravankr@nvidia.com>
---
v1 -> v2
a) removed #defines for SMC function IDs that are not used
b) added "bluefield_edac_" prefix to "secure_readl/writel" functions
c) added "bluefield_" prefix to "edac_readl/writel" functions
d) changed logic to use "uN" typedefs instead of "uintN_t"
e) added initialization of "priv->dev" in probe()
f) added more details to commit message

v0 -> v1
Updated commit message
---
 drivers/edac/bluefield_edac.c | 174 ++++++++++++++++++++++++++++++----
 1 file changed, 155 insertions(+), 19 deletions(-)

diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c
index 5b3164560648..4e0db1cbfbe7 100644
--- a/drivers/edac/bluefield_edac.c
+++ b/drivers/edac/bluefield_edac.c
@@ -47,13 +47,22 @@
 #define MLXBF_EDAC_MAX_DIMM_PER_MC	2
 #define MLXBF_EDAC_ERROR_GRAIN		8
 
+#define MLXBF_WRITE_REG_32		(0x82000009)
+#define MLXBF_READ_REG_32		(0x8200000A)
+#define MLXBF_SIP_SVC_VERSION		(0x8200ff03)
+
+#define MLXBF_SMCCC_ACCESS_VIOLATION	(-4)
+
+#define MLXBF_SVC_REQ_MAJOR		0
+#define MLXBF_SVC_REQ_MINOR		3
+
 /*
- * Request MLNX_SIP_GET_DIMM_INFO
+ * Request MLXBF_SIP_GET_DIMM_INFO
  *
  * Retrieve information about DIMM on a certain slot.
  *
  * Call register usage:
- * a0: MLNX_SIP_GET_DIMM_INFO
+ * a0: MLXBF_SIP_GET_DIMM_INFO
  * a1: (Memory controller index) << 16 | (Dimm index in memory controller)
  * a2-7: not used.
  *
@@ -61,7 +70,7 @@
  * a0: MLXBF_DIMM_INFO defined below describing the DIMM.
  * a1-3: not used.
  */
-#define MLNX_SIP_GET_DIMM_INFO		0x82000008
+#define MLXBF_SIP_GET_DIMM_INFO		0x82000008
 
 /* Format for the SMC response about the memory information */
 #define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0)
@@ -72,9 +81,12 @@
 #define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24)
 
 struct bluefield_edac_priv {
+	struct device *dev;
 	int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC];
 	void __iomem *emi_base;
 	int dimm_per_mc;
+	bool svc_sreg_support;
+	u32 sreg_tbl_edac;
 };
 
 static u64 smc_call1(u64 smc_op, u64 smc_arg)
@@ -86,6 +98,71 @@ static u64 smc_call1(u64 smc_op, u64 smc_arg)
 	return res.a0;
 }
 
+static int bluefield_edac_secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl)
+{
+	struct arm_smccc_res res;
+	int status;
+
+	arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr,
+		      0, 0, 0, 0, 0, &res);
+
+	status = res.a0;
+
+	switch (status) {
+	case SMCCC_RET_NOT_SUPPORTED:
+	case MLXBF_SMCCC_ACCESS_VIOLATION:
+		return -1;
+	default:
+		*result = (u32)res.a1;
+		return 0;
+	}
+}
+
+static int bluefield_edac_secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl)
+{
+	struct arm_smccc_res res;
+	int status;
+
+	arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr,
+		      0, 0, 0, 0, &res);
+
+	status = res.a0;
+
+	switch (status) {
+	case SMCCC_RET_NOT_SUPPORTED:
+	case MLXBF_SMCCC_ACCESS_VIOLATION:
+		return -1;
+	default:
+		return 0;
+	}
+}
+
+static int bluefield_edac_readl(void __iomem *addr, u32 *result,
+				bool sreg_support, u32 sreg_tbl)
+{
+	int err = 0;
+
+	if (sreg_support)
+		err = bluefield_edac_secure_readl(addr, result, sreg_tbl);
+	else
+		*result = readl(addr);
+
+	return err;
+}
+
+static int bluefield_edac_writel(void __iomem *addr, u32 data,
+				 bool sreg_support, u32 sreg_tbl)
+{
+	int err = 0;
+
+	if (sreg_support)
+		err = bluefield_edac_secure_writel(addr, data, sreg_tbl);
+	else
+		writel(data, addr);
+
+	return err;
+}
+
 /*
  * Gather the ECC information from the External Memory Interface registers
  * and report it to the edac handler.
@@ -99,7 +176,7 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
 	u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom;
 	enum hw_event_mc_err_type ecc_type;
 	u64 ecc_dimm_addr;
-	int ecc_dimm;
+	int ecc_dimm, err;
 
 	ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED :
 				   HW_EVENT_ERR_UNCORRECTED;
@@ -109,14 +186,22 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
 	 * registers with information about the last ECC error occurrence.
 	 */
 	ecc_latch_select = MLXBF_ECC_LATCH_SEL__START;
-	writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL);
+	err = bluefield_edac_writel(priv->emi_base + MLXBF_ECC_LATCH_SEL,
+				    ecc_latch_select, priv->svc_sreg_support,
+				    priv->sreg_tbl_edac);
+	if (err)
+		dev_err(priv->dev, "ECC latch select write failed.\n");
 
 	/*
 	 * Verify that the ECC reported info in the registers is of the
 	 * same type as the one asked to report. If not, just report the
 	 * error without the detailed information.
 	 */
-	dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM);
+	err = bluefield_edac_readl(priv->emi_base + MLXBF_SYNDROM, &dram_syndrom,
+				   priv->svc_sreg_support, priv->sreg_tbl_edac);
+	if (err)
+		dev_err(priv->dev, "DRAM syndrom read failed.\n");
+
 	serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
 	derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
 	syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom);
@@ -127,13 +212,24 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
 		return;
 	}
 
-	dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO);
+	err = bluefield_edac_readl(priv->emi_base + MLXBF_ADD_INFO, &dram_additional_info,
+				   priv->svc_sreg_support, priv->sreg_tbl_edac);
+	if (err)
+		dev_err(priv->dev, "DRAM additional info read failed.\n");
+
 	err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);
 
 	ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;
 
-	edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0);
-	edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1);
+	err = bluefield_edac_readl(priv->emi_base + MLXBF_ERR_ADDR_0, &edea0,
+				   priv->svc_sreg_support, priv->sreg_tbl_edac);
+	if (err)
+		dev_err(priv->dev, "Error addr 0 read failed.\n");
+
+	err = bluefield_edac_readl(priv->emi_base + MLXBF_ERR_ADDR_1, &edea1,
+				   priv->svc_sreg_support, priv->sreg_tbl_edac);
+	if (err)
+		dev_err(priv->dev, "Error addr 1 read failed.\n");
 
 	ecc_dimm_addr = ((u64)edea1 << 32) | edea0;
 
@@ -147,6 +243,7 @@ static void bluefield_edac_check(struct mem_ctl_info *mci)
 {
 	struct bluefield_edac_priv *priv = mci->pvt_info;
 	u32 ecc_count, single_error_count, double_error_count, ecc_error = 0;
+	int err;
 
 	/*
 	 * The memory controller might not be initialized by the firmware
@@ -155,7 +252,11 @@ static void bluefield_edac_check(struct mem_ctl_info *mci)
 	if (mci->edac_cap == EDAC_FLAG_NONE)
 		return;
 
-	ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT);
+	err = bluefield_edac_readl(priv->emi_base + MLXBF_ECC_CNT, &ecc_count,
+				   priv->svc_sreg_support, priv->sreg_tbl_edac);
+	if (err)
+		dev_err(priv->dev, "ECC count read failed.\n");
+
 	single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
 	double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);
 
@@ -172,8 +273,12 @@ static void bluefield_edac_check(struct mem_ctl_info *mci)
 	}
 
 	/* Write to clear reported errors. */
-	if (ecc_count)
-		writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR);
+	if (ecc_count) {
+		err = bluefield_edac_writel(priv->emi_base + MLXBF_ECC_ERR, ecc_error,
+					    priv->svc_sreg_support, priv->sreg_tbl_edac);
+		if (err)
+			dev_err(priv->dev, "ECC Error write failed.\n");
+	}
 }
 
 /* Initialize the DIMMs information for the given memory controller. */
@@ -189,7 +294,7 @@ static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
 		dimm = mci->dimms[i];
 
 		smc_arg = mem_ctrl_idx << 16 | i;
-		smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg);
+		smc_info = smc_call1(MLXBF_SIP_GET_DIMM_INFO, smc_arg);
 
 		if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) {
 			dimm->mtype = MEM_EMPTY;
@@ -244,6 +349,7 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev)
 	struct bluefield_edac_priv *priv;
 	struct device *dev = &pdev->dev;
 	struct edac_mc_layer layers[1];
+	struct arm_smccc_res res;
 	struct mem_ctl_info *mci;
 	struct resource *emi_res;
 	unsigned int mc_idx, dimm_count;
@@ -279,13 +385,44 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	priv = mci->pvt_info;
+	priv->dev = dev;
+
+	/*
+	 * The "sec_reg_block" property in the ACPI table determines the method
+	 * the driver uses to access the EMI registers:
+	 * a) property is not present - directly access registers via readl/writel
+	 * b) property is present - indirectly access registers via SMC calls
+	 *    (assuming required Silicon Provider service version found)
+	 */
+	if (device_property_read_u32(dev,
+				     "sec_reg_block", &priv->sreg_tbl_edac)) {
+		priv->svc_sreg_support = false;
+	} else {
+		/*
+		 * Check for minimum required Arm Silicon Provider (SiP) service
+		 * version, ensuring support of required SMC function IDs.
+		 */
+		arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
+		if (res.a0 == MLXBF_SVC_REQ_MAJOR &&
+		    res.a1 >= MLXBF_SVC_REQ_MINOR) {
+			priv->svc_sreg_support = true;
+		} else {
+			dev_err(dev, "Required SMCs are not supported.\n");
+			ret = -EINVAL;
+			goto err;
+		}
+	}
 
 	priv->dimm_per_mc = dimm_count;
-	priv->emi_base = devm_ioremap_resource(dev, emi_res);
-	if (IS_ERR(priv->emi_base)) {
-		dev_err(dev, "failed to map EMI IO resource\n");
-		ret = PTR_ERR(priv->emi_base);
-		goto err;
+	if (!priv->svc_sreg_support) {
+		priv->emi_base = devm_ioremap_resource(dev, emi_res);
+		if (IS_ERR(priv->emi_base)) {
+			dev_err(dev, "failed to map EMI IO resource\n");
+			ret = PTR_ERR(priv->emi_base);
+			goto err;
+		}
+	} else {
+		priv->emi_base = (void __iomem *)emi_res->start;
 	}
 
 	mci->pdev = dev;
@@ -320,7 +457,6 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev)
 	edac_mc_free(mci);
 
 	return ret;
-
 }
 
 static void bluefield_edac_mc_remove(struct platform_device *pdev)
-- 
2.30.1
Re: [PATCH v2 RESEND] EDAC/bluefield: Use Arm SMC for EMI access on BlueField-2
Posted by Borislav Petkov 1 month, 1 week ago
On Mon, Sep 30, 2024 at 11:20:30AM -0400, David Thompson wrote:
> The BlueField EDAC driver supports the first generation BlueField-1 SoC,
> but not the second generation BlueField-2 SoC.  The BlueField-2 SoC is
> different in that only secure accesses are allowed to the External Memory
> Interface (EMI) register block. On BlueField-2, all read/write accesses
> from Linux to EMI registers are routed via Arm Secure Monitor Call (SMC)
> through Arm Trusted Firmware (ATF), which runs at EL3 privileged state.
> 
> On BlueField-1, EMI registers are mapped and accessed directly. In order
> to support BlueField-2, the driver's read and write access methods must
> be extended with additional logic to include secure access to the EMI
> registers via SMCs.
> 
> The driver's probe routine must check the ACPI table for presence of
> the "sec_reg_block" property and ensure the minimum required SMC service
> version is present before enabling the BlueField-2 secure access methods.
> The "sec_reg_block" property is only present in BlueField-2 ACPI table,
> not the BlueField-1 ACPI table.
> 
> Also, the bluefield_edac driver needs two coding style fixes: one fix
> addresses an issue raised by checkpatch, and the other fix provides
> consistency in declaration of #defines.

Do not talk about *what* the patch is doing in the commit message - that
should be obvious from the diff itself. Rather, concentrate on the *why*
it needs to be done.

Imagine one fine day you're doing git archeology, you find the place in
the code about which you want to find out why it was changed the way it 
is now.

You do git annotate <filename> ... find the line, see the commit id and
you do:

git show <commit id>

You read the commit message and there's just gibberish and nothing's
explaining *why* that change was done. And you start scratching your head,
trying to figure out why. Because the damn commit message is not really
helping.

>  struct bluefield_edac_priv {
> +	struct device *dev;
>  	int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC];
>  	void __iomem *emi_base;
>  	int dimm_per_mc;
> +	bool svc_sreg_support;
> +	u32 sreg_tbl_edac;

Some comments above these members as to what they are, would be good.

>  static u64 smc_call1(u64 smc_op, u64 smc_arg)
> @@ -86,6 +98,71 @@ static u64 smc_call1(u64 smc_op, u64 smc_arg)
>  	return res.a0;
>  }
>  
> +static int bluefield_edac_secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl)

For all static functions' names:

s/bluefield_edac_//

> +{
> +	struct arm_smccc_res res;
> +	int status;
> +
> +	arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr,
> +		      0, 0, 0, 0, 0, &res);
> +
> +	status = res.a0;
> +
> +	switch (status) {
> +	case SMCCC_RET_NOT_SUPPORTED:
> +	case MLXBF_SMCCC_ACCESS_VIOLATION:
> +		return -1;
> +	default:
> +		*result = (u32)res.a1;
> +		return 0;
> +	}

This is a silly switch-case - just use a normal if-else.

> +}
> +
> +static int bluefield_edac_secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl)
> +{
> +	struct arm_smccc_res res;
> +	int status;
> +
> +	arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr,
> +		      0, 0, 0, 0, &res);
> +
> +	status = res.a0;
> +
> +	switch (status) {
> +	case SMCCC_RET_NOT_SUPPORTED:
> +	case MLXBF_SMCCC_ACCESS_VIOLATION:
> +		return -1;
> +	default:
> +		return 0;
> +	}

Ditto.

> @@ -109,14 +186,22 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
>  	 * registers with information about the last ECC error occurrence.
>  	 */
>  	ecc_latch_select = MLXBF_ECC_LATCH_SEL__START;
> -	writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL);
> +	err = bluefield_edac_writel(priv->emi_base + MLXBF_ECC_LATCH_SEL,
> +				    ecc_latch_select, priv->svc_sreg_support,
> +				    priv->sreg_tbl_edac);
> +	if (err)
> +		dev_err(priv->dev, "ECC latch select write failed.\n");
>  
>  	/*
>  	 * Verify that the ECC reported info in the registers is of the
>  	 * same type as the one asked to report. If not, just report the
>  	 * error without the detailed information.
>  	 */
> -	dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM);
> +	err = bluefield_edac_readl(priv->emi_base + MLXBF_SYNDROM, &dram_syndrom,
> +				   priv->svc_sreg_support, priv->sreg_tbl_edac);

You're passing three priv members down here. Why don't you simply pass down
@priv itself?

Ditto for the write routine.

> +	if (err)
> +		dev_err(priv->dev, "DRAM syndrom read failed.\n");
> +
>  	serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
>  	derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
>  	syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom);

...

> @@ -279,13 +385,44 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev)
>  		return -ENOMEM;
>  
>  	priv = mci->pvt_info;
> +	priv->dev = dev;
> +
> +	/*
> +	 * The "sec_reg_block" property in the ACPI table determines the method
> +	 * the driver uses to access the EMI registers:
> +	 * a) property is not present - directly access registers via readl/writel
> +	 * b) property is present - indirectly access registers via SMC calls
> +	 *    (assuming required Silicon Provider service version found)
> +	 */
> +	if (device_property_read_u32(dev,
> +				     "sec_reg_block", &priv->sreg_tbl_edac)) {

Please do not do such ugly linebreaks.

> +		priv->svc_sreg_support = false;
> +	} else {
> +		/*
> +		 * Check for minimum required Arm Silicon Provider (SiP) service
> +		 * version, ensuring support of required SMC function IDs.
> +		 */
> +		arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
> +		if (res.a0 == MLXBF_SVC_REQ_MAJOR &&
> +		    res.a1 >= MLXBF_SVC_REQ_MINOR) {
> +			priv->svc_sreg_support = true;
> +		} else {
> +			dev_err(dev, "Required SMCs are not supported.\n");
> +			ret = -EINVAL;
> +			goto err;
> +		}
> +	}

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette