[PATCH v3 3/5] RAS/AEST: Introduce AEST inject interface to test AEST driver

Ruidong Tian posted 5 patches 11 months ago
[PATCH v3 3/5] RAS/AEST: Introduce AEST inject interface to test AEST driver
Posted by Ruidong Tian 11 months ago
The AEST injection interface helps to test how the AEST driver processes
an error record that reports an error.

This interface only raises a software-simulated error, not a hardware error.

Example1:

1. write RAS register value to err_* file:
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_fr
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_status
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_addr
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_*

2. trigger the error:
echo -1 > <debugfs>/aest/<dev>/<node>/inject/inject

The AEST driver will process this error using the error register values
specified by the user.

Example2:

1. just trigger the error:
echo n (0 <= n < record_count) > <debugfs>/aest/<dev>/<node>/inject/inject

The AEST driver will process this error using the error register values
read from record<n> of this node.

Signed-off-by: Ruidong Tian <tianruidong@linux.alibaba.com>
---
 Documentation/ABI/testing/debugfs-aest |  17 +++
 drivers/ras/aest/Makefile              |   1 +
 drivers/ras/aest/aest-inject.c         | 151 +++++++++++++++++++++++++
 drivers/ras/aest/aest-sysfs.c          |   8 +-
 drivers/ras/aest/aest.h                |   2 +
 5 files changed, 177 insertions(+), 2 deletions(-)
 create mode 100644 drivers/ras/aest/aest-inject.c

diff --git a/Documentation/ABI/testing/debugfs-aest b/Documentation/ABI/testing/debugfs-aest
index 39d9c85843ef..4d3f4464cf98 100644
--- a/Documentation/ABI/testing/debugfs-aest
+++ b/Documentation/ABI/testing/debugfs-aest
@@ -96,3 +96,20 @@ KernelVersion	6.10
 Contact:	Ruidong Tian <tianruidong@linux.alibaba.com>
 Description:
 		(RO) Outputs error statistics for all this records.
+
+What:		/sys/devices/platform/AEST.<UID>/<Node_name>/inject/err_*
+Date:		June 2024
+KernelVersion	6.10
+Contact:	Ruidong Tian <tianruidong@linux.alibaba.com>
+Description:
+		(RW) Write any integer to this file to trigger the error
+		injection. Make sure you have specified all necessary error
+		parameters, i.e. this write should be the last step when
+		injecting errors.
+
+		Accepted values: -1 or n (0 <= n < <record_count>).
+		-1 : Make sure you have written all err_* files first; the
+		     driver uses these err_* values to process an AEST error.
+		n : The driver reads record<n> of this error node to collect
+		    the error register values, and uses them to process an
+		    AEST error.
diff --git a/drivers/ras/aest/Makefile b/drivers/ras/aest/Makefile
index 75495413d2b6..5ee10fc8b2e9 100644
--- a/drivers/ras/aest/Makefile
+++ b/drivers/ras/aest/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_AEST) 	+= aest.o
 
 aest-y		:= aest-core.o
 aest-y		+= aest-sysfs.o
+aest-y		+= aest-inject.o
diff --git a/drivers/ras/aest/aest-inject.c b/drivers/ras/aest/aest-inject.c
new file mode 100644
index 000000000000..2ca074aa021c
--- /dev/null
+++ b/drivers/ras/aest/aest-inject.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Error Source Table Support
+ *
+ * Copyright (c) 2024, Alibaba Group.
+ */
+
+#include "aest.h"
+
+static struct ras_ext_regs regs_inj;	/* staged register image used by every inject accessor */
+static u64 hard_inject_val;		/* value hard_inject_store() writes out */
+
+struct inj_attr {	/* NOTE(review): not referenced anywhere in this file */
+	struct attribute attr;
+	ssize_t (*show)(struct aest_node *n, struct inj_attr *a, char *b);
+	ssize_t (*store)(struct aest_node *n, struct inj_attr *a, const char *b,
+				size_t c);
+};
+
+struct aest_inject {	/* NOTE(review): not referenced anywhere in this file */
+	struct aest_node *node;
+	struct kobject kobj;
+};
+/* container_of() helpers for the two types above; also unreferenced here. */
+#define to_inj(k)	container_of(k, struct aest_inject, kobj)
+#define to_inj_attr(a)	container_of(a, struct inj_attr, attr)
+
+/* Sysreg-type read hook: fetch the 64-bit regs_inj slot at byte @offset. */
+static u64 aest_sysreg_read_inject(void *__unused, u32 offset)
+{
+	/* View the staged image as an array of u64 slots, 8 bytes apiece. */
+	return ((u64 *)&regs_inj)[offset / 8];
+}
+
+/* Sysreg-type write hook: store @val in the regs_inj slot at byte @offset. */
+static void aest_sysreg_write_inject(void *base, u32 offset, u64 val)
+{
+	/* @base is ignored; only the staged image is written. */
+	((u64 *)&regs_inj)[offset / 8] = val;
+}
+
+/* MMIO-type read hook: fetch the 64-bit regs_inj slot at byte @offset. */
+static u64 aest_iomem_read_inject(void *base, u32 offset)
+{
+	/* @base is ignored; the value comes from the staged image. */
+	return ((u64 *)&regs_inj)[offset / 8];
+}
+
+/* MMIO-type write hook: store @val in the regs_inj slot at byte @offset. */
+static void aest_iomem_write_inject(void *base, u32 offset, u64 val)
+{
+	/* @base is ignored; only the staged image is written. */
+	((u64 *)&regs_inj)[offset / 8] = val;
+}
+
+/* Access hooks per AEST interface type; inject_store() indexes this by type. */
+static struct aest_access aest_access_inject[] = {
+	[ACPI_AEST_NODE_SYSTEM_REGISTER] = {
+		.read = aest_sysreg_read_inject,
+		.write = aest_sysreg_write_inject,
+	},
+	[ACPI_AEST_NODE_MEMORY_MAPPED] = {
+		.read = aest_iomem_read_inject,
+		.write = aest_iomem_write_inject,
+	},
+	[ACPI_AEST_NODE_SINGLE_RECORD_MEMORY_MAPPED] = {
+		.read = aest_iomem_read_inject,
+		.write = aest_iomem_write_inject,
+	},
+	{ }
+};
+
+/*
+ * Software injection: -1 processes the err_* values already staged in regs_inj;
+ * n (0 <= n < error_record_count) first snapshots record n into regs_inj.
+ * Returns -EINVAL for any other value and -EIO if no record was processed.
+ */
+static int inject_store(void *data, u64 val)
+{
+	s64 idx = (s64)val;	/* signed attribute below lets "-1" parse */
+	int count = 0;
+	struct aest_record record_inj = {0}, *record;
+	struct aest_node node_inj, *node = data;
+
+	if (idx < -1 || idx >= (s64)node->info->interface_hdr->error_record_count)
+		return -EINVAL;
+
+	memcpy(&node_inj, node, sizeof(*node));
+	node_inj.name = "AEST-injection";
+	record_inj.access = &aest_access_inject[node->info->interface_hdr->type];
+	record_inj.node = &node_inj;
+	record_inj.index = idx;
+	if (idx >= 0) {
+		record = &node->records[idx];
+		regs_inj.err_fr = record_read(record, ERXFR);
+		regs_inj.err_ctlr = record_read(record, ERXCTLR);
+		regs_inj.err_status = record_read(record, ERXSTATUS);
+		regs_inj.err_addr = record_read(record, ERXADDR);
+		regs_inj.err_misc[0] = record_read(record, ERXMISC0);
+		regs_inj.err_misc[1] = record_read(record, ERXMISC1);
+		regs_inj.err_misc[2] = record_read(record, ERXMISC2);
+		regs_inj.err_misc[3] = record_read(record, ERXMISC3);
+	}
+	record_inj.fr = regs_inj.err_fr;	/* aest_proc_record() reports record->fr */
+	regs_inj.err_status |= ERR_STATUS_V;	/* records without V are skipped */
+	aest_proc_record(&record_inj, &count);
+	return count == 1 ? 0 : -EIO;
+}
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(inject_ops, NULL, inject_store, "%lld\n");
+
+/* Write hard_inject_val to record @val's injection control; needs node->inj. */
+static int hard_inject_store(void *data, u64 val)
+{
+	struct aest_node *node = data;
+
+	if (!node->inj)
+		return -EPERM;
+	if (val >= node->record_count)	/* valid indices are 0..record_count-1 */
+		return -ENODEV;
+
+	if (node->type == ACPI_AEST_PROCESSOR_ERROR_NODE) {
+		aest_select_record(node, val);
+		write_sysreg_s(hard_inject_val, SYS_ERXPFGCTL_EL1);
+		write_sysreg_s(0x100, SYS_ERXPFGCDN_EL1);
+		aest_sync(node);
+	} else {
+		writeq_relaxed(hard_inject_val, node->inj + val * 8);
+	}
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(hard_inject_ops, NULL, hard_inject_store, "%llu\n");
+
+/* Populate <node>/inject/; every node's err_* files alias the one regs_inj. */
+void aest_inject_init_debugfs(struct aest_node *node)
+{
+	struct dentry *inj;
+
+	inj = debugfs_create_dir("inject", node->debugfs);
+	debugfs_create_u64("err_fr", 0600, inj, &regs_inj.err_fr);
+	debugfs_create_u64("err_ctrl", 0600, inj, &regs_inj.err_ctlr);
+	debugfs_create_u64("err_status", 0600, inj, &regs_inj.err_status);
+	debugfs_create_u64("err_addr", 0600, inj, &regs_inj.err_addr);
+	debugfs_create_u64("err_misc0", 0600, inj, &regs_inj.err_misc[0]);
+	debugfs_create_u64("err_misc1", 0600, inj, &regs_inj.err_misc[1]);
+	debugfs_create_u64("err_misc2", 0600, inj, &regs_inj.err_misc[2]);
+	debugfs_create_u64("err_misc3", 0600, inj, &regs_inj.err_misc[3]);
+	/* Trigger files only implement a store handler, so make them write-only. */
+	debugfs_create_file("inject", 0200, inj, node, &inject_ops);
+	debugfs_create_file("hard_inject", 0200, inj, node, &hard_inject_ops);
+	debugfs_create_u64("hard_inject_val", 0600, inj, &hard_inject_val);
+}
diff --git a/drivers/ras/aest/aest-sysfs.c b/drivers/ras/aest/aest-sysfs.c
index f19cd2b5edb2..ba913556fc03 100644
--- a/drivers/ras/aest/aest-sysfs.c
+++ b/drivers/ras/aest/aest-sysfs.c
@@ -192,8 +192,8 @@ aest_oncore_dev_init_debugfs(struct aest_device *adev)
 	for_each_possible_cpu(cpu) {
 		percpu_dev = this_cpu_ptr(adev->adev_oncore);
 
-		snprintf(name, sizeof(name), "processor%u", cpu);
-		percpu_dev->debugfs = debugfs_create_dir(name, aest_debugfs);
+		snprintf(name, sizeof(name), "CPU%u", cpu);
+		percpu_dev->debugfs = debugfs_create_dir(name, adev->debugfs);
 
 		for (i = 0; i < adev->node_cnt; i++) {
 			node = &adev->nodes[i];
@@ -210,6 +210,9 @@ void aest_dev_init_debugfs(struct aest_device *adev)
 	int i;
 	struct aest_node *node;
 
+	if (!aest_debugfs)
+		dev_err(adev->dev, "debugfs not enabled\n");
+
 	adev->debugfs = debugfs_create_dir(dev_name(adev->dev), aest_debugfs);
 	if (aest_dev_is_oncore(adev)) {
 		aest_oncore_dev_init_debugfs(adev);
@@ -222,5 +225,6 @@ void aest_dev_init_debugfs(struct aest_device *adev)
 			continue;
 		node->debugfs = debugfs_create_dir(node->name, adev->debugfs);
 		aest_node_init_debugfs(node);
+		aest_inject_init_debugfs(node);
 	}
 }
diff --git a/drivers/ras/aest/aest.h b/drivers/ras/aest/aest.h
index d9a52e39b1b9..90a96e2666d3 100644
--- a/drivers/ras/aest/aest.h
+++ b/drivers/ras/aest/aest.h
@@ -334,3 +334,5 @@ aest_set_name(struct aest_device *adev, struct aest_hnode *ahnode)
 }
 
 void aest_dev_init_debugfs(struct aest_device *adev);
+void aest_inject_init_debugfs(struct aest_node *node);
+void aest_proc_record(struct aest_record *record, void *data);
-- 
2.33.1
Re: [PATCH v3 3/5] RAS/AEST: Introduce AEST inject interface to test AEST driver
Posted by kernel test robot 11 months ago
Hi Ruidong,

kernel test robot noticed the following build errors:

[auto build test ERROR on rafael-pm/linux-next]
[also build test ERROR on rafael-pm/bleeding-edge arm64/for-next/core ras/edac-for-next linus/master tip/smp/core v6.13-rc7 next-20250116]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Ruidong-Tian/ACPI-RAS-AEST-Initial-AEST-driver/20250115-164601
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/20250115084228.107573-4-tianruidong%40linux.alibaba.com
patch subject: [PATCH v3 3/5] RAS/AEST: Introduce AEST inject interface to test AEST driver
config: arm64-allmodconfig (https://download.01.org/0day-ci/archive/20250117/202501171406.o7oztilo-lkp@intel.com/config)
compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250117/202501171406.o7oztilo-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501171406.o7oztilo-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/ras/aest/aest-core.c:280:13: error: static declaration of 'aest_proc_record' follows non-static declaration
     280 | static void aest_proc_record(struct aest_record *record, void *data)
         |             ^
   drivers/ras/aest/aest.h:338:6: note: previous declaration is here
     338 | void aest_proc_record(struct aest_record *record, void *data);
         |      ^
   1 error generated.


vim +/aest_proc_record +280 drivers/ras/aest/aest-core.c

b6c745ae1213b2 Ruidong Tian 2025-01-15  279  
b6c745ae1213b2 Ruidong Tian 2025-01-15 @280  static void aest_proc_record(struct aest_record *record, void *data)
b6c745ae1213b2 Ruidong Tian 2025-01-15  281  {
b6c745ae1213b2 Ruidong Tian 2025-01-15  282  	struct ras_ext_regs regs = {0};
b6c745ae1213b2 Ruidong Tian 2025-01-15  283  	int *count = data;
b6c745ae1213b2 Ruidong Tian 2025-01-15  284  
b6c745ae1213b2 Ruidong Tian 2025-01-15  285  	regs.err_status = record_read(record, ERXSTATUS);
b6c745ae1213b2 Ruidong Tian 2025-01-15  286  	if (!(regs.err_status & ERR_STATUS_V))
b6c745ae1213b2 Ruidong Tian 2025-01-15  287  		return;
b6c745ae1213b2 Ruidong Tian 2025-01-15  288  
b6c745ae1213b2 Ruidong Tian 2025-01-15  289  	(*count)++;
b6c745ae1213b2 Ruidong Tian 2025-01-15  290  
b6c745ae1213b2 Ruidong Tian 2025-01-15  291  	if (regs.err_status & ERR_STATUS_AV)
b6c745ae1213b2 Ruidong Tian 2025-01-15  292  		regs.err_addr = record_read(record, ERXADDR);
b6c745ae1213b2 Ruidong Tian 2025-01-15  293  
b6c745ae1213b2 Ruidong Tian 2025-01-15  294  	regs.err_fr = record->fr;
b6c745ae1213b2 Ruidong Tian 2025-01-15  295  	regs.err_ctlr = record_read(record, ERXCTLR);
b6c745ae1213b2 Ruidong Tian 2025-01-15  296  
b6c745ae1213b2 Ruidong Tian 2025-01-15  297  	if (regs.err_status & ERR_STATUS_MV) {
b6c745ae1213b2 Ruidong Tian 2025-01-15  298  		regs.err_misc[0] = record_read(record, ERXMISC0);
b6c745ae1213b2 Ruidong Tian 2025-01-15  299  		regs.err_misc[1] = record_read(record, ERXMISC1);
b6c745ae1213b2 Ruidong Tian 2025-01-15  300  		if (record->node->version >= ID_AA64PFR0_EL1_RAS_V1P1) {
b6c745ae1213b2 Ruidong Tian 2025-01-15  301  			regs.err_misc[2] = record_read(record, ERXMISC2);
b6c745ae1213b2 Ruidong Tian 2025-01-15  302  			regs.err_misc[3] = record_read(record, ERXMISC3);
b6c745ae1213b2 Ruidong Tian 2025-01-15  303  		}
b6c745ae1213b2 Ruidong Tian 2025-01-15  304  
b6c745ae1213b2 Ruidong Tian 2025-01-15  305  		if (record->node->info->interface_hdr->flags &
b6c745ae1213b2 Ruidong Tian 2025-01-15  306  			AEST_XFACE_FLAG_CLEAR_MISC) {
b6c745ae1213b2 Ruidong Tian 2025-01-15  307  			record_write(record, ERXMISC0, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15  308  			record_write(record, ERXMISC1, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15  309  			if (record->node->version >= ID_AA64PFR0_EL1_RAS_V1P1) {
b6c745ae1213b2 Ruidong Tian 2025-01-15  310  				record_write(record, ERXMISC2, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15  311  				record_write(record, ERXMISC3, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15  312  			}
b6c745ae1213b2 Ruidong Tian 2025-01-15  313  		/* ce count is 0 if record do not support ce */
b6c745ae1213b2 Ruidong Tian 2025-01-15  314  		} else if (record->ce.count > 0)
b6c745ae1213b2 Ruidong Tian 2025-01-15  315  			record_write(record, ERXMISC0, record->ce.reg_val);
b6c745ae1213b2 Ruidong Tian 2025-01-15  316  	}
b6c745ae1213b2 Ruidong Tian 2025-01-15  317  
b6c745ae1213b2 Ruidong Tian 2025-01-15  318  	/* panic if unrecoverable and uncontainable error encountered */
b6c745ae1213b2 Ruidong Tian 2025-01-15  319  	if ((regs.err_status & ERR_STATUS_UE) &&
b6c745ae1213b2 Ruidong Tian 2025-01-15  320  		(regs.err_status & ERR_STATUS_UET) > ERR_STATUS_UET_UEU)
b6c745ae1213b2 Ruidong Tian 2025-01-15  321  		aest_panic(record, &regs, "AEST: unrecoverable error encountered");
b6c745ae1213b2 Ruidong Tian 2025-01-15  322  
b6c745ae1213b2 Ruidong Tian 2025-01-15  323  	aest_log(record, &regs);
b6c745ae1213b2 Ruidong Tian 2025-01-15  324  
b6c745ae1213b2 Ruidong Tian 2025-01-15  325  	/* Write-one-to-clear the bits we've seen */
b6c745ae1213b2 Ruidong Tian 2025-01-15  326  	regs.err_status &= ERR_STATUS_W1TC;
b6c745ae1213b2 Ruidong Tian 2025-01-15  327  
b6c745ae1213b2 Ruidong Tian 2025-01-15  328  	/* Multi bit filed need to write all-ones to clear. */
b6c745ae1213b2 Ruidong Tian 2025-01-15  329  	if (regs.err_status & ERR_STATUS_CE)
b6c745ae1213b2 Ruidong Tian 2025-01-15  330  		regs.err_status |= ERR_STATUS_CE;
b6c745ae1213b2 Ruidong Tian 2025-01-15  331  
b6c745ae1213b2 Ruidong Tian 2025-01-15  332  	/* Multi bit filed need to write all-ones to clear. */
b6c745ae1213b2 Ruidong Tian 2025-01-15  333  	if (regs.err_status & ERR_STATUS_UET)
b6c745ae1213b2 Ruidong Tian 2025-01-15  334  		regs.err_status |= ERR_STATUS_UET;
b6c745ae1213b2 Ruidong Tian 2025-01-15  335  
b6c745ae1213b2 Ruidong Tian 2025-01-15  336  	record_write(record, ERXSTATUS, regs.err_status);
b6c745ae1213b2 Ruidong Tian 2025-01-15  337  }
b6c745ae1213b2 Ruidong Tian 2025-01-15  338  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki