The AEST injection interface helps to test how the AEST driver processes
an error record that reports an error.
This interface only raises a software-simulated error rather than a hardware error.
Example1:
1. write RAS register value to err_* file:
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_fr
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_status
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_addr
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_*
2. trigger the error:
echo -1 > <debugfs>/aest/<dev>/<node>/inject/inject
AEST driver will process this error with error register value specified
by user.
Example2:
1. just trigger the error:
echo <n> > <debugfs>/aest/<dev>/<node>/inject/inject   (0 <= n < record_count)
AEST driver will process this error with error register values read
from record<n> of this node.
Signed-off-by: Ruidong Tian <tianruidong@linux.alibaba.com>
---
Documentation/ABI/testing/debugfs-aest | 17 +++
drivers/ras/aest/Makefile | 1 +
drivers/ras/aest/aest-inject.c | 151 +++++++++++++++++++++++++
drivers/ras/aest/aest-sysfs.c | 8 +-
drivers/ras/aest/aest.h | 2 +
5 files changed, 177 insertions(+), 2 deletions(-)
create mode 100644 drivers/ras/aest/aest-inject.c
diff --git a/Documentation/ABI/testing/debugfs-aest b/Documentation/ABI/testing/debugfs-aest
index 39d9c85843ef..4d3f4464cf98 100644
--- a/Documentation/ABI/testing/debugfs-aest
+++ b/Documentation/ABI/testing/debugfs-aest
@@ -96,3 +96,20 @@ KernelVersion 6.10
Contact: Ruidong Tian <tianruidong@linux.alibaba.com>
Description:
(RO) Outputs error statistics for all this records.
+
+What: /sys/devices/platform/AEST.<UID>/<Node_name>/inject/inject
+Date: June 2024
+KernelVersion 6.10
+Contact: Ruidong Tian <tianruidong@linux.alibaba.com>
+Description:
+ (RW) Write any integer to this file to trigger the error
+ injection. Make sure you have specified all necessary error
+ parameters, i.e. this write should be the last step when
+ injecting errors.
+
+ Accepted values: -1 or n (0 <= n < <record_count>).
+ -1 : Make sure you have written all err_* files first; the
+ driver uses these err_* values to process the AEST error.
+ n : The driver reads record<n> of this error node to collect
+ the error register values, and uses these values to process
+ the AEST error.
diff --git a/drivers/ras/aest/Makefile b/drivers/ras/aest/Makefile
index 75495413d2b6..5ee10fc8b2e9 100644
--- a/drivers/ras/aest/Makefile
+++ b/drivers/ras/aest/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_AEST) += aest.o
aest-y := aest-core.o
aest-y += aest-sysfs.o
+aest-y += aest-inject.o
diff --git a/drivers/ras/aest/aest-inject.c b/drivers/ras/aest/aest-inject.c
new file mode 100644
index 000000000000..2ca074aa021c
--- /dev/null
+++ b/drivers/ras/aest/aest-inject.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Error Source Table Support
+ *
+ * Copyright (c) 2024, Alibaba Group.
+ */
+
+#include "aest.h"
+
+/*
+ * Software-simulated error record. aest_inject_init_debugfs() exposes its
+ * fields as the err_* debugfs files and inject_store() fills and consumes
+ * it. One instance is shared by every node; nothing in this file
+ * serialises access to it.
+ */
+static struct ras_ext_regs regs_inj;
+
+/*
+ * Value that hard_inject_store() writes to SYS_ERXPFGCTL_EL1 or to the
+ * node's memory-mapped inject area. Exposed as "hard_inject_val".
+ */
+static u64 hard_inject_val;
+
+/*
+ * Attribute descriptor with per-node show/store callbacks.
+ * NOTE(review): nothing in this file references struct inj_attr,
+ * struct aest_inject or the two container_of() helpers below - confirm
+ * whether they are still needed.
+ */
+struct inj_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct aest_node *node, struct inj_attr *attr,
+			char *buf);
+	ssize_t (*store)(struct aest_node *node, struct inj_attr *attr,
+			 const char *buf, size_t len);
+};
+
+/* Pairs a kobject with the AEST node it belongs to. */
+struct aest_inject {
+	struct aest_node *node;
+	struct kobject kobj;
+};
+
+/* Recover the enclosing object from an embedded kobject / attribute. */
+#define to_inj(k)	container_of(k, struct aest_inject, kobj)
+#define to_inj_attr(a)	container_of(a, struct inj_attr, attr)
+
+/*
+ * Translate a register byte offset into the matching 64-bit slot of
+ * regs_inj, which is treated as a flat array of u64 registers.
+ *
+ * Returns NULL when the offset lies beyond the end of regs_inj so that a
+ * bad offset can never touch memory outside the simulated record.
+ */
+static u64 *aest_inject_reg(u32 offset)
+{
+	u64 *slots = (u64 *)&regs_inj;
+	u32 idx = offset / sizeof(u64);
+
+	if (idx >= sizeof(regs_inj) / sizeof(u64))
+		return NULL;
+
+	return &slots[idx];
+}
+
+/*
+ * Read accessor for system-register nodes: returns the simulated register
+ * at @offset, or 0 if @offset is out of range. @base is ignored.
+ */
+static u64 aest_sysreg_read_inject(void *base, u32 offset)
+{
+	u64 *reg = aest_inject_reg(offset);
+
+	return reg ? *reg : 0;
+}
+
+/*
+ * Write accessor for system-register nodes: stores @val into the simulated
+ * register at @offset; out-of-range offsets are dropped. @base is ignored.
+ */
+static void aest_sysreg_write_inject(void *base, u32 offset, u64 val)
+{
+	u64 *reg = aest_inject_reg(offset);
+
+	if (reg)
+		*reg = val;
+}
+
+/*
+ * Read accessor for memory-mapped nodes: returns the simulated register
+ * at @offset, or 0 if @offset is out of range. @base is ignored.
+ */
+static u64 aest_iomem_read_inject(void *base, u32 offset)
+{
+	u64 *reg = aest_inject_reg(offset);
+
+	return reg ? *reg : 0;
+}
+
+/*
+ * Write accessor for memory-mapped nodes: stores @val into the simulated
+ * register at @offset; out-of-range offsets are dropped. @base is ignored.
+ */
+static void aest_iomem_write_inject(void *base, u32 offset, u64 val)
+{
+	u64 *reg = aest_inject_reg(offset);
+
+	if (reg)
+		*reg = val;
+}
+
+/*
+ * Register accessors that redirect every read and write to regs_inj.
+ * inject_store() indexes this table with the node's interface type.
+ */
+static struct aest_access aest_access_inject[] = {
+	[ACPI_AEST_NODE_SYSTEM_REGISTER] = {
+		.write	= aest_sysreg_write_inject,
+		.read	= aest_sysreg_read_inject,
+	},
+	[ACPI_AEST_NODE_MEMORY_MAPPED] = {
+		.write	= aest_iomem_write_inject,
+		.read	= aest_iomem_read_inject,
+	},
+	[ACPI_AEST_NODE_SINGLE_RECORD_MEMORY_MAPPED] = {
+		.write	= aest_iomem_write_inject,
+		.read	= aest_iomem_read_inject,
+	},
+	{ }
+};
+
+/*
+ * inject_store() - handle a write to the "inject" debugfs file.
+ * @data: the struct aest_node the file was created for.
+ * @val:  -1 to process the err_* values previously written by the user,
+ *        or a record index n (0 <= n < error_record_count) to first copy
+ *        the registers of record n of this node into regs_inj.
+ *
+ * Builds a temporary node/record pair whose accessors point at regs_inj,
+ * forces ERR_STATUS_V and hands the record to aest_proc_record().
+ *
+ * Return: 0 on success, -EINVAL if @val is neither -1 nor a valid record
+ * index, -EIO if aest_proc_record() did not count exactly one record.
+ */
+static int inject_store(void *data, u64 val)
+{
+	struct aest_node *node = data;
+	struct aest_record record_inj, *record;
+	struct aest_node node_inj;
+	s64 idx = (s64)val;
+	int count = 0;
+
+	/*
+	 * Check the full 64-bit value: truncating to int first would let
+	 * large values alias valid indices, and index == record_count is
+	 * already one past the end of node->records.
+	 */
+	if (idx < -1 ||
+	    idx >= (s64)node->info->interface_hdr->error_record_count)
+		return -EINVAL;
+
+	memcpy(&node_inj, node, sizeof(*node));
+	node_inj.name = "AEST-injection";
+
+	/*
+	 * aest_proc_record() reads fields such as ->fr and ->ce.count, so
+	 * the on-stack record must not carry stack garbage.
+	 */
+	memset(&record_inj, 0, sizeof(record_inj));
+	record_inj.access = &aest_access_inject[node->info->interface_hdr->type];
+	record_inj.node = &node_inj;
+	record_inj.index = (int)idx;
+
+	if (idx >= 0) {
+		/* Snapshot the registers of the real record into regs_inj. */
+		record = &node->records[idx];
+		regs_inj.err_fr = record_read(record, ERXFR);
+		regs_inj.err_ctlr = record_read(record, ERXCTLR);
+		regs_inj.err_status = record_read(record, ERXSTATUS);
+		regs_inj.err_addr = record_read(record, ERXADDR);
+		regs_inj.err_misc[0] = record_read(record, ERXMISC0);
+		regs_inj.err_misc[1] = record_read(record, ERXMISC1);
+		regs_inj.err_misc[2] = record_read(record, ERXMISC2);
+		regs_inj.err_misc[3] = record_read(record, ERXMISC3);
+	}
+
+	/* aest_proc_record() reports err_fr from record->fr. */
+	record_inj.fr = regs_inj.err_fr;
+
+	/* A record without the valid bit would be ignored by the driver. */
+	regs_inj.err_status |= ERR_STATUS_V;
+
+	aest_proc_record(&record_inj, &count);
+
+	if (count != 1)
+		return -EIO;
+
+	return 0;
+}
+/* Signed variant: the unsigned parser rejects the documented "-1". */
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(inject_ops, NULL, inject_store, "%lld\n");
+
+/*
+ * hard_inject_store() - handle a write to the "hard_inject" debugfs file.
+ * @data: the struct aest_node the file was created for.
+ * @val:  index of the record to inject into (0 <= val < record_count).
+ *
+ * Writes hard_inject_val to the injection control of record @val: through
+ * SYS_ERXPFGCTL_EL1 for processor nodes, otherwise to the 64-bit slot
+ * @val of the node's memory-mapped inject area.
+ *
+ * Return: 0 on success, -EPERM if the node has no inject area,
+ * -ENODEV if @val is not a valid record index.
+ */
+static int hard_inject_store(void *data, u64 val)
+{
+	struct aest_node *node = data;
+
+	if (!node->inj)
+		return -EPERM;
+
+	/* Valid indices are 0 .. record_count - 1. */
+	if (val >= node->record_count)
+		return -ENODEV;
+
+	if (node->type == ACPI_AEST_PROCESSOR_ERROR_NODE) {
+		aest_select_record(node, val);
+		write_sysreg_s(hard_inject_val, SYS_ERXPFGCTL_EL1);
+		/* NOTE(review): 0x100 is presumably the countdown - confirm. */
+		write_sysreg_s(0x100, SYS_ERXPFGCDN_EL1);
+		aest_sync(node);
+	} else {
+		writeq_relaxed(hard_inject_val, node->inj + val * 8);
+	}
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(hard_inject_ops, NULL, hard_inject_store, "%llu\n");
+
+/*
+ * aest_inject_init_debugfs() - create the "inject" directory of a node.
+ * @node: node whose debugfs directory receives the injection files.
+ *
+ * Creates the err_* files backed by regs_inj, the "inject" trigger, and
+ * the "hard_inject" / "hard_inject_val" pair.
+ */
+void aest_inject_init_debugfs(struct aest_node *node)
+{
+	struct dentry *inj;
+
+	inj = debugfs_create_dir("inject", node->debugfs);
+
+	/*
+	 * The err_* files must be writable: debugfs_create_u64() installs
+	 * read-only file operations when the mode has no write bit.
+	 */
+	debugfs_create_u64("err_fr", 0600, inj, &regs_inj.err_fr);
+	debugfs_create_u64("err_ctrl", 0600, inj, &regs_inj.err_ctlr);
+	debugfs_create_u64("err_status", 0600, inj, &regs_inj.err_status);
+	debugfs_create_u64("err_addr", 0600, inj, &regs_inj.err_addr);
+	debugfs_create_u64("err_misc0", 0600, inj, &regs_inj.err_misc[0]);
+	debugfs_create_u64("err_misc1", 0600, inj, &regs_inj.err_misc[1]);
+	debugfs_create_u64("err_misc2", 0600, inj, &regs_inj.err_misc[2]);
+	debugfs_create_u64("err_misc3", 0600, inj, &regs_inj.err_misc[3]);
+
+	/* Trigger files have no getter, so expose them write-only. */
+	debugfs_create_file("inject", 0200, inj, node, &inject_ops);
+	debugfs_create_file("hard_inject", 0200, inj, node, &hard_inject_ops);
+
+	debugfs_create_u64("hard_inject_val", 0600, inj, &hard_inject_val);
+}
diff --git a/drivers/ras/aest/aest-sysfs.c b/drivers/ras/aest/aest-sysfs.c
index f19cd2b5edb2..ba913556fc03 100644
--- a/drivers/ras/aest/aest-sysfs.c
+++ b/drivers/ras/aest/aest-sysfs.c
@@ -192,8 +192,8 @@ aest_oncore_dev_init_debugfs(struct aest_device *adev)
for_each_possible_cpu(cpu) {
percpu_dev = this_cpu_ptr(adev->adev_oncore);
- snprintf(name, sizeof(name), "processor%u", cpu);
- percpu_dev->debugfs = debugfs_create_dir(name, aest_debugfs);
+ snprintf(name, sizeof(name), "CPU%u", cpu);
+ percpu_dev->debugfs = debugfs_create_dir(name, adev->debugfs);
for (i = 0; i < adev->node_cnt; i++) {
node = &adev->nodes[i];
@@ -210,6 +210,9 @@ void aest_dev_init_debugfs(struct aest_device *adev)
int i;
struct aest_node *node;
+ if (!aest_debugfs)
+ dev_err(adev->dev, "debugfs not enabled\n");
+
adev->debugfs = debugfs_create_dir(dev_name(adev->dev), aest_debugfs);
if (aest_dev_is_oncore(adev)) {
aest_oncore_dev_init_debugfs(adev);
@@ -222,5 +225,6 @@ void aest_dev_init_debugfs(struct aest_device *adev)
continue;
node->debugfs = debugfs_create_dir(node->name, adev->debugfs);
aest_node_init_debugfs(node);
+ aest_inject_init_debugfs(node);
}
}
diff --git a/drivers/ras/aest/aest.h b/drivers/ras/aest/aest.h
index d9a52e39b1b9..90a96e2666d3 100644
--- a/drivers/ras/aest/aest.h
+++ b/drivers/ras/aest/aest.h
@@ -334,3 +334,5 @@ aest_set_name(struct aest_device *adev, struct aest_hnode *ahnode)
}
void aest_dev_init_debugfs(struct aest_device *adev);
+void aest_inject_init_debugfs(struct aest_node *node);
+void aest_proc_record(struct aest_record *record, void *data);
--
2.33.1
Hi Ruidong,
kernel test robot noticed the following build errors:
[auto build test ERROR on rafael-pm/linux-next]
[also build test ERROR on rafael-pm/bleeding-edge arm64/for-next/core ras/edac-for-next linus/master tip/smp/core v6.13-rc7 next-20250116]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ruidong-Tian/ACPI-RAS-AEST-Initial-AEST-driver/20250115-164601
base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link: https://lore.kernel.org/r/20250115084228.107573-4-tianruidong%40linux.alibaba.com
patch subject: [PATCH v3 3/5] RAS/AEST: Introduce AEST inject interface to test AEST driver
config: arm64-allmodconfig (https://download.01.org/0day-ci/archive/20250117/202501171406.o7oztilo-lkp@intel.com/config)
compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250117/202501171406.o7oztilo-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501171406.o7oztilo-lkp@intel.com/
All errors (new ones prefixed by >>):
>> drivers/ras/aest/aest-core.c:280:13: error: static declaration of 'aest_proc_record' follows non-static declaration
280 | static void aest_proc_record(struct aest_record *record, void *data)
| ^
drivers/ras/aest/aest.h:338:6: note: previous declaration is here
338 | void aest_proc_record(struct aest_record *record, void *data);
| ^
1 error generated.
vim +/aest_proc_record +280 drivers/ras/aest/aest-core.c
b6c745ae1213b2 Ruidong Tian 2025-01-15 279
b6c745ae1213b2 Ruidong Tian 2025-01-15 @280 static void aest_proc_record(struct aest_record *record, void *data)
b6c745ae1213b2 Ruidong Tian 2025-01-15 281 {
b6c745ae1213b2 Ruidong Tian 2025-01-15 282 struct ras_ext_regs regs = {0};
b6c745ae1213b2 Ruidong Tian 2025-01-15 283 int *count = data;
b6c745ae1213b2 Ruidong Tian 2025-01-15 284
b6c745ae1213b2 Ruidong Tian 2025-01-15 285 regs.err_status = record_read(record, ERXSTATUS);
b6c745ae1213b2 Ruidong Tian 2025-01-15 286 if (!(regs.err_status & ERR_STATUS_V))
b6c745ae1213b2 Ruidong Tian 2025-01-15 287 return;
b6c745ae1213b2 Ruidong Tian 2025-01-15 288
b6c745ae1213b2 Ruidong Tian 2025-01-15 289 (*count)++;
b6c745ae1213b2 Ruidong Tian 2025-01-15 290
b6c745ae1213b2 Ruidong Tian 2025-01-15 291 if (regs.err_status & ERR_STATUS_AV)
b6c745ae1213b2 Ruidong Tian 2025-01-15 292 regs.err_addr = record_read(record, ERXADDR);
b6c745ae1213b2 Ruidong Tian 2025-01-15 293
b6c745ae1213b2 Ruidong Tian 2025-01-15 294 regs.err_fr = record->fr;
b6c745ae1213b2 Ruidong Tian 2025-01-15 295 regs.err_ctlr = record_read(record, ERXCTLR);
b6c745ae1213b2 Ruidong Tian 2025-01-15 296
b6c745ae1213b2 Ruidong Tian 2025-01-15 297 if (regs.err_status & ERR_STATUS_MV) {
b6c745ae1213b2 Ruidong Tian 2025-01-15 298 regs.err_misc[0] = record_read(record, ERXMISC0);
b6c745ae1213b2 Ruidong Tian 2025-01-15 299 regs.err_misc[1] = record_read(record, ERXMISC1);
b6c745ae1213b2 Ruidong Tian 2025-01-15 300 if (record->node->version >= ID_AA64PFR0_EL1_RAS_V1P1) {
b6c745ae1213b2 Ruidong Tian 2025-01-15 301 regs.err_misc[2] = record_read(record, ERXMISC2);
b6c745ae1213b2 Ruidong Tian 2025-01-15 302 regs.err_misc[3] = record_read(record, ERXMISC3);
b6c745ae1213b2 Ruidong Tian 2025-01-15 303 }
b6c745ae1213b2 Ruidong Tian 2025-01-15 304
b6c745ae1213b2 Ruidong Tian 2025-01-15 305 if (record->node->info->interface_hdr->flags &
b6c745ae1213b2 Ruidong Tian 2025-01-15 306 AEST_XFACE_FLAG_CLEAR_MISC) {
b6c745ae1213b2 Ruidong Tian 2025-01-15 307 record_write(record, ERXMISC0, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15 308 record_write(record, ERXMISC1, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15 309 if (record->node->version >= ID_AA64PFR0_EL1_RAS_V1P1) {
b6c745ae1213b2 Ruidong Tian 2025-01-15 310 record_write(record, ERXMISC2, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15 311 record_write(record, ERXMISC3, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15 312 }
b6c745ae1213b2 Ruidong Tian 2025-01-15 313 /* ce count is 0 if record do not support ce */
b6c745ae1213b2 Ruidong Tian 2025-01-15 314 } else if (record->ce.count > 0)
b6c745ae1213b2 Ruidong Tian 2025-01-15 315 record_write(record, ERXMISC0, record->ce.reg_val);
b6c745ae1213b2 Ruidong Tian 2025-01-15 316 }
b6c745ae1213b2 Ruidong Tian 2025-01-15 317
b6c745ae1213b2 Ruidong Tian 2025-01-15 318 /* panic if unrecoverable and uncontainable error encountered */
b6c745ae1213b2 Ruidong Tian 2025-01-15 319 if ((regs.err_status & ERR_STATUS_UE) &&
b6c745ae1213b2 Ruidong Tian 2025-01-15 320 (regs.err_status & ERR_STATUS_UET) > ERR_STATUS_UET_UEU)
b6c745ae1213b2 Ruidong Tian 2025-01-15 321 aest_panic(record, &regs, "AEST: unrecoverable error encountered");
b6c745ae1213b2 Ruidong Tian 2025-01-15 322
b6c745ae1213b2 Ruidong Tian 2025-01-15 323 aest_log(record, &regs);
b6c745ae1213b2 Ruidong Tian 2025-01-15 324
b6c745ae1213b2 Ruidong Tian 2025-01-15 325 /* Write-one-to-clear the bits we've seen */
b6c745ae1213b2 Ruidong Tian 2025-01-15 326 regs.err_status &= ERR_STATUS_W1TC;
b6c745ae1213b2 Ruidong Tian 2025-01-15 327
b6c745ae1213b2 Ruidong Tian 2025-01-15 328 /* Multi bit filed need to write all-ones to clear. */
b6c745ae1213b2 Ruidong Tian 2025-01-15 329 if (regs.err_status & ERR_STATUS_CE)
b6c745ae1213b2 Ruidong Tian 2025-01-15 330 regs.err_status |= ERR_STATUS_CE;
b6c745ae1213b2 Ruidong Tian 2025-01-15 331
b6c745ae1213b2 Ruidong Tian 2025-01-15 332 /* Multi bit filed need to write all-ones to clear. */
b6c745ae1213b2 Ruidong Tian 2025-01-15 333 if (regs.err_status & ERR_STATUS_UET)
b6c745ae1213b2 Ruidong Tian 2025-01-15 334 regs.err_status |= ERR_STATUS_UET;
b6c745ae1213b2 Ruidong Tian 2025-01-15 335
b6c745ae1213b2 Ruidong Tian 2025-01-15 336 record_write(record, ERXSTATUS, regs.err_status);
b6c745ae1213b2 Ruidong Tian 2025-01-15 337 }
b6c745ae1213b2 Ruidong Tian 2025-01-15 338
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.