Certain components report errors using their internal physical addresses.
For instance, on AMD platforms, the UMC (Unified Memory Controller) reports
normalized addresses, while on Arm systems, memory controllers may report
logical addresses. These addresses must be translated into System Physical
Addresses (SPA) before the OS can utilize them effectively.
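AMD already provides amd_convert_umc_mca_addr_to_sys_addr(), which calls the
decoder registered in the amd_atl_umc_na_to_spa function pointer, for this
address translation. This patch renames that interface to a common function,
convert_ras_la_to_spa(), backed by the atl_ras_la_to_spa function pointer and
registered via atl_register_decoder(), so that both AMD and Arm64 can use it.
The function takes a single opaque pointer (void *) to the
architecture-specific data required for the address translation; each
registered decoder casts it back to its own type (struct atl_err on AMD).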
Signed-off-by: Ruidong Tian <tianruidong@linux.alibaba.com>
---
drivers/edac/amd64_edac.c | 2 +-
drivers/ras/aest/aest-core.c | 3 +++
drivers/ras/amd/atl/core.c | 4 ++--
drivers/ras/amd/atl/internal.h | 2 +-
drivers/ras/amd/atl/umc.c | 3 ++-
drivers/ras/ras.c | 24 +++++++++++-------------
include/linux/ras.h | 9 ++++-----
7 files changed, 24 insertions(+), 23 deletions(-)
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 2391f3469961..478cfef37892 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2847,7 +2847,7 @@ static void decode_umc_error(int node_id, struct mce *m)
a_err.ipid = m->ipid;
a_err.cpu = m->extcpu;
- sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
+ sys_addr = convert_ras_la_to_spa(&a_err);
if (IS_ERR_VALUE(sys_addr)) {
err.err_code = ERR_NORM_ADDR;
goto log_error;
diff --git a/drivers/ras/aest/aest-core.c b/drivers/ras/aest/aest-core.c
index a290b482bf8b..052211ca3e2a 100644
--- a/drivers/ras/aest/aest-core.c
+++ b/drivers/ras/aest/aest-core.c
@@ -235,6 +235,9 @@ static void aest_node_pool_process(struct work_struct *work)
(status & (ERR_STATUS_UE | ERR_STATUS_DE))) {
if (event->addressing_mode == AEST_ADDREESS_SPA)
addr = event->regs.err_addr & PHYS_MASK;
+ else
+ addr = convert_ras_la_to_spa(event);
+
aest_handle_memory_failure(addr);
}
diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
index 0f7cd6dab0b0..4f44c0ce97ec 100644
--- a/drivers/ras/amd/atl/core.c
+++ b/drivers/ras/amd/atl/core.c
@@ -210,7 +210,7 @@ static int __init amd_atl_init(void)
/* Increment this module's recount so that it can't be easily unloaded. */
__module_get(THIS_MODULE);
- amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr);
+ atl_register_decoder(convert_umc_mca_addr_to_sys_addr);
pr_info("AMD Address Translation Library initialized\n");
return 0;
@@ -222,7 +222,7 @@ static int __init amd_atl_init(void)
*/
static void __exit amd_atl_exit(void)
{
- amd_atl_unregister_decoder();
+ atl_unregister_decoder();
}
module_init(amd_atl_init);
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index 82a56d9c2be1..423a6193fdc7 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -279,7 +279,7 @@ int denormalize_address(struct addr_ctx *ctx);
int dehash_address(struct addr_ctx *ctx);
unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
-unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
+unsigned long convert_umc_mca_addr_to_sys_addr(void *data);
u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c
index befc616d5e8a..57a78c380467 100644
--- a/drivers/ras/amd/atl/umc.c
+++ b/drivers/ras/amd/atl/umc.c
@@ -399,8 +399,9 @@ static u8 get_coh_st_inst_id(struct atl_err *err)
return FIELD_GET(UMC_CHANNEL_NUM, err->ipid);
}
-unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
+unsigned long convert_umc_mca_addr_to_sys_addr(void *data)
{
+ struct atl_err *err = data;
u8 socket_id = topology_physical_package_id(err->cpu);
u8 coh_st_inst_id = get_coh_st_inst_id(err);
unsigned long addr = get_addr(err->addr);
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 2a5b5a9fdcb3..050b49466a18 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -10,36 +10,34 @@
#include <linux/ras.h>
#include <linux/uuid.h>
-#if IS_ENABLED(CONFIG_AMD_ATL)
/*
* Once set, this function pointer should never be unset.
*
* The library module will set this pointer if it successfully loads. The module
* should not be unloaded except for testing and debug purposes.
*/
-static unsigned long (*amd_atl_umc_na_to_spa)(struct atl_err *err);
+static unsigned long (*atl_ras_la_to_spa)(void *err);
-void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *))
+void atl_register_decoder(unsigned long (*f)(void *))
{
- amd_atl_umc_na_to_spa = f;
+ atl_ras_la_to_spa = f;
}
-EXPORT_SYMBOL_GPL(amd_atl_register_decoder);
+EXPORT_SYMBOL_GPL(atl_register_decoder);
-void amd_atl_unregister_decoder(void)
+void atl_unregister_decoder(void)
{
- amd_atl_umc_na_to_spa = NULL;
+ atl_ras_la_to_spa = NULL;
}
-EXPORT_SYMBOL_GPL(amd_atl_unregister_decoder);
+EXPORT_SYMBOL_GPL(atl_unregister_decoder);
-unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
+unsigned long convert_ras_la_to_spa(void *err)
{
- if (!amd_atl_umc_na_to_spa)
+ if (!atl_ras_la_to_spa)
return -EINVAL;
- return amd_atl_umc_na_to_spa(err);
+ return atl_ras_la_to_spa(err);
}
-EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_sys_addr);
-#endif /* CONFIG_AMD_ATL */
+EXPORT_SYMBOL_GPL(convert_ras_la_to_spa);
#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 05096f049dac..2270a8eb1038 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -42,14 +42,9 @@ struct atl_err {
};
#if IS_ENABLED(CONFIG_AMD_ATL)
-void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *));
-void amd_atl_unregister_decoder(void);
void amd_retire_dram_row(struct atl_err *err);
-unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
#else
static inline void amd_retire_dram_row(struct atl_err *err) { }
-static inline unsigned long
-amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
#endif /* CONFIG_AMD_ATL */
#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
@@ -63,6 +58,10 @@ amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
#define GET_LOGICAL_INDEX(mpidr) -EINVAL
#endif /* CONFIG_ARM || CONFIG_ARM64 */
+void atl_register_decoder(unsigned long (*f)(void *));
+void atl_unregister_decoder(void);
+unsigned long convert_ras_la_to_spa(void *err);
+
#if IS_ENABLED(CONFIG_AEST)
void aest_register_decode_chain(struct notifier_block *nb);
void aest_unregister_decode_chain(struct notifier_block *nb);
--
2.51.2.612.gdc70283dfc
Hi Ruidong,
kernel test robot noticed the following build errors:
[auto build test ERROR on rafael-pm/linux-next]
[also build test ERROR on rafael-pm/bleeding-edge linus/master v6.19-rc2 next-20251219]
[cannot apply to arm64/for-next/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ruidong-Tian/ACPI-AEST-Parse-the-AEST-table/20251222-215248
base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link: https://lore.kernel.org/r/20251222094351.38792-16-tianruidong%40linux.alibaba.com
patch subject: [PATCH v4 14/17] ras: ATL: Unify ATL interface for ARM64 and AMD
config: x86_64-rhel-9.4 (https://download.01.org/0day-ci/archive/20251225/202512251419.gOeKyBqX-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251225/202512251419.gOeKyBqX-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512251419.gOeKyBqX-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/ras/amd/fmpm.c: In function 'save_spa':
>> drivers/ras/amd/fmpm.c:336:15: error: implicit declaration of function 'amd_convert_umc_mca_addr_to_sys_addr'; did you mean 'convert_umc_mca_addr_to_sys_addr'? [-Wimplicit-function-declaration]
336 | spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| convert_umc_mca_addr_to_sys_addr
--
drivers/ras/amd/atl/umc.c: In function '_retire_row_mi300':
>> drivers/ras/amd/atl/umc.c:321:24: error: implicit declaration of function 'amd_convert_umc_mca_addr_to_sys_addr'; did you mean 'convert_umc_mca_addr_to_sys_addr'? [-Wimplicit-function-declaration]
321 | addr = amd_convert_umc_mca_addr_to_sys_addr(a_err);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| convert_umc_mca_addr_to_sys_addr
vim +336 drivers/ras/amd/fmpm.c
6f15e617cc9932 Yazen Ghannam 2024-02-13 298
838850c50884cd Yazen Ghannam 2024-03-01 299 static void save_spa(struct fru_rec *rec, unsigned int entry,
838850c50884cd Yazen Ghannam 2024-03-01 300 u64 addr, u64 id, unsigned int cpu)
838850c50884cd Yazen Ghannam 2024-03-01 301 {
838850c50884cd Yazen Ghannam 2024-03-01 302 unsigned int i, fru_idx, spa_entry;
838850c50884cd Yazen Ghannam 2024-03-01 303 struct atl_err a_err;
838850c50884cd Yazen Ghannam 2024-03-01 304 unsigned long spa;
838850c50884cd Yazen Ghannam 2024-03-01 305
838850c50884cd Yazen Ghannam 2024-03-01 306 if (entry >= max_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01 307 pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n",
838850c50884cd Yazen Ghannam 2024-03-01 308 entry, max_nr_entries);
838850c50884cd Yazen Ghannam 2024-03-01 309 return;
838850c50884cd Yazen Ghannam 2024-03-01 310 }
838850c50884cd Yazen Ghannam 2024-03-01 311
838850c50884cd Yazen Ghannam 2024-03-01 312 /* spa_nr_entries is always multiple of max_nr_entries */
838850c50884cd Yazen Ghannam 2024-03-01 313 for (i = 0; i < spa_nr_entries; i += max_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01 314 fru_idx = i / max_nr_entries;
838850c50884cd Yazen Ghannam 2024-03-01 315 if (fru_records[fru_idx] == rec)
838850c50884cd Yazen Ghannam 2024-03-01 316 break;
838850c50884cd Yazen Ghannam 2024-03-01 317 }
838850c50884cd Yazen Ghannam 2024-03-01 318
838850c50884cd Yazen Ghannam 2024-03-01 319 if (i >= spa_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01 320 pr_warn_once("FRU record %d not found\n", i);
838850c50884cd Yazen Ghannam 2024-03-01 321 return;
838850c50884cd Yazen Ghannam 2024-03-01 322 }
838850c50884cd Yazen Ghannam 2024-03-01 323
838850c50884cd Yazen Ghannam 2024-03-01 324 spa_entry = i + entry;
838850c50884cd Yazen Ghannam 2024-03-01 325 if (spa_entry >= spa_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01 326 pr_warn_once("spa_entries[] index out-of-bounds\n");
838850c50884cd Yazen Ghannam 2024-03-01 327 return;
838850c50884cd Yazen Ghannam 2024-03-01 328 }
838850c50884cd Yazen Ghannam 2024-03-01 329
838850c50884cd Yazen Ghannam 2024-03-01 330 memset(&a_err, 0, sizeof(struct atl_err));
838850c50884cd Yazen Ghannam 2024-03-01 331
838850c50884cd Yazen Ghannam 2024-03-01 332 a_err.addr = addr;
838850c50884cd Yazen Ghannam 2024-03-01 333 a_err.ipid = id;
838850c50884cd Yazen Ghannam 2024-03-01 334 a_err.cpu = cpu;
838850c50884cd Yazen Ghannam 2024-03-01 335
838850c50884cd Yazen Ghannam 2024-03-01 @336 spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
838850c50884cd Yazen Ghannam 2024-03-01 337 if (IS_ERR_VALUE(spa)) {
838850c50884cd Yazen Ghannam 2024-03-01 338 pr_debug("Failed to get system address\n");
838850c50884cd Yazen Ghannam 2024-03-01 339 return;
838850c50884cd Yazen Ghannam 2024-03-01 340 }
838850c50884cd Yazen Ghannam 2024-03-01 341
838850c50884cd Yazen Ghannam 2024-03-01 342 spa_entries[spa_entry] = spa;
838850c50884cd Yazen Ghannam 2024-03-01 343 pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n",
838850c50884cd Yazen Ghannam 2024-03-01 344 fru_idx, entry, spa_entry, spa_entries[spa_entry]);
838850c50884cd Yazen Ghannam 2024-03-01 345 }
838850c50884cd Yazen Ghannam 2024-03-01 346
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Ruidong,
kernel test robot noticed the following build errors:
[auto build test ERROR on rafael-pm/linux-next]
[also build test ERROR on rafael-pm/bleeding-edge ras/edac-for-next linus/master v6.19-rc2 next-20251219]
[cannot apply to arm64/for-next/core tip/smp/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ruidong-Tian/ACPI-AEST-Parse-the-AEST-table/20251222-175211
base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link: https://lore.kernel.org/r/20251222094351.38792-16-tianruidong%40linux.alibaba.com
patch subject: [PATCH v4 14/17] ras: ATL: Unify ATL interface for ARM64 and AMD
config: x86_64-rhel-9.4 (https://download.01.org/0day-ci/archive/20251223/202512230007.Vs6IvFVD-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251223/202512230007.Vs6IvFVD-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512230007.Vs6IvFVD-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/ras/amd/fmpm.c: In function 'save_spa':
>> drivers/ras/amd/fmpm.c:336:15: error: implicit declaration of function 'amd_convert_umc_mca_addr_to_sys_addr'; did you mean 'convert_umc_mca_addr_to_sys_addr'? [-Wimplicit-function-declaration]
336 | spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| convert_umc_mca_addr_to_sys_addr
--
drivers/ras/amd/atl/umc.c: In function '_retire_row_mi300':
>> drivers/ras/amd/atl/umc.c:321:24: error: implicit declaration of function 'amd_convert_umc_mca_addr_to_sys_addr'; did you mean 'convert_umc_mca_addr_to_sys_addr'? [-Wimplicit-function-declaration]
321 | addr = amd_convert_umc_mca_addr_to_sys_addr(a_err);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| convert_umc_mca_addr_to_sys_addr
vim +336 drivers/ras/amd/fmpm.c
6f15e617cc9932 Yazen Ghannam 2024-02-13 298
838850c50884cd Yazen Ghannam 2024-03-01 299 static void save_spa(struct fru_rec *rec, unsigned int entry,
838850c50884cd Yazen Ghannam 2024-03-01 300 u64 addr, u64 id, unsigned int cpu)
838850c50884cd Yazen Ghannam 2024-03-01 301 {
838850c50884cd Yazen Ghannam 2024-03-01 302 unsigned int i, fru_idx, spa_entry;
838850c50884cd Yazen Ghannam 2024-03-01 303 struct atl_err a_err;
838850c50884cd Yazen Ghannam 2024-03-01 304 unsigned long spa;
838850c50884cd Yazen Ghannam 2024-03-01 305
838850c50884cd Yazen Ghannam 2024-03-01 306 if (entry >= max_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01 307 pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n",
838850c50884cd Yazen Ghannam 2024-03-01 308 entry, max_nr_entries);
838850c50884cd Yazen Ghannam 2024-03-01 309 return;
838850c50884cd Yazen Ghannam 2024-03-01 310 }
838850c50884cd Yazen Ghannam 2024-03-01 311
838850c50884cd Yazen Ghannam 2024-03-01 312 /* spa_nr_entries is always multiple of max_nr_entries */
838850c50884cd Yazen Ghannam 2024-03-01 313 for (i = 0; i < spa_nr_entries; i += max_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01 314 fru_idx = i / max_nr_entries;
838850c50884cd Yazen Ghannam 2024-03-01 315 if (fru_records[fru_idx] == rec)
838850c50884cd Yazen Ghannam 2024-03-01 316 break;
838850c50884cd Yazen Ghannam 2024-03-01 317 }
838850c50884cd Yazen Ghannam 2024-03-01 318
838850c50884cd Yazen Ghannam 2024-03-01 319 if (i >= spa_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01 320 pr_warn_once("FRU record %d not found\n", i);
838850c50884cd Yazen Ghannam 2024-03-01 321 return;
838850c50884cd Yazen Ghannam 2024-03-01 322 }
838850c50884cd Yazen Ghannam 2024-03-01 323
838850c50884cd Yazen Ghannam 2024-03-01 324 spa_entry = i + entry;
838850c50884cd Yazen Ghannam 2024-03-01 325 if (spa_entry >= spa_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01 326 pr_warn_once("spa_entries[] index out-of-bounds\n");
838850c50884cd Yazen Ghannam 2024-03-01 327 return;
838850c50884cd Yazen Ghannam 2024-03-01 328 }
838850c50884cd Yazen Ghannam 2024-03-01 329
838850c50884cd Yazen Ghannam 2024-03-01 330 memset(&a_err, 0, sizeof(struct atl_err));
838850c50884cd Yazen Ghannam 2024-03-01 331
838850c50884cd Yazen Ghannam 2024-03-01 332 a_err.addr = addr;
838850c50884cd Yazen Ghannam 2024-03-01 333 a_err.ipid = id;
838850c50884cd Yazen Ghannam 2024-03-01 334 a_err.cpu = cpu;
838850c50884cd Yazen Ghannam 2024-03-01 335
838850c50884cd Yazen Ghannam 2024-03-01 @336 spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
838850c50884cd Yazen Ghannam 2024-03-01 337 if (IS_ERR_VALUE(spa)) {
838850c50884cd Yazen Ghannam 2024-03-01 338 pr_debug("Failed to get system address\n");
838850c50884cd Yazen Ghannam 2024-03-01 339 return;
838850c50884cd Yazen Ghannam 2024-03-01 340 }
838850c50884cd Yazen Ghannam 2024-03-01 341
838850c50884cd Yazen Ghannam 2024-03-01 342 spa_entries[spa_entry] = spa;
838850c50884cd Yazen Ghannam 2024-03-01 343 pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n",
838850c50884cd Yazen Ghannam 2024-03-01 344 fru_idx, entry, spa_entry, spa_entries[spa_entry]);
838850c50884cd Yazen Ghannam 2024-03-01 345 }
838850c50884cd Yazen Ghannam 2024-03-01 346
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2026 Red Hat, Inc.