[PATCH] EDAC/versalnet: Report PFN and page offset for DDR errors

Shubhrajyoti Datta posted 1 patch 3 days, 18 hours ago
drivers/edac/versalnet_edac.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
[PATCH] EDAC/versalnet: Report PFN and page offset for DDR errors
Posted by Shubhrajyoti Datta 3 days, 18 hours ago
Currently, DDRMC correctable and uncorrectable error events are reported
to EDAC with page frame number (pfn) and offset set to zero.
As a result, these fields cannot be used to locate the address of a memory error.

Compute the physical address from the error information and extract
the page frame number and offset before calling edac_mc_handle_error().
This provides the actual memory location of the error to userspace.

Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
---

 drivers/edac/versalnet_edac.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c
index 915bcd6166f7..66df090245be 100644
--- a/drivers/edac/versalnet_edac.c
+++ b/drivers/edac/versalnet_edac.c
@@ -431,8 +431,7 @@ static void handle_error(struct mc_priv  *priv, struct ecc_status *stat,
 {
 	union ecc_error_info pinf;
 	struct mem_ctl_info *mci;
-	unsigned long pa;
-	phys_addr_t pfn;
+	unsigned long pa, pfn;
 	int err;
 
 	if (WARN_ON_ONCE(ctl_num >= NUM_CONTROLLERS))
@@ -442,27 +441,28 @@ static void handle_error(struct mc_priv  *priv, struct ecc_status *stat,
 
 	if (stat->error_type == MC5_ERR_TYPE_CE) {
 		pinf = stat->ceinfo[stat->channel];
+		pa = convert_to_physical(priv, pinf, ctl_num, error_data);
+		pfn = PHYS_PFN(pa);
 		snprintf(priv->message, sizeof(priv->message),
 			 "Error type:%s Controller %d Addr at %lx\n",
-			 "CE", ctl_num, convert_to_physical(priv, pinf, ctl_num, error_data));
+			 "CE", ctl_num, pa);
 
 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
-				     1, 0, 0, 0, 0, 0, -1,
+				     1, pfn, offset_in_page(pa), 0, 0, 0, -1,
 				     priv->message, "");
 	}
 
 	if (stat->error_type == MC5_ERR_TYPE_UE) {
 		pinf = stat->ueinfo[stat->channel];
+		pa = convert_to_physical(priv, pinf, ctl_num, error_data);
+		pfn = PHYS_PFN(pa);
 		snprintf(priv->message, sizeof(priv->message),
 			 "Error type:%s controller %d Addr at %lx\n",
-			 "UE", ctl_num, convert_to_physical(priv, pinf, ctl_num, error_data));
+			 "UE", ctl_num, pa);
 
 		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
-				     1, 0, 0, 0, 0, 0, -1,
+				     1, pfn, offset_in_page(pa), 0, 0, 0, -1,
 				     priv->message, "");
-		pa = convert_to_physical(priv, pinf, ctl_num, error_data);
-		pfn = PHYS_PFN(pa);
-
 		if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) {
 			err = memory_failure(pfn, MF_ACTION_REQUIRED);
 			if (err)
-- 
2.34.1
Re: [PATCH] EDAC/versalnet: Report PFN and page offset for DDR errors
Posted by Prasanna Kumar T S M 1 day, 1 hour ago

On 29-03-2026 18:14, Shubhrajyoti Datta wrote:
> Currently, DDRMC correctable and uncorrectable error events are reported
> to EDAC with page frame number (pfn) and offset set to zero.
> This information is not useful to locate the address for memory errors.
> 
> Compute the physical address from the error information and extract
> the page frame number and offset before calling edac_mc_handle_error().
> This provides the actual memory location information to the userspace.
> 
> Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
> ---
> 
>   drivers/edac/versalnet_edac.c | 18 +++++++++---------
>   1 file changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c
> index 915bcd6166f7..66df090245be 100644
> --- a/drivers/edac/versalnet_edac.c
> +++ b/drivers/edac/versalnet_edac.c
> @@ -431,8 +431,7 @@ static void handle_error(struct mc_priv  *priv, struct ecc_status *stat,
>   {
>   	union ecc_error_info pinf;
>   	struct mem_ctl_info *mci;
> -	unsigned long pa;
> -	phys_addr_t pfn;
> +	unsigned long pa, pfn;
>   	int err;
>   
>   	if (WARN_ON_ONCE(ctl_num >= NUM_CONTROLLERS))
> @@ -442,27 +441,28 @@ static void handle_error(struct mc_priv  *priv, struct ecc_status *stat,
>   
>   	if (stat->error_type == MC5_ERR_TYPE_CE) {
>   		pinf = stat->ceinfo[stat->channel];
> +		pa = convert_to_physical(priv, pinf, ctl_num, error_data);
> +		pfn = PHYS_PFN(pa);
>   		snprintf(priv->message, sizeof(priv->message),
>   			 "Error type:%s Controller %d Addr at %lx\n",
> -			 "CE", ctl_num, convert_to_physical(priv, pinf, ctl_num, error_data));
> +			 "CE", ctl_num, pa);
>   
>   		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
> -				     1, 0, 0, 0, 0, 0, -1,
> +				     1, pfn, offset_in_page(pa), 0, 0, 0, -1,
>   				     priv->message, "");
>   	}
>   
>   	if (stat->error_type == MC5_ERR_TYPE_UE) {
>   		pinf = stat->ueinfo[stat->channel];
> +		pa = convert_to_physical(priv, pinf, ctl_num, error_data);
> +		pfn = PHYS_PFN(pa);
>   		snprintf(priv->message, sizeof(priv->message),
>   			 "Error type:%s controller %d Addr at %lx\n",
> -			 "UE", ctl_num, convert_to_physical(priv, pinf, ctl_num, error_data));
> +			 "UE", ctl_num, pa);
>   
>   		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
> -				     1, 0, 0, 0, 0, 0, -1,
> +				     1, pfn, offset_in_page(pa), 0, 0, 0, -1,
>   				     priv->message, "");
> -		pa = convert_to_physical(priv, pinf, ctl_num, error_data);
> -		pfn = PHYS_PFN(pa);
> -
>   		if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) {
>   			err = memory_failure(pfn, MF_ACTION_REQUIRED);
>   			if (err)

Nit: the pa and pfn calculation can be moved out of the if() blocks.

Irrespective of the nit, the patch looks good.

Reviewed-by: Prasanna Kumar T S M <ptsm@linux.microsoft.com>