[PATCH v3 2/5] EDAC/amd64: Remove NUM_CONTROLLERS macro

Avadhut Naik posted 5 patches 3 weeks, 2 days ago
[PATCH v3 2/5] EDAC/amd64: Remove NUM_CONTROLLERS macro
Posted by Avadhut Naik 3 weeks, 2 days ago
Currently, the NUM_CONTROLLERS macro is only used to statically allocate
the csels array of struct chip_select in struct amd64_pvt.

The size of this array, however, will never exceed the number of UMCs on
the SOC. Since, max_mcs variable in struct amd64_pvt already stores the
number of UMCs on the SOC, the macro can be removed and the static array
can be dynamically allocated instead.

Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
---
Changes in v3:
Patch introduced.
---
 drivers/edac/amd64_edac.c | 19 +++++++++++++------
 drivers/edac/amd64_edac.h |  5 ++---
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 3989794e4f29..0fade110c3fb 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -4000,30 +4000,34 @@ static int probe_one_instance(unsigned int nid)
 	if (ret < 0)
 		goto err_enable;
 
+	pvt->csels = kcalloc(pvt->max_mcs, sizeof(*pvt->csels), GFP_KERNEL);
+	if (!pvt->csels)
+		goto err_enable;
+
 	ret = pvt->ops->hw_info_get(pvt);
 	if (ret < 0)
-		goto err_enable;
+		goto err_csels;
 
 	ret = 0;
 	if (!instance_has_memory(pvt)) {
 		amd64_info("Node %d: No DIMMs detected.\n", nid);
-		goto err_enable;
+		goto err_csels;
 	}
 
 	if (!pvt->ops->ecc_enabled(pvt)) {
 		ret = -ENODEV;
 
 		if (!ecc_enable_override)
-			goto err_enable;
+			goto err_csels;
 
 		if (boot_cpu_data.x86 >= 0x17) {
 			amd64_warn("Forcing ECC on is not recommended on newer systems. Please enable ECC in BIOS.");
-			goto err_enable;
+			goto err_csels;
 		} else
 			amd64_warn("Forcing ECC on!\n");
 
 		if (!enable_ecc_error_reporting(s, nid, F3))
-			goto err_enable;
+			goto err_csels;
 	}
 
 	ret = init_one_instance(pvt);
@@ -4033,7 +4037,7 @@ static int probe_one_instance(unsigned int nid)
 		if (boot_cpu_data.x86 < 0x17)
 			restore_ecc_error_reporting(s, nid, F3);
 
-		goto err_enable;
+		goto err_csels;
 	}
 
 	amd64_info("%s detected (node %d).\n", pvt->ctl_name, pvt->mc_node_id);
@@ -4043,6 +4047,8 @@ static int probe_one_instance(unsigned int nid)
 
 	return ret;
 
+err_csels:
+	kfree(pvt->csels);
 err_enable:
 	hw_info_put(pvt);
 	kfree(pvt);
@@ -4077,6 +4083,7 @@ static void remove_one_instance(unsigned int nid)
 	/* Free the EDAC CORE resources */
 	mci->pvt_info = NULL;
 
+	kfree(pvt->csels);
 	hw_info_put(pvt);
 	kfree(pvt);
 	edac_mc_free(mci);
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 56999ed3ae56..39d30255c767 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -96,7 +96,6 @@
 /* Hardware limit on ChipSelect rows per MC and processors per system */
 #define NUM_CHIPSELECTS			8
 #define DRAM_RANGES			8
-#define NUM_CONTROLLERS			12
 
 #define ON true
 #define OFF false
@@ -347,8 +346,8 @@ struct amd64_pvt {
 	u32 dbam0;		/* DRAM Base Address Mapping reg for DCT0 */
 	u32 dbam1;		/* DRAM Base Address Mapping reg for DCT1 */
 
-	/* one for each DCT/UMC */
-	struct chip_select csels[NUM_CONTROLLERS];
+	/* Allocate one for each DCT/UMC */
+	struct chip_select *csels;
 
 	/* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */
 	struct dram_range ranges[DRAM_RANGES];
-- 
2.43.0
Re: [PATCH v3 2/5] EDAC/amd64: Remove NUM_CONTROLLERS macro
Posted by Yazen Ghannam 3 weeks, 1 day ago
On Tue, Sep 09, 2025 at 06:53:11PM +0000, Avadhut Naik wrote:
> Currently, the NUM_CONTROLLERS macro is only used to statically allocate
> the csels array of struct chip_select in struct amd64_pvt.
> 
> The size of this array, however, will never exceed the number of UMCs on
> the SOC. Since, max_mcs variable in struct amd64_pvt already stores the
> number of UMCs on the SOC, the macro can be removed and the static array
> can be dynamically allocated instead.

You should note that max_mcs and the csels array are also used in legacy
systems with 'DCTs'.

Those had a max of 2 controllers which we already set in
per_family_init() as the global default. So the legacy systems are
covered by this change too.

Without noting this, it seems like that case may be overlooked.

> 
> Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
> ---
> Changes in v3:
> Patch introduced.
> ---
>  drivers/edac/amd64_edac.c | 19 +++++++++++++------
>  drivers/edac/amd64_edac.h |  5 ++---
>  2 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index 3989794e4f29..0fade110c3fb 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -4000,30 +4000,34 @@ static int probe_one_instance(unsigned int nid)
>  	if (ret < 0)
>  		goto err_enable;
>  
> +	pvt->csels = kcalloc(pvt->max_mcs, sizeof(*pvt->csels), GFP_KERNEL);
> +	if (!pvt->csels)
> +		goto err_enable;
> +

You can move this allocation to the end of per_family_init(). That's
where we determine 'max_mcs'.

If you do so, then the 'goto' changes below are not needed.

Another option is to put it in hw_info_get() like we do for UMCs. But
that means adding the allocation to three different helper functions
rather than just the one with per_family_init().

>  	ret = pvt->ops->hw_info_get(pvt);
>  	if (ret < 0)
> -		goto err_enable;
> +		goto err_csels;
>  
>  	ret = 0;
>  	if (!instance_has_memory(pvt)) {
>  		amd64_info("Node %d: No DIMMs detected.\n", nid);
> -		goto err_enable;
> +		goto err_csels;
>  	}
>  
>  	if (!pvt->ops->ecc_enabled(pvt)) {
>  		ret = -ENODEV;
>  
>  		if (!ecc_enable_override)
> -			goto err_enable;
> +			goto err_csels;
>  
>  		if (boot_cpu_data.x86 >= 0x17) {
>  			amd64_warn("Forcing ECC on is not recommended on newer systems. Please enable ECC in BIOS.");
> -			goto err_enable;
> +			goto err_csels;
>  		} else
>  			amd64_warn("Forcing ECC on!\n");
>  
>  		if (!enable_ecc_error_reporting(s, nid, F3))
> -			goto err_enable;
> +			goto err_csels;
>  	}
>  
>  	ret = init_one_instance(pvt);
> @@ -4033,7 +4037,7 @@ static int probe_one_instance(unsigned int nid)
>  		if (boot_cpu_data.x86 < 0x17)
>  			restore_ecc_error_reporting(s, nid, F3);
>  
> -		goto err_enable;
> +		goto err_csels;
>  	}
>  
>  	amd64_info("%s detected (node %d).\n", pvt->ctl_name, pvt->mc_node_id);
> @@ -4043,6 +4047,8 @@ static int probe_one_instance(unsigned int nid)
>  
>  	return ret;
>  
> +err_csels:
> +	kfree(pvt->csels);

This can go in hw_info_put(). We have kfree(pvt->umc) there already.

>  err_enable:
>  	hw_info_put(pvt);
>  	kfree(pvt);
> @@ -4077,6 +4083,7 @@ static void remove_one_instance(unsigned int nid)
>  	/* Free the EDAC CORE resources */
>  	mci->pvt_info = NULL;
>  
> +	kfree(pvt->csels);
>  	hw_info_put(pvt);
>  	kfree(pvt);
>  	edac_mc_free(mci);
> diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
> index 56999ed3ae56..39d30255c767 100644
> --- a/drivers/edac/amd64_edac.h
> +++ b/drivers/edac/amd64_edac.h
> @@ -96,7 +96,6 @@
>  /* Hardware limit on ChipSelect rows per MC and processors per system */
>  #define NUM_CHIPSELECTS			8
>  #define DRAM_RANGES			8
> -#define NUM_CONTROLLERS			12
>  
>  #define ON true
>  #define OFF false
> @@ -347,8 +346,8 @@ struct amd64_pvt {
>  	u32 dbam0;		/* DRAM Base Address Mapping reg for DCT0 */
>  	u32 dbam1;		/* DRAM Base Address Mapping reg for DCT1 */
>  
> -	/* one for each DCT/UMC */
> -	struct chip_select csels[NUM_CONTROLLERS];
> +	/* Allocate one for each DCT/UMC */
> +	struct chip_select *csels;
>  
>  	/* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */
>  	struct dram_range ranges[DRAM_RANGES];
> -- 

Thanks,
Yazen
Re: [PATCH v3 2/5] EDAC/amd64: Remove NUM_CONTROLLERS macro
Posted by Naik, Avadhut 3 weeks, 1 day ago

On 9/10/2025 10:05, Yazen Ghannam wrote:
> On Tue, Sep 09, 2025 at 06:53:11PM +0000, Avadhut Naik wrote:
>> Currently, the NUM_CONTROLLERS macro is only used to statically allocate
>> the csels array of struct chip_select in struct amd64_pvt.
>>
>> The size of this array, however, will never exceed the number of UMCs on
>> the SOC. Since, max_mcs variable in struct amd64_pvt already stores the
>> number of UMCs on the SOC, the macro can be removed and the static array
>> can be dynamically allocated instead.
> 
> You should note that max_mcs and the csels array are also used in legacy
> systems with 'DCTs'.
> 
> Those had a max of 2 controllers which we already set in
> per_family_init() as the global default. So the legacy systems are
> covered by this change too.
> 
> Without noting this, it seems like that case may be overlooked.
> 
Will mention this in the commit message!

>>
>> Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
>> ---
>> Changes in v3:
>> Patch introduced.
>> ---
>>  drivers/edac/amd64_edac.c | 19 +++++++++++++------
>>  drivers/edac/amd64_edac.h |  5 ++---
>>  2 files changed, 15 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
>> index 3989794e4f29..0fade110c3fb 100644
>> --- a/drivers/edac/amd64_edac.c
>> +++ b/drivers/edac/amd64_edac.c
>> @@ -4000,30 +4000,34 @@ static int probe_one_instance(unsigned int nid)
>>  	if (ret < 0)
>>  		goto err_enable;
>>  
>> +	pvt->csels = kcalloc(pvt->max_mcs, sizeof(*pvt->csels), GFP_KERNEL);
>> +	if (!pvt->csels)
>> +		goto err_enable;
>> +
> 
> You can move this allocation to the end of per_family_init(). That's
> where we determine 'max_mcs'.
> 
> If you do so, then the 'goto' changes below are not needed.
> 
> Another option is to put it in hw_info_get() like we do for UMCs. But
> that means adding the allocation to three different helper functions
> rather than just the one with per_family_init().
> 

Had considered moving allocation to per_family_init() while sending
this set. But then didn't since I had stated that I would be adding
this allocation in probe_one_instance().
In any case, will move it to per_family_init().

>>  	ret = pvt->ops->hw_info_get(pvt);
>>  	if (ret < 0)
>> -		goto err_enable;
>> +		goto err_csels;
>>  
>>  	ret = 0;
>>  	if (!instance_has_memory(pvt)) {
>>  		amd64_info("Node %d: No DIMMs detected.\n", nid);
>> -		goto err_enable;
>> +		goto err_csels;
>>  	}
>>  
>>  	if (!pvt->ops->ecc_enabled(pvt)) {
>>  		ret = -ENODEV;
>>  
>>  		if (!ecc_enable_override)
>> -			goto err_enable;
>> +			goto err_csels;
>>  
>>  		if (boot_cpu_data.x86 >= 0x17) {
>>  			amd64_warn("Forcing ECC on is not recommended on newer systems. Please enable ECC in BIOS.");
>> -			goto err_enable;
>> +			goto err_csels;
>>  		} else
>>  			amd64_warn("Forcing ECC on!\n");
>>  
>>  		if (!enable_ecc_error_reporting(s, nid, F3))
>> -			goto err_enable;
>> +			goto err_csels;
>>  	}
>>  
>>  	ret = init_one_instance(pvt);
>> @@ -4033,7 +4037,7 @@ static int probe_one_instance(unsigned int nid)
>>  		if (boot_cpu_data.x86 < 0x17)
>>  			restore_ecc_error_reporting(s, nid, F3);
>>  
>> -		goto err_enable;
>> +		goto err_csels;
>>  	}
>>  
>>  	amd64_info("%s detected (node %d).\n", pvt->ctl_name, pvt->mc_node_id);
>> @@ -4043,6 +4047,8 @@ static int probe_one_instance(unsigned int nid)
>>  
>>  	return ret;
>>  
>> +err_csels:
>> +	kfree(pvt->csels);
> 
> This can go in hw_info_put(). We have kfree(pvt->umc) there already.
> 
Okay. Will move it to hw_info_put().

-- 
Thanks,
Avadhut Naik