[PATCH 3/7] hwmon/coretemp: Handle large core id value

Zhang Rui posted 7 patches 3 years, 8 months ago
There is a newer version of this series
[PATCH 3/7] hwmon/coretemp: Handle large core id value
Posted by Zhang Rui 3 years, 8 months ago
The coretemp driver supports up to a hard-coded limit of 128 cores.

Today, the driver can not support a core with an id above that limit.
Yet, the encoding of core_id's is arbitrary (BIOS APIC-id) and so they
may be sparse and they may be large.

Update the driver to map arbitrary core_id numbers into appropriate
array indexes so that 128 cores can be supported, no matter the encoding
of core_ids's.

Acked-by: Len Brown <len.brown@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/hwmon/coretemp.c | 55 +++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 15 deletions(-)

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index ccf0af5b988a..3f0f7d7612ae 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -46,9 +46,6 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
 #define TOTAL_ATTRS		(MAX_CORE_ATTRS + 1)
 #define MAX_CORE_DATA		(NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
 
-#define TO_CORE_ID(cpu)		(cpu_data(cpu).cpu_core_id)
-#define TO_ATTR_NO(cpu)		(TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO)
-
 #ifdef CONFIG_SMP
 #define for_each_sibling(i, cpu) \
 	for_each_cpu(i, topology_sibling_cpumask(cpu))
@@ -91,6 +88,8 @@ struct temp_data {
 struct platform_data {
 	struct device		*hwmon_dev;
 	u16			pkg_id;
+	u16			cpu_map[NUM_REAL_CORES];
+	struct ida		ida;
 	struct cpumask		cpumask;
 	struct temp_data	*core_data[MAX_CORE_DATA];
 	struct device_attribute name_attr;
@@ -441,7 +440,7 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
 							MSR_IA32_THERM_STATUS;
 	tdata->is_pkg_data = pkg_flag;
 	tdata->cpu = cpu;
-	tdata->cpu_core_id = TO_CORE_ID(cpu);
+	tdata->cpu_core_id = topology_core_id(cpu);
 	tdata->attr_size = MAX_CORE_ATTRS;
 	mutex_init(&tdata->update_lock);
 	return tdata;
@@ -454,7 +453,7 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
 	struct platform_data *pdata = platform_get_drvdata(pdev);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	u32 eax, edx;
-	int err, attr_no;
+	int err, index, attr_no;
 
 	/*
 	 * Find attr number for sysfs:
@@ -462,14 +461,26 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
 	 * The attr number is always core id + 2
 	 * The Pkgtemp will always show up as temp1_*, if available
 	 */
-	attr_no = pkg_flag ? PKG_SYSFS_ATTR_NO : TO_ATTR_NO(cpu);
+	if (pkg_flag)
+		attr_no = PKG_SYSFS_ATTR_NO;
+	else {
+		index = ida_alloc(&pdata->ida, GFP_KERNEL);
+		if (index < 0)
+			return index;
+		pdata->cpu_map[index] = topology_core_id(cpu);
+		attr_no = index + BASE_SYSFS_ATTR_NO;
+	}
 
-	if (attr_no > MAX_CORE_DATA - 1)
-		return -ERANGE;
+	if (attr_no > MAX_CORE_DATA - 1) {
+		err = -ERANGE;
+		goto ida_free;
+	}
 
 	tdata = init_temp_data(cpu, pkg_flag);
-	if (!tdata)
-		return -ENOMEM;
+	if (!tdata) {
+		err = -ENOMEM;
+		goto ida_free;
+	}
 
 	/* Test if we can access the status register */
 	err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
@@ -505,6 +516,9 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
 exit_free:
 	pdata->core_data[attr_no] = NULL;
 	kfree(tdata);
+ida_free:
+	if (!pkg_flag)
+		ida_free(&pdata->ida, index);
 	return err;
 }
 
@@ -524,6 +538,8 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx)
 
 	kfree(pdata->core_data[indx]);
 	pdata->core_data[indx] = NULL;
+
+	ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
 }
 
 static int coretemp_probe(struct platform_device *pdev)
@@ -537,6 +553,7 @@ static int coretemp_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	pdata->pkg_id = pdev->id;
+	ida_init(&pdata->ida);
 	platform_set_drvdata(pdev, pdata);
 
 	pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, DRVNAME,
@@ -553,6 +570,7 @@ static int coretemp_remove(struct platform_device *pdev)
 		if (pdata->core_data[i])
 			coretemp_remove_core(pdata, i);
 
+	ida_destroy(&pdata->ida);
 	return 0;
 }
 
@@ -647,7 +665,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
 	struct platform_device *pdev = coretemp_get_pdev(cpu);
 	struct platform_data *pd;
 	struct temp_data *tdata;
-	int indx, target;
+	int i, indx = -1, target;
 
 	/*
 	 * Don't execute this on suspend as the device remove locks
@@ -660,12 +678,19 @@ static int coretemp_cpu_offline(unsigned int cpu)
 	if (!pdev)
 		return 0;
 
-	/* The core id is too big, just return */
-	indx = TO_ATTR_NO(cpu);
-	if (indx > MAX_CORE_DATA - 1)
+	pd = platform_get_drvdata(pdev);
+
+	for (i = 0; i < NUM_REAL_CORES; i++) {
+		if (pd->cpu_map[i] == topology_core_id(cpu)) {
+			indx = i + BASE_SYSFS_ATTR_NO;
+			break;
+		}
+	}
+
+	/* Too many cores and this core is not pupolated, just return */
+	if (indx < 0)
 		return 0;
 
-	pd = platform_get_drvdata(pdev);
 	tdata = pd->core_data[indx];
 
 	cpumask_clear_cpu(cpu, &pd->cpumask);
-- 
2.34.1
Re: [PATCH 3/7] hwmon/coretemp: Handle large core id value
Posted by Ingo Molnar 3 years, 8 months ago
* Zhang Rui <rui.zhang@intel.com> wrote:

> The coretemp driver supports up to a hard-coded limit of 128 cores.
> 
> Today, the driver can not support a core with an id above that limit.
> Yet, the encoding of core_id's is arbitrary (BIOS APIC-id) and so they
> may be sparse and they may be large.
> 
> Update the driver to map arbitrary core_id numbers into appropriate
> array indexes so that 128 cores can be supported, no matter the encoding
> of core_ids's.

Please capitalize 'ID' consistently throughout the series.

> -	attr_no = pkg_flag ? PKG_SYSFS_ATTR_NO : TO_ATTR_NO(cpu);
> +	if (pkg_flag)
> +		attr_no = PKG_SYSFS_ATTR_NO;
> +	else {
> +		index = ida_alloc(&pdata->ida, GFP_KERNEL);
> +		if (index < 0)
> +			return index;
> +		pdata->cpu_map[index] = topology_core_id(cpu);
> +		attr_no = index + BASE_SYSFS_ATTR_NO;
> +	}

Unbalanced curly braces.

> -	int err, attr_no;
> +	int err, index, attr_no;

So it's 'index' here.

> @@ -524,6 +538,8 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx)

But 'indx' here.

> -	int indx, target;
> +	int i, indx = -1, target;

And 'indx' again. Did we run out of the letter 'e'? Either use 'index' 
naming consistently, or 'idx' if it has to be abbreviated.

Thanks,

	Ingo
Re: [PATCH 3/7] hwmon/coretemp: Handle large core id value
Posted by Zhang Rui 3 years, 8 months ago
On Sat, 2022-08-13 at 12:48 +0200, Ingo Molnar wrote:
> 
> * Zhang Rui <rui.zhang@intel.com> wrote:
> 
> > The coretemp driver supports up to a hard-coded limit of 128 cores.
> > 
> > Today, the driver can not support a core with an id above that
> > limit.
> > Yet, the encoding of core_id's is arbitrary (BIOS APIC-id) and so
> > they
> > may be sparse and they may be large.
> > 
> > Update the driver to map arbitrary core_id numbers into appropriate
> > array indexes so that 128 cores can be supported, no matter the
> > encoding
> > of core_ids's.
> 
> Please capitalize 'ID' consistently throughout the series.
> 
> > -       attr_no = pkg_flag ? PKG_SYSFS_ATTR_NO : TO_ATTR_NO(cpu);
> > +       if (pkg_flag)
> > +               attr_no = PKG_SYSFS_ATTR_NO;
> > +       else {
> > +               index = ida_alloc(&pdata->ida, GFP_KERNEL);
> > +               if (index < 0)
> > +                       return index;
> > +               pdata->cpu_map[index] = topology_core_id(cpu);
> > +               attr_no = index + BASE_SYSFS_ATTR_NO;
> > +       }
> 
> Unbalanced curly braces.

Sure, will fix these two issues in next version.

> 
> > -       int err, attr_no;
> > +       int err, index, attr_no;
> 
> So it's 'index' here.
> 
> > @@ -524,6 +538,8 @@ static void coretemp_remove_core(struct
> > platform_data *pdata, int indx)
> 
> But 'indx' here.
> 
> > -       int indx, target;
> > +       int i, indx = -1, target;
> 
> And 'indx' again. Did we run out of the letter 'e'? Either use
> 'index' 
> naming consistently, or 'idx' if it has to be abbreviated.

I'd prefer 'index', but here, this 'indx' is from previous code and
this patch just initializes it to -1.

thanks,
rui
> 
> Thanks,
> 
>         Ingo

Re: [PATCH 3/7] hwmon/coretemp: Handle large core id value
Posted by Ingo Molnar 3 years, 8 months ago
* Zhang Rui <rui.zhang@intel.com> wrote:

> On Sat, 2022-08-13 at 12:48 +0200, Ingo Molnar wrote:
> > 
> > * Zhang Rui <rui.zhang@intel.com> wrote:
> > 
> > > The coretemp driver supports up to a hard-coded limit of 128 cores.
> > > 
> > > Today, the driver can not support a core with an id above that
> > > limit.
> > > Yet, the encoding of core_id's is arbitrary (BIOS APIC-id) and so
> > > they
> > > may be sparse and they may be large.
> > > 
> > > Update the driver to map arbitrary core_id numbers into appropriate
> > > array indexes so that 128 cores can be supported, no matter the
> > > encoding
> > > of core_ids's.
> > 
> > Please capitalize 'ID' consistently throughout the series.
> > 
> > > -       attr_no = pkg_flag ? PKG_SYSFS_ATTR_NO : TO_ATTR_NO(cpu);
> > > +       if (pkg_flag)
> > > +               attr_no = PKG_SYSFS_ATTR_NO;
> > > +       else {
> > > +               index = ida_alloc(&pdata->ida, GFP_KERNEL);
> > > +               if (index < 0)
> > > +                       return index;
> > > +               pdata->cpu_map[index] = topology_core_id(cpu);
> > > +               attr_no = index + BASE_SYSFS_ATTR_NO;
> > > +       }
> > 
> > Unbalanced curly braces.
> 
> Sure, will fix these two issues in next version.
> 
> > 
> > > -       int err, attr_no;
> > > +       int err, index, attr_no;
> > 
> > So it's 'index' here.
> > 
> > > @@ -524,6 +538,8 @@ static void coretemp_remove_core(struct
> > > platform_data *pdata, int indx)
> > 
> > But 'indx' here.
> > 
> > > -       int indx, target;
> > > +       int i, indx = -1, target;
> > 
> > And 'indx' again. Did we run out of the letter 'e'? Either use
> > 'index' 
> > naming consistently, or 'idx' if it has to be abbreviated.
> 
> I'd prefer 'index', but here, this 'indx' is from previous code and
> this patch just initializes it to -1.

Then queue up a cleanup patch as patch #1 - but idiosyncratic noise like 
that makes review harder.

Thanks,

	Ingo
Re: [PATCH 3/7] hwmon/coretemp: Handle large core id value
Posted by Guenter Roeck 3 years, 8 months ago
On Sat, Aug 13, 2022 at 12:41:40AM +0800, Zhang Rui wrote:
> The coretemp driver supports up to a hard-coded limit of 128 cores.
> 
> Today, the driver can not support a core with an id above that limit.
> Yet, the encoding of core_id's is arbitrary (BIOS APIC-id) and so they
> may be sparse and they may be large.
> 
> Update the driver to map arbitrary core_id numbers into appropriate
> array indexes so that 128 cores can be supported, no matter the encoding
> of core_ids's.
> 
> Acked-by: Len Brown <len.brown@intel.com>
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> ---
>  drivers/hwmon/coretemp.c | 55 +++++++++++++++++++++++++++++-----------
>  1 file changed, 40 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index ccf0af5b988a..3f0f7d7612ae 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -46,9 +46,6 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
>  #define TOTAL_ATTRS		(MAX_CORE_ATTRS + 1)
>  #define MAX_CORE_DATA		(NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
>  
> -#define TO_CORE_ID(cpu)		(cpu_data(cpu).cpu_core_id)
> -#define TO_ATTR_NO(cpu)		(TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO)
> -
>  #ifdef CONFIG_SMP
>  #define for_each_sibling(i, cpu) \
>  	for_each_cpu(i, topology_sibling_cpumask(cpu))
> @@ -91,6 +88,8 @@ struct temp_data {
>  struct platform_data {
>  	struct device		*hwmon_dev;
>  	u16			pkg_id;
> +	u16			cpu_map[NUM_REAL_CORES];
> +	struct ida		ida;
>  	struct cpumask		cpumask;
>  	struct temp_data	*core_data[MAX_CORE_DATA];
>  	struct device_attribute name_attr;
> @@ -441,7 +440,7 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
>  							MSR_IA32_THERM_STATUS;
>  	tdata->is_pkg_data = pkg_flag;
>  	tdata->cpu = cpu;
> -	tdata->cpu_core_id = TO_CORE_ID(cpu);
> +	tdata->cpu_core_id = topology_core_id(cpu);
>  	tdata->attr_size = MAX_CORE_ATTRS;
>  	mutex_init(&tdata->update_lock);
>  	return tdata;
> @@ -454,7 +453,7 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
>  	struct platform_data *pdata = platform_get_drvdata(pdev);
>  	struct cpuinfo_x86 *c = &cpu_data(cpu);
>  	u32 eax, edx;
> -	int err, attr_no;
> +	int err, index, attr_no;
>  
>  	/*
>  	 * Find attr number for sysfs:
> @@ -462,14 +461,26 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
>  	 * The attr number is always core id + 2
>  	 * The Pkgtemp will always show up as temp1_*, if available
>  	 */
> -	attr_no = pkg_flag ? PKG_SYSFS_ATTR_NO : TO_ATTR_NO(cpu);
> +	if (pkg_flag)
> +		attr_no = PKG_SYSFS_ATTR_NO;
> +	else {
> +		index = ida_alloc(&pdata->ida, GFP_KERNEL);
> +		if (index < 0)
> +			return index;
> +		pdata->cpu_map[index] = topology_core_id(cpu);
> +		attr_no = index + BASE_SYSFS_ATTR_NO;
> +	}
>  
> -	if (attr_no > MAX_CORE_DATA - 1)
> -		return -ERANGE;
> +	if (attr_no > MAX_CORE_DATA - 1) {
> +		err = -ERANGE;
> +		goto ida_free;
> +	}
>  
>  	tdata = init_temp_data(cpu, pkg_flag);
> -	if (!tdata)
> -		return -ENOMEM;
> +	if (!tdata) {
> +		err = -ENOMEM;
> +		goto ida_free;
> +	}
>  
>  	/* Test if we can access the status register */
>  	err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
> @@ -505,6 +516,9 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
>  exit_free:
>  	pdata->core_data[attr_no] = NULL;
>  	kfree(tdata);
> +ida_free:
> +	if (!pkg_flag)
> +		ida_free(&pdata->ida, index);
>  	return err;
>  }
>  
> @@ -524,6 +538,8 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx)
>  
>  	kfree(pdata->core_data[indx]);
>  	pdata->core_data[indx] = NULL;
> +
> +	ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
>  }
>  
>  static int coretemp_probe(struct platform_device *pdev)
> @@ -537,6 +553,7 @@ static int coretemp_probe(struct platform_device *pdev)
>  		return -ENOMEM;
>  
>  	pdata->pkg_id = pdev->id;
> +	ida_init(&pdata->ida);
>  	platform_set_drvdata(pdev, pdata);
>  
>  	pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, DRVNAME,
> @@ -553,6 +570,7 @@ static int coretemp_remove(struct platform_device *pdev)
>  		if (pdata->core_data[i])
>  			coretemp_remove_core(pdata, i);
>  
> +	ida_destroy(&pdata->ida);
>  	return 0;
>  }
>  
> @@ -647,7 +665,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
>  	struct platform_device *pdev = coretemp_get_pdev(cpu);
>  	struct platform_data *pd;
>  	struct temp_data *tdata;
> -	int indx, target;
> +	int i, indx = -1, target;
>  
>  	/*
>  	 * Don't execute this on suspend as the device remove locks
> @@ -660,12 +678,19 @@ static int coretemp_cpu_offline(unsigned int cpu)
>  	if (!pdev)
>  		return 0;
>  
> -	/* The core id is too big, just return */
> -	indx = TO_ATTR_NO(cpu);
> -	if (indx > MAX_CORE_DATA - 1)
> +	pd = platform_get_drvdata(pdev);
> +
> +	for (i = 0; i < NUM_REAL_CORES; i++) {
> +		if (pd->cpu_map[i] == topology_core_id(cpu)) {
> +			indx = i + BASE_SYSFS_ATTR_NO;
> +			break;
> +		}
> +	}
> +
> +	/* Too many cores and this core is not pupolated, just return */

populated

Other than that looks good.

Acked-by: Guenter Roeck <linux@roeck-us.net>

> +	if (indx < 0)
>  		return 0;
>  
> -	pd = platform_get_drvdata(pdev);
>  	tdata = pd->core_data[indx];
>  
>  	cpumask_clear_cpu(cpu, &pd->cpumask);
> -- 
> 2.34.1
>
Re: [PATCH 3/7] hwmon/coretemp: Handle large core id value
Posted by Zhang Rui 3 years, 8 months ago
Hi, Guenter,

On Fri, 2022-08-12 at 10:16 -0700, Guenter Roeck wrote:
> 
> > -       /* The core id is too big, just return */
> > -       indx = TO_ATTR_NO(cpu);
> > -       if (indx > MAX_CORE_DATA - 1)
> > +       pd = platform_get_drvdata(pdev);
> > +
> > +       for (i = 0; i < NUM_REAL_CORES; i++) {
> > +               if (pd->cpu_map[i] == topology_core_id(cpu)) {
> > +                       indx = i + BASE_SYSFS_ATTR_NO;
> > +                       break;
> > +               }
> > +       }
> > +
> > +       /* Too many cores and this core is not pupolated, just
> > return */
> 
> populated
> 
> Other than that looks good.
> 
> Acked-by: Guenter Roeck <linux@roeck-us.net>

Thanks for reviewing, will fix in next version.

thanks,
rui