[PATCH v20 17/18] x86/resctrl: Sub-NUMA Cluster (SNC) detection

Tony Luck posted 18 patches 1 year, 8 months ago
There is a newer version of this series
[PATCH v20 17/18] x86/resctrl: Sub-NUMA Cluster (SNC) detection
Posted by Tony Luck 1 year, 8 months ago
There isn't a simple hardware bit that indicates whether a CPU is
running in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing
the number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in
the same NUMA node as CPU0.

If SNC mode is detected, print a single informational message to the
console.

Add the missing definition of pr_fmt() to monitor.c. This wasn't
noticed before as there are only "can't happen" console messages
from this file.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/monitor.c | 66 +++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index efbb84c00d79..9835706ef772 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -15,6 +15,8 @@
  * Software Developer Manual June 2016, volume 3, section 17.17.
  */
 
+#define pr_fmt(fmt)	"resctrl: " fmt
+
 #include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/sizes.h>
@@ -1108,6 +1110,68 @@ void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d)
 	wrmsrl(MSR_RMID_SNC_CONFIG, val);
 }
 
+/* CPU models that support MSR_RMID_SNC_CONFIG */
+static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
+	X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
+	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0),
+	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0),
+	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0),
+	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0),
+	{}
+};
+
+/*
+ * There isn't a simple hardware bit that indicates whether a CPU is running
+ * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the
+ * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in
+ * the same NUMA node as CPU0.
+ * It is not possible to accurately determine SNC state if the system is
+ * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes
+ * to L3 caches. It will be OK if the system is booted with hyperthreading
+ * disabled (since this doesn't affect the ratio).
+ */
+static __init int snc_get_config(void)
+{
+	struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE);
+	const cpumask_t *node0_cpumask;
+	int cpus_per_node, cpus_per_l3;
+	int ret;
+
+	if (!x86_match_cpu(snc_cpu_ids) || !ci)
+		return 1;
+
+	cpus_read_lock();
+	if (num_online_cpus() != num_present_cpus())
+		pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
+	cpus_read_unlock();
+
+	node0_cpumask = cpumask_of_node(cpu_to_node(0));
+
+	cpus_per_node = cpumask_weight(node0_cpumask);
+	cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map);
+
+	if (!cpus_per_node || !cpus_per_l3)
+		return 1;
+
+	ret = cpus_per_l3 / cpus_per_node;
+
+	/* sanity check: Only valid results are 1, 2, 3, 4 */
+	switch (ret) {
+	case 1:
+		break;
+	case 2 ... 4:
+		pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret);
+		rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE;
+		break;
+	default:
+		pr_warn("Ignore improbable SNC node count %d\n", ret);
+		ret = 1;
+		break;
+	}
+
+	return ret;
+}
+
 int __init rdt_get_mon_l3_config(struct rdt_resource *r)
 {
 	unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
@@ -1115,6 +1179,8 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
 	unsigned int threshold;
 	int ret;
 
+	snc_nodes_per_l3_cache = snc_get_config();
+
 	resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
 	hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache;
 	r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache;
-- 
2.45.0
Re: [PATCH v20 17/18] x86/resctrl: Sub-NUMA Cluster (SNC) detection
Posted by Markus Elfring 1 year, 7 months ago
…
> Add the missing definition of pr_fmt() to monitor.c. …

What do you think about adding a “Fixes” tag accordingly?


…
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
…
> +static __init int snc_get_config(void)
> +{
…
> +	cpus_read_lock();
> +	if (num_online_cpus() != num_present_cpus())
> +		pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
> +	cpus_read_unlock();
…

Would you be interested in using a statement like “guard(cpus_read_lock)();”?
https://elixir.bootlin.com/linux/v6.10-rc4/source/include/linux/cleanup.h#L133

Regards,
Markus
RE: [PATCH v20 17/18] x86/resctrl: Sub-NUMA Cluster (SNC) detection
Posted by Luck, Tony 1 year, 7 months ago
> > Add the missing definition of pr_fmt() to monitor.c. …
>
> How do you think about to add the tag “Fixes” accordingly?

Until this patch there were only "can't happen" pr_info()/pr_warn()
messages. So no real benefit from having this backported.

If it were to be backported, would need to split this out from the
rest of this patch as the rest of the changes are dependent on
the previous 16 patches in this series.

> > +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> …
> > +static __init int snc_get_config(void)
> > +{
> …
> > +   cpus_read_lock();
> > +   if (num_online_cpus() != num_present_cpus())
> > +           pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
> > +   cpus_read_unlock();
> …
>
> Would you become interested to apply a statement like “guard(cpus_read_lock)();”?
> https://elixir.bootlin.com/linux/v6.10-rc4/source/include/linux/cleanup.h#L133

IMHO it would be better to convert resctrl to using the cleanup.h helpers
as a separate series rather than having just one place use it.

-Tony
Re: [PATCH v20 17/18] x86/resctrl: Sub-NUMA Cluster (SNC) detection
Posted by Reinette Chatre 1 year, 7 months ago
Hi Tony,

On 6/10/24 11:35 AM, Tony Luck wrote:
> There isn't a simple hardware bit that indicates whether a CPU is
> running in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing
> the number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in
> the same NUMA node as CPU0.
> 
> If SNC mode is detected, print a single informational message to the
> console.
> 
> Add the missing definition of pr_fmt() to monitor.c. This wasn't
> noticed before as there are only "can't happen" console messages
> from this file.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>   arch/x86/kernel/cpu/resctrl/monitor.c | 66 +++++++++++++++++++++++++++
>   1 file changed, 66 insertions(+)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index efbb84c00d79..9835706ef772 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -15,6 +15,8 @@
>    * Software Developer Manual June 2016, volume 3, section 17.17.
>    */
>   
> +#define pr_fmt(fmt)	"resctrl: " fmt
> +
>   #include <linux/cpu.h>
>   #include <linux/module.h>
>   #include <linux/sizes.h>
> @@ -1108,6 +1110,68 @@ void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d)
>   	wrmsrl(MSR_RMID_SNC_CONFIG, val);
>   }
>   
> +/* CPU models that support MSR_RMID_SNC_CONFIG */
> +static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
> +	X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
> +	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0),
> +	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0),
> +	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0),
> +	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0),
> +	{}
> +};
> +
> +/*
> + * There isn't a simple hardware bit that indicates whether a CPU is running
> + * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the
> + * number CPUs sharing the L3 cache with CPU0 to the number of CPUs in

"number CPUs sharing" -> "number of CPUs sharing"?

> + * the same NUMA node as CPU0.
> + * It is not possible to accurately determine SNC state if the system is
> + * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes
> + * to L3 caches. It will be OK if system is booted with hyperthreading
> + * disabled (since this doesn't affect the ratio).
> + */
> +static __init int snc_get_config(void)
> +{
> +	struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE);
> +	const cpumask_t *node0_cpumask;
> +	int cpus_per_node, cpus_per_l3;
> +	int ret;
> +
> +	if (!x86_match_cpu(snc_cpu_ids) || !ci)
> +		return 1;
> +
> +	cpus_read_lock();
> +	if (num_online_cpus() != num_present_cpus())
> +		pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
> +	cpus_read_unlock();
> +
> +	node0_cpumask = cpumask_of_node(cpu_to_node(0));
> +
> +	cpus_per_node = cpumask_weight(node0_cpumask);
> +	cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map);
> +
> +	if (!cpus_per_node || !cpus_per_l3)
> +		return 1;
> +
> +	ret = cpus_per_l3 / cpus_per_node;
> +
> +	/* sanity check: Only valid results are 1, 2, 3, 4 */
> +	switch (ret) {
> +	case 1:
> +		break;
> +	case 2 ... 4:
> +		pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret);
> +		rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE;
> +		break;
> +	default:
> +		pr_warn("Ignore improbable SNC node count %d\n", ret);
> +		ret = 1;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
>   int __init rdt_get_mon_l3_config(struct rdt_resource *r)
>   {
>   	unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
> @@ -1115,6 +1179,8 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
>   	unsigned int threshold;
>   	int ret;
>   
> +	snc_nodes_per_l3_cache = snc_get_config();
> +
>   	resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
>   	hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache;
>   	r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache;

With typo fixed:

| Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>

Reinette