[PATCH v2] x86/resctrl: Fix arch_mbm_* array overrun on SNC

Peter Newman posted 1 patch 1 year, 3 months ago
arch/x86/include/asm/resctrl.h     | 6 ------
arch/x86/kernel/cpu/resctrl/core.c | 8 ++++++++
include/linux/resctrl.h            | 1 +
3 files changed, 9 insertions(+), 6 deletions(-)
[PATCH v2] x86/resctrl: Fix arch_mbm_* array overrun on SNC
Posted by Peter Newman 1 year, 3 months ago
When using resctrl on systems with Sub-NUMA Clustering enabled,
monitoring groups may be allocated RMID values which would overrun the
arch_mbm_{local,total} arrays.

This is due to inconsistencies in whether the SNC-adjusted num_rmid
value or the unadjusted value in resctrl_arch_system_num_rmid_idx() is
used. The num_rmid value for the L3 resource is currently:

 resctrl_arch_system_num_rmid_idx() / snc_nodes_per_l3_cache

As a simple fix, make resctrl_arch_system_num_rmid_idx() return the
SNC-adjusted, L3 num_rmid value on x86.

Fixes: e13db55b5a0d ("x86/resctrl: Introduce snc_nodes_per_l3_cache")
Signed-off-by: Peter Newman <peternewman@google.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
---
v1->v2:
 - Squashed blank lines added to linux/resctrl.h
 - Reviewed-by Reinette
 
v1: https://lore.kernel.org/lkml/20240722204611.3549213-1-peternewman@google.com/

 arch/x86/include/asm/resctrl.h     | 6 ------
 arch/x86/kernel/cpu/resctrl/core.c | 8 ++++++++
 include/linux/resctrl.h            | 1 +
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 12dbd2588ca7..8b1b6ce1e51b 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -156,12 +156,6 @@ static inline void resctrl_sched_in(struct task_struct *tsk)
 		__resctrl_sched_in(tsk);
 }
 
-static inline u32 resctrl_arch_system_num_rmid_idx(void)
-{
-	/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
-	return boot_cpu_data.x86_cache_max_rmid + 1;
-}
-
 static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
 {
 	*rmid = idx;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 1930fce9dfe9..8591d53c144b 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -119,6 +119,14 @@ struct rdt_hw_resource rdt_resources_all[] = {
 	},
 };
 
+u32 resctrl_arch_system_num_rmid_idx(void)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
+	/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
+	return r->num_rmid;
+}
+
 /*
  * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
  * as they do not have CPUID enumeration support for Cache allocation.
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index b0875b99e811..d94abba1c716 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -248,6 +248,7 @@ struct resctrl_schema {
 
 /* The number of closid supported by this resource regardless of CDP */
 u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
+u32 resctrl_arch_system_num_rmid_idx(void);
 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
 
 /*

base-commit: 47ac09b91befbb6a235ab620c32af719f8208399
-- 
2.46.0.295.g3b9ea8a38a-goog
Re: [PATCH v2] x86/resctrl: Fix arch_mbm_* array overrun on SNC
Posted by Reinette Chatre 1 year, 3 months ago
Thank you very much Peter.

Boris, could you please consider this for inclusion?

Thank you

Reinette

On 8/22/24 12:02 PM, Peter Newman wrote:
> When using resctrl on systems with Sub-NUMA Clustering enabled,
> monitoring groups may be allocated RMID values which would overrun the
> arch_mbm_{local,total} arrays.
> 
> This is due to inconsistencies in whether the SNC-adjusted num_rmid
> value or the unadjusted value in resctrl_arch_system_num_rmid_idx() is
> used. The num_rmid value for the L3 resource is currently:
> 
>   resctrl_arch_system_num_rmid_idx() / snc_nodes_per_l3_cache
> 
> As a simple fix, make resctrl_arch_system_num_rmid_idx() return the
> SNC-adjusted, L3 num_rmid value on x86.
> 
> Fixes: e13db55b5a0d ("x86/resctrl: Introduce snc_nodes_per_l3_cache")
> Signed-off-by: Peter Newman <peternewman@google.com>
> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
> ---
> v1->v2:
>   - Squashed blank lines added to linux/resctrl.h
>   - Reviewed-by Reinette
>   
> v1: https://lore.kernel.org/lkml/20240722204611.3549213-1-peternewman@google.com/
> 
>   arch/x86/include/asm/resctrl.h     | 6 ------
>   arch/x86/kernel/cpu/resctrl/core.c | 8 ++++++++
>   include/linux/resctrl.h            | 1 +
>   3 files changed, 9 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
> index 12dbd2588ca7..8b1b6ce1e51b 100644
> --- a/arch/x86/include/asm/resctrl.h
> +++ b/arch/x86/include/asm/resctrl.h
> @@ -156,12 +156,6 @@ static inline void resctrl_sched_in(struct task_struct *tsk)
>   		__resctrl_sched_in(tsk);
>   }
>   
> -static inline u32 resctrl_arch_system_num_rmid_idx(void)
> -{
> -	/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
> -	return boot_cpu_data.x86_cache_max_rmid + 1;
> -}
> -
>   static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
>   {
>   	*rmid = idx;
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 1930fce9dfe9..8591d53c144b 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -119,6 +119,14 @@ struct rdt_hw_resource rdt_resources_all[] = {
>   	},
>   };
>   
> +u32 resctrl_arch_system_num_rmid_idx(void)
> +{
> +	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> +
> +	/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
> +	return r->num_rmid;
> +}
> +
>   /*
>    * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
>    * as they do not have CPUID enumeration support for Cache allocation.
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index b0875b99e811..d94abba1c716 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -248,6 +248,7 @@ struct resctrl_schema {
>   
>   /* The number of closid supported by this resource regardless of CDP */
>   u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
> +u32 resctrl_arch_system_num_rmid_idx(void);
>   int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
>   
>   /*
> 
> base-commit: 47ac09b91befbb6a235ab620c32af719f8208399
[tip: x86/urgent] x86/resctrl: Fix arch_mbm_* array overrun on SNC
Posted by tip-bot2 for Peter Newman 1 year, 3 months ago
The following commit has been merged into the x86/urgent branch of tip:

Commit-ID:     a547a5880cba6f287179135381f1b484b251be31
Gitweb:        https://git.kernel.org/tip/a547a5880cba6f287179135381f1b484b251be31
Author:        Peter Newman <peternewman@google.com>
AuthorDate:    Thu, 22 Aug 2024 12:02:11 -07:00
Committer:     Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Wed, 28 Aug 2024 11:13:08 +02:00

x86/resctrl: Fix arch_mbm_* array overrun on SNC

When using resctrl on systems with Sub-NUMA Clustering enabled, monitoring
groups may be allocated RMID values which would overrun the
arch_mbm_{local,total} arrays.

This is due to inconsistencies in whether the SNC-adjusted num_rmid value or
the unadjusted value in resctrl_arch_system_num_rmid_idx() is used. The
num_rmid value for the L3 resource is currently:

  resctrl_arch_system_num_rmid_idx() / snc_nodes_per_l3_cache

As a simple fix, make resctrl_arch_system_num_rmid_idx() return the
SNC-adjusted, L3 num_rmid value on x86.

Fixes: e13db55b5a0d ("x86/resctrl: Introduce snc_nodes_per_l3_cache")
Signed-off-by: Peter Newman <peternewman@google.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lore.kernel.org/r/20240822190212.1848788-1-peternewman@google.com
---
 arch/x86/include/asm/resctrl.h     | 6 ------
 arch/x86/kernel/cpu/resctrl/core.c | 8 ++++++++
 include/linux/resctrl.h            | 1 +
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 12dbd25..8b1b6ce 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -156,12 +156,6 @@ static inline void resctrl_sched_in(struct task_struct *tsk)
 		__resctrl_sched_in(tsk);
 }
 
-static inline u32 resctrl_arch_system_num_rmid_idx(void)
-{
-	/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
-	return boot_cpu_data.x86_cache_max_rmid + 1;
-}
-
 static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
 {
 	*rmid = idx;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 1930fce..8591d53 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -119,6 +119,14 @@ struct rdt_hw_resource rdt_resources_all[] = {
 	},
 };
 
+u32 resctrl_arch_system_num_rmid_idx(void)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
+	/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
+	return r->num_rmid;
+}
+
 /*
  * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
  * as they do not have CPUID enumeration support for Cache allocation.
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index b0875b9..d94abba 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -248,6 +248,7 @@ struct resctrl_schema {
 
 /* The number of closid supported by this resource regardless of CDP */
 u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
+u32 resctrl_arch_system_num_rmid_idx(void);
 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
 
 /*