[PATCH v1 5/6] x86/resctrl: Abstract PQR_ASSOC from generic code

Peter Newman posted 6 patches 1 year, 10 months ago
[PATCH v1 5/6] x86/resctrl: Abstract PQR_ASSOC from generic code
Posted by Peter Newman 1 year, 10 months ago
While CLOSID and RMID originated in RDT, the concept applies to other
architectures, as it's standard to write allocation and monitoring IDs
into per-CPU registers.

 - Rename resctrl_pqr_state and pqr_state to be more
   architecturally-neutral.

 - Introduce resctrl_arch_update_cpu() to replace the explicit write to
   MSR_IA32_PQR_ASSOC in __resctrl_sched_in(). In the case of MPAM,
   PARTID[_I,D] and PMG are a simple function of closid, rmid, and an
   internal global.

 - Update terminology containing explicit references to the PQR_ASSOC
   register.

Signed-off-by: Peter Newman <peternewman@google.com>
---
 arch/x86/kernel/cpu/resctrl/core.c        | 11 ++++++++---
 arch/x86/kernel/cpu/resctrl/internal.h    |  6 +++---
 arch/x86/kernel/cpu/resctrl/pseudo_lock.c |  4 ++--
 arch/x86/kernel/cpu/resctrl/rdtgroup.c    | 18 +++++++++---------
 include/linux/resctrl.h                   | 11 +++++++++++
 5 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index ae5878d748fc..4cc584754f8b 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -37,12 +37,12 @@
 static DEFINE_MUTEX(domain_list_lock);
 
 /*
- * The cached resctrl_pqr_state is strictly per CPU and can never be
+ * The cached resctrl_cpu_state is strictly per CPU and can never be
  * updated from a remote CPU. Functions which modify the state
  * are called with interrupts disabled and no preemption, which
  * is sufficient for the protection.
  */
-DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);
+DEFINE_PER_CPU(struct resctrl_cpu_state, resctrl_state);
 
 /*
  * Used to store the max resource name width and max resource data width
@@ -309,6 +309,11 @@ static void rdt_get_cdp_l2_config(void)
 	rdt_get_cdp_config(RDT_RESOURCE_L2);
 }
 
+void resctrl_arch_update_cpu(u32 ctrl_id, u32 mon_id)
+{
+	wrmsr(MSR_IA32_PQR_ASSOC, mon_id, ctrl_id);
+}
+
 static void
 mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
 {
@@ -598,7 +603,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 
 static void clear_closid_rmid(int cpu)
 {
-	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
+	struct resctrl_cpu_state *state = this_cpu_ptr(&resctrl_state);
 
 	state->default_group = &rdtgroup_default;
 	state->cur_closid = RESCTRL_RESERVED_CLOSID;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 0ba0d2428780..e30f42744ac7 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -332,7 +332,7 @@ struct rftype {
 };
 
 /**
- * struct resctrl_pqr_state - State cache for the PQR MSR
+ * struct resctrl_cpu_state - State cache for allocation/monitoring group IDs
  * @cur_rmid:		The cached Resource Monitoring ID
  * @cur_closid:		The cached Class Of Service ID
  * @default_group:	The user assigned rdtgroup
@@ -340,13 +340,13 @@ struct rftype {
  * The cache also helps to avoid pointless updates if the value does
  * not change.
  */
-struct resctrl_pqr_state {
+struct resctrl_cpu_state {
 	u32			cur_rmid;
 	u32			cur_closid;
 	struct rdtgroup		*default_group;
 };
 
-DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
+DECLARE_PER_CPU(struct resctrl_cpu_state, resctrl_state);
 
 /**
  * struct mbm_state - status for each MBM counter in each domain
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 884b88e25141..ca1805a566cb 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -480,8 +480,8 @@ static int pseudo_lock_fn(void *_rdtgrp)
 	 */
 	saved_msr = __rdmsr(MSR_MISC_FEATURE_CONTROL);
 	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
-	closid_p = this_cpu_read(pqr_state.cur_closid);
-	rmid_p = this_cpu_read(pqr_state.cur_rmid);
+	closid_p = this_cpu_read(resctrl_state.cur_closid);
+	rmid_p = this_cpu_read(resctrl_state.cur_rmid);
 	mem_r = plr->kmem;
 	size = plr->size;
 	line_size = plr->line_size;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index badf181c8cbb..bd067f7ed5b6 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -112,7 +112,7 @@ void rdt_staged_configs_clear(void)
  * + We can simply set current's closid to assign a task to a resource
  *   group.
  * + Context switch code can avoid extra memory references deciding which
- *   CLOSID to load into the PQR_ASSOC MSR
+ *   CLOSID to load into the CPU
  * - We give up some options in configuring resource groups across multi-socket
  *   systems.
  * - Our choices on how to configure each resource become progressively more
@@ -347,7 +347,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
  */
 void __resctrl_sched_in(struct task_struct *tsk)
 {
-	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
+	struct resctrl_cpu_state *state = this_cpu_ptr(&resctrl_state);
 	u32 closid = state->cur_closid;
 	u32 rmid = state->cur_rmid;
 	struct rdtgroup *rgrp;
@@ -401,7 +401,7 @@ void __resctrl_sched_in(struct task_struct *tsk)
 	if (closid != state->cur_closid || rmid != state->cur_rmid) {
 		state->cur_closid = closid;
 		state->cur_rmid = rmid;
-		wrmsr(MSR_IA32_PQR_ASSOC, rmid, closid);
+		resctrl_arch_update_cpu(closid, rmid);
 	}
 }
 
@@ -416,7 +416,7 @@ static void update_cpu_closid_rmid(void *info)
 	struct rdtgroup *r = info;
 
 	if (r)
-		this_cpu_write(pqr_state.default_group, r);
+		this_cpu_write(resctrl_state.default_group, r);
 
 	/*
 	 * We cannot unconditionally write the MSR because the current
@@ -635,8 +635,8 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp)
 static void _update_task_closid_rmid(void *task)
 {
 	/*
-	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
-	 * Otherwise, the MSR is updated when the task is scheduled in.
+	 * If the task is still current on this CPU, update the current ctrl
+	 * group. Otherwise, the CPU is updated when the task is scheduled in.
 	 */
 	if (task == current)
 		resctrl_sched_in(task);
@@ -3005,7 +3005,7 @@ static void rmdir_all_sub(void)
 		else
 			rdtgroup_remove(rdtgrp);
 	}
-	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
+	/* Update online CPUs to propagate group membership changes. */
 	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
 
 	kernfs_remove(kn_info);
@@ -3688,7 +3688,7 @@ static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
 
 	/* Update per cpu rmid of the moved CPUs first */
 	for_each_cpu(cpu, &rdtgrp->cpu_mask)
-		per_cpu(pqr_state.default_group, cpu) = prdtgrp;
+		per_cpu(resctrl_state.default_group, cpu) = prdtgrp;
 	/*
 	 * Update the MSR on moved CPUs and CPUs which have moved
 	 * task running on them.
@@ -3732,7 +3732,7 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
 
 	/* Update per cpu closid and rmid of the moved CPUs first */
 	for_each_cpu(cpu, &rdtgrp->cpu_mask)
-		per_cpu(pqr_state.default_group, cpu) = &rdtgroup_default;
+		per_cpu(resctrl_state.default_group, cpu) = &rdtgroup_default;
 
 	/*
 	 * Update the MSR on moved CPUs and CPUs which have moved
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index b2af1fbc7aa1..a6b1b13cc769 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -306,6 +306,17 @@ extern unsigned int resctrl_rmid_realloc_limit;
 
 DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
 
+/*
+ * resctrl_arch_update_cpu() - Make control and monitoring group IDs effective
+ *			       on the current CPU
+ *
+ * @ctrl_id:	An identifier for the control group which is to be used on the
+ *		current CPU.
+ * @mon_id:	An identifier for the monitoring group which is to be used on
+ *		the current CPU.
+ */
+void resctrl_arch_update_cpu(u32 ctrl_id, u32 mon_id);
+
 void __resctrl_sched_in(struct task_struct *tsk);
 
 /*
-- 
2.44.0.396.g6e790dbe36-goog
Re: [PATCH v1 5/6] x86/resctrl: Abstract PQR_ASSOC from generic code
Posted by Reinette Chatre 1 year, 10 months ago
Hi Peter,

On 3/25/2024 10:27 AM, Peter Newman wrote:
> While CLOSID and RMID originated in RDT, the concept applies to other
> architectures, as it's standard to write allocation and monitoring IDs
> into per-CPU registers.
> 
>  - Rename resctrl_pqr_state and pqr_state to be more
>    architecturally-neutral.
> 
>  - Introduce resctrl_arch_update_cpu() to replace the explicit write to
>    MSR_IA32_PQR_ASSOC in __resctrl_sched_in(). In the case of MPAM,
>    PARTID[_I,D] and PMG are a simple function of closid, rmid, and an
>    internal global.
> 
>  - Update terminology containing explicit references to the PQR_ASSOC
>    register.
> 
> Signed-off-by: Peter Newman <peternewman@google.com>
> ---
>  arch/x86/kernel/cpu/resctrl/core.c        | 11 ++++++++---
>  arch/x86/kernel/cpu/resctrl/internal.h    |  6 +++---
>  arch/x86/kernel/cpu/resctrl/pseudo_lock.c |  4 ++--
>  arch/x86/kernel/cpu/resctrl/rdtgroup.c    | 18 +++++++++---------
>  include/linux/resctrl.h                   | 11 +++++++++++
>  5 files changed, 33 insertions(+), 17 deletions(-)

Looks like __rdtgroup_move_task() still refers to the PQR MSR in comments
and it eventually ends up in fs code.

Also found in arch/x86/kernel/process_{32,64}.c:
	/* Load the Intel cache allocation PQR MSR. */

Reinette
Re: [PATCH v1 5/6] x86/resctrl: Abstract PQR_ASSOC from generic code
Posted by Reinette Chatre 1 year, 10 months ago
Hi Peter,

On 3/25/2024 10:27 AM, Peter Newman wrote:
> While CLOSID and RMID originated in RDT, the concept applies to other
> architectures, as it's standard to write allocation and monitoring IDs
> into per-CPU registers.
> 
>  - Rename resctrl_pqr_state and pqr_state to be more
>    architecturally-neutral.
> 
>  - Introduce resctrl_arch_update_cpu() to replace the explicit write to
>    MSR_IA32_PQR_ASSOC in __resctrl_sched_in(). In the case of MPAM,
>    PARTID[_I,D] and PMG are a simple function of closid, rmid, and an
>    internal global.
> 
>  - Update terminology containing explicit references to the PQR_ASSOC
>    register.

fyi ... while looking at the MPAM patches I just noticed that there
is an instance in the comments of update_closid_rmid() where this
was misspelled (PGR_ASSOC) and was thus not picked up by your rename
script.

Reinette
Re: [PATCH v1 5/6] x86/resctrl: Abstract PQR_ASSOC from generic code
Posted by Reinette Chatre 1 year, 10 months ago
Hi Peter,

On 3/25/2024 10:27 AM, Peter Newman wrote:
> While CLOSID and RMID originated in RDT, the concept applies to other
> architectures, as it's standard to write allocation and monitoring IDs
> into per-CPU registers.
> 
>  - Rename resctrl_pqr_state and pqr_state to be more
>    architecturally-neutral.

I think it will be helpful to also introduce the PQR register, otherwise
it is not clear how this rename is motivated by the first paragraph.

> 
>  - Introduce resctrl_arch_update_cpu() to replace the explicit write to
>    MSR_IA32_PQR_ASSOC in __resctrl_sched_in(). In the case of MPAM,
>    PARTID[_I,D] and PMG are a simple function of closid, rmid, and an
>    internal global.

It is not obvious where this is going ... it sounds as though MPAM will
need three parameters for this function?

> 
>  - Update terminology containing explicit references to the PQR_ASSOC
>    register.
> 
> Signed-off-by: Peter Newman <peternewman@google.com>
> ---
>  arch/x86/kernel/cpu/resctrl/core.c        | 11 ++++++++---
>  arch/x86/kernel/cpu/resctrl/internal.h    |  6 +++---
>  arch/x86/kernel/cpu/resctrl/pseudo_lock.c |  4 ++--
>  arch/x86/kernel/cpu/resctrl/rdtgroup.c    | 18 +++++++++---------
>  include/linux/resctrl.h                   | 11 +++++++++++
>  5 files changed, 33 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index ae5878d748fc..4cc584754f8b 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -37,12 +37,12 @@
>  static DEFINE_MUTEX(domain_list_lock);
>  
>  /*
> - * The cached resctrl_pqr_state is strictly per CPU and can never be
> + * The cached resctrl_cpu_state is strictly per CPU and can never be
>   * updated from a remote CPU. Functions which modify the state
>   * are called with interrupts disabled and no preemption, which
>   * is sufficient for the protection.
>   */
> -DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);
> +DEFINE_PER_CPU(struct resctrl_cpu_state, resctrl_state);
>  
>  /*
>   * Used to store the max resource name width and max resource data width
> @@ -309,6 +309,11 @@ static void rdt_get_cdp_l2_config(void)
>  	rdt_get_cdp_config(RDT_RESOURCE_L2);
>  }
>  
> +void resctrl_arch_update_cpu(u32 ctrl_id, u32 mon_id)

We already started using the names ctrl_hw_id and mon_hw_id. Could this be consistent
with that?

> +{
> +	wrmsr(MSR_IA32_PQR_ASSOC, mon_id, ctrl_id);
> +}
> +
>  static void
>  mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
>  {
> @@ -598,7 +603,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
>  
>  static void clear_closid_rmid(int cpu)
>  {
> -	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
> +	struct resctrl_cpu_state *state = this_cpu_ptr(&resctrl_state);
>  
>  	state->default_group = &rdtgroup_default;
>  	state->cur_closid = RESCTRL_RESERVED_CLOSID;
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 0ba0d2428780..e30f42744ac7 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -332,7 +332,7 @@ struct rftype {
>  };
>  
>  /**
> - * struct resctrl_pqr_state - State cache for the PQR MSR
> + * struct resctrl_cpu_state - State cache for allocation/monitoring group IDs
>   * @cur_rmid:		The cached Resource Monitoring ID
>   * @cur_closid:		The cached Class Of Service ID
>   * @default_group:	The user assigned rdtgroup
> @@ -340,13 +340,13 @@ struct rftype {
>   * The cache also helps to avoid pointless updates if the value does
>   * not change.
>   */
> -struct resctrl_pqr_state {
> +struct resctrl_cpu_state {
>  	u32			cur_rmid;
>  	u32			cur_closid;
>  	struct rdtgroup		*default_group;
>  };
>  
> -DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
> +DECLARE_PER_CPU(struct resctrl_cpu_state, resctrl_state);
>  
>  /**
>   * struct mbm_state - status for each MBM counter in each domain
> diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> index 884b88e25141..ca1805a566cb 100644
> --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> @@ -480,8 +480,8 @@ static int pseudo_lock_fn(void *_rdtgrp)
>  	 */
>  	saved_msr = __rdmsr(MSR_MISC_FEATURE_CONTROL);
>  	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
> -	closid_p = this_cpu_read(pqr_state.cur_closid);
> -	rmid_p = this_cpu_read(pqr_state.cur_rmid);
> +	closid_p = this_cpu_read(resctrl_state.cur_closid);
> +	rmid_p = this_cpu_read(resctrl_state.cur_rmid);
>  	mem_r = plr->kmem;
>  	size = plr->size;
>  	line_size = plr->line_size;
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index badf181c8cbb..bd067f7ed5b6 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -112,7 +112,7 @@ void rdt_staged_configs_clear(void)
>   * + We can simply set current's closid to assign a task to a resource
>   *   group.
>   * + Context switch code can avoid extra memory references deciding which
> - *   CLOSID to load into the PQR_ASSOC MSR
> + *   CLOSID to load into the CPU
>   * - We give up some options in configuring resource groups across multi-socket
>   *   systems.
>   * - Our choices on how to configure each resource become progressively more
> @@ -347,7 +347,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
>   */
>  void __resctrl_sched_in(struct task_struct *tsk)
>  {
> -	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
> +	struct resctrl_cpu_state *state = this_cpu_ptr(&resctrl_state);
>  	u32 closid = state->cur_closid;
>  	u32 rmid = state->cur_rmid;
>  	struct rdtgroup *rgrp;
> @@ -401,7 +401,7 @@ void __resctrl_sched_in(struct task_struct *tsk)
>  	if (closid != state->cur_closid || rmid != state->cur_rmid) {
>  		state->cur_closid = closid;
>  		state->cur_rmid = rmid;
> -		wrmsr(MSR_IA32_PQR_ASSOC, rmid, closid);
> +		resctrl_arch_update_cpu(closid, rmid);
>  	}
>  }
>  
> @@ -416,7 +416,7 @@ static void update_cpu_closid_rmid(void *info)
>  	struct rdtgroup *r = info;
>  
>  	if (r)
> -		this_cpu_write(pqr_state.default_group, r);
> +		this_cpu_write(resctrl_state.default_group, r);
>  
>  	/*
>  	 * We cannot unconditionally write the MSR because the current
> @@ -635,8 +635,8 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp)
>  static void _update_task_closid_rmid(void *task)
>  {
>  	/*
> -	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
> -	 * Otherwise, the MSR is updated when the task is scheduled in.
> +	 * If the task is still current on this CPU, update the current ctrl
> +	 * group. Otherwise, the CPU is updated when the task is scheduled in.

I think control and/or monitor group could be updated, not just control group?

>  	 */
>  	if (task == current)
>  		resctrl_sched_in(task);
> @@ -3005,7 +3005,7 @@ static void rmdir_all_sub(void)
>  		else
>  			rdtgroup_remove(rdtgrp);
>  	}
> -	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
> +	/* Update online CPUs to propagate group membership changes. */
>  	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
>  
>  	kernfs_remove(kn_info);
> @@ -3688,7 +3688,7 @@ static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
>  
>  	/* Update per cpu rmid of the moved CPUs first */
>  	for_each_cpu(cpu, &rdtgrp->cpu_mask)
> -		per_cpu(pqr_state.default_group, cpu) = prdtgrp;
> +		per_cpu(resctrl_state.default_group, cpu) = prdtgrp;
>  	/*
>  	 * Update the MSR on moved CPUs and CPUs which have moved
>  	 * task running on them.
> @@ -3732,7 +3732,7 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
>  
>  	/* Update per cpu closid and rmid of the moved CPUs first */
>  	for_each_cpu(cpu, &rdtgrp->cpu_mask)
> -		per_cpu(pqr_state.default_group, cpu) = &rdtgroup_default;
> +		per_cpu(resctrl_state.default_group, cpu) = &rdtgroup_default;
>  
>  	/*
>  	 * Update the MSR on moved CPUs and CPUs which have moved
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index b2af1fbc7aa1..a6b1b13cc769 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -306,6 +306,17 @@ extern unsigned int resctrl_rmid_realloc_limit;
>  
>  DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
>  
> +/*

For valid kerneldoc this needs to be /**

> + * resctrl_arch_update_cpu() - Make control and monitoring group IDs effective
> + *			       on the current CPU
> + *
> + * @ctrl_id:	An identifier for the control group which is to be used on the
> + *		current CPU.

Same comment here about naming.

> + * @mon_id:	An identifier for the monitoring group which is to be used on
> + *		the current CPU.
> + */
> +void resctrl_arch_update_cpu(u32 ctrl_id, u32 mon_id);
> +
>  void __resctrl_sched_in(struct task_struct *tsk);
>  
>  /*

Reinette