[PATCH v4 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu

James Morse posted 24 patches 2 years, 8 months ago
There is a newer version of this series
[PATCH v4 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu
Posted by James Morse 2 years, 8 months ago
When a CPU is taken offline resctrl may need to move the overflow or
limbo handlers to run on a different CPU.

Once the offline callbacks have been split, cqm_setup_limbo_handler()
will be called while the CPU that is going offline is still present
in the cpu_mask.

Pass the CPU to exclude to cqm_setup_limbo_handler() and
mbm_setup_overflow_handler(). These functions can use a variant of
cpumask_any_but() when selecting the CPU. -1 is used to indicate no CPUs
need excluding.

A subsequent patch moves these calls to be before CPUs have been removed,
so this exclude_cpu behaviour is temporary.

Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
Changes since v2:
 * Rephrased a comment to avoid a two letter bad-word. (we)
 * Avoid assigning mbm_work_cpu if the domain is going to be free()d
 * Added cpumask_any_housekeeping_but(), I dislike the name

Changes since v3:
 * Marked an explanatory comment as temporary as the subsequent patch is
   no longer adjacent.
---
 arch/x86/kernel/cpu/resctrl/core.c     |  8 +++--
 arch/x86/kernel/cpu/resctrl/internal.h | 37 +++++++++++++++++++++--
 arch/x86/kernel/cpu/resctrl/monitor.c  | 42 +++++++++++++++++++++-----
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |  6 ++--
 include/linux/resctrl.h                |  3 ++
 5 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index e00f3542e60e..187ed127a446 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -582,12 +582,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
 		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
 			cancel_delayed_work(&d->mbm_over);
-			mbm_setup_overflow_handler(d, 0);
+			/*
+			 * temporary: exclude_cpu=-1 as this CPU has already
+			 * been removed by cpumask_clear_cpu()
+			 */
+			mbm_setup_overflow_handler(d, 0, RESCTRL_PICK_ANY_CPU);
 		}
 		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
 		    has_busy_rmid(r, d)) {
 			cancel_delayed_work(&d->cqm_limbo);
-			cqm_setup_limbo_handler(d, 0);
+			cqm_setup_limbo_handler(d, 0, RESCTRL_PICK_ANY_CPU);
 		}
 	}
 }
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 021a8956518c..9cba8fc405b9 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -79,6 +79,37 @@ static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
 	return cpu;
 }
 
+/**
+ * cpumask_any_housekeeping_but() - Choose any CPU in @mask, preferring those
+ *			            that aren't marked nohz_full, excluding
+ *				    the provided CPU
+ * @mask:	The mask to pick a CPU from.
+ * @exclude_cpu:	The CPU to avoid picking.
+ *
+ * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
+ * CPUs that don't use nohz_full, these are preferred.
+ * Returns >= nr_cpu_ids if no CPUs are available.
+ */
+static inline unsigned int
+cpumask_any_housekeeping_but(const struct cpumask *mask, int exclude_cpu)
+{
+	int cpu, hk_cpu;
+
+	cpu = cpumask_any_but(mask, exclude_cpu);
+	if (tick_nohz_full_cpu(cpu)) {
+		hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
+		if  (hk_cpu == exclude_cpu) {
+			hk_cpu = cpumask_nth_andnot(1, mask,
+						    tick_nohz_full_mask);
+		}
+
+		if (hk_cpu < nr_cpu_ids)
+			cpu = hk_cpu;
+	}
+
+	return cpu;
+}
+
 struct rdt_fs_context {
 	struct kernfs_fs_context	kfc;
 	bool				enable_cdpl2;
@@ -564,11 +595,13 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
 		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
 		    int evtid, int first);
 void mbm_setup_overflow_handler(struct rdt_domain *dom,
-				unsigned long delay_ms);
+				unsigned long delay_ms,
+				int exclude_cpu);
 void mbm_handle_overflow(struct work_struct *work);
 void __init intel_rdt_mbm_apply_quirk(void);
 bool is_mba_sc(struct rdt_resource *r);
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+			     int exclude_cpu);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
 void __check_limbo(struct rdt_domain *d, bool force_free);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index ced933694f60..ae02185f3354 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -485,7 +485,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 		 * setup up the limbo worker.
 		 */
 		if (!has_busy_rmid(r, d))
-			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
+			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL, -1);
 		set_bit(idx, d->rmid_busy_llc);
 		entry->busy++;
 	}
@@ -810,15 +810,28 @@ void cqm_handle_limbo(struct work_struct *work)
 	mutex_unlock(&rdtgroup_mutex);
 }
 
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
+/**
+ * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
+ *                             domain.
+ * @delay_ms:      How far in the future the handler should run.
+ * @exclude_cpu:   CPU the handler must not run on, or RESCTRL_PICK_ANY_CPU.
+ */
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+			     int exclude_cpu)
 {
 	unsigned long delay = msecs_to_jiffies(delay_ms);
 	int cpu;
 
-	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
-	dom->cqm_work_cpu = cpu;
+	if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
+		cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+	else
+		cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
+						   exclude_cpu);
 
-	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+	if (cpu < nr_cpu_ids) {
+		dom->cqm_work_cpu = cpu;
+		schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+	}
 }
 
 void mbm_handle_overflow(struct work_struct *work)
@@ -864,7 +877,14 @@ void mbm_handle_overflow(struct work_struct *work)
 	mutex_unlock(&rdtgroup_mutex);
 }
 
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
+/**
+ * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
+ *                                domain.
+ * @delay_ms:      How far in the future the handler should run.
+ * @exclude_cpu:   CPU the handler must not run on, or RESCTRL_PICK_ANY_CPU.
+ */
+void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
+				int exclude_cpu)
 {
 	unsigned long delay = msecs_to_jiffies(delay_ms);
 	int cpu;
@@ -875,9 +895,15 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
 	 */
 	if (!resctrl_mounted || !resctrl_arch_mon_capable())
 		return;
-	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+	if (exclude_cpu == -1)
+		cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+	else
+		cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
+						   exclude_cpu);
 	dom->mbm_work_cpu = cpu;
-	schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
+
+	if (cpu < nr_cpu_ids)
+		schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
 }
 
 static int dom_data_init(struct rdt_resource *r)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 7c3de5ea0482..3373b11afe01 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2539,7 +2539,8 @@ static int rdt_get_tree(struct fs_context *fc)
 	if (is_mbm_enabled()) {
 		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
 		list_for_each_entry(dom, &r->domains, list)
-			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
+			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
+						   RESCTRL_PICK_ANY_CPU);
 	}
 
 	goto out;
@@ -3709,7 +3710,8 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
 
 	if (is_mbm_enabled()) {
 		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
-		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
+		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
+					   RESCTRL_PICK_ANY_CPU);
 	}
 
 	if (is_llc_occupancy_enabled())
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index ecd41762d61a..089b91133e5e 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -9,6 +9,9 @@
 /* CLOSID value used by the default control group */
 #define RESCTRL_RESERVED_CLOSID		0
 
+/* Indicates no CPU needs to be excluded */
+#define RESCTRL_PICK_ANY_CPU		-1
+
 #ifdef CONFIG_PROC_CPU_RESCTRL
 
 int proc_resctrl_show(struct seq_file *m,
-- 
2.39.2
Re: [PATCH v4 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu
Posted by Reinette Chatre 2 years, 7 months ago
Hi James,

On 5/25/2023 11:02 AM, James Morse wrote:

...

> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 021a8956518c..9cba8fc405b9 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -79,6 +79,37 @@ static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
>  	return cpu;
>  }
>  
> +/**
> + * cpumask_any_housekeeping_but() - Chose any cpu in @mask, preferring those
> + *			            that aren't marked nohz_full, excluding
> + *				    the provided CPU
> + * @mask:	The mask to pick a CPU from.
> + * @exclude_cpu:The CPU to avoid picking.
> + *
> + * Returns a CPU from @mask, but not @but. If there are housekeeping CPUs that

"but not @exclude_cpu"

> + * don't use nohz_full, these are preferred.
> + * Returns >= nr_cpu_ids if no CPUs are available.
> + */
> +static inline unsigned int
> +cpumask_any_housekeeping_but(const struct cpumask *mask, int exclude_cpu)
> +{
> +	int cpu, hk_cpu;

Should these be unsigned int?

> +
> +	cpu = cpumask_any_but(mask, exclude_cpu);
> +	if (tick_nohz_full_cpu(cpu)) {
> +		hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
> +		if  (hk_cpu == exclude_cpu) {
> +			hk_cpu = cpumask_nth_andnot(1, mask,
> +						    tick_nohz_full_mask);
> +		}
> +

These braces are not necessary. If they are added to help readability then
perhaps the indentation can be reduced by using an earlier:

	if (!tick_nohz_full_cpu(cpu))
		return cpu;


> +		if (hk_cpu < nr_cpu_ids)
> +			cpu = hk_cpu;
> +	}
> +
> +	return cpu;
> +}
> +
>  struct rdt_fs_context {
>  	struct kernfs_fs_context	kfc;
>  	bool				enable_cdpl2;
> @@ -564,11 +595,13 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
>  		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
>  		    int evtid, int first);
>  void mbm_setup_overflow_handler(struct rdt_domain *dom,
> -				unsigned long delay_ms);
> +				unsigned long delay_ms,
> +				int exclude_cpu);
>  void mbm_handle_overflow(struct work_struct *work);
>  void __init intel_rdt_mbm_apply_quirk(void);
>  bool is_mba_sc(struct rdt_resource *r);
> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
> +			     int exclude_cpu);
>  void cqm_handle_limbo(struct work_struct *work);
>  bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
>  void __check_limbo(struct rdt_domain *d, bool force_free);
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index ced933694f60..ae02185f3354 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -485,7 +485,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>  		 * setup up the limbo worker.
>  		 */
>  		if (!has_busy_rmid(r, d))
> -			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
> +			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL, -1);

Should this -1 be RESCTRL_PICK_ANY_CPU?

>  		set_bit(idx, d->rmid_busy_llc);
>  		entry->busy++;
>  	}
> @@ -810,15 +810,28 @@ void cqm_handle_limbo(struct work_struct *work)
>  	mutex_unlock(&rdtgroup_mutex);
>  }
>  
> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
> +/**
> + * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
> + *                             domain.
> + * @delay_ms:      How far in the future the handler should run.
> + * @exclude_cpu:   Which CPU the handler should not run on, -1 to pick any CPU.

Should -1 be RESCTRL_PICK_ANY_CPU? 

> + */
> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
> +			     int exclude_cpu)
>  {
>  	unsigned long delay = msecs_to_jiffies(delay_ms);
>  	int cpu;
>  
> -	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> -	dom->cqm_work_cpu = cpu;
> +	if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
> +		cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> +	else
> +		cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
> +						   exclude_cpu);
>  
> -	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> +	if (cpu < nr_cpu_ids) {
> +		dom->cqm_work_cpu = cpu;

Should cqm_work_cpu not perhaps be set to nr_cpu_ids on failure? If it keeps
pointing to CPU that ran worker previously there may be unexpected behavior. 

Note the different behavior between cqm_setup_limbo_handler() and
mbm_setup_overflow_handler() in this regard.

> +		schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> +	}
>  }
>  
>  void mbm_handle_overflow(struct work_struct *work)
> @@ -864,7 +877,14 @@ void mbm_handle_overflow(struct work_struct *work)
>  	mutex_unlock(&rdtgroup_mutex);
>  }
>  
> -void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
> +/**
> + * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
> + *                                domain.
> + * @delay_ms:      How far in the future the handler should run.
> + * @exclude_cpu:   Which CPU the handler should not run on, -1 to pick any CPU.

RESCTRL_PICK_ANY_CPU?

> + */
> +void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
> +				int exclude_cpu)
>  {
>  	unsigned long delay = msecs_to_jiffies(delay_ms);
>  	int cpu;
> @@ -875,9 +895,15 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
>  	 */
>  	if (!resctrl_mounted || !resctrl_arch_mon_capable())
>  		return;
> -	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> +	if (exclude_cpu == -1)

same

> +		cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> +	else
> +		cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
> +						   exclude_cpu);
>  	dom->mbm_work_cpu = cpu;
> -	schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
> +
> +	if (cpu < nr_cpu_ids)
> +		schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
>  }
>  

...

> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index ecd41762d61a..089b91133e5e 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -9,6 +9,9 @@
>  /* CLOSID value used by the default control group */
>  #define RESCTRL_RESERVED_CLOSID		0
>  
> +/* Indicates no CPU needs to be excluded */

This comment seems to just be a rewrite of the macro name.

> +#define RESCTRL_PICK_ANY_CPU		-1
> +
>  #ifdef CONFIG_PROC_CPU_RESCTRL
>  
>  int proc_resctrl_show(struct seq_file *m,

Reinette
Re: [PATCH v4 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu
Posted by James Morse 2 years, 6 months ago
Hi Reinette,

On 15/06/2023 23:25, Reinette Chatre wrote:
> On 5/25/2023 11:02 AM, James Morse wrote:
>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index 021a8956518c..9cba8fc405b9 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -79,6 +79,37 @@ static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
>>  	return cpu;
>>  }
>>  
>> +/**
>> + * cpumask_any_housekeeping_but() - Chose any cpu in @mask, preferring those
>> + *			            that aren't marked nohz_full, excluding
>> + *				    the provided CPU
>> + * @mask:	The mask to pick a CPU from.
>> + * @exclude_cpu:The CPU to avoid picking.
>> + *
>> + * Returns a CPU from @mask, but not @but. If there are housekeeping CPUs that
> 
> "but not @exclude_cpu"
> 
>> + * don't use nohz_full, these are preferred.
>> + * Returns >= nr_cpu_ids if no CPUs are available.
>> + */
>> +static inline unsigned int
>> +cpumask_any_housekeeping_but(const struct cpumask *mask, int exclude_cpu)
>> +{
>> +	int cpu, hk_cpu;
> 
> Should these be unsigned int?

Yup, fixed.


>> +
>> +	cpu = cpumask_any_but(mask, exclude_cpu);
>> +	if (tick_nohz_full_cpu(cpu)) {
>> +		hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
>> +		if  (hk_cpu == exclude_cpu) {
>> +			hk_cpu = cpumask_nth_andnot(1, mask,
>> +						    tick_nohz_full_mask);
>> +		}
>> +
> 
> These braces are not necessary.

My C parser is pretty dumb, and is easily confused by things like that....


> If they are added to help readability then
> perhaps the indentation can be reduced by using an earlier:
> 
> 	if (!tick_nohz_full_cpu(cpu))
> 		return cpu;

Even better!


>> +		if (hk_cpu < nr_cpu_ids)
>> +			cpu = hk_cpu;
>> +	}
>> +
>> +	return cpu;
>> +}
>> +
>>  struct rdt_fs_context {
>>  	struct kernfs_fs_context	kfc;
>>  	bool				enable_cdpl2;

>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index ced933694f60..ae02185f3354 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -485,7 +485,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>>  		 * setup up the limbo worker.
>>  		 */
>>  		if (!has_busy_rmid(r, d))
>> -			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
>> +			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL, -1);
> 
> Should this -1 be RESCTRL_PICK_ANY_CPU?
> 
>>  		set_bit(idx, d->rmid_busy_llc);
>>  		entry->busy++;
>>  	}
>> @@ -810,15 +810,28 @@ void cqm_handle_limbo(struct work_struct *work)
>>  	mutex_unlock(&rdtgroup_mutex);
>>  }
>>  
>> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
>> +/**
>> + * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
>> + *                             domain.
>> + * @delay_ms:      How far in the future the handler should run.
>> + * @exclude_cpu:   Which CPU the handler should not run on, -1 to pick any CPU.
> 
> Should -1 be RESCTRL_PICK_ANY_CPU? 
> 
>> + */
>> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
>> +			     int exclude_cpu)
>>  {
>>  	unsigned long delay = msecs_to_jiffies(delay_ms);
>>  	int cpu;
>>  
>> -	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
>> -	dom->cqm_work_cpu = cpu;
>> +	if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
>> +		cpu = cpumask_any_housekeeping(&dom->cpu_mask);
>> +	else
>> +		cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
>> +						   exclude_cpu);
>>  
>> -	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
>> +	if (cpu < nr_cpu_ids) {
>> +		dom->cqm_work_cpu = cpu;
> 
> Should cqm_work_cpu not perhaps be set to nr_cpu_ids on failure? If it keeps
> pointing to CPU that ran worker previously there may be unexpected behavior. 
> 
> Note the different behavior between cqm_setup_limbo_handler() and
> mbm_setup_overflow_handler() in this regard.

Sure,


>> +		schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
>> +	}
>>  }
>>  
>>  void mbm_handle_overflow(struct work_struct *work)

>> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
>> index ecd41762d61a..089b91133e5e 100644
>> --- a/include/linux/resctrl.h
>> +++ b/include/linux/resctrl.h
>> @@ -9,6 +9,9 @@
>>  /* CLOSID value used by the default control group */
>>  #define RESCTRL_RESERVED_CLOSID		0
>>  
>> +/* Indicates no CPU needs to be excluded */
> 
> This comment seems to just be a rewrite of the macro name.

I'm more than happy to remove it!



Thanks,

James
RE: [PATCH v4 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu
Posted by Shaopeng Tan (Fujitsu) 2 years, 8 months ago
Hi James,

> When a CPU is taken offline resctrl may need to move the overflow or limbo
> handlers to run on a different CPU.
> 
> Once the offline callbacks have been split, cqm_setup_limbo_handler() will be
> called while the CPU that is going offline is still present in the cpu_mask.
> 
> Pass the CPU to exclude to cqm_setup_limbo_handler() and
> mbm_setup_overflow_handler(). These functions can use a variant of
> cpumask_any_but() when selecting the CPU. -1 is used to indicate no CPUs
> need excluding.
> 
> A subsequent patch moves these calls to be before CPUs have been removed,
> so this exclude_cpus behaviour is temporary.
> 
> Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v2:
>  * Rephrased a comment to avoid a two letter bad-word. (we)
>  * Avoid assigning mbm_work_cpu if the domain is going to be free()d
>  * Added cpumask_any_housekeeping_but(), I dislike the name
> 
> Changes since v3:
>  * Marked an explanatory comment as temporary as the subsequent patch is
>    no longer adjacent.
> ---
>  arch/x86/kernel/cpu/resctrl/core.c     |  8 +++--
>  arch/x86/kernel/cpu/resctrl/internal.h | 37
> +++++++++++++++++++++--  arch/x86/kernel/cpu/resctrl/monitor.c
> | 42 +++++++++++++++++++++-----
> arch/x86/kernel/cpu/resctrl/rdtgroup.c |  6 ++--
>  include/linux/resctrl.h                |  3 ++
>  5 files changed, 82 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c
> b/arch/x86/kernel/cpu/resctrl/core.c
> index e00f3542e60e..187ed127a446 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -582,12 +582,16 @@ static void domain_remove_cpu(int cpu, struct
> rdt_resource *r)
>  	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
>  		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
>  			cancel_delayed_work(&d->mbm_over);
> -			mbm_setup_overflow_handler(d, 0);
> +			/*
> +			 * temporary: exclude_cpu=-1 as this CPU has
> already
> +			 * been removed by cpumask_clear_cpu()d
> +			 */
> +			mbm_setup_overflow_handler(d, 0,
> RESCTRL_PICK_ANY_CPU);
>  		}
>  		if (is_llc_occupancy_enabled() && cpu ==
> d->cqm_work_cpu &&
>  		    has_busy_rmid(r, d)) {
>  			cancel_delayed_work(&d->cqm_limbo);
> -			cqm_setup_limbo_handler(d, 0);
> +			cqm_setup_limbo_handler(d, 0,
> RESCTRL_PICK_ANY_CPU);
>  		}
>  	}
>  }
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h
> b/arch/x86/kernel/cpu/resctrl/internal.h
> index 021a8956518c..9cba8fc405b9 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -79,6 +79,37 @@ static inline unsigned int
> cpumask_any_housekeeping(const struct cpumask *mask)
>  	return cpu;
>  }
> 
> +/**
> + * cpumask_any_housekeeping_but() - Chose any cpu in @mask, preferring
> those
> + *			            that aren't marked nohz_full, excluding
> + *				    the provided CPU
> + * @mask:	The mask to pick a CPU from.
> + * @exclude_cpu:The CPU to avoid picking.
> + *
> + * Returns a CPU from @mask, but not @but. If there are housekeeping
> +CPUs that
> + * don't use nohz_full, these are preferred.
> + * Returns >= nr_cpu_ids if no CPUs are available.
> + */
> +static inline unsigned int
> +cpumask_any_housekeeping_but(const struct cpumask *mask, int
> +exclude_cpu) {
> +	int cpu, hk_cpu;
> +
> +	cpu = cpumask_any_but(mask, exclude_cpu);
> +	if (tick_nohz_full_cpu(cpu)) {
> +		hk_cpu = cpumask_nth_andnot(0, mask,
> tick_nohz_full_mask);
> +		if  (hk_cpu == exclude_cpu) {
> +			hk_cpu = cpumask_nth_andnot(1, mask,
> +						    tick_nohz_full_mask);
> +		}
> +
> +		if (hk_cpu < nr_cpu_ids)
> +			cpu = hk_cpu;
> +	}
> +
> +	return cpu;
> +}
> +
>  struct rdt_fs_context {
>  	struct kernfs_fs_context	kfc;
>  	bool				enable_cdpl2;
> @@ -564,11 +595,13 @@ void mon_event_read(struct rmid_read *rr, struct
> rdt_resource *r,
>  		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
>  		    int evtid, int first);
>  void mbm_setup_overflow_handler(struct rdt_domain *dom,
> -				unsigned long delay_ms);
> +				unsigned long delay_ms,
> +				int exclude_cpu);
>  void mbm_handle_overflow(struct work_struct *work);  void __init
> intel_rdt_mbm_apply_quirk(void);  bool is_mba_sc(struct rdt_resource *r);
> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long
> delay_ms);
> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long
> delay_ms,
> +			     int exclude_cpu);
>  void cqm_handle_limbo(struct work_struct *work);  bool
> has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);  void
> __check_limbo(struct rdt_domain *d, bool force_free); diff --git
> a/arch/x86/kernel/cpu/resctrl/monitor.c
> b/arch/x86/kernel/cpu/resctrl/monitor.c
> index ced933694f60..ae02185f3354 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -485,7 +485,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>  		 * setup up the limbo worker.
>  		 */
>  		if (!has_busy_rmid(r, d))
> -			cqm_setup_limbo_handler(d,
> CQM_LIMBOCHECK_INTERVAL);
> +			cqm_setup_limbo_handler(d,
> CQM_LIMBOCHECK_INTERVAL, -1);
>  		set_bit(idx, d->rmid_busy_llc);
>  		entry->busy++;
>  	}
> @@ -810,15 +810,28 @@ void cqm_handle_limbo(struct work_struct *work)
>  	mutex_unlock(&rdtgroup_mutex);
>  }
> 
> -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long
> delay_ms)
> +/**
> + * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
> + *                             domain.
> + * @delay_ms:      How far in the future the handler should run.
> + * @exclude_cpu:   Which CPU the handler should not run on, -1 to pick any
> CPU.
> + */
> +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long
> delay_ms,
> +			     int exclude_cpu)
>  {
>  	unsigned long delay = msecs_to_jiffies(delay_ms);
>  	int cpu;
> 
> -	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> -	dom->cqm_work_cpu = cpu;
> +	if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
> +		cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> +	else
> +		cpu = cpumask_any_housekeeping_but(&dom->cpu_mask,
> +						   exclude_cpu);
> 
> -	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> +	if (cpu < nr_cpu_ids) {
> +		dom->cqm_work_cpu = cpu;
> +		schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> +	}
>  }
> 
>  void mbm_handle_overflow(struct work_struct *work) @@ -864,7 +877,14
> @@ void mbm_handle_overflow(struct work_struct *work)
>  	mutex_unlock(&rdtgroup_mutex);
>  }
> 
> -void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long
> delay_ms)
> +/**
> + * mbm_setup_overflow_handler() - Schedule the overflow handler to run for
> this
> + *                                domain.
> + * @delay_ms:      How far in the future the handler should run.
> + * @exclude_cpu:   Which CPU the handler should not run on, -1 to pick any
> CPU.
> + */
> +void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long
> delay_ms,
> +				int exclude_cpu)
>  {
>  	unsigned long delay = msecs_to_jiffies(delay_ms);
>  	int cpu;
> @@ -875,9 +895,15 @@ void mbm_setup_overflow_handler(struct rdt_domain
> *dom, unsigned long delay_ms)
>  	 */
>  	if (!resctrl_mounted || !resctrl_arch_mon_capable())
>  		return;
> -	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> +	if (exclude_cpu == -1)
> +		cpu = cpumask_any_housekeeping(&dom->cpu_mask);

Should RESCTRL_PICK_ANY_CPU be used instead of -1?

Best regards,
Shaopeng TAN
Re: [PATCH v4 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu
Posted by James Morse 2 years, 6 months ago
Hi Shaopeng Tan,

On 09/06/2023 12:10, Shaopeng Tan (Fujitsu) wrote:
>> When a CPU is taken offline resctrl may need to move the overflow or limbo
>> handlers to run on a different CPU.
>>
>> Once the offline callbacks have been split, cqm_setup_limbo_handler() will be
>> called while the CPU that is going offline is still present in the cpu_mask.
>>
>> Pass the CPU to exclude to cqm_setup_limbo_handler() and
>> mbm_setup_overflow_handler(). These functions can use a variant of
>> cpumask_any_but() when selecting the CPU. -1 is used to indicate no CPUs
>> need excluding.
>>
>> A subsequent patch moves these calls to be before CPUs have been removed,
>> so this exclude_cpus behaviour is temporary.

>> diff --git
>> a/arch/x86/kernel/cpu/resctrl/monitor.c
>> b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index ced933694f60..ae02185f3354 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -875,9 +895,15 @@ void mbm_setup_overflow_handler(struct rdt_domain
>> *dom, unsigned long delay_ms)
>>  	 */
>>  	if (!resctrl_mounted || !resctrl_arch_mon_capable())
>>  		return;
>> -	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
>> +	if (exclude_cpu == -1)
>> +		cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> 
> Should RESCTRL_PICK_ANY_CPU be used instead of -1?

Yup, that would be more readable. I did this for cqm_setup_limbo_handler(), but for some
reason missed this one.


Thanks,

James