[PATCH v9 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu

James Morse posted 24 patches 1 year, 10 months ago
[PATCH v9 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu
Posted by James Morse 1 year, 10 months ago
When a CPU is taken offline resctrl may need to move the overflow or
limbo handlers to run on a different CPU.

Once the offline callbacks have been split, cqm_setup_limbo_handler()
will be called while the CPU that is going offline is still present
in the cpu_mask.

Pass the CPU to exclude to cqm_setup_limbo_handler() and
mbm_setup_overflow_handler(). These functions can use a variant of
cpumask_any_but() when selecting the CPU. -1 is used to indicate no CPUs
need excluding.

Signed-off-by: James Morse <james.morse@arm.com>
Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Carl Worth <carl@os.amperecomputing.com> # arm64
Reviewed-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Reviewed-by: Babu Moger <babu.moger@amd.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
---
Changes since v2:
 * Rephrased a comment to avoid a two letter bad-word. (we)
 * Avoid assigning mbm_work_cpu if the domain is going to be free()d
 * Added cpumask_any_housekeeping_but(), I dislike the name

Changes since v3:
 * Marked an explanatory comment as temporary as the subsequent patch is
   no longer adjacent.

Changes since v4:
 * Check against RESCTRL_PICK_ANY_CPU instead of -1.
 * Leave cqm_work_cpu as nr_cpu_ids when no CPU is available.
 * Made cpumask_any_housekeeping_but() more readable.

Changes since v5:
 * Changes in captialisation, and a typo.
 * Merged cpumask helpers.

Changes since v6:
 * Added the missing dom parameter to some kernel doc.
 * Re-added use of cpumask_any_but(),
 * Expanded comment above cpumask_any_housekeeping(),
 * Added some more comments for good measure.
 * Added explicit IS_ENABLED() check as gcc-12 doesn't seem to work this out.

Changes since v7:
 * Removed the paragraph with the badword 'subsequent'.
 * Reworded a comment with the badword 'we'.
 * Fixed a bounds checking bug in cpumask_any_housekeeping().
---
 arch/x86/kernel/cpu/resctrl/core.c        |  8 +++--
 arch/x86/kernel/cpu/resctrl/ctrlmondata.c |  2 +-
 arch/x86/kernel/cpu/resctrl/internal.h    | 33 ++++++++++++++----
 arch/x86/kernel/cpu/resctrl/monitor.c     | 42 ++++++++++++++++++-----
 arch/x86/kernel/cpu/resctrl/rdtgroup.c    |  6 ++--
 include/linux/resctrl.h                   |  2 ++
 6 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 4627d447bc3d..55322ba629da 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -584,12 +584,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
 		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
 			cancel_delayed_work(&d->mbm_over);
-			mbm_setup_overflow_handler(d, 0);
+			/*
+			 * temporary: exclude_cpu=-1 as this CPU has already
+			 * been removed by cpumask_clear_cpu()d
+			 */
+			mbm_setup_overflow_handler(d, 0, RESCTRL_PICK_ANY_CPU);
 		}
 		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
 		    has_busy_rmid(d)) {
 			cancel_delayed_work(&d->cqm_limbo);
-			cqm_setup_limbo_handler(d, 0);
+			cqm_setup_limbo_handler(d, 0, RESCTRL_PICK_ANY_CPU);
 		}
 	}
 }
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index a033e8e32108..64db51455df3 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -552,7 +552,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
 		return;
 	}
 
-	cpu = cpumask_any_housekeeping(&d->cpu_mask);
+	cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
 
 	/*
 	 * cpumask_any_housekeeping() prefers housekeeping CPUs, but
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 3ee855c37447..c99f26ebe7a6 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -60,19 +60,36 @@
  * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
  *			        aren't marked nohz_full
  * @mask:	The mask to pick a CPU from.
+ * @exclude_cpu:The CPU to avoid picking.
  *
- * Returns a CPU in @mask. If there are housekeeping CPUs that don't use
- * nohz_full, these are preferred.
+ * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
+ * CPUs that don't use nohz_full, these are preferred. Pass
+ * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
+ *
+ * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available.
  */
-static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
+static inline unsigned int
+cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
 {
 	unsigned int cpu, hk_cpu;
 
-	cpu = cpumask_any(mask);
-	if (!tick_nohz_full_cpu(cpu))
+	if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
+		cpu = cpumask_any(mask);
+	else
+		cpu = cpumask_any_but(mask, exclude_cpu);
+
+	if (!IS_ENABLED(CONFIG_NO_HZ_FULL))
+		return cpu;
+
+	/* If the CPU picked isn't marked nohz_full nothing more needs doing. */
+	if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu))
 		return cpu;
 
+	/* Try to find a CPU that isn't nohz_full to use in preference */
 	hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
+	if (hk_cpu == exclude_cpu)
+		hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);
+
 	if (hk_cpu < nr_cpu_ids)
 		cpu = hk_cpu;
 
@@ -573,11 +590,13 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
 		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
 		    int evtid, int first);
 void mbm_setup_overflow_handler(struct rdt_domain *dom,
-				unsigned long delay_ms);
+				unsigned long delay_ms,
+				int exclude_cpu);
 void mbm_handle_overflow(struct work_struct *work);
 void __init intel_rdt_mbm_apply_quirk(void);
 bool is_mba_sc(struct rdt_resource *r);
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+			     int exclude_cpu);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_domain *d);
 void __check_limbo(struct rdt_domain *d, bool force_free);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 92d7ba674003..67edd4c440f0 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -481,7 +481,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 		 * setup up the limbo worker.
 		 */
 		if (!has_busy_rmid(d))
-			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
+			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
+						RESCTRL_PICK_ANY_CPU);
 		set_bit(idx, d->rmid_busy_llc);
 		entry->busy++;
 	}
@@ -784,7 +785,8 @@ void cqm_handle_limbo(struct work_struct *work)
 	__check_limbo(d, false);
 
 	if (has_busy_rmid(d)) {
-		d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask);
+		d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+							   RESCTRL_PICK_ANY_CPU);
 		schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
 					 delay);
 	}
@@ -792,15 +794,25 @@ void cqm_handle_limbo(struct work_struct *work)
 	mutex_unlock(&rdtgroup_mutex);
 }
 
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
+/**
+ * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
+ *                             domain.
+ * @dom:           The domain the limbo handler should run for.
+ * @delay_ms:      How far in the future the handler should run.
+ * @exclude_cpu:   Which CPU the handler should not run on,
+ *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
+ */
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+			     int exclude_cpu)
 {
 	unsigned long delay = msecs_to_jiffies(delay_ms);
 	int cpu;
 
-	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+	cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
 	dom->cqm_work_cpu = cpu;
 
-	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+	if (cpu < nr_cpu_ids)
+		schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
 }
 
 void mbm_handle_overflow(struct work_struct *work)
@@ -838,14 +850,24 @@ void mbm_handle_overflow(struct work_struct *work)
 	 * Re-check for housekeeping CPUs. This allows the overflow handler to
 	 * move off a nohz_full CPU quickly.
 	 */
-	d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask);
+	d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+						   RESCTRL_PICK_ANY_CPU);
 	schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
 
 out_unlock:
 	mutex_unlock(&rdtgroup_mutex);
 }
 
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
+/**
+ * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
+ *                                domain.
+ * @dom:           The domain the overflow handler should run for.
+ * @delay_ms:      How far in the future the handler should run.
+ * @exclude_cpu:   Which CPU the handler should not run on,
+ *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
+ */
+void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
+				int exclude_cpu)
 {
 	unsigned long delay = msecs_to_jiffies(delay_ms);
 	int cpu;
@@ -856,9 +878,11 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
 	 */
 	if (!resctrl_mounted || !resctrl_arch_mon_capable())
 		return;
-	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+	cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
 	dom->mbm_work_cpu = cpu;
-	schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
+
+	if (cpu < nr_cpu_ids)
+		schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
 }
 
 static int dom_data_init(struct rdt_resource *r)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 38d3b19a3aca..f5688c79d94f 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2678,7 +2678,8 @@ static int rdt_get_tree(struct fs_context *fc)
 	if (is_mbm_enabled()) {
 		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
 		list_for_each_entry(dom, &r->domains, list)
-			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
+			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
+						   RESCTRL_PICK_ANY_CPU);
 	}
 
 	goto out;
@@ -3989,7 +3990,8 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
 
 	if (is_mbm_enabled()) {
 		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
-		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
+		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
+					   RESCTRL_PICK_ANY_CPU);
 	}
 
 	if (is_llc_occupancy_enabled())
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 4c4bad3c34e4..ccbbbe5d18d3 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -10,6 +10,8 @@
 #define RESCTRL_RESERVED_CLOSID		0
 #define RESCTRL_RESERVED_RMID		0
 
+#define RESCTRL_PICK_ANY_CPU		-1
+
 #ifdef CONFIG_PROC_CPU_RESCTRL
 
 int proc_resctrl_show(struct seq_file *m,
-- 
2.39.2
Re: [PATCH v9 21/24] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but cpu
Posted by Tony Luck 1 year, 8 months ago
> -static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
> +static inline unsigned int
> +cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
>  {
>  	unsigned int cpu, hk_cpu;
>  
> -	cpu = cpumask_any(mask);
> -	if (!tick_nohz_full_cpu(cpu))
> +	if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
> +		cpu = cpumask_any(mask);
> +	else
> +		cpu = cpumask_any_but(mask, exclude_cpu);
> +
> +	if (!IS_ENABLED(CONFIG_NO_HZ_FULL))
> +		return cpu;
> +
> +	/* If the CPU picked isn't marked nohz_full nothing more needs doing. */
> +	if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu))
>  		return cpu;
>  
> +	/* Try to find a CPU that isn't nohz_full to use in preference */
>  	hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
> +	if (hk_cpu == exclude_cpu)
> +		hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);

               OOPS HERE ^^^^^^^^^^^^^^^^^^
> +
>  	if (hk_cpu < nr_cpu_ids)
>  		cpu = hk_cpu;

James,

I'm playing around with v6.9-rc1. When I take all the CPUs in a domain
offline, resctrl wants to stop the mbm overflow handler. While doing
so it crashes with a NULL dereference in the code above.

At this point the cpu_mask for the domain still has the bit for
the last CPU standing in this domain. So cpumask_any_housekeeping()
is called with a mask that just has CPU 36 set, but request to find
a CPU other than 36.

-Tony

Heres the OOPs:

[   97.107355] smpboot: CPU 37 is now offline
[   97.166136] BUG: kernel NULL pointer dereference, address: 0000000000000000
[   97.173118] #PF: supervisor read access in kernel mode
[   97.178263] #PF: error_code(0x0000) - not-present page
[   97.183410] PGD 0
[   97.185438] Oops: 0000 [#1] PREEMPT SMP NOPTI
[   97.189805] CPU: 36 PID: 235 Comm: cpuhp/36 Tainted: G                T  6.9.0-rc1 #356
[   97.197810] Hardware name: Intel Corporation WilsonCity/WilsonCity, BIOS WLYDCRB1.SYS.0021.P06.2104260458 04/26/2021
[   97.208322] RIP: 0010:__find_nth_andnot_bit+0x66/0x110
[   97.213472] Code: d2 eb 20 48 29 c6 48 8d 41 c0 48 39 ca 0f 82 8b 00 00 00 48 01 f0 48 83 c1 40 48 39 d0 0f 83 9e 00 00 00 4d 89 d3 49 83 c2 01 <4b> 8b 3c d8 48 f7 d7 4b 23 3c d9 f3 48 0f b8 c7 48 39 c6 73 c4 4c
[   97.232214] RSP: 0018:ff462f11872bbd98 EFLAGS: 00010202
[   97.237440] RAX: 0000000000000000 RBX: 0000000000000024 RCX: 0000000000000080
[   97.244574] RDX: 0000000000000090 RSI: 0000000000000000 RDI: 0000000000000000
[   97.251705] RBP: ff43a0b049599018 R08: 0000000000000000 R09: ff43a0b049599018
[   97.258835] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000025
[   97.265970] R13: 0000000000000090 R14: 0000000000000000 R15: ffffffffaa068fb0
[   97.273100] FS:  0000000000000000(0000) GS:ff43a0b81d800000(0000) knlGS:0000000000000000
[   97.281187] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   97.286932] CR2: 0000000000000000 CR3: 0000000a50e3a004 CR4: 0000000000771ef0
[   97.294065] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   97.301197] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   97.308328] PKRU: 55555554
[   97.311039] Call Trace:
[   97.313492]  <TASK>
[   97.315600]  ? __die+0x1f/0x60
[   97.318665]  ? page_fault_oops+0x176/0x5a0
[   97.322777]  ? exc_page_fault+0x7f/0x260
[   97.326708]  ? asm_exc_page_fault+0x22/0x30
[   97.330903]  ? __pfx_resctrl_arch_offline_cpu+0x10/0x10
[   97.336140]  ? __find_nth_andnot_bit+0x66/0x110
[   97.340680]  ? __cancel_work+0x7d/0xc0
[   97.344438]  cpumask_any_housekeeping+0x55/0x110
[   97.349060]  mbm_setup_overflow_handler+0x40/0x70
[   97.353774]  resctrl_offline_cpu+0x101/0x110
[   97.358054]  resctrl_arch_offline_cpu+0x19/0x260
[   97.362673]  cpuhp_invoke_callback+0x156/0x6b0
[   97.367127]  ? cpuhp_thread_fun+0x5f/0x250
[   97.371234]  cpuhp_thread_fun+0x1ca/0x250
[   97.375247]  ? __pfx_smpboot_thread_fn+0x10/0x10
[   97.379874]  smpboot_thread_fn+0x184/0x220
[   97.383974]  kthread+0xe0/0x110
[   97.387129]  ? __pfx_kthread+0x10/0x10
[   97.390888]  ret_from_fork+0x2d/0x50
[   97.394467]  ? __pfx_kthread+0x10/0x10
[   97.398220]  ret_from_fork_asm+0x1a/0x30
[   97.402149]  </TASK>
[tip: x86/cache] x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but CPU
Posted by tip-bot2 for James Morse 1 year, 10 months ago
The following commit has been merged into the x86/cache branch of tip:

Commit-ID:     978fcca954cb52249babbc14e53de53c88dd6433
Gitweb:        https://git.kernel.org/tip/978fcca954cb52249babbc14e53de53c88dd6433
Author:        James Morse <james.morse@arm.com>
AuthorDate:    Tue, 13 Feb 2024 18:44:35 
Committer:     Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Fri, 16 Feb 2024 19:18:33 +01:00

x86/resctrl: Allow overflow/limbo handlers to be scheduled on any-but CPU

When a CPU is taken offline resctrl may need to move the overflow or limbo
handlers to run on a different CPU.

Once the offline callbacks have been split, cqm_setup_limbo_handler() will be
called while the CPU that is going offline is still present in the CPU mask.

Pass the CPU to exclude to cqm_setup_limbo_handler() and
mbm_setup_overflow_handler(). These functions can use a variant of
cpumask_any_but() when selecting the CPU. -1 is used to indicate no CPUs need
excluding.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Reviewed-by: Babu Moger <babu.moger@amd.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Carl Worth <carl@os.amperecomputing.com> # arm64
Link: https://lore.kernel.org/r/20240213184438.16675-22-james.morse@arm.com
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
---
 arch/x86/kernel/cpu/resctrl/core.c        |  8 +++-
 arch/x86/kernel/cpu/resctrl/ctrlmondata.c |  2 +-
 arch/x86/kernel/cpu/resctrl/internal.h    | 33 +++++++++++++----
 arch/x86/kernel/cpu/resctrl/monitor.c     | 42 +++++++++++++++++-----
 arch/x86/kernel/cpu/resctrl/rdtgroup.c    |  6 ++-
 include/linux/resctrl.h                   |  2 +-
 6 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 4627d44..55322ba 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -584,12 +584,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
 		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
 			cancel_delayed_work(&d->mbm_over);
-			mbm_setup_overflow_handler(d, 0);
+			/*
+			 * temporary: exclude_cpu=-1 as this CPU has already
+			 * been removed by cpumask_clear_cpu()d
+			 */
+			mbm_setup_overflow_handler(d, 0, RESCTRL_PICK_ANY_CPU);
 		}
 		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
 		    has_busy_rmid(d)) {
 			cancel_delayed_work(&d->cqm_limbo);
-			cqm_setup_limbo_handler(d, 0);
+			cqm_setup_limbo_handler(d, 0, RESCTRL_PICK_ANY_CPU);
 		}
 	}
 }
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 52fa0e1..20b02d6 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -552,7 +552,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
 		return;
 	}
 
-	cpu = cpumask_any_housekeeping(&d->cpu_mask);
+	cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
 
 	/*
 	 * cpumask_any_housekeeping() prefers housekeeping CPUs, but
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 3ee855c..c99f26e 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -60,19 +60,36 @@
  * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
  *			        aren't marked nohz_full
  * @mask:	The mask to pick a CPU from.
+ * @exclude_cpu:The CPU to avoid picking.
  *
- * Returns a CPU in @mask. If there are housekeeping CPUs that don't use
- * nohz_full, these are preferred.
+ * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
+ * CPUs that don't use nohz_full, these are preferred. Pass
+ * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
+ *
+ * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available.
  */
-static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
+static inline unsigned int
+cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
 {
 	unsigned int cpu, hk_cpu;
 
-	cpu = cpumask_any(mask);
-	if (!tick_nohz_full_cpu(cpu))
+	if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
+		cpu = cpumask_any(mask);
+	else
+		cpu = cpumask_any_but(mask, exclude_cpu);
+
+	if (!IS_ENABLED(CONFIG_NO_HZ_FULL))
 		return cpu;
 
+	/* If the CPU picked isn't marked nohz_full nothing more needs doing. */
+	if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu))
+		return cpu;
+
+	/* Try to find a CPU that isn't nohz_full to use in preference */
 	hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
+	if (hk_cpu == exclude_cpu)
+		hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);
+
 	if (hk_cpu < nr_cpu_ids)
 		cpu = hk_cpu;
 
@@ -573,11 +590,13 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
 		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
 		    int evtid, int first);
 void mbm_setup_overflow_handler(struct rdt_domain *dom,
-				unsigned long delay_ms);
+				unsigned long delay_ms,
+				int exclude_cpu);
 void mbm_handle_overflow(struct work_struct *work);
 void __init intel_rdt_mbm_apply_quirk(void);
 bool is_mba_sc(struct rdt_resource *r);
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+			     int exclude_cpu);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_domain *d);
 void __check_limbo(struct rdt_domain *d, bool force_free);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 92d7ba6..67edd4c 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -481,7 +481,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 		 * setup up the limbo worker.
 		 */
 		if (!has_busy_rmid(d))
-			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
+			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
+						RESCTRL_PICK_ANY_CPU);
 		set_bit(idx, d->rmid_busy_llc);
 		entry->busy++;
 	}
@@ -784,7 +785,8 @@ void cqm_handle_limbo(struct work_struct *work)
 	__check_limbo(d, false);
 
 	if (has_busy_rmid(d)) {
-		d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask);
+		d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+							   RESCTRL_PICK_ANY_CPU);
 		schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
 					 delay);
 	}
@@ -792,15 +794,25 @@ void cqm_handle_limbo(struct work_struct *work)
 	mutex_unlock(&rdtgroup_mutex);
 }
 
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
+/**
+ * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
+ *                             domain.
+ * @dom:           The domain the limbo handler should run for.
+ * @delay_ms:      How far in the future the handler should run.
+ * @exclude_cpu:   Which CPU the handler should not run on,
+ *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
+ */
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+			     int exclude_cpu)
 {
 	unsigned long delay = msecs_to_jiffies(delay_ms);
 	int cpu;
 
-	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+	cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
 	dom->cqm_work_cpu = cpu;
 
-	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+	if (cpu < nr_cpu_ids)
+		schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
 }
 
 void mbm_handle_overflow(struct work_struct *work)
@@ -838,14 +850,24 @@ void mbm_handle_overflow(struct work_struct *work)
 	 * Re-check for housekeeping CPUs. This allows the overflow handler to
 	 * move off a nohz_full CPU quickly.
 	 */
-	d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask);
+	d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+						   RESCTRL_PICK_ANY_CPU);
 	schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
 
 out_unlock:
 	mutex_unlock(&rdtgroup_mutex);
 }
 
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
+/**
+ * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
+ *                                domain.
+ * @dom:           The domain the overflow handler should run for.
+ * @delay_ms:      How far in the future the handler should run.
+ * @exclude_cpu:   Which CPU the handler should not run on,
+ *		   RESCTRL_PICK_ANY_CPU to pick any CPU.
+ */
+void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
+				int exclude_cpu)
 {
 	unsigned long delay = msecs_to_jiffies(delay_ms);
 	int cpu;
@@ -856,9 +878,11 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
 	 */
 	if (!resctrl_mounted || !resctrl_arch_mon_capable())
 		return;
-	cpu = cpumask_any_housekeeping(&dom->cpu_mask);
+	cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
 	dom->mbm_work_cpu = cpu;
-	schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
+
+	if (cpu < nr_cpu_ids)
+		schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
 }
 
 static int dom_data_init(struct rdt_resource *r)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 38d3b19..f5688c7 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2678,7 +2678,8 @@ static int rdt_get_tree(struct fs_context *fc)
 	if (is_mbm_enabled()) {
 		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
 		list_for_each_entry(dom, &r->domains, list)
-			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
+			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
+						   RESCTRL_PICK_ANY_CPU);
 	}
 
 	goto out;
@@ -3989,7 +3990,8 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
 
 	if (is_mbm_enabled()) {
 		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
-		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
+		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
+					   RESCTRL_PICK_ANY_CPU);
 	}
 
 	if (is_llc_occupancy_enabled())
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 4c4bad3..ccbbbe5 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -10,6 +10,8 @@
 #define RESCTRL_RESERVED_CLOSID		0
 #define RESCTRL_RESERVED_RMID		0
 
+#define RESCTRL_PICK_ANY_CPU		-1
+
 #ifdef CONFIG_PROC_CPU_RESCTRL
 
 int proc_resctrl_show(struct seq_file *m,