[PATCH 14/33] cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset

Frederic Weisbecker posted 33 patches 1 month ago
[PATCH 14/33] cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
Posted by Frederic Weisbecker 1 month ago
Until now, HK_TYPE_DOMAIN used to only include boot defined isolated
CPUs passed through isolcpus= boot option. Users interested in also
knowing the runtime defined isolated CPUs through cpuset must use
different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc...

There are many drawbacks to that approach:

1) Most interested subsystems want to know about all isolated CPUs, not
  just those defined at boot time.

2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
  concurrent cpuset changes.

3) Further cpuset modifications are not propagated to subsystems.

Solve 1) and 2) and centralize all isolated CPUs within the
HK_TYPE_DOMAIN housekeeping cpumask.

Subsystems can rely on RCU to synchronize against concurrent changes.

The propagation mentioned in 3) will be handled in further patches.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/sched/isolation.h |  4 +-
 kernel/cgroup/cpuset.c          |  2 +
 kernel/sched/isolation.c        | 65 ++++++++++++++++++++++++++++++---
 kernel/sched/sched.h            |  1 +
 4 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index 9262378760b1..199d0fc4646f 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -36,12 +36,13 @@ extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
 
 static inline bool housekeeping_cpu(int cpu, enum hk_type type)
 {
-	if (housekeeping_flags & BIT(type))
+	if (READ_ONCE(housekeeping_flags) & BIT(type))
 		return housekeeping_test_cpu(cpu, type);
 	else
 		return true;
 }
 
+extern int housekeeping_update(struct cpumask *mask, enum hk_type type);
 extern void __init housekeeping_init(void);
 
 #else
@@ -74,6 +75,7 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type)
 	return true;
 }
 
+static inline int housekeeping_update(struct cpumask *mask, enum hk_type type) { return 0; }
 static inline void housekeeping_init(void) { }
 #endif /* CONFIG_CPU_ISOLATION */
 
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 2d2fc74bc00c..4f2bc68332a7 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1351,6 +1351,8 @@ static void update_unbound_workqueue_cpumask(bool isolcpus_updated)
 
 	ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
 	WARN_ON_ONCE(ret < 0);
+	ret = housekeeping_update(isolated_cpus, HK_TYPE_DOMAIN);
+	WARN_ON_ONCE(ret < 0);
 }
 
 /**
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 5ddb8dc5ca91..48f3b6b20604 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -23,16 +23,39 @@ EXPORT_SYMBOL_GPL(housekeeping_flags);
 
 bool housekeeping_enabled(enum hk_type type)
 {
-	return !!(housekeeping_flags & BIT(type));
+	return !!(READ_ONCE(housekeeping_flags) & BIT(type));
 }
 EXPORT_SYMBOL_GPL(housekeeping_enabled);
 
+static bool housekeeping_dereference_check(enum hk_type type)
+{
+	if (type == HK_TYPE_DOMAIN) {
+		if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
+			return true;
+		if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
+			return true;
+
+		return false;
+	}
+
+	return true;
+}
+
+static inline struct cpumask *__housekeeping_cpumask(enum hk_type type)
+{
+	return rcu_dereference_check(housekeeping_cpumasks[type],
+				     housekeeping_dereference_check(type));
+}
+
 const struct cpumask *housekeeping_cpumask(enum hk_type type)
 {
-	if (housekeeping_flags & BIT(type)) {
-		return rcu_dereference_check(housekeeping_cpumasks[type], 1);
-	}
-	return cpu_possible_mask;
+	const struct cpumask *mask = NULL;
+
+	if (READ_ONCE(housekeeping_flags) & BIT(type))
+		mask = __housekeeping_cpumask(type);
+	if (!mask)
+		mask = cpu_possible_mask;
+	return mask;
 }
 EXPORT_SYMBOL_GPL(housekeeping_cpumask);
 
@@ -70,12 +93,42 @@ EXPORT_SYMBOL_GPL(housekeeping_affine);
 
 bool housekeeping_test_cpu(int cpu, enum hk_type type)
 {
-	if (housekeeping_flags & BIT(type))
+	if (READ_ONCE(housekeeping_flags) & BIT(type))
 		return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
 	return true;
 }
 EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
 
+int housekeeping_update(struct cpumask *mask, enum hk_type type)
+{
+	struct cpumask *trial, *old = NULL;
+
+	if (type != HK_TYPE_DOMAIN)
+		return -ENOTSUPP;
+
+	trial = kmalloc(sizeof(*trial), GFP_KERNEL);
+	if (!trial)
+		return -ENOMEM;
+
+	cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), mask);
+	if (!cpumask_intersects(trial, cpu_online_mask)) {
+		kfree(trial);
+		return -EINVAL;
+	}
+
+	if (housekeeping_flags & BIT(type))
+		old = __housekeeping_cpumask(type);
+	else
+		WRITE_ONCE(housekeeping_flags, housekeeping_flags | BIT(type));
+	rcu_assign_pointer(housekeeping_cpumasks[type], trial);
+
+	synchronize_rcu();
+
+	kfree(old);
+
+	return 0;
+}
+
 void __init housekeeping_init(void)
 {
 	enum hk_type type;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0b1a233dcabf..d3512138027b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -30,6 +30,7 @@
 #include <linux/context_tracking.h>
 #include <linux/cpufreq.h>
 #include <linux/cpumask_api.h>
+#include <linux/cpuset.h>
 #include <linux/ctype.h>
 #include <linux/file.h>
 #include <linux/fs_api.h>
-- 
2.51.0
Re: [PATCH 14/33] cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
Posted by Waiman Long 1 month ago
On 8/29/25 11:47 AM, Frederic Weisbecker wrote:
> Until now, HK_TYPE_DOMAIN used to only include boot defined isolated
> CPUs passed through isolcpus= boot option. Users interested in also
> knowing the runtime defined isolated CPUs through cpuset must use
> different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc...
>
> There are many drawbacks to that approach:
>
> 1) Most interested subsystems want to know about all isolated CPUs, not
>    just those defined on boot time.
>
> 2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
>    concurrent cpuset changes.
>
> 3) Further cpuset modifications are not propagated to subsystems
>
> Solve 1) and 2) and centralize all isolated CPUs within the
> HK_TYPE_DOMAIN housekeeping cpumask.
>
> Subsystems can rely on RCU to synchronize against concurrent changes.
>
> The propagation mentioned in 3) will be handled in further patches.
>
> Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
> ---
>   include/linux/sched/isolation.h |  4 +-
>   kernel/cgroup/cpuset.c          |  2 +
>   kernel/sched/isolation.c        | 65 ++++++++++++++++++++++++++++++---
>   kernel/sched/sched.h            |  1 +
>   4 files changed, 65 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> index 5ddb8dc5ca91..48f3b6b20604 100644
> --- a/kernel/sched/isolation.c
> +++ b/kernel/sched/isolation.c
> @@ -23,16 +23,39 @@ EXPORT_SYMBOL_GPL(housekeeping_flags);
>   
>   bool housekeeping_enabled(enum hk_type type)
>   {
> -	return !!(housekeeping_flags & BIT(type));
> +	return !!(READ_ONCE(housekeeping_flags) & BIT(type));
>   }
>   EXPORT_SYMBOL_GPL(housekeeping_enabled);
>   
> +static bool housekeeping_dereference_check(enum hk_type type)
> +{
> +	if (type == HK_TYPE_DOMAIN) {
> +		if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
> +			return true;
> +		if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
> +			return true;
> +
> +		return false;
> +	}
> +
> +	return true;
> +}

Both lockdep_is_cpuset_held() and lockdep_is_cpus_write_held() may be 
defined only if CONFIG_LOCKDEP is set. However, this function is 
currently referenced by __housekeeping_cpumask() via RCU_LOCKDEP_WARN(), 
so it is not invoked if CONFIG_LOCKDEP is not set. You are assuming that 
an unreferenced static function is not compiled into the object file. 
Should we bracket it with "#ifdef CONFIG_LOCKDEP" just to make this 
clear?


> +
> +static inline struct cpumask *__housekeeping_cpumask(enum hk_type type)
> +{
> +	return rcu_dereference_check(housekeeping_cpumasks[type],
> +				     housekeeping_dereference_check(type));
> +}
> +
>   const struct cpumask *housekeeping_cpumask(enum hk_type type)
>   {
> -	if (housekeeping_flags & BIT(type)) {
> -		return rcu_dereference_check(housekeeping_cpumasks[type], 1);
> -	}
> -	return cpu_possible_mask;
> +	const struct cpumask *mask = NULL;
> +
> +	if (READ_ONCE(housekeeping_flags) & BIT(type))
> +		mask = __housekeeping_cpumask(type);
> +	if (!mask)
> +		mask = cpu_possible_mask;
> +	return mask;
>   }
>   EXPORT_SYMBOL_GPL(housekeeping_cpumask);
>   
> @@ -70,12 +93,42 @@ EXPORT_SYMBOL_GPL(housekeeping_affine);
>   
>   bool housekeeping_test_cpu(int cpu, enum hk_type type)
>   {
> -	if (housekeeping_flags & BIT(type))
> +	if (READ_ONCE(housekeeping_flags) & BIT(type))
>   		return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
>   	return true;
>   }
>   EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
>   
> +int housekeeping_update(struct cpumask *mask, enum hk_type type)
> +{
> +	struct cpumask *trial, *old = NULL;
> +
> +	if (type != HK_TYPE_DOMAIN)
> +		return -ENOTSUPP;
> +
> +	trial = kmalloc(sizeof(*trial), GFP_KERNEL);
> +	if (!trial)
> +		return -ENOMEM;
> +
> +	cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), mask);
> +	if (!cpumask_intersects(trial, cpu_online_mask)) {
> +		kfree(trial);
> +		return -EINVAL;
> +	}
> +
> +	if (housekeeping_flags & BIT(type))
> +		old = __housekeeping_cpumask(type);
> +	else
> +		WRITE_ONCE(housekeeping_flags, housekeeping_flags | BIT(type));

Should we use READ_ONCE() to retrieve the current housekeeping_flags 
value?

Cheers,
Longman
Re: [PATCH 14/33] cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
Posted by Frederic Weisbecker 1 week, 3 days ago
Le Sun, Aug 31, 2025 at 08:40:36PM -0400, Waiman Long a écrit :
> On 8/29/25 11:47 AM, Frederic Weisbecker wrote:
> > Until now, HK_TYPE_DOMAIN used to only include boot defined isolated
> > CPUs passed through isolcpus= boot option. Users interested in also
> > knowing the runtime defined isolated CPUs through cpuset must use
> > different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc...
> > 
> > There are many drawbacks to that approach:
> > 
> > 1) Most interested subsystems want to know about all isolated CPUs, not
> >    just those defined on boot time.
> > 
> > 2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
> >    concurrent cpuset changes.
> > 
> > 3) Further cpuset modifications are not propagated to subsystems
> > 
> > Solve 1) and 2) and centralize all isolated CPUs within the
> > HK_TYPE_DOMAIN housekeeping cpumask.
> > 
> > Subsystems can rely on RCU to synchronize against concurrent changes.
> > 
> > The propagation mentioned in 3) will be handled in further patches.
> > 
> > Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
> > ---
> >   include/linux/sched/isolation.h |  4 +-
> >   kernel/cgroup/cpuset.c          |  2 +
> >   kernel/sched/isolation.c        | 65 ++++++++++++++++++++++++++++++---
> >   kernel/sched/sched.h            |  1 +
> >   4 files changed, 65 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> > index 5ddb8dc5ca91..48f3b6b20604 100644
> > --- a/kernel/sched/isolation.c
> > +++ b/kernel/sched/isolation.c
> > @@ -23,16 +23,39 @@ EXPORT_SYMBOL_GPL(housekeeping_flags);
> >   bool housekeeping_enabled(enum hk_type type)
> >   {
> > -	return !!(housekeeping_flags & BIT(type));
> > +	return !!(READ_ONCE(housekeeping_flags) & BIT(type));
> >   }
> >   EXPORT_SYMBOL_GPL(housekeeping_enabled);
> > +static bool housekeeping_dereference_check(enum hk_type type)
> > +{
> > +	if (type == HK_TYPE_DOMAIN) {
> > +		if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
> > +			return true;
> > +		if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
> > +			return true;
> > +
> > +		return false;
> > +	}
> > +
> > +	return true;
> > +}
> 
> Both lockdep_is_cpuset_held() and lockdep_is_cpus_write_held() may be
> defined only if CONFIG_LOCKDEP is set. However, this function is currently
> referenced by __housekeeping_cpumask() via RCU_LOCKDEP_WARN(). So it is not
> invoked if CONFIG_LOCKDEP is not set. You are assuming that static function
> not referenced is not being compiled into the object file. Should we bracket
> it with "ifdef CONFIG_LOCKDEP" just to make this clear?

Yes, you're right. And I remember some 0-day warnings about that on earlier
dev versions. I thought the issue was gone somehow but I think 0-day actually
finally concluded I ignored it :-)

> > +int housekeeping_update(struct cpumask *mask, enum hk_type type)
> > +{
> > +	struct cpumask *trial, *old = NULL;
> > +
> > +	if (type != HK_TYPE_DOMAIN)
> > +		return -ENOTSUPP;
> > +
> > +	trial = kmalloc(sizeof(*trial), GFP_KERNEL);
> > +	if (!trial)
> > +		return -ENOMEM;
> > +
> > +	cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), mask);
> > +	if (!cpumask_intersects(trial, cpu_online_mask)) {
> > +		kfree(trial);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (housekeeping_flags & BIT(type))
> > +		old = __housekeeping_cpumask(type);
> > +	else
> > +		WRITE_ONCE(housekeeping_flags, housekeeping_flags | BIT(type));
> 
> Should we use to READ_ONCE() to retrieve the current housekeeping_flags
> value?

Not here: this path is the only updater and it's locked by the cpuset mutex.

Thanks.

> 
> Cheers,
> Longman
> 

-- 
Frederic Weisbecker
SUSE Labs