From: Chen Ridong <chenridong@huawei.com>
A bug was detected with the following steps:
# cd /sys/fs/cgroup/
# mkdir test
# echo 9 > test/cpuset.cpus
# echo isolated > test/cpuset.cpus.partition
# cat test/cpuset.cpus.partition
isolated
# cat test/cpuset.cpus
9
# echo root > test/cpuset.cpus.partition
# cat test/cpuset.cpus
9
# cat test/cpuset.cpus.partition
root
CPU 9 was initially placed in an isolated partition. When the partition
type is changed from isolated to root, CPU 9 remains in what becomes a
valid root partition. This violates the rule that isolcpus can only be
assigned to isolated partitions.
Fix the issue by re-enabling partition validation, which performs
comprehensive partition error checking. In the scenario described above,
this change causes the operation to fail with housekeeping conflicts,
preventing the invalid configuration.
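
As a rough illustration (a minimal, self-contained userspace model, not
the kernel implementation; the PRS constants are modeled after the
cpuset ones and the isolcpus mask is assumed for the example), the kind
of housekeeping conflict the validation path catches looks like this:

#include <stdbool.h>
#include <stdio.h>

enum { PRS_MEMBER = 0, PRS_ROOT = 1, PRS_ISOLATED = 2 };

/* Assume the machine was booted with isolcpus=9. */
static const unsigned long boot_isolcpus = 1UL << 9;

/* Only a load-balanced ("root") partition conflicts with CPUs that
 * were isolated on the boot command line. */
static bool housekeeping_conflict(int new_prs, unsigned long new_cpus)
{
        return new_prs == PRS_ROOT && (new_cpus & boot_isolcpus);
}

int main(void)
{
        printf("isolated partition on CPU 9: conflict=%d\n",
               housekeeping_conflict(PRS_ISOLATED, 1UL << 9)); /* 0 */
        printf("root partition on CPU 9:     conflict=%d\n",
               housekeeping_conflict(PRS_ROOT, 1UL << 9));     /* 1 */
        return 0;
}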
Additionally, when enabling a local partition, the warning that
tmp->addmask is not a subset of the parent's effective CPUs was removed.
This warning would be triggered during local partition re-enablement
because the CPUs had already been added to exclusive_cpus during the
previous enable operation. The subset check is not applicable in this
re-enablement scenario.
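
To see why the check no longer holds on re-enablement, here is a minimal
self-contained sketch using plain bitmasks in place of cpumasks (the CPU
numbers are assumed for the example):

#include <stdbool.h>
#include <stdio.h>

/* Model cpumasks as plain bitmasks; subset() mirrors cpumask_subset(). */
static bool subset(unsigned long a, unsigned long b)
{
        return (a & ~b) == 0;
}

int main(void)
{
        unsigned long parent_effective = 0x3ff; /* parent owns CPUs 0-9 */
        unsigned long part_cpus = 1UL << 9;     /* partition claims CPU 9 */

        /* First enable: CPU 9 still sits in the parent, the check holds. */
        printf("first enable:  subset=%d\n",
               subset(part_cpus, parent_effective));

        /* Enabling removes CPU 9 from the parent's effective CPUs. */
        parent_effective &= ~part_cpus;

        /* Re-enablement (partition type change): the condition no longer
         * holds, so the old WARN_ON_ONCE() would fire spuriously. */
        printf("re-enablement: subset=%d\n",
               subset(part_cpus, parent_effective));
        return 0;
}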
Fixes: f28e22441f35 ("cgroup/cpuset: Add a new isolated cpus.partition type")
Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
kernel/cgroup/cpuset.c | 35 +++++++++--------------------------
1 file changed, 9 insertions(+), 26 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 20288dbd6ccf..2aaa688c596f 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1873,6 +1873,7 @@ static int local_partition_enable(struct cpuset *cs,
{
struct cpuset *parent = parent_cs(cs);
enum prs_errcode part_error;
+ bool cpumask_updated = false;
lockdep_assert_held(&cpuset_mutex);
WARN_ON_ONCE(is_remote_partition(cs)); /* For local partition only */
@@ -1899,22 +1900,14 @@ static int local_partition_enable(struct cpuset *cs,
if (part_error)
return part_error;
- /*
- * This function will only be called when all the preliminary
- * checks have passed. At this point, the following condition
- * should hold.
- *
- * (cs->effective_xcpus & cpu_active_mask) ⊆ parent->effective_cpus
- *
- * Warn if it is not the case.
- * addmask is used as temporary variable.
- */
- cpumask_and(tmp->addmask, tmp->new_cpus, cpu_active_mask);
- WARN_ON_ONCE(!cpumask_subset(tmp->addmask, parent->effective_cpus));
+ cpumask_updated = cpumask_andnot(tmp->addmask, tmp->new_cpus,
+ parent->effective_cpus);
partition_enable(cs, parent, new_prs, tmp->new_cpus);
- cpuset_update_tasks_cpumask(parent, tmp->addmask);
- update_sibling_cpumasks(parent, cs, tmp);
+ if (cpumask_updated) {
+ cpuset_update_tasks_cpumask(parent, tmp->addmask);
+ update_sibling_cpumasks(parent, cs, tmp);
+ }
return 0;
}
@@ -2902,7 +2895,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
int err = PERR_NONE, old_prs = cs->partition_root_state;
struct cpuset *parent = parent_cs(cs);
struct tmpmasks tmpmask;
- bool isolcpus_updated = false;
if (old_prs == new_prs)
return 0;
@@ -2920,7 +2912,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
if (err)
goto out;
- if (!old_prs) {
+ if (new_prs > 0) {
/*
* cpus_allowed and exclusive_cpus cannot be both empty.
*/
@@ -2950,12 +2942,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
err = local_partition_enable(cs, new_prs, &tmpmask);
else
err = remote_partition_enable(cs, new_prs, &tmpmask);
- } else if (old_prs && new_prs) {
- /*
- * A change in load balance state only, no change in cpumasks.
- * Need to update isolated_cpus.
- */
- isolcpus_updated = true;
} else {
/*
* Switching back to member is always allowed even if it
@@ -2985,16 +2971,13 @@ static int update_prstate(struct cpuset *cs, int new_prs)
WRITE_ONCE(cs->prs_err, err);
if (!is_partition_valid(cs))
reset_partition_data(cs);
- else if (isolcpus_updated)
- isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus);
spin_unlock_irq(&callback_lock);
- update_unbound_workqueue_cpumask(isolcpus_updated);
/* Force update if switching back to member & update effective_xcpus */
update_cpumasks_hier(cs, &tmpmask, !new_prs);
/* A newly created partition must have effective_xcpus set */
- WARN_ON_ONCE(!old_prs && (new_prs > 0)
+ WARN_ON_ONCE(!old_prs && (cs->partition_root_state > 0)
&& cpumask_empty(cs->effective_xcpus));
/* Update sched domains and load balance flag */
--
2.34.1
On 9/28/25 3:13 AM, Chen Ridong wrote:
> From: Chen Ridong <chenridong@huawei.com>
>
> A bug was detected with the following steps:
>
> # cd /sys/fs/cgroup/
> # mkdir test
> # echo 9 > test/cpuset.cpus
> # echo isolated > test/cpuset.cpus.partition
> # cat test/cpuset.cpus.partition
> isolated
> # cat test/cpuset.cpus
> 9
> # echo root > test/cpuset.cpus.partition
> # cat test/cpuset.cpus
> 9
> # cat test/cpuset.cpus.partition
> root
>
> CPU 9 was initially placed in an isolated partition. When the partition
> type is changed from isolated to root, CPU 9 remains in what becomes a
> valid root partition. This violates the rule that isolcpus can only be
> assigned to isolated partitions.
I am a bit confused at the beginning about this as it does not clearly
state that CPU 9 was listed in the "isolcpus" boot command line
parameter, but I believe this is what you mean here. Yes, there is a
restriction that a boot-time isolcpus CPU cannot be put into a
non-isolated partition, though that will likely be relaxed in the near
future.
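
For concreteness, a boot command line fragment like the following (the
CPU number is assumed for the example) would set that restriction up:

        isolcpus=9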
Anyway, it is a real corner case. I also don't believe commit
f28e22441f35 is the one that introduced this issue as the restriction
was added later on via commit 4a74e418881f ("cgroup/cpuset: Check
partition conflict with housekeeping setup").
As you have added a Fixes tag, it should be moved to the front of the
series as it is likely to be backported to stable. Putting it near the
end of a series with a lot of changes in between will make it harder to
backport to the stable kernels.
> Fix the issue by re-enabling partition validation, which performs
> comprehensive partition error checking. In the scenario described above,
> this change causes the operation to fail with housekeeping conflicts,
> preventing the invalid configuration.
From the code diff below, I don't know how you re-enable partition
validation.
>
> Additionally, when enabling a local partition, the warning that
> tmp->addmask is not a subset of the parent's effective CPUs was removed.
> This warning would be triggered during local partition re-enablement
> because the CPUs had already been added to exclusive_cpus during the
> previous enable operation. The subset check is not applicable in this
> re-enablement scenario.
That should be in the new code that you introduce in this series. So it
should either be integrated into one of your earlier patches or be
separated out as a separate patch without the Fixes tag, as it is not
applicable to the stable releases.
Cheers,
Longman
>
> Fixes: f28e22441f35 ("cgroup/cpuset: Add a new isolated cpus.partition type")
> Signed-off-by: Chen Ridong <chenridong@huawei.com>
> ---
> kernel/cgroup/cpuset.c | 35 +++++++++--------------------------
> 1 file changed, 9 insertions(+), 26 deletions(-)
>
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index 20288dbd6ccf..2aaa688c596f 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -1873,6 +1873,7 @@ static int local_partition_enable(struct cpuset *cs,
> {
> struct cpuset *parent = parent_cs(cs);
> enum prs_errcode part_error;
> + bool cpumask_updated = false;
>
> lockdep_assert_held(&cpuset_mutex);
> WARN_ON_ONCE(is_remote_partition(cs)); /* For local partition only */
> @@ -1899,22 +1900,14 @@ static int local_partition_enable(struct cpuset *cs,
> if (part_error)
> return part_error;
>
> - /*
> - * This function will only be called when all the preliminary
> - * checks have passed. At this point, the following condition
> - * should hold.
> - *
> - * (cs->effective_xcpus & cpu_active_mask) ⊆ parent->effective_cpus
> - *
> - * Warn if it is not the case.
> - * addmask is used as temporary variable.
> - */
> - cpumask_and(tmp->addmask, tmp->new_cpus, cpu_active_mask);
> - WARN_ON_ONCE(!cpumask_subset(tmp->addmask, parent->effective_cpus));
> + cpumask_updated = cpumask_andnot(tmp->addmask, tmp->new_cpus,
> + parent->effective_cpus);
> partition_enable(cs, parent, new_prs, tmp->new_cpus);
>
> - cpuset_update_tasks_cpumask(parent, tmp->addmask);
> - update_sibling_cpumasks(parent, cs, tmp);
> + if (cpumask_updated) {
> + cpuset_update_tasks_cpumask(parent, tmp->addmask);
> + update_sibling_cpumasks(parent, cs, tmp);
> + }
> return 0;
> }
>
> @@ -2902,7 +2895,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
> int err = PERR_NONE, old_prs = cs->partition_root_state;
> struct cpuset *parent = parent_cs(cs);
> struct tmpmasks tmpmask;
> - bool isolcpus_updated = false;
>
> if (old_prs == new_prs)
> return 0;
> @@ -2920,7 +2912,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
> if (err)
> goto out;
>
> - if (!old_prs) {
> + if (new_prs > 0) {
> /*
> * cpus_allowed and exclusive_cpus cannot be both empty.
> */
> @@ -2950,12 +2942,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
> err = local_partition_enable(cs, new_prs, &tmpmask);
> else
> err = remote_partition_enable(cs, new_prs, &tmpmask);
> - } else if (old_prs && new_prs) {
> - /*
> - * A change in load balance state only, no change in cpumasks.
> - * Need to update isolated_cpus.
> - */
> - isolcpus_updated = true;
> } else {
> /*
> * Switching back to member is always allowed even if it
> @@ -2985,16 +2971,13 @@ static int update_prstate(struct cpuset *cs, int new_prs)
> WRITE_ONCE(cs->prs_err, err);
> if (!is_partition_valid(cs))
> reset_partition_data(cs);
> - else if (isolcpus_updated)
> - isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus);
> spin_unlock_irq(&callback_lock);
> - update_unbound_workqueue_cpumask(isolcpus_updated);
>
> /* Force update if switching back to member & update effective_xcpus */
> update_cpumasks_hier(cs, &tmpmask, !new_prs);
>
> /* A newly created partition must have effective_xcpus set */
> - WARN_ON_ONCE(!old_prs && (new_prs > 0)
> + WARN_ON_ONCE(!old_prs && (cs->partition_root_state > 0)
> && cpumask_empty(cs->effective_xcpus));
>
> /* Update sched domains and load balance flag */
On 2025/10/20 11:06, Waiman Long wrote:
> On 9/28/25 3:13 AM, Chen Ridong wrote:
>> From: Chen Ridong <chenridong@huawei.com>
>>
>> A bug was detected with the following steps:
>>
>> # cd /sys/fs/cgroup/
>> # mkdir test
>> # echo 9 > test/cpuset.cpus
>> # echo isolated > test/cpuset.cpus.partition
>> # cat test/cpuset.cpus.partition
>> isolated
>> # cat test/cpuset.cpus
>> 9
>> # echo root > test/cpuset.cpus.partition
>> # cat test/cpuset.cpus
>> 9
>> # cat test/cpuset.cpus.partition
>> root
>>
>> CPU 9 was initially placed in an isolated partition. When the partition
>> type is changed from isolated to root, CPU 9 remains in what becomes a
>> valid root partition. This violates the rule that isolcpus can only be
>> assigned to isolated partitions.
>
> I am a bit confused at the beginning about this as it does not clearly state that CPU 9 was listed
> in the "isolcpus" boot command line parameter, but I believe this is what you mean here. Yes, there
> is a restriction that a boot-time isolcpus CPU cannot be put into a non-isolated partition, though
> that will likely be relaxed in the near future.
>

Hi Longman,

Just to confirm, does this restriction also apply to remote partitions?

--
Best regards,
Ridong
On 2025/10/20 11:06, Waiman Long wrote:
> On 9/28/25 3:13 AM, Chen Ridong wrote:
>> From: Chen Ridong <chenridong@huawei.com>
>>
>> A bug was detected with the following steps:
>>
>> # cd /sys/fs/cgroup/
>> # mkdir test
>> # echo 9 > test/cpuset.cpus
>> # echo isolated > test/cpuset.cpus.partition
>> # cat test/cpuset.cpus.partition
>> isolated
>> # cat test/cpuset.cpus
>> 9
>> # echo root > test/cpuset.cpus.partition
>> # cat test/cpuset.cpus
>> 9
>> # cat test/cpuset.cpus.partition
>> root
>>
>> CPU 9 was initially placed in an isolated partition. When the partition
>> type is changed from isolated to root, CPU 9 remains in what becomes a
>> valid root partition. This violates the rule that isolcpus can only be
>> assigned to isolated partitions.
>
> I am a bit confused at the beginning about this as it does not clearly state that CPU 9 was listed
> in the "isolcpus" boot command line parameter, but I believe this is what you mean here. Yes, there
> is a restriction that a boot-time isolcpus CPU cannot be put into a non-isolated partition, though
> that will likely be relaxed in the near future.
>
Yep, the CPU 9 was listed in the "isolcpus" boot command line parameter.
> Anyway, it is a real corner case. I also don't believe commit f28e22441f35 is the one that
> introduced this issue as the restriction was added later on via commit 4a74e418881f ("cgroup/cpuset:
> Check partition conflict with housekeeping setup").
>
> As you have added a Fixes tag, it should be moved to the front of the series as it is likely to be
> backported to stable. Putting it near the end of a series with a lot of changes in between will make
> it harder to backport to the stable kernels.
>
Maybe I should find some way to fix this issue first.
>> Fix the issue by re-enabling partition validation, which performs
>> comprehensive partition error checking. In the scenario described above,
>> this change causes the operation to fail with housekeeping conflicts,
>> preventing the invalid configuration.
> From the code diff below, I don't know how you re-enable partition validation.
>
When a valid local partition has its type changed (e.g., from "isolated" to "root"), the enable path
(local_partition_enable()/remote_partition_enable()) is not invoked again. Consequently, the critical
check of whether the partition can be enabled is skipped for a sequence like:

# echo isolated > test/cpuset.cpus.partition
# echo root > test/cpuset.cpus.partition
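
A tiny userspace sketch of that gating difference (PRS constants modeled
after the cpuset ones; this is an illustration, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

enum { PRS_MEMBER = 0, PRS_ROOT = 1, PRS_ISOLATED = 2 };

/* Old gate: only member -> partition transitions were validated. */
static bool old_gate(int old_prs, int new_prs)
{
        (void)new_prs;
        return old_prs == PRS_MEMBER;
}

/* New gate: any transition that ends in a partition is validated,
 * including a type change such as isolated -> root. */
static bool new_gate(int old_prs, int new_prs)
{
        (void)old_prs;
        return new_prs > 0;
}

int main(void)
{
        printf("isolated -> root, old gate: %d\n",
               old_gate(PRS_ISOLATED, PRS_ROOT));  /* 0: check skipped */
        printf("isolated -> root, new gate: %d\n",
               new_gate(PRS_ISOLATED, PRS_ROOT));  /* 1: check runs */
        return 0;
}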
>>
>> Additionally, when enabling a local partition, the warning that
>> tmp->addmask is not a subset of the parent's effective CPUs was removed.
>> This warning would be triggered during local partition re-enablement
>> because the CPUs had already been added to exclusive_cpus during the
>> previous enable operation. The subset check is not applicable in this
>> re-enablement scenario.
>
> That should be in the new code that you introduce in this series. So it should either be integrated
> into one of your earlier patches or be separated out as a separate patch without the Fixes tag, as
> it is not applicable to the stable releases.
>
> Cheers,
> Longman
>
Okay, I will try to integrate it into earlier patches.
>>
>> Fixes: f28e22441f35 ("cgroup/cpuset: Add a new isolated cpus.partition type")
>> Signed-off-by: Chen Ridong <chenridong@huawei.com>
>> ---
>> kernel/cgroup/cpuset.c | 35 +++++++++--------------------------
>> 1 file changed, 9 insertions(+), 26 deletions(-)
>>
>> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
>> index 20288dbd6ccf..2aaa688c596f 100644
>> --- a/kernel/cgroup/cpuset.c
>> +++ b/kernel/cgroup/cpuset.c
>> @@ -1873,6 +1873,7 @@ static int local_partition_enable(struct cpuset *cs,
>> {
>> struct cpuset *parent = parent_cs(cs);
>> enum prs_errcode part_error;
>> + bool cpumask_updated = false;
>> lockdep_assert_held(&cpuset_mutex);
>> WARN_ON_ONCE(is_remote_partition(cs)); /* For local partition only */
>> @@ -1899,22 +1900,14 @@ static int local_partition_enable(struct cpuset *cs,
>> if (part_error)
>> return part_error;
>> - /*
>> - * This function will only be called when all the preliminary
>> - * checks have passed. At this point, the following condition
>> - * should hold.
>> - *
>> - * (cs->effective_xcpus & cpu_active_mask) ⊆ parent->effective_cpus
>> - *
>> - * Warn if it is not the case.
>> - * addmask is used as temporary variable.
>> - */
>> - cpumask_and(tmp->addmask, tmp->new_cpus, cpu_active_mask);
>> - WARN_ON_ONCE(!cpumask_subset(tmp->addmask, parent->effective_cpus));
>> + cpumask_updated = cpumask_andnot(tmp->addmask, tmp->new_cpus,
>> + parent->effective_cpus);
>> partition_enable(cs, parent, new_prs, tmp->new_cpus);
>> - cpuset_update_tasks_cpumask(parent, tmp->addmask);
>> - update_sibling_cpumasks(parent, cs, tmp);
>> + if (cpumask_updated) {
>> + cpuset_update_tasks_cpumask(parent, tmp->addmask);
>> + update_sibling_cpumasks(parent, cs, tmp);
>> + }
>> return 0;
>> }
>> @@ -2902,7 +2895,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
>> int err = PERR_NONE, old_prs = cs->partition_root_state;
>> struct cpuset *parent = parent_cs(cs);
>> struct tmpmasks tmpmask;
>> - bool isolcpus_updated = false;
>> if (old_prs == new_prs)
>> return 0;
>> @@ -2920,7 +2912,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
>> if (err)
>> goto out;
>> - if (!old_prs) {
>> + if (new_prs > 0) {
>> /*
>> * cpus_allowed and exclusive_cpus cannot be both empty.
>> */
>> @@ -2950,12 +2942,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
>> err = local_partition_enable(cs, new_prs, &tmpmask);
>> else
>> err = remote_partition_enable(cs, new_prs, &tmpmask);
>> - } else if (old_prs && new_prs) {
>> - /*
>> - * A change in load balance state only, no change in cpumasks.
>> - * Need to update isolated_cpus.
>> - */
>> - isolcpus_updated = true;
>> } else {
>> /*
>> * Switching back to member is always allowed even if it
>> @@ -2985,16 +2971,13 @@ static int update_prstate(struct cpuset *cs, int new_prs)
>> WRITE_ONCE(cs->prs_err, err);
>> if (!is_partition_valid(cs))
>> reset_partition_data(cs);
>> - else if (isolcpus_updated)
>> - isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus);
>> spin_unlock_irq(&callback_lock);
>> - update_unbound_workqueue_cpumask(isolcpus_updated);
>> /* Force update if switching back to member & update effective_xcpus */
>> update_cpumasks_hier(cs, &tmpmask, !new_prs);
>> /* A newly created partition must have effective_xcpus set */
>> - WARN_ON_ONCE(!old_prs && (new_prs > 0)
>> + WARN_ON_ONCE(!old_prs && (cs->partition_root_state > 0)
>> && cpumask_empty(cs->effective_xcpus));
>> /* Update sched domains and load balance flag */
--
Best regards,
Ridong