kernel/cgroup/cpuset.c | 2 -- kernel/sched/isolation.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-)
From: Chen Ridong <chenridong@huawei.com>
The warning is observed:
WARNING: possible recursive locking detected
6.19.0-rc6-next-20260121 #1046 Not tainted
--------------------------------------------
test_cpuset_prs/686 is trying to acquire lock:
(cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0xd/0x20
but task is already holding lock:
(cpu_hotplug_lock){++++}-{0:0}, at: cpuset_partition_write+0x72/0x10
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(cpu_hotplug_lock);
lock(cpu_hotplug_lock);
*** DEADLOCK ***
May be due to missing lock nesting notation
stack backtrace:
CPU: 11 UID: 0 PID: 686 Comm: test_cpuset_prs 6.19.0-rc6-next-20260121 #1
Call Trace:
<TASK>
dump_stack_lvl+0x82/0xd0
print_deadlock_bug+0x288/0x3c0
__lock_acquire+0x1506/0x27f0
lock_acquire+0xc8/0x2d0
? static_key_enable+0xd/0x20
cpus_read_lock+0x3b/0xd0
? static_key_enable+0xd/0x20
static_key_enable+0xd/0x20
housekeeping_update+0xe7/0x1b0
update_prstate+0x3f2/0x580
cpuset_partition_write+0x98/0x100
kernfs_fop_write_iter+0x14e/0x200
vfs_write+0x367/0x510
ksys_write+0x66/0xe0
do_syscall_64+0x6b/0x390
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f824cf8c887
The commit 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from
cpuset") introduced housekeeping_update, which calls static_branch_enable
while cpu_read_lock() is held. Since static_key_enable itself also acquires
cpu_read_lock, this leads to a lockdep warning. To resolve this issue,
replace the call to static_key_enable with static_branch_enable_cpuslocked.
Fixes: 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset")
Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
kernel/cgroup/cpuset.c | 2 --
kernel/sched/isolation.c | 4 +++-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 392ce795656d..a26ccff55bb7 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1308,8 +1308,6 @@ static void update_isolation_cpumasks(void)
if (!isolated_cpus_updating)
return;
- lockdep_assert_cpus_held();
-
ret = housekeeping_update(isolated_cpus);
WARN_ON_ONCE(ret < 0);
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index ef152d401fe2..3b725d39c06e 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -123,6 +123,8 @@ int housekeeping_update(struct cpumask *isol_mask)
struct cpumask *trial, *old = NULL;
int err;
+ lockdep_assert_cpus_held();
+
trial = kmalloc(cpumask_size(), GFP_KERNEL);
if (!trial)
return -ENOMEM;
@@ -134,7 +136,7 @@ int housekeeping_update(struct cpumask *isol_mask)
}
if (!housekeeping.flags)
- static_branch_enable(&housekeeping_overridden);
+ static_branch_enable_cpuslocked(&housekeeping_overridden);
if (housekeeping.flags & HK_FLAG_DOMAIN)
old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
--
2.34.1
On 1/22/26 3:09 AM, Chen Ridong wrote:
> From: Chen Ridong <chenridong@huawei.com>
>
> The warning is observed:
>
> WARNING: possible recursive locking detected
> 6.19.0-rc6-next-20260121 #1046 Not tainted
> --------------------------------------------
> test_cpuset_prs/686 is trying to acquire lock:
> (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0xd/0x20
>
> but task is already holding lock:
> (cpu_hotplug_lock){++++}-{0:0}, at: cpuset_partition_write+0x72/0x10
>
> other info that might help us debug this:
> Possible unsafe locking scenario:
>
> CPU0
> ----
> lock(cpu_hotplug_lock);
> lock(cpu_hotplug_lock);
>
> *** DEADLOCK ***
>
> May be due to missing lock nesting notation
>
> stack backtrace:
> CPU: 11 UID: 0 PID: 686 Comm: test_cpuset_prs 6.19.0-rc6-next-20260121 #1
> Call Trace:
> <TASK>
> dump_stack_lvl+0x82/0xd0
> print_deadlock_bug+0x288/0x3c0
> __lock_acquire+0x1506/0x27f0
> lock_acquire+0xc8/0x2d0
> ? static_key_enable+0xd/0x20
> cpus_read_lock+0x3b/0xd0
> ? static_key_enable+0xd/0x20
> static_key_enable+0xd/0x20
> housekeeping_update+0xe7/0x1b0
> update_prstate+0x3f2/0x580
> cpuset_partition_write+0x98/0x100
> kernfs_fop_write_iter+0x14e/0x200
> vfs_write+0x367/0x510
> ksys_write+0x66/0xe0
> do_syscall_64+0x6b/0x390
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
> RIP: 0033:0x7f824cf8c887
>
> The commit 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from
> cpuset") introduced housekeeping_update, which calls static_branch_enable
> while cpu_read_lock() is held. Since static_key_enable itself also acquires
> cpu_read_lock, this leads to a lockdep warning. To resolve this issue,
> replace the call to static_key_enable with static_branch_enable_cpuslocked.
>
> Fixes: 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset")
> Signed-off-by: Chen Ridong <chenridong@huawei.com>
> ---
> kernel/cgroup/cpuset.c | 2 --
> kernel/sched/isolation.c | 4 +++-
> 2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index 392ce795656d..a26ccff55bb7 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -1308,8 +1308,6 @@ static void update_isolation_cpumasks(void)
> if (!isolated_cpus_updating)
> return;
>
> - lockdep_assert_cpus_held();
> -
> ret = housekeeping_update(isolated_cpus);
> WARN_ON_ONCE(ret < 0);
>
> diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> index ef152d401fe2..3b725d39c06e 100644
> --- a/kernel/sched/isolation.c
> +++ b/kernel/sched/isolation.c
> @@ -123,6 +123,8 @@ int housekeeping_update(struct cpumask *isol_mask)
> struct cpumask *trial, *old = NULL;
> int err;
>
> + lockdep_assert_cpus_held();
> +
> trial = kmalloc(cpumask_size(), GFP_KERNEL);
> if (!trial)
> return -ENOMEM;
> @@ -134,7 +136,7 @@ int housekeeping_update(struct cpumask *isol_mask)
> }
>
> if (!housekeeping.flags)
> - static_branch_enable(&housekeeping_overridden);
> + static_branch_enable_cpuslocked(&housekeeping_overridden);
>
> if (housekeeping.flags & HK_FLAG_DOMAIN)
> old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
Good catch.
Thanks,
Longman
Le Thu, Jan 22, 2026 at 08:09:02AM +0000, Chen Ridong a écrit :
> From: Chen Ridong <chenridong@huawei.com>
>
> The warning is observed:
>
> WARNING: possible recursive locking detected
> 6.19.0-rc6-next-20260121 #1046 Not tainted
> --------------------------------------------
> test_cpuset_prs/686 is trying to acquire lock:
> (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0xd/0x20
>
> but task is already holding lock:
> (cpu_hotplug_lock){++++}-{0:0}, at: cpuset_partition_write+0x72/0x10
>
> other info that might help us debug this:
> Possible unsafe locking scenario:
>
> CPU0
> ----
> lock(cpu_hotplug_lock);
> lock(cpu_hotplug_lock);
>
> *** DEADLOCK ***
>
> May be due to missing lock nesting notation
>
> stack backtrace:
> CPU: 11 UID: 0 PID: 686 Comm: test_cpuset_prs 6.19.0-rc6-next-20260121 #1
> Call Trace:
> <TASK>
> dump_stack_lvl+0x82/0xd0
> print_deadlock_bug+0x288/0x3c0
> __lock_acquire+0x1506/0x27f0
> lock_acquire+0xc8/0x2d0
> ? static_key_enable+0xd/0x20
> cpus_read_lock+0x3b/0xd0
> ? static_key_enable+0xd/0x20
> static_key_enable+0xd/0x20
> housekeeping_update+0xe7/0x1b0
> update_prstate+0x3f2/0x580
> cpuset_partition_write+0x98/0x100
> kernfs_fop_write_iter+0x14e/0x200
> vfs_write+0x367/0x510
> ksys_write+0x66/0xe0
> do_syscall_64+0x6b/0x390
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
> RIP: 0033:0x7f824cf8c887
>
> The commit 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from
> cpuset") introduced housekeeping_update, which calls static_branch_enable
> while cpu_read_lock() is held. Since static_key_enable itself also acquires
> cpu_read_lock, this leads to a lockdep warning. To resolve this issue,
> replace the call to static_key_enable with static_branch_enable_cpuslocked.
>
> Fixes: 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset")
> Signed-off-by: Chen Ridong <chenridong@huawei.com>
Thanks for spotting that! Funny that it didn't deadlock when I tested it.
Ah probably because I always booted with isolcpus= filled.
So ideally I should add your change as a fixup within
7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset") in order
not to break bisection.
Do you mind if I do that? I'll still add your Signed-off-by to the commit.
Thanks.
--
Frederic Weisbecker
SUSE Labs
On 2026/1/22 19:45, Frederic Weisbecker wrote:
> Le Thu, Jan 22, 2026 at 08:09:02AM +0000, Chen Ridong a écrit :
>> From: Chen Ridong <chenridong@huawei.com>
>>
>> The warning is observed:
>>
>> WARNING: possible recursive locking detected
>> 6.19.0-rc6-next-20260121 #1046 Not tainted
>> --------------------------------------------
>> test_cpuset_prs/686 is trying to acquire lock:
>> (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0xd/0x20
>>
>> but task is already holding lock:
>> (cpu_hotplug_lock){++++}-{0:0}, at: cpuset_partition_write+0x72/0x10
>>
>> other info that might help us debug this:
>> Possible unsafe locking scenario:
>>
>> CPU0
>> ----
>> lock(cpu_hotplug_lock);
>> lock(cpu_hotplug_lock);
>>
>> *** DEADLOCK ***
>>
>> May be due to missing lock nesting notation
>>
>> stack backtrace:
>> CPU: 11 UID: 0 PID: 686 Comm: test_cpuset_prs 6.19.0-rc6-next-20260121 #1
>> Call Trace:
>> <TASK>
>> dump_stack_lvl+0x82/0xd0
>> print_deadlock_bug+0x288/0x3c0
>> __lock_acquire+0x1506/0x27f0
>> lock_acquire+0xc8/0x2d0
>> ? static_key_enable+0xd/0x20
>> cpus_read_lock+0x3b/0xd0
>> ? static_key_enable+0xd/0x20
>> static_key_enable+0xd/0x20
>> housekeeping_update+0xe7/0x1b0
>> update_prstate+0x3f2/0x580
>> cpuset_partition_write+0x98/0x100
>> kernfs_fop_write_iter+0x14e/0x200
>> vfs_write+0x367/0x510
>> ksys_write+0x66/0xe0
>> do_syscall_64+0x6b/0x390
>> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>> RIP: 0033:0x7f824cf8c887
>>
>> The commit 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from
>> cpuset") introduced housekeeping_update, which calls static_branch_enable
>> while cpu_read_lock() is held. Since static_key_enable itself also acquires
>> cpu_read_lock, this leads to a lockdep warning. To resolve this issue,
>> replace the call to static_key_enable with static_branch_enable_cpuslocked.
>>
>> Fixes: 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset")
>> Signed-off-by: Chen Ridong <chenridong@huawei.com>
>
> Thanks for spotting that! Funny that it didn't deadlock when I tested it.
> Ah probably because I always booted with isolcpus= filled.
>
> So ideally I should add your change as a fixup within
> 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset") in order
> not to break bisection.
>
> Do you mind if I do that? I'll still add your Signed-off-by to the commit.
>
> Thanks.
>
I'm not entirely clear on the specifics of "breaking bisection", never mind, I
trust your judgment. Please go ahead and fix it in the way that you like.
--
Best regards,
Ridong
Le Thu, Jan 22, 2026 at 08:03:56PM +0800, Chen Ridong a écrit :
>
>
> On 2026/1/22 19:45, Frederic Weisbecker wrote:
> > Le Thu, Jan 22, 2026 at 08:09:02AM +0000, Chen Ridong a écrit :
> >> From: Chen Ridong <chenridong@huawei.com>
> >>
> >> The warning is observed:
> >>
> >> WARNING: possible recursive locking detected
> >> 6.19.0-rc6-next-20260121 #1046 Not tainted
> >> --------------------------------------------
> >> test_cpuset_prs/686 is trying to acquire lock:
> >> (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0xd/0x20
> >>
> >> but task is already holding lock:
> >> (cpu_hotplug_lock){++++}-{0:0}, at: cpuset_partition_write+0x72/0x10
> >>
> >> other info that might help us debug this:
> >> Possible unsafe locking scenario:
> >>
> >> CPU0
> >> ----
> >> lock(cpu_hotplug_lock);
> >> lock(cpu_hotplug_lock);
> >>
> >> *** DEADLOCK ***
> >>
> >> May be due to missing lock nesting notation
> >>
> >> stack backtrace:
> >> CPU: 11 UID: 0 PID: 686 Comm: test_cpuset_prs 6.19.0-rc6-next-20260121 #1
> >> Call Trace:
> >> <TASK>
> >> dump_stack_lvl+0x82/0xd0
> >> print_deadlock_bug+0x288/0x3c0
> >> __lock_acquire+0x1506/0x27f0
> >> lock_acquire+0xc8/0x2d0
> >> ? static_key_enable+0xd/0x20
> >> cpus_read_lock+0x3b/0xd0
> >> ? static_key_enable+0xd/0x20
> >> static_key_enable+0xd/0x20
> >> housekeeping_update+0xe7/0x1b0
> >> update_prstate+0x3f2/0x580
> >> cpuset_partition_write+0x98/0x100
> >> kernfs_fop_write_iter+0x14e/0x200
> >> vfs_write+0x367/0x510
> >> ksys_write+0x66/0xe0
> >> do_syscall_64+0x6b/0x390
> >> entry_SYSCALL_64_after_hwframe+0x76/0x7e
> >> RIP: 0033:0x7f824cf8c887
> >>
> >> The commit 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from
> >> cpuset") introduced housekeeping_update, which calls static_branch_enable
> >> while cpu_read_lock() is held. Since static_key_enable itself also acquires
> >> cpu_read_lock, this leads to a lockdep warning. To resolve this issue,
> >> replace the call to static_key_enable with static_branch_enable_cpuslocked.
> >>
> >> Fixes: 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset")
> >> Signed-off-by: Chen Ridong <chenridong@huawei.com>
> >
> > Thanks for spotting that! Funny that it didn't deadlock when I tested it.
> > Ah probably because I always booted with isolcpus= filled.
> >
> > So ideally I should add your change as a fixup within
> > 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset") in order
> > not to break bisection.
> >
> > Do you mind if I do that? I'll still add your Signed-off-by to the commit.
> >
> > Thanks.
> >
>
> I'm not entirely clear on the specifics of "breaking bisection", never mind, I
> trust your judgment. Please go ahead and fix it in the way that you like.
git bisect requires that no commit breaks testing in the middle.
The preferred way to deal with fixes on commits that haven't yet been
pulled upstream is to apply directly the fixup to the offending patches.
Here is the new version:
---
From: Frederic Weisbecker <frederic@kernel.org>
Date: Wed, 28 May 2025 18:05:32 +0200
Subject: [PATCH] cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Until now, HK_TYPE_DOMAIN used to only include boot defined isolated
CPUs passed through isolcpus= boot option. Users interested in also
knowing the runtime defined isolated CPUs through cpuset must use
different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc...
There are many drawbacks to that approach:
1) Most interested subsystems want to know about all isolated CPUs, not
just those defined on boot time.
2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
concurrent cpuset changes.
3) Further cpuset modifications are not propagated to subsystems
Solve 1) and 2) and centralize all isolated CPUs within the
HK_TYPE_DOMAIN housekeeping cpumask.
Subsystems can rely on RCU to synchronize against concurrent changes.
The propagation mentioned in 3) will be handled in further patches.
[Chen Ridong: Fix cpu_hotplug_lock deadlock and use correct static
branch API]
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Chen Ridong <chenridong@huawei.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Cc: "Michal Koutný" <mkoutny@suse.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Marco Crivellari <marco.crivellari@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Waiman Long <longman@redhat.com>
Cc: cgroups@vger.kernel.org
---
include/linux/sched/isolation.h | 7 +++
kernel/cgroup/cpuset.c | 5 ++-
kernel/sched/isolation.c | 75 ++++++++++++++++++++++++++++++---
kernel/sched/sched.h | 1 +
4 files changed, 80 insertions(+), 8 deletions(-)
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index c7cf6934489c..d8d9baf44516 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -9,6 +9,11 @@
enum hk_type {
/* Inverse of boot-time isolcpus= argument */
HK_TYPE_DOMAIN_BOOT,
+ /*
+ * Same as HK_TYPE_DOMAIN_BOOT but also includes the
+ * inverse of cpuset isolated partitions. As such it
+ * is always a subset of HK_TYPE_DOMAIN_BOOT.
+ */
HK_TYPE_DOMAIN,
/* Inverse of boot-time isolcpus=managed_irq argument */
HK_TYPE_MANAGED_IRQ,
@@ -35,6 +40,7 @@ extern const struct cpumask *housekeeping_cpumask(enum hk_type type);
extern bool housekeeping_enabled(enum hk_type type);
extern void housekeeping_affine(struct task_struct *t, enum hk_type type);
extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
+extern int housekeeping_update(struct cpumask *isol_mask);
extern void __init housekeeping_init(void);
#else
@@ -62,6 +68,7 @@ static inline bool housekeeping_test_cpu(int cpu, enum hk_type type)
return true;
}
+static inline int housekeeping_update(struct cpumask *isol_mask) { return 0; }
static inline void housekeeping_init(void) { }
#endif /* CONFIG_CPU_ISOLATION */
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 5e2e3514c22e..e146e1f34bf9 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1482,14 +1482,15 @@ static void update_isolation_cpumasks(void)
if (!isolated_cpus_updating)
return;
- lockdep_assert_cpus_held();
-
ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
WARN_ON_ONCE(ret < 0);
ret = tmigr_isolated_exclude_cpumask(isolated_cpus);
WARN_ON_ONCE(ret < 0);
+ ret = housekeeping_update(isolated_cpus);
+ WARN_ON_ONCE(ret < 0);
+
isolated_cpus_updating = false;
}
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 6f77289c14c3..674a02891b38 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -29,18 +29,48 @@ static struct housekeeping housekeeping;
bool housekeeping_enabled(enum hk_type type)
{
- return !!(housekeeping.flags & BIT(type));
+ return !!(READ_ONCE(housekeeping.flags) & BIT(type));
}
EXPORT_SYMBOL_GPL(housekeeping_enabled);
+static bool housekeeping_dereference_check(enum hk_type type)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP) && type == HK_TYPE_DOMAIN) {
+ /* Cpuset isn't even writable yet? */
+ if (system_state <= SYSTEM_SCHEDULING)
+ return true;
+
+ /* CPU hotplug write locked, so cpuset partition can't be overwritten */
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
+ return true;
+
+ /* Cpuset lock held, partitions not writable */
+ if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
+ return true;
+
+ return false;
+ }
+
+ return true;
+}
+
+static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type)
+{
+ return rcu_dereference_all_check(housekeeping.cpumasks[type],
+ housekeeping_dereference_check(type));
+}
+
const struct cpumask *housekeeping_cpumask(enum hk_type type)
{
+ const struct cpumask *mask = NULL;
+
if (static_branch_unlikely(&housekeeping_overridden)) {
- if (housekeeping.flags & BIT(type)) {
- return rcu_dereference_check(housekeeping.cpumasks[type], 1);
- }
+ if (READ_ONCE(housekeeping.flags) & BIT(type))
+ mask = housekeeping_cpumask_dereference(type);
}
- return cpu_possible_mask;
+ if (!mask)
+ mask = cpu_possible_mask;
+ return mask;
}
EXPORT_SYMBOL_GPL(housekeeping_cpumask);
@@ -80,12 +110,45 @@ EXPORT_SYMBOL_GPL(housekeeping_affine);
bool housekeeping_test_cpu(int cpu, enum hk_type type)
{
- if (static_branch_unlikely(&housekeeping_overridden) && housekeeping.flags & BIT(type))
+ if (static_branch_unlikely(&housekeeping_overridden) &&
+ READ_ONCE(housekeeping.flags) & BIT(type))
return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
return true;
}
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
+int housekeeping_update(struct cpumask *isol_mask)
+{
+ struct cpumask *trial, *old = NULL;
+
+ lockdep_assert_cpus_held();
+
+ trial = kmalloc(cpumask_size(), GFP_KERNEL);
+ if (!trial)
+ return -ENOMEM;
+
+ cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask);
+ if (!cpumask_intersects(trial, cpu_online_mask)) {
+ kfree(trial);
+ return -EINVAL;
+ }
+
+ if (!housekeeping.flags)
+ static_branch_enable_cpuslocked(&housekeeping_overridden);
+
+ if (housekeeping.flags & HK_FLAG_DOMAIN)
+ old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
+ else
+ WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN);
+ rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial);
+
+ synchronize_rcu();
+
+ kfree(old);
+
+ return 0;
+}
+
void __init housekeeping_init(void)
{
enum hk_type type;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 475bdab3b8db..653e898a996a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -30,6 +30,7 @@
#include <linux/context_tracking.h>
#include <linux/cpufreq.h>
#include <linux/cpumask_api.h>
+#include <linux/cpuset.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/fs_api.h>
--
2.51.1
On 2026/1/23 0:57, Frederic Weisbecker wrote:
> Le Thu, Jan 22, 2026 at 08:03:56PM +0800, Chen Ridong a écrit :
>>
>>
>> On 2026/1/22 19:45, Frederic Weisbecker wrote:
>>> Le Thu, Jan 22, 2026 at 08:09:02AM +0000, Chen Ridong a écrit :
>>>> From: Chen Ridong <chenridong@huawei.com>
>>>>
>>>> The warning is observed:
>>>>
>>>> WARNING: possible recursive locking detected
>>>> 6.19.0-rc6-next-20260121 #1046 Not tainted
>>>> --------------------------------------------
>>>> test_cpuset_prs/686 is trying to acquire lock:
>>>> (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0xd/0x20
>>>>
>>>> but task is already holding lock:
>>>> (cpu_hotplug_lock){++++}-{0:0}, at: cpuset_partition_write+0x72/0x10
>>>>
>>>> other info that might help us debug this:
>>>> Possible unsafe locking scenario:
>>>>
>>>> CPU0
>>>> ----
>>>> lock(cpu_hotplug_lock);
>>>> lock(cpu_hotplug_lock);
>>>>
>>>> *** DEADLOCK ***
>>>>
>>>> May be due to missing lock nesting notation
>>>>
>>>> stack backtrace:
>>>> CPU: 11 UID: 0 PID: 686 Comm: test_cpuset_prs 6.19.0-rc6-next-20260121 #1
>>>> Call Trace:
>>>> <TASK>
>>>> dump_stack_lvl+0x82/0xd0
>>>> print_deadlock_bug+0x288/0x3c0
>>>> __lock_acquire+0x1506/0x27f0
>>>> lock_acquire+0xc8/0x2d0
>>>> ? static_key_enable+0xd/0x20
>>>> cpus_read_lock+0x3b/0xd0
>>>> ? static_key_enable+0xd/0x20
>>>> static_key_enable+0xd/0x20
>>>> housekeeping_update+0xe7/0x1b0
>>>> update_prstate+0x3f2/0x580
>>>> cpuset_partition_write+0x98/0x100
>>>> kernfs_fop_write_iter+0x14e/0x200
>>>> vfs_write+0x367/0x510
>>>> ksys_write+0x66/0xe0
>>>> do_syscall_64+0x6b/0x390
>>>> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>>> RIP: 0033:0x7f824cf8c887
>>>>
>>>> The commit 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from
>>>> cpuset") introduced housekeeping_update, which calls static_branch_enable
>>>> while cpu_read_lock() is held. Since static_key_enable itself also acquires
>>>> cpu_read_lock, this leads to a lockdep warning. To resolve this issue,
>>>> replace the call to static_key_enable with static_branch_enable_cpuslocked.
>>>>
>>>> Fixes: 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset")
>>>> Signed-off-by: Chen Ridong <chenridong@huawei.com>
>>>
>>> Thanks for spotting that! Funny that it didn't deadlock when I tested it.
>>> Ah probably because I always booted with isolcpus= filled.
>>>
>>> So ideally I should add your change as a fixup within
>>> 7109b22e6581 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset") in order
>>> not to break bisection.
>>>
>>> Do you mind if I do that? I'll still add your Signed-off-by to the commit.
>>>
>>> Thanks.
>>>
>>
>> I'm not entirely clear on the specifics of "breaking bisection", never mind, I
>> trust your judgment. Please go ahead and fix it in the way that you like.
>
> git bisect requires that no commit breaks testing in the middle.
> The preferred way to deal with fixes on commits that haven't yet been
> pulled upstream is to apply directly the fixup to the offending patches.
>
> Here is the new version:
>
Thanks.
It is fine for me.
> ---
> From: Frederic Weisbecker <frederic@kernel.org>
> Date: Wed, 28 May 2025 18:05:32 +0200
> Subject: [PATCH] cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Until now, HK_TYPE_DOMAIN used to only include boot defined isolated
> CPUs passed through isolcpus= boot option. Users interested in also
> knowing the runtime defined isolated CPUs through cpuset must use
> different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc...
>
> There are many drawbacks to that approach:
>
> 1) Most interested subsystems want to know about all isolated CPUs, not
> just those defined on boot time.
>
> 2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
> concurrent cpuset changes.
>
> 3) Further cpuset modifications are not propagated to subsystems
>
> Solve 1) and 2) and centralize all isolated CPUs within the
> HK_TYPE_DOMAIN housekeeping cpumask.
>
> Subsystems can rely on RCU to synchronize against concurrent changes.
>
> The propagation mentioned in 3) will be handled in further patches.
>
> [Chen Ridong: Fix cpu_hotplug_lock deadlock and use correct static
> branch API]
>
> Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
> Reviewed-by: Waiman Long <longman@redhat.com>
> Reviewed-by: Chen Ridong <chenridong@huawei.com>
> Signed-off-by: Chen Ridong <chenridong@huawei.com>
> Cc: "Michal Koutný" <mkoutny@suse.com>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Marco Crivellari <marco.crivellari@suse.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Tejun Heo <tj@kernel.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Waiman Long <longman@redhat.com>
> Cc: cgroups@vger.kernel.org
> ---
> include/linux/sched/isolation.h | 7 +++
> kernel/cgroup/cpuset.c | 5 ++-
> kernel/sched/isolation.c | 75 ++++++++++++++++++++++++++++++---
> kernel/sched/sched.h | 1 +
> 4 files changed, 80 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
> index c7cf6934489c..d8d9baf44516 100644
> --- a/include/linux/sched/isolation.h
> +++ b/include/linux/sched/isolation.h
> @@ -9,6 +9,11 @@
> enum hk_type {
> /* Inverse of boot-time isolcpus= argument */
> HK_TYPE_DOMAIN_BOOT,
> + /*
> + * Same as HK_TYPE_DOMAIN_BOOT but also includes the
> + * inverse of cpuset isolated partitions. As such it
> + * is always a subset of HK_TYPE_DOMAIN_BOOT.
> + */
> HK_TYPE_DOMAIN,
> /* Inverse of boot-time isolcpus=managed_irq argument */
> HK_TYPE_MANAGED_IRQ,
> @@ -35,6 +40,7 @@ extern const struct cpumask *housekeeping_cpumask(enum hk_type type);
> extern bool housekeeping_enabled(enum hk_type type);
> extern void housekeeping_affine(struct task_struct *t, enum hk_type type);
> extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
> +extern int housekeeping_update(struct cpumask *isol_mask);
> extern void __init housekeeping_init(void);
>
> #else
> @@ -62,6 +68,7 @@ static inline bool housekeeping_test_cpu(int cpu, enum hk_type type)
> return true;
> }
>
> +static inline int housekeeping_update(struct cpumask *isol_mask) { return 0; }
> static inline void housekeeping_init(void) { }
> #endif /* CONFIG_CPU_ISOLATION */
>
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index 5e2e3514c22e..e146e1f34bf9 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -1482,14 +1482,15 @@ static void update_isolation_cpumasks(void)
> if (!isolated_cpus_updating)
> return;
>
> - lockdep_assert_cpus_held();
> -
> ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
> WARN_ON_ONCE(ret < 0);
>
> ret = tmigr_isolated_exclude_cpumask(isolated_cpus);
> WARN_ON_ONCE(ret < 0);
>
> + ret = housekeeping_update(isolated_cpus);
> + WARN_ON_ONCE(ret < 0);
> +
> isolated_cpus_updating = false;
> }
>
> diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> index 6f77289c14c3..674a02891b38 100644
> --- a/kernel/sched/isolation.c
> +++ b/kernel/sched/isolation.c
> @@ -29,18 +29,48 @@ static struct housekeeping housekeeping;
>
> bool housekeeping_enabled(enum hk_type type)
> {
> - return !!(housekeeping.flags & BIT(type));
> + return !!(READ_ONCE(housekeeping.flags) & BIT(type));
> }
> EXPORT_SYMBOL_GPL(housekeeping_enabled);
>
> +static bool housekeeping_dereference_check(enum hk_type type)
> +{
> + if (IS_ENABLED(CONFIG_LOCKDEP) && type == HK_TYPE_DOMAIN) {
> + /* Cpuset isn't even writable yet? */
> + if (system_state <= SYSTEM_SCHEDULING)
> + return true;
> +
> + /* CPU hotplug write locked, so cpuset partition can't be overwritten */
> + if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
> + return true;
> +
> + /* Cpuset lock held, partitions not writable */
> + if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
> + return true;
> +
> + return false;
> + }
> +
> + return true;
> +}
> +
> +static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type)
> +{
> + return rcu_dereference_all_check(housekeeping.cpumasks[type],
> + housekeeping_dereference_check(type));
> +}
> +
> const struct cpumask *housekeeping_cpumask(enum hk_type type)
> {
> + const struct cpumask *mask = NULL;
> +
> if (static_branch_unlikely(&housekeeping_overridden)) {
> - if (housekeeping.flags & BIT(type)) {
> - return rcu_dereference_check(housekeeping.cpumasks[type], 1);
> - }
> + if (READ_ONCE(housekeeping.flags) & BIT(type))
> + mask = housekeeping_cpumask_dereference(type);
> }
> - return cpu_possible_mask;
> + if (!mask)
> + mask = cpu_possible_mask;
> + return mask;
> }
> EXPORT_SYMBOL_GPL(housekeeping_cpumask);
>
> @@ -80,12 +110,45 @@ EXPORT_SYMBOL_GPL(housekeeping_affine);
>
> bool housekeeping_test_cpu(int cpu, enum hk_type type)
> {
> - if (static_branch_unlikely(&housekeeping_overridden) && housekeeping.flags & BIT(type))
> + if (static_branch_unlikely(&housekeeping_overridden) &&
> + READ_ONCE(housekeeping.flags) & BIT(type))
> return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
> return true;
> }
> EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
>
> +int housekeeping_update(struct cpumask *isol_mask)
> +{
> + struct cpumask *trial, *old = NULL;
> +
> + lockdep_assert_cpus_held();
> +
> + trial = kmalloc(cpumask_size(), GFP_KERNEL);
> + if (!trial)
> + return -ENOMEM;
> +
> + cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask);
> + if (!cpumask_intersects(trial, cpu_online_mask)) {
> + kfree(trial);
> + return -EINVAL;
> + }
> +
> + if (!housekeeping.flags)
> + static_branch_enable_cpuslocked(&housekeeping_overridden);
> +
> + if (housekeeping.flags & HK_FLAG_DOMAIN)
> + old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
> + else
> + WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN);
> + rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial);
> +
> + synchronize_rcu();
> +
> + kfree(old);
> +
> + return 0;
> +}
> +
> void __init housekeeping_init(void)
> {
> enum hk_type type;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 475bdab3b8db..653e898a996a 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -30,6 +30,7 @@
> #include <linux/context_tracking.h>
> #include <linux/cpufreq.h>
> #include <linux/cpumask_api.h>
> +#include <linux/cpuset.h>
> #include <linux/ctype.h>
> #include <linux/file.h>
> #include <linux/fs_api.h>
--
Best regards,
Ridong
© 2016 - 2026 Red Hat, Inc.