[PATCH] sched_ext: Fix the memleak for sch->helper objects

Zqiang posted 1 patch 1 week, 3 days ago
kernel/sched/ext.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
[PATCH] sched_ext: Fix the memleak for sch->helper objects
Posted by Zqiang 1 week, 3 days ago
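kthread_stop() only stops the worker task and does not free the
kthread_worker allocated by kthread_create_worker(). This commit uses
kthread_destroy_worker() to release the sch->helper object, which fixes
the following kmemleak report: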

unreferenced object 0xffff888121ec7b00 (size 128):
  comm "scx_simple", pid 1197, jiffies 4295884415
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 ad 4e ad de  .............N..
    ff ff ff ff 00 00 00 00 ff ff ff ff ff ff ff ff  ................
  backtrace (crc 587b3352):
    kmemleak_alloc+0x62/0xa0
    __kmalloc_cache_noprof+0x28d/0x3e0
    kthread_create_worker_on_node+0xd5/0x1f0
    scx_enable.isra.210+0x6c2/0x25b0
    bpf_scx_reg+0x12/0x20
    bpf_struct_ops_link_create+0x2c3/0x3b0
    __sys_bpf+0x3102/0x4b00
    __x64_sys_bpf+0x79/0xc0
    x64_sys_call+0x15d9/0x1dd0
    do_syscall_64+0xf0/0x470
    entry_SYSCALL_64_after_hwframe+0x77/0x7f

Signed-off-by: Zqiang <qiang.zhang@linux.dev>
---
 kernel/sched/ext.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index df00df359067..59fa391636c7 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3516,7 +3516,7 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
 	int node;
 
 	irq_work_sync(&sch->error_irq_work);
-	kthread_stop(sch->helper->task);
+	kthread_destroy_worker(sch->helper);
 
 	free_percpu(sch->pcpu);
 
@@ -4508,7 +4508,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 	return sch;
 
 err_stop_helper:
-	kthread_stop(sch->helper->task);
+	kthread_destroy_worker(sch->helper);
 err_free_pcpu:
 	free_percpu(sch->pcpu);
 err_free_gdsqs:
-- 
2.17.1
Re: [PATCH] sched_ext: Fix the memleak for sch->helper objects
Posted by Tejun Heo 1 week, 3 days ago
Applied to sched_ext/for-6.19-fixes with Fixes and stable tags.

Thanks.

--
tejun
[PATCH] sched_ext: Fix lockdep warnings in the ops_dump_init()
Posted by Zqiang 1 week, 3 days ago
For kernels built with PREEMPT_RT=y, dump_lock is converted to a
sleepable rt-spinlock that does not disable IRQs, so the following
lockdep warning appears:

WARNING: CPU: 3 PID: 47 at kernel/sched/ext.c:4130 ops_dump_init+0x60/0xa0
Modules linked in:
CPU: 3 UID: 0 PID: 47 Comm: irq_work/3
RIP: 0010:ops_dump_init+0x60/0xa0
Call Trace:
<TASK>
scx_dump_state+0x1091/0x1620
? __schedule+0xf1c/0x3ec0
? rcu_is_watching+0x19/0xb0
? __pfx_scx_dump_state+0x10/0x10
? kvm_sched_clock_read+0x15/0x30
? __lock_acquire+0x98b/0x1ae0
? __this_cpu_preempt_check+0x17/0x20
? insn_get_modrm+0x107/0x6e0
scx_error_irq_workfn+0xeb/0x120
irq_work_single+0x113/0x260
irq_work_run_list.part.7+0x44/0x70
run_irq_workd+0x6b/0x90
? __pfx_run_irq_workd+0x10/0x10
smpboot_thread_fn+0x34c/0xa50
? trace_preempt_on+0x54/0x120
? __pfx_smpboot_thread_fn+0x10/0x10
kthread+0x411/0x8a0
? __pfx_kthread+0x10/0x10
? rt_spin_unlock+0x9f/0x210
? __pfx_kthread+0x10/0x10
ret_from_fork+0x406/0x500
? __switch_to_asm+0x33/0x70
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0x1a/0x30
</TASK>

This commit therefore converts dump_lock to a raw spinlock.

Signed-off-by: Zqiang <qiang.zhang@linux.dev>
---
 kernel/sched/ext.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 59fa391636c7..2e19e3689f1c 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4235,7 +4235,7 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx,
 
 static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 {
-	static DEFINE_SPINLOCK(dump_lock);
+	static DEFINE_RAW_SPINLOCK(dump_lock);
 	static const char trunc_marker[] = "\n\n~~~~ TRUNCATED ~~~~\n";
 	struct scx_sched *sch = scx_root;
 	struct scx_dump_ctx dctx = {
@@ -4251,7 +4251,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 	char *buf;
 	int cpu;
 
-	spin_lock_irqsave(&dump_lock, flags);
+	raw_spin_lock_irqsave(&dump_lock, flags);
 
 	seq_buf_init(&s, ei->dump, dump_len);
 
@@ -4375,7 +4375,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 		memcpy(ei->dump + dump_len - sizeof(trunc_marker),
 		       trunc_marker, sizeof(trunc_marker));
 
-	spin_unlock_irqrestore(&dump_lock, flags);
+	raw_spin_unlock_irqrestore(&dump_lock, flags);
 }
 
 static void scx_error_irq_workfn(struct irq_work *irq_work)
-- 
2.17.1
Re: [PATCH] sched_ext: Fix lockdep warnings in the ops_dump_init()
Posted by Christian Loehle 1 week, 3 days ago
On 12/8/25 11:23, Zqiang wrote:
> For kernels built with PREEMPT_RT=y, dump_lock is converted to a
> sleepable rt-spinlock that does not disable IRQs, so the following
> lockdep warning appears:
> 
> WARNING: CPU: 3 PID: 47 at kernel/sched/ext.c:4130 ops_dump_init+0x60/0xa0
> Modules linked in:
> CPU: 3 UID: 0 PID: 47 Comm: irq_work/3
> RIP: 0010:ops_dump_init+0x60/0xa0
> Call Trace:
> <TASK>
> scx_dump_state+0x1091/0x1620
> ? __schedule+0xf1c/0x3ec0
> ? rcu_is_watching+0x19/0xb0
> ? __pfx_scx_dump_state+0x10/0x10
> ? kvm_sched_clock_read+0x15/0x30
> ? __lock_acquire+0x98b/0x1ae0
> ? __this_cpu_preempt_check+0x17/0x20
> ? insn_get_modrm+0x107/0x6e0
> scx_error_irq_workfn+0xeb/0x120
> irq_work_single+0x113/0x260
> irq_work_run_list.part.7+0x44/0x70
> run_irq_workd+0x6b/0x90
> ? __pfx_run_irq_workd+0x10/0x10
> smpboot_thread_fn+0x34c/0xa50
> ? trace_preempt_on+0x54/0x120
> ? __pfx_smpboot_thread_fn+0x10/0x10
> kthread+0x411/0x8a0
> ? __pfx_kthread+0x10/0x10
> ? rt_spin_unlock+0x9f/0x210
> ? __pfx_kthread+0x10/0x10
> ret_from_fork+0x406/0x500
> ? __switch_to_asm+0x33/0x70
> ? __pfx_kthread+0x10/0x10
> ret_from_fork_asm+0x1a/0x30
> </TASK>
> 
> This commit therefore converts dump_lock to a raw spinlock.
> 
> Signed-off-by: Zqiang <qiang.zhang@linux.dev>
> ---
>  kernel/sched/ext.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 59fa391636c7..2e19e3689f1c 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -4235,7 +4235,7 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx,
>  
>  static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
>  {
> -	static DEFINE_SPINLOCK(dump_lock);
> +	static DEFINE_RAW_SPINLOCK(dump_lock);
>  	static const char trunc_marker[] = "\n\n~~~~ TRUNCATED ~~~~\n";
>  	struct scx_sched *sch = scx_root;
>  	struct scx_dump_ctx dctx = {
> @@ -4251,7 +4251,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
>  	char *buf;
>  	int cpu;
>  
> -	spin_lock_irqsave(&dump_lock, flags);
> +	raw_spin_lock_irqsave(&dump_lock, flags);
>  
>  	seq_buf_init(&s, ei->dump, dump_len);
>  
> @@ -4375,7 +4375,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
>  		memcpy(ei->dump + dump_len - sizeof(trunc_marker),
>  		       trunc_marker, sizeof(trunc_marker));
>  
> -	spin_unlock_irqrestore(&dump_lock, flags);
> +	raw_spin_unlock_irqrestore(&dump_lock, flags);
>  }
>  
>  static void scx_error_irq_workfn(struct irq_work *irq_work)

This is already part of Tejun's cgroup sub-scheduler support series:
https://lore.kernel.org/lkml/20250920005931.2753828-39-tj@kernel.org/
Re: [PATCH] sched_ext: Fix lockdep warnings in the ops_dump_init()
Posted by Zqiang 1 week, 3 days ago
> On 12/8/25 11:23, Zqiang wrote:  
> 
> > For kernels built with PREEMPT\_RT=y, dump\_lock is converted to a  
> > sleepable rt-spinlock that does not disable IRQs, so the following  
> > lockdep warning appears:  
> >   
> > WARNING: CPU: 3 PID: 47 at kernel/sched/ext.c:4130 ops\_dump\_init+0x60/0xa0  
> > Modules linked in:  
> > CPU: 3 UID: 0 PID: 47 Comm: irq\_work/3  
> > RIP: 0010:ops\_dump\_init+0x60/0xa0  
> > Call Trace:  
> > <TASK>  
> > scx\_dump\_state+0x1091/0x1620  
> > ? \_\_schedule+0xf1c/0x3ec0  
> > ? rcu\_is\_watching+0x19/0xb0  
> > ? \_\_pfx\_scx\_dump\_state+0x10/0x10  
> > ? kvm\_sched\_clock\_read+0x15/0x30  
> > ? \_\_lock\_acquire+0x98b/0x1ae0  
> > ? \_\_this\_cpu\_preempt\_check+0x17/0x20  
> > ? insn\_get\_modrm+0x107/0x6e0  
> > scx\_error\_irq\_workfn+0xeb/0x120  
> > irq\_work\_single+0x113/0x260  
> > irq\_work\_run\_list.part.7+0x44/0x70  
> > run\_irq\_workd+0x6b/0x90  
> > ? \_\_pfx\_run\_irq\_workd+0x10/0x10  
> > smpboot\_thread\_fn+0x34c/0xa50  
> > ? trace\_preempt\_on+0x54/0x120  
> > ? \_\_pfx\_smpboot\_thread\_fn+0x10/0x10  
> > kthread+0x411/0x8a0  
> > ? \_\_pfx\_kthread+0x10/0x10  
> > ? rt\_spin\_unlock+0x9f/0x210  
> > ? \_\_pfx\_kthread+0x10/0x10  
> > ret\_from\_fork+0x406/0x500  
> > ? \_\_switch\_to\_asm+0x33/0x70  
> > ? \_\_pfx\_kthread+0x10/0x10  
> > ret\_from\_fork\_asm+0x1a/0x30  
> > </TASK>  
> >   
> > This commit therefore converts dump\_lock to a raw spinlock.  
> >   
> > Signed-off-by: Zqiang <qiang.zhang@linux.dev>  
> > \---  
> > kernel/sched/ext.c | 6 +++---  
> > 1 file changed, 3 insertions(+), 3 deletions(-)  
> >   
> > diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c  
> > index 59fa391636c7..2e19e3689f1c 100644  
> > \--- a/kernel/sched/ext.c  
> > +++ b/kernel/sched/ext.c  
> > @@ -4235,7 +4235,7 @@ static void scx\_dump\_task(struct seq\_buf \*s, struct scx\_dump\_ctx \*dctx,  
> >   
> > static void scx\_dump\_state(struct scx\_exit\_info \*ei, size\_t dump\_len)  
> > {  
> > \- static DEFINE\_SPINLOCK(dump\_lock);  
> > \+ static DEFINE\_RAW\_SPINLOCK(dump\_lock);  
> > static const char trunc\_marker\[\] = "\\n\\n~~~~ TRUNCATED ~~~~\\n";  
> > struct scx\_sched \*sch = scx\_root;  
> > struct scx\_dump\_ctx dctx = {  
> > @@ -4251,7 +4251,7 @@ static void scx\_dump\_state(struct scx\_exit\_info \*ei, size\_t dump\_len)  
> > char \*buf;  
> > int cpu;  
> >   
> > \- spin\_lock\_irqsave(&dump\_lock, flags);  
> > \+ raw\_spin\_lock\_irqsave(&dump\_lock, flags);  
> >   
> > seq\_buf\_init(&s, ei->dump, dump\_len);  
> >   
> > @@ -4375,7 +4375,7 @@ static void scx\_dump\_state(struct scx\_exit\_info \*ei, size\_t dump\_len)  
> > memcpy(ei->dump + dump\_len - sizeof(trunc\_marker),  
> > trunc\_marker, sizeof(trunc\_marker));  
> >   
> > \- spin\_unlock\_irqrestore(&dump\_lock, flags);  
> > \+ raw\_spin\_unlock\_irqrestore(&dump\_lock, flags);  
> > }  
> >   
> > static void scx\_error\_irq\_workfn(struct irq\_work \*irq\_work)
> 
>   
> This is already part of Tejun's cgroup sub-scheduler support series:  
> [https://lore.kernel.org/lkml/20250920005931.2753828-39-tj@kernel.org/](https://lore.kernel.org/lkml/20250920005931.2753828-39-tj@kernel.org/)


Thank you for pointing that out :), please ignore this patch.

Thanks
Zqiang