kernel/sched/ext.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
From: Koba Ko <kobak@nvidia.com>
In scx_init(), two cpumask allocations are combined with || short-circuit
evaluation. If the first alloc_cpumask_var (scx_bypass_lb_donee_cpumask)
succeeds but the second (scx_bypass_lb_resched_cpumask) fails, the first
cpumask is leaked.
Split the allocations into separate checks with per-allocation error
messages and free the first cpumask when the second allocation fails.
Fixes: 95d1df610cdc7 ("sched_ext: Implement load balancer for bypass mode")
Signed-off-by: Koba Ko <kobak@nvidia.com>
---
kernel/sched/ext.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index b757b853b42bb..0648088a76f09 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -9662,9 +9662,14 @@ static int __init scx_init(void)
return ret;
}
- if (!alloc_cpumask_var(&scx_bypass_lb_donee_cpumask, GFP_KERNEL) ||
- !alloc_cpumask_var(&scx_bypass_lb_resched_cpumask, GFP_KERNEL)) {
- pr_err("sched_ext: Failed to allocate cpumasks\n");
+ if (!alloc_cpumask_var(&scx_bypass_lb_donee_cpumask, GFP_KERNEL)) {
+ pr_err("sched_ext: Failed to allocate donee cpumask\n");
+ return -ENOMEM;
+ }
+
+ if (!alloc_cpumask_var(&scx_bypass_lb_resched_cpumask, GFP_KERNEL)) {
+ pr_err("sched_ext: Failed to allocate resched cpumask\n");
+ free_cpumask_var(scx_bypass_lb_donee_cpumask);
return -ENOMEM;
}
--
2.43.0
On Thu, Apr 09, 2026 at 01:23:12AM +0800, KobaK wrote:
> From: Koba Ko <kobak@nvidia.com>
>
> In scx_init(), two cpumask allocations are combined with || short-circuit
> evaluation. If the first alloc_cpumask_var (scx_bypass_lb_donee_cpumask)
> succeeds but the second (scx_bypass_lb_resched_cpumask) fails, the first
> cpumask is leaked.

This is an initcall. It runs once and failure is a kernel bug triggering a
WARN. It won't fail and error handling is meaningless here. Once you fail one
of these, the system is not in a good state as later code paths assume that
these succeeded during boot. Not freeing is the least of our problems.

Thanks.

--
tejun
Hi Koba,
On Thu, Apr 09, 2026 at 01:23:12AM +0800, KobaK wrote:
> From: Koba Ko <kobak@nvidia.com>
>
> In scx_init(), two cpumask allocations are combined with || short-circuit
> evaluation. If the first alloc_cpumask_var (scx_bypass_lb_donee_cpumask)
> succeeds but the second (scx_bypass_lb_resched_cpumask) fails, the first
> cpumask is leaked.
>
> Split the allocations into separate checks with per-allocation error
> messages and free the first cpumask when the second allocation fails.
>
> Fixes: 95d1df610cdc7 ("sched_ext: Implement load balancer for bypass mode")
> Signed-off-by: Koba Ko <kobak@nvidia.com>
> ---
> kernel/sched/ext.c | 11 ++++++++---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index b757b853b42bb..0648088a76f09 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -9662,9 +9662,14 @@ static int __init scx_init(void)
> return ret;
> }
>
> - if (!alloc_cpumask_var(&scx_bypass_lb_donee_cpumask, GFP_KERNEL) ||
> - !alloc_cpumask_var(&scx_bypass_lb_resched_cpumask, GFP_KERNEL)) {
> - pr_err("sched_ext: Failed to allocate cpumasks\n");
> + if (!alloc_cpumask_var(&scx_bypass_lb_donee_cpumask, GFP_KERNEL)) {
> + pr_err("sched_ext: Failed to allocate donee cpumask\n");
> + return -ENOMEM;
> + }
> +
> + if (!alloc_cpumask_var(&scx_bypass_lb_resched_cpumask, GFP_KERNEL)) {
> + pr_err("sched_ext: Failed to allocate resched cpumask\n");
> + free_cpumask_var(scx_bypass_lb_donee_cpumask);
We should probably do a proper teardown of the other stuff as well at some point
(sysfs group, kset, PM notifier, etc.), but this looks good to me.
> return -ENOMEM;
> }
>
> --
> 2.43.0
>
Acked-by: Andrea Righi <arighi@nvidia.com>
Thanks,
-Andrea
Hi Andrea, Koba,
On Wed, Apr 08, 2026 at 07:34:19PM +0200, Andrea Righi wrote:
> > +++ b/kernel/sched/ext.c
> > @@ -9662,9 +9662,14 @@ static int __init scx_init(void)
> > return ret;
> > }
> >
> > - if (!alloc_cpumask_var(&scx_bypass_lb_donee_cpumask, GFP_KERNEL) ||
> > - !alloc_cpumask_var(&scx_bypass_lb_resched_cpumask, GFP_KERNEL)) {
> > - pr_err("sched_ext: Failed to allocate cpumasks\n");
> > + if (!alloc_cpumask_var(&scx_bypass_lb_donee_cpumask, GFP_KERNEL)) {
> > + pr_err("sched_ext: Failed to allocate donee cpumask\n");
> > + return -ENOMEM;
> > + }
> > +
> > + if (!alloc_cpumask_var(&scx_bypass_lb_resched_cpumask, GFP_KERNEL)) {
> > + pr_err("sched_ext: Failed to allocate resched cpumask\n");
> > + free_cpumask_var(scx_bypass_lb_donee_cpumask);
>
> We should probably do a proper teardown of the other stuff as well at some point
> (sysfs group, kset, PM notifier, etc.), but this looks good to me.
>
> > return -ENOMEM;
> > }
Should kset_unregister() also be called in these error paths?
scx_kset is allocated earlier in scx_init(), and none of the failure
paths after that point call kset_unregister():
scx_kset = kset_create_and_add("sched_ext", &scx_uevent_ops, kernel_kobj);
...
ret = sysfs_create_group(&scx_kset->kobj, &scx_global_attr_group);
if (ret < 0) {
pr_err("sched_ext: Failed to add global attributes\n");
+ kset_unregister(scx_kset);
return ret; /* scx_kset leaked */
}
if (!alloc_cpumask_var(&scx_bypass_lb_donee_cpumask, GFP_KERNEL)) {
pr_err("sched_ext: Failed to allocate donee cpumask\n");
+ kset_unregister(scx_kset);
return -ENOMEM; /* scx_kset leaked */
}
if (!alloc_cpumask_var(&scx_bypass_lb_resched_cpumask, GFP_KERNEL)) {
pr_err("sched_ext: Failed to allocate resched cpumask\n");
free_cpumask_var(scx_bypass_lb_donee_cpumask);
+ kset_unregister(scx_kset);
return -ENOMEM; /* scx_kset leaked */
}
--
Thanks,
Cheng-Yang
© 2016 - 2026 Red Hat, Inc.