There is one struct cfs_rq and one struct sched_entity per CPU for each
task group, and when the allocation of tg->cfs_rq[X] fails, the already
allocated tg->cfs_rq[0]..tg->cfs_rq[X-1] should be freed. The same holds
for tg->se.
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
kernel/sched/fair.c | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a80a73909dc2..0f913487928d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -12443,10 +12443,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 
 	tg->cfs_rq = kcalloc(nr_cpu_ids, sizeof(cfs_rq), GFP_KERNEL);
 	if (!tg->cfs_rq)
-		goto err;
+		return 0;
 	tg->se = kcalloc(nr_cpu_ids, sizeof(se), GFP_KERNEL);
 	if (!tg->se)
-		goto err;
+		goto err_free_rq_pointer;
 
 	tg->shares = NICE_0_LOAD;
 
@@ -12456,12 +12456,12 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
-			goto err;
+			goto err_free;
 
 		se = kzalloc_node(sizeof(struct sched_entity_stats),
 				  GFP_KERNEL, cpu_to_node(i));
 		if (!se)
-			goto err_free_rq;
+			goto err_free;
 
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
@@ -12470,9 +12470,18 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 
 	return 1;
 
-err_free_rq:
-	kfree(cfs_rq);
-err:
+err_free:
+	for_each_possible_cpu(i) {
+		kfree(tg->cfs_rq[i]);
+		kfree(tg->se[i]);
+
+		if (!tg->cfs_rq[i] && !tg->se[i])
+			break;
+	}
+	kfree(tg->se);
+err_free_rq_pointer:
+	kfree(tg->cfs_rq);
+
 	return 0;
 }
--
2.41.0
On 2023-07-18 at 21:41:17 +0800, Aaron Lu wrote:
[...]
> +err_free:
> +	for_each_possible_cpu(i) {
> +		kfree(tg->cfs_rq[i]);
> +		kfree(tg->se[i]);
> +
> +		if (!tg->cfs_rq[i] && !tg->se[i])
> +			break;
> +	}
> +	kfree(tg->se);
> +err_free_rq_pointer:
> +	kfree(tg->cfs_rq);
> +

Not sure if I overlooked, if alloc_fair_sched_group() fails in
sched_create_group(), would sched_free_group()->free_fair_sched_group()
do the cleanup?

thanks,
Chenyu
On Tue, Jul 18, 2023 at 11:13:12PM +0800, Chen Yu wrote:
> On 2023-07-18 at 21:41:17 +0800, Aaron Lu wrote:
[...]
> Not sure if I overlooked, if alloc_fair_sched_group() fails in
> sched_create_group(), would sched_free_group()->free_fair_sched_group()
> do the cleanup?

You are right, I overlooked... Thanks for pointing this out.
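For reference, the caller's error path looks roughly like this (a
simplified sketch based on kernel/sched/core.c of that time; exact
details may differ slightly):

struct task_group *sched_create_group(struct task_group *parent)
{
	struct task_group *tg;

	tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO);
	if (!tg)
		return ERR_PTR(-ENOMEM);

	if (!alloc_fair_sched_group(tg, parent))
		goto err;

	if (!alloc_rt_sched_group(tg, parent))
		goto err;

	alloc_uclamp_sched_group(tg, parent);

	return tg;

err:
	/* frees whatever alloc_fair_sched_group() managed to allocate */
	sched_free_group(tg);
	return ERR_PTR(-ENOMEM);
}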
On Wed, Jul 19, 2023 at 10:13:55AM +0800, Aaron Lu wrote:
> On Tue, Jul 18, 2023 at 11:13:12PM +0800, Chen Yu wrote:
[...]
> > Not sure if I overlooked, if alloc_fair_sched_group() fails in
> > sched_create_group(), would sched_free_group()->free_fair_sched_group()
> > do the cleanup?
>
> You are right, I overlooked... Thanks for pointing this out.

While preparing v2, one thing still looks strange in the existing code:

int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
	... ...

	for_each_possible_cpu(i) {
		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
				      GFP_KERNEL, cpu_to_node(i));
		if (!cfs_rq)
			goto err;

		se = kzalloc_node(sizeof(struct sched_entity_stats),
				  GFP_KERNEL, cpu_to_node(i));
		if (!se)
			goto err_free_rq;
			~~~~~~~~~~~~~~~~

Since free_fair_sched_group() will take care of freeing these allocations
on the error path, it looks unnecessary to do this one-off free of cfs_rq
here. Or do I miss something again...?

		init_cfs_rq(cfs_rq);
		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
		init_entity_runnable_average(se);
	}

	return 1;

err_free_rq:
	kfree(cfs_rq);
err:
	return 0;
}

I plan to change it to this:

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a80a73909dc2..ef2618ad26eb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -12461,7 +12461,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		se = kzalloc_node(sizeof(struct sched_entity_stats),
 				  GFP_KERNEL, cpu_to_node(i));
 		if (!se)
-			goto err_free_rq;
+			goto err;
 
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
@@ -12470,8 +12470,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 
 	return 1;
 
-err_free_rq:
-	kfree(cfs_rq);
 err:
 	return 0;
 }
On 2023-08-02 at 15:01:05 +0800, Aaron Lu wrote:
> On Wed, Jul 19, 2023 at 10:13:55AM +0800, Aaron Lu wrote:
[...]
> Since free_fair_sched_group() will take care of freeing these allocations
> on the error path, it looks unnecessary to do this one-off free of cfs_rq
> here. Or do I miss something again...?
[...]
> I plan to change it to this:
[...]

It seems that err_free_rq was introduced in commit dfc12eb26a28 ("sched:
Fix memory leak in two error corner cases"). The memory leak was detected
by a static analysis tool, but that tool is unaware of
free_fair_sched_group(), which will take care of this. Not sure whether a
self-contained error path is preferred, or whether the cleanup should be
left to the caller; for now it looks like a duplicated free. And
alloc_rt_sched_group() has the same issue.

thanks,
Chenyu
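For context, free_fair_sched_group() is roughly the following (a
simplified sketch based on kernel/sched/fair.c of that era, with the CFS
bandwidth teardown omitted). Because kcalloc() zero-fills the tg->cfs_rq
and tg->se pointer arrays and kfree(NULL) is a no-op, it safely tears
down a partially built group, which is why the extra kfree in the
allocation path is redundant:

/*
 * Simplified sketch: slots that were never allocated are still NULL
 * from kcalloc(), and kfree(NULL) does nothing, so this handles a
 * partially built task group as well as a fully built one.
 */
void free_fair_sched_group(struct task_group *tg)
{
	int i;

	for_each_possible_cpu(i) {
		if (tg->cfs_rq)
			kfree(tg->cfs_rq[i]);
		if (tg->se)
			kfree(tg->se[i]);
	}

	kfree(tg->cfs_rq);
	kfree(tg->se);
}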