kernel/sched/core.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-)
Commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in
do_set_cpus_allowed()") may call kfree() if user_cpus_ptr was previously
set. Unfortunately, some of the callers of do_set_cpus_allowed()
may have pi_lock held when calling it. So the following splats may be
printed especially when running with a PREEMPT_RT kernel:
WARNING: possible circular locking dependency detected
BUG: sleeping function called from invalid context
To avoid these problems, kfree_rcu() is used instead. An internal
cpumask_rcuhead union is created for the sole purpose of facilitating
the use of kfree_rcu() to free the cpumask.
Fixes: 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()")
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Waiman Long <longman@redhat.com>
---
kernel/sched/core.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
[v3: Fix build problem reported by kernel test robot]
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 57e5932f81a9..155b6cfe119a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2604,9 +2604,19 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
.user_mask = NULL,
.flags = SCA_USER, /* clear the user requested mask */
};
+ union cpumask_rcuhead {
+ cpumask_t cpumask;
+ struct rcu_head rcu;
+ };
__do_set_cpus_allowed(p, &ac);
- kfree(ac.user_mask);
+
+ /*
+ * Because this is called with p->pi_lock held, it is not possible
+ * to use kfree() here (when PREEMPT_RT=y), therefore punt to using
+ * kfree_rcu().
+ */
+ kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu);
}
int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
@@ -8220,7 +8230,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
struct affinity_context ac;
struct cpumask *user_mask;
struct task_struct *p;
- int retval;
+ int retval, size;
rcu_read_lock();
@@ -8253,7 +8263,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
if (retval)
goto out_put_task;
- user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
+ /*
+ * See do_set_cpus_allowed() for the rcu_head usage.
+ */
+ size = max_t(int, cpumask_size(), sizeof(struct rcu_head));
+ user_mask = kmalloc(size, GFP_KERNEL);
if (!user_mask) {
retval = -ENOMEM;
goto out_put_task;
--
2.31.1
On Mon, Dec 05, 2022 at 11:39:36AM -0500, Waiman Long wrote: > Commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in > do_set_cpus_allowed()") may call kfree() if user_cpus_ptr was previously > set. Unfortunately, some of the callers of do_set_cpus_allowed() > may have pi_lock held when calling it. So the following splats may be > printed especially when running with a PREEMPT_RT kernel: > > WARNING: possible circular locking dependency detected > BUG: sleeping function called from invalid context > > To avoid these problems, kfree_rcu() is used instead. An internal > cpumask_rcuhead union is created for the sole purpose of facilitating > the use of kfree_rcu() to free the cpumask. > > Fixes: 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") > Suggested-by: Peter Zijlstra <peterz@infradead.org> > Signed-off-by: Waiman Long <longman@redhat.com> > --- > kernel/sched/core.c | 20 +++++++++++++++++--- > 1 file changed, 17 insertions(+), 3 deletions(-) > > [v3: Fix build problem reported by kernel test robot] > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c > index 57e5932f81a9..155b6cfe119a 100644 > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -2604,9 +2604,19 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) > .user_mask = NULL, > .flags = SCA_USER, /* clear the user requested mask */ > }; > + union cpumask_rcuhead { > + cpumask_t cpumask; > + struct rcu_head rcu; > + }; > > __do_set_cpus_allowed(p, &ac); > - kfree(ac.user_mask); > + > + /* > + * Because this is called with p->pi_lock held, it is not possible > + * to use kfree() here (when PREEMPT_RT=y), therefore punt to using > + * kfree_rcu(). > + */ > + kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu); > } > > int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, > @@ -8220,7 +8230,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) > struct affinity_context ac; > struct cpumask *user_mask; > struct task_struct *p; > - int retval; > + int retval, size; > > rcu_read_lock(); > > @@ -8253,7 +8263,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) > if (retval) > goto out_put_task; > > - user_mask = kmalloc(cpumask_size(), GFP_KERNEL); > + /* > + * See do_set_cpus_allowed() for the rcu_head usage. > + */ > + size = max_t(int, cpumask_size(), sizeof(struct rcu_head)); > + user_mask = kmalloc(size, GFP_KERNEL); > if (!user_mask) { > retval = -ENOMEM; > goto out_put_task; AFAICT you forgot dup_user_cpus_ptr().
On 12/22/22 14:32, Peter Zijlstra wrote: > On Mon, Dec 05, 2022 at 11:39:36AM -0500, Waiman Long wrote: >> Commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in >> do_set_cpus_allowed()") may call kfree() if user_cpus_ptr was previously >> set. Unfortunately, some of the callers of do_set_cpus_allowed() >> may have pi_lock held when calling it. So the following splats may be >> printed especially when running with a PREEMPT_RT kernel: >> >> WARNING: possible circular locking dependency detected >> BUG: sleeping function called from invalid context >> >> To avoid these problems, kfree_rcu() is used instead. An internal >> cpumask_rcuhead union is created for the sole purpose of facilitating >> the use of kfree_rcu() to free the cpumask. >> >> Fixes: 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") >> Suggested-by: Peter Zijlstra <peterz@infradead.org> >> Signed-off-by: Waiman Long <longman@redhat.com> >> --- >> kernel/sched/core.c | 20 +++++++++++++++++--- >> 1 file changed, 17 insertions(+), 3 deletions(-) >> >> [v3: Fix build problem reported by kernel test robot] >> >> diff --git a/kernel/sched/core.c b/kernel/sched/core.c >> index 57e5932f81a9..155b6cfe119a 100644 >> --- a/kernel/sched/core.c >> +++ b/kernel/sched/core.c >> @@ -2604,9 +2604,19 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) >> .user_mask = NULL, >> .flags = SCA_USER, /* clear the user requested mask */ >> }; >> + union cpumask_rcuhead { >> + cpumask_t cpumask; >> + struct rcu_head rcu; >> + }; >> >> __do_set_cpus_allowed(p, &ac); >> - kfree(ac.user_mask); >> + >> + /* >> + * Because this is called with p->pi_lock held, it is not possible >> + * to use kfree() here (when PREEMPT_RT=y), therefore punt to using >> + * kfree_rcu(). >> + */ >> + kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu); >> } >> >> int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, >> @@ -8220,7 +8230,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) >> struct affinity_context ac; >> struct cpumask *user_mask; >> struct task_struct *p; >> - int retval; >> + int retval, size; >> >> rcu_read_lock(); >> >> @@ -8253,7 +8263,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) >> if (retval) >> goto out_put_task; >> >> - user_mask = kmalloc(cpumask_size(), GFP_KERNEL); >> + /* >> + * See do_set_cpus_allowed() for the rcu_head usage. >> + */ >> + size = max_t(int, cpumask_size(), sizeof(struct rcu_head)); >> + user_mask = kmalloc(size, GFP_KERNEL); >> if (!user_mask) { >> retval = -ENOMEM; >> goto out_put_task; > AFAICT you forgot dup_user_cpus_ptr(). I haven't received any response from you for a while. So it is just a ping. Of course, I am aware that there is another dup_user_cpus_ptr() patch ouststanding. I will of course talk about that when you respond. I also have a pending rwsem patch series waiting for your review:-) Cheers, Longman
On Thu, Dec 22, 2022 at 03:18:29PM -0500, Waiman Long wrote: > > > @@ -8220,7 +8230,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) > > > struct affinity_context ac; > > > struct cpumask *user_mask; > > > struct task_struct *p; > > > - int retval; > > > + int retval, size; > > > rcu_read_lock(); > > > @@ -8253,7 +8263,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) > > > if (retval) > > > goto out_put_task; > > > - user_mask = kmalloc(cpumask_size(), GFP_KERNEL); > > > + /* > > > + * See do_set_cpus_allowed() for the rcu_head usage. > > > + */ > > > + size = max_t(int, cpumask_size(), sizeof(struct rcu_head)); > > > + user_mask = kmalloc(size, GFP_KERNEL); > > > if (!user_mask) { > > > retval = -ENOMEM; > > > goto out_put_task; > > AFAICT you forgot dup_user_cpus_ptr(). > > I haven't received any response from you for a while. So it is just a ping. I was out sick :/ > Of course, I am aware that there is another dup_user_cpus_ptr() patch > ouststanding. I will of course talk about that when you respond. I also have > a pending rwsem patch series waiting for your review:-) The point was that dup_user_cpus_ptr() also does an allocation and needs to allocate the possibly bigger size too, no?
On 12/22/22 15:25, Peter Zijlstra wrote: > On Thu, Dec 22, 2022 at 03:18:29PM -0500, Waiman Long wrote: > >>>> @@ -8220,7 +8230,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) >>>> struct affinity_context ac; >>>> struct cpumask *user_mask; >>>> struct task_struct *p; >>>> - int retval; >>>> + int retval, size; >>>> rcu_read_lock(); >>>> @@ -8253,7 +8263,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) >>>> if (retval) >>>> goto out_put_task; >>>> - user_mask = kmalloc(cpumask_size(), GFP_KERNEL); >>>> + /* >>>> + * See do_set_cpus_allowed() for the rcu_head usage. >>>> + */ >>>> + size = max_t(int, cpumask_size(), sizeof(struct rcu_head)); >>>> + user_mask = kmalloc(size, GFP_KERNEL); >>>> if (!user_mask) { >>>> retval = -ENOMEM; >>>> goto out_put_task; >>> AFAICT you forgot dup_user_cpus_ptr(). >> I haven't received any response from you for a while. So it is just a ping. > I was out sick :/ I am sorry to hear that. Hope you are all right now. > >> Of course, I am aware that there is another dup_user_cpus_ptr() patch >> ouststanding. I will of course talk about that when you respond. I also have >> a pending rwsem patch series waiting for your review:-) > The point was that dup_user_cpus_ptr() also does an allocation and needs > to allocate the possibly bigger size too, no? Oh, yes. I missed that. I will combine the two patches into a patch series. Thanks, Longman
On 12/5/22 11:39, Waiman Long wrote: > Commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in > do_set_cpus_allowed()") may call kfree() if user_cpus_ptr was previously > set. Unfortunately, some of the callers of do_set_cpus_allowed() > may have pi_lock held when calling it. So the following splats may be > printed especially when running with a PREEMPT_RT kernel: > > WARNING: possible circular locking dependency detected > BUG: sleeping function called from invalid context > > To avoid these problems, kfree_rcu() is used instead. An internal > cpumask_rcuhead union is created for the sole purpose of facilitating > the use of kfree_rcu() to free the cpumask. > > Fixes: 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") > Suggested-by: Peter Zijlstra <peterz@infradead.org> > Signed-off-by: Waiman Long <longman@redhat.com> > --- > kernel/sched/core.c | 20 +++++++++++++++++--- > 1 file changed, 17 insertions(+), 3 deletions(-) > > [v3: Fix build problem reported by kernel test robot] > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c > index 57e5932f81a9..155b6cfe119a 100644 > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -2604,9 +2604,19 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) > .user_mask = NULL, > .flags = SCA_USER, /* clear the user requested mask */ > }; > + union cpumask_rcuhead { > + cpumask_t cpumask; > + struct rcu_head rcu; > + }; > > __do_set_cpus_allowed(p, &ac); > - kfree(ac.user_mask); > + > + /* > + * Because this is called with p->pi_lock held, it is not possible > + * to use kfree() here (when PREEMPT_RT=y), therefore punt to using > + * kfree_rcu(). > + */ > + kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu); > } > > int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, > @@ -8220,7 +8230,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) > struct affinity_context ac; > struct cpumask *user_mask; > struct task_struct *p; > - int retval; > + int retval, size; > > rcu_read_lock(); > > @@ -8253,7 +8263,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) > if (retval) > goto out_put_task; > > - user_mask = kmalloc(cpumask_size(), GFP_KERNEL); > + /* > + * See do_set_cpus_allowed() for the rcu_head usage. > + */ > + size = max_t(int, cpumask_size(), sizeof(struct rcu_head)); > + user_mask = kmalloc(size, GFP_KERNEL); > if (!user_mask) { > retval = -ENOMEM; > goto out_put_task; Hi Peter, Is this patch good enough to be merged as commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") is now in the Linus' tree? Thanks, Longman
© 2016 - 2025 Red Hat, Inc.