kernel/sched/ext.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)
When the function consume_dispatch_q() returns true, the dsq lock may
remains held and is not unlocked.
Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
---
kernel/sched/ext.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 410a4df8a121..4d80aa3de00e 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2377,7 +2377,8 @@ static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p
static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq)
{
struct task_struct *p;
-retry:
+ bool ret = false;
+
/*
* The caller can't expect to successfully consume a task if the task's
* addition to @dsq isn't guaranteed to be visible somehow. Test
@@ -2394,19 +2395,20 @@ static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq)
if (rq == task_rq) {
task_unlink_from_dsq(p, dsq);
move_local_task_to_local_dsq(p, 0, dsq, rq);
- raw_spin_unlock(&dsq->lock);
- return true;
+ ret = true;
+ break;
}
if (task_can_run_on_remote_rq(p, rq, false)) {
- if (likely(consume_remote_task(rq, p, dsq, task_rq)))
- return true;
- goto retry;
+ if (likely(consume_remote_task(rq, p, dsq, task_rq))) {
+ ret = true;
+ break;
+ }
}
}
raw_spin_unlock(&dsq->lock);
- return false;
+ return ret;
}
static bool consume_global_dsq(struct rq *rq)
--
2.33.0
Hello, On Tue, Oct 15, 2024 at 10:29:17PM +0800, Zhang Qiao wrote: > When the function consume_dispatch_q() returns true, the dsq lock may > remains held and is not unlocked. > > Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com> > --- > kernel/sched/ext.c | 16 +++++++++------- > 1 file changed, 9 insertions(+), 7 deletions(-) > > diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c > index 410a4df8a121..4d80aa3de00e 100644 > --- a/kernel/sched/ext.c > +++ b/kernel/sched/ext.c > @@ -2377,7 +2377,8 @@ static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p > static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq) > { > struct task_struct *p; > -retry: > + bool ret = false; > + > /* > * The caller can't expect to successfully consume a task if the task's > * addition to @dsq isn't guaranteed to be visible somehow. Test > @@ -2394,19 +2395,20 @@ static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq) > if (rq == task_rq) { > task_unlink_from_dsq(p, dsq); > move_local_task_to_local_dsq(p, 0, dsq, rq); > - raw_spin_unlock(&dsq->lock); > - return true; > + ret = true; > + break; > } > > if (task_can_run_on_remote_rq(p, rq, false)) { > - if (likely(consume_remote_task(rq, p, dsq, task_rq))) > - return true; > - goto retry; > + if (likely(consume_remote_task(rq, p, dsq, task_rq))) { > + ret = true; > + break; > + } > } > } > > raw_spin_unlock(&dsq->lock); > - return false; > + return ret; Has this change been tested at all? There's quite a bit of lock dancing happening in the remote consumption path because the task needs to get hot-migrated to the local CPU - consume_remote_task() calls unlink_dsq_and_lock_src_rq() which drops the DSQ lock among other things. Thanks. -- tejun
hello 在 2024/10/16 4:12, Tejun Heo 写道: > Hello, > > On Tue, Oct 15, 2024 at 10:29:17PM +0800, Zhang Qiao wrote: >> When the function consume_dispatch_q() returns true, the dsq lock may >> remains held and is not unlocked. >> >> Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com> >> --- >> kernel/sched/ext.c | 16 +++++++++------- >> 1 file changed, 9 insertions(+), 7 deletions(-) >> >> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c >> index 410a4df8a121..4d80aa3de00e 100644 >> --- a/kernel/sched/ext.c >> +++ b/kernel/sched/ext.c >> @@ -2377,7 +2377,8 @@ static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p >> static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq) >> { >> struct task_struct *p; >> -retry: >> + bool ret = false; >> + >> /* >> * The caller can't expect to successfully consume a task if the task's >> * addition to @dsq isn't guaranteed to be visible somehow. Test >> @@ -2394,19 +2395,20 @@ static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq) >> if (rq == task_rq) { >> task_unlink_from_dsq(p, dsq); >> move_local_task_to_local_dsq(p, 0, dsq, rq); >> - raw_spin_unlock(&dsq->lock); >> - return true; >> + ret = true; >> + break; >> } >> >> if (task_can_run_on_remote_rq(p, rq, false)) { >> - if (likely(consume_remote_task(rq, p, dsq, task_rq))) >> - return true; >> - goto retry; >> + if (likely(consume_remote_task(rq, p, dsq, task_rq))) { >> + ret = true; >> + break; >> + } >> } >> } >> >> raw_spin_unlock(&dsq->lock); >> - return false; >> + return ret; > > Has this change been tested at all? There's quite a bit of lock dancing I tested this patch using scx_simple scheduler. > happening in the remote consumption path because the task needs to get > hot-migrated to the local CPU - consume_remote_task() calls > unlink_dsq_and_lock_src_rq() which drops the DSQ lock among other things. Yes, i missed it. Thanks. > > Thanks. >
© 2016 - 2024 Red Hat, Inc.