Provide scx_bpf_remote_curr() as a way for scx schedulers to
check the curr task of a remote rq, without assuming its lock
or acquiring any.
Many scx schedulers make use of scx_bpf_cpu_rq() to check a
remote curr (e.g. to see if it should be preempted). This is
problematic because scx_bpf_cpu_rq() provides access to all
fields of struct rq, most of which aren't safe to use without
holding the associated rq lock.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
kernel/sched/ext.c | 15 +++++++++++++++
tools/sched_ext/include/scx/common.bpf.h | 1 +
2 files changed, 16 insertions(+)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index b734f55f3318..92e66bb0b5f2 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -7436,6 +7436,20 @@ __bpf_kfunc struct rq *scx_bpf_cpu_rq(s32 cpu)
return cpu_rq(cpu);
}
+/**
+ * scx_bpf_remote_curr - Fetch the curr of a rq without acquiring its rq lock
+ * @cpu: CPU of the rq
+ *
+ * Neither a rq lock nor a task reference is acquired.
+ */
+__bpf_kfunc struct task_struct *scx_bpf_remote_curr(s32 cpu)
+{
+ if (!kf_cpu_valid(cpu, NULL))
+ return NULL;
+
+ return cpu_rq(cpu)->curr;
+}
+
/**
* scx_bpf_task_cgroup - Return the sched cgroup of a task
* @p: task of interest
@@ -7600,6 +7614,7 @@ BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE)
BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_cpu_rq, KF_RET_NULL)
+BTF_ID_FLAGS(func, scx_bpf_remote_curr, KF_RET_NULL)
#ifdef CONFIG_CGROUP_SCHED
BTF_ID_FLAGS(func, scx_bpf_task_cgroup, KF_RCU | KF_ACQUIRE)
#endif
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index d4e21558e982..e5d4ef124532 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -91,6 +91,7 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
+struct task_struct *scx_bpf_remote_curr(s32 cpu) __ksym;
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
u64 scx_bpf_now(void) __ksym __weak;
void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak;
--
2.34.1
On Fri, Aug 01, 2025 at 03:17:40PM +0100, Christian Loehle wrote: > Provide scx_bpf_remote_curr() as a way for scx schedulers to > check the curr task of a remote rq, without assuming its lock > or acquiring any. > > Many scx schedulers make use of scx_bpf_cpu_rq() to check a > remote curr (e.g. to see if it should be preempted). This is > problematic because scx_bpf_cpu_rq() provides access to all > fields of struct rq, most of which aren't safe to use without > holding the associated rq lock. > > Signed-off-by: Christian Loehle <christian.loehle@arm.com> > --- > kernel/sched/ext.c | 15 +++++++++++++++ > tools/sched_ext/include/scx/common.bpf.h | 1 + > 2 files changed, 16 insertions(+) > > diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c > index b734f55f3318..92e66bb0b5f2 100644 > --- a/kernel/sched/ext.c > +++ b/kernel/sched/ext.c > @@ -7436,6 +7436,20 @@ __bpf_kfunc struct rq *scx_bpf_cpu_rq(s32 cpu) > return cpu_rq(cpu); > } > > +/** > + * scx_bpf_remote_curr - Fetch the curr of a rq without acquiring its rq lock > + * @cpu: CPU of the rq > + * > + * Neither a rq lock nor a task reference is acquired. > + */ > +__bpf_kfunc struct task_struct *scx_bpf_remote_curr(s32 cpu) > +{ > + if (!kf_cpu_valid(cpu, NULL)) > + return NULL; > + > + return cpu_rq(cpu)->curr; > +} As mentioned in my previous comment, this should be something like: if (!kf_cpu_valid(cpu, NULL)) return NULL; rcu_read_lock(); p = cpu_rq(cpu)->curr; if (p) p = bpf_task_acquire(p); rcu_read_unlock(); return p; We may still race with CPU hotplugging, but I think it's not always possible to use cpus_read_lock/unlock() here. Also, most of the scx schedulers are restarted on CPU hotplugging events, so... one thing at a time. 
:) > + > /** > * scx_bpf_task_cgroup - Return the sched cgroup of a task > * @p: task of interest > @@ -7600,6 +7614,7 @@ BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE) > BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU) > BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU) > BTF_ID_FLAGS(func, scx_bpf_cpu_rq, KF_RET_NULL) > +BTF_ID_FLAGS(func, scx_bpf_remote_curr, KF_RET_NULL) > #ifdef CONFIG_CGROUP_SCHED > BTF_ID_FLAGS(func, scx_bpf_task_cgroup, KF_RCU | KF_ACQUIRE) > #endif > diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h > index d4e21558e982..e5d4ef124532 100644 > --- a/tools/sched_ext/include/scx/common.bpf.h > +++ b/tools/sched_ext/include/scx/common.bpf.h > @@ -91,6 +91,7 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; > bool scx_bpf_task_running(const struct task_struct *p) __ksym; > s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; > struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym; > +struct task_struct *scx_bpf_remote_curr(s32 cpu) __ksym; > struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak; > u64 scx_bpf_now(void) __ksym __weak; > void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak; > -- > 2.34.1 > -Andrea
On 8/1/25 15:47, Andrea Righi wrote: > On Fri, Aug 01, 2025 at 03:17:40PM +0100, Christian Loehle wrote: >> Provide scx_bpf_remote_curr() as a way for scx schedulers to >> check the curr task of a remote rq, without assuming its lock >> or acquiring any. >> >> Many scx schedulers make use of scx_bpf_cpu_rq() to check a >> remote curr (e.g. to see if it should be preempted). This is >> problematic because scx_bpf_cpu_rq() provides access to all >> fields of struct rq, most of which aren't safe to use without >> holding the associated rq lock. >> >> Signed-off-by: Christian Loehle <christian.loehle@arm.com> >> --- >> kernel/sched/ext.c | 15 +++++++++++++++ >> tools/sched_ext/include/scx/common.bpf.h | 1 + >> 2 files changed, 16 insertions(+) >> >> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c >> index b734f55f3318..92e66bb0b5f2 100644 >> --- a/kernel/sched/ext.c >> +++ b/kernel/sched/ext.c >> @@ -7436,6 +7436,20 @@ __bpf_kfunc struct rq *scx_bpf_cpu_rq(s32 cpu) >> return cpu_rq(cpu); >> } >> >> +/** >> + * scx_bpf_remote_curr - Fetch the curr of a rq without acquiring its rq lock >> + * @cpu: CPU of the rq >> + * >> + * Neither a rq lock nor a task reference is acquired. >> + */ >> +__bpf_kfunc struct task_struct *scx_bpf_remote_curr(s32 cpu) >> +{ >> + if (!kf_cpu_valid(cpu, NULL)) >> + return NULL; >> + >> + return cpu_rq(cpu)->curr; >> +} > > As mentioned in my previous comment, this should be something like: > > if (!kf_cpu_valid(cpu, NULL)) > return NULL; > > rcu_read_lock(); > p = cpu_rq(cpu)->curr; > if (p) > p = bpf_task_acquire(p); > rcu_read_unlock(); > > return p; Alright, that's actually what I had at first, but went with the drop-in-replacement that doesn't acquire. I'll resend with an acquire. Thanks, Andrea! > > We may still race with CPU hotplugging, but I think it's not always > possible to use cpus_read_lock/unlock() here. Also, most of the scx > schedulers are restarted on CPU hotplugging events, so... one thing at a > time. 
:) > >> + >> /** >> * scx_bpf_task_cgroup - Return the sched cgroup of a task >> * @p: task of interest >> @@ -7600,6 +7614,7 @@ BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE) >> BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU) >> BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU) >> BTF_ID_FLAGS(func, scx_bpf_cpu_rq, KF_RET_NULL) >> +BTF_ID_FLAGS(func, scx_bpf_remote_curr, KF_RET_NULL) >> #ifdef CONFIG_CGROUP_SCHED >> BTF_ID_FLAGS(func, scx_bpf_task_cgroup, KF_RCU | KF_ACQUIRE) >> #endif >> diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h >> index d4e21558e982..e5d4ef124532 100644 >> --- a/tools/sched_ext/include/scx/common.bpf.h >> +++ b/tools/sched_ext/include/scx/common.bpf.h >> @@ -91,6 +91,7 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; >> bool scx_bpf_task_running(const struct task_struct *p) __ksym; >> s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; >> struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym; >> +struct task_struct *scx_bpf_remote_curr(s32 cpu) __ksym; >> struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak; >> u64 scx_bpf_now(void) __ksym __weak; >> void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak; >> -- >> 2.34.1 >> > > -Andrea
© 2016 - 2025 Red Hat, Inc.