From: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
Under Clang 17/18, when running scx_sdt scheduler, it fails with:
libbpf: prog 'sdt_init_task': BPF program load failed: -EACCES
libbpf: prog 'sdt_init_task': -- BEGIN PROG LOAD LOG --
...
; desc = desc_find_empty(alloc->root, &idx); @ scx_sdt.bpf.c:479
43: (79) r8 = *(u64 *)(r6 +32) ; frame1: R6=map_value(map=scx_sdt.bss,ks=4,vs=200,off=120) R8=scalar()
; for (level = zero; level < SDT_TASK_LEVELS && can_loop; level++) { @ scx_sdt.bpf.c:407
44: (e5) may_goto pc+51
; idx |= pos; @ scx_sdt.bpf.c:418
96: (bf) r7 = r2 ; frame1: R2=0 R7=0
97: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
; @ scx_sdt.bpf.c:0
98: (07) r1 += -56 ; frame1: R1=fp-56
; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
99: (b4) w2 = 0 ; frame1: R2=0
100: (b4) w3 = 3 ; frame1: R3=3
101: (85) call bpf_iter_num_new#82234 ; frame1: R0=scalar() fp-56=iter_num(ref_id=2,state=active,depth=0)
102: (18) r9 = 0x1ffffffffffffff8 ; frame1: R9=0x1ffffffffffffff8
104: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
; @ scx_sdt.bpf.c:0
105: (07) r1 += -56 ; frame1: R1=fp-56
; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
106: (85) call bpf_iter_num_next#82235 ; frame1: R0=0 fp-56=iter_num(ref_id=2,state=drained,depth=0)
107: (15) if r0 == 0x0 goto pc+29 ; frame1: R0=0
; if (tmp->nr_free > 0) @ scx_sdt.bpf.c:456
137: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
138: (07) r1 += -56 ; frame1: R1=fp-56
139: (85) call bpf_iter_num_destroy#82232 ; frame1:
140: (b7) r9 = 0 ; frame1: R9=0
; if (unlikely(desc == NULL)) { @ scx_sdt.bpf.c:480
141: (15) if r8 == 0x0 goto pc+15 ; frame1: R8=scalar(umin=1)
; chunk = desc->chunk; @ scx_sdt.bpf.c:485
142: (79) r4 = *(u64 *)(r8 +72)
R8 invalid mem access 'scalar'
The reason is that these older compilers lack native support for
__BPF_FEATURE_ADDR_SPACE_CAST, so the __arena macro is defined as empty.
Fix it by adding cast_kern when dereferencing variables with the __arena tag.
Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
---
tools/sched_ext/scx_sdt.bpf.c | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/tools/sched_ext/scx_sdt.bpf.c b/tools/sched_ext/scx_sdt.bpf.c
index 31b09958e8d5..caacc55bd7a5 100644
--- a/tools/sched_ext/scx_sdt.bpf.c
+++ b/tools/sched_ext/scx_sdt.bpf.c
@@ -148,6 +148,7 @@ static sdt_desc_t *scx_alloc_chunk(void)
out = desc;
+ cast_kern(desc);
desc->nr_free = SDT_TASK_ENTS_PER_CHUNK;
desc->chunk = chunk;
@@ -244,6 +245,7 @@ int mark_nodes_avail(sdt_desc_t *lv_desc[SDT_TASK_LEVELS], __u64 lv_pos[SDT_TASK
/* Only propagate upwards if we are the parent's only free chunk. */
desc = lv_desc[level];
+ cast_kern(desc);
ret = set_idx_state(desc, lv_pos[level], false);
if (unlikely(ret != 0))
return ret;
@@ -298,20 +300,26 @@ int scx_alloc_free_idx(struct scx_allocator *alloc, __u64 idx)
if (level == SDT_TASK_LEVELS - 1)
break;
+ cast_kern(desc);
chunk = desc->chunk;
+ cast_kern(chunk);
desc_children = (sdt_desc_t * __arena *)chunk->descs;
+ cast_kern(desc_children);
desc = desc_children[pos];
if (unlikely(!desc))
return -EINVAL;
}
+ cast_kern(desc);
chunk = desc->chunk;
pos = idx & mask;
+ cast_kern(chunk);
data = chunk->data[pos];
if (likely(data)) {
+ cast_kern(data);
*data = (struct sdt_data) {
.tid.genn = data->tid.genn + 1,
};
@@ -378,6 +386,7 @@ __u64 chunk_find_empty(sdt_desc_t __arg_arena *desc)
__u64 freeslots;
__u64 i;
+ cast_kern(desc);
for (i = 0; i < SDT_TASK_CHUNK_BITMAP_U64S; i++) {
freeslots = ~desc->allocated[i];
if (freeslots == (__u64)0)
@@ -426,9 +435,12 @@ static sdt_desc_t * desc_find_empty(sdt_desc_t *desc, __u64 *idxp)
break;
/* Allocate an internal node if necessary. */
+ cast_kern(desc);
chunk = desc->chunk;
+ cast_kern(chunk);
desc_children = (sdt_desc_t * __arena *)chunk->descs;
+ cast_kern(desc_children);
desc = desc_children[pos];
if (!desc) {
desc = scx_alloc_chunk();
@@ -448,6 +460,7 @@ static sdt_desc_t * desc_find_empty(sdt_desc_t *desc, __u64 *idxp)
level = SDT_TASK_LEVELS - 1 - u;
tmp = lv_desc[level];
+ cast_kern(tmp);
ret = set_idx_state(tmp, lv_pos[level], true);
if (ret != 0)
break;
@@ -482,10 +495,12 @@ void __arena *scx_alloc(struct scx_allocator *alloc)
return NULL;
}
+ cast_kern(desc);
chunk = desc->chunk;
/* Populate the leaf node if necessary. */
pos = idx & (SDT_TASK_ENTS_PER_CHUNK - 1);
+ cast_kern(chunk);
data = chunk->data[pos];
if (!data) {
data = scx_alloc_from_pool(&alloc->pool);
@@ -503,10 +518,12 @@ void __arena *scx_alloc(struct scx_allocator *alloc)
alloc_stats.alloc_ops += 1;
alloc_stats.active_allocs += 1;
+ cast_kern(data);
data->tid.idx = idx;
bpf_spin_unlock(&alloc_lock);
+ cast_user(data);
return data;
}
@@ -544,9 +561,10 @@ void __arena *scx_task_alloc(struct task_struct *p)
if (unlikely(!data))
return NULL;
+ mval->data = data;
+ cast_kern(data);
mval->tid = data->tid;
mval->tptr = (__u64) p;
- mval->data = data;
return (void __arena *)data->payload;
}
--
2.43.0
On Sun Mar 8, 2026 at 10:28 PM EDT, Zhao Mengmeng wrote:
> From: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
>
> Under Clang 17/18, when running scx_sdt scheduler, it fails with:
>
> libbpf: prog 'sdt_init_task': BPF program load failed: -EACCES
> libbpf: prog 'sdt_init_task': -- BEGIN PROG LOAD LOG --
> ...
> ; desc = desc_find_empty(alloc->root, &idx); @ scx_sdt.bpf.c:479
> 43: (79) r8 = *(u64 *)(r6 +32) ; frame1: R6=map_value(map=scx_sdt.bss,ks=4,vs=200,off=120) R8=scalar()
> ; for (level = zero; level < SDT_TASK_LEVELS && can_loop; level++) { @ scx_sdt.bpf.c:407
> 44: (e5) may_goto pc+51
> ; idx |= pos; @ scx_sdt.bpf.c:418
> 96: (bf) r7 = r2 ; frame1: R2=0 R7=0
> 97: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
> ; @ scx_sdt.bpf.c:0
> 98: (07) r1 += -56 ; frame1: R1=fp-56
> ; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
> 99: (b4) w2 = 0 ; frame1: R2=0
> 100: (b4) w3 = 3 ; frame1: R3=3
> 101: (85) call bpf_iter_num_new#82234 ; frame1: R0=scalar() fp-56=iter_num(ref_id=2,state=active,depth=0)
> 102: (18) r9 = 0x1ffffffffffffff8 ; frame1: R9=0x1ffffffffffffff8
> 104: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
> ; @ scx_sdt.bpf.c:0
> 105: (07) r1 += -56 ; frame1: R1=fp-56
> ; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
> 106: (85) call bpf_iter_num_next#82235 ; frame1: R0=0 fp-56=iter_num(ref_id=2,state=drained,depth=0)
> 107: (15) if r0 == 0x0 goto pc+29 ; frame1: R0=0
> ; if (tmp->nr_free > 0) @ scx_sdt.bpf.c:456
> 137: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
> ; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
> 138: (07) r1 += -56 ; frame1: R1=fp-56
> 139: (85) call bpf_iter_num_destroy#82232 ; frame1:
> 140: (b7) r9 = 0 ; frame1: R9=0
> ; if (unlikely(desc == NULL)) { @ scx_sdt.bpf.c:480
> 141: (15) if r8 == 0x0 goto pc+15 ; frame1: R8=scalar(umin=1)
> ; chunk = desc->chunk; @ scx_sdt.bpf.c:485
> 142: (79) r4 = *(u64 *)(r8 +72)
> R8 invalid mem access 'scalar'
>
> The reason is these older compilers lacks native support for
> __BPF_FEATURE_ADDR_SPACE_CAST, __arena macro is defined as empty.
>
> Fix it by adding cast_kern when dereferencing variables with __arena tag.
>
I am not sure if we want to support older Clang versions at this point.
This issue is fixed for Clang 19, and adding the macros back in makes it
confusing for those who use the code as a starting point. And while
it would be nice to support older Clang versions, we already don't
handle Clang 15/16 that don't have arena support. So it's not
unreasonable if we say Clang 17/18 are also incompatible with this
example.
On the other hand, maybe the extra compatibility is worth re-adding
cast_kern/cast_user to the code. I am slightly in favor of keeping it
as-is to avoid churn, but can easily see why we'd go the other way.
@htejun WDYT?
> Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
> ---
> tools/sched_ext/scx_sdt.bpf.c | 20 +++++++++++++++++++-
> 1 file changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/tools/sched_ext/scx_sdt.bpf.c b/tools/sched_ext/scx_sdt.bpf.c
> index 31b09958e8d5..caacc55bd7a5 100644
> --- a/tools/sched_ext/scx_sdt.bpf.c
> +++ b/tools/sched_ext/scx_sdt.bpf.c
> @@ -148,6 +148,7 @@ static sdt_desc_t *scx_alloc_chunk(void)
>
> out = desc;
>
> + cast_kern(desc);
> desc->nr_free = SDT_TASK_ENTS_PER_CHUNK;
> desc->chunk = chunk;
>
> @@ -244,6 +245,7 @@ int mark_nodes_avail(sdt_desc_t *lv_desc[SDT_TASK_LEVELS], __u64 lv_pos[SDT_TASK
> /* Only propagate upwards if we are the parent's only free chunk. */
> desc = lv_desc[level];
>
> + cast_kern(desc);
> ret = set_idx_state(desc, lv_pos[level], false);
> if (unlikely(ret != 0))
> return ret;
> @@ -298,20 +300,26 @@ int scx_alloc_free_idx(struct scx_allocator *alloc, __u64 idx)
> if (level == SDT_TASK_LEVELS - 1)
> break;
>
> + cast_kern(desc);
> chunk = desc->chunk;
>
> + cast_kern(chunk);
> desc_children = (sdt_desc_t * __arena *)chunk->descs;
> + cast_kern(desc_children);
> desc = desc_children[pos];
>
> if (unlikely(!desc))
> return -EINVAL;
> }
>
> + cast_kern(desc);
> chunk = desc->chunk;
>
> pos = idx & mask;
> + cast_kern(chunk);
> data = chunk->data[pos];
> if (likely(data)) {
> + cast_kern(data);
> *data = (struct sdt_data) {
> .tid.genn = data->tid.genn + 1,
> };
> @@ -378,6 +386,7 @@ __u64 chunk_find_empty(sdt_desc_t __arg_arena *desc)
> __u64 freeslots;
> __u64 i;
>
> + cast_kern(desc);
> for (i = 0; i < SDT_TASK_CHUNK_BITMAP_U64S; i++) {
> freeslots = ~desc->allocated[i];
> if (freeslots == (__u64)0)
> @@ -426,9 +435,12 @@ static sdt_desc_t * desc_find_empty(sdt_desc_t *desc, __u64 *idxp)
> break;
>
> /* Allocate an internal node if necessary. */
> + cast_kern(desc);
> chunk = desc->chunk;
> + cast_kern(chunk);
> desc_children = (sdt_desc_t * __arena *)chunk->descs;
>
> + cast_kern(desc_children);
> desc = desc_children[pos];
> if (!desc) {
> desc = scx_alloc_chunk();
> @@ -448,6 +460,7 @@ static sdt_desc_t * desc_find_empty(sdt_desc_t *desc, __u64 *idxp)
> level = SDT_TASK_LEVELS - 1 - u;
> tmp = lv_desc[level];
>
> + cast_kern(tmp);
> ret = set_idx_state(tmp, lv_pos[level], true);
> if (ret != 0)
> break;
> @@ -482,10 +495,12 @@ void __arena *scx_alloc(struct scx_allocator *alloc)
> return NULL;
> }
>
> + cast_kern(desc);
> chunk = desc->chunk;
>
> /* Populate the leaf node if necessary. */
> pos = idx & (SDT_TASK_ENTS_PER_CHUNK - 1);
> + cast_kern(chunk);
> data = chunk->data[pos];
> if (!data) {
> data = scx_alloc_from_pool(&alloc->pool);
> @@ -503,10 +518,12 @@ void __arena *scx_alloc(struct scx_allocator *alloc)
> alloc_stats.alloc_ops += 1;
> alloc_stats.active_allocs += 1;
>
> + cast_kern(data);
> data->tid.idx = idx;
>
> bpf_spin_unlock(&alloc_lock);
>
> + cast_user(data);
> return data;
> }
>
> @@ -544,9 +561,10 @@ void __arena *scx_task_alloc(struct task_struct *p)
> if (unlikely(!data))
> return NULL;
>
> + mval->data = data;
> + cast_kern(data);
> mval->tid = data->tid;
> mval->tptr = (__u64) p;
> - mval->data = data;
>
> return (void __arena *)data->payload;
> }
On Mon, Mar 09, 2026 at 12:36:40PM -0400, Emil Tsalapatis wrote: > On the other hand, maybe the extra compatibility is worth re-adding > cast_kern/cast_user to the code. I am slightly in favor of keeping it > as-is to avoid churn, but can easily see why we'd go the other way. > > @htejun WDYT? Let's leave it as-is. The main purpose is to serve as an example for future arena users. The benefit of keeping it simple trumps over supporting older compilers. thanks. -- tejun
On March 10, 2026 12:44:56 AM GMT+08:00, Tejun Heo <tj@kernel.org> wrote: >On Mon, Mar 09, 2026 at 12:36:40PM -0400, Emil Tsalapatis wrote: >> On the other hand, maybe the extra compatibility is worth re-adding >> cast_kern/cast_user to the code. I am slightly in favor of keeping it >> as-is to avoid churn, but can easily see why we'd go the other way. >> >> @htejun WDYT? > >Let's leave it as-is. The main purpose is to serve as an example for future >arena users. The benefit of keeping it simple trumps over supporting older >compilers. > >thanks. > That's reasonable. I was hesitant to send this patch because it is actually a compiler problem, but I had also seen the cast_kern/cast_user macros already defined to address the compatibility problem. Shall we use __BPF_FEATURE_ADDR_SPACE_CAST to guard the code, check the LLVM version before compiling these arena-based schedulers, or add some information to the README? After all, README.md only demands LLVM >= 16.
On Mon, Mar 9, 2026 at 9:40 AM Emil Tsalapatis <emil@etsalapatis.com> wrote:
>
> On Sun Mar 8, 2026 at 10:28 PM EDT, Zhao Mengmeng wrote:
> > From: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
> >
> > Under Clang 17/18, when running scx_sdt scheduler, it fails with:
> >
> > libbpf: prog 'sdt_init_task': BPF program load failed: -EACCES
> > libbpf: prog 'sdt_init_task': -- BEGIN PROG LOAD LOG --
> > ...
> > ; desc = desc_find_empty(alloc->root, &idx); @ scx_sdt.bpf.c:479
> > 43: (79) r8 = *(u64 *)(r6 +32) ; frame1: R6=map_value(map=scx_sdt.bss,ks=4,vs=200,off=120) R8=scalar()
> > ; for (level = zero; level < SDT_TASK_LEVELS && can_loop; level++) { @ scx_sdt.bpf.c:407
> > 44: (e5) may_goto pc+51
> > ; idx |= pos; @ scx_sdt.bpf.c:418
> > 96: (bf) r7 = r2 ; frame1: R2=0 R7=0
> > 97: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
> > ; @ scx_sdt.bpf.c:0
> > 98: (07) r1 += -56 ; frame1: R1=fp-56
> > ; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
> > 99: (b4) w2 = 0 ; frame1: R2=0
> > 100: (b4) w3 = 3 ; frame1: R3=3
> > 101: (85) call bpf_iter_num_new#82234 ; frame1: R0=scalar() fp-56=iter_num(ref_id=2,state=active,depth=0)
> > 102: (18) r9 = 0x1ffffffffffffff8 ; frame1: R9=0x1ffffffffffffff8
> > 104: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
> > ; @ scx_sdt.bpf.c:0
> > 105: (07) r1 += -56 ; frame1: R1=fp-56
> > ; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
> > 106: (85) call bpf_iter_num_next#82235 ; frame1: R0=0 fp-56=iter_num(ref_id=2,state=drained,depth=0)
> > 107: (15) if r0 == 0x0 goto pc+29 ; frame1: R0=0
> > ; if (tmp->nr_free > 0) @ scx_sdt.bpf.c:456
> > 137: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
> > ; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
> > 138: (07) r1 += -56 ; frame1: R1=fp-56
> > 139: (85) call bpf_iter_num_destroy#82232 ; frame1:
> > 140: (b7) r9 = 0 ; frame1: R9=0
> > ; if (unlikely(desc == NULL)) { @ scx_sdt.bpf.c:480
> > 141: (15) if r8 == 0x0 goto pc+15 ; frame1: R8=scalar(umin=1)
> > ; chunk = desc->chunk; @ scx_sdt.bpf.c:485
> > 142: (79) r4 = *(u64 *)(r8 +72)
> > R8 invalid mem access 'scalar'
> >
> > The reason is these older compilers lacks native support for
> > __BPF_FEATURE_ADDR_SPACE_CAST, __arena macro is defined as empty.
> >
> > Fix it by adding cast_kern when dereferencing variables with __arena tag.
> >
>
> I am not sure if we want to support older Clang versions at this point.
> This issue is fixed for Clang 19, and adding the macros back in makes it
> confusing for those who use the code as a starting point. And while
> it would be nice to support older Clang versions, we already don't
> handle Clang 15/16 that don't have arena support. So it's not
> unreasonable if we say Clang 17/18 are also incompatible with this
> example.
+1
People should upgrade their compilers.
© 2016 - 2026 Red Hat, Inc.