[PATCH 3/3] tools/sched_ext: scx_sdt: Fix BPF verifier rejection on older LLVMs

Zhao Mengmeng posted 3 patches 1 month ago
There is a newer version of this series
[PATCH 3/3] tools/sched_ext: scx_sdt: Fix BPF verifier rejection on older LLVMs
Posted by Zhao Mengmeng 1 month ago
From: Zhao Mengmeng <zhaomengmeng@kylinos.cn>

Under Clang 17/18, when running scx_sdt scheduler, it fails with:

libbpf: prog 'sdt_init_task': BPF program load failed: -EACCES
libbpf: prog 'sdt_init_task': -- BEGIN PROG LOAD LOG --
...
; desc = desc_find_empty(alloc->root, &idx); @ scx_sdt.bpf.c:479
43: (79) r8 = *(u64 *)(r6 +32)        ; frame1: R6=map_value(map=scx_sdt.bss,ks=4,vs=200,off=120) R8=scalar()
; for (level = zero; level < SDT_TASK_LEVELS && can_loop; level++) { @ scx_sdt.bpf.c:407
44: (e5) may_goto pc+51
; idx |= pos; @ scx_sdt.bpf.c:418
96: (bf) r7 = r2                      ; frame1: R2=0 R7=0
97: (bf) r1 = r10                     ; frame1: R1=fp0 R10=fp0
;  @ scx_sdt.bpf.c:0
98: (07) r1 += -56                    ; frame1: R1=fp-56
; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
99: (b4) w2 = 0                       ; frame1: R2=0
100: (b4) w3 = 3                      ; frame1: R3=3
101: (85) call bpf_iter_num_new#82234         ; frame1: R0=scalar() fp-56=iter_num(ref_id=2,state=active,depth=0)
102: (18) r9 = 0x1ffffffffffffff8     ; frame1: R9=0x1ffffffffffffff8
104: (bf) r1 = r10                    ; frame1: R1=fp0 R10=fp0
;  @ scx_sdt.bpf.c:0
105: (07) r1 += -56                   ; frame1: R1=fp-56
; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
106: (85) call bpf_iter_num_next#82235        ; frame1: R0=0 fp-56=iter_num(ref_id=2,state=drained,depth=0)
107: (15) if r0 == 0x0 goto pc+29     ; frame1: R0=0
; if (tmp->nr_free > 0) @ scx_sdt.bpf.c:456
137: (bf) r1 = r10                    ; frame1: R1=fp0 R10=fp0
; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
138: (07) r1 += -56                   ; frame1: R1=fp-56
139: (85) call bpf_iter_num_destroy#82232     ; frame1:
140: (b7) r9 = 0                      ; frame1: R9=0
; if (unlikely(desc == NULL)) { @ scx_sdt.bpf.c:480
141: (15) if r8 == 0x0 goto pc+15     ; frame1: R8=scalar(umin=1)
; chunk = desc->chunk; @ scx_sdt.bpf.c:485
142: (79) r4 = *(u64 *)(r8 +72)
R8 invalid mem access 'scalar'

The reason is these older compilers lacks native support for
__BPF_FEATURE_ADDR_SPACE_CAST, __arena macro is defined as empty.

Fix it by adding cast_kern when dereferencing variables with __arena tag.

Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
---
 tools/sched_ext/scx_sdt.bpf.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/sched_ext/scx_sdt.bpf.c b/tools/sched_ext/scx_sdt.bpf.c
index 31b09958e8d5..caacc55bd7a5 100644
--- a/tools/sched_ext/scx_sdt.bpf.c
+++ b/tools/sched_ext/scx_sdt.bpf.c
@@ -148,6 +148,7 @@ static sdt_desc_t *scx_alloc_chunk(void)
 
 	out = desc;
 
+	cast_kern(desc);
 	desc->nr_free = SDT_TASK_ENTS_PER_CHUNK;
 	desc->chunk = chunk;
 
@@ -244,6 +245,7 @@ int mark_nodes_avail(sdt_desc_t *lv_desc[SDT_TASK_LEVELS], __u64 lv_pos[SDT_TASK
 		/* Only propagate upwards if we are the parent's only free chunk. */
 		desc = lv_desc[level];
 
+		cast_kern(desc);
 		ret = set_idx_state(desc, lv_pos[level], false);
 		if (unlikely(ret != 0))
 			return ret;
@@ -298,20 +300,26 @@ int scx_alloc_free_idx(struct scx_allocator *alloc, __u64 idx)
 		if (level == SDT_TASK_LEVELS - 1)
 			break;
 
+		cast_kern(desc);
 		chunk = desc->chunk;
 
+		cast_kern(chunk);
 		desc_children = (sdt_desc_t * __arena *)chunk->descs;
+		cast_kern(desc_children);
 		desc = desc_children[pos];
 
 		if (unlikely(!desc))
 			return -EINVAL;
 	}
 
+	cast_kern(desc);
 	chunk = desc->chunk;
 
 	pos = idx & mask;
+	cast_kern(chunk);
 	data = chunk->data[pos];
 	if (likely(data)) {
+		cast_kern(data);
 		*data = (struct sdt_data) {
 			.tid.genn = data->tid.genn + 1,
 		};
@@ -378,6 +386,7 @@ __u64 chunk_find_empty(sdt_desc_t __arg_arena *desc)
 	__u64 freeslots;
 	__u64 i;
 
+	cast_kern(desc);
 	for (i = 0; i < SDT_TASK_CHUNK_BITMAP_U64S; i++) {
 		freeslots = ~desc->allocated[i];
 		if (freeslots == (__u64)0)
@@ -426,9 +435,12 @@ static sdt_desc_t * desc_find_empty(sdt_desc_t *desc, __u64 *idxp)
 			break;
 
 		/* Allocate an internal node if necessary. */
+		cast_kern(desc);
 		chunk = desc->chunk;
+		cast_kern(chunk);
 		desc_children = (sdt_desc_t * __arena *)chunk->descs;
 
+		cast_kern(desc_children);
 		desc = desc_children[pos];
 		if (!desc) {
 			desc = scx_alloc_chunk();
@@ -448,6 +460,7 @@ static sdt_desc_t * desc_find_empty(sdt_desc_t *desc, __u64 *idxp)
 		level = SDT_TASK_LEVELS - 1 - u;
 		tmp = lv_desc[level];
 
+		cast_kern(tmp);
 		ret = set_idx_state(tmp, lv_pos[level], true);
 		if (ret != 0)
 			break;
@@ -482,10 +495,12 @@ void __arena *scx_alloc(struct scx_allocator *alloc)
 		return NULL;
 	}
 
+	cast_kern(desc);
 	chunk = desc->chunk;
 
 	/* Populate the leaf node if necessary. */
 	pos = idx & (SDT_TASK_ENTS_PER_CHUNK - 1);
+	cast_kern(chunk);
 	data = chunk->data[pos];
 	if (!data) {
 		data = scx_alloc_from_pool(&alloc->pool);
@@ -503,10 +518,12 @@ void __arena *scx_alloc(struct scx_allocator *alloc)
 	alloc_stats.alloc_ops += 1;
 	alloc_stats.active_allocs += 1;
 
+	cast_kern(data);
 	data->tid.idx = idx;
 
 	bpf_spin_unlock(&alloc_lock);
 
+	cast_user(data);
 	return data;
 }
 
@@ -544,9 +561,10 @@ void __arena *scx_task_alloc(struct task_struct *p)
 	if (unlikely(!data))
 		return NULL;
 
+	mval->data = data;
+	cast_kern(data);
 	mval->tid = data->tid;
 	mval->tptr = (__u64) p;
-	mval->data = data;
 
 	return (void __arena *)data->payload;
 }
-- 
2.43.0