From nobody Tue Feb 10 21:39:10 2026 Received: from out-174.mta1.migadu.com (out-174.mta1.migadu.com [95.215.58.174]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A01CD1A00F0; Fri, 2 Jan 2026 15:01:50 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=95.215.58.174 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1767366113; cv=none; b=dvTnDgmLP61JQHD6UGE3Ub4+aTe8QDPpLlxZct8wgNrbGzHGe5qKf1s62jmuBu5kpiaiLsytJsinB9Se21TeiiL7oPiD+ci8Ya/a9BSpcdKnrGr/vD/J3ojxHtGl5IjYiuebuxl5YXXdhEe/jRFrZkkmOXtORrZQnTIQxi+abE4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1767366113; c=relaxed/simple; bh=Ijwkz7QzHZs+f5e9/V+7fXI6NvObA0hQXuFbGSjIzJQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=CtLzSnQFXuI1Liv7XlxHp3wfCr9DgLy0co1bvaSCc8g2FksmATUmmHVLOeM/hAzzb0AY6M2udEBO9tIVFHWSNDk7w2pnvPNzu0cPQnztc3cYH019c8O9WYLD6SIIOZxkoIYlfbNOP8yFXVeCKoC6i+mZYKiRBcLOqQ/G7qarqxc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=EM8bLGf5; arc=none smtp.client-ip=95.215.58.174 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="EM8bLGf5" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1767366108; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=9oQ3SeDCJ6nND2F1EMtNHZ6xlfaLC5tyLUl7Cs/5SH4=; b=EM8bLGf5gqL+cBIBPphk6iPPRVabq3f3nlPeYC01WIiT0Dhp2x5DkL28sodqjxwetFCMqm 2AeAAAi9gjt0Psmxc/crbGHUF4P20CQ0I+Xx/cpQgpJEUB0WK133rH5QuCDnd8P3LU6XZk 2iFBJh1F7mpTrJ1WG3lwWOgNrvjcrv0= From: Leon Hwang To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Eduard Zingerman , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Puranjay Mohan , Xu Kuohai , Catalin Marinas , Will Deacon , "David S . Miller" , David Ahern , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , x86@kernel.org, "H . Peter Anvin" , Andrew Morton , linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, netdev@vger.kernel.org, kernel-patches-bot@fb.com, Leon Hwang Subject: [PATCH bpf-next 1/4] bpf: tailcall: Introduce bpf_arch_tail_call_prologue_offset Date: Fri, 2 Jan 2026 23:00:29 +0800 Message-ID: <20260102150032.53106-2-leon.hwang@linux.dev> In-Reply-To: <20260102150032.53106-1-leon.hwang@linux.dev> References: <20260102150032.53106-1-leon.hwang@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Introduce bpf_arch_tail_call_prologue_offset() to allow architectures to specify the offset from bpf_func to the actual program entry point for tail calls. This offset accounts for prologue instructions that should be skipped (e.g., fentry NOPs, TCC initialization). When an architecture provides a non-zero prologue offset, prog arrays allocate additional space to cache precomputed tail call targets: array->ptrs[max_entries + index] =3D prog->bpf_func + prologue_offset This cached target is updated atomically via xchg() when programs are added or removed from the prog array, eliminating the need to compute the target address at runtime during tail calls. The function is exported for use by the test_bpf module. Signed-off-by: Leon Hwang --- include/linux/bpf.h | 1 + kernel/bpf/arraymap.c | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4e7d72dfbcd4..acd85c239af9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3792,6 +3792,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_t= ype old_t, =20 void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, struct bpf_prog *new, struct bpf_prog *old); +int bpf_arch_tail_call_prologue_offset(void); =20 void *bpf_arch_text_copy(void *dst, void *src, size_t len); int bpf_arch_text_invalidate(void *dst, size_t len); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 1eeb31c5b317..beedd1281c22 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -127,6 +127,9 @@ static struct bpf_map *array_map_alloc(union bpf_attr *= attr) array_size +=3D (u64) max_entries * elem_size; } } + if (attr->map_type =3D=3D BPF_MAP_TYPE_PROG_ARRAY && bpf_arch_tail_call_p= rologue_offset()) + /* Store tailcall targets */ + array_size +=3D (u64) max_entries * sizeof(void *); =20 /* allocate all map elements and zero-initialize them */ if (attr->map_flags & BPF_F_MMAPABLE) { @@ -1087,16 +1090,38 @@ void __weak bpf_arch_poke_desc_update(struct bpf_ji= t_poke_descriptor *poke, WARN_ON_ONCE(1); } =20 +int __weak bpf_arch_tail_call_prologue_offset(void) +{ + return 0; +} +EXPORT_SYMBOL_GPL(bpf_arch_tail_call_prologue_offset); + +static void bpf_tail_call_target_update(struct bpf_array *array, u32 key, = struct bpf_prog *new) +{ + int offset =3D bpf_arch_tail_call_prologue_offset(); + void *target; + + if (!offset) + return; + + target =3D new ? (void *) new->bpf_func + offset : 0; + xchg(array->ptrs + array->map.max_entries + key, target); +} + static void prog_array_map_poke_run(struct bpf_map *map, u32 key, struct bpf_prog *old, struct bpf_prog *new) { struct prog_poke_elem *elem; struct bpf_array_aux *aux; + struct bpf_array *array; =20 - aux =3D container_of(map, struct bpf_array, map)->aux; + array =3D container_of(map, struct bpf_array, map); + aux =3D array->aux; WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex)); =20 + bpf_tail_call_target_update(array, key, new); + list_for_each_entry(elem, &aux->poke_progs, list) { struct bpf_jit_poke_descriptor *poke; int i; --=20 2.52.0 From nobody Tue Feb 10 21:39:10 2026 Received: from out-173.mta1.migadu.com (out-173.mta1.migadu.com [95.215.58.173]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0899A221DB1 for ; Fri, 2 Jan 2026 15:01:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=95.215.58.173 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1767366123; cv=none; b=i7ZL0XuALW45TcopNGoPrzOthkhpONwjE9UmOLg2I6LVc//zODOHPrUXZmCaHL1RecQwQLgjFBwd94ya6onDEMyXgrH0lRCFn7M4qSDYVBc3AFxlD4JTtFfHb5gn2LneMOSBqLJD0lnqPbT31yxqnYlpKC+9SbT9uVLCorHjM8I= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1767366123; c=relaxed/simple; bh=9z9KwVZl2eIaQzW4nWHUEeGkVv8W+C3AnzDLuPuwvtc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=E9Xgbdc+4hMWmvXbigxGZU7RE+cSo66q1HN7wAhukxVzpzcSz2Jha3RZP6sppOIWQC6DJpmuSp1r6SjJAx5vSwNbzwhpYpb3t9wWSpvCHpTzGT/RGAC9WaSZmwHzKqql65lXf0RC0elfV92ASFk5eQiZc9mOKI4NuYCm4UqZFgc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=M2RBCzNU; arc=none smtp.client-ip=95.215.58.173 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="M2RBCzNU" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1767366118; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=GBeqdGdvFl1BmWay9YlcRFa2rBrCpDnyHLyz6aMx8E4=; b=M2RBCzNUMOTNgFWAb784JlFFJsR2RBXl/a5CTpz/ThH165B1UXHNkl1Z6ivhx0WURExrcQ RqglMXX/lbKgQmuIkVZ6ASmLu8QnxeO25eU+6L1LgjYdiiHqDU/JMLP3iFLYDpkyQpNPpc IiB2DGVMv8jAJImawbTJkni1y9JeTiU= From: Leon Hwang To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Eduard Zingerman , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Puranjay Mohan , Xu Kuohai , Catalin Marinas , Will Deacon , "David S . Miller" , David Ahern , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , x86@kernel.org, "H . Peter Anvin" , Andrew Morton , linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, netdev@vger.kernel.org, kernel-patches-bot@fb.com, Leon Hwang Subject: [PATCH bpf-next 2/4] bpf, x64: tailcall: Eliminate max_entries and bpf_func access at runtime Date: Fri, 2 Jan 2026 23:00:30 +0800 Message-ID: <20260102150032.53106-3-leon.hwang@linux.dev> In-Reply-To: <20260102150032.53106-1-leon.hwang@linux.dev> References: <20260102150032.53106-1-leon.hwang@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Optimize BPF tail calls on x86_64 by eliminating runtime memory accesses for max_entries and prog->bpf_func when the prog array map is known at verification time. The verifier now encodes three fields in the tail call instruction's imm: - bits 0-7: map index in used_maps[] (max 63) - bits 8-15: dynamic array flag (1 if map pointer is poisoned) - bits 16-31: poke table index + 1 for direct tail calls (max 1023) For static tail calls (map known at verification time): - max_entries is embedded as an immediate in the comparison instruction - The cached target from array->ptrs[max_entries + index] is used directly, avoiding the prog->bpf_func dereference For dynamic tail calls (map pointer poisoned): - Fall back to runtime lookup of max_entries and prog->bpf_func This reduces cache misses and improves tail call performance for the common case where the prog array is statically known. Signed-off-by: Leon Hwang --- arch/x86/net/bpf_jit_comp.c | 51 +++++++++++++++++++++++++++---------- kernel/bpf/verifier.c | 30 ++++++++++++++++++++-- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e3b1c4b1d550..9fd707612da5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -733,11 +733,13 @@ static void emit_return(u8 **pprog, u8 *ip) * out: */ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog, + u32 map_index, bool dyn_array, u8 **pprog, bool *callee_regs_used, u32 stack_depth, u8 *ip, struct jit_context *ctx) { int tcc_ptr_off =3D BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth); + struct bpf_map *map =3D bpf_prog->aux->used_maps[map_index]; u8 *prog =3D *pprog, *start =3D *pprog; int offset; =20 @@ -752,11 +754,14 @@ static void emit_bpf_tail_call_indirect(struct bpf_pr= og *bpf_prog, * goto out; */ EMIT2(0x89, 0xD2); /* mov edx, edx */ - EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], ed= x */ - offsetof(struct bpf_array, map.max_entries)); + if (dyn_array) + EMIT3(0x3B, 0x56, /* cmp edx, dword ptr [rsi + 16] */ + offsetof(struct bpf_array, map.max_entries)); + else + EMIT2_off32(0x81, 0xFA, map->max_entries); /* cmp edx, imm32 (map->max_e= ntries) */ =20 offset =3D ctx->tail_call_indirect_label - (prog + 2 - start); - EMIT2(X86_JBE, offset); /* jbe out */ + EMIT2(X86_JAE, offset); /* jae out */ =20 /* * if ((*tcc_ptr)++ >=3D MAX_TAIL_CALL_CNT) @@ -768,9 +773,15 @@ static void emit_bpf_tail_call_indirect(struct bpf_pro= g *bpf_prog, offset =3D ctx->tail_call_indirect_label - (prog + 2 - start); EMIT2(X86_JAE, offset); /* jae out */ =20 - /* prog =3D array->ptrs[index]; */ - EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, /* mov rcx, [rsi + rdx * 8 + of= fsetof(...)] */ - offsetof(struct bpf_array, ptrs)); + /* + * if (dyn_array) + * prog =3D array->ptrs[index]; + * else + * tgt =3D array->ptrs[max_entries + index]; + */ + offset =3D offsetof(struct bpf_array, ptrs); + offset +=3D dyn_array ? 0 : map->max_entries * sizeof(void *); + EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, offset); /* mov rcx, [rsi + rdx * 8 += offset] */ =20 /* * if (prog =3D=3D NULL) @@ -803,11 +814,14 @@ static void emit_bpf_tail_call_indirect(struct bpf_pr= og *bpf_prog, EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */ round_up(stack_depth, 8)); =20 - /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */ - EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32= ] */ - offsetof(struct bpf_prog, bpf_func)); - EMIT4(0x48, 0x83, 0xC1, /* add rcx, X86_TAIL_CALL_OFFSE= T */ - X86_TAIL_CALL_OFFSET); + if (dyn_array) { + /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */ + EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */ + offsetof(struct bpf_prog, bpf_func)); + EMIT4(0x48, 0x83, 0xC1, /* add rcx, X86_TAIL_CALL_OFFSET */ + X86_TAIL_CALL_OFFSET); + } + /* * Now we're ready to jump into next BPF program * rdi =3D=3D ctx (1st arg) @@ -2461,15 +2475,21 @@ st: if (is_imm8(insn->off)) } =20 case BPF_JMP | BPF_TAIL_CALL: - if (imm32) + bool dynamic_array =3D (imm32 >> 8) & 0xFF; + u32 map_index =3D imm32 & 0xFF; + s32 imm16 =3D imm32 >> 16; + + if (imm16) emit_bpf_tail_call_direct(bpf_prog, - &bpf_prog->aux->poke_tab[imm32 - 1], + &bpf_prog->aux->poke_tab[imm16 - 1], &prog, image + addrs[i - 1], callee_regs_used, stack_depth, ctx); else emit_bpf_tail_call_indirect(bpf_prog, + map_index, + dynamic_array, &prog, callee_regs_used, stack_depth, @@ -4047,6 +4067,11 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_d= escriptor *poke, } } =20 +int bpf_arch_tail_call_prologue_offset(void) +{ + return X86_TAIL_CALL_OFFSET; +} + bool bpf_jit_supports_arena(void) { return true; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3d44c5d06623..ab9c84e76a62 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -22602,6 +22602,18 @@ static int add_hidden_subprog(struct bpf_verifier_= env *env, struct bpf_insn *pat return 0; } =20 +static int tail_call_find_map_index(struct bpf_verifier_env *env, struct b= pf_map *map) +{ + int i; + + for (i =3D 0; i < env->used_map_cnt; i++) { + if (env->used_maps[i] =3D=3D map) + return i; + } + + return -ENOENT; +} + /* Do various post-verification rewrites in a single program pass. * These rewrites simplify JIT and interpreter implementations. */ @@ -22993,10 +23005,24 @@ static int do_misc_fixups(struct bpf_verifier_env= *env) * call and to prevent accidental JITing by JIT compiler * that doesn't support bpf_tail_call yet */ - insn->imm =3D 0; insn->code =3D BPF_JMP | BPF_TAIL_CALL; =20 + /* + * insn->imm contains 3 fields: + * map index(8 bits): 6 bits are enough, 63 max + * poisoned(8 bits): 1 bit is enough + * poke index(16 bits): 1023 max + */ + aux =3D &env->insn_aux_data[i + delta]; + insn->imm =3D tail_call_find_map_index(env, aux->map_ptr_state.map_ptr); + if (insn->imm < 0) { + verifier_bug(env, "index not found for prog array map\n"); + return -EINVAL; + } + + insn->imm |=3D bpf_map_ptr_poisoned(aux) << 8; + if (env->bpf_capable && !prog->blinding_requested && prog->jit_requested && !bpf_map_key_poisoned(aux) && @@ -23015,7 +23041,7 @@ static int do_misc_fixups(struct bpf_verifier_env *= env) return ret; } =20 - insn->imm =3D ret + 1; + insn->imm |=3D (ret + 1) << 16; goto next_insn; } =20 --=20 2.52.0 From nobody Tue Feb 10 21:39:10 2026 Received: from out-173.mta1.migadu.com (out-173.mta1.migadu.com [95.215.58.173]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5270423AB81 for ; Fri, 2 Jan 2026 15:02:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=95.215.58.173 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1767366132; cv=none; b=J3EfY6wfDmBCXmI6mBV+0FuerxCpgBGSX8HxfwkEq7sQlZuI78f2iCLuSqJCZa+bluh1DI5Tqj2db5fH2J7QzXgNI9GXtkSKf3ADY+/r8gIjR4AybM4YhsqOhAS0mMadkA+23sHDLv+/Bew4YTMdQef/7KuJcs7kBSrvR/0wg1Y= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1767366132; c=relaxed/simple; bh=OY1SBWq4Ap5M1NAc2V9iWM673yY4ujTgrAy/9xNzc+4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=HIVlK3Qrnx36dWu+HEmkYUKKQ9BQOesWjWXn0oVxCS4rkJhKZ988qEv2FlaNpBRWIhPrR3bA034PMejqU1PwGttmtrgLkPNgB9ZRZR0aG8uD5pd6oOY+KVizin6fqyH7+4dgRMRgcwvBf4fwBnesx/hj2aiwi2wNW+ChWEHS7sI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=CiljkX+c; arc=none smtp.client-ip=95.215.58.173 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="CiljkX+c" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1767366126; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=PjHUFzrsqk2p5SI+Seak6yB69TLkO5KnxAZlltu8VN0=; b=CiljkX+cqHhZJB7ZlqBXKzmzY3XQReM24Odz2Ch2tN65WE9slo5Z85mQE+ILAgJPqr2Im7 mLE/Ag4NWpe9g7232/7M/CRvmIwLuJRidJGsjRGDnVfIVpeZZGxi8PO4qDpg2KZgZj8UMG VZhWjy1LqrYgYwt/EVPSAbm1/EWuItM= From: Leon Hwang To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Eduard Zingerman , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Puranjay Mohan , Xu Kuohai , Catalin Marinas , Will Deacon , "David S . Miller" , David Ahern , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , x86@kernel.org, "H . Peter Anvin" , Andrew Morton , linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, netdev@vger.kernel.org, kernel-patches-bot@fb.com, Leon Hwang Subject: [PATCH bpf-next 3/4] bpf, arm64: tailcall: Eliminate max_entries and bpf_func access at runtime Date: Fri, 2 Jan 2026 23:00:31 +0800 Message-ID: <20260102150032.53106-4-leon.hwang@linux.dev> In-Reply-To: <20260102150032.53106-1-leon.hwang@linux.dev> References: <20260102150032.53106-1-leon.hwang@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Apply the same tail call optimization to arm64 as done for x86_64. When the prog array map is known at verification time (dyn_array=3Dfalse): - Embed max_entries as an immediate value instead of loading from memory - Use the precomputed target from array->ptrs[max_entries + index] - Jump directly to the cached target without dereferencing prog->bpf_func When the map is dynamically determined (dyn_array=3Dtrue): - Load max_entries from the array at runtime - Look up prog from array->ptrs[index] and compute the target address Implement bpf_arch_tail_call_prologue_offset() returning "PROLOGUE_OFFSET * 4" to convert the instruction count to bytes. Signed-off-by: Leon Hwang --- arch/arm64/net/bpf_jit_comp.c | 71 +++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 0c4d44bcfbf4..bcd890bff36a 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -620,8 +620,10 @@ static int build_prologue(struct jit_ctx *ctx, bool eb= pf_from_cbpf) return 0; } =20 -static int emit_bpf_tail_call(struct jit_ctx *ctx) +static int emit_bpf_tail_call(struct jit_ctx *ctx, u32 map_index, bool dyn= _array) { + struct bpf_map *map =3D ctx->prog->aux->used_maps[map_index]; + /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ const u8 r2 =3D bpf2a64[BPF_REG_2]; const u8 r3 =3D bpf2a64[BPF_REG_3]; @@ -638,9 +640,13 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) /* if (index >=3D array->map.max_entries) * goto out; */ - off =3D offsetof(struct bpf_array, map.max_entries); - emit_a64_mov_i64(tmp, off, ctx); - emit(A64_LDR32(tmp, r2, tmp), ctx); + if (dyn_array) { + off =3D offsetof(struct bpf_array, map.max_entries); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR32(tmp, r2, tmp), ctx); + } else { + emit_a64_mov_i64(tmp, map->max_entries, ctx); + } emit(A64_MOV(0, r3, r3), ctx); emit(A64_CMP(0, r3, tmp), ctx); branch1 =3D ctx->image + ctx->idx; @@ -659,15 +665,26 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) /* (*tail_call_cnt_ptr)++; */ emit(A64_ADD_I(1, tcc, tcc, 1), ctx); =20 - /* prog =3D array->ptrs[index]; - * if (prog =3D=3D NULL) - * goto out; - */ - off =3D offsetof(struct bpf_array, ptrs); - emit_a64_mov_i64(tmp, off, ctx); - emit(A64_ADD(1, tmp, r2, tmp), ctx); - emit(A64_LSL(1, prg, r3, 3), ctx); - emit(A64_LDR64(prg, tmp, prg), ctx); + if (dyn_array) { + /* prog =3D array->ptrs[index]; + * if (prog =3D=3D NULL) + * goto out; + */ + off =3D offsetof(struct bpf_array, ptrs); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_ADD(1, tmp, r2, tmp), ctx); + emit(A64_LSL(1, prg, r3, 3), ctx); + emit(A64_LDR64(prg, tmp, prg), ctx); + } else { + /* tgt =3D array->ptrs[max_entries + index]; + * if (tgt =3D=3D 0) + * goto out; + */ + emit(A64_LSL(1, prg, r3, 3), ctx); + off =3D offsetof(struct bpf_array, ptrs) + map->max_entries * sizeof(voi= d *); + emit_a64_add_i(1, prg, prg, tmp, off, ctx); + emit(A64_LDR64(prg, r2, prg), ctx); + } branch3 =3D ctx->image + ctx->idx; emit(A64_NOP, ctx); =20 @@ -680,12 +697,17 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) =20 pop_callee_regs(ctx); =20 - /* goto *(prog->bpf_func + prologue_offset); */ - off =3D offsetof(struct bpf_prog, bpf_func); - emit_a64_mov_i64(tmp, off, ctx); - emit(A64_LDR64(tmp, prg, tmp), ctx); - emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); - emit(A64_BR(tmp), ctx); + if (dyn_array) { + /* goto *(prog->bpf_func + prologue_offset); */ + off =3D offsetof(struct bpf_prog, bpf_func); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR64(tmp, prg, tmp), ctx); + emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); + emit(A64_BR(tmp), ctx); + } else { + /* goto *tgt; */ + emit(A64_BR(prg), ctx); + } =20 if (ctx->image) { off =3D &ctx->image[ctx->idx] - branch1; @@ -701,6 +723,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) return 0; } =20 +int bpf_arch_tail_call_prologue_offset(void) +{ + /* offset is in instructions, convert to bytes */ + return PROLOGUE_OFFSET * 4; +} + static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *= ctx) { const s32 imm =3D insn->imm; @@ -1617,7 +1645,10 @@ static int build_insn(const struct bpf_insn *insn, s= truct jit_ctx *ctx, } /* tail call */ case BPF_JMP | BPF_TAIL_CALL: - if (emit_bpf_tail_call(ctx)) + bool dynamic_array =3D (insn->imm >> 8) & 0xFF; + u32 map_index =3D insn->imm & 0xFF; + + if (emit_bpf_tail_call(ctx, map_index, dynamic_array)) return -EFAULT; break; /* function return */ --=20 2.52.0 From nobody Tue Feb 10 21:39:10 2026 Received: from out-187.mta1.migadu.com (out-187.mta1.migadu.com [95.215.58.187]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A3D1F1D5CEA for ; Fri, 2 Jan 2026 15:02:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=95.215.58.187 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1767366138; cv=none; b=F5ZYPoIPzcvi98uENwYn4a+2xfDwsQb+OrWKRdW+RDsHKMdnyHQI3bEDatfT+r29vAjBcINuoyYH5HBlTSq8fVEYkYnzxWvudV3re+X4N34YStmkWvGUZ9dvK7SDPQ2inx7rXYcsPNWC8wddt/2V1Ll9iFIr4HllHQOqE/o2Y3I= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1767366138; c=relaxed/simple; bh=oBa469maWudsSL5hciWDODbQnyvM7WMptEAnoC2Jg2w=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=t9LcbmdYX0Qa9Hy/MF2fAAFYJud8wcf29ifvvBPqM9s7UaOEEm4WGAWb8k+h6j3RxZg2+MmbsJ/ivt4zpQ5pNlzckPcKJorf48YCI7Y1bq0ZDqTFqn5nUe+82kIvIqkQYMemb9hZwY3AvbSnMbccPqfoRqiUu2CNG62Q1W5UcYo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev; spf=pass smtp.mailfrom=linux.dev; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b=dTd7xJ82; arc=none smtp.client-ip=95.215.58.187 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.dev Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=linux.dev header.i=@linux.dev header.b="dTd7xJ82" X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1767366134; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=SWoEF/+O4yZ7+ZS1SC3d02+me9C0QQC50vg1G2pDlFk=; b=dTd7xJ82NQ50raWJ1auBfjVAEwvFe1zNPFyY3dHzp/bG923whF4XKdS/DUgHQEvXkI3H+D 2cu4C7PnzuE4zP0oxCJ+qzCDPlfxsgn1+i8mteiHNpH+MSZjsfbzjqr+Fwb8306pMkT6qV 7dilcsPQzEAPjUodluMNHjnbf0UKuoQ= From: Leon Hwang To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Eduard Zingerman , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Puranjay Mohan , Xu Kuohai , Catalin Marinas , Will Deacon , "David S . Miller" , David Ahern , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , x86@kernel.org, "H . Peter Anvin" , Andrew Morton , linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, netdev@vger.kernel.org, kernel-patches-bot@fb.com, Leon Hwang Subject: [PATCH bpf-next 4/4] bpf, lib/test_bpf: Fix broken tailcall tests Date: Fri, 2 Jan 2026 23:00:32 +0800 Message-ID: <20260102150032.53106-5-leon.hwang@linux.dev> In-Reply-To: <20260102150032.53106-1-leon.hwang@linux.dev> References: <20260102150032.53106-1-leon.hwang@linux.dev> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Migadu-Flow: FLOW_OUT Content-Type: text/plain; charset="utf-8" Update the tail call tests in test_bpf to work with the new tail call optimization that requires: 1. A valid used_maps array pointing to the prog array 2. Precomputed tail call targets in array->ptrs[max_entries + index] Signed-off-by: Leon Hwang --- lib/test_bpf.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index af0041df2b72..680d34d46f19 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -15448,26 +15448,45 @@ static void __init destroy_tail_call_tests(struct= bpf_array *progs) { int i; =20 - for (i =3D 0; i < ARRAY_SIZE(tail_call_tests); i++) - if (progs->ptrs[i]) - bpf_prog_free(progs->ptrs[i]); + for (i =3D 0; i < ARRAY_SIZE(tail_call_tests); i++) { + struct bpf_prog *fp =3D progs->ptrs[i]; + + if (!fp) + continue; + + /* + * The used_maps points to fake maps that don't have + * proper ops, so clear it before bpf_prog_free to avoid + * bpf_free_used_maps trying to process it. + */ + kfree(fp->aux->used_maps); + fp->aux->used_maps =3D NULL; + fp->aux->used_map_cnt =3D 0; + bpf_prog_free(fp); + } kfree(progs); } =20 static __init int prepare_tail_call_tests(struct bpf_array **pprogs) { + int prologue_offset =3D bpf_arch_tail_call_prologue_offset(); int ntests =3D ARRAY_SIZE(tail_call_tests); + u32 max_entries =3D ntests + 1; struct bpf_array *progs; int which, err; =20 /* Allocate the table of programs to be used for tail calls */ - progs =3D kzalloc(struct_size(progs, ptrs, ntests + 1), GFP_KERNEL); + progs =3D kzalloc(struct_size(progs, ptrs, max_entries * 2), GFP_KERNEL); if (!progs) goto out_nomem; =20 + /* Set max_entries before JIT, as it's used in JIT */ + progs->map.max_entries =3D max_entries; + /* Create all eBPF programs and populate the table */ for (which =3D 0; which < ntests; which++) { struct tail_call_test *test =3D &tail_call_tests[which]; + struct bpf_map *map =3D &progs->map; struct bpf_prog *fp; int len, i; =20 @@ -15487,10 +15506,16 @@ static __init int prepare_tail_call_tests(struct = bpf_array **pprogs) if (!fp) goto out_nomem; =20 + fp->aux->used_maps =3D kmalloc_array(1, sizeof(map), GFP_KERNEL); + if (!fp->aux->used_maps) + goto out_nomem; + fp->len =3D len; fp->type =3D BPF_PROG_TYPE_SOCKET_FILTER; fp->aux->stack_depth =3D test->stack_depth; fp->aux->tail_call_reachable =3D test->has_tail_call; + fp->aux->used_maps[0] =3D map; + fp->aux->used_map_cnt =3D 1; memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn)); =20 /* Relocate runtime tail call offsets and addresses */ @@ -15548,6 +15573,10 @@ static __init int prepare_tail_call_tests(struct b= pf_array **pprogs) if ((long)__bpf_call_base + insn->imm !=3D addr) *insn =3D BPF_JMP_A(0); /* Skip: NOP */ break; + + case BPF_JMP | BPF_TAIL_CALL: + insn->imm =3D 0; + break; } } =20 @@ -15555,11 +15584,11 @@ static __init int prepare_tail_call_tests(struct = bpf_array **pprogs) if (err) goto out_err; =20 + progs->ptrs[max_entries + which] =3D (void *) fp->bpf_func + prologue_of= fset; progs->ptrs[which] =3D fp; } =20 /* The last entry contains a NULL program pointer */ - progs->map.max_entries =3D ntests + 1; *pprogs =3D progs; return 0; =20 --=20 2.52.0