From: Abhishek Dubey <adubey@linux.ibm.com>
The trampoline mechanism sets up its own stack frame and an
additional dummy frame. Additional JIT instructions are needed to
handle tail call dereferencing in the trampoline's context.

No new stack frames are added beyond the two mentioned above;
instead, space for the conventional 'non-volatile register save
area' and for tail_call_info is added to the trampoline's frame on
ppc64. This keeps the trampoline's frame layout consistent with
that of all other frames.

Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
arch/powerpc/net/bpf_jit_comp.c | 48 ++++++++++++++++++++++++++++-----
1 file changed, 42 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 069a8822c30d..4aaa0a287a45 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -606,15 +606,42 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context
return 0;
}
-static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx,
- int func_frame_offset, int r4_off)
+/*
+ * Refer to the 'Generated stack layout' comment in this file for the actual
+ * stack layout during trampoline invocation.
+ *
+ * Refer to __arch_prepare_bpf_trampoline() for stack component details.
+ *
+ * The tail call count/reference lives in the caller's stack frame. The
+ * contents of tail_call_info must be copied before calling the actual
+ * function to which the trampoline is attached.
+ *
+ */
+
+static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
+ int func_frame_offset,
+ int bpf_dummy_frame_size, int r4_off)
{
if (IS_ENABLED(CONFIG_PPC64)) {
/* See bpf_jit_stack_tailcallinfo_offset() */
- int tailcallcnt_offset = 7 * 8;
+ int tailcallinfo_offset = BPF_PPC_STACK_SAVE + SZL;
+ /*
+ * func_frame_offset =
+ * bpf_dummy_frame_size + trampoline_frame_size
+ */
+ EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
+ EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset));
+
+ /*
+ * Set tail_call_info in the trampoline's frame depending on
+ * whether the previous frame held a value or a reference.
+ */
+ EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
+ PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
+ EMIT(PPC_RAW_ADDI(_R3, _R4, bpf_jit_stack_tailcallinfo_offset(ctx)));
+ EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset
+ - bpf_dummy_frame_size - tailcallinfo_offset));
- EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
- EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset));
} else {
/* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
@@ -721,6 +748,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* [ r0 save (32-bit) ] |
* dummy frame for unwind [ back chain 1 ] --
* [ padding ] align stack frame
+ * [ r26..r31 ] nvr save : BPF_PPC_STACK_SAVE
+ * [ tail_call_info ] non optional - 64-bit powerpc
* r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc
* alt_lr_off [ real lr (ool stub)] optional - actual lr
* [ r26 ]
@@ -801,6 +830,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
+ /* Room for 64-bit tail_call_cnt */
+ bpf_frame_size += SZL;
+
+ /* Room for nvr save area */
+ bpf_frame_size += BPF_PPC_STACK_SAVE;
+
/* Padding to align stack frame, if any */
bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
@@ -902,7 +937,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/* Replicate tail_call_cnt before calling the original BPF prog */
if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
- bpf_trampoline_setup_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
+ bpf_trampoline_setup_tail_call_info(image, ctx, func_frame_offset,
+ bpf_dummy_frame_size, r4_off);
/* Restore args */
bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off);
--
2.48.1
On 05/01/26 4:22 pm, adubey@linux.ibm.com wrote:
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 069a8822c30d..4aaa0a287a45 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -606,15 +606,42 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context
> return 0;
> }
>
> -static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx,
> - int func_frame_offset, int r4_off)
> [ ... ]
> +static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
> + int func_frame_offset,
> + int bpf_dummy_frame_size, int r4_off)
> {
> if (IS_ENABLED(CONFIG_PPC64)) {
> /* See bpf_jit_stack_tailcallinfo_offset() */
> - int tailcallcnt_offset = 7 * 8;
This offset update should be part of patch#1
- Hari
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 069a8822c30d..4aaa0a287a45 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
[ ... ]
> -static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx,
> - int func_frame_offset, int r4_off)
> +static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
> + int func_frame_offset,
> + int bpf_dummy_frame_size, int r4_off)
> {
> if (IS_ENABLED(CONFIG_PPC64)) {
> /* See bpf_jit_stack_tailcallinfo_offset() */
> - int tailcallcnt_offset = 7 * 8;
> + int tailcallinfo_offset = BPF_PPC_STACK_SAVE + SZL;
[ ... ]
> + EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset
> + - bpf_dummy_frame_size - tailcallinfo_offset));
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The new setup function stores tail_call_info at offset
(func_frame_offset - bpf_dummy_frame_size - tailcallinfo_offset) from R1.
Since func_frame_offset = bpf_dummy_frame_size + bpf_frame_size, this
resolves to (bpf_frame_size - 56), which is a positive offset from R1.
However, the corresponding bpf_trampoline_restore_tail_call_cnt() function
was not updated. It still reads from (R1 - 56):
static void bpf_trampoline_restore_tail_call_cnt(...)
{
if (IS_ENABLED(CONFIG_PPC64)) {
int tailcallcnt_offset = 7 * 8;
EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
^^^^^^^^^^^^^^^^^^
...
}
}
Is there an offset mismatch here? The setup function stores at
R1 + (bpf_frame_size - 56) while restore reads from R1 - 56. These point
to different memory locations since bpf_frame_size is a positive value
(at minimum ~200 bytes given the frame contents).
When BPF_TRAMP_F_TAIL_CALL_CTX is set, the restore function would read
incorrect data after the traced function returns, potentially causing
wrong tail call context to be propagated.
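
To make the arithmetic concrete, here is a small standalone sketch. The
frame sizes below are made-up example values; only the relations between
them (and the 48 + 8 = 56 constant) are taken from the hunks quoted in
this thread:

#include <stdio.h>

#define SZL			8	/* 64-bit powerpc */
#define BPF_PPC_STACK_SAVE	48	/* r26..r31 save area, 6 * 8 */

int main(void)
{
	int tailcallinfo_offset = BPF_PPC_STACK_SAVE + SZL;	/* 56 */
	int bpf_dummy_frame_size = 64;				/* example only */
	int bpf_frame_size = 256;				/* example only */
	int func_frame_offset = bpf_dummy_frame_size + bpf_frame_size;

	/* offset used by the new setup code to store tail_call_info */
	int store_off = func_frame_offset - bpf_dummy_frame_size
			- tailcallinfo_offset;
	/* offset still used by the unchanged restore code */
	int restore_off = -tailcallinfo_offset;

	printf("store:   R1%+d\n", store_off);		/* R1+200 */
	printf("restore: R1%+d\n", restore_off);	/* R1-56  */
	return 0;
}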
> @@ -801,6 +830,12 @@ static int __arch_prepare_bpf_trampoline(...)
> }
> }
>
> + /* Room for 64-bit tail_call_cnt */
> + bpf_frame_size += SZL;
> +
> + /* Room for nvr save area */
> + bpf_frame_size += BPF_PPC_STACK_SAVE;
> +
> /* Padding to align stack frame, if any */
> bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
The frame size is increased by 56 bytes (SZL + BPF_PPC_STACK_SAVE = 8 + 48)
for the tail_call_info and nvr save area. This affects all calculations
using bpf_frame_size, including the store offset in the setup function,
but the restore function continues to use a hardcoded negative offset.
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/20713322833
On 05/01/26 4:45 pm, bot+bpf-ci@kernel.org wrote:
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
>> index 069a8822c30d..4aaa0a287a45 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>
> [ ... ]
>
>> -static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx,
>> - int func_frame_offset, int r4_off)
>> +static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
>> + int func_frame_offset,
>> + int bpf_dummy_frame_size, int r4_off)
>> {
>> if (IS_ENABLED(CONFIG_PPC64)) {
>> /* See bpf_jit_stack_tailcallinfo_offset() */
>> - int tailcallcnt_offset = 7 * 8;
>> + int tailcallinfo_offset = BPF_PPC_STACK_SAVE + SZL;
>
> [ ... ]
>
>> + EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset
>> + - bpf_dummy_frame_size - tailcallinfo_offset));
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>
> The new setup function stores tail_call_info at offset
> (func_frame_offset - bpf_dummy_frame_size - tailcallinfo_offset) from R1.
> Since func_frame_offset = bpf_dummy_frame_size + bpf_frame_size, this
> resolves to (bpf_frame_size - 56), which is a positive offset from R1.
With this patchset, back propagation of tail call count is not needed
anymore, as tail call count is saved only at one place and all
subsequent uses only hold the pointer to it. So, I can't think of a
good reason to restore tailcall count. Restore can be skipped?
@abhishek, a comment explaining how the tailcall count/pointer is being
set up would help here...
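
For reference, one way to read the setup sequence quoted above, written
as the kind of comment being asked for (my interpretation of the emitted
instructions only, to be confirmed):

/*
 * ppc64 path of bpf_trampoline_setup_tail_call_info():
 *
 *   r4 = *(r1 + func_frame_offset)             back chain towards the caller
 *   r3 = *(r4 - (BPF_PPC_STACK_SAVE + SZL))    caller-side tail_call_info
 *
 *   if (r3 <= MAX_TAIL_CALL_CNT)               looks like a raw count, so
 *           r3 = r4 + bpf_jit_stack_tailcallinfo_offset(ctx);
 *                                              replace it with a pointer
 *   (otherwise r3 already holds a pointer and is propagated as-is)
 *
 *   *(r1 + func_frame_offset - bpf_dummy_frame_size
 *        - (BPF_PPC_STACK_SAVE + SZL)) = r3    trampoline frame's
 *                                              tail_call_info slot
 */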
Also, the trampoline frame has grown by as much as the size of the
redzone area used by a bpf program, just to keep the tailcall info at
the same offset. There is no reason to save the NVRs in this frame
though. I suggest adjusting the stack layout so that the tailcall info
is the first doubleword in the redzone instead of the (n+1)th
doubleword after n NVRs. That saves stack space and makes the tailcall
info offset calculation simpler.
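
Roughly, something like this (a sketch of the suggestion only; offsets
are relative to the r1 below which the redzone sits):

	[ back chain        ] <- r1
	[ tail_call_info    ]    r1 - SZL: first doubleword of the redzone,
	                         same offset for every frame
	[ nvr saves, if any ]    from r1 - 2*SZL downwards, only in frames
	                         that actually need to save NVRs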
- Hari