From: Abhishek Dubey <adubey@linux.ibm.com>
The trampoline mechanism sets up its own stack frame and
an additional dummy frame. We need to have additional JIT
instructions handling tailcall dereferencing in the
trampoline's context.
We don't add the two stack frames pointed out above; rather, we
add space for tail_call_info at the bottom of the trampoline frame
for ppc64. This makes the trampoline's frame consistent with the
layout of all other frames with respect to the tail_call_info offset.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
arch/powerpc/net/bpf_jit_comp.c | 83 ++++++++++++++++++++++-----------
1 file changed, 56 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 069a8822c30d..e3088cf089d1 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -606,33 +606,58 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context
return 0;
}
-static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx,
- int func_frame_offset, int r4_off)
-{
- if (IS_ENABLED(CONFIG_PPC64)) {
- /* See bpf_jit_stack_tailcallinfo_offset() */
- int tailcallcnt_offset = 7 * 8;
-
- EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
- EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset));
- } else {
- /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
- EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
- }
-}
+/*
+ * Refer the label 'Generated stack layout' in this file for actual stack
+ * layout during trampoline invocation.
+ *
+ * Refer __arch_prepare_bpf_trampoline() for stack component details.
+ *
+ * The tailcall count/reference is present in caller's stack frame. Its required
+ * to copy the content of tail_call_info before calling the actual function
+ * to which the trampoline is attached.
+ *
+ */
-static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx,
- int func_frame_offset, int r4_off)
+static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
+ int func_frame_offset,
+ int bpf_dummy_frame_size, int r4_off)
{
if (IS_ENABLED(CONFIG_PPC64)) {
/* See bpf_jit_stack_tailcallinfo_offset() */
- int tailcallcnt_offset = 7 * 8;
+ int tailcallinfo_offset = BPF_PPC_TAILCALL;
+ /*
+ * func_frame_offset = ...(1)
+ * bpf_dummy_frame_size + trampoline_frame_size
+ */
+ EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
+ EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset));
+
+ /*
+ * Setting the tail_call_info in trampoline's frame
+ * depending on if previous frame had value or reference.
+ */
+ EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
+ PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
+ EMIT(PPC_RAW_ADDI(_R3, _R4, bpf_jit_stack_tailcallinfo_offset(ctx)));
+ /*
+ * From ...(1) above:
+ * trampoline_frame_bottom = ...(2)
+ * func_frame_offset - bpf_dummy_frame_size
+ *
+ * Using ...(2) derived above:
+ * trampoline_tail_call_info_offset = ...(3)
+ * trampoline_frame_bottom - tailcallinfo_offset
+ *
+ * From ...(3):
+ * Use trampoline_tail_call_info_offset to write reference of main's
+ * tail_call_info in trampoline frame.
+ */
+ EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size)
+ - tailcallinfo_offset));
- EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
- EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
} else {
/* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
- EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
+ EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
}
}
@@ -720,6 +745,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* LR save area [ r0 save (64-bit) ] | header
* [ r0 save (32-bit) ] |
* dummy frame for unwind [ back chain 1 ] --
+ * [ tail_call_info ] non optional - 64-bit powerpc
* [ padding ] align stack frame
* r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc
* alt_lr_off [ real lr (ool stub)] optional - actual lr
@@ -801,8 +827,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
- /* Padding to align stack frame, if any */
- bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
+ if (!(bpf_frame_size % (2 * SZL))) {
+ /* Stack is 16-byte aligned */
+ /* Room for padding followed by 64-bit tail_call_info */
+ bpf_frame_size += SZL + BPF_PPC_TAILCALL;
+ } else {
+ /* Room for 64-bit tail_call_info */
+ bpf_frame_size += BPF_PPC_TAILCALL;
+ }
/* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */
bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;
@@ -902,7 +934,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/* Replicate tail_call_cnt before calling the original BPF prog */
if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
- bpf_trampoline_setup_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
+ bpf_trampoline_setup_tail_call_info(image, ctx, func_frame_offset,
+ bpf_dummy_frame_size, r4_off);
/* Restore args */
bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off);
@@ -917,10 +950,6 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/* Store return value for bpf prog to access */
EMIT(PPC_RAW_STL(_R3, _R1, retval_off));
- /* Restore updated tail_call_cnt */
- if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
- bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
-
/* Reserve space to patch branch instruction to skip fexit progs */
if (ro_image) /* image is NULL for dummy pass */
im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
--
2.48.1
On 14/01/26 5:14 pm, adubey@linux.ibm.com wrote:
> From: Abhishek Dubey <adubey@linux.ibm.com>
>
> The trampoline mechanism sets up its own stack frame and
> an additional dummy frame. We need to have additional JIT
> instructions handling tailcall dereferencing in the
> trampoline's context.
>
> We don't add the two stack frames pointed above, rather
> add space for tail_call_info at bottom in trampoline frame
> for ppc64. This makes the trampoline's frame consistent with
> layout of all other frames wrt tail_call_info offset.
>
> Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
> ---
> arch/powerpc/net/bpf_jit_comp.c | 83 ++++++++++++++++++++++-----------
> 1 file changed, 56 insertions(+), 27 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 069a8822c30d..e3088cf089d1 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -606,33 +606,58 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context
> return 0;
> }
>
> -static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx,
> - int func_frame_offset, int r4_off)
> -{
> - if (IS_ENABLED(CONFIG_PPC64)) {
> - /* See bpf_jit_stack_tailcallinfo_offset() */
> - int tailcallcnt_offset = 7 * 8;
> -
> - EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
> - EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset));
> - } else {
> - /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
> - EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
> - }
> -}
> +/*
> + * Refer the label 'Generated stack layout' in this file for actual stack
> + * layout during trampoline invocation.
> + *
> + * Refer __arch_prepare_bpf_trampoline() for stack component details.
> + *
> + * The tailcall count/reference is present in caller's stack frame. Its required
> + * to copy the content of tail_call_info before calling the actual function
> + * to which the trampoline is attached.
> + *
> + */
>
> -static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx,
> - int func_frame_offset, int r4_off)
> +static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
> + int func_frame_offset,
> + int bpf_dummy_frame_size, int r4_off)
> {
> if (IS_ENABLED(CONFIG_PPC64)) {
> /* See bpf_jit_stack_tailcallinfo_offset() */
> - int tailcallcnt_offset = 7 * 8;
> + int tailcallinfo_offset = BPF_PPC_TAILCALL;
This offset update should have been part of patch#1
> + /*
> + * func_frame_offset = ...(1)
> + * bpf_dummy_frame_size + trampoline_frame_size
> + */
> + EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
> + EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset));
> +
> + /*
> + * Setting the tail_call_info in trampoline's frame
> + * depending on if previous frame had value or reference.
> + */
> + EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
> + PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
> + EMIT(PPC_RAW_ADDI(_R3, _R4, bpf_jit_stack_tailcallinfo_offset(ctx)));
> + /*
> + * From ...(1) above:
> + * trampoline_frame_bottom = ...(2)
> + * func_frame_offset - bpf_dummy_frame_size
> + *
> + * Using ...(2) derived above:
> + * trampoline_tail_call_info_offset = ...(3)
> + * trampoline_frame_bottom - tailcallinfo_offset
> + *
> + * From ...(3):
> + * Use trampoline_tail_call_info_offset to write reference of main's
> + * tail_call_info in trampoline frame.
> + */
> + EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size)
> + - tailcallinfo_offset));
>
> - EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
> - EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
> } else {
> /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
> - EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
> + EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
> }
> }
>
> @@ -720,6 +745,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> * LR save area [ r0 save (64-bit) ] | header
> * [ r0 save (32-bit) ] |
> * dummy frame for unwind [ back chain 1 ] --
> + * [ tail_call_info ] non optional - 64-bit powerpc
> * [ padding ] align stack frame
> * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc
> * alt_lr_off [ real lr (ool stub)] optional - actual lr
> @@ -801,8 +827,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> }
> }
>
> - /* Padding to align stack frame, if any */
> - bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
> + if (!(bpf_frame_size % (2 * SZL))) {
> + /* Stack is 16-byte aligned */
> + /* Room for padding followed by 64-bit tail_call_info */
> + bpf_frame_size += SZL + BPF_PPC_TAILCALL;
> + } else {
> + /* Room for 64-bit tail_call_info */
> + bpf_frame_size += BPF_PPC_TAILCALL;
> + }
>
> /* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */
> bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;
This change assumes the size is at least 8-byte aligned, which is
true today, but it is better to avoid that assumption by not touching
the padding part. The above hunk could simply be:
diff --git a/arch/powerpc/net/bpf_jit_comp.c
b/arch/powerpc/net/bpf_jit_comp.c
index 5e976730b2f5..266cc6f17dcc 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -795,6 +795,10 @@ static int __arch_prepare_bpf_trampoline(struct
bpf_tramp_image *im, void *rw_im
}
}
+ /* Save tailcall count pointer at the same offset on the stack
where subprogs expect it */
+ if ((flags & BPF_TRAMP_F_CALL_ORIG) && (flags &
BPF_TRAMP_F_TAIL_CALL_CTX))
+ bpf_frame_size += SZL;
+
/* Padding to align stack frame, if any */
bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
Patch#2 is not complete without this change. Please fold this patch
into patch#2 itself.
- Hari
On 17/01/26 4:09 pm, Hari Bathini wrote:
>
>
> On 14/01/26 5:14 pm, adubey@linux.ibm.com wrote:
>> From: Abhishek Dubey <adubey@linux.ibm.com>
>>
>> The trampoline mechanism sets up its own stack frame and
>> an additional dummy frame. We need to have additional JIT
>> instructions handling tailcall dereferencing in the
>> trampoline's context.
>>
>> We don't add the two stack frames pointed above, rather
>> add space for tail_call_info at bottom in trampoline frame
>> for ppc64. This makes the trampoline's frame consistent with
>> layout of all other frames wrt tail_call_info offset.
>>
>> Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
>> ---
>> arch/powerpc/net/bpf_jit_comp.c | 83 ++++++++++++++++++++++-----------
>> 1 file changed, 56 insertions(+), 27 deletions(-)
>>
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/
>> bpf_jit_comp.c
>> index 069a8822c30d..e3088cf089d1 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>> @@ -606,33 +606,58 @@ static int invoke_bpf_mod_ret(u32 *image, u32
>> *ro_image, struct codegen_context
>> return 0;
>> }
>> -static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct
>> codegen_context *ctx,
>> - int func_frame_offset, int r4_off)
>> -{
>> - if (IS_ENABLED(CONFIG_PPC64)) {
>> - /* See bpf_jit_stack_tailcallinfo_offset() */
>> - int tailcallcnt_offset = 7 * 8;
>> -
>> - EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset -
>> tailcallcnt_offset));
>> - EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset));
>> - } else {
>> - /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
>> - EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
>> - }
>> -}
>> +/*
>> + * Refer the label 'Generated stack layout' in this file for actual
>> stack
>> + * layout during trampoline invocation.
>> + *
>> + * Refer __arch_prepare_bpf_trampoline() for stack component details.
>> + *
>> + * The tailcall count/reference is present in caller's stack frame.
>> Its required
>> + * to copy the content of tail_call_info before calling the actual
>> function
>> + * to which the trampoline is attached.
>> + *
>> + */
>> -static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct
>> codegen_context *ctx,
>> - int func_frame_offset, int r4_off)
>> +static void bpf_trampoline_setup_tail_call_info(u32 *image, struct
>> codegen_context *ctx,
>> + int func_frame_offset,
>> + int bpf_dummy_frame_size, int r4_off)
>> {
>> if (IS_ENABLED(CONFIG_PPC64)) {
>> /* See bpf_jit_stack_tailcallinfo_offset() */
>
>> - int tailcallcnt_offset = 7 * 8;
>> + int tailcallinfo_offset = BPF_PPC_TAILCALL;
>
> This offset update should have been part of patch#1
>
>> + /*
>> + * func_frame_offset = ...(1)
>> + * bpf_dummy_frame_size + trampoline_frame_size
>> + */
>> + EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
>> + EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset));
>> +
>> + /*
>> + * Setting the tail_call_info in trampoline's frame
>> + * depending on if previous frame had value or reference.
>> + */
>> + EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
>> + PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
>> + EMIT(PPC_RAW_ADDI(_R3, _R4,
>> bpf_jit_stack_tailcallinfo_offset(ctx)));
>> + /*
>> + * From ...(1) above:
>> + * trampoline_frame_bottom = ...(2)
>> + * func_frame_offset - bpf_dummy_frame_size
>> + *
>> + * Using ...(2) derived above:
>> + * trampoline_tail_call_info_offset = ...(3)
>> + * trampoline_frame_bottom - tailcallinfo_offset
>> + *
>> + * From ...(3):
>> + * Use trampoline_tail_call_info_offset to write reference of
>> main's
>> + * tail_call_info in trampoline frame.
>> + */
>> + EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset -
>> bpf_dummy_frame_size)
>> + - tailcallinfo_offset));
>> - EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
>> - EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset -
>> tailcallcnt_offset));
>> } else {
>> /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
>> - EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
>> + EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
>> }
>> }
>> @@ -720,6 +745,7 @@ static int __arch_prepare_bpf_trampoline(struct
>> bpf_tramp_image *im, void *rw_im
>> * LR save area [ r0 save (64-bit) ] | header
>> * [ r0 save (32-bit) ] |
>> * dummy frame for unwind [ back chain 1 ] --
>> + * [ tail_call_info ] non
>> optional - 64-bit powerpc
Also, why non-optional? Can this be made optional based on the
BPF_TRAMP_F_CALL_ORIG and BPF_TRAMP_F_TAIL_CALL_CTX flags?
>> * [ padding ] align
>> stack frame
>> * r4_off [ r4 (tailcallcnt) ] optional -
>> 32-bit powerpc
>> * alt_lr_off [ real lr (ool stub)] optional -
>> actual lr
>
>> @@ -801,8 +827,14 @@ static int __arch_prepare_bpf_trampoline(struct
>> bpf_tramp_image *im, void *rw_im
>> }
>> }
>> - /* Padding to align stack frame, if any */
>> - bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
>> + if (!(bpf_frame_size % (2 * SZL))) {
>> + /* Stack is 16-byte aligned */
>> + /* Room for padding followed by 64-bit tail_call_info */
>> + bpf_frame_size += SZL + BPF_PPC_TAILCALL;
>> + } else {
>> + /* Room for 64-bit tail_call_info */
>> + bpf_frame_size += BPF_PPC_TAILCALL;
>> + }
>> /* Dummy frame size for proper unwind - includes 64-bytes red
>> zone for 64-bit powerpc */
>> bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;
>
> This change assumes the size is at least 8-byte aligned which is
> true today but better skip that assumption by not touching the
> padding part. The above hunk could simply be:
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/
> bpf_jit_comp.c
> index 5e976730b2f5..266cc6f17dcc 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -795,6 +795,10 @@ static int __arch_prepare_bpf_trampoline(struct
> bpf_tramp_image *im, void *rw_im
> }
> }
>
> + /* Save tailcall count pointer at the same offset on the stack
> where subprogs expect it */
> + if ((flags & BPF_TRAMP_F_CALL_ORIG) && (flags &
> BPF_TRAMP_F_TAIL_CALL_CTX))
> + bpf_frame_size += SZL;
> +
> /* Padding to align stack frame, if any */
> bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
>
>
> Patch#2 is not complete without this change. Please fold this patch
> into patch#2 itself.
>
- Hari
Hi,
kernel test robot noticed the following build errors:
[auto build test ERROR on powerpc/next]
[also build test ERROR on powerpc/fixes linus/master v6.19-rc5 next-20260114]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/adubey-linux-ibm-com/powerpc64-bpf-Move-tail_call_cnt-to-bottom-of-stack-frame/20260114-195044
base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
patch link: https://lore.kernel.org/r/20260114114450.30405-4-adubey%40linux.ibm.com
patch subject: [PATCH v2 3/6] powerpc64/bpf: Tailcall handling with trampolines
config: powerpc-randconfig-002-20260114 (https://download.01.org/0day-ci/archive/20260115/202601150350.ZftaCBVV-lkp@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 12.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260115/202601150350.ZftaCBVV-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601150350.ZftaCBVV-lkp@intel.com/
All errors (new ones prefixed by >>):
arch/powerpc/net/bpf_jit_comp.c: In function 'bpf_trampoline_setup_tail_call_info':
>> arch/powerpc/net/bpf_jit_comp.c:644:43: error: 'BPF_PPC_TAILCALL' undeclared (first use in this function); did you mean 'BPF_TAIL_CALL'?
644 | int tailcallinfo_offset = BPF_PPC_TAILCALL;
| ^~~~~~~~~~~~~~~~
| BPF_TAIL_CALL
arch/powerpc/net/bpf_jit_comp.c:644:43: note: each undeclared identifier is reported only once for each function it appears in
arch/powerpc/net/bpf_jit_comp.c: In function '__arch_prepare_bpf_trampoline':
arch/powerpc/net/bpf_jit_comp.c:850:41: error: 'BPF_PPC_TAILCALL' undeclared (first use in this function); did you mean 'BPF_TAIL_CALL'?
850 | bpf_frame_size += SZL + BPF_PPC_TAILCALL;
| ^~~~~~~~~~~~~~~~
| BPF_TAIL_CALL
vim +644 arch/powerpc/net/bpf_jit_comp.c
625
626 /*
627 * Refer the label 'Generated stack layout' in this file for actual stack
628 * layout during trampoline invocation.
629 *
630 * Refer __arch_prepare_bpf_trampoline() for stack component details.
631 *
632 * The tailcall count/reference is present in caller's stack frame. Its required
633 * to copy the content of tail_call_info before calling the actual function
634 * to which the trampoline is attached.
635 *
636 */
637
638 static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
639 int func_frame_offset,
640 int bpf_dummy_frame_size, int r4_off)
641 {
642 if (IS_ENABLED(CONFIG_PPC64)) {
643 /* See bpf_jit_stack_tailcallinfo_offset() */
> 644 int tailcallinfo_offset = BPF_PPC_TAILCALL;
645 /*
646 * func_frame_offset = ...(1)
647 * bpf_dummy_frame_size + trampoline_frame_size
648 */
649 EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
650 EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset));
651
652 /*
653 * Setting the tail_call_info in trampoline's frame
654 * depending on if previous frame had value or reference.
655 */
656 EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
657 PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
658 EMIT(PPC_RAW_ADDI(_R3, _R4, bpf_jit_stack_tailcallinfo_offset(ctx)));
659 /*
660 * From ...(1) above:
661 * trampoline_frame_bottom = ...(2)
662 * func_frame_offset - bpf_dummy_frame_size
663 *
664 * Using ...(2) derived above:
665 * trampoline_tail_call_info_offset = ...(3)
666 * trampoline_frame_bottom - tailcallinfo_offset
667 *
668 * From ...(3):
669 * Use trampoline_tail_call_info_offset to write reference of main's
670 * tail_call_info in trampoline frame.
671 */
672 EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size)
673 - tailcallinfo_offset));
674
675 } else {
676 /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
677 EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
678 }
679 }
680
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 069a8822c30d..e3088cf089d1 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
[ ... ]
> +static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
> + int func_frame_offset,
> + int bpf_dummy_frame_size, int r4_off)
> +{
> + if (IS_ENABLED(CONFIG_PPC64)) {
> + /* See bpf_jit_stack_tailcallinfo_offset() */
> + int tailcallinfo_offset = BPF_PPC_TAILCALL;
> + /*
> + * func_frame_offset = ...(1)
> + * bpf_dummy_frame_size + trampoline_frame_size
> + */
> + EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
> + EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset));
^^^^
Should this LD instruction be an ADDI instead? Looking at the stack frame
setup, after the two STLU instructions in __arch_prepare_bpf_trampoline():
STLU _R1, _R1, -bpf_dummy_frame_size -> stores original _R1 at dummy frame
STLU _R1, _R1, -bpf_frame_size -> stores dummy frame ptr at tramp frame
From the trampoline frame, the back chain stored in the dummy frame at offset
bpf_frame_size points to the caller's frame. But func_frame_offset equals
(bpf_dummy_frame_size + bpf_frame_size), which points into the caller's frame
itself. Loading from that address returns the caller's back chain (pointing
to the grandparent frame), not a pointer to the caller's frame.
This appears to cause the subsequent load at [_R4 - 8] to read the grandparent's
tail_call_info rather than the caller's. Would loading from [_R1 + bpf_frame_size]
give the correct pointer to the caller's frame?
For comparison, the subprogram prologue in bpf_jit_build_prologue() uses
[_R1 + 0] to get the back chain pointing to its caller, which seems like
the right pattern.
> +
> + /*
> + * Setting the tail_call_info in trampoline's frame
> + * depending on if previous frame had value or reference.
> + */
> + EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
> + PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
> + EMIT(PPC_RAW_ADDI(_R3, _R4, bpf_jit_stack_tailcallinfo_offset(ctx)));
[ ... ]
---
AI reviewed your patch. Please fix the bug, or reply by email explaining why it is not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/20993216190
© 2016 - 2026 Red Hat, Inc.