[PATCH] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function

Haoran Jiang posted 1 patch 6 months, 3 weeks ago
arch/loongarch/net/bpf_jit.c | 28 +++++++++-------------------
1 file changed, 9 insertions(+), 19 deletions(-)
[PATCH] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
Posted by Haoran Jiang 6 months, 3 weeks ago
For a ebpf subprog JIT,the last call bpf_int_jit_compile function will
directly enter the skip_init_ctx process. At this point, out_offset = -1,
the jmp_offset in emit_bpf_tail_call is calculated
by #define jmp_offset (out_offset - (cur_offset)) is a negative number,
which does not meet expectations.The final generated assembly as follow.

54:	bgeu        	$a2, $t1, -8	    # 0x0000004c
58:	addi.d      	$a6, $s5, -1
5c:	bltz        	$a6, -16	    # 0x0000004c
60:	alsl.d      	$t2, $a2, $a1, 0x3
64:	ld.d        	$t2, $t2, 264
68:	beq         	$t2, $zero, -28	    # 0x0000004c

Before apply this patch, the follow test case will reveal soft lock issues.

cd tools/testing/selftests/bpf/
./test_progs --allow=tailcalls/tailcall_bpf2bpf_1

dmesg:
watchdog: BUG: soft lockup - CPU#2 stuck for 26s! [test_progs:25056]

Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
---
 arch/loongarch/net/bpf_jit.c | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index fa1500d4aa3e..d85490e7de89 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
 	return true;
 }
 
-/* initialized on the first pass of build_body() */
-static int out_offset = -1;
-static int emit_bpf_tail_call(struct jit_ctx *ctx)
+static int emit_bpf_tail_call(int insn, struct jit_ctx *ctx)
 {
 	int off;
 	u8 tcc = tail_call_reg(ctx);
@@ -220,9 +218,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	u8 t2 = LOONGARCH_GPR_T2;
 	u8 t3 = LOONGARCH_GPR_T3;
 	const int idx0 = ctx->idx;
-
-#define cur_offset (ctx->idx - idx0)
-#define jmp_offset (out_offset - (cur_offset))
+	int tc_ninsn = 0;
+	int jmp_offset = 0;
 
 	/*
 	 * a0: &ctx
@@ -232,8 +229,11 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	 * if (index >= array->map.max_entries)
 	 *	 goto out;
 	 */
+	tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] :
+		ctx->offset[0];
 	off = offsetof(struct bpf_array, map.max_entries);
 	emit_insn(ctx, ldwu, t1, a1, off);
+	jmp_offset = tc_ninsn - (ctx->idx - idx0);
 	/* bgeu $a2, $t1, jmp_offset */
 	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
 		goto toofar;
@@ -243,6 +243,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	 *	 goto out;
 	 */
 	emit_insn(ctx, addid, REG_TCC, tcc, -1);
+	jmp_offset = tc_ninsn - (ctx->idx - idx0);
 	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
 		goto toofar;
 
@@ -254,6 +255,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	emit_insn(ctx, alsld, t2, a2, a1, 2);
 	off = offsetof(struct bpf_array, ptrs);
 	emit_insn(ctx, ldd, t2, t2, off);
+	jmp_offset = tc_ninsn - (ctx->idx - idx0);
 	/* beq $t2, $zero, jmp_offset */
 	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
 		goto toofar;
@@ -263,22 +265,11 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	emit_insn(ctx, ldd, t3, t2, off);
 	__build_epilogue(ctx, true);
 
-	/* out: */
-	if (out_offset == -1)
-		out_offset = cur_offset;
-	if (cur_offset != out_offset) {
-		pr_err_once("tail_call out_offset = %d, expected %d!\n",
-			    cur_offset, out_offset);
-		return -1;
-	}
-
 	return 0;
 
 toofar:
 	pr_info_once("tail_call: jump too far\n");
 	return -1;
-#undef cur_offset
-#undef jmp_offset
 }
 
 static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
@@ -916,7 +907,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	/* tail call */
 	case BPF_JMP | BPF_TAIL_CALL:
 		mark_tail_call(ctx);
-		if (emit_bpf_tail_call(ctx) < 0)
+		if (emit_bpf_tail_call(i, ctx) < 0)
 			return -EINVAL;
 		break;
 
@@ -1342,7 +1333,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	if (tmp_blinded)
 		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
 
-	out_offset = -1;
 
 	return prog;
 
-- 
2.43.0

Re: [PATCH] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
Posted by Hengqi Chen 6 months, 3 weeks ago
Hi Haoran,

On Wed, May 28, 2025 at 6:40 PM Haoran Jiang <jianghaoran@kylinos.cn> wrote:
>
> For a ebpf subprog JIT,the last call bpf_int_jit_compile function will
> directly enter the skip_init_ctx process. At this point, out_offset = -1,
> the jmp_offset in emit_bpf_tail_call is calculated
> by #define jmp_offset (out_offset - (cur_offset)) is a negative number,
> which does not meet expectations.The final generated assembly as follow.
>
> 54:     bgeu            $a2, $t1, -8        # 0x0000004c
> 58:     addi.d          $a6, $s5, -1
> 5c:     bltz            $a6, -16            # 0x0000004c
> 60:     alsl.d          $t2, $a2, $a1, 0x3
> 64:     ld.d            $t2, $t2, 264
> 68:     beq             $t2, $zero, -28     # 0x0000004c
>
> Before apply this patch, the follow test case will reveal soft lock issues.
>
> cd tools/testing/selftests/bpf/
> ./test_progs --allow=tailcalls/tailcall_bpf2bpf_1
>
> dmesg:
> watchdog: BUG: soft lockup - CPU#2 stuck for 26s! [test_progs:25056]
>

This is a known issue. Does this change pass all tailcall tests ?
If not, please refer to the tailcall hierarchy patchset([1]).
We should address it once and for all. Thanks.

  [1]: https://lore.kernel.org/bpf/20240714123902.32305-1-hffilwlqm@gmail.com/

> Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
> ---
>  arch/loongarch/net/bpf_jit.c | 28 +++++++++-------------------
>  1 file changed, 9 insertions(+), 19 deletions(-)
>
> diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
> index fa1500d4aa3e..d85490e7de89 100644
> --- a/arch/loongarch/net/bpf_jit.c
> +++ b/arch/loongarch/net/bpf_jit.c
> @@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
>         return true;
>  }
>
> -/* initialized on the first pass of build_body() */
> -static int out_offset = -1;
> -static int emit_bpf_tail_call(struct jit_ctx *ctx)
> +static int emit_bpf_tail_call(int insn, struct jit_ctx *ctx)
>  {
>         int off;
>         u8 tcc = tail_call_reg(ctx);
> @@ -220,9 +218,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
>         u8 t2 = LOONGARCH_GPR_T2;
>         u8 t3 = LOONGARCH_GPR_T3;
>         const int idx0 = ctx->idx;
> -
> -#define cur_offset (ctx->idx - idx0)
> -#define jmp_offset (out_offset - (cur_offset))
> +       int tc_ninsn = 0;
> +       int jmp_offset = 0;
>
>         /*
>          * a0: &ctx
> @@ -232,8 +229,11 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
>          * if (index >= array->map.max_entries)
>          *       goto out;
>          */
> +       tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] :
> +               ctx->offset[0];
>         off = offsetof(struct bpf_array, map.max_entries);
>         emit_insn(ctx, ldwu, t1, a1, off);
> +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
>         /* bgeu $a2, $t1, jmp_offset */
>         if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
>                 goto toofar;
> @@ -243,6 +243,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
>          *       goto out;
>          */
>         emit_insn(ctx, addid, REG_TCC, tcc, -1);
> +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
>         if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
>                 goto toofar;
>
> @@ -254,6 +255,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
>         emit_insn(ctx, alsld, t2, a2, a1, 2);
>         off = offsetof(struct bpf_array, ptrs);
>         emit_insn(ctx, ldd, t2, t2, off);
> +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
>         /* beq $t2, $zero, jmp_offset */
>         if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
>                 goto toofar;
> @@ -263,22 +265,11 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
>         emit_insn(ctx, ldd, t3, t2, off);
>         __build_epilogue(ctx, true);
>
> -       /* out: */
> -       if (out_offset == -1)
> -               out_offset = cur_offset;
> -       if (cur_offset != out_offset) {
> -               pr_err_once("tail_call out_offset = %d, expected %d!\n",
> -                           cur_offset, out_offset);
> -               return -1;
> -       }
> -
>         return 0;
>
>  toofar:
>         pr_info_once("tail_call: jump too far\n");
>         return -1;
> -#undef cur_offset
> -#undef jmp_offset
>  }
>
>  static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
> @@ -916,7 +907,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
>         /* tail call */
>         case BPF_JMP | BPF_TAIL_CALL:
>                 mark_tail_call(ctx);
> -               if (emit_bpf_tail_call(ctx) < 0)
> +               if (emit_bpf_tail_call(i, ctx) < 0)
>                         return -EINVAL;
>                 break;
>
> @@ -1342,7 +1333,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
>         if (tmp_blinded)
>                 bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
>
> -       out_offset = -1;
>
>         return prog;
>
> --
> 2.43.0
>
回复:[PATCH] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
Posted by jianghaoran 6 months, 3 weeks ago



在 2025-05-29星期四的 10:02 +0800,Hengqi Chen写道:
> Hi Haoran,
> 
> On Wed, May 28, 2025 at 6:40 PM Haoran Jiang <
> jianghaoran@kylinos.cn
> > wrote:
> > For a ebpf subprog JIT,the last call bpf_int_jit_compile
> > function will
> > directly enter the skip_init_ctx process. At this point,
> > out_offset = -1,
> > the jmp_offset in emit_bpf_tail_call is calculated
> > by #define jmp_offset (out_offset - (cur_offset)) is a negative
> > number,
> > which does not meet expectations.The final generated assembly
> > as follow.
> > 
> > 54:     bgeu            $a2, $t1, -8        # 0x0000004c
> > 58:     addi.d          $a6, $s5, -1
> > 5c:     bltz            $a6, -16            # 0x0000004c
> > 60:     alsl.d          $t2, $a2, $a1, 0x3
> > 64:     ld.d            $t2, $t2, 264
> > 68:     beq             $t2, $zero, -28     # 0x0000004c
> > 
> > Before apply this patch, the follow test case will reveal soft
> > lock issues.
> > 
> > cd tools/testing/selftests/bpf/
> > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_1
> > 
> > dmesg:
> > watchdog: BUG: soft lockup - CPU#2 stuck for 26s!
> > [test_progs:25056]
> > 
> 
> This is a known issue. Does this change pass all tailcall tests ?
> If not, please refer to the tailcall hierarchy patchset([1]).
> We should address it once and for all. Thanks.
> 
>   [1]: 
> https://lore.kernel.org/bpf/20240714123902.32305-1-hffilwlqm@gmail.com/
> 
> Thanks,I'll keep looking into these patches.
> > Signed-off-by: Haoran Jiang <
> > jianghaoran@kylinos.cn
> > >
> > ---
> >  arch/loongarch/net/bpf_jit.c | 28 +++++++++-------------------
> >  1 file changed, 9 insertions(+), 19 deletions(-)
> > 
> > diff --git a/arch/loongarch/net/bpf_jit.c
> > b/arch/loongarch/net/bpf_jit.c
> > index fa1500d4aa3e..d85490e7de89 100644
> > --- a/arch/loongarch/net/bpf_jit.c
> > +++ b/arch/loongarch/net/bpf_jit.c
> > @@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
> >         return true;
> >  }
> > 
> > -/* initialized on the first pass of build_body() */
> > -static int out_offset = -1;
> > -static int emit_bpf_tail_call(struct jit_ctx *ctx)
> > +static int emit_bpf_tail_call(int insn, struct jit_ctx *ctx)
> >  {
> >         int off;
> >         u8 tcc = tail_call_reg(ctx);
> > @@ -220,9 +218,8 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> >         u8 t2 = LOONGARCH_GPR_T2;
> >         u8 t3 = LOONGARCH_GPR_T3;
> >         const int idx0 = ctx->idx;
> > -
> > -#define cur_offset (ctx->idx - idx0)
> > -#define jmp_offset (out_offset - (cur_offset))
> > +       int tc_ninsn = 0;
> > +       int jmp_offset = 0;
> > 
> >         /*
> >          * a0: &ctx
> > @@ -232,8 +229,11 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> >          * if (index >= array->map.max_entries)
> >          *       goto out;
> >          */
> > +       tc_ninsn = insn ? ctx->offset[insn+1] - ctx-
> > >offset[insn] :
> > +               ctx->offset[0];
> >         off = offsetof(struct bpf_array, map.max_entries);
> >         emit_insn(ctx, ldwu, t1, a1, off);
> > +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
> >         /* bgeu $a2, $t1, jmp_offset */
> >         if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset)
> > < 0)
> >                 goto toofar;
> > @@ -243,6 +243,7 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> >          *       goto out;
> >          */
> >         emit_insn(ctx, addid, REG_TCC, tcc, -1);
> > +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
> >         if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC,
> > LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> >                 goto toofar;
> > 
> > @@ -254,6 +255,7 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> >         emit_insn(ctx, alsld, t2, a2, a1, 2);
> >         off = offsetof(struct bpf_array, ptrs);
> >         emit_insn(ctx, ldd, t2, t2, off);
> > +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
> >         /* beq $t2, $zero, jmp_offset */
> >         if (emit_tailcall_jmp(ctx, BPF_JEQ, t2,
> > LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> >                 goto toofar;
> > @@ -263,22 +265,11 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> >         emit_insn(ctx, ldd, t3, t2, off);
> >         __build_epilogue(ctx, true);
> > 
> > -       /* out: */
> > -       if (out_offset == -1)
> > -               out_offset = cur_offset;
> > -       if (cur_offset != out_offset) {
> > -               pr_err_once("tail_call out_offset = %d,
> > expected %d!\n",
> > -                           cur_offset, out_offset);
> > -               return -1;
> > -       }
> > -
> >         return 0;
> > 
> >  toofar:
> >         pr_info_once("tail_call: jump too far\n");
> >         return -1;
> > -#undef cur_offset
> > -#undef jmp_offset
> >  }
> > 
> >  static void emit_atomic(const struct bpf_insn *insn, struct
> > jit_ctx *ctx)
> > @@ -916,7 +907,7 @@ static int build_insn(const struct bpf_insn
> > *insn, struct jit_ctx *ctx, bool ext
> >         /* tail call */
> >         case BPF_JMP | BPF_TAIL_CALL:
> >                 mark_tail_call(ctx);
> > -               if (emit_bpf_tail_call(ctx) < 0)
> > +               if (emit_bpf_tail_call(i, ctx) < 0)
> >                         return -EINVAL;
> >                 break;
> > 
> > @@ -1342,7 +1333,6 @@ struct bpf_prog
> > *bpf_int_jit_compile(struct bpf_prog *prog)
> >         if (tmp_blinded)
> >                 bpf_jit_prog_release_other(prog, prog ==
> > orig_prog ? tmp : orig_prog);
> > 
> > -       out_offset = -1;
> > 
> >         return prog;
> > 
> > --
> > 2.43.0
> > 

Re: [PATCH] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
Posted by Huacai Chen 5 months, 4 weeks ago
Hi, Haoran,

On Fri, May 30, 2025 at 9:22 AM jianghaoran <jianghaoran@kylinos.cn> wrote:
>
>
>
>
>
> 在 2025-05-29星期四的 10:02 +0800,Hengqi Chen写道:
> > Hi Haoran,
> >
> > On Wed, May 28, 2025 at 6:40 PM Haoran Jiang <
> > jianghaoran@kylinos.cn
> > > wrote:
> > > For a ebpf subprog JIT,the last call bpf_int_jit_compile
> > > function will
> > > directly enter the skip_init_ctx process. At this point,
> > > out_offset = -1,
> > > the jmp_offset in emit_bpf_tail_call is calculated
> > > by #define jmp_offset (out_offset - (cur_offset)) is a negative
> > > number,
> > > which does not meet expectations.The final generated assembly
> > > as follow.
> > >
> > > 54:     bgeu            $a2, $t1, -8        # 0x0000004c
> > > 58:     addi.d          $a6, $s5, -1
> > > 5c:     bltz            $a6, -16            # 0x0000004c
> > > 60:     alsl.d          $t2, $a2, $a1, 0x3
> > > 64:     ld.d            $t2, $t2, 264
> > > 68:     beq             $t2, $zero, -28     # 0x0000004c
> > >
> > > Before apply this patch, the follow test case will reveal soft
> > > lock issues.
> > >
> > > cd tools/testing/selftests/bpf/
> > > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_1
> > >
> > > dmesg:
> > > watchdog: BUG: soft lockup - CPU#2 stuck for 26s!
> > > [test_progs:25056]
> > >
> >
> > This is a known issue. Does this change pass all tailcall tests ?
> > If not, please refer to the tailcall hierarchy patchset([1]).
> > We should address it once and for all. Thanks.
Do you mean you will update this patch?

Huacai

> >
> >   [1]:
> > https://lore.kernel.org/bpf/20240714123902.32305-1-hffilwlqm@gmail.com/
> >
> > Thanks,I'll keep looking into these patches.
> > > Signed-off-by: Haoran Jiang <
> > > jianghaoran@kylinos.cn
> > > >
> > > ---
> > >  arch/loongarch/net/bpf_jit.c | 28 +++++++++-------------------
> > >  1 file changed, 9 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/arch/loongarch/net/bpf_jit.c
> > > b/arch/loongarch/net/bpf_jit.c
> > > index fa1500d4aa3e..d85490e7de89 100644
> > > --- a/arch/loongarch/net/bpf_jit.c
> > > +++ b/arch/loongarch/net/bpf_jit.c
> > > @@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
> > >         return true;
> > >  }
> > >
> > > -/* initialized on the first pass of build_body() */
> > > -static int out_offset = -1;
> > > -static int emit_bpf_tail_call(struct jit_ctx *ctx)
> > > +static int emit_bpf_tail_call(int insn, struct jit_ctx *ctx)
> > >  {
> > >         int off;
> > >         u8 tcc = tail_call_reg(ctx);
> > > @@ -220,9 +218,8 @@ static int emit_bpf_tail_call(struct
> > > jit_ctx *ctx)
> > >         u8 t2 = LOONGARCH_GPR_T2;
> > >         u8 t3 = LOONGARCH_GPR_T3;
> > >         const int idx0 = ctx->idx;
> > > -
> > > -#define cur_offset (ctx->idx - idx0)
> > > -#define jmp_offset (out_offset - (cur_offset))
> > > +       int tc_ninsn = 0;
> > > +       int jmp_offset = 0;
> > >
> > >         /*
> > >          * a0: &ctx
> > > @@ -232,8 +229,11 @@ static int emit_bpf_tail_call(struct
> > > jit_ctx *ctx)
> > >          * if (index >= array->map.max_entries)
> > >          *       goto out;
> > >          */
> > > +       tc_ninsn = insn ? ctx->offset[insn+1] - ctx-
> > > >offset[insn] :
> > > +               ctx->offset[0];
> > >         off = offsetof(struct bpf_array, map.max_entries);
> > >         emit_insn(ctx, ldwu, t1, a1, off);
> > > +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
> > >         /* bgeu $a2, $t1, jmp_offset */
> > >         if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset)
> > > < 0)
> > >                 goto toofar;
> > > @@ -243,6 +243,7 @@ static int emit_bpf_tail_call(struct
> > > jit_ctx *ctx)
> > >          *       goto out;
> > >          */
> > >         emit_insn(ctx, addid, REG_TCC, tcc, -1);
> > > +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
> > >         if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC,
> > > LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> > >                 goto toofar;
> > >
> > > @@ -254,6 +255,7 @@ static int emit_bpf_tail_call(struct
> > > jit_ctx *ctx)
> > >         emit_insn(ctx, alsld, t2, a2, a1, 2);
> > >         off = offsetof(struct bpf_array, ptrs);
> > >         emit_insn(ctx, ldd, t2, t2, off);
> > > +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
> > >         /* beq $t2, $zero, jmp_offset */
> > >         if (emit_tailcall_jmp(ctx, BPF_JEQ, t2,
> > > LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> > >                 goto toofar;
> > > @@ -263,22 +265,11 @@ static int emit_bpf_tail_call(struct
> > > jit_ctx *ctx)
> > >         emit_insn(ctx, ldd, t3, t2, off);
> > >         __build_epilogue(ctx, true);
> > >
> > > -       /* out: */
> > > -       if (out_offset == -1)
> > > -               out_offset = cur_offset;
> > > -       if (cur_offset != out_offset) {
> > > -               pr_err_once("tail_call out_offset = %d,
> > > expected %d!\n",
> > > -                           cur_offset, out_offset);
> > > -               return -1;
> > > -       }
> > > -
> > >         return 0;
> > >
> > >  toofar:
> > >         pr_info_once("tail_call: jump too far\n");
> > >         return -1;
> > > -#undef cur_offset
> > > -#undef jmp_offset
> > >  }
> > >
> > >  static void emit_atomic(const struct bpf_insn *insn, struct
> > > jit_ctx *ctx)
> > > @@ -916,7 +907,7 @@ static int build_insn(const struct bpf_insn
> > > *insn, struct jit_ctx *ctx, bool ext
> > >         /* tail call */
> > >         case BPF_JMP | BPF_TAIL_CALL:
> > >                 mark_tail_call(ctx);
> > > -               if (emit_bpf_tail_call(ctx) < 0)
> > > +               if (emit_bpf_tail_call(i, ctx) < 0)
> > >                         return -EINVAL;
> > >                 break;
> > >
> > > @@ -1342,7 +1333,6 @@ struct bpf_prog
> > > *bpf_int_jit_compile(struct bpf_prog *prog)
> > >         if (tmp_blinded)
> > >                 bpf_jit_prog_release_other(prog, prog ==
> > > orig_prog ? tmp : orig_prog);
> > >
> > > -       out_offset = -1;
> > >
> > >         return prog;
> > >
> > > --
> > > 2.43.0
> > >
>
>
回复:[PATCH] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
Posted by jianghaoran 5 months, 3 weeks ago



在 2025-06-24星期二的 20:09 +0800,Huacai Chen写道:
> Hi, Haoran,
> 
> On Fri, May 30, 2025 at 9:22 AM jianghaoran <
> jianghaoran@kylinos.cn
> > wrote:
> > 
> > 
> > 
> > 
> > 在 2025-05-29星期四的 10:02 +0800,Hengqi Chen写道:
> > > Hi Haoran,
> > > 
> > > On Wed, May 28, 2025 at 6:40 PM Haoran Jiang <
> > > jianghaoran@kylinos.cn
> > > 
> > > > wrote:
> > > > For a ebpf subprog JIT,the last call bpf_int_jit_compile
> > > > function will
> > > > directly enter the skip_init_ctx process. At this point,
> > > > out_offset = -1,
> > > > the jmp_offset in emit_bpf_tail_call is calculated
> > > > by #define jmp_offset (out_offset - (cur_offset)) is a
> > > > negative
> > > > number,
> > > > which does not meet expectations.The final generated
> > > > assembly
> > > > as follow.
> > > > 
> > > > 54:     bgeu            $a2, $t1, -8        # 0x0000004c
> > > > 58:     addi.d          $a6, $s5, -1
> > > > 5c:     bltz            $a6, -16            # 0x0000004c
> > > > 60:     alsl.d          $t2, $a2, $a1, 0x3
> > > > 64:     ld.d            $t2, $t2, 264
> > > > 68:     beq             $t2, $zero, -28     # 0x0000004c
> > > > 
> > > > Before apply this patch, the follow test case will reveal
> > > > soft
> > > > lock issues.
> > > > 
> > > > cd tools/testing/selftests/bpf/
> > > > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_1
> > > > 
> > > > dmesg:
> > > > watchdog: BUG: soft lockup - CPU#2 stuck for 26s!
> > > > [test_progs:25056]
> > > > 
> > > 
> > > This is a known issue. Does this change pass all tailcall
> > > tests ?
> > > If not, please refer to the tailcall hierarchy patchset([1]).
> > > We should address it once and for all. Thanks.
> 
> Do you mean you will update this patch?
> 
> Huacai
> 

yes, I'm making revisions according to the suggestions.
> 
> > > 
> > >   [1]:
https://lore.kernel.org/bpf/20240714123902.32305-1-hffilwlqm@gmail.com/> > > 
> > > 
> > > Thanks,I'll keep looking into these patches.
> > > > Signed-off-by: Haoran Jiang <
jianghaoran@kylinos.cn> > > > 
> > > > > 
> > > > ---
> > > >  arch/loongarch/net/bpf_jit.c | 28 +++++++++-------------------
> > > >  1 file changed, 9 insertions(+), 19 deletions(-)
> > > > 
> > > > diff --git a/arch/loongarch/net/bpf_jit.c
> > > > b/arch/loongarch/net/bpf_jit.c
> > > > index fa1500d4aa3e..d85490e7de89 100644
> > > > --- a/arch/loongarch/net/bpf_jit.c
> > > > +++ b/arch/loongarch/net/bpf_jit.c
> > > > @@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
> > > >         return true;
> > > >  }
> > > > 
> > > > -/* initialized on the first pass of build_body() */
> > > > -static int out_offset = -1;
> > > > -static int emit_bpf_tail_call(struct jit_ctx *ctx)
> > > > +static int emit_bpf_tail_call(int insn, struct jit_ctx *ctx)
> > > >  {
> > > >         int off;
> > > >         u8 tcc = tail_call_reg(ctx);
> > > > @@ -220,9 +218,8 @@ static int emit_bpf_tail_call(struct
> > > > jit_ctx *ctx)
> > > >         u8 t2 = LOONGARCH_GPR_T2;
> > > >         u8 t3 = LOONGARCH_GPR_T3;
> > > >         const int idx0 = ctx->idx;
> > > > -
> > > > -#define cur_offset (ctx->idx - idx0)
> > > > -#define jmp_offset (out_offset - (cur_offset))
> > > > +       int tc_ninsn = 0;
> > > > +       int jmp_offset = 0;
> > > > 
> > > >         /*
> > > >          * a0: &ctx
> > > > @@ -232,8 +229,11 @@ static int emit_bpf_tail_call(struct
> > > > jit_ctx *ctx)
> > > >          * if (index >= array->map.max_entries)
> > > >          *       goto out;
> > > >          */
> > > > +       tc_ninsn = insn ? ctx->offset[insn+1] - ctx-
> > > > > offset[insn] :
> > > > +               ctx->offset[0];
> > > >         off = offsetof(struct bpf_array, map.max_entries);
> > > >         emit_insn(ctx, ldwu, t1, a1, off);
> > > > +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
> > > >         /* bgeu $a2, $t1, jmp_offset */
> > > >         if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset)
> > > > < 0)
> > > >                 goto toofar;
> > > > @@ -243,6 +243,7 @@ static int emit_bpf_tail_call(struct
> > > > jit_ctx *ctx)
> > > >          *       goto out;
> > > >          */
> > > >         emit_insn(ctx, addid, REG_TCC, tcc, -1);
> > > > +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
> > > >         if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC,
> > > > LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> > > >                 goto toofar;
> > > > 
> > > > @@ -254,6 +255,7 @@ static int emit_bpf_tail_call(struct
> > > > jit_ctx *ctx)
> > > >         emit_insn(ctx, alsld, t2, a2, a1, 2);
> > > >         off = offsetof(struct bpf_array, ptrs);
> > > >         emit_insn(ctx, ldd, t2, t2, off);
> > > > +       jmp_offset = tc_ninsn - (ctx->idx - idx0);
> > > >         /* beq $t2, $zero, jmp_offset */
> > > >         if (emit_tailcall_jmp(ctx, BPF_JEQ, t2,
> > > > LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> > > >                 goto toofar;
> > > > @@ -263,22 +265,11 @@ static int emit_bpf_tail_call(struct
> > > > jit_ctx *ctx)
> > > >         emit_insn(ctx, ldd, t3, t2, off);
> > > >         __build_epilogue(ctx, true);
> > > > 
> > > > -       /* out: */
> > > > -       if (out_offset == -1)
> > > > -               out_offset = cur_offset;
> > > > -       if (cur_offset != out_offset) {
> > > > -               pr_err_once("tail_call out_offset = %d,
> > > > expected %d!\n",
> > > > -                           cur_offset, out_offset);
> > > > -               return -1;
> > > > -       }
> > > > -
> > > >         return 0;
> > > > 
> > > >  toofar:
> > > >         pr_info_once("tail_call: jump too far\n");
> > > >         return -1;
> > > > -#undef cur_offset
> > > > -#undef jmp_offset
> > > >  }
> > > > 
> > > >  static void emit_atomic(const struct bpf_insn *insn, struct
> > > > jit_ctx *ctx)
> > > > @@ -916,7 +907,7 @@ static int build_insn(const struct bpf_insn
> > > > *insn, struct jit_ctx *ctx, bool ext
> > > >         /* tail call */
> > > >         case BPF_JMP | BPF_TAIL_CALL:
> > > >                 mark_tail_call(ctx);
> > > > -               if (emit_bpf_tail_call(ctx) < 0)
> > > > +               if (emit_bpf_tail_call(i, ctx) < 0)
> > > >                         return -EINVAL;
> > > >                 break;
> > > > 
> > > > @@ -1342,7 +1333,6 @@ struct bpf_prog
> > > > *bpf_int_jit_compile(struct bpf_prog *prog)
> > > >         if (tmp_blinded)
> > > >                 bpf_jit_prog_release_other(prog, prog ==
> > > > orig_prog ? tmp : orig_prog);
> > > > 
> > > > -       out_offset = -1;
> > > > 
> > > >         return prog;
> > > > 
> > > > --
> > > > 2.43.0
> > > > 
> > 
> >