This commit adds support for BPF dynamic code modification on the
LoongArch architecture:
1. Implement bpf_arch_text_poke() for runtime instruction patching.
2. Add bpf_arch_text_copy() for instruction block copying.
3. Create bpf_arch_text_invalidate() for code invalidation.
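For context, a hedged sketch of how the BPF core exercises
bpf_arch_text_poke() at a patch site (the variable names here are
illustrative only, not taken from kernel code):

	/* Turn the reserved NOPs at ip into a call to a trampoline image. */
	err = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, tr_image);

	/* Later, detach by restoring the NOPs. */
	err = bpf_arch_text_poke(ip, BPF_MOD_CALL, tr_image, NULL);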
On LoongArch, symbol addresses in the direct mapping region cannot be
reached via relative jump instructions from the paged mapping region,
so we use the move_imm+jirl instruction pair as an absolute jump. The
pair expands to 2-5 instructions, so we reserve 5 NOP instructions in
the program as placeholders for function jumps.
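For illustration, the worst case (a full 64-bit target address) could
look like the sequence below; the exact immediate-building instructions
are an assumption of this sketch, since move_imm() picks them based on
the address value:

	lu12i.w	$t1, addr[31:12]
	ori	$t1, $t1, addr[11:0]
	lu32i.d	$t1, addr[51:32]
	lu52i.d	$t1, $t1, addr[63:52]
	jirl	$zero, $t1, 0	/* $t0 instead of $zero for a call */

That is five instructions, which is why five NOPs are reserved.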
larch_insn_text_copy() is used solely by BPF. It requires the
destination range to be page-aligned; currently, only the size of
the trampoline is page-aligned.
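As a hedged sketch of what this implies for callers (image_insns and
image_size are hypothetical names, not part of this patch), a buffer
handed to larch_insn_text_copy() is sized in whole pages:

	size_t image_size = round_up(image_insns * sizeof(u32), PAGE_SIZE);

so that the set_memory_rw()/set_memory_rox() calls inside
larch_insn_text_copy() never change the permissions of pages owned by
unrelated code.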
Co-developed-by: George Guo <guodongtai@kylinos.cn>
Signed-off-by: George Guo <guodongtai@kylinos.cn>
Signed-off-by: Chenghao Duan <duanchenghao@kylinos.cn>
---
arch/loongarch/include/asm/inst.h | 1 +
arch/loongarch/kernel/inst.c | 27 ++++++++
arch/loongarch/net/bpf_jit.c | 104 ++++++++++++++++++++++++++++++
3 files changed, 132 insertions(+)
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 2ae96a35d..88bb73e46 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -497,6 +497,7 @@ void arch_simulate_insn(union loongarch_instruction insn, struct pt_regs *regs);
int larch_insn_read(void *addr, u32 *insnp);
int larch_insn_write(void *addr, u32 insn);
int larch_insn_patch_text(void *addr, u32 insn);
+int larch_insn_text_copy(void *dst, void *src, size_t len);
u32 larch_insn_gen_nop(void);
u32 larch_insn_gen_b(unsigned long pc, unsigned long dest);
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
index 674e3b322..7df63a950 100644
--- a/arch/loongarch/kernel/inst.c
+++ b/arch/loongarch/kernel/inst.c
@@ -4,6 +4,7 @@
*/
#include <linux/sizes.h>
#include <linux/uaccess.h>
+#include <linux/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/inst.h>
@@ -218,6 +219,32 @@ int larch_insn_patch_text(void *addr, u32 insn)
return ret;
}
+int larch_insn_text_copy(void *dst, void *src, size_t len)
+{
+	int ret;
+	unsigned long flags;
+	unsigned long dst_start, dst_end, dst_len;
+
+	dst_start = round_down((unsigned long)dst, PAGE_SIZE);
+	dst_end = round_up((unsigned long)dst + len, PAGE_SIZE);
+	dst_len = dst_end - dst_start;
+
+	set_memory_rw(dst_start, dst_len / PAGE_SIZE);
+	raw_spin_lock_irqsave(&patch_lock, flags);
+
+	ret = copy_to_kernel_nofault(dst, src, len);
+	if (ret)
+		pr_err("%s: operation failed\n", __func__);
+
+	raw_spin_unlock_irqrestore(&patch_lock, flags);
+	set_memory_rox(dst_start, dst_len / PAGE_SIZE);
+
+	if (!ret)
+		flush_icache_range((unsigned long)dst, (unsigned long)dst + len);
+
+	return ret;
+}
+
u32 larch_insn_gen_nop(void)
{
return INSN_NOP;
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 7032f11d3..5e6ae7e0e 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -4,8 +4,12 @@
*
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
+#include <linux/memory.h>
#include "bpf_jit.h"
+#define LOONGARCH_LONG_JUMP_NINSNS 5
+#define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
+
#define REG_TCC LOONGARCH_GPR_A6
#define TCC_SAVED LOONGARCH_GPR_S5
@@ -88,6 +92,7 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
*/
static void build_prologue(struct jit_ctx *ctx)
{
+	int i;
int stack_adjust = 0, store_offset, bpf_stack_adjust;
bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
@@ -98,6 +103,10 @@ static void build_prologue(struct jit_ctx *ctx)
stack_adjust = round_up(stack_adjust, 16);
stack_adjust += bpf_stack_adjust;
+	/* Reserve space for the move_imm + jirl instruction sequence */
+	for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
+		emit_insn(ctx, nop);
+
/*
* First instruction initializes the tail call count (TCC).
* On tail call we skip this instruction, and the TCC is
@@ -1367,3 +1376,98 @@ bool bpf_jit_supports_subprog_tailcalls(void)
{
return true;
}
+
+static int emit_jump_and_link(struct jit_ctx *ctx, u8 rd, u64 target)
+{
+	if (!target) {
+		pr_err("bpf_jit: invalid jump target address\n");
+		return -EFAULT;
+	}
+
+	move_imm(ctx, LOONGARCH_GPR_T1, target, false);
+	emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0);
+
+	return 0;
+}
+
+static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
+{
+	struct jit_ctx ctx;
+
+	ctx.idx = 0;
+	ctx.image = (union loongarch_instruction *)insns;
+
+	if (!target) {
+		emit_insn((&ctx), nop);
+		emit_insn((&ctx), nop);
+		return 0;
+	}
+
+	return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_T0 : LOONGARCH_GPR_ZERO,
+				  (unsigned long)target);
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
+		       void *old_addr, void *new_addr)
+{
+	u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
+	u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
+	bool is_call = poke_type == BPF_MOD_CALL;
+	int ret;
+
+	if (!is_kernel_text((unsigned long)ip) &&
+	    !is_bpf_text_address((unsigned long)ip))
+		return -ENOTSUPP;
+
+	ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call);
+	if (ret)
+		return ret;
+
+	if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES))
+		return -EFAULT;
+
+	ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call);
+	if (ret)
+		return ret;
+
+	mutex_lock(&text_mutex);
+	if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES))
+		ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES);
+	mutex_unlock(&text_mutex);
+	return ret;
+}
+
+int bpf_arch_text_invalidate(void *dst, size_t len)
+{
+	int i;
+	int ret = 0;
+	u32 *inst;
+
+	inst = kvmalloc(len, GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	for (i = 0; i < (len / sizeof(u32)); i++)
+		inst[i] = INSN_BREAK;
+
+	mutex_lock(&text_mutex);
+	if (larch_insn_text_copy(dst, inst, len))
+		ret = -EINVAL;
+	mutex_unlock(&text_mutex);
+
+	kvfree(inst);
+	return ret;
+}
+
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+	int ret;
+
+	mutex_lock(&text_mutex);
+	ret = larch_insn_text_copy(dst, src, len);
+	mutex_unlock(&text_mutex);
+	if (ret)
+		return ERR_PTR(-EINVAL);
+
+	return dst;
+}
--
2.25.1
On Wed, Jul 30, 2025 at 9:13 PM Chenghao Duan <duanchenghao@kylinos.cn> wrote:
>
> This commit adds support for BPF dynamic code modification on the
> LoongArch architecture:
>
[...]
>
> +static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
> +{
> +	struct jit_ctx ctx;
> +
> +	ctx.idx = 0;
> +	ctx.image = (union loongarch_instruction *)insns;
> +
> +	if (!target) {
> +		emit_insn((&ctx), nop);
> +		emit_insn((&ctx), nop);

There should be 5 nops, no ?

> +		return 0;
> +	}
>
[...]

bpf_arch_text_invalidate() and bpf_arch_text_copy() is not related to
BPF trampoline, right ?
On Mon, Aug 4, 2025 at 10:02 AM Hengqi Chen <hengqi.chen@gmail.com> wrote:
>
> On Wed, Jul 30, 2025 at 9:13 PM Chenghao Duan <duanchenghao@kylinos.cn> wrote:
[...]
> > +	if (!target) {
> > +		emit_insn((&ctx), nop);
> > +		emit_insn((&ctx), nop);
>
> There should be 5 nops, no ?

Chenghao,

We have already fixed the concurrent problem, now this is the only
issue, please reply as soon as possible.

Huacai
On Tue, Aug 05, 2025 at 12:10:05PM +0800, Huacai Chen wrote:
> On Mon, Aug 4, 2025 at 10:02 AM Hengqi Chen <hengqi.chen@gmail.com> wrote:
[...]
> > There should be 5 nops, no ?
>
> Chenghao,
>
> We have already fixed the concurrent problem, now this is the only
> issue, please reply as soon as possible.
>
> Huacai

Hi Hengqi & Huacai,

I'm sorry I just saw the email.
This position can be configured with 5 NOP instructions, and I have
tested it successfully.

sudo ./test_progs -a fentry_test/fentry
sudo ./test_progs -a fexit_test/fexit
sudo ./test_progs -a fentry_fexit
sudo ./test_progs -a modify_return
sudo ./test_progs -a fexit_sleep
sudo ./test_progs -a test_overhead
sudo ./test_progs -a trampoline_count
sudo ./test_progs -a fexit_bpf2bpf

	if (!target) {
		int i;

		for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
			emit_insn((&ctx), nop);
		return 0;
	}

Chenghao

> > bpf_arch_text_invalidate() and bpf_arch_text_copy() is not related to
> > BPF trampoline, right ?

From the perspective of BPF core source code calls, the two functions
bpf_arch_text_invalidate() and bpf_arch_text_copy() are not only used
for trampolines.
On Tue, Aug 5, 2025 at 2:30 PM Chenghao Duan <duanchenghao@kylinos.cn> wrote:
[...]
> I'm sorry I just saw the email.
> This position can be configured with 5 NOP instructions, and I have
> tested it successfully.

OK, now loongarch-next [1] has integrated all needed changes, you and
Vincent can test to see if everything is OK.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git/log/?h=loongarch-next

Huacai
On Tue, Aug 5, 2025 at 4:13 AM Huacai Chen <chenhuacai@kernel.org> wrote:
>
> OK, now loongarch-next [1] has integrated all needed changes, you and
> Vincent can test to see if everything is OK.
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git/log/?h=loongarch-next

Tested-by: Vincent Li <vincent.mc.li@gmail.com>
On Tue, Aug 05, 2025 at 07:13:04PM +0800, Huacai Chen wrote: > On Tue, Aug 5, 2025 at 2:30 PM Chenghao Duan <duanchenghao@kylinos.cn> wrote: > > > > On Tue, Aug 05, 2025 at 12:10:05PM +0800, Huacai Chen wrote: > > > On Mon, Aug 4, 2025 at 10:02 AM Hengqi Chen <hengqi.chen@gmail.com> wrote: > > > > > > > > On Wed, Jul 30, 2025 at 9:13 PM Chenghao Duan <duanchenghao@kylinos.cn> wrote: > > > > > > > > > > This commit adds support for BPF dynamic code modification on the > > > > > LoongArch architecture.: > > > > > 1. Implement bpf_arch_text_poke() for runtime instruction patching. > > > > > 2. Add bpf_arch_text_copy() for instruction block copying. > > > > > 3. Create bpf_arch_text_invalidate() for code invalidation. > > > > > > > > > > On LoongArch, since symbol addresses in the direct mapping > > > > > region cannot be reached via relative jump instructions from the paged > > > > > mapping region, we use the move_imm+jirl instruction pair as absolute > > > > > jump instructions. These require 2-5 instructions, so we reserve 5 NOP > > > > > instructions in the program as placeholders for function jumps. > > > > > > > > > > larch_insn_text_copy is solely used for BPF. The use of > > > > > larch_insn_text_copy() requires page_size alignment. Currently, only > > > > > the size of the trampoline is page-aligned. > > > > > > > > > > Co-developed-by: George Guo <guodongtai@kylinos.cn> > > > > > Signed-off-by: George Guo <guodongtai@kylinos.cn> > > > > > Signed-off-by: Chenghao Duan <duanchenghao@kylinos.cn> > > > > > --- > > > > > arch/loongarch/include/asm/inst.h | 1 + > > > > > arch/loongarch/kernel/inst.c | 27 ++++++++ > > > > > arch/loongarch/net/bpf_jit.c | 104 ++++++++++++++++++++++++++++++ > > > > > 3 files changed, 132 insertions(+) > > > > > > > > > > diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h > > > > > index 2ae96a35d..88bb73e46 100644 > > > > > --- a/arch/loongarch/include/asm/inst.h > > > > > +++ b/arch/loongarch/include/asm/inst.h > > > > > @@ -497,6 +497,7 @@ void arch_simulate_insn(union loongarch_instruction insn, struct pt_regs *regs); > > > > > int larch_insn_read(void *addr, u32 *insnp); > > > > > int larch_insn_write(void *addr, u32 insn); > > > > > int larch_insn_patch_text(void *addr, u32 insn); > > > > > +int larch_insn_text_copy(void *dst, void *src, size_t len); > > > > > > > > > > u32 larch_insn_gen_nop(void); > > > > > u32 larch_insn_gen_b(unsigned long pc, unsigned long dest); > > > > > diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c > > > > > index 674e3b322..7df63a950 100644 > > > > > --- a/arch/loongarch/kernel/inst.c > > > > > +++ b/arch/loongarch/kernel/inst.c > > > > > @@ -4,6 +4,7 @@ > > > > > */ > > > > > #include <linux/sizes.h> > > > > > #include <linux/uaccess.h> > > > > > +#include <linux/set_memory.h> > > > > > > > > > > #include <asm/cacheflush.h> > > > > > #include <asm/inst.h> > > > > > @@ -218,6 +219,32 @@ int larch_insn_patch_text(void *addr, u32 insn) > > > > > return ret; > > > > > } > > > > > > > > > > +int larch_insn_text_copy(void *dst, void *src, size_t len) > > > > > +{ > > > > > + int ret; > > > > > + unsigned long flags; > > > > > + unsigned long dst_start, dst_end, dst_len; > > > > > + > > > > > + dst_start = round_down((unsigned long)dst, PAGE_SIZE); > > > > > + dst_end = round_up((unsigned long)dst + len, PAGE_SIZE); > > > > > + dst_len = dst_end - dst_start; > > > > > + > > > > > + set_memory_rw(dst_start, dst_len / PAGE_SIZE); > > > > > + raw_spin_lock_irqsave(&patch_lock, 
flags);
> > > > > +
> > > > > +	ret = copy_to_kernel_nofault(dst, src, len);
> > > > > +	if (ret)
> > > > > +		pr_err("%s: operation failed\n", __func__);
> > > > > +
> > > > > +	raw_spin_unlock_irqrestore(&patch_lock, flags);
> > > > > +	set_memory_rox(dst_start, dst_len / PAGE_SIZE);
> > > > > +
> > > > > +	if (!ret)
> > > > > +		flush_icache_range((unsigned long)dst, (unsigned long)dst + len);
> > > > > +
> > > > > +	return ret;
> > > > > +}
> > > > > +
> > > > >  u32 larch_insn_gen_nop(void)
> > > > >  {
> > > > >  	return INSN_NOP;
> > > > > diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
> > > > > index 7032f11d3..5e6ae7e0e 100644
> > > > > --- a/arch/loongarch/net/bpf_jit.c
> > > > > +++ b/arch/loongarch/net/bpf_jit.c
> > > > > @@ -4,8 +4,12 @@
> > > > >   *
> > > > >   * Copyright (C) 2022 Loongson Technology Corporation Limited
> > > > >   */
> > > > > +#include <linux/memory.h>
> > > > >  #include "bpf_jit.h"
> > > > >
> > > > > +#define LOONGARCH_LONG_JUMP_NINSNS	5
> > > > > +#define LOONGARCH_LONG_JUMP_NBYTES	(LOONGARCH_LONG_JUMP_NINSNS * 4)
> > > > > +
> > > > >  #define REG_TCC		LOONGARCH_GPR_A6
> > > > >  #define TCC_SAVED	LOONGARCH_GPR_S5
> > > > >
> > > > > @@ -88,6 +92,7 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
> > > > >   */
> > > > >  static void build_prologue(struct jit_ctx *ctx)
> > > > >  {
> > > > > +	int i;
> > > > >  	int stack_adjust = 0, store_offset, bpf_stack_adjust;
> > > > >
> > > > >  	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
> > > > > @@ -98,6 +103,10 @@ static void build_prologue(struct jit_ctx *ctx)
> > > > >  	stack_adjust = round_up(stack_adjust, 16);
> > > > >  	stack_adjust += bpf_stack_adjust;
> > > > >
> > > > > +	/* Reserve space for the move_imm + jirl instructions */
> > > > > +	for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
> > > > > +		emit_insn(ctx, nop);
> > > > > +
> > > > >  	/*
> > > > >  	 * First instruction initializes the tail call count (TCC).
> > > > >  	 * On tail call we skip this instruction, and the TCC is
> > > > > @@ -1367,3 +1376,98 @@ bool bpf_jit_supports_subprog_tailcalls(void)
> > > > >  {
> > > > >  	return true;
> > > > >  }
> > > > > +
> > > > > +static int emit_jump_and_link(struct jit_ctx *ctx, u8 rd, u64 target)
> > > > > +{
> > > > > +	if (!target) {
> > > > > +		pr_err("bpf_jit: jump target address is error\n");
> > > > > +		return -EFAULT;
> > > > > +	}
> > > > > +
> > > > > +	move_imm(ctx, LOONGARCH_GPR_T1, target, false);
> > > > > +	emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0);
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
> > > > > +{
> > > > > +	struct jit_ctx ctx;
> > > > > +
> > > > > +	ctx.idx = 0;
> > > > > +	ctx.image = (union loongarch_instruction *)insns;
> > > > > +
> > > > > +	if (!target) {
> > > > > +		emit_insn((&ctx), nop);
> > > > > +		emit_insn((&ctx), nop);
> > > >
> > > > There should be 5 nops, no?
> > >
> > > Chenghao,
> > >
> > > We have already fixed the concurrency problem; now this is the only
> > > remaining issue, please reply as soon as possible.
> > >
> > > Huacai
> >
> > Hi Hengqi & Huacai,
> >
> > I'm sorry, I just saw the email.
> > This location can be filled with 5 NOP instructions, and I have
> > tested it successfully.
>
> OK, now loongarch-next [1] has integrated all needed changes; you and
> Vincent can test to see if everything is OK.
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git/log/?h=loongarch-next
>
> Huacai

The following test items have been tested successfully:

./test_progs -a fentry_test/fentry
./test_progs -a fexit_test/fexit
./test_progs -a fentry_fexit
./test_progs -a modify_return
./test_progs -a fexit_sleep
./test_progs -a test_overhead
./test_progs -a trampoline_count
./test_progs -a fexit_bpf2bpf

./test_progs -t struct_ops -d struct_ops_multi_pages
#15/1 bad_struct_ops/invalid_prog_reuse:OK
#15/2 bad_struct_ops/unused_program:OK
#15 bad_struct_ops:OK
#408/1 struct_ops_autocreate/cant_load_full_object:OK
#408/2 struct_ops_autocreate/can_load_partial_object:OK
#408/3 struct_ops_autocreate/autoload_and_shadow_vars:OK
#408/4 struct_ops_autocreate/optional_maps:OK
#408 struct_ops_autocreate:OK
#409/1 struct_ops_kptr_return/kptr_return:OK
#409/2 struct_ops_kptr_return/kptr_return_fail__wrong_type:OK
#409/3 struct_ops_kptr_return/kptr_return_fail__invalid_scalar:OK
#409/4 struct_ops_kptr_return/kptr_return_fail__nonzero_offset:OK
#409/5 struct_ops_kptr_return/kptr_return_fail__local_kptr:OK
#409 struct_ops_kptr_return:OK
#410/1 struct_ops_maybe_null/maybe_null:OK
#410/2 struct_ops_maybe_null/maybe_null_fail:OK
#410 struct_ops_maybe_null:OK
#411/1 struct_ops_module/struct_ops_load:OK
#411/2 struct_ops_module/struct_ops_not_zeroed:OK
#411/3 struct_ops_module/struct_ops_incompatible:OK
#411/4 struct_ops_module/struct_ops_null_out_cb:OK
#411/5 struct_ops_module/struct_ops_forgotten_cb:OK
#411/6 struct_ops_module/test_detach_link:OK
#411/7 struct_ops_module/unsupported_ops:OK
#411 struct_ops_module:OK
#413/1 struct_ops_no_cfi/load_bpf_test_no_cfi:OK
#413 struct_ops_no_cfi:OK
#414/1 struct_ops_private_stack/private_stack:SKIP
#414/2 struct_ops_private_stack/private_stack_fail:SKIP
#414/3 struct_ops_private_stack/private_stack_recur:SKIP
#414 struct_ops_private_stack:SKIP
#415/1 struct_ops_refcounted/refcounted:OK
#415/2 struct_ops_refcounted/refcounted_fail__ref_leak:OK
#415/3 struct_ops_refcounted/refcounted_fail__global_subprog:OK
#415/4 struct_ops_refcounted/refcounted_fail__tail_call:OK
#415 struct_ops_refcounted:OK
Summary: 8/25 PASSED, 3 SKIPPED, 0 FAILED

while true; do ./test_progs -a fentry_attach_stress; sleep 1; done
(Looped 60 times.)

Chenghao

> >
> > sudo ./test_progs -a fentry_test/fentry
> > sudo ./test_progs -a fexit_test/fexit
> > sudo ./test_progs -a fentry_fexit
> > sudo ./test_progs -a modify_return
> > sudo ./test_progs -a fexit_sleep
> > sudo ./test_progs -a test_overhead
> > sudo ./test_progs -a trampoline_count
> > sudo ./test_progs -a fexit_bpf2bpf
> >
> > if (!target) {
> > 	int i;
> >
> > 	for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
> > 		emit_insn((&ctx), nop);
> > 	return 0;
> > }
> >
> > Chenghao
> >
> > > > > +		return 0;
> > > > > +	}
> > > > > +
> > > > > +	return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_T0 : LOONGARCH_GPR_ZERO,
> > > > > +				  (unsigned long)target);
> > > > > +}
> > > > > +
> > > > > +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
> > > > > +		       void *old_addr, void *new_addr)
> > > > > +{
> > > > > +	u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
> > > > > +	u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
> > > > > +	bool is_call = poke_type == BPF_MOD_CALL;
> > > > > +	int ret;
> > > > > +
> > > > > +	if (!is_kernel_text((unsigned long)ip) &&
> > > > > +	    !is_bpf_text_address((unsigned long)ip))
> > > > > +		return -ENOTSUPP;
> > > > > +
> > > > > +	ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call);
> > > > > +	if (ret)
> > > > > +		return ret;
> > > > > +
> > > > > +	if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES))
> > > > > +		return -EFAULT;
> > > > > +
> > > > > +	ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call);
> > > > > +	if (ret)
> > > > > +		return ret;
> > > > > +
> > > > > +	mutex_lock(&text_mutex);
> > > > > +	if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES))
> > > > > +		ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES);
> > > > > +	mutex_unlock(&text_mutex);
> > > > > +	return ret;
> > > > > +}
> > > > > +
> > > > > +int bpf_arch_text_invalidate(void *dst, size_t len)
> > > > > +{
> > > > > +	int i;
> > > > > +	int ret = 0;
> > > > > +	u32 *inst;
> > > > > +
> > > > > +	inst = kvmalloc(len, GFP_KERNEL);
> > > > > +	if (!inst)
> > > > > +		return -ENOMEM;
> > > > > +
> > > > > +	for (i = 0; i < (len / sizeof(u32)); i++)
> > > > > +		inst[i] = INSN_BREAK;
> > > > > +
> > > > > +	mutex_lock(&text_mutex);
> > > > > +	if (larch_insn_text_copy(dst, inst, len))
> > > > > +		ret = -EINVAL;
> > > > > +	mutex_unlock(&text_mutex);
> > > > > +
> > > > > +	kvfree(inst);
> > > > > +	return ret;
> > > > > +}
> > > > > +
> > > > > +void *bpf_arch_text_copy(void *dst, void *src, size_t len)
> > > > > +{
> > > > > +	int ret;
> > > > > +
> > > > > +	mutex_lock(&text_mutex);
> > > > > +	ret = larch_insn_text_copy(dst, src, len);
> > > > > +	mutex_unlock(&text_mutex);
> > > > > +	if (ret)
> > > > > +		return ERR_PTR(-EINVAL);
> > > > > +
> > > > > +	return dst;
> > > > > +}
> > > > > --
> > > >
> > > > bpf_arch_text_invalidate() and bpf_arch_text_copy() are not related to
> > > > the BPF trampoline, right?
> >
> > From the perspective of BPF core source code calls, the two functions
> > bpf_arch_text_invalidate() and bpf_arch_text_copy() are not only used for
> > trampolines.
> >
> > > > > 2.25.1
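
As context for the poke discussion above, here is a minimal sketch of how a
patch site built from the 5 reserved NOPs is expected to move between its
states through bpf_arch_text_poke(). It is illustrative only, not part of the
posted patch; the function wrapper and the `ip`/`tramp_a`/`tramp_b` pointers
are hypothetical names:

	#include <linux/bpf.h>

	static int poke_site_example(void *ip, void *tramp_a, void *tramp_b)
	{
		int err;

		/* NOPs -> call: old_addr == NULL, so gen_jump_or_nops()
		 * regenerates the 5 NOPs and memcmp() checks the site
		 * really still holds them before patching in the call. */
		err = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, tramp_a);
		if (err)
			return err;

		/* call -> call: the expected old instructions are rebuilt
		 * from old_addr and verified the same way. */
		err = bpf_arch_text_poke(ip, BPF_MOD_CALL, tramp_a, tramp_b);
		if (err)
			return err;

		/* call -> NOPs: a NULL new_addr writes the NOPs back. */
		return bpf_arch_text_poke(ip, BPF_MOD_CALL, tramp_b, NULL);
	}

The memcmp()-before-write step is what makes the transition refuse to patch a
site whose current contents do not match the caller's expectation, returning
-EFAULT instead.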
On Wed, Jul 30, 2025 at 9:13 PM Chenghao Duan <duanchenghao@kylinos.cn> wrote:
>
> This commit adds support for BPF dynamic code modification on the
> LoongArch architecture:
>
> 1. Implement bpf_arch_text_poke() for runtime instruction patching.
> 2. Add bpf_arch_text_copy() for instruction block copying.
> 3. Create bpf_arch_text_invalidate() for code invalidation.
>
[...]
>
> +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
> +		       void *old_addr, void *new_addr)
> +{
[...]
> +	mutex_lock(&text_mutex);
> +	if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES))
> +		ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES);
> +	mutex_unlock(&text_mutex);

The text_mutex and patch_lock inside larch_insn_text_copy() ONLY
prevent concurrent modifications. You may need stop_machine() to
prevent concurrent modifications/executions.

> +	return ret;
> +}
>
[...]
>
> --
> 2.25.1
>
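
One way to get the "no concurrent execution" guarantee asked for above is to
route the copy through stop_machine(), which holds every other CPU in the
stopper thread while the bytes are rewritten. A minimal sketch, reusing the
larch_insn_text_copy() from this patch; the wrapper and argument struct are
hypothetical names, not code that was posted or merged:

	#include <linux/cpumask.h>
	#include <linux/stop_machine.h>

	/* Hypothetical argument block for the stop_machine() callback. */
	struct insn_copy_arg {
		void *dst;
		void *src;
		size_t len;
	};

	static int __do_insn_text_copy(void *data)
	{
		struct insn_copy_arg *arg = data;

		return larch_insn_text_copy(arg->dst, arg->src, arg->len);
	}

	/*
	 * All other CPUs spin in the stopper while the instructions are
	 * rewritten, so none of them can execute the patch site mid-update.
	 * stop_machine() propagates the callback's return value.
	 */
	static int larch_insn_text_copy_stopped(void *dst, void *src, size_t len)
	{
		struct insn_copy_arg arg = { .dst = dst, .src = src, .len = len };

		return stop_machine(__do_insn_text_copy, &arg, cpu_online_mask);
	}

One caveat with this sketch: the set_memory_rw()/set_memory_rox() calls inside
larch_insn_text_copy() may sleep, so in practice the permission flips would
have to happen before entering stop_machine(), leaving only the
copy_to_kernel_nofault() and cache flush in the stopper callback.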