Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
instructions. They are similar to PROBE_MEM instructions with the
following differences:
- PROBE_MEM32 supports store.
- PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
src/dst register
- PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
in the prologue). Due to bpf_arena constructions such R28 + reg +
off16 access is guaranteed to be within arena virtual range, so no
address check at run-time.
- PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
LDX faults the destination register is zeroed.
To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
tmp2 as the new src/dst register. This allows us to reuse most of the
code for normal [LDX | STX | ST].
Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
---
arch/arm64/net/bpf_jit_comp.c | 70 ++++++++++++++++++++++++++++++-----
1 file changed, 60 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c5b461dda438..ce66c17b73a0 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -29,6 +29,7 @@
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
+#define PROBE_MEM32_BASE (MAX_BPF_JIT_REG + 5)
#define check_imm(bits, imm) do { \
if ((((imm) > 0) && ((imm) >> (bits))) || \
@@ -67,6 +68,8 @@ static const int bpf2a64[] = {
/* temporary register for blinding constants */
[BPF_REG_AX] = A64_R(9),
[FP_BOTTOM] = A64_R(27),
+ /* callee saved register for kern_vm_start address */
+ [PROBE_MEM32_BASE] = A64_R(28),
};
struct jit_ctx {
@@ -295,7 +298,7 @@ static bool is_lsi_offset(int offset, int scale)
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
- bool is_exception_cb)
+ bool is_exception_cb, u64 arena_vm_start)
{
const struct bpf_prog *prog = ctx->prog;
const bool is_main_prog = !bpf_is_subprog(prog);
@@ -306,6 +309,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 tcc = bpf2a64[TCALL_CNT];
const u8 fpb = bpf2a64[FP_BOTTOM];
+ const u8 pb = bpf2a64[PROBE_MEM32_BASE];
const int idx0 = ctx->idx;
int cur_offset;
@@ -411,6 +415,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
/* Set up function call stack */
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
+
+ if (arena_vm_start)
+ emit_a64_mov_i64(pb, arena_vm_start, ctx);
+
return 0;
}
@@ -738,6 +746,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb)
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
+#define DONT_CLEAR 32
bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs)
@@ -745,7 +754,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
- regs->regs[dst_reg] = 0;
+ if (dst_reg != DONT_CLEAR)
+ regs->regs[dst_reg] = 0;
regs->pc = (unsigned long)&ex->fixup - offset;
return true;
}
@@ -765,7 +775,8 @@ static int add_exception_handler(const struct bpf_insn *insn,
return 0;
if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
- BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
+ BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEM32)
return 0;
if (!ctx->prog->aux->extable ||
@@ -810,6 +821,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
ex->insn = ins_offset;
+ if (BPF_CLASS(insn->code) != BPF_LDX)
+ dst_reg = DONT_CLEAR;
+
ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
@@ -829,12 +843,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
bool extra_pass)
{
const u8 code = insn->code;
- const u8 dst = bpf2a64[insn->dst_reg];
- const u8 src = bpf2a64[insn->src_reg];
+ u8 dst = bpf2a64[insn->dst_reg];
+ u8 src = bpf2a64[insn->src_reg];
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 fpb = bpf2a64[FP_BOTTOM];
+ const u8 pb = bpf2a64[PROBE_MEM32_BASE];
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
@@ -1237,7 +1252,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
- if (ctx->fpb_offset > 0 && src == fp) {
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ emit(A64_ADD(1, tmp2, src, pb), ctx);
+ src = tmp2;
+ }
+ if (ctx->fpb_offset > 0 && src == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
src_adj = fpb;
off_adj = off + ctx->fpb_offset;
} else {
@@ -1322,7 +1345,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_B:
case BPF_ST | BPF_MEM | BPF_DW:
- if (ctx->fpb_offset > 0 && dst == fp) {
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ emit(A64_ADD(1, tmp2, dst, pb), ctx);
+ dst = tmp2;
+ }
+ if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
dst_adj = fpb;
off_adj = off + ctx->fpb_offset;
} else {
@@ -1365,6 +1396,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
}
break;
}
+
+ ret = add_exception_handler(insn, ctx, dst);
+ if (ret)
+ return ret;
break;
/* STX: *(size *)(dst + off) = src */
@@ -1372,7 +1407,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_STX | BPF_MEM | BPF_H:
case BPF_STX | BPF_MEM | BPF_B:
case BPF_STX | BPF_MEM | BPF_DW:
- if (ctx->fpb_offset > 0 && dst == fp) {
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ emit(A64_ADD(1, tmp2, dst, pb), ctx);
+ dst = tmp2;
+ }
+ if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
dst_adj = fpb;
off_adj = off + ctx->fpb_offset;
} else {
@@ -1413,6 +1456,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
}
break;
}
+
+ ret = add_exception_handler(insn, ctx, dst);
+ if (ret)
+ return ret;
break;
case BPF_STX | BPF_ATOMIC | BPF_W:
@@ -1594,6 +1641,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
+ u64 arena_vm_start;
u8 *image_ptr;
u8 *ro_image_ptr;
@@ -1611,6 +1659,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = tmp;
}
+ arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
jit_data = prog->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
@@ -1648,7 +1697,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
* BPF line info needs ctx->offset[i] to be the offset of
* instruction[i] in jited image, so build prologue first.
*/
- if (build_prologue(&ctx, was_classic, prog->aux->exception_cb)) {
+ if (build_prologue(&ctx, was_classic, prog->aux->exception_cb,
+ arena_vm_start)) {
prog = orig_prog;
goto out_off;
}
@@ -1696,7 +1746,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.idx = 0;
ctx.exentry_idx = 0;
- build_prologue(&ctx, was_classic, prog->aux->exception_cb);
+ build_prologue(&ctx, was_classic, prog->aux->exception_cb, arena_vm_start);
if (build_body(&ctx, extra_pass)) {
prog = orig_prog;
--
2.40.1
On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <puranjay12@gmail.com> wrote:
>
> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
> instructions. They are similar to PROBE_MEM instructions with the
> following differences:
> - PROBE_MEM32 supports store.
> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
> src/dst register
> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
> in the prologue). Due to bpf_arena constructions such R28 + reg +
> off16 access is guaranteed to be within arena virtual range, so no
> address check at run-time.
> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
> LDX faults the destination register is zeroed.
>
> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
> tmp2 as the new src/dst register. This allows us to reuse most of the
> code for normal [LDX | STX | ST].
>
> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> ---
Hi Alexei,
Puranjay and I were discussing this stuff off list and noticed that
atomic instructions are not handled.
It turns out that will cause a kernel crash right now because the
32-bit offset into arena will be dereferenced directly.
e.g. something like this:
@@ -55,6 +56,7 @@ int arena_list_add(void *ctx)
test_val++;
n->value = i;
arena_sum += i;
+ __sync_fetch_and_add(&arena_sum, 0);
list_add_head(&n->node, list_head);
}
#else
I will try to prepare a fix for the x86 JIT. Puranjay will do the same
for his set.
Kumar Kartikeya Dwivedi <memxor@gmail.com> writes: > On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <puranjay12@gmail.com> wrote: >> >> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW] >> instructions. They are similar to PROBE_MEM instructions with the >> following differences: >> - PROBE_MEM32 supports store. >> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the >> src/dst register >> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28 >> in the prologue). Due to bpf_arena constructions such R28 + reg + >> off16 access is guaranteed to be within arena virtual range, so no >> address check at run-time. >> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When >> LDX faults the destination register is zeroed. >> >> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use >> tmp2 as the new src/dst register. This allows us to reuse most of the >> code for normal [LDX | STX | ST]. >> >> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com> >> --- > > Hi Alexei, > Puranjay and I were discussing this stuff off list and noticed that > atomic instructions are not handled. > It turns out that will cause a kernel crash right now because the > 32-bit offset into arena will be dereferenced directly. > > e.g. something like this: > > @@ -55,6 +56,7 @@ int arena_list_add(void *ctx) > test_val++; > n->value = i; > arena_sum += i; > + __sync_fetch_and_add(&arena_sum, 0); > list_add_head(&n->node, list_head); > } > #else > > I will try to prepare a fix for the x86 JIT. Puranjay will do the same > for his set. Yes, testing the change mentioned by Kumar on ARM64 causes a crashes as well: bpf_testmod: loading out-of-tree module taints kernel. bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 Mem abort info: ESR = 0x0000000096000006 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x06: level 2 translation fault Data abort info: ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=00000004043cc000 [0000000000000010] pgd=0800000410d8f003, p4d=0800000410d8f003, pud=0800000405972003, pmd=0000000000000000 Internal error: Oops: 0000000096000006 [#1] SMP Modules linked in: bpf_testmod(OE) nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sha1_ce button sch_fq_codel dm_mod dax configfs dmi_sysfs sha2_ce sha256_arm64 efivarfs CPU: 8 PID: 5631 Comm: test_progs Tainted: G OE 6.8.0+ #2 Hardware name: Amazon EC2 c6g.16xlarge/, BIOS 1.0 11/1/2018 pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8 lr : bpf_prog_8771c336cb6a18eb_arena_list_add+0x144/0x2b8 sp : ffff80008b84bc30 x29: ffff80008b84bca0 x28: ffff8000a5008000 x27: ffff80008b84bc38 x26: 0000000000000000 x25: ffff80008b84bc60 x24: 0000000000000000 x23: 0000000000000000 x22: 0000000000000058 x21: 0000000000000838 x20: 0000000000000000 x19: 0000000100001fe0 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffcc66d2c8 x14: 0000000000000000 x13: 0000000000000000 x12: 000000000004058c x11: ffff8000a5008010 x10: 00000000ffffffff x9 : 00000000000002cf x8 : ffff800082ff4ab8 x7 : 0000000100001000 x6 : 0000000000000001 x5 : 0000000010e5e3fd x4 : 000000003619b978 x3 : 0000000000000010 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000001fe0 Call trace: bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8 bpf_prog_test_run_syscall+0x100/0x340 __sys_bpf+0x8e8/0xa20 __arm64_sys_bpf+0x2c/0x48 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x48/0xf8 do_el0_svc+0x28/0x40 el0_svc+0x58/0x190 el0t_64_sync_handler+0x13c/0x158 el0t_64_sync+0x1a8/0x1b0 Code: 8b010042 8b1c006b f9000162 d2800001 (f821307f) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception SMP: stopping secondary CPUs Kernel Offset: disabled CPU features: 0x0,00000120,7002014a,21407a0b Memory Limit: none Rebooting in 5 seconds.. I will send v2 with the arm64 JIT fix, but I guess verifier has to be modified as well to add BPF_PROBE_MEM32 to atomic instructions. Thanks, Puranjay
On Thu, Mar 14, 2024 at 10:13 AM Puranjay Mohan <puranjay12@gmail.com> wrote: > > Kumar Kartikeya Dwivedi <memxor@gmail.com> writes: > > > On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <puranjay12@gmail.com> wrote: > >> > >> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW] > >> instructions. They are similar to PROBE_MEM instructions with the > >> following differences: > >> - PROBE_MEM32 supports store. > >> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the > >> src/dst register > >> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28 > >> in the prologue). Due to bpf_arena constructions such R28 + reg + > >> off16 access is guaranteed to be within arena virtual range, so no > >> address check at run-time. > >> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When > >> LDX faults the destination register is zeroed. > >> > >> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use > >> tmp2 as the new src/dst register. This allows us to reuse most of the > >> code for normal [LDX | STX | ST]. > >> > >> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com> > >> --- > > > > Hi Alexei, > > Puranjay and I were discussing this stuff off list and noticed that > > atomic instructions are not handled. > > It turns out that will cause a kernel crash right now because the > > 32-bit offset into arena will be dereferenced directly. > > > > e.g. something like this: > > > > @@ -55,6 +56,7 @@ int arena_list_add(void *ctx) > > test_val++; > > n->value = i; > > arena_sum += i; > > + __sync_fetch_and_add(&arena_sum, 0); > > list_add_head(&n->node, list_head); > > } > > #else > > > > I will try to prepare a fix for the x86 JIT. Puranjay will do the same > > for his set. > > Yes, testing the change mentioned by Kumar on ARM64 causes a crashes as well: > > bpf_testmod: loading out-of-tree module taints kernel. > bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel > Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 > Mem abort info: > ESR = 0x0000000096000006 > EC = 0x25: DABT (current EL), IL = 32 bits > SET = 0, FnV = 0 > EA = 0, S1PTW = 0 > FSC = 0x06: level 2 translation fault > Data abort info: > ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 > CM = 0, WnR = 0, TnD = 0, TagAccess = 0 > GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 > user pgtable: 4k pages, 48-bit VAs, pgdp=00000004043cc000 > [0000000000000010] pgd=0800000410d8f003, p4d=0800000410d8f003, pud=0800000405972003, pmd=0000000000000000 > Internal error: Oops: 0000000096000006 [#1] SMP > Modules linked in: bpf_testmod(OE) nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sha1_ce button sch_fq_codel dm_mod dax configfs dmi_sysfs sha2_ce sha256_arm64 efivarfs > CPU: 8 PID: 5631 Comm: test_progs Tainted: G OE 6.8.0+ #2 > Hardware name: Amazon EC2 c6g.16xlarge/, BIOS 1.0 11/1/2018 > pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) > pc : bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8 > lr : bpf_prog_8771c336cb6a18eb_arena_list_add+0x144/0x2b8 > sp : ffff80008b84bc30 > x29: ffff80008b84bca0 x28: ffff8000a5008000 x27: ffff80008b84bc38 > x26: 0000000000000000 x25: ffff80008b84bc60 x24: 0000000000000000 > x23: 0000000000000000 x22: 0000000000000058 x21: 0000000000000838 > x20: 0000000000000000 x19: 0000000100001fe0 x18: 0000000000000000 > x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffcc66d2c8 > x14: 0000000000000000 x13: 0000000000000000 x12: 000000000004058c > x11: ffff8000a5008010 x10: 00000000ffffffff x9 : 00000000000002cf > x8 : ffff800082ff4ab8 x7 : 0000000100001000 x6 : 0000000000000001 > x5 : 0000000010e5e3fd x4 : 000000003619b978 x3 : 0000000000000010 > x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000001fe0 > Call trace: > bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8 > bpf_prog_test_run_syscall+0x100/0x340 > __sys_bpf+0x8e8/0xa20 > __arm64_sys_bpf+0x2c/0x48 > invoke_syscall+0x50/0x128 > el0_svc_common.constprop.0+0x48/0xf8 > do_el0_svc+0x28/0x40 > el0_svc+0x58/0x190 > el0t_64_sync_handler+0x13c/0x158 > el0t_64_sync+0x1a8/0x1b0 > Code: 8b010042 8b1c006b f9000162 d2800001 (f821307f) > ---[ end trace 0000000000000000 ]--- > Kernel panic - not syncing: Oops: Fatal exception > SMP: stopping secondary CPUs > Kernel Offset: disabled > CPU features: 0x0,00000120,7002014a,21407a0b > Memory Limit: none > Rebooting in 5 seconds.. > > I will send v2 with the arm64 JIT fix, but I guess verifier has to be modified > as well to add BPF_PROBE_MEM32 to atomic instructions. The JIT and the verifier changes for atomics might be too big. Let's disable atomics in arena in the verifier for now. Pls send a patch.
On Thu, Mar 14, 2024 at 6:21 PM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Thu, Mar 14, 2024 at 10:13 AM Puranjay Mohan <puranjay12@gmail.com> wrote: > > > > Kumar Kartikeya Dwivedi <memxor@gmail.com> writes: > > > > > On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <puranjay12@gmail.com> wrote: > > >> > > >> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW] > > >> instructions. They are similar to PROBE_MEM instructions with the > > >> following differences: > > >> - PROBE_MEM32 supports store. > > >> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the > > >> src/dst register > > >> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28 > > >> in the prologue). Due to bpf_arena constructions such R28 + reg + > > >> off16 access is guaranteed to be within arena virtual range, so no > > >> address check at run-time. > > >> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When > > >> LDX faults the destination register is zeroed. > > >> > > >> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use > > >> tmp2 as the new src/dst register. This allows us to reuse most of the > > >> code for normal [LDX | STX | ST]. > > >> > > >> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com> > > >> --- > > > > > > Hi Alexei, > > > Puranjay and I were discussing this stuff off list and noticed that > > > atomic instructions are not handled. > > > It turns out that will cause a kernel crash right now because the > > > 32-bit offset into arena will be dereferenced directly. > > > > > > e.g. something like this: > > > > > > @@ -55,6 +56,7 @@ int arena_list_add(void *ctx) > > > test_val++; > > > n->value = i; > > > arena_sum += i; > > > + __sync_fetch_and_add(&arena_sum, 0); > > > list_add_head(&n->node, list_head); > > > } > > > #else > > > > > > I will try to prepare a fix for the x86 JIT. Puranjay will do the same > > > for his set. > > > > Yes, testing the change mentioned by Kumar on ARM64 causes a crashes as well: > > > > bpf_testmod: loading out-of-tree module taints kernel. > > bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel > > Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 > > Mem abort info: > > ESR = 0x0000000096000006 > > EC = 0x25: DABT (current EL), IL = 32 bits > > SET = 0, FnV = 0 > > EA = 0, S1PTW = 0 > > FSC = 0x06: level 2 translation fault > > Data abort info: > > ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 > > CM = 0, WnR = 0, TnD = 0, TagAccess = 0 > > GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 > > user pgtable: 4k pages, 48-bit VAs, pgdp=00000004043cc000 > > [0000000000000010] pgd=0800000410d8f003, p4d=0800000410d8f003, pud=0800000405972003, pmd=0000000000000000 > > Internal error: Oops: 0000000096000006 [#1] SMP > > Modules linked in: bpf_testmod(OE) nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sha1_ce button sch_fq_codel dm_mod dax configfs dmi_sysfs sha2_ce sha256_arm64 efivarfs > > CPU: 8 PID: 5631 Comm: test_progs Tainted: G OE 6.8.0+ #2 > > Hardware name: Amazon EC2 c6g.16xlarge/, BIOS 1.0 11/1/2018 > > pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) > > pc : bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8 > > lr : bpf_prog_8771c336cb6a18eb_arena_list_add+0x144/0x2b8 > > sp : ffff80008b84bc30 > > x29: ffff80008b84bca0 x28: ffff8000a5008000 x27: ffff80008b84bc38 > > x26: 0000000000000000 x25: ffff80008b84bc60 x24: 0000000000000000 > > x23: 0000000000000000 x22: 0000000000000058 x21: 0000000000000838 > > x20: 0000000000000000 x19: 0000000100001fe0 x18: 0000000000000000 > > x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffcc66d2c8 > > x14: 0000000000000000 x13: 0000000000000000 x12: 000000000004058c > > x11: ffff8000a5008010 x10: 00000000ffffffff x9 : 00000000000002cf > > x8 : ffff800082ff4ab8 x7 : 0000000100001000 x6 : 0000000000000001 > > x5 : 0000000010e5e3fd x4 : 000000003619b978 x3 : 0000000000000010 > > x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000001fe0 > > Call trace: > > bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8 > > bpf_prog_test_run_syscall+0x100/0x340 > > __sys_bpf+0x8e8/0xa20 > > __arm64_sys_bpf+0x2c/0x48 > > invoke_syscall+0x50/0x128 > > el0_svc_common.constprop.0+0x48/0xf8 > > do_el0_svc+0x28/0x40 > > el0_svc+0x58/0x190 > > el0t_64_sync_handler+0x13c/0x158 > > el0t_64_sync+0x1a8/0x1b0 > > Code: 8b010042 8b1c006b f9000162 d2800001 (f821307f) > > ---[ end trace 0000000000000000 ]--- > > Kernel panic - not syncing: Oops: Fatal exception > > SMP: stopping secondary CPUs > > Kernel Offset: disabled > > CPU features: 0x0,00000120,7002014a,21407a0b > > Memory Limit: none > > Rebooting in 5 seconds.. > > > > I will send v2 with the arm64 JIT fix, but I guess verifier has to be modified > > as well to add BPF_PROBE_MEM32 to atomic instructions. > > The JIT and the verifier changes for atomics might be too big. > Let's disable atomics in arena in the verifier for now. > Pls send a patch. As atomics are disabled in the Arena now, this series will not require any changes. Looking forward to the reviews. Thanks, Puranjay
© 2016 - 2026 Red Hat, Inc.