Introduce BPF instructions with load-acquire and store-release
semantics, as discussed in [1]. The following new flags are defined:
BPF_ATOMIC_LOAD 0x10
BPF_ATOMIC_STORE 0x20
BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
BPF_RELAXED 0x0
BPF_ACQUIRE 0x1
BPF_RELEASE 0x2
BPF_ACQ_REL 0x3
BPF_SEQ_CST 0x4
BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE)
BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE)
A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
field set to BPF_LOAD_ACQ (0x11).
Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
the 'imm' field set to BPF_STORE_REL (0x22).
Unlike existing atomic operations that only support BPF_W (32-bit) and
BPF_DW (64-bit) size modifiers, load-acquires and store-releases also
support BPF_B (8-bit) and BPF_H (16-bit). An 8- or 16-bit load-acquire
zero-extends the value before writing it to a 32-bit register, just like
ARM64 instruction LDARH and friends.
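For illustration, a minimal C model of that zero-extension (semantics only,
not how the interpreter or JIT actually implements it):

  u8 byte = 0x80;
  u64 dst;

  dst = (u8)smp_load_acquire(&byte);  /* dst == 0x80, zero- (not sign-) extended */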
As an example, consider the following 64-bit load-acquire BPF
instruction (assuming little-endian from now on):
db 10 00 00 11 00 00 00 r0 = load_acquire((u64 *)(r1 + 0x0))
opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
imm (0x00000011): BPF_LOAD_ACQ
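For reference, the same instruction written as a raw 'struct bpf_insn'
initializer (purely illustrative, using the flags defined above and the
definitions from <linux/bpf.h>):

  struct bpf_insn ld_acq = {
          .code    = BPF_STX | BPF_ATOMIC | BPF_DW,  /* 0xdb */
          .dst_reg = BPF_REG_0,                      /* value lands in r0    */
          .src_reg = BPF_REG_1,                      /* r1 holds the address */
          .off     = 0,
          .imm     = BPF_LOAD_ACQ,                   /* 0x00000011 */
  };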
For ARM64, an LDAR instruction will be generated by the JIT compiler for
the above:
ldar x7, [x0]
Similarly, a 16-bit BPF store-release:
cb 21 00 00 22 00 00 00 store_release((u16 *)(r1 + 0x0), w2)
opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
imm (0x00000022): BPF_STORE_REL
An STLRH will be generated for it:
stlrh w1, [x0]
For a complete mapping for ARM64:

  load-acquire     8-bit  LDARB
 (BPF_LOAD_ACQ)   16-bit  LDARH
                  32-bit  LDAR (32-bit)
                  64-bit  LDAR (64-bit)

  store-release    8-bit  STLRB
 (BPF_STORE_REL)  16-bit  STLRH
                  32-bit  STLR (32-bit)
                  64-bit  STLR (64-bit)
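For illustration, the kind of C source these instructions are meant to be
generated from (a sketch, assuming a Clang/LLVM new enough to lower the
builtins below to the new encodings when targeting BPF):

  __u64 read_seq(const __u64 *p)
  {
          return __atomic_load_n(p, __ATOMIC_ACQUIRE);     /* load-acquire  */
  }

  void set_flag(__u16 *p, __u16 v)
  {
          __atomic_store_n(p, v, __ATOMIC_RELEASE);        /* store-release */
  }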
Reviewed-by: Josh Don <joshdon@google.com>
Reviewed-by: Barret Rhoden <brho@google.com>
Signed-off-by: Peilin Ye <yepeilin@google.com>
---
arch/arm64/include/asm/insn.h | 8 ++++
arch/arm64/lib/insn.c | 34 ++++++++++++++
arch/arm64/net/bpf_jit.h | 20 ++++++++
arch/arm64/net/bpf_jit_comp.c | 85 +++++++++++++++++++++++++++++++++-
include/uapi/linux/bpf.h | 13 ++++++
kernel/bpf/core.c | 41 +++++++++++++++-
kernel/bpf/disasm.c | 14 ++++++
kernel/bpf/verifier.c | 32 +++++++++----
tools/include/uapi/linux/bpf.h | 13 ++++++
9 files changed, 246 insertions(+), 14 deletions(-)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index e390c432f546..bbfdbe570ff6 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -188,8 +188,10 @@ enum aarch64_insn_ldst_type {
AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
+ AARCH64_INSN_LDST_LOAD_ACQ,
AARCH64_INSN_LDST_LOAD_EX,
AARCH64_INSN_LDST_LOAD_ACQ_EX,
+ AARCH64_INSN_LDST_STORE_REL,
AARCH64_INSN_LDST_STORE_EX,
AARCH64_INSN_LDST_STORE_REL_EX,
AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET,
@@ -351,6 +353,8 @@ __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
+__AARCH64_INSN_FUNCS(load_acq, 0x3FC08000, 0x08C08000)
+__AARCH64_INSN_FUNCS(store_rel, 0x3FC08000, 0x08808000)
__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
__AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400)
@@ -602,6 +606,10 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
int offset,
enum aarch64_insn_variant variant,
enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register base,
enum aarch64_insn_register state,
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index b008a9b46a7f..80e5b191d96a 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -540,6 +540,40 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
offset >> shift);
}
+u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_LDST_LOAD_ACQ:
+ insn = aarch64_insn_get_load_acq_value();
+ break;
+ case AARCH64_INSN_LDST_STORE_REL:
+ insn = aarch64_insn_get_store_rel_value();
+ break;
+ default:
+ pr_err("%s: unknown load-acquire/store-release encoding %d\n", __func__, type);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_ldst_size(size, insn);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+ reg);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+ base);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
+ AARCH64_INSN_REG_ZR);
+
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
+ AARCH64_INSN_REG_ZR);
+}
+
u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register base,
enum aarch64_insn_register state,
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index b22ab2f97a30..a3b0e693a125 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -119,6 +119,26 @@
aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \
AARCH64_INSN_LDST_STORE_REL_EX)
+/* Load-acquire & store-release */
+#define A64_LDAR(Rt, Rn, size) \
+ aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
+ AARCH64_INSN_LDST_LOAD_ACQ)
+#define A64_STLR(Rt, Rn, size) \
+ aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
+ AARCH64_INSN_LDST_STORE_REL)
+
+/* Rt = [Rn] (load acquire) */
+#define A64_LDARB(Wt, Xn) A64_LDAR(Wt, Xn, 8)
+#define A64_LDARH(Wt, Xn) A64_LDAR(Wt, Xn, 16)
+#define A64_LDAR32(Wt, Xn) A64_LDAR(Wt, Xn, 32)
+#define A64_LDAR64(Xt, Xn) A64_LDAR(Xt, Xn, 64)
+
+/* [Rn] = Rt (store release) */
+#define A64_STLRB(Wt, Xn) A64_STLR(Wt, Xn, 8)
+#define A64_STLRH(Wt, Xn) A64_STLR(Wt, Xn, 16)
+#define A64_STLR32(Wt, Xn) A64_STLR(Wt, Xn, 32)
+#define A64_STLR64(Xt, Xn) A64_STLR(Xt, Xn, 64)
+
/*
* LSE atomics
*
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 66708b95493a..15fc0f391f14 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -634,6 +634,80 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
return 0;
}
+static inline bool is_atomic_load_store(const s32 imm)
+{
+ const s32 type = BPF_ATOMIC_TYPE(imm);
+
+ return type == BPF_ATOMIC_LOAD || type == BPF_ATOMIC_STORE;
+}
+
+static int emit_atomic_load_store(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+ const s16 off = insn->off;
+ const u8 code = insn->code;
+ const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
+ const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+ const u8 dst = bpf2a64[insn->dst_reg];
+ const u8 src = bpf2a64[insn->src_reg];
+ const u8 tmp = bpf2a64[TMP_REG_1];
+ u8 ptr;
+
+ if (BPF_ATOMIC_TYPE(insn->imm) == BPF_ATOMIC_LOAD)
+ ptr = src;
+ else
+ ptr = dst;
+
+ if (off) {
+ emit_a64_mov_i(true, tmp, off, ctx);
+ emit(A64_ADD(true, tmp, tmp, ptr), ctx);
+ ptr = tmp;
+ }
+ if (arena) {
+ emit(A64_ADD(true, tmp, ptr, arena_vm_base), ctx);
+ ptr = tmp;
+ }
+
+ switch (insn->imm) {
+ case BPF_LOAD_ACQ:
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ emit(A64_LDARB(dst, ptr), ctx);
+ break;
+ case BPF_H:
+ emit(A64_LDARH(dst, ptr), ctx);
+ break;
+ case BPF_W:
+ emit(A64_LDAR32(dst, ptr), ctx);
+ break;
+ case BPF_DW:
+ emit(A64_LDAR64(dst, ptr), ctx);
+ break;
+ }
+ break;
+ case BPF_STORE_REL:
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ emit(A64_STLRB(src, ptr), ctx);
+ break;
+ case BPF_H:
+ emit(A64_STLRH(src, ptr), ctx);
+ break;
+ case BPF_W:
+ emit(A64_STLR32(src, ptr), ctx);
+ break;
+ case BPF_DW:
+ emit(A64_STLR64(src, ptr), ctx);
+ break;
+ }
+ break;
+ default:
+ pr_err_once("unknown atomic load/store op code %02x\n", insn->imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_ARM64_LSE_ATOMICS
static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
@@ -1641,11 +1715,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
return ret;
break;
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
- if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+ if (is_atomic_load_store(insn->imm))
+ ret = emit_atomic_load_store(insn, ctx);
+ else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
ret = emit_lse_atomic(insn, ctx);
else
ret = emit_ll_sc_atomic(insn, ctx);
@@ -2669,7 +2749,8 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
switch (insn->code) {
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
- if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+ if (!is_atomic_load_store(insn->imm) &&
+ !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
return false;
}
return true;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2acf9b336371..4a20a125eb46 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -51,6 +51,19 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+#define BPF_ATOMIC_LOAD 0x10
+#define BPF_ATOMIC_STORE 0x20
+#define BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
+
+#define BPF_RELAXED 0x00
+#define BPF_ACQUIRE 0x01
+#define BPF_RELEASE 0x02
+#define BPF_ACQ_REL 0x03
+#define BPF_SEQ_CST 0x04
+
+#define BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) /* load-acquire */
+#define BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) /* store-release */
+
enum bpf_cond_pseudo_jmp {
BPF_MAY_GOTO = 0,
};
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index da729cbbaeb9..ab082ab9d535 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
INSN_3(JMP, JSET, K), \
INSN_2(JMP, JA), \
INSN_2(JMP32, JA), \
+ /* Atomic operations. */ \
+ INSN_3(STX, ATOMIC, B), \
+ INSN_3(STX, ATOMIC, H), \
+ INSN_3(STX, ATOMIC, W), \
+ INSN_3(STX, ATOMIC, DW), \
/* Store instructions. */ \
/* Register based. */ \
INSN_3(STX, MEM, B), \
INSN_3(STX, MEM, H), \
INSN_3(STX, MEM, W), \
INSN_3(STX, MEM, DW), \
- INSN_3(STX, ATOMIC, W), \
- INSN_3(STX, ATOMIC, DW), \
/* Immediate based. */ \
INSN_3(ST, MEM, B), \
INSN_3(ST, MEM, H), \
@@ -2169,6 +2172,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
STX_ATOMIC_DW:
STX_ATOMIC_W:
+ STX_ATOMIC_H:
+ STX_ATOMIC_B:
switch (IMM) {
ATOMIC_ALU_OP(BPF_ADD, add)
ATOMIC_ALU_OP(BPF_AND, and)
@@ -2196,6 +2201,38 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
(atomic64_t *)(unsigned long) (DST + insn->off),
(u64) BPF_R0, (u64) SRC);
break;
+ case BPF_LOAD_ACQ:
+ switch (BPF_SIZE(insn->code)) {
+#define LOAD_ACQUIRE(SIZEOP, SIZE) \
+ case BPF_##SIZEOP: \
+ DST = (SIZE)smp_load_acquire( \
+ (SIZE *)(unsigned long)(SRC + insn->off)); \
+ break;
+ LOAD_ACQUIRE(B, u8)
+ LOAD_ACQUIRE(H, u16)
+ LOAD_ACQUIRE(W, u32)
+ LOAD_ACQUIRE(DW, u64)
+#undef LOAD_ACQUIRE
+ default:
+ goto default_label;
+ }
+ break;
+ case BPF_STORE_REL:
+ switch (BPF_SIZE(insn->code)) {
+#define STORE_RELEASE(SIZEOP, SIZE) \
+ case BPF_##SIZEOP: \
+ smp_store_release( \
+ (SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC); \
+ break;
+ STORE_RELEASE(B, u8)
+ STORE_RELEASE(H, u16)
+ STORE_RELEASE(W, u32)
+ STORE_RELEASE(DW, u64)
+#undef STORE_RELEASE
+ default:
+ goto default_label;
+ }
+ break;
default:
goto default_label;
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 309c4aa1b026..2a354a44f209 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -267,6 +267,20 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg, insn->off, insn->src_reg);
+ } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+ insn->imm == BPF_LOAD_ACQ) {
+ verbose(cbs->private_data, "(%02x) %s%d = load_acquire((%s *)(r%d %+d))\n",
+ insn->code,
+ BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->dst_reg,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->src_reg, insn->off);
+ } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+ insn->imm == BPF_STORE_REL) {
+ verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), %s%d)\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->dst_reg, insn->off,
+ BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->src_reg);
} else {
verbose(cbs->private_data, "BUG_%02x\n", insn->code);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fa40a0440590..dc3ecc925b97 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3480,7 +3480,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
if (class == BPF_STX) {
- /* BPF_STX (including atomic variants) has multiple source
+ /* BPF_STX (including atomic variants) has one or more source
* operands, one of which is a ptr. Check whether the caller is
* asking about it.
*/
@@ -7550,6 +7550,8 @@ static int check_load(struct bpf_verifier_env *env, struct bpf_insn *insn, const
static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
{
+ const int bpf_size = BPF_SIZE(insn->code);
+ bool write_only = false;
int load_reg;
int err;
@@ -7564,17 +7566,21 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
case BPF_XOR | BPF_FETCH:
case BPF_XCHG:
case BPF_CMPXCHG:
+ if (bpf_size != BPF_W && bpf_size != BPF_DW) {
+ verbose(env, "invalid atomic operand size\n");
+ return -EINVAL;
+ }
+ break;
+ case BPF_LOAD_ACQ:
+ return check_load(env, insn, "atomic");
+ case BPF_STORE_REL:
+ write_only = true;
break;
default:
verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
return -EINVAL;
}
- if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
- verbose(env, "invalid atomic operand size\n");
- return -EINVAL;
- }
-
/* check src1 operand */
err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
@@ -7615,6 +7621,9 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
return -EACCES;
}
+ if (write_only)
+ goto skip_read_check;
+
if (insn->imm & BPF_FETCH) {
if (insn->imm == BPF_CMPXCHG)
load_reg = BPF_REG_0;
@@ -7636,14 +7645,15 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
* case to simulate the register fill.
*/
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_READ, -1, true, false);
+ bpf_size, BPF_READ, -1, true, false);
if (!err && load_reg >= 0)
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_READ, load_reg,
- true, false);
+ bpf_size, BPF_READ, load_reg, true,
+ false);
if (err)
return err;
+skip_read_check:
if (is_arena_reg(env, insn->dst_reg)) {
err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
if (err)
@@ -20320,7 +20330,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
type = BPF_WRITE;
- } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
+ } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2acf9b336371..4a20a125eb46 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -51,6 +51,19 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+#define BPF_ATOMIC_LOAD 0x10
+#define BPF_ATOMIC_STORE 0x20
+#define BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
+
+#define BPF_RELAXED 0x00
+#define BPF_ACQUIRE 0x01
+#define BPF_RELEASE 0x02
+#define BPF_ACQ_REL 0x03
+#define BPF_SEQ_CST 0x04
+
+#define BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) /* load-acquire */
+#define BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) /* store-release */
+
enum bpf_cond_pseudo_jmp {
BPF_MAY_GOTO = 0,
};
--
2.47.1.613.gc27f4b7a9f-goog
On Sat, 2024-12-21 at 01:25 +0000, Peilin Ye wrote:
> Introduce BPF instructions with load-acquire and store-release
> semantics, as discussed in [1]. The following new flags are defined:
The '[1]' link is missing.
> BPF_ATOMIC_LOAD 0x10
> BPF_ATOMIC_STORE 0x20
> BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
>
> BPF_RELAXED 0x0
> BPF_ACQUIRE 0x1
> BPF_RELEASE 0x2
> BPF_ACQ_REL 0x3
> BPF_SEQ_CST 0x4
>
> BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE)
> BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE)
>
> A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
> field set to BPF_LOAD_ACQ (0x11).
>
> Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
> the 'imm' field set to BPF_STORE_REL (0x22).
[...]
> diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
> index 309c4aa1b026..2a354a44f209 100644
> --- a/kernel/bpf/disasm.c
> +++ b/kernel/bpf/disasm.c
> @@ -267,6 +267,20 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
> BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
> bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> insn->dst_reg, insn->off, insn->src_reg);
> + } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> + insn->imm == BPF_LOAD_ACQ) {
> + verbose(cbs->private_data, "(%02x) %s%d = load_acquire((%s *)(r%d %+d))\n",
> + insn->code,
> + BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->dst_reg,
Nit: I think that 'BPF_DW ? "r" : "w"' part is not really necessary.
> + bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> + insn->src_reg, insn->off);
> + } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> + insn->imm == BPF_STORE_REL) {
> + verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), %s%d)\n",
> + insn->code,
> + bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> + insn->dst_reg, insn->off,
> + BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->src_reg);
> } else {
> verbose(cbs->private_data, "BUG_%02x\n", insn->code);
> }
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index fa40a0440590..dc3ecc925b97 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -3480,7 +3480,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
> }
>
> if (class == BPF_STX) {
> - /* BPF_STX (including atomic variants) has multiple source
> + /* BPF_STX (including atomic variants) has one or more source
> * operands, one of which is a ptr. Check whether the caller is
> * asking about it.
> */
> @@ -7550,6 +7550,8 @@ static int check_load(struct bpf_verifier_env *env, struct bpf_insn *insn, const
>
> static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
> {
> + const int bpf_size = BPF_SIZE(insn->code);
> + bool write_only = false;
> int load_reg;
> int err;
>
> @@ -7564,17 +7566,21 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> case BPF_XOR | BPF_FETCH:
> case BPF_XCHG:
> case BPF_CMPXCHG:
> + if (bpf_size != BPF_W && bpf_size != BPF_DW) {
> + verbose(env, "invalid atomic operand size\n");
> + return -EINVAL;
> + }
> + break;
> + case BPF_LOAD_ACQ:
Several notes here:
- This skips the 'bpf_jit_supports_insn()' call at the end of the function.
- Also, 'check_load()' allows the source register to be PTR_TO_CTX,
  but convert_ctx_access() is not adjusted to handle these atomic instructions.
  (Just in case: context access is special, context structures are not "real";
  e.g. at runtime a real sk_buff is passed to the program, not a __sk_buff.
  In convert_ctx_access() the verifier adjusts the offsets of load and store
  instructions to point to the real fields, and this is done per program type,
  e.g. see filter.c:bpf_convert_ctx_access.)
- backtrack_insn() needs special rules to handle BPF_LOAD_ACQ the same way
  it handles loads.
> + return check_load(env, insn, "atomic");
> + case BPF_STORE_REL:
> + write_only = true;
> break;
> default:
> verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
> return -EINVAL;
> }
>
> - if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
> - verbose(env, "invalid atomic operand size\n");
> - return -EINVAL;
> - }
> -
> /* check src1 operand */
> err = check_reg_arg(env, insn->src_reg, SRC_OP);
> if (err)
Note: this code fragment looks as follows:
/* check src1 operand */
err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
/* check src2 operand */
err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
return err;
And there is no need for 'check_reg_arg(env, insn->dst_reg, SRC_OP)'
for BPF_STORE_REL.
> @@ -7615,6 +7621,9 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> return -EACCES;
> }
>
> + if (write_only)
> + goto skip_read_check;
> +
> if (insn->imm & BPF_FETCH) {
> if (insn->imm == BPF_CMPXCHG)
> load_reg = BPF_REG_0;
> @@ -7636,14 +7645,15 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> * case to simulate the register fill.
> */
> err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> - BPF_SIZE(insn->code), BPF_READ, -1, true, false);
> + bpf_size, BPF_READ, -1, true, false);
> if (!err && load_reg >= 0)
> err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> - BPF_SIZE(insn->code), BPF_READ, load_reg,
> - true, false);
> + bpf_size, BPF_READ, load_reg, true,
> + false);
> if (err)
> return err;
>
> +skip_read_check:
> if (is_arena_reg(env, insn->dst_reg)) {
> err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> if (err)
[...]
Hi Eduard,
Thanks for the review!
On Fri, Jan 03, 2025 at 04:12:08PM -0800, Eduard Zingerman wrote:
> On Sat, 2024-12-21 at 01:25 +0000, Peilin Ye wrote:
> > Introduce BPF instructions with load-acquire and store-release
> > semantics, as discussed in [1]. The following new flags are defined:
>
> The '[1]' link is missing.
Oops, thanks.
[1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/
> > --- a/kernel/bpf/disasm.c
> > +++ b/kernel/bpf/disasm.c
> > @@ -267,6 +267,20 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
> > BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
> > bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> > insn->dst_reg, insn->off, insn->src_reg);
> > + } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> > + insn->imm == BPF_LOAD_ACQ) {
> > + verbose(cbs->private_data, "(%02x) %s%d = load_acquire((%s *)(r%d %+d))\n",
> > + insn->code,
> > + BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->dst_reg,
>
> Nit: I think that 'BPF_DW ? "r" : "w"' part is not really necessary.
We already do that in other places in the file, so I wanted to keep it
consistent:
$ git grep "? 'w' : 'r'" kernel/bpf/disasm.c | wc -l
8
(Though I just realized that I could've used '%c' instead of '%s'.)
> > static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
> > {
> > + const int bpf_size = BPF_SIZE(insn->code);
> > + bool write_only = false;
> > int load_reg;
> > int err;
> >
> > @@ -7564,17 +7566,21 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> > case BPF_XOR | BPF_FETCH:
> > case BPF_XCHG:
> > case BPF_CMPXCHG:
> > + if (bpf_size != BPF_W && bpf_size != BPF_DW) {
> > + verbose(env, "invalid atomic operand size\n");
> > + return -EINVAL;
> > + }
> > + break;
> > + case BPF_LOAD_ACQ:
>
> Several notes here:
> - This skips the 'bpf_jit_supports_insn()' call at the end of the function.
> - Also 'check_load()' allows source register to be PTR_TO_CTX,
> but convert_ctx_access() is not adjusted to handle these atomic instructions.
> (Just in case: context access is special, context structures are not "real",
> e.g. during runtime real sk_buff is passed to the program, not __sk_buff,
> in convert_ctx_access() verifier adjusts offsets of load and store instructions
> to point to real fields, this is done per program type, e.g. see
> filter.c:bpf_convert_ctx_access);
I see, thanks for pointing these out! I'll add logic to handle
BPF_LOAD_ACQ in check_atomic() directly, instead of introducing
check_load(). I'll disallow using BPF_LOAD_ACQ with src_reg being
PTR_TO_CTX (just like all existing BPF_ATOMIC instructions), as we don't
think there'll be a use case for it.
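Roughly like this (untested sketch, mirroring the dst_reg checks that
check_atomic() already does for the existing atomics; exact placement and
wording of the message TBD):

	if (is_ctx_reg(env, insn->src_reg) ||
	    is_pkt_reg(env, insn->src_reg) ||
	    is_flow_key_reg(env, insn->src_reg) ||
	    is_sk_reg(env, insn->src_reg)) {
		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
			insn->src_reg,
			reg_type_str(env, reg_state(env, insn->src_reg)->type));
		return -EACCES;
	}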
> - backtrack_insn() needs special rules to handle BPF_LOAD_ACQ same way
> it handles loads.
Got it, I'll read backtrack_insn().
> > + return check_load(env, insn, "atomic");
> > + case BPF_STORE_REL:
> > + write_only = true;
> > break;
> > default:
> > verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
> > return -EINVAL;
> > }
> >
> > - if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
> > - verbose(env, "invalid atomic operand size\n");
> > - return -EINVAL;
> > - }
> > -
> > /* check src1 operand */
> > err = check_reg_arg(env, insn->src_reg, SRC_OP);
> > if (err)
>
> Note: this code fragment looks as follows:
>
> /* check src1 operand */
> err = check_reg_arg(env, insn->src_reg, SRC_OP);
> if (err)
> return err;
>
> /* check src2 operand */
> err = check_reg_arg(env, insn->dst_reg, SRC_OP);
> if (err)
> return err;
>
> And there is no need for 'check_reg_arg(env, insn->dst_reg, SRC_OP)'
> for BPF_STORE_REL.
Why is that? IIUC, 'check_reg_arg(..., SRC_OP)' checks if we can read
the register, instead of the memory? For example, doing
'check_reg_arg(env, insn->dst_reg, SRC_OP)' prevents BPF_STORE_REL from
using an uninitialized dst_reg.
We also do this check for BPF_ST in do_check():
} else if (class == BPF_ST) {
enum bpf_reg_type dst_reg_type;
<...>
/* check src operand */
err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
return err;
Thanks,
Peilin Ye
On Tue, 2025-01-07 at 01:08 +0000, Peilin Ye wrote:
Hi Peilin,
[...]
> > > --- a/kernel/bpf/disasm.c
> > > +++ b/kernel/bpf/disasm.c
> > > @@ -267,6 +267,20 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
> > > BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
> > > bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> > > insn->dst_reg, insn->off, insn->src_reg);
> > > + } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> > > + insn->imm == BPF_LOAD_ACQ) {
> > > + verbose(cbs->private_data, "(%02x) %s%d = load_acquire((%s *)(r%d %+d))\n",
> > > + insn->code,
> > > + BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->dst_reg,
> >
> > Nit: I think that 'BPF_DW ? "r" : "w"' part is not really necessary.
>
> We already do that in other places in the file, so I wanted to keep it
> consistent:
>
> $ git grep "? 'w' : 'r'" kernel/bpf/disasm.c | wc -l
> 8
>
> (Though I just realized that I could've used '%c' instead of '%s'.)
These are used for operations that can have either BPF_ALU or
BPF_ALU32 class. I don't think there is such a distinction for
BPF_LOAD_ACQ / BPF_STORE_REL.
> > > static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
> > > {
> > > + const int bpf_size = BPF_SIZE(insn->code);
> > > + bool write_only = false;
> > > int load_reg;
> > > int err;
> > >
> > > @@ -7564,17 +7566,21 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> > > case BPF_XOR | BPF_FETCH:
> > > case BPF_XCHG:
> > > case BPF_CMPXCHG:
> > > + if (bpf_size != BPF_W && bpf_size != BPF_DW) {
> > > + verbose(env, "invalid atomic operand size\n");
> > > + return -EINVAL;
> > > + }
> > > + break;
> > > + case BPF_LOAD_ACQ:
> >
> > Several notes here:
> > - This skips the 'bpf_jit_supports_insn()' call at the end of the function.
> > - Also 'check_load()' allows source register to be PTR_TO_CTX,
> > but convert_ctx_access() is not adjusted to handle these atomic instructions.
> > (Just in case: context access is special, context structures are not "real",
> > e.g. during runtime real sk_buff is passed to the program, not __sk_buff,
> > in convert_ctx_access() verifier adjusts offsets of load and store instructions
> > to point to real fields, this is done per program type, e.g. see
> > filter.c:bpf_convert_ctx_access);
>
> I see, thanks for pointing these out! I'll add logic to handle
> BPF_LOAD_ACQ in check_atomic() directly, instead of introducing
> check_load(). I'll disallow using BPF_LOAD_ACQ with src_reg being
> PTR_TO_CTX (just like all existing BPF_ATOMIC instructions), as we don't
> think there'll be a use case for it.
(Just in case: the full list of types currently disallowed for atomics is:
is_ctx_reg, is_pkt_reg, is_flow_key_reg, is_sk_reg, is_arena_reg,
see slightly below in the same function).
[...]
> > And there is no need for 'check_reg_arg(env, insn->dst_reg, SRC_OP)'
> > for BPF_STORE_REL.
>
> Why is that? IIUC, 'check_reg_arg(..., SRC_OP)' checks if we can read
> the register, instead of the memory? For example, doing
> 'check_reg_arg(env, insn->dst_reg, SRC_OP)' prevents BPF_STORE_REL from
> using an uninitialized dst_reg.
>
> We also do this check for BPF_ST in do_check():
>
> } else if (class == BPF_ST) {
> enum bpf_reg_type dst_reg_type;
> <...>
> /* check src operand */
> err = check_reg_arg(env, insn->dst_reg, SRC_OP);
> if (err)
> return err;
Sorry, my bad, the 'check_reg_arg(env, insn->dst_reg, SRC_OP)'
is necessary and is done for BPF_STX as well.
Thanks,
Eduard
On Mon, Jan 06, 2025 at 05:20:56PM -0800, Eduard Zingerman wrote:
> > > > --- a/kernel/bpf/disasm.c
> > > > +++ b/kernel/bpf/disasm.c
> > > > @@ -267,6 +267,20 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
> > > > BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
> > > > bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> > > > insn->dst_reg, insn->off, insn->src_reg);
> > > > + } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> > > > + insn->imm == BPF_LOAD_ACQ) {
> > > > + verbose(cbs->private_data, "(%02x) %s%d = load_acquire((%s *)(r%d %+d))\n",
> > > > + insn->code,
> > > > + BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->dst_reg,
> > >
> > > Nit: I think that 'BPF_DW ? "r" : "w"' part is not really necessary.
> >
> > We already do that in other places in the file, so I wanted to keep it
> > consistent:
> >
> > $ git grep "? 'w' : 'r'" kernel/bpf/disasm.c | wc -l
> > 8
> >
> > (Though I just realized that I could've used '%c' instead of '%s'.)
>
> These are used for operations that can have either BPF_ALU or
> BPF_ALU32 class. I don't think there is such distinction for
> BPF_LOAD_ACQ / BPF_STORE_REL.
I see; I just realized that the same instruction can be disassembled
differently by llvm-objdump, depending on --mcpu= version. For example:
63 21 00 00 00 00 00 00
opcode (0x63): BPF_MEM | BPF_W | BPF_STX
--mcpu=v3: *(u32 *)(r1 + 0x0) = w2
--mcpu=v2 (NoALU32): *(u32 *)(r1 + 0x0) = r2
^^
So from the kernel's perspective, it doesn't really matter whether it's 'r' or
'w' if the encoding is the same. I'll remove the 'BPF_DW ? "r" : "w"'
part and always use 'r'.
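Something like this for the load-acquire branch (sketch only; the
store-release branch would change the same way):

	} else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
		   insn->imm == BPF_LOAD_ACQ) {
		verbose(cbs->private_data, "(%02x) r%d = load_acquire((%s *)(r%d %+d))\n",
			insn->code, insn->dst_reg,
			bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
			insn->src_reg, insn->off);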
> > > > static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
> > > > {
> > > > + const int bpf_size = BPF_SIZE(insn->code);
> > > > + bool write_only = false;
> > > > int load_reg;
> > > > int err;
> > > >
> > > > @@ -7564,17 +7566,21 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> > > > case BPF_XOR | BPF_FETCH:
> > > > case BPF_XCHG:
> > > > case BPF_CMPXCHG:
> > > > + if (bpf_size != BPF_W && bpf_size != BPF_DW) {
> > > > + verbose(env, "invalid atomic operand size\n");
> > > > + return -EINVAL;
> > > > + }
> > > > + break;
> > > > + case BPF_LOAD_ACQ:
> > >
> > > Several notes here:
> > > - This skips the 'bpf_jit_supports_insn()' call at the end of the function.
> > > - Also 'check_load()' allows source register to be PTR_TO_CTX,
> > > but convert_ctx_access() is not adjusted to handle these atomic instructions.
> > > (Just in case: context access is special, context structures are not "real",
> > > e.g. during runtime real sk_buff is passed to the program, not __sk_buff,
> > > in convert_ctx_access() verifier adjusts offsets of load and store instructions
> > > to point to real fields, this is done per program type, e.g. see
> > > filter.c:bpf_convert_ctx_access);
> >
> > I see, thanks for pointing these out! I'll add logic to handle
> > BPF_LOAD_ACQ in check_atomic() directly, instead of introducing
> > check_load(). I'll disallow using BPF_LOAD_ACQ with src_reg being
> > PTR_TO_CTX (just like all existing BPF_ATOMIC instructions), as we don't
> > think there'll be a use case for it.
>
> (Just in case: the full list of types currently disallowed for atomics is:
> is_ctx_reg, is_pkt_reg, is_flow_key_reg, is_sk_reg,
> is_arena_reg,
...if !bpf_jit_supports_insn(), right.
> see slightly below in the same function).
Thanks,
Peilin Ye
On 12/21/2024 9:25 AM, Peilin Ye wrote:
> Introduce BPF instructions with load-acquire and store-release
> semantics, as discussed in [1]. The following new flags are defined:
>
> BPF_ATOMIC_LOAD 0x10
> BPF_ATOMIC_STORE 0x20
> BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
>
> BPF_RELAXED 0x0
> BPF_ACQUIRE 0x1
> BPF_RELEASE 0x2
> BPF_ACQ_REL 0x3
> BPF_SEQ_CST 0x4
>
> BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE)
> BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE)
>
> A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
> field set to BPF_LOAD_ACQ (0x11).
>
> Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
> the 'imm' field set to BPF_STORE_REL (0x22).
>
> Unlike existing atomic operations that only support BPF_W (32-bit) and
> BPF_DW (64-bit) size modifiers, load-acquires and store-releases also
> support BPF_B (8-bit) and BPF_H (16-bit). An 8- or 16-bit load-acquire
> zero-extends the value before writing it to a 32-bit register, just like
> ARM64 instruction LDARH and friends.
>
> As an example, consider the following 64-bit load-acquire BPF
> instruction (assuming little-endian from now on):
>
> db 10 00 00 11 00 00 00 r0 = load_acquire((u64 *)(r1 + 0x0))
>
> opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
> imm (0x00000011): BPF_LOAD_ACQ
>
> For ARM64, an LDAR instruction will be generated by the JIT compiler for
> the above:
>
> ldar x7, [x0]
>
> Similarly, a 16-bit BPF store-release:
>
> cb 21 00 00 22 00 00 00 store_release((u16 *)(r1 + 0x0), w2)
>
> opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
> imm (0x00000022): BPF_STORE_REL
>
> An STLRH will be generated for it:
>
> stlrh w1, [x0]
>
> For a complete mapping for ARM64:
>
> load-acquire 8-bit LDARB
> (BPF_LOAD_ACQ) 16-bit LDARH
> 32-bit LDAR (32-bit)
> 64-bit LDAR (64-bit)
> store-release 8-bit STLRB
> (BPF_STORE_REL) 16-bit STLRH
> 32-bit STLR (32-bit)
> 64-bit STLR (64-bit)
>
> Reviewed-by: Josh Don <joshdon@google.com>
> Reviewed-by: Barret Rhoden <brho@google.com>
> Signed-off-by: Peilin Ye <yepeilin@google.com>
> ---
> arch/arm64/include/asm/insn.h | 8 ++++
> arch/arm64/lib/insn.c | 34 ++++++++++++++
> arch/arm64/net/bpf_jit.h | 20 ++++++++
> arch/arm64/net/bpf_jit_comp.c | 85 +++++++++++++++++++++++++++++++++-
> include/uapi/linux/bpf.h | 13 ++++++
> kernel/bpf/core.c | 41 +++++++++++++++-
> kernel/bpf/disasm.c | 14 ++++++
> kernel/bpf/verifier.c | 32 +++++++++----
> tools/include/uapi/linux/bpf.h | 13 ++++++
> 9 files changed, 246 insertions(+), 14 deletions(-)
>
> diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
> index e390c432f546..bbfdbe570ff6 100644
> --- a/arch/arm64/include/asm/insn.h
> +++ b/arch/arm64/include/asm/insn.h
> @@ -188,8 +188,10 @@ enum aarch64_insn_ldst_type {
> AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
> AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
> AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
> + AARCH64_INSN_LDST_LOAD_ACQ,
> AARCH64_INSN_LDST_LOAD_EX,
> AARCH64_INSN_LDST_LOAD_ACQ_EX,
> + AARCH64_INSN_LDST_STORE_REL,
> AARCH64_INSN_LDST_STORE_EX,
> AARCH64_INSN_LDST_STORE_REL_EX,
> AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET,
> @@ -351,6 +353,8 @@ __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
> __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
> __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
> __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
> +__AARCH64_INSN_FUNCS(load_acq, 0x3FC08000, 0x08C08000)
> +__AARCH64_INSN_FUNCS(store_rel, 0x3FC08000, 0x08808000)
I checked Arm Architecture Reference Manual [1].
Sections C6.2.{168,169,170,371,372,373} state that the Rt2 (bits 10-14) and
Rs (bits 16-20) fields for LDARB/LDARH/LDAR/STLRB/STLRH and the no-offset type
of STLR are fixed to (1).
Section C2.2.2 explains that (1) means a Should-Be-One (SBO) bit.
And the Glossary section says "Arm strongly recommends that software writes
the field as all 1s. If software writes a value that is not all 1s, it must
expect an UNPREDICTABLE or CONSTRAINED UNPREDICTABLE result."
Although the pre-index type of STLR is an exception, it is not used in this
series. Therefore, both bits 10-14 and 16-20 should be set to 1s in the mask
and value.
[1] https://developer.arm.com/documentation/ddi0487/latest/
> __AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
> __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
> __AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400)
> @@ -602,6 +606,10 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
> int offset,
> enum aarch64_insn_variant variant,
> enum aarch64_insn_ldst_type type);
> +u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
> + enum aarch64_insn_register base,
> + enum aarch64_insn_size_type size,
> + enum aarch64_insn_ldst_type type);
> u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
> enum aarch64_insn_register base,
> enum aarch64_insn_register state,
> diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
> index b008a9b46a7f..80e5b191d96a 100644
> --- a/arch/arm64/lib/insn.c
> +++ b/arch/arm64/lib/insn.c
> @@ -540,6 +540,40 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
> offset >> shift);
> }
>
> +u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
> + enum aarch64_insn_register base,
> + enum aarch64_insn_size_type size,
> + enum aarch64_insn_ldst_type type)
> +{
> + u32 insn;
> +
> + switch (type) {
> + case AARCH64_INSN_LDST_LOAD_ACQ:
> + insn = aarch64_insn_get_load_acq_value();
> + break;
> + case AARCH64_INSN_LDST_STORE_REL:
> + insn = aarch64_insn_get_store_rel_value();
> + break;
> + default:
> + pr_err("%s: unknown load-acquire/store-release encoding %d\n", __func__, type);
> + return AARCH64_BREAK_FAULT;
> + }
> +
> + insn = aarch64_insn_encode_ldst_size(size, insn);
> +
> + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
> + reg);
> +
> + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
> + base);
> +
> + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
> + AARCH64_INSN_REG_ZR);
> +
> + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
> + AARCH64_INSN_REG_ZR);
As explained above, RS and RT2 fields should be fixed to 1s.
> +}
> +
> u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
> enum aarch64_insn_register base,
> enum aarch64_insn_register state,
> diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
> index b22ab2f97a30..a3b0e693a125 100644
> --- a/arch/arm64/net/bpf_jit.h
> +++ b/arch/arm64/net/bpf_jit.h
> @@ -119,6 +119,26 @@
> aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \
> AARCH64_INSN_LDST_STORE_REL_EX)
>
> +/* Load-acquire & store-release */
> +#define A64_LDAR(Rt, Rn, size) \
> + aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
> + AARCH64_INSN_LDST_LOAD_ACQ)
> +#define A64_STLR(Rt, Rn, size) \
> + aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
> + AARCH64_INSN_LDST_STORE_REL)
> +
> +/* Rt = [Rn] (load acquire) */
> +#define A64_LDARB(Wt, Xn) A64_LDAR(Wt, Xn, 8)
> +#define A64_LDARH(Wt, Xn) A64_LDAR(Wt, Xn, 16)
> +#define A64_LDAR32(Wt, Xn) A64_LDAR(Wt, Xn, 32)
> +#define A64_LDAR64(Xt, Xn) A64_LDAR(Xt, Xn, 64)
> +
> +/* [Rn] = Rt (store release) */
> +#define A64_STLRB(Wt, Xn) A64_STLR(Wt, Xn, 8)
> +#define A64_STLRH(Wt, Xn) A64_STLR(Wt, Xn, 16)
> +#define A64_STLR32(Wt, Xn) A64_STLR(Wt, Xn, 32)
> +#define A64_STLR64(Xt, Xn) A64_STLR(Xt, Xn, 64)
> +
> /*
> * LSE atomics
> *
> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
> index 66708b95493a..15fc0f391f14 100644
> --- a/arch/arm64/net/bpf_jit_comp.c
> +++ b/arch/arm64/net/bpf_jit_comp.c
> @@ -634,6 +634,80 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
> return 0;
> }
>
> +static inline bool is_atomic_load_store(const s32 imm)
> +{
> + const s32 type = BPF_ATOMIC_TYPE(imm);
> +
> + return type == BPF_ATOMIC_LOAD || type == BPF_ATOMIC_STORE;
> +}
> +
> +static int emit_atomic_load_store(const struct bpf_insn *insn, struct jit_ctx *ctx)
> +{
> + const s16 off = insn->off;
> + const u8 code = insn->code;
> + const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
> + const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
> + const u8 dst = bpf2a64[insn->dst_reg];
> + const u8 src = bpf2a64[insn->src_reg];
> + const u8 tmp = bpf2a64[TMP_REG_1];
> + u8 ptr;
> +
> + if (BPF_ATOMIC_TYPE(insn->imm) == BPF_ATOMIC_LOAD)
> + ptr = src;
> + else
> + ptr = dst;
> +
> + if (off) {
> + emit_a64_mov_i(true, tmp, off, ctx);
> + emit(A64_ADD(true, tmp, tmp, ptr), ctx);
The mov and add instructions can be optimized to a single A64_ADD_I
if is_addsub_imm(off) is true.
> + ptr = tmp;
> + }
> + if (arena) {
> + emit(A64_ADD(true, tmp, ptr, arena_vm_base), ctx);
> + ptr = tmp;
> + }
> +
> + switch (insn->imm) {
> + case BPF_LOAD_ACQ:
> + switch (BPF_SIZE(code)) {
> + case BPF_B:
> + emit(A64_LDARB(dst, ptr), ctx);
> + break;
> + case BPF_H:
> + emit(A64_LDARH(dst, ptr), ctx);
> + break;
> + case BPF_W:
> + emit(A64_LDAR32(dst, ptr), ctx);
> + break;
> + case BPF_DW:
> + emit(A64_LDAR64(dst, ptr), ctx);
> + break;
> + }
> + break;
> + case BPF_STORE_REL:
> + switch (BPF_SIZE(code)) {
> + case BPF_B:
> + emit(A64_STLRB(src, ptr), ctx);
> + break;
> + case BPF_H:
> + emit(A64_STLRH(src, ptr), ctx);
> + break;
> + case BPF_W:
> + emit(A64_STLR32(src, ptr), ctx);
> + break;
> + case BPF_DW:
> + emit(A64_STLR64(src, ptr), ctx);
> + break;
> + }
> + break;
> + default:
> + pr_err_once("unknown atomic load/store op code %02x\n", insn->imm);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> #ifdef CONFIG_ARM64_LSE_ATOMICS
> static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
> {
> @@ -1641,11 +1715,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
> return ret;
> break;
>
> + case BPF_STX | BPF_ATOMIC | BPF_B:
> + case BPF_STX | BPF_ATOMIC | BPF_H:
> case BPF_STX | BPF_ATOMIC | BPF_W:
> case BPF_STX | BPF_ATOMIC | BPF_DW:
> + case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
> + case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
> case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
> case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
> - if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
> + if (is_atomic_load_store(insn->imm))
> + ret = emit_atomic_load_store(insn, ctx);
> + else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
> ret = emit_lse_atomic(insn, ctx);
> else
> ret = emit_ll_sc_atomic(insn, ctx);
> @@ -2669,7 +2749,8 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
> switch (insn->code) {
> case BPF_STX | BPF_ATOMIC | BPF_W:
> case BPF_STX | BPF_ATOMIC | BPF_DW:
> - if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
> + if (!is_atomic_load_store(insn->imm) &&
> + !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
> return false;
> }
> return true;
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 2acf9b336371..4a20a125eb46 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -51,6 +51,19 @@
> #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
> #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
>
> +#define BPF_ATOMIC_LOAD 0x10
> +#define BPF_ATOMIC_STORE 0x20
> +#define BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
> +
> +#define BPF_RELAXED 0x00
> +#define BPF_ACQUIRE 0x01
> +#define BPF_RELEASE 0x02
> +#define BPF_ACQ_REL 0x03
> +#define BPF_SEQ_CST 0x04
> +
> +#define BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) /* load-acquire */
> +#define BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) /* store-release */
> +
> enum bpf_cond_pseudo_jmp {
> BPF_MAY_GOTO = 0,
> };
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index da729cbbaeb9..ab082ab9d535 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
> INSN_3(JMP, JSET, K), \
> INSN_2(JMP, JA), \
> INSN_2(JMP32, JA), \
> + /* Atomic operations. */ \
> + INSN_3(STX, ATOMIC, B), \
> + INSN_3(STX, ATOMIC, H), \
> + INSN_3(STX, ATOMIC, W), \
> + INSN_3(STX, ATOMIC, DW), \
> /* Store instructions. */ \
> /* Register based. */ \
> INSN_3(STX, MEM, B), \
> INSN_3(STX, MEM, H), \
> INSN_3(STX, MEM, W), \
> INSN_3(STX, MEM, DW), \
> - INSN_3(STX, ATOMIC, W), \
> - INSN_3(STX, ATOMIC, DW), \
> /* Immediate based. */ \
> INSN_3(ST, MEM, B), \
> INSN_3(ST, MEM, H), \
> @@ -2169,6 +2172,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
>
> STX_ATOMIC_DW:
> STX_ATOMIC_W:
> + STX_ATOMIC_H:
> + STX_ATOMIC_B:
> switch (IMM) {
> ATOMIC_ALU_OP(BPF_ADD, add)
> ATOMIC_ALU_OP(BPF_AND, and)
> @@ -2196,6 +2201,38 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
> (atomic64_t *)(unsigned long) (DST + insn->off),
> (u64) BPF_R0, (u64) SRC);
> break;
> + case BPF_LOAD_ACQ:
> + switch (BPF_SIZE(insn->code)) {
> +#define LOAD_ACQUIRE(SIZEOP, SIZE) \
> + case BPF_##SIZEOP: \
> + DST = (SIZE)smp_load_acquire( \
> + (SIZE *)(unsigned long)(SRC + insn->off)); \
> + break;
> + LOAD_ACQUIRE(B, u8)
> + LOAD_ACQUIRE(H, u16)
> + LOAD_ACQUIRE(W, u32)
> + LOAD_ACQUIRE(DW, u64)
> +#undef LOAD_ACQUIRE
> + default:
> + goto default_label;
> + }
> + break;
> + case BPF_STORE_REL:
> + switch (BPF_SIZE(insn->code)) {
> +#define STORE_RELEASE(SIZEOP, SIZE) \
> + case BPF_##SIZEOP: \
> + smp_store_release( \
> + (SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC); \
> + break;
> + STORE_RELEASE(B, u8)
> + STORE_RELEASE(H, u16)
> + STORE_RELEASE(W, u32)
> + STORE_RELEASE(DW, u64)
> +#undef STORE_RELEASE
> + default:
> + goto default_label;
> + }
> + break;
>
> default:
> goto default_label;
> diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
> index 309c4aa1b026..2a354a44f209 100644
> --- a/kernel/bpf/disasm.c
> +++ b/kernel/bpf/disasm.c
> @@ -267,6 +267,20 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
> BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
> bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> insn->dst_reg, insn->off, insn->src_reg);
> + } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> + insn->imm == BPF_LOAD_ACQ) {
> + verbose(cbs->private_data, "(%02x) %s%d = load_acquire((%s *)(r%d %+d))\n",
> + insn->code,
> + BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->dst_reg,
> + bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> + insn->src_reg, insn->off);
> + } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> + insn->imm == BPF_STORE_REL) {
> + verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), %s%d)\n",
> + insn->code,
> + bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> + insn->dst_reg, insn->off,
> + BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->src_reg);
> } else {
> verbose(cbs->private_data, "BUG_%02x\n", insn->code);
> }
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index fa40a0440590..dc3ecc925b97 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -3480,7 +3480,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
> }
>
> if (class == BPF_STX) {
> - /* BPF_STX (including atomic variants) has multiple source
> + /* BPF_STX (including atomic variants) has one or more source
> * operands, one of which is a ptr. Check whether the caller is
> * asking about it.
> */
> @@ -7550,6 +7550,8 @@ static int check_load(struct bpf_verifier_env *env, struct bpf_insn *insn, const
>
> static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
> {
> + const int bpf_size = BPF_SIZE(insn->code);
> + bool write_only = false;
> int load_reg;
> int err;
>
> @@ -7564,17 +7566,21 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> case BPF_XOR | BPF_FETCH:
> case BPF_XCHG:
> case BPF_CMPXCHG:
> + if (bpf_size != BPF_W && bpf_size != BPF_DW) {
> + verbose(env, "invalid atomic operand size\n");
> + return -EINVAL;
> + }
> + break;
> + case BPF_LOAD_ACQ:
> + return check_load(env, insn, "atomic");
> + case BPF_STORE_REL:
> + write_only = true;
> break;
> default:
> verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
> return -EINVAL;
> }
>
> - if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
> - verbose(env, "invalid atomic operand size\n");
> - return -EINVAL;
> - }
> -
> /* check src1 operand */
> err = check_reg_arg(env, insn->src_reg, SRC_OP);
> if (err)
> @@ -7615,6 +7621,9 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> return -EACCES;
> }
>
> + if (write_only)
> + goto skip_read_check;
> +
> if (insn->imm & BPF_FETCH) {
> if (insn->imm == BPF_CMPXCHG)
> load_reg = BPF_REG_0;
> @@ -7636,14 +7645,15 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> * case to simulate the register fill.
> */
> err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> - BPF_SIZE(insn->code), BPF_READ, -1, true, false);
> + bpf_size, BPF_READ, -1, true, false);
> if (!err && load_reg >= 0)
> err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> - BPF_SIZE(insn->code), BPF_READ, load_reg,
> - true, false);
> + bpf_size, BPF_READ, load_reg, true,
> + false);
> if (err)
> return err;
>
> +skip_read_check:
> if (is_arena_reg(env, insn->dst_reg)) {
> err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> if (err)
> @@ -20320,7 +20330,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
> insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
> type = BPF_WRITE;
> - } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
> + } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
> + insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
> + insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
> insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
> env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
> insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 2acf9b336371..4a20a125eb46 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -51,6 +51,19 @@
> #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
> #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
>
> +#define BPF_ATOMIC_LOAD 0x10
> +#define BPF_ATOMIC_STORE 0x20
> +#define BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
> +
> +#define BPF_RELAXED 0x00
> +#define BPF_ACQUIRE 0x01
> +#define BPF_RELEASE 0x02
> +#define BPF_ACQ_REL 0x03
> +#define BPF_SEQ_CST 0x04
> +
> +#define BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) /* load-acquire */
> +#define BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) /* store-release */
> +
> enum bpf_cond_pseudo_jmp {
> BPF_MAY_GOTO = 0,
> };
I think it's better to split the arm64-related changes into two separate
patches: one for adding the arm64 LDAR/STLR instruction encodings, and
the other for adding JIT support.
Hi Xu,
Thanks for reviewing this!
On Tue, Dec 24, 2024 at 06:07:14PM +0800, Xu Kuohai wrote:
> On 12/21/2024 9:25 AM, Peilin Ye wrote:
> > +__AARCH64_INSN_FUNCS(load_acq, 0x3FC08000, 0x08C08000)
> > +__AARCH64_INSN_FUNCS(store_rel, 0x3FC08000, 0x08808000)
>
> I checked Arm Architecture Reference Manual [1].
>
> Section C6.2.{168,169,170,371,372,373} state that field Rt2 (bits 10-14) and
> Rs (bits 16-20) for LDARB/LDARH/LDAR/STLRB/STLRH and no offset type STLR
> instructions are fixed to (1).
>
> Section C2.2.2 explains that (1) means a Should-Be-One (SBO) bit.
>
> And the Glossary section says "Arm strongly recommends that software writes
> the field as all 1s. If software writes a value that is not all 1s, it must
> expect an UNPREDICTABLE or CONSTRAINED UNPREDICTABLE result."
>
> Although the pre-index type of STLR is an exception, it is not used in this
> series. Therefore, both bits 10-14 and 16-20 in mask and value should be set
> to 1s.
>
> [1] https://developer.arm.com/documentation/ddi0487/latest/
<...>
> > + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
> > + AARCH64_INSN_REG_ZR);
> > +
> > + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
> > + AARCH64_INSN_REG_ZR);
>
> As explained above, RS and RT2 fields should be fixed to 1s.
I'm already setting Rs and Rt2 to all 1's here, as AARCH64_INSN_REG_ZR
is defined as 31 (0b11111):
AARCH64_INSN_REG_ZR = 31,
Similar to how load- and store-exclusive instructions are handled
currently:
> > __AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
> > __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
For example, in the manual, Rs is all (1)'s for LDXR{,B,H}, and Rt2 is
all (1)'s for both LDXR{,B,H} and STXR{,B,H}. However, neither Rs nor
Rt2 bits are in the mask, and (1) bits are set manually, see
aarch64_insn_gen_load_store_ex():
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
AARCH64_INSN_REG_ZR);
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
state);
(For LDXR{,B,H}, 'state' is A64_ZR, which is just an alias to
AARCH64_INSN_REG_ZR (0b11111).)
- - -
On a related note, I simply grabbed {load,store}_ex's MASK and VALUE,
then set their 15th and 23rd bits to make them load-acquire and
store-release:
+__AARCH64_INSN_FUNCS(load_acq, 0x3FC08000, 0x08C08000)
+__AARCH64_INSN_FUNCS(store_rel, 0x3FC08000, 0x08808000)
__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
My question is, should we extend {load,store}_ex's MASK to make them
contain BIT(15) and BIT(23) as well? As-is, aarch64_insn_is_load_ex()
would return true for a load-acquire.
The only user of aarch64_insn_is_load_ex() seems to be this
arm64-specific kprobe code in arch/arm64/kernel/probes/decode-insn.c:
#ifdef CONFIG_KPROBES
static bool __kprobes
is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
{
while (scan_start >= scan_end) {
/*
* atomic region starts from exclusive load and ends with
* exclusive store.
*/
if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start)))
return false;
else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start)))
return true;
But I'm not sure yet if changing {load,store}_ex's MASK would affect the
above code. Do you happen to know the context?
> > + if (BPF_ATOMIC_TYPE(insn->imm) == BPF_ATOMIC_LOAD)
> > + ptr = src;
> > + else
> > + ptr = dst;
> > +
> > + if (off) {
> > + emit_a64_mov_i(true, tmp, off, ctx);
> > + emit(A64_ADD(true, tmp, tmp, ptr), ctx);
>
> The mov and add instructions can be optimized to a single A64_ADD_I
> if is_addsub_imm(off) is true.
Thanks! I'll try this.
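Something along these lines, I assume (untested sketch; is_addsub_imm() only
covers non-negative immediates, so the mov+add fallback stays for offsets it
cannot encode):

	if (off) {
		if (is_addsub_imm(off)) {
			/* single instruction when off fits in an ADD immediate */
			emit(A64_ADD_I(true, tmp, ptr, off), ctx);
		} else {
			emit_a64_mov_i(true, tmp, off, ctx);
			emit(A64_ADD(true, tmp, tmp, ptr), ctx);
		}
		ptr = tmp;
	}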
> I think it's better to split the arm64 related changes into two separate
> patches: one for adding the arm64 LDAR/STLR instruction encodings, and
> the other for adding jit support.
Got it, in the next version I'll split this patch into (a) core/verifier
changes, (b) arm64 insn.{h,c} changes, and (c) arm64 JIT compiler
support.
Thanks,
Peilin Ye
On 12/27/2024 7:07 AM, Peilin Ye wrote:
> Hi Xu,
>
> Thanks for reviewing this!
>
> On Tue, Dec 24, 2024 at 06:07:14PM +0800, Xu Kuohai wrote:
>> On 12/21/2024 9:25 AM, Peilin Ye wrote:
>>> +__AARCH64_INSN_FUNCS(load_acq, 0x3FC08000, 0x08C08000)
>>> +__AARCH64_INSN_FUNCS(store_rel, 0x3FC08000, 0x08808000)
>>
>> I checked Arm Architecture Reference Manual [1].
>>
>> Section C6.2.{168,169,170,371,372,373} state that field Rt2 (bits 10-14) and
>> Rs (bits 16-20) for LDARB/LDARH/LDAR/STLRB/STLRH and no offset type STLR
>> instructions are fixed to (1).
>>
>> Section C2.2.2 explains that (1) means a Should-Be-One (SBO) bit.
>>
>> And the Glossary section says "Arm strongly recommends that software writes
>> the field as all 1s. If software writes a value that is not all 1s, it must
>> expect an UNPREDICTABLE or CONSTRAINED UNPREDICTABLE result."
>>
>> Although the pre-index type of STLR is an exception, it is not used in this
>> series. Therefore, both bits 10-14 and 16-20 in mask and value should be set
>> to 1s.
>>
>> [1] https://developer.arm.com/documentation/ddi0487/latest/
>
> <...>
>
>>> + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
>>> + AARCH64_INSN_REG_ZR);
>>> +
>>> + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
>>> + AARCH64_INSN_REG_ZR);
>>
>> As explained above, RS and RT2 fields should be fixed to 1s.
>
> I'm already setting Rs and Rt2 to all 1's here, as AARCH64_INSN_REG_ZR
> is defined as 31 (0b11111):
>
> AARCH64_INSN_REG_ZR = 31,
>
I see, but the setting of fixed bits is somewhat of a waste of jit time.
> Similar to how load- and store-exclusive instructions are handled
> currently:
>
>>> __AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
>>> __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
>
> For example, in the manual, Rs is all (1)'s for LDXR{,B,H}, and Rt2 is
> all (1)'s for both LDXR{,B,H} and STXR{,B,H}. However, neither Rs nor
> Rt2 bits are in the mask, and (1) bits are set manually, see
> aarch64_insn_gen_load_store_ex():
>
> insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
> AARCH64_INSN_REG_ZR);
>
> return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
> state);
>
> (For LDXR{,B,H}, 'state' is A64_ZR, which is just an alias to
> AARCH64_INSN_REG_ZR (0b11111).)
>
> - - -
>
> On a related note, I simply grabbed {load,store}_ex's MASK and VALUE,
> then set their 15th and 23rd bits to make them load-acquire and
> store-release:
>
> +__AARCH64_INSN_FUNCS(load_acq, 0x3FC08000, 0x08C08000)
> +__AARCH64_INSN_FUNCS(store_rel, 0x3FC08000, 0x08808000)
> __AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
> __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
>
> My question is, should we extend {load,store}_ex's MASK to make them
> contain BIT(15) and BIT(23) as well? As-is, aarch64_insn_is_load_ex()
> would return true for a load-acquire.
>
> The only user of aarch64_insn_is_load_ex() seems to be this
> arm64-specific kprobe code in arch/arm64/kernel/probes/decode-insn.c:
>
> #ifdef CONFIG_KPROBES
> static bool __kprobes
> is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
> {
> while (scan_start >= scan_end) {
> /*
> * atomic region starts from exclusive load and ends with
> * exclusive store.
> */
> if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start)))
> return false;
> else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start)))
> return true;
>
> But I'm not sure yet if changing {load,store}_ex's MASK would affect the
> above code. Do you happen to know the context?
>
IIUC, this code prevents kprobes from interrupting an LL-SC loop constructed
by an LDXR/STXR pair, as the kprobe trap causes unexpected memory accesses
that prevent the exclusive memory access loop from exiting.
Since load-acquire/store-release instructions are not used to construct LL-SC
loops, I think it is safe to exclude them from {load,store}_ex.
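For illustration, a minimal LL-SC sequence looks roughly like this (a
hand-written sketch for discussion, not code from this series):

	retry:
		ldxr	x1, [x0]	// load-exclusive: sets the exclusive monitor
		add	x1, x1, #1
		stxr	w2, x1, [x0]	// store-exclusive: w2 != 0 if the monitor
					// was cleared since the LDXR
		cbnz	w2, retry

A kprobe trap taken between the LDXR and the STXR performs extra memory
accesses, clearing the monitor, so the STXR keeps failing and the loop
cannot make forward progress.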
>>> + if (BPF_ATOMIC_TYPE(insn->imm) == BPF_ATOMIC_LOAD)
>>> + ptr = src;
>>> + else
>>> + ptr = dst;
>>> +
>>> + if (off) {
>>> + emit_a64_mov_i(true, tmp, off, ctx);
>>> + emit(A64_ADD(true, tmp, tmp, ptr), ctx);
>>
>> The mov and add instructions can be optimized to a single A64_ADD_I
>> if is_addsub_imm(off) is true.
>
> Thanks! I'll try this.
>
>> I think it's better to split the arm64 related changes into two separate
>> patches: one for adding the arm64 LDAR/STLR instruction encodings, and
>> the other for adding jit support.
>
> Got it, in the next version I'll split this patch into (a) core/verifier
> changes, (b) arm64 insn.{h,c} changes, and (c) arm64 JIT compiler
> support.
>
> Thanks,
> Peilin Ye
On Mon, Dec 30, 2024 at 04:27:21PM +0800, Xu Kuohai wrote:
> > > As explained above, RS and RT2 fields should be fixed to 1s.
> >
> > I'm already setting Rs and Rt2 to all 1's here, as AARCH64_INSN_REG_ZR
> > is defined as 31 (0b11111):
> >
> > AARCH64_INSN_REG_ZR = 31,
>
> I see, but the setting of fixed bits is somewhat of a waste of jit time.
Fair point, I'll instead make load_acq/store_rel's MASK/VALUE include
those (1) bits.
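Concretely, folding the Rs (bits 16-20) and Rt2 (bits 10-14) fields into
both MASK and VALUE would give something along these lines (my own bit
arithmetic from the manual sections cited above, untested, to be
double-checked in the next version):

__AARCH64_INSN_FUNCS(load_acq,  0x3FDFFC00, 0x08DFFC00)
__AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00)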
> > On a related note, I simply grabbed {load,store}_ex's MASK and VALUE,
> > then set their 15th and 23rd bits to make them load-acquire and
> > store-release:
> >
> > +__AARCH64_INSN_FUNCS(load_acq, 0x3FC08000, 0x08C08000)
> > +__AARCH64_INSN_FUNCS(store_rel, 0x3FC08000, 0x08808000)
> > __AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
> > __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
> >
> > My question is, should we extend {load,store}_ex's MASK to make them
> > contain BIT(15) and BIT(23) as well? As-is, aarch64_insn_is_load_ex()
> > would return true for a load-acquire.
> >
> > The only user of aarch64_insn_is_load_ex() seems to be this
> > arm64-specific kprobe code in arch/arm64/kernel/probes/decode-insn.c:
> >
> > #ifdef CONFIG_KPROBES
> > static bool __kprobes
> > is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
> > {
> > while (scan_start >= scan_end) {
> > /*
> > * atomic region starts from exclusive load and ends with
> > * exclusive store.
> > */
> > if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start)))
> > return false;
> > else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start)))
> > return true;
> >
> > But I'm not sure yet if changing {load,store}_ex's MASK would affect the
> > above code. Do you happen to know the context?
>
> IIUC, this code prevents kprobes from interrupting an LL-SC loop constructed
> by an LDXR/STXR pair, as the kprobe trap causes unexpected memory accesses
> that prevent the exclusive memory access loop from exiting.
>
> Since load-acquire/store-release instructions are not used to construct LL-SC
> loops, I think it is safe to exclude them from {load,store}_ex.
Ah, I see, thanks! I'll extend {load,store}_ex's MASK to prevent
aarch64_insn_is_{load,store}_ex() from returning false-positives for
load-acquire/store-release.
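That is, roughly (untested; since bits 15 and 23 are both 0 for
LDXR{,B,H} and STXR{,B,H}, only the MASK should need to change):

__AARCH64_INSN_FUNCS(load_ex,  0x3FC08000, 0x08400000)
__AARCH64_INSN_FUNCS(store_ex, 0x3FC08000, 0x08000000)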
Thanks,
Peilin Ye
On Thu, Dec 26, 2024 at 11:07:10PM +0000, Peilin Ye wrote:
> > > + if (BPF_ATOMIC_TYPE(insn->imm) == BPF_ATOMIC_LOAD)
> > > + ptr = src;
> > > + else
> > > + ptr = dst;
> > > +
> > > + if (off) {
> > > + emit_a64_mov_i(true, tmp, off, ctx);
> > > + emit(A64_ADD(true, tmp, tmp, ptr), ctx);
> >
> > The mov and add instructions can be optimized to a single A64_ADD_I
> > if is_addsub_imm(off) is true.
>
> Thanks! I'll try this.
The following diff seems to work:
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -658,9 +658,15 @@ static int emit_atomic_load_store(const struct bpf_insn *insn, struct jit_ctx *c
ptr = dst;
if (off) {
- emit_a64_mov_i(true, tmp, off, ctx);
- emit(A64_ADD(true, tmp, tmp, ptr), ctx);
- ptr = tmp;
+ if (is_addsub_imm(off)) {
+ emit(A64_ADD_I(true, ptr, ptr, off), ctx);
+ } else if (is_addsub_imm(-off)) {
+ emit(A64_SUB_I(true, ptr, ptr, -off), ctx);
+ } else {
+ emit_a64_mov_i(true, tmp, off, ctx);
+ emit(A64_ADD(true, tmp, tmp, ptr), ctx);
+ ptr = tmp;
+ }
}
if (arena) {
emit(A64_ADD(true, tmp, ptr, arena_vm_base), ctx);
I'll include it in the next version. I think the same thing can be done
for emit_lse_atomic() and emit_ll_sc_atomic(); let me do that in a
separate patch.
Thanks,
Peilin Ye
On Fri, Dec 27, 2024 at 12:23:22AM +0000, Peilin Ye wrote:
> if (off) {
> - emit_a64_mov_i(true, tmp, off, ctx);
> - emit(A64_ADD(true, tmp, tmp, ptr), ctx);
> - ptr = tmp;
> + if (is_addsub_imm(off)) {
> + emit(A64_ADD_I(true, ptr, ptr, off), ctx);
~~~
> + } else if (is_addsub_imm(-off)) {
> + emit(A64_SUB_I(true, ptr, ptr, -off), ctx);
~~~
No, I must not write to the 'ptr' register here.
> + } else {
> + emit_a64_mov_i(true, tmp, off, ctx);
> + emit(A64_ADD(true, tmp, tmp, ptr), ctx);
> + ptr = tmp;
> + }
> }
I will do this instead:
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -658,8 +658,14 @@ static int emit_atomic_load_store(const struct bpf_insn *insn, struct jit_ctx *c
ptr = dst;
if (off) {
- emit_a64_mov_i(true, tmp, off, ctx);
- emit(A64_ADD(true, tmp, tmp, ptr), ctx);
+ if (is_addsub_imm(off)) {
+ emit(A64_ADD_I(true, tmp, ptr, off), ctx);
+ } else if (is_addsub_imm(-off)) {
+ emit(A64_SUB_I(true, tmp, ptr, -off), ctx);
+ } else {
+ emit_a64_mov_i(true, tmp, off, ctx);
+ emit(A64_ADD(true, tmp, tmp, ptr), ctx);
+ }
ptr = tmp;
}
if (arena) {
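(Writing the sum into 'tmp' rather than 'ptr' keeps the original pointer
register, which may still hold a live BPF register value, unmodified;
'ptr' is then redirected to 'tmp' in all three cases.)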
Thanks,
Peilin Ye