Support BPF load-acquire (BPF_LOAD_ACQ) and store-release
(BPF_STORE_REL) instructions in the arm64 JIT compiler. For example
(assuming little-endian):
db 10 00 00 00 01 00 00 r0 = load_acquire((u64 *)(r1 + 0x0))
95 00 00 00 00 00 00 00 exit
opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
imm (0x00000100): BPF_LOAD_ACQ
The JIT compiler would emit an LDAR instruction for the above, e.g.:
ldar x7, [x0]
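
For reference, a load-acquire like the one above could come from C source
along the following lines. This is only a sketch, assuming a compiler that
lowers __atomic_load_n(..., __ATOMIC_ACQUIRE) to BPF_LOAD_ACQ; the function
name is made up:

  /* Illustrative only: relies on the compiler mapping
   * __atomic_load_n(..., __ATOMIC_ACQUIRE) to BPF_LOAD_ACQ.
   */
  __u64 read_counter(__u64 *ptr)
  {
          return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
  }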
Similarly, consider the following 16-bit store-release:
cb 21 00 00 10 01 00 00 store_release((u16 *)(r1 + 0x0), w2)
95 00 00 00 00 00 00 00 exit
opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
imm (0x00000110): BPF_STORE_REL
An STLRH instruction would be emitted, e.g.:
stlrh w1, [x0]
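
As with the load-acquire case, a store-release like this could originate
from C source roughly as follows (again only a sketch, assuming
__atomic_store_n(..., __ATOMIC_RELEASE) is lowered to BPF_STORE_REL; the
function name is made up):

  /* Illustrative only. */
  void publish_flag(__u16 *ptr, __u16 val)
  {
          __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
  }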
For a complete mapping:
  load-acquire    8-bit  LDARB
 (BPF_LOAD_ACQ)  16-bit  LDARH
                 32-bit  LDAR (32-bit)
                 64-bit  LDAR (64-bit)

  store-release   8-bit  STLRB
 (BPF_STORE_REL) 16-bit  STLRH
                 32-bit  STLR (32-bit)
                 64-bit  STLR (64-bit)
Arena accesses are supported.
bpf_jit_supports_insn(..., /*in_arena=*/true) always returns true for
BPF_LOAD_ACQ and BPF_STORE_REL instructions, as they don't depend on
ARM64_HAS_LSE_ATOMICS.
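
For an arena access with a non-zero offset, the JIT first builds the
effective address in a temporary register and then issues the load-acquire
or store-release through it. Roughly, for a 64-bit load-acquire at
(r1 + 0x10) from the arena (register numbers and the 0x10 offset are
illustrative only):

  add x10, x0, #0x10    // tmp = r1 + off
  add x10, x10, x28     // tmp += arena base (ARENA_VM_START)
  ldar x7, [x10]        // r0 = load_acquire(tmp)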
Signed-off-by: Peilin Ye <yepeilin@google.com>
---
arch/arm64/net/bpf_jit.h | 20 ++++++++
arch/arm64/net/bpf_jit_comp.c | 91 ++++++++++++++++++++++++++++++++---
2 files changed, 105 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index b22ab2f97a30..a3b0e693a125 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -119,6 +119,26 @@
aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \
AARCH64_INSN_LDST_STORE_REL_EX)
+/* Load-acquire & store-release */
+#define A64_LDAR(Rt, Rn, size) \
+ aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
+ AARCH64_INSN_LDST_LOAD_ACQ)
+#define A64_STLR(Rt, Rn, size) \
+ aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
+ AARCH64_INSN_LDST_STORE_REL)
+
+/* Rt = [Rn] (load acquire) */
+#define A64_LDARB(Wt, Xn) A64_LDAR(Wt, Xn, 8)
+#define A64_LDARH(Wt, Xn) A64_LDAR(Wt, Xn, 16)
+#define A64_LDAR32(Wt, Xn) A64_LDAR(Wt, Xn, 32)
+#define A64_LDAR64(Xt, Xn) A64_LDAR(Xt, Xn, 64)
+
+/* [Rn] = Rt (store release) */
+#define A64_STLRB(Wt, Xn) A64_STLR(Wt, Xn, 8)
+#define A64_STLRH(Wt, Xn) A64_STLR(Wt, Xn, 16)
+#define A64_STLR32(Wt, Xn) A64_STLR(Wt, Xn, 32)
+#define A64_STLR64(Xt, Xn) A64_STLR(Xt, Xn, 64)
+
/*
* LSE atomics
*
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 8c3b47d9e441..25562bdb8eb5 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -647,6 +647,82 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
return 0;
}
+static int emit_atomic_load_store(const struct bpf_insn *insn,
+ struct jit_ctx *ctx)
+{
+ const s32 imm = insn->imm;
+ const s16 off = insn->off;
+ const u8 code = insn->code;
+ const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
+ const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+ const u8 dst = bpf2a64[insn->dst_reg];
+ const u8 src = bpf2a64[insn->src_reg];
+ const u8 tmp = bpf2a64[TMP_REG_1];
+ u8 reg;
+
+ switch (imm) {
+ case BPF_LOAD_ACQ:
+ reg = src;
+ break;
+ case BPF_STORE_REL:
+ reg = dst;
+ break;
+ default:
+ pr_err_once("unknown atomic load/store op code %02x\n", imm);
+ return -EINVAL;
+ }
+
+ if (off) {
+ emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
+ reg = tmp;
+ }
+ if (arena) {
+ emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
+ reg = tmp;
+ }
+
+ switch (imm) {
+ case BPF_LOAD_ACQ:
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ emit(A64_LDARB(dst, reg), ctx);
+ break;
+ case BPF_H:
+ emit(A64_LDARH(dst, reg), ctx);
+ break;
+ case BPF_W:
+ emit(A64_LDAR32(dst, reg), ctx);
+ break;
+ case BPF_DW:
+ emit(A64_LDAR64(dst, reg), ctx);
+ break;
+ }
+ break;
+ case BPF_STORE_REL:
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ emit(A64_STLRB(src, reg), ctx);
+ break;
+ case BPF_H:
+ emit(A64_STLRH(src, reg), ctx);
+ break;
+ case BPF_W:
+ emit(A64_STLR32(src, reg), ctx);
+ break;
+ case BPF_DW:
+ emit(A64_STLR64(src, reg), ctx);
+ break;
+ }
+ break;
+ default:
+ pr_err_once("unexpected atomic load/store op code %02x\n",
+ imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_ARM64_LSE_ATOMICS
static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
@@ -1641,11 +1717,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
return ret;
break;
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
- if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+ if (bpf_atomic_is_load_store(insn))
+ ret = emit_atomic_load_store(insn, ctx);
+ else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
ret = emit_lse_atomic(insn, ctx);
else
ret = emit_ll_sc_atomic(insn, ctx);
@@ -2667,13 +2749,10 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
if (!in_arena)
return true;
switch (insn->code) {
- case BPF_STX | BPF_ATOMIC | BPF_B:
- case BPF_STX | BPF_ATOMIC | BPF_H:
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
- if (bpf_atomic_is_load_store(insn))
- return false;
- if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+ if (!bpf_atomic_is_load_store(insn) &&
+ !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
return false;
}
return true;
--
2.48.1.601.g30ceb7b040-goog
On 2/20/2025 9:21 AM, Peilin Ye wrote:
> [...]
Acked-by: Xu Kuohai <xukuohai@huawei.com>