Introduce BPF instructions with load-acquire and store-release
semantics, as discussed in [1]. The following new flags are defined:
BPF_ATOMIC_LOAD 0x10
BPF_ATOMIC_STORE 0x20
BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
BPF_RELAXED 0x0
BPF_ACQUIRE 0x1
BPF_RELEASE 0x2
BPF_ACQ_REL 0x3
BPF_SEQ_CST 0x4
BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE)
BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE)
A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
field set to BPF_LOAD_ACQ (0x11).
Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
the 'imm' field set to BPF_STORE_REL (0x22).
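That is, in hex the two 'imm' values decompose as:
	BPF_LOAD_ACQ  (0x11) == BPF_ATOMIC_LOAD  (0x10) | BPF_ACQUIRE (0x01)
	BPF_STORE_REL (0x22) == BPF_ATOMIC_STORE (0x20) | BPF_RELEASE (0x02)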
Unlike existing atomic operations that only support BPF_W (32-bit) and
BPF_DW (64-bit) size modifiers, load-acquires and store-releases also
support BPF_B (8-bit) and BPF_H (16-bit). An 8- or 16-bit load-acquire
zero-extends the value before writing it to a 32-bit register, just like
ARM64 instructions such as LDARH.
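In terms of the kernel memory-model helpers, an 8-bit load-acquire
behaves roughly like the following C sketch, where 'src' stands for the
source address and 'dst' for the destination register (illustration
only, not actual interpreter code):
	u8 v = smp_load_acquire((u8 *)src);	/* 8-bit acquire load */
	dst = (u64)v;				/* zero-extended into the register */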
As an example, consider the following 64-bit load-acquire BPF
instruction:
db 10 00 00 11 00 00 00 r0 = load_acquire((u64 *)(r1 + 0x0))
opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
imm (0x00000011): BPF_LOAD_ACQ
Similarly, a 16-bit BPF store-release:
cb 21 00 00 22 00 00 00 store_release((u16 *)(r1 + 0x0), w2)
opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
imm (0x00000022): BPF_STORE_REL
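For illustration, the two instructions above can be constructed with
the BPF_ATOMIC_OP() macro from include/linux/filter.h:
	BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0)
	BPF_ATOMIC_OP(BPF_H, BPF_STORE_REL, BPF_REG_1, BPF_REG_2, 0)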
[1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/
Signed-off-by: Peilin Ye <yepeilin@google.com>
---
include/linux/filter.h | 2 +
include/uapi/linux/bpf.h | 13 +++++
kernel/bpf/core.c | 41 +++++++++++++-
kernel/bpf/disasm.c | 12 +++++
kernel/bpf/verifier.c | 99 ++++++++++++++++++++++++++++++++--
tools/include/uapi/linux/bpf.h | 13 +++++
6 files changed, 175 insertions(+), 5 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a3ea46281595..e36812a5b01f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -364,6 +364,8 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn)
* BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg);
* BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
* BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
+ * BPF_LOAD_ACQ dst_reg = smp_load_acquire(src_reg + off16)
+ * BPF_STORE_REL smp_store_release(dst_reg + off16, src_reg)
*/
#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2acf9b336371..4a20a125eb46 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -51,6 +51,19 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+#define BPF_ATOMIC_LOAD 0x10
+#define BPF_ATOMIC_STORE 0x20
+#define BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
+
+#define BPF_RELAXED 0x00
+#define BPF_ACQUIRE 0x01
+#define BPF_RELEASE 0x02
+#define BPF_ACQ_REL 0x03
+#define BPF_SEQ_CST 0x04
+
+#define BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) /* load-acquire */
+#define BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) /* store-release */
+
enum bpf_cond_pseudo_jmp {
BPF_MAY_GOTO = 0,
};
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index da729cbbaeb9..ab082ab9d535 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
INSN_3(JMP, JSET, K), \
INSN_2(JMP, JA), \
INSN_2(JMP32, JA), \
+ /* Atomic operations. */ \
+ INSN_3(STX, ATOMIC, B), \
+ INSN_3(STX, ATOMIC, H), \
+ INSN_3(STX, ATOMIC, W), \
+ INSN_3(STX, ATOMIC, DW), \
/* Store instructions. */ \
/* Register based. */ \
INSN_3(STX, MEM, B), \
INSN_3(STX, MEM, H), \
INSN_3(STX, MEM, W), \
INSN_3(STX, MEM, DW), \
- INSN_3(STX, ATOMIC, W), \
- INSN_3(STX, ATOMIC, DW), \
/* Immediate based. */ \
INSN_3(ST, MEM, B), \
INSN_3(ST, MEM, H), \
@@ -2169,6 +2172,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
STX_ATOMIC_DW:
STX_ATOMIC_W:
+ STX_ATOMIC_H:
+ STX_ATOMIC_B:
switch (IMM) {
ATOMIC_ALU_OP(BPF_ADD, add)
ATOMIC_ALU_OP(BPF_AND, and)
@@ -2196,6 +2201,38 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
(atomic64_t *)(unsigned long) (DST + insn->off),
(u64) BPF_R0, (u64) SRC);
break;
+ case BPF_LOAD_ACQ:
+ switch (BPF_SIZE(insn->code)) {
+#define LOAD_ACQUIRE(SIZEOP, SIZE) \
+ case BPF_##SIZEOP: \
+ DST = (SIZE)smp_load_acquire( \
+ (SIZE *)(unsigned long)(SRC + insn->off)); \
+ break;
+ LOAD_ACQUIRE(B, u8)
+ LOAD_ACQUIRE(H, u16)
+ LOAD_ACQUIRE(W, u32)
+ LOAD_ACQUIRE(DW, u64)
+#undef LOAD_ACQUIRE
+ default:
+ goto default_label;
+ }
+ break;
+ case BPF_STORE_REL:
+ switch (BPF_SIZE(insn->code)) {
+#define STORE_RELEASE(SIZEOP, SIZE) \
+ case BPF_##SIZEOP: \
+ smp_store_release( \
+ (SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC); \
+ break;
+ STORE_RELEASE(B, u8)
+ STORE_RELEASE(H, u16)
+ STORE_RELEASE(W, u32)
+ STORE_RELEASE(DW, u64)
+#undef STORE_RELEASE
+ default:
+ goto default_label;
+ }
+ break;
default:
goto default_label;
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 309c4aa1b026..974d172d6735 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -267,6 +267,18 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg, insn->off, insn->src_reg);
+ } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+ insn->imm == BPF_LOAD_ACQ) {
+ verbose(cbs->private_data, "(%02x) r%d = load_acquire((%s *)(r%d %+d))\n",
+ insn->code, insn->dst_reg,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->src_reg, insn->off);
+ } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+ insn->imm == BPF_STORE_REL) {
+ verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), r%d)\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->dst_reg, insn->off, insn->src_reg);
} else {
verbose(cbs->private_data, "BUG_%02x\n", insn->code);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2b3caa7549af..e44abe22aac9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -579,6 +579,13 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn)
insn->imm == BPF_CMPXCHG;
}
+static bool is_atomic_load_insn(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_STX &&
+ BPF_MODE(insn->code) == BPF_ATOMIC &&
+ BPF_ATOMIC_TYPE(insn->imm) == BPF_ATOMIC_LOAD;
+}
+
static int __get_spi(s32 off)
{
return (-off - 1) / BPF_REG_SIZE;
@@ -3481,7 +3488,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
if (class == BPF_STX) {
- /* BPF_STX (including atomic variants) has multiple source
+ /* BPF_STX (including atomic variants) has one or more source
* operands, one of which is a ptr. Check whether the caller is
* asking about it.
*/
@@ -4095,7 +4102,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* dreg still needs precision before this insn
*/
}
- } else if (class == BPF_LDX) {
+ } else if (class == BPF_LDX || is_atomic_load_insn(insn)) {
if (!bt_is_reg_set(bt, dreg))
return 0;
bt_clear_reg(bt, dreg);
@@ -7625,6 +7632,86 @@ static int check_atomic_rmw(struct bpf_verifier_env *env, int insn_idx,
return 0;
}
+static int check_atomic_load(struct bpf_verifier_env *env, int insn_idx,
+ struct bpf_insn *insn)
+{
+ struct bpf_reg_state *regs = cur_regs(env);
+ int err;
+
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
+ if (err)
+ return err;
+
+ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
+ if (err)
+ return err;
+
+ if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
+ verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
+ insn->src_reg,
+ reg_type_str(env, reg_state(env, insn->src_reg)->type));
+ return -EACCES;
+ }
+
+ if (is_arena_reg(env, insn->src_reg)) {
+ err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
+ if (err)
+ return err;
+ }
+
+ /* Check whether we can read the memory. */
+ err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
+ BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
+ true, false);
+ if (err)
+ return err;
+
+ err = reg_bounds_sanity_check(env, &regs[insn->dst_reg], "atomic_load");
+ if (err)
+ return err;
+ return 0;
+}
+
+static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx,
+ struct bpf_insn *insn)
+{
+ int err;
+
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
+ if (err)
+ return err;
+
+ err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+ if (err)
+ return err;
+
+ if (is_pointer_value(env, insn->src_reg)) {
+ verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
+ return -EACCES;
+ }
+
+ if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
+ verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
+ insn->dst_reg,
+ reg_type_str(env, reg_state(env, insn->dst_reg)->type));
+ return -EACCES;
+ }
+
+ if (is_arena_reg(env, insn->dst_reg)) {
+ err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
+ if (err)
+ return err;
+ }
+
+ /* Check whether we can write into the memory. */
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+ BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
+ true, false);
+ if (err)
+ return err;
+ return 0;
+}
+
static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
{
switch (insn->imm) {
@@ -7639,6 +7726,10 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
case BPF_XCHG:
case BPF_CMPXCHG:
return check_atomic_rmw(env, insn_idx, insn);
+ case BPF_LOAD_ACQ:
+ return check_atomic_load(env, insn_idx, insn);
+ case BPF_STORE_REL:
+ return check_atomic_store(env, insn_idx, insn);
default:
verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
return -EINVAL;
@@ -20420,7 +20511,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
type = BPF_WRITE;
- } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
+ } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2acf9b336371..4a20a125eb46 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -51,6 +51,19 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+#define BPF_ATOMIC_LOAD 0x10
+#define BPF_ATOMIC_STORE 0x20
+#define BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
+
+#define BPF_RELAXED 0x00
+#define BPF_ACQUIRE 0x01
+#define BPF_RELEASE 0x02
+#define BPF_ACQ_REL 0x03
+#define BPF_SEQ_CST 0x04
+
+#define BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) /* load-acquire */
+#define BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) /* store-release */
+
enum bpf_cond_pseudo_jmp {
BPF_MAY_GOTO = 0,
};
--
2.48.1.262.g85cc9f2d1e-goog
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index da729cbbaeb9..ab082ab9d535 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
> INSN_3(JMP, JSET, K), \
> INSN_2(JMP, JA), \
> INSN_2(JMP32, JA), \
> + /* Atomic operations. */ \
> + INSN_3(STX, ATOMIC, B), \
> + INSN_3(STX, ATOMIC, H), \
> + INSN_3(STX, ATOMIC, W), \
> + INSN_3(STX, ATOMIC, DW), \
> /* Store instructions. */ \
> /* Register based. */ \
> INSN_3(STX, MEM, B), \
> INSN_3(STX, MEM, H), \
> INSN_3(STX, MEM, W), \
> INSN_3(STX, MEM, DW), \
> - INSN_3(STX, ATOMIC, W), \
> - INSN_3(STX, ATOMIC, DW), \
> /* Immediate based. */ \
> INSN_3(ST, MEM, B), \
> INSN_3(ST, MEM, H), \
> @@ -2169,6 +2172,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
>
> STX_ATOMIC_DW:
> STX_ATOMIC_W:
> + STX_ATOMIC_H:
> + STX_ATOMIC_B:
> switch (IMM) {
> ATOMIC_ALU_OP(BPF_ADD, add)
> ATOMIC_ALU_OP(BPF_AND, and)
STX_ATOMIC_[BH] looks wrong.
It will do atomic64_*() ops in the wrong size.
BPF_INSN_MAP() applies to bpf_opcode_in_insntable()
and the verifier will allow such new insns.
Hi Alexei,
On Wed, Jan 29, 2025 at 04:41:31PM -0800, Alexei Starovoitov wrote:
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index da729cbbaeb9..ab082ab9d535 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
> > INSN_3(JMP, JSET, K), \
> > INSN_2(JMP, JA), \
> > INSN_2(JMP32, JA), \
> > + /* Atomic operations. */ \
> > + INSN_3(STX, ATOMIC, B), \
> > + INSN_3(STX, ATOMIC, H), \
> > + INSN_3(STX, ATOMIC, W), \
> > + INSN_3(STX, ATOMIC, DW), \
> > /* Store instructions. */ \
> > /* Register based. */ \
> > INSN_3(STX, MEM, B), \
> > INSN_3(STX, MEM, H), \
> > INSN_3(STX, MEM, W), \
> > INSN_3(STX, MEM, DW), \
> > - INSN_3(STX, ATOMIC, W), \
> > - INSN_3(STX, ATOMIC, DW), \
> > /* Immediate based. */ \
> > INSN_3(ST, MEM, B), \
> > INSN_3(ST, MEM, H), \
> > @@ -2169,6 +2172,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
> >
> > STX_ATOMIC_DW:
> > STX_ATOMIC_W:
> > + STX_ATOMIC_H:
> > + STX_ATOMIC_B:
> > switch (IMM) {
> > ATOMIC_ALU_OP(BPF_ADD, add)
> > ATOMIC_ALU_OP(BPF_AND, and)
>
> STX_ATOMIC_[BH] looks wrong.
> It will do atomic64_*() ops in the wrong size.
> BPF_INSN_MAP() applies to bpf_opcode_in_insntable()
> and the verifier will allow such new insns.
We still have this check in check_atomic():
	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
		verbose(env, "invalid atomic operand size\n");
		return -EINVAL;
	}
(moved to check_atomic_rmw() in PATCH 2/8)
Looks like it cannot be triggered before this patch, since
STX_ATOMIC_[BH] would've already been rejected by that
bpf_opcode_in_insntable() check before reaching check_atomic().
I agree that the interpreter code handling RMW atomics might now look a
bit confusing though. In v2 I'll refactor that part and/or add comments
to make it clearer in the code that:
* only W and DW are allowed for atomic RMW
* all of B, H, W and DW are allowed for atomic load/store
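For example, something along these lines in ___bpf_prog_run() (a rough
sketch, not necessarily what v2 will look like):
	STX_ATOMIC_B:
	STX_ATOMIC_H:
	STX_ATOMIC_W:
	STX_ATOMIC_DW:
		/* B and H can only reach here as BPF_LOAD_ACQ or
		 * BPF_STORE_REL; the verifier rejects BPF_B/BPF_H for
		 * the read-modify-write operations handled below.
		 */
		switch (IMM) {
		...
		}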
Thanks,
Peilin Ye
On Sat, 2025-01-25 at 02:18 +0000, Peilin Ye wrote:
[...]
> +static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx,
> + struct bpf_insn *insn)
> +{
> + int err;
> +
> + err = check_reg_arg(env, insn->src_reg, SRC_OP);
> + if (err)
> + return err;
> +
> + err = check_reg_arg(env, insn->dst_reg, SRC_OP);
> + if (err)
> + return err;
> +
> + if (is_pointer_value(env, insn->src_reg)) {
> + verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
> + return -EACCES;
> + }
Nit: this check is done by check_mem_access(), albeit only for
PTR_TO_MEM, I think it's better to be consistent with
what happens for regular stores and avoid this check here.
> +
> + if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
> + verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
> + insn->dst_reg,
> + reg_type_str(env, reg_state(env, insn->dst_reg)->type));
> + return -EACCES;
> + }
> +
> + if (is_arena_reg(env, insn->dst_reg)) {
> + err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> + if (err)
> + return err;
> + }
> +
> + /* Check whether we can write into the memory. */
> + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> + BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
> + true, false);
> + if (err)
> + return err;
> + return 0;
> +}
[...]
On Tue, Jan 28, 2025 at 05:30:19PM -0800, Eduard Zingerman wrote:
> On Sat, 2025-01-25 at 02:18 +0000, Peilin Ye wrote:
> > +static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx,
> > + struct bpf_insn *insn)
> > +{
> > + int err;
> > +
> > + err = check_reg_arg(env, insn->src_reg, SRC_OP);
> > + if (err)
> > + return err;
> > +
> > + err = check_reg_arg(env, insn->dst_reg, SRC_OP);
> > + if (err)
> > + return err;
> > +
> > + if (is_pointer_value(env, insn->src_reg)) {
> > + verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
> > + return -EACCES;
> > + }
>
> Nit: this check is done by check_mem_access(), albeit only for
> PTR_TO_MEM, I think it's better to be consistent with
> what happens for regular stores and avoid this check here.
Got it. Unprivileged programs will be able to store-release pointers to
the stack, then. I'll update selftests accordingly.
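E.g., unprivileged programs should then be able to load something like
the following (a sketch using the insn macros from
include/linux/filter.h):
	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
	/* store-release a stack pointer onto the stack */
	BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8),
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),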
Thanks,
Peilin Ye
On Sat, 2025-01-25 at 02:18 +0000, Peilin Ye wrote:
> Introduce BPF instructions with load-acquire and store-release
> semantics, as discussed in [1]. The following new flags are defined:
>
> BPF_ATOMIC_LOAD 0x10
> BPF_ATOMIC_STORE 0x20
> BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
>
> BPF_RELAXED 0x0
> BPF_ACQUIRE 0x1
> BPF_RELEASE 0x2
> BPF_ACQ_REL 0x3
> BPF_SEQ_CST 0x4
>
> BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE)
> BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE)
>
> A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
> field set to BPF_LOAD_ACQ (0x11).
>
> Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
> the 'imm' field set to BPF_STORE_REL (0x22).
>
> Unlike existing atomic operations that only support BPF_W (32-bit) and
> BPF_DW (64-bit) size modifiers, load-acquires and store-releases also
> support BPF_B (8-bit) and BPF_H (16-bit). An 8- or 16-bit load-acquire
> zero-extends the value before writing it to a 32-bit register, just like
> ARM64 instructions such as LDARH.
>
> As an example, consider the following 64-bit load-acquire BPF
> instruction:
>
> db 10 00 00 11 00 00 00 r0 = load_acquire((u64 *)(r1 + 0x0))
>
> opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
> imm (0x00000011): BPF_LOAD_ACQ
>
> Similarly, a 16-bit BPF store-release:
>
> cb 21 00 00 22 00 00 00 store_release((u16 *)(r1 + 0x0), w2)
>
> opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
> imm (0x00000022): BPF_STORE_REL
>
> [1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/
>
> Signed-off-by: Peilin Ye <yepeilin@google.com>
> ---
I think bpf_jit_supports_insn() in arch/{x86,s390}/net/bpf_jit_comp.c
need an update, as both would accept BPF_LOAD_ACQ/BPF_STORE_REL at the
moment.
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
[...]
> +static int check_atomic_load(struct bpf_verifier_env *env, int insn_idx,
> + struct bpf_insn *insn)
> +{
> + struct bpf_reg_state *regs = cur_regs(env);
> + int err;
> +
> + err = check_reg_arg(env, insn->src_reg, SRC_OP);
> + if (err)
> + return err;
> +
> + err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
> + if (err)
> + return err;
> +
> + if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
> + verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
> + insn->src_reg,
> + reg_type_str(env, reg_state(env, insn->src_reg)->type));
> + return -EACCES;
> + }
> +
> + if (is_arena_reg(env, insn->src_reg)) {
> + err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> + if (err)
> + return err;
Nit: this and the next function look very similar to processing of
generic load and store in do_check(). Maybe extract that code
as an auxiliary function and call it in both places?
The only major difference is is_arena_reg() check guarding
save_aux_ptr_type(), but I think it is ok to do save_aux_ptr_type
unconditionally. Fwiw, the code would be a bit simpler,
just spent half an hour convincing myself that such conditional handling
is not an error. Wdyt?
> + }
> +
> + /* Check whether we can read the memory. */
> + err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
> + BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
> + true, false);
> + if (err)
> + return err;
> +
> > + err = reg_bounds_sanity_check(env, &regs[insn->dst_reg], "atomic_load");
> + if (err)
> + return err;
> + return 0;
> +}
> +
> +static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx,
> + struct bpf_insn *insn)
> +{
> + int err;
> +
> + err = check_reg_arg(env, insn->src_reg, SRC_OP);
> + if (err)
> + return err;
> +
> + err = check_reg_arg(env, insn->dst_reg, SRC_OP);
> + if (err)
> + return err;
> +
> + if (is_pointer_value(env, insn->src_reg)) {
> + verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
> + return -EACCES;
> + }
> +
> + if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
> + verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
> + insn->dst_reg,
> + reg_type_str(env, reg_state(env, insn->dst_reg)->type));
> + return -EACCES;
> + }
> +
> + if (is_arena_reg(env, insn->dst_reg)) {
> + err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> + if (err)
> + return err;
> + }
> +
> + /* Check whether we can write into the memory. */
> + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> + BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
> + true, false);
> + if (err)
> + return err;
> + return 0;
> +}
> +
[...]
On Tue, Jan 28, 2025 at 04:19:25PM -0800, Eduard Zingerman wrote:
> On Sat, 2025-01-25 at 02:18 +0000, Peilin Ye wrote:
> > Signed-off-by: Peilin Ye <yepeilin@google.com>
> > ---
>
> I think bpf_jit_supports_insn() in arch/{x86,s390}/net/bpf_jit_comp.c
> need an update, as both would accept BPF_LOAD_ACQ/BPF_STORE_REL at the
> moment.
Got it - I will move is_atomic_load_store() into <linux/bpf.h> for that.
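I.e., something like this (a sketch; the exact name and placement may
still change):
	/* sketch only: does 'imm' encode an atomic load or store? */
	static inline bool is_atomic_load_store(const struct bpf_insn *insn)
	{
		const s32 type = BPF_ATOMIC_TYPE(insn->imm);
		return type == BPF_ATOMIC_LOAD || type == BPF_ATOMIC_STORE;
	}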
> Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Thanks!
> > +static int check_atomic_load(struct bpf_verifier_env *env, int insn_idx,
> > + struct bpf_insn *insn)
> > +{
> > + struct bpf_reg_state *regs = cur_regs(env);
> > + int err;
> > +
> > + err = check_reg_arg(env, insn->src_reg, SRC_OP);
> > + if (err)
> > + return err;
> > +
> > + err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
> > + if (err)
> > + return err;
> > +
> > + if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
> > + verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
> > + insn->src_reg,
> > + reg_type_str(env, reg_state(env, insn->src_reg)->type));
> > + return -EACCES;
> > + }
> > +
> > + if (is_arena_reg(env, insn->src_reg)) {
> > + err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> > + if (err)
> > + return err;
>
> Nit: this and the next function look very similar to processing of
> generic load and store in do_check(). Maybe extract that code
> as an auxiliary function and call it in both places?
Sure, I agree that they look a bit repetitive.
> The only major difference is is_arena_reg() check guarding
> save_aux_ptr_type(), but I think it is ok to do save_aux_ptr_type
> unconditionally. Fwiw, the code would be a bit simpler,
> just spent half an hour convincing myself that such conditional handling
> is not an error. Wdyt?
:-O
Thanks a lot for that; would you mind sharing a bit more on how you
reasoned about it (i.e., why is it OK to save_aux_ptr_type()
unconditionally)?
> > + }
> > +
> > + /* Check whether we can read the memory. */
> > + err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
> > + BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
> > + true, false);
> > + if (err)
> > + return err;
> > +
> > > + err = reg_bounds_sanity_check(env, &regs[insn->dst_reg], "atomic_load");
> > + if (err)
> > + return err;
> > + return 0;
> > +}
> > +
> > +static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx,
> > + struct bpf_insn *insn)
Thanks,
Peilin Ye
On Wed, 2025-01-29 at 22:04 +0000, Peilin Ye wrote:
[...]
> > > +static int check_atomic_load(struct bpf_verifier_env *env, int insn_idx,
> > > + struct bpf_insn *insn)
> > > +{
> > > + struct bpf_reg_state *regs = cur_regs(env);
> > > + int err;
> > > +
> > > + err = check_reg_arg(env, insn->src_reg, SRC_OP);
> > > + if (err)
> > > + return err;
> > > +
> > > + err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
> > > + if (err)
> > > + return err;
> > > +
> > > + if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
> > > + verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
> > > + insn->src_reg,
> > > + reg_type_str(env, reg_state(env, insn->src_reg)->type));
> > > + return -EACCES;
> > > + }
> > > +
> > > + if (is_arena_reg(env, insn->src_reg)) {
> > > + err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> > > + if (err)
> > > + return err;
> >
> > Nit: this and the next function look very similar to processing of
> > generic load and store in do_check(). Maybe extract that code
> > as an auxiliary function and call it in both places?
>
> Sure, I agree that they look a bit repetitive.
>
> > The only major difference is is_arena_reg() check guarding
> > save_aux_ptr_type(), but I think it is ok to do save_aux_ptr_type
> > unconditionally. Fwiw, the code would be a bit simpler,
> > just spent half an hour convincing myself that such conditional handling
> > is not an error. Wdyt?
>
> :-O
>
> Thanks a lot for that; would you mind sharing a bit more on how you
> reasoned about it (i.e., why is it OK to save_aux_ptr_type()
> > unconditionally)?
Well, save_aux_ptr_type() does two things:
- if there is no env->insn_aux_data[env->insn_idx].ptr_type associated
with the instruction it saves one;
- if there is .ptr_type, it checks if a new one is compatible and
errors out if it's not.
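In simplified pseudo-code (a sketch, not verbatim verifier.c):
	prev = &env->insn_aux_data[env->insn_idx].ptr_type;
	if (*prev == NOT_INIT)
		*prev = type;
	else if (reg_type_mismatch(type, *prev))
		return -EINVAL; /* "same insn cannot be used with different pointers" */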
The .ptr_type is used in convert_ctx_accesses() to rewrite an access
instruction (STX/LDX, atomic or not) in a way specific to the pointer
type.
So, doing save_aux_ptr_type() conditionally is already sketchy, as
there is a risk of missing a case where the same instruction is used in
a context where the pointer type requires different rewrites.
convert_ctx_accesses() rewrites instructions for the following pointer
types:
- PTR_TO_CTX
- PTR_TO_SOCKET
- PTR_TO_SOCK_COMMON
- PTR_TO_TCP_SOCK
- PTR_TO_XDP_SOCK
- PTR_TO_BTF_ID
- PTR_TO_ARENA
atomic_ptr_type_ok() allows the following pointer types:
- CONST_PTR_TO_MAP
- PTR_TO_MAP_VALUE
- PTR_TO_MAP_KEY
- PTR_TO_STACK
- PTR_TO_BTF_ID
- PTR_TO_MEM
- PTR_TO_ARENA
- PTR_TO_BUF
- PTR_TO_FUNC
- CONST_PTR_TO_DYNPTR
One has to check rewrites applied by convert_ctx_accesses() to atomic
instructions to reason about correctness of the conditional
save_aux_ptr_type() call.
If the is_arena_reg() guard is removed from around the
save_aux_ptr_type() call, we risk rejecting programs that do atomic
loads/stores where the same instruction is used with a pointer that can
be any of the above types.
I speculate that this is not the problem, as do_check() processing for
BPF_STX/BPF_LDX already calls save_aux_ptr_type() unconditionally.
[...]
On Wed, Jan 29, 2025 at 02:42:41PM -0800, Eduard Zingerman wrote:
> On Wed, 2025-01-29 at 22:04 +0000, Peilin Ye wrote:
> > Thanks a lot for that; would you mind sharing a bit more on how you
> > reasoned about it (i.e., why is it OK to save_aux_ptr_type()
> > unconditionally)?
> Well, save_aux_ptr_type() does two things:
[...]
> I speculate that this is not the problem, as do_check() processing for
> BPF_STX/BPF_LDX already calls save_aux_ptr_type() unconditionally.
I see, thanks for the explanation!
Peilin Ye