[PATCH v6 2/5] bpf: Add bpf_list_add_impl to insert node after a given list node

Chengkaitao posted 5 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH v6 2/5] bpf: Add bpf_list_add_impl to insert node after a given list node
Posted by Chengkaitao 1 month, 1 week ago
From: Kaitao Cheng <chengkaitao@kylinos.cn>

Add a new kfunc bpf_list_add_impl(head, new, prev, meta, off) that
inserts 'new' after 'prev' in the BPF linked list. Both must be in
the same list; 'prev' must already be in the list. The new node must
be an owning reference (e.g. from bpf_obj_new); the kfunc consumes
that reference and the node becomes non-owning once inserted.

An additional parameter, struct bpf_list_head *head, is passed to
bpf_list_add_impl because the verifier needs the head argument to
verify that the lock associated with the list is held.

Returns 0 on success, or -EINVAL if 'prev' is not currently in a
list, if 'new' is already in a list (duplicate insertion), or if the
list head was zero-initialized by the map and had to be initialized
here. On failure, the kernel drops the passed-in node.

Signed-off-by: Kaitao Cheng <chengkaitao@kylinos.cn>
---
 kernel/bpf/helpers.c  | 34 ++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c | 17 +++++++++++++----
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index cc1a096a1f64..740b53024283 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2505,6 +2505,39 @@ __bpf_kfunc struct bpf_list_node *bpf_list_back(struct bpf_list_head *head)
 	return (struct bpf_list_node *)h->prev;
 }
 
+__bpf_kfunc int bpf_list_add_impl(struct bpf_list_head *head,
+				  struct bpf_list_node *new,
+				  struct bpf_list_node *prev,
+				  void *meta__ign, u64 off)
+{
+	struct bpf_list_node_kern *kn = (void *)new, *kp = (void *)prev;
+	struct btf_struct_meta *meta = meta__ign;
+	struct list_head *n = &kn->list_head, *p = &kp->list_head;
+	struct list_head *h = (void *)head;
+
+	/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
+	 * called on its fields, so init here
+	 */
+	if (unlikely(!h->next)) {
+		INIT_LIST_HEAD(h);
+		goto fail;
+	}
+
+	if (WARN_ON_ONCE(READ_ONCE(kp->owner) != head))
+		goto fail;
+
+	if (cmpxchg(&kn->owner, NULL, BPF_PTR_POISON))
+		goto fail;
+
+	list_add(n, p);
+	WRITE_ONCE(kn->owner, head);
+	return 0;
+
+fail:
+	__bpf_obj_drop_impl((void *)n - off, meta ? meta->record : NULL, false);
+	return -EINVAL;
+}
+
 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
 						  struct bpf_rb_node *node)
 {
@@ -4574,6 +4607,7 @@ BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_del, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_add_impl)
 BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c9557d3fb8dd..e458cf3b1dd1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12464,6 +12464,7 @@ enum special_kfunc_type {
 	KF_bpf_list_del,
 	KF_bpf_list_front,
 	KF_bpf_list_back,
+	KF_bpf_list_add_impl,
 	KF_bpf_cast_to_kern_ctx,
 	KF_bpf_rdonly_cast,
 	KF_bpf_rcu_read_lock,
@@ -12525,6 +12526,7 @@ BTF_ID(func, bpf_list_pop_back)
 BTF_ID(func, bpf_list_del)
 BTF_ID(func, bpf_list_front)
 BTF_ID(func, bpf_list_back)
+BTF_ID(func, bpf_list_add_impl)
 BTF_ID(func, bpf_cast_to_kern_ctx)
 BTF_ID(func, bpf_rdonly_cast)
 BTF_ID(func, bpf_rcu_read_lock)
@@ -13000,7 +13002,8 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
 	       btf_id == special_kfunc_list[KF_bpf_list_del] ||
 	       btf_id == special_kfunc_list[KF_bpf_list_front] ||
-	       btf_id == special_kfunc_list[KF_bpf_list_back];
+	       btf_id == special_kfunc_list[KF_bpf_list_back] ||
+	       btf_id == special_kfunc_list[KF_bpf_list_add_impl];
 }
 
 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
@@ -13122,7 +13125,8 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
 	case BPF_LIST_NODE:
 		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
 		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
-		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_del]);
+		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_del] ||
+		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_add_impl]);
 		break;
 	case BPF_RB_NODE:
 		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
@@ -14264,6 +14268,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
 	if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
 	    meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
+	    meta.func_id == special_kfunc_list[KF_bpf_list_add_impl] ||
 	    meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
 		release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
 		insn_aux->insert_off = regs[BPF_REG_2].off;
@@ -23230,13 +23235,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		*cnt = 3;
 	} else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
 		   desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
+		   desc->func_id == special_kfunc_list[KF_bpf_list_add_impl] ||
 		   desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
 		int struct_meta_reg = BPF_REG_3;
 		int node_offset_reg = BPF_REG_4;
 
-		/* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
-		if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+		/* list/rbtree_add_impl have an extra arg (prev/less),
+		 * so args-to-fixup are in different regs.
+		 */
+		if (desc->func_id == special_kfunc_list[KF_bpf_list_add_impl] ||
+		    desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
 			struct_meta_reg = BPF_REG_4;
 			node_offset_reg = BPF_REG_5;
 		}
-- 
2.50.1 (Apple Git-155)
Re: [PATCH v6 2/5] bpf: Add bpf_list_add_impl to insert node after a given list node
Posted by Kumar Kartikeya Dwivedi 1 month, 1 week ago
On Wed, 4 Mar 2026 at 15:55, Chengkaitao <pilgrimtao@gmail.com> wrote:
>
> From: Kaitao Cheng <chengkaitao@kylinos.cn>
>
> Add a new kfunc bpf_list_add_impl(head, new, prev, meta, off) that
> inserts 'new' after 'prev' in the BPF linked list. Both must be in
> the same list; 'prev' must already be in the list. The new node must
> be an owning reference (e.g. from bpf_obj_new); the kfunc consumes
> that reference and the node becomes non-owning once inserted.
>
> We have added an additional parameter bpf_list_head *head to
> bpf_list_add_impl, as the verifier requires the head parameter to
> check whether the lock is being held.
>
> Returns 0 on success, -EINVAL if 'prev' is not in a list or 'new'
> is already in a list (or duplicate insertion). On failure, the
> kernel drops the passed-in node.
>
> Signed-off-by: Kaitao Cheng <chengkaitao@kylinos.cn>
> ---
>  kernel/bpf/helpers.c  | 34 ++++++++++++++++++++++++++++++++++
>  kernel/bpf/verifier.c | 17 +++++++++++++----
>  2 files changed, 47 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index cc1a096a1f64..740b53024283 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -2505,6 +2505,39 @@ __bpf_kfunc struct bpf_list_node *bpf_list_back(struct bpf_list_head *head)
>         return (struct bpf_list_node *)h->prev;
>  }
>
> +__bpf_kfunc int bpf_list_add_impl(struct bpf_list_head *head,
> +                                 struct bpf_list_node *new,
> +                                 struct bpf_list_node *prev,
> +                                 void *meta__ign, u64 off)

I have pretty much the same comment here, there is a lot of
duplication between this and __bpf_list_add.
We can write list_add_tail in terms of __list_add, and then unify all of this.

> +{
> +       struct bpf_list_node_kern *kn = (void *)new, *kp = (void *)prev;
> +       struct btf_struct_meta *meta = meta__ign;
> +       struct list_head *n = &kn->list_head, *p = &kp->list_head;
> +       struct list_head *h = (void *)head;
> +
> +       /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
> +        * called on its fields, so init here
> +        */
> +       if (unlikely(!h->next)) {
> +               INIT_LIST_HEAD(h);
> +               goto fail;

Not clear to me why we should fail here; why can't we continue adding
after initializing the list head?
E.g. __bpf_list_add isn't doing it.

> +       }
> +
> +       if (WARN_ON_ONCE(READ_ONCE(kp->owner) != head))
> +               goto fail;
> +
> +       if (cmpxchg(&kn->owner, NULL, BPF_PTR_POISON))
> +               goto fail;
> +
> +       list_add(n, p);
> +       WRITE_ONCE(kn->owner, head);
> +       return 0;
> +
> +fail:
> +       __bpf_obj_drop_impl((void *)n - off, meta ? meta->record : NULL, false);
> +       return -EINVAL;
> +}
> +
>  __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
>                                                   struct bpf_rb_node *node)
>  {
> @@ -4574,6 +4607,7 @@ BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_list_del, KF_ACQUIRE | KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
> +BTF_ID_FLAGS(func, bpf_list_add_impl)
>  BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
>  BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index c9557d3fb8dd..e458cf3b1dd1 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -12464,6 +12464,7 @@ enum special_kfunc_type {
>         KF_bpf_list_del,
>         KF_bpf_list_front,
>         KF_bpf_list_back,
> +       KF_bpf_list_add_impl,
>         KF_bpf_cast_to_kern_ctx,
>         KF_bpf_rdonly_cast,
>         KF_bpf_rcu_read_lock,
> @@ -12525,6 +12526,7 @@ BTF_ID(func, bpf_list_pop_back)
>  BTF_ID(func, bpf_list_del)
>  BTF_ID(func, bpf_list_front)
>  BTF_ID(func, bpf_list_back)
> +BTF_ID(func, bpf_list_add_impl)
>  BTF_ID(func, bpf_cast_to_kern_ctx)
>  BTF_ID(func, bpf_rdonly_cast)
>  BTF_ID(func, bpf_rcu_read_lock)
> @@ -13000,7 +13002,8 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
>                btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
>                btf_id == special_kfunc_list[KF_bpf_list_del] ||
>                btf_id == special_kfunc_list[KF_bpf_list_front] ||
> -              btf_id == special_kfunc_list[KF_bpf_list_back];
> +              btf_id == special_kfunc_list[KF_bpf_list_back] ||
> +              btf_id == special_kfunc_list[KF_bpf_list_add_impl];
>  }
>
>  static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
> @@ -13122,7 +13125,8 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
>         case BPF_LIST_NODE:
>                 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
>                        kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
> -                      kfunc_btf_id == special_kfunc_list[KF_bpf_list_del]);
> +                      kfunc_btf_id == special_kfunc_list[KF_bpf_list_del] ||
> +                      kfunc_btf_id == special_kfunc_list[KF_bpf_list_add_impl]);
>                 break;
>         case BPF_RB_NODE:
>                 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
> @@ -14264,6 +14268,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>
>         if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
>             meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
> +           meta.func_id == special_kfunc_list[KF_bpf_list_add_impl] ||
>             meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
>                 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
>                 insn_aux->insert_off = regs[BPF_REG_2].off;
> @@ -23230,13 +23235,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                 *cnt = 3;
>         } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
>                    desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
> +                  desc->func_id == special_kfunc_list[KF_bpf_list_add_impl] ||
>                    desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
>                 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
>                 int struct_meta_reg = BPF_REG_3;
>                 int node_offset_reg = BPF_REG_4;
>
> -               /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
> -               if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
> +               /* list/rbtree_add_impl have an extra arg (prev/less),
> +                * so args-to-fixup are in different regs.
> +                */
> +               if (desc->func_id == special_kfunc_list[KF_bpf_list_add_impl] ||
> +                   desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
>                         struct_meta_reg = BPF_REG_4;
>                         node_offset_reg = BPF_REG_5;
>                 }
> --
> 2.50.1 (Apple Git-155)
>
>