Add test to verify that updating [lru_,]percpu_hash maps decrements
refcount when BPF_KPTR_REF objects are involved.
The tests perform the following steps:
1. Call update_elem() to insert an initial value.
2. Use bpf_refcount_acquire() to increment the refcount.
3. Store the node pointer in the map value.
4. Add the node to a linked list.
5. Probe-read the refcount and verify it is *2*.
6. Call update_elem() again to trigger refcount decrement.
7. Probe-read the refcount and verify it is *1*.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
.../bpf/prog_tests/refcounted_kptr.c | 57 ++++++++++++++++++
.../selftests/bpf/progs/refcounted_kptr.c | 60 +++++++++++++++++++
2 files changed, 117 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
index d6bd5e16e6372..086f679fa3f61 100644
--- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
@@ -44,3 +44,60 @@ void test_refcounted_kptr_wrong_owner(void)
ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
refcounted_kptr__destroy(skel);
}
+
+void test_percpu_hash_refcounted_kptr_refcount_leak(void)
+{
+ struct refcounted_kptr *skel;
+ int cpu_nr, fd, err, key = 0;
+ struct bpf_map *map;
+ size_t values_sz;
+ u64 *values;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ cpu_nr = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ values = calloc(cpu_nr, sizeof(u64));
+ if (!ASSERT_OK_PTR(values, "calloc values"))
+ return;
+
+ skel = refcounted_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
+ free(values);
+ return;
+ }
+
+ values_sz = cpu_nr * sizeof(u64);
+ memset(values, 0, values_sz);
+
+ map = skel->maps.percpu_hash;
+ err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ goto out;
+
+ fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
+ goto out;
+
+ err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ goto out;
+
+ fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(opts.retval, 1, "opts.retval");
+
+out:
+ refcounted_kptr__destroy(skel);
+ free(values);
+}
+
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
index 893a4fdb4b6e9..1aca85d86aebc 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -568,4 +568,64 @@ int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock,
return 0;
}
+private(kptr_ref) u64 ref;
+
+static int probe_read_refcount(void)
+{
+ u32 refcount;
+
+ bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref);
+ return refcount;
+}
+
+static int __insert_in_list(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct node_data __kptr **node)
+{
+ struct node_data *node_new, *node_ref, *node_old;
+
+ node_new = bpf_obj_new(typeof(*node_new));
+ if (!node_new)
+ return -1;
+
+ node_ref = bpf_refcount_acquire(node_new);
+ node_old = bpf_kptr_xchg(node, node_new);
+ if (node_old) {
+ bpf_obj_drop(node_old);
+ bpf_obj_drop(node_ref);
+ return -2;
+ }
+
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &node_ref->l);
+ ref = (u64)(void *) &node_ref->ref;
+ bpf_spin_unlock(lock);
+ return probe_read_refcount();
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} percpu_hash SEC(".maps");
+
+SEC("tc")
+int percpu_hash_refcount_leak(void *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&percpu_hash, &key);
+ if (!v)
+ return 0;
+
+ return __insert_in_list(&head, &lock, &v->node);
+}
+
+SEC("tc")
+int check_percpu_hash_refcount(void *ctx)
+{
+ return probe_read_refcount();
+}
+
char _license[] SEC("license") = "GPL";
--
2.51.2
On 11/5/25 7:14 AM, Leon Hwang wrote:
> Add test to verify that updating [lru_,]percpu_hash maps decrements
> refcount when BPF_KPTR_REF objects are involved.
>
> The tests perform the following steps:
>
> 1. Call update_elem() to insert an initial value.
> 2. Use bpf_refcount_acquire() to increment the refcount.
> 3. Store the node pointer in the map value.
> 4. Add the node to a linked list.
> 5. Probe-read the refcount and verify it is *2*.
> 6. Call update_elem() again to trigger refcount decrement.
> 7. Probe-read the refcount and verify it is *1*.
>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
LGTM with a few nits below.
Acked-by: Yonghong Song <yonghong.song@linux.dev>
> ---
> .../bpf/prog_tests/refcounted_kptr.c | 57 ++++++++++++++++++
> .../selftests/bpf/progs/refcounted_kptr.c | 60 +++++++++++++++++++
> 2 files changed, 117 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
> index d6bd5e16e6372..086f679fa3f61 100644
> --- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
> +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
> @@ -44,3 +44,60 @@ void test_refcounted_kptr_wrong_owner(void)
> ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
> refcounted_kptr__destroy(skel);
> }
> +
> +void test_percpu_hash_refcounted_kptr_refcount_leak(void)
> +{
> + struct refcounted_kptr *skel;
> + int cpu_nr, fd, err, key = 0;
> + struct bpf_map *map;
> + size_t values_sz;
> + u64 *values;
> + LIBBPF_OPTS(bpf_test_run_opts, opts,
> + .data_in = &pkt_v4,
> + .data_size_in = sizeof(pkt_v4),
> + .repeat = 1,
> + );
> +
> + cpu_nr = libbpf_num_possible_cpus();
> + if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
> + return;
> +
> + values = calloc(cpu_nr, sizeof(u64));
> + if (!ASSERT_OK_PTR(values, "calloc values"))
> + return;
> +
> + skel = refcounted_kptr__open_and_load();
> + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
> + free(values);
> + return;
> + }
> +
> + values_sz = cpu_nr * sizeof(u64);
> + memset(values, 0, values_sz);
> +
> + map = skel->maps.percpu_hash;
> + err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
> + if (!ASSERT_OK(err, "bpf_map__update_elem"))
> + goto out;
> +
> + fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
> + err = bpf_prog_test_run_opts(fd, &opts);
> + if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
> + goto out;
> + if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
> + goto out;
> +
> + err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
> + if (!ASSERT_OK(err, "bpf_map__update_elem"))
> + goto out;
> +
> + fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
> + err = bpf_prog_test_run_opts(fd, &opts);
> + ASSERT_OK(err, "bpf_prog_test_run_opts");
> + ASSERT_EQ(opts.retval, 1, "opts.retval");
> +
> +out:
> + refcounted_kptr__destroy(skel);
> + free(values);
> +}
> +
Empty line here.
> diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
> index 893a4fdb4b6e9..1aca85d86aebc 100644
> --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
> +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
> @@ -568,4 +568,64 @@ int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock,
> return 0;
> }
>
> +private(kptr_ref) u64 ref;
> +
> +static int probe_read_refcount(void)
> +{
> + u32 refcount;
> +
> + bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref);
> + return refcount;
> +}
> +
> +static int __insert_in_list(struct bpf_list_head *head, struct bpf_spin_lock *lock,
> + struct node_data __kptr **node)
> +{
> + struct node_data *node_new, *node_ref, *node_old;
> +
> + node_new = bpf_obj_new(typeof(*node_new));
> + if (!node_new)
> + return -1;
> +
> + node_ref = bpf_refcount_acquire(node_new);
> + node_old = bpf_kptr_xchg(node, node_new);
Change the above to node_old = bpf_kptr_xchg(node, node_node_ref); might
be better for reasoning although node_ref/node_new are the same.
> + if (node_old) {
> + bpf_obj_drop(node_old);
> + bpf_obj_drop(node_ref);
> + return -2;
> + }
> +
> + bpf_spin_lock(lock);
> + bpf_list_push_front(head, &node_ref->l);
> + ref = (u64)(void *) &node_ref->ref;
> + bpf_spin_unlock(lock);
> + return probe_read_refcount();
> +}
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
> + __type(key, int);
> + __type(value, struct map_value);
> + __uint(max_entries, 1);
> +} percpu_hash SEC(".maps");
> +
> +SEC("tc")
> +int percpu_hash_refcount_leak(void *ctx)
> +{
> + struct map_value *v;
> + int key = 0;
> +
> + v = bpf_map_lookup_elem(&percpu_hash, &key);
> + if (!v)
> + return 0;
> +
> + return __insert_in_list(&head, &lock, &v->node);
> +}
> +
> +SEC("tc")
> +int check_percpu_hash_refcount(void *ctx)
> +{
> + return probe_read_refcount();
> +}
> +
> char _license[] SEC("license") = "GPL";
On 2025/11/7 10:00, Yonghong Song wrote:
>
>
> On 11/5/25 7:14 AM, Leon Hwang wrote:
>> Add test to verify that updating [lru_,]percpu_hash maps decrements
>> refcount when BPF_KPTR_REF objects are involved.
>>
>> The tests perform the following steps:
>>
>> 1. Call update_elem() to insert an initial value.
>> 2. Use bpf_refcount_acquire() to increment the refcount.
>> 3. Store the node pointer in the map value.
>> 4. Add the node to a linked list.
>> 5. Probe-read the refcount and verify it is *2*.
>> 6. Call update_elem() again to trigger refcount decrement.
>> 7. Probe-read the refcount and verify it is *1*.
>>
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>
> LGTM with a few nits below.
>
> Acked-by: Yonghong Song <yonghong.song@linux.dev>
>
Hi Yonghong,
Thanks for your review and ack.
>> ---
>> .../bpf/prog_tests/refcounted_kptr.c | 57 ++++++++++++++++++
>> .../selftests/bpf/progs/refcounted_kptr.c | 60 +++++++++++++++++++
>> 2 files changed, 117 insertions(+)
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>> b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>> index d6bd5e16e6372..086f679fa3f61 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>> @@ -44,3 +44,60 @@ void test_refcounted_kptr_wrong_owner(void)
>> ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
>> refcounted_kptr__destroy(skel);
>> }
>> +
>> +void test_percpu_hash_refcounted_kptr_refcount_leak(void)
>> +{
>> + struct refcounted_kptr *skel;
>> + int cpu_nr, fd, err, key = 0;
>> + struct bpf_map *map;
>> + size_t values_sz;
>> + u64 *values;
>> + LIBBPF_OPTS(bpf_test_run_opts, opts,
>> + .data_in = &pkt_v4,
>> + .data_size_in = sizeof(pkt_v4),
>> + .repeat = 1,
>> + );
>> +
>> + cpu_nr = libbpf_num_possible_cpus();
>> + if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
>> + return;
>> +
>> + values = calloc(cpu_nr, sizeof(u64));
>> + if (!ASSERT_OK_PTR(values, "calloc values"))
>> + return;
>> +
>> + skel = refcounted_kptr__open_and_load();
>> + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
>> + free(values);
>> + return;
>> + }
>> +
>> + values_sz = cpu_nr * sizeof(u64);
>> + memset(values, 0, values_sz);
>> +
>> + map = skel->maps.percpu_hash;
>> + err = bpf_map__update_elem(map, &key, sizeof(key), values,
>> values_sz, 0);
>> + if (!ASSERT_OK(err, "bpf_map__update_elem"))
>> + goto out;
>> +
>> + fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
>> + err = bpf_prog_test_run_opts(fd, &opts);
>> + if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
>> + goto out;
>> + if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
>> + goto out;
>> +
>> + err = bpf_map__update_elem(map, &key, sizeof(key), values,
>> values_sz, 0);
>> + if (!ASSERT_OK(err, "bpf_map__update_elem"))
>> + goto out;
>> +
>> + fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
>> + err = bpf_prog_test_run_opts(fd, &opts);
>> + ASSERT_OK(err, "bpf_prog_test_run_opts");
>> + ASSERT_EQ(opts.retval, 1, "opts.retval");
>> +
>> +out:
>> + refcounted_kptr__destroy(skel);
>> + free(values);
>> +}
>> +
>
> Empty line here.
>
>> diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/
>> tools/testing/selftests/bpf/progs/refcounted_kptr.c
>> index 893a4fdb4b6e9..1aca85d86aebc 100644
>> --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
>> +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
>> @@ -568,4 +568,64 @@ int
>> BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock,
>> return 0;
>> }
>> +private(kptr_ref) u64 ref;
>> +
>> +static int probe_read_refcount(void)
>> +{
>> + u32 refcount;
>> +
>> + bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref);
>> + return refcount;
>> +}
>> +
>> +static int __insert_in_list(struct bpf_list_head *head, struct
>> bpf_spin_lock *lock,
>> + struct node_data __kptr **node)
>> +{
>> + struct node_data *node_new, *node_ref, *node_old;
>> +
>> + node_new = bpf_obj_new(typeof(*node_new));
>> + if (!node_new)
>> + return -1;
>> +
>> + node_ref = bpf_refcount_acquire(node_new);
>> + node_old = bpf_kptr_xchg(node, node_new);
>
> Change the above to node_old = bpf_kptr_xchg(node, node_node_ref); might
> be better for reasoning although node_ref/node_new are the same.
>
Nope — node_ref and node_new are different for the verifier.
When trying node_old = bpf_kptr_xchg(node, node_ref), the verifier reported:
[verifier log snipped for brevity...]
; bpf_obj_drop(node_ref); @ refcounted_kptr.c:594
26: (bf) r1 = r6 ; R1=scalar(id=7) R6=scalar(id=7)
refs=3
27: (b7) r2 = 0 ; R2=0 refs=3
28: (85) call bpf_obj_drop_impl#54490
R1 must be referenced or trusted
processed 27 insns (limit 1000000) max_states_per_insn 0 total_states 2
peak_states 2 mark_read 0
So the verifier rejected it because R6 became scalar(id=7) from
ptr_node_data(ref_obj_id=4).
---
Hi Alexei, could you please drop the extra empty line when applying this
patch?
Then I don't need to send another revision.
Thanks,
Leon
[...]
On 2025/11/11 21:38, Leon Hwang wrote:
>
>
> On 2025/11/7 10:00, Yonghong Song wrote:
>>
>>
>> On 11/5/25 7:14 AM, Leon Hwang wrote:
>>> Add test to verify that updating [lru_,]percpu_hash maps decrements
>>> refcount when BPF_KPTR_REF objects are involved.
>>>
>>> The tests perform the following steps:
>>>
>>> 1. Call update_elem() to insert an initial value.
>>> 2. Use bpf_refcount_acquire() to increment the refcount.
>>> 3. Store the node pointer in the map value.
>>> 4. Add the node to a linked list.
>>> 5. Probe-read the refcount and verify it is *2*.
>>> 6. Call update_elem() again to trigger refcount decrement.
>>> 7. Probe-read the refcount and verify it is *1*.
>>>
>>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>>
>> LGTM with a few nits below.
>>
>> Acked-by: Yonghong Song <yonghong.song@linux.dev>
>>
>
> Hi Yonghong,
>
> Thanks for your review and ack.
>
>>> ---
>>> .../bpf/prog_tests/refcounted_kptr.c | 57 ++++++++++++++++++
>>> .../selftests/bpf/progs/refcounted_kptr.c | 60 +++++++++++++++++++
>>> 2 files changed, 117 insertions(+)
>>>
>>> diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>>> b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>>> index d6bd5e16e6372..086f679fa3f61 100644
>>> --- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>>> +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>>> @@ -44,3 +44,60 @@ void test_refcounted_kptr_wrong_owner(void)
>>> ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
>>> refcounted_kptr__destroy(skel);
>>> }
>>> +
>>> +void test_percpu_hash_refcounted_kptr_refcount_leak(void)
>>> +{
>>> + struct refcounted_kptr *skel;
>>> + int cpu_nr, fd, err, key = 0;
>>> + struct bpf_map *map;
>>> + size_t values_sz;
>>> + u64 *values;
>>> + LIBBPF_OPTS(bpf_test_run_opts, opts,
>>> + .data_in = &pkt_v4,
>>> + .data_size_in = sizeof(pkt_v4),
>>> + .repeat = 1,
>>> + );
>>> +
>>> + cpu_nr = libbpf_num_possible_cpus();
>>> + if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
>>> + return;
>>> +
>>> + values = calloc(cpu_nr, sizeof(u64));
>>> + if (!ASSERT_OK_PTR(values, "calloc values"))
>>> + return;
>>> +
>>> + skel = refcounted_kptr__open_and_load();
>>> + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
>>> + free(values);
>>> + return;
>>> + }
>>> +
>>> + values_sz = cpu_nr * sizeof(u64);
>>> + memset(values, 0, values_sz);
>>> +
>>> + map = skel->maps.percpu_hash;
>>> + err = bpf_map__update_elem(map, &key, sizeof(key), values,
>>> values_sz, 0);
>>> + if (!ASSERT_OK(err, "bpf_map__update_elem"))
>>> + goto out;
>>> +
>>> + fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
>>> + err = bpf_prog_test_run_opts(fd, &opts);
>>> + if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
>>> + goto out;
>>> + if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
>>> + goto out;
>>> +
>>> + err = bpf_map__update_elem(map, &key, sizeof(key), values,
>>> values_sz, 0);
>>> + if (!ASSERT_OK(err, "bpf_map__update_elem"))
>>> + goto out;
>>> +
>>> + fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
>>> + err = bpf_prog_test_run_opts(fd, &opts);
>>> + ASSERT_OK(err, "bpf_prog_test_run_opts");
>>> + ASSERT_EQ(opts.retval, 1, "opts.retval");
>>> +
>>> +out:
>>> + refcounted_kptr__destroy(skel);
>>> + free(values);
>>> +}
>>> +
>>
>> Empty line here.
>>
>>> diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/
>>> tools/testing/selftests/bpf/progs/refcounted_kptr.c
>>> index 893a4fdb4b6e9..1aca85d86aebc 100644
>>> --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
>>> +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
>>> @@ -568,4 +568,64 @@ int
>>> BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock,
>>> return 0;
>>> }
>>> +private(kptr_ref) u64 ref;
>>> +
>>> +static int probe_read_refcount(void)
>>> +{
>>> + u32 refcount;
>>> +
>>> + bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref);
>>> + return refcount;
>>> +}
>>> +
>>> +static int __insert_in_list(struct bpf_list_head *head, struct
>>> bpf_spin_lock *lock,
>>> + struct node_data __kptr **node)
>>> +{
>>> + struct node_data *node_new, *node_ref, *node_old;
>>> +
>>> + node_new = bpf_obj_new(typeof(*node_new));
>>> + if (!node_new)
>>> + return -1;
>>> +
>>> + node_ref = bpf_refcount_acquire(node_new);
>>> + node_old = bpf_kptr_xchg(node, node_new);
>>
>> Change the above to node_old = bpf_kptr_xchg(node, node_node_ref); might
>> be better for reasoning although node_ref/node_new are the same.
>>
>
> Nope — node_ref and node_new are different for the verifier.
They are the same in theory.
The verifier failure was likely caused by something else, but I'm not
sure of the exact reason.
>
> When trying node_old = bpf_kptr_xchg(node, node_ref), the verifier reported:
>
> [verifier log snipped for brevity...]
> ; bpf_obj_drop(node_ref); @ refcounted_kptr.c:594
> 26: (bf) r1 = r6 ; R1=scalar(id=7) R6=scalar(id=7)
> refs=3
> 27: (b7) r2 = 0 ; R2=0 refs=3
> 28: (85) call bpf_obj_drop_impl#54490
> R1 must be referenced or trusted
> processed 27 insns (limit 1000000) max_states_per_insn 0 total_states 2
> peak_states 2 mark_read 0
>
> So the verifier rejected it because R6 became scalar(id=7) from
> ptr_node_data(ref_obj_id=4).
>
> ---
>
> Hi Alexei, could you please drop the extra empty line when applying this
> patch?
>
> Then I don't need to send another revision.
>
> Thanks,
> Leon
>
> [...]
>
On 11/11/25 5:52 AM, Leon Hwang wrote:
>
> On 2025/11/11 21:38, Leon Hwang wrote:
>>
>> On 2025/11/7 10:00, Yonghong Song wrote:
>>>
>>> On 11/5/25 7:14 AM, Leon Hwang wrote:
>>>> Add test to verify that updating [lru_,]percpu_hash maps decrements
>>>> refcount when BPF_KPTR_REF objects are involved.
>>>>
>>>> The tests perform the following steps:
>>>>
>>>> 1. Call update_elem() to insert an initial value.
>>>> 2. Use bpf_refcount_acquire() to increment the refcount.
>>>> 3. Store the node pointer in the map value.
>>>> 4. Add the node to a linked list.
>>>> 5. Probe-read the refcount and verify it is *2*.
>>>> 6. Call update_elem() again to trigger refcount decrement.
>>>> 7. Probe-read the refcount and verify it is *1*.
>>>>
>>>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>>> LGTM with a few nits below.
>>>
>>> Acked-by: Yonghong Song <yonghong.song@linux.dev>
>>>
>> Hi Yonghong,
>>
>> Thanks for your review and ack.
>>
>>>> ---
>>>> .../bpf/prog_tests/refcounted_kptr.c | 57 ++++++++++++++++++
>>>> .../selftests/bpf/progs/refcounted_kptr.c | 60 +++++++++++++++++++
>>>> 2 files changed, 117 insertions(+)
>>>>
>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>>>> b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>>>> index d6bd5e16e6372..086f679fa3f61 100644
>>>> --- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>>>> +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
>>>> @@ -44,3 +44,60 @@ void test_refcounted_kptr_wrong_owner(void)
>>>> ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
>>>> refcounted_kptr__destroy(skel);
>>>> }
>>>> +
>>>> +void test_percpu_hash_refcounted_kptr_refcount_leak(void)
>>>> +{
>>>> + struct refcounted_kptr *skel;
>>>> + int cpu_nr, fd, err, key = 0;
>>>> + struct bpf_map *map;
>>>> + size_t values_sz;
>>>> + u64 *values;
>>>> + LIBBPF_OPTS(bpf_test_run_opts, opts,
>>>> + .data_in = &pkt_v4,
>>>> + .data_size_in = sizeof(pkt_v4),
>>>> + .repeat = 1,
>>>> + );
>>>> +
>>>> + cpu_nr = libbpf_num_possible_cpus();
>>>> + if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
>>>> + return;
>>>> +
>>>> + values = calloc(cpu_nr, sizeof(u64));
>>>> + if (!ASSERT_OK_PTR(values, "calloc values"))
>>>> + return;
>>>> +
>>>> + skel = refcounted_kptr__open_and_load();
>>>> + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
>>>> + free(values);
>>>> + return;
>>>> + }
>>>> +
>>>> + values_sz = cpu_nr * sizeof(u64);
>>>> + memset(values, 0, values_sz);
>>>> +
>>>> + map = skel->maps.percpu_hash;
>>>> + err = bpf_map__update_elem(map, &key, sizeof(key), values,
>>>> values_sz, 0);
>>>> + if (!ASSERT_OK(err, "bpf_map__update_elem"))
>>>> + goto out;
>>>> +
>>>> + fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
>>>> + err = bpf_prog_test_run_opts(fd, &opts);
>>>> + if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
>>>> + goto out;
>>>> + if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
>>>> + goto out;
>>>> +
>>>> + err = bpf_map__update_elem(map, &key, sizeof(key), values,
>>>> values_sz, 0);
>>>> + if (!ASSERT_OK(err, "bpf_map__update_elem"))
>>>> + goto out;
>>>> +
>>>> + fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
>>>> + err = bpf_prog_test_run_opts(fd, &opts);
>>>> + ASSERT_OK(err, "bpf_prog_test_run_opts");
>>>> + ASSERT_EQ(opts.retval, 1, "opts.retval");
>>>> +
>>>> +out:
>>>> + refcounted_kptr__destroy(skel);
>>>> + free(values);
>>>> +}
>>>> +
>>> Empty line here.
>>>
>>>> diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/
>>>> tools/testing/selftests/bpf/progs/refcounted_kptr.c
>>>> index 893a4fdb4b6e9..1aca85d86aebc 100644
>>>> --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
>>>> +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
>>>> @@ -568,4 +568,64 @@ int
>>>> BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock,
>>>> return 0;
>>>> }
>>>> +private(kptr_ref) u64 ref;
>>>> +
>>>> +static int probe_read_refcount(void)
>>>> +{
>>>> + u32 refcount;
>>>> +
>>>> + bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref);
>>>> + return refcount;
>>>> +}
>>>> +
>>>> +static int __insert_in_list(struct bpf_list_head *head, struct
>>>> bpf_spin_lock *lock,
>>>> + struct node_data __kptr **node)
>>>> +{
>>>> + struct node_data *node_new, *node_ref, *node_old;
>>>> +
>>>> + node_new = bpf_obj_new(typeof(*node_new));
>>>> + if (!node_new)
>>>> + return -1;
>>>> +
>>>> + node_ref = bpf_refcount_acquire(node_new);
>>>> + node_old = bpf_kptr_xchg(node, node_new);
>>> Change the above to node_old = bpf_kptr_xchg(node, node_node_ref); might
>>> be better for reasoning although node_ref/node_new are the same.
>>>
>> Nope — node_ref and node_new are different for the verifier.
> They are the same in theory.
>
> The verifier failure was likely caused by something else, but I'm not
> sure of the exact reason.
I did some analysis and your code works as expected:
node_ref = bpf_refcount_acquire(node_new);
node_old = bpf_kptr_xchg(node, node_new);
if (node_old) {
bpf_obj_drop(node_old);
bpf_obj_drop(node_ref);
return -2;
}
bpf_spin_lock(lock);
bpf_list_push_front(head, &node_ref->l);
ref = (u64)(void *) &node_ref->ref;
bpf_spin_unlock(lock);
In the above, after the following insn:
node_old = bpf_kptr_xchg(node, node_new);
the second argument 'node_new' will become a scalar since it
may be changed by another bpf program accessing the same map.
So your code is okay as node_ref still valid ptr_node_data
and can be used in following codes.
My suggestion to replace
node_old = bpf_kptr_xchg(node, node_new);
with
node_old = bpf_kptr_xchg(node, node_ref);
will not work since node_ref will be a scalar
so subsequent bpf_obj_drop(node_ref) and bpf_list_push_front(...)
won't work.
In summary, your change look okay to me. Sorry for noise.
>
>> When trying node_old = bpf_kptr_xchg(node, node_ref), the verifier reported:
>>
>> [verifier log snipped for brevity...]
>> ; bpf_obj_drop(node_ref); @ refcounted_kptr.c:594
>> 26: (bf) r1 = r6 ; R1=scalar(id=7) R6=scalar(id=7)
>> refs=3
>> 27: (b7) r2 = 0 ; R2=0 refs=3
>> 28: (85) call bpf_obj_drop_impl#54490
>> R1 must be referenced or trusted
>> processed 27 insns (limit 1000000) max_states_per_insn 0 total_states 2
>> peak_states 2 mark_read 0
>>
>> So the verifier rejected it because R6 became scalar(id=7) from
>> ptr_node_data(ref_obj_id=4).
>>
>> ---
>>
>> Hi Alexei, could you please drop the extra empty line when applying this
>> patch?
>>
>> Then I don't need to send another revision.
>>
>> Thanks,
>> Leon
>>
>> [...]
>>
On 2025/11/12 05:58, Yonghong Song wrote:
>
>
> On 11/11/25 5:52 AM, Leon Hwang wrote:
>>
>> On 2025/11/11 21:38, Leon Hwang wrote:
>>>
>>> On 2025/11/7 10:00, Yonghong Song wrote:
>>>>
>>>> On 11/5/25 7:14 AM, Leon Hwang wrote:
[...]
>>>>> +
>>>>> +static int __insert_in_list(struct bpf_list_head *head, struct
>>>>> bpf_spin_lock *lock,
>>>>> + struct node_data __kptr **node)
>>>>> +{
>>>>> + struct node_data *node_new, *node_ref, *node_old;
>>>>> +
>>>>> + node_new = bpf_obj_new(typeof(*node_new));
>>>>> + if (!node_new)
>>>>> + return -1;
>>>>> +
>>>>> + node_ref = bpf_refcount_acquire(node_new);
>>>>> + node_old = bpf_kptr_xchg(node, node_new);
>>>> Change the above to node_old = bpf_kptr_xchg(node, node_node_ref);
>>>> might
>>>> be better for reasoning although node_ref/node_new are the same.
>>>>
>>> Nope — node_ref and node_new are different for the verifier.
>> They are the same in theory.
>>
>> The verifier failure was likely caused by something else, but I'm not
>> sure of the exact reason.
>
> I did some analysis and your code works as expected:
>
> node_ref = bpf_refcount_acquire(node_new);
> node_old = bpf_kptr_xchg(node, node_new);
> if (node_old) {
> bpf_obj_drop(node_old);
> bpf_obj_drop(node_ref);
> return -2;
> }
>
> bpf_spin_lock(lock);
> bpf_list_push_front(head, &node_ref->l);
> ref = (u64)(void *) &node_ref->ref;
> bpf_spin_unlock(lock);
>
> In the above, after the following insn:
> node_old = bpf_kptr_xchg(node, node_new);
> the second argument 'node_new' will become a scalar since it
> may be changed by another bpf program accessing the same map.
>
> So your code is okay as node_ref still valid ptr_node_data
> and can be used in following codes.
>
>
> My suggestion to replace
> node_old = bpf_kptr_xchg(node, node_new);
> with
> node_old = bpf_kptr_xchg(node, node_ref);
> will not work since node_ref will be a scalar
> so subsequent bpf_obj_drop(node_ref) and bpf_list_push_front(...)
> won't work.
>
> In summary, your change look okay to me. Sorry for noise.
>
Hi Yonghong,
Thanks for your detailed analysis.
Indeed, in the case of
node_old = bpf_kptr_xchg(node, node_ref);,
the verifier marks node_ref as SCALAR_VALUE.
Here's the relevant part in check_helper_call():
static int check_helper_call(struct bpf_verifier_env *env, struct
bpf_insn *insn,
int *insn_idx_p)
{
//...
if (meta.release_regno) {
err = -EINVAL;
if (arg_type_is_dynptr(fn->arg_type[meta.release_regno -
BPF_REG_1])) {
err = unmark_stack_slots_dynptr(env,
®s[meta.release_regno]);
} else if (func_id == BPF_FUNC_kptr_xchg &&
meta.ref_obj_id) {
u32 ref_obj_id = meta.ref_obj_id;
bool in_rcu = in_rcu_cs(env);
struct bpf_func_state *state;
struct bpf_reg_state *reg;
err = release_reference_nomark(env->cur_state,
ref_obj_id);
if (!err) {
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
if (reg->ref_obj_id == ref_obj_id) {
if (in_rcu && (reg->type
& MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
reg->ref_obj_id = 0;
reg->type &=
~MEM_ALLOC;
reg->type |=
MEM_RCU;
} else {
mark_reg_invalid(env, reg);
}
}
}));
}
} else if (meta.ref_obj_id) {
//...
}
//...
}
This logic matches your explanation — when the argument passed to
bpf_kptr_xchg() holds a reference (meta.ref_obj_id), that reference is
not a KPTR_PERCPU inside an RCU critical section.
As a result, node_ref is invalidated and becomes a scalar after the call.
So yes, your reasoning is correct, and this behavior explains why using
node_ref as the second argument doesn't work.
Thanks,
Leon
[...]
© 2016 - 2025 Red Hat, Inc.