include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 9 +++++++++ kernel/bpf/arraymap.c | 15 +++++++++++++++ kernel/bpf/core.c | 1 + kernel/bpf/hashtab.c | 32 ++++++++++++++++++++++++++++++++ kernel/bpf/helpers.c | 18 ++++++++++++++++++ kernel/bpf/verifier.c | 17 +++++++++++++++-- kernel/trace/bpf_trace.c | 2 ++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 9 files changed, 103 insertions(+), 2 deletions(-)
From: Feng Zhou <zhoufeng.zf@bytedance.com>
Tracing some functions, such as enqueue_task_fair, requires accessing the
data of a specific cpu rather than the current cpu, and bpf_map_lookup_elem
on a percpu map cannot do that. So add bpf_map_lookup_percpu_elem to
accomplish it for percpu_array_map, percpu_hash_map, lru_percpu_hash_map.
The implementation is straightforward: it follows the map_lookup_elem
implementation of the percpu maps, adds a cpu parameter, and returns the
value for the specified cpu.
Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
---
include/linux/bpf.h | 2 ++
include/uapi/linux/bpf.h | 9 +++++++++
kernel/bpf/arraymap.c | 15 +++++++++++++++
kernel/bpf/core.c | 1 +
kernel/bpf/hashtab.c | 32 ++++++++++++++++++++++++++++++++
kernel/bpf/helpers.c | 18 ++++++++++++++++++
kernel/bpf/verifier.c | 17 +++++++++++++++--
kernel/trace/bpf_trace.c | 2 ++
tools/include/uapi/linux/bpf.h | 9 +++++++++
9 files changed, 103 insertions(+), 2 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index be94833d390a..6b5cf5a90d73 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -89,6 +89,7 @@ struct bpf_map_ops {
int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
int (*map_pop_elem)(struct bpf_map *map, void *value);
int (*map_peek_elem)(struct bpf_map *map, void *value);
+ void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -2161,6 +2162,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto;
extern const struct bpf_func_proto bpf_map_push_elem_proto;
extern const struct bpf_func_proto bpf_map_pop_elem_proto;
extern const struct bpf_func_proto bpf_map_peek_elem_proto;
+extern const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto;
extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 444fe6f1cf35..024fb9f319a8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5154,6 +5154,14 @@ union bpf_attr {
* if not NULL, is a reference which must be released using its
* corresponding release function, or moved into a BPF map before
* program exit.
+ *
+ * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
+ * Description
+ * Perform a lookup in *percpu map* for an entry associated to
+ * *key* on *cpu*.
+ * Return
+ * Map value associated to *key* on *cpu*, or **NULL** if no entry
+ * was found or *cpu* is invalid.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5351,6 +5359,7 @@ union bpf_attr {
FN(skb_set_tstamp), \
FN(ima_file_hash), \
FN(kptr_xchg), \
+ FN(map_lookup_percpu_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b3bf31fd9458..71d9db976ab0 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -243,6 +243,20 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
+static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ u32 index = *(u32 *)key;
+
+ if (cpu >= nr_cpu_ids)
+ return NULL;
+
+ if (unlikely(index >= array->map.max_entries))
+ return NULL;
+
+ return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
+}
+
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -725,6 +739,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_lookup_elem = percpu_array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
+ .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
.map_seq_show_elem = percpu_array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
.map_lookup_batch = generic_map_lookup_batch,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 13e9dbeeedf3..76f68d0a7ae8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2619,6 +2619,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
const struct bpf_func_proto bpf_map_push_elem_proto __weak;
const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
+const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto __weak;
const struct bpf_func_proto bpf_spin_lock_proto __weak;
const struct bpf_func_proto bpf_spin_unlock_proto __weak;
const struct bpf_func_proto bpf_jiffies64_proto __weak;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3e00e62b2218..9c45b07dd5b6 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -2191,6 +2191,20 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
return NULL;
}
+static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
+{
+ struct htab_elem *l;
+
+ if (cpu >= nr_cpu_ids)
+ return NULL;
+
+ l = __htab_map_lookup_elem(map, key);
+ if (l)
+ return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
+ else
+ return NULL;
+}
+
static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
struct htab_elem *l = __htab_map_lookup_elem(map, key);
@@ -2203,6 +2217,22 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
return NULL;
}
+static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
+{
+ struct htab_elem *l;
+
+ if (cpu >= nr_cpu_ids)
+ return NULL;
+
+ l = __htab_map_lookup_elem(map, key);
+ if (l) {
+ bpf_lru_node_set_ref(&l->lru_node);
+ return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
+ }
+
+ return NULL;
+}
+
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
struct htab_elem *l;
@@ -2292,6 +2322,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
.map_update_elem = htab_percpu_map_update_elem,
.map_delete_elem = htab_map_delete_elem,
+ .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
@@ -2310,6 +2341,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
.map_update_elem = htab_lru_percpu_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
+ .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 3e709fed5306..d5f104a39092 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -119,6 +119,22 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = {
.arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
};
+BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
+}
+
+const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
+ .func = bpf_map_lookup_percpu_elem,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_MAP_KEY,
+ .arg3_type = ARG_ANYTHING,
+};
+
const struct bpf_func_proto bpf_get_prandom_u32_proto = {
.func = bpf_user_rnd_u32,
.gpl_only = false,
@@ -1420,6 +1436,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_map_pop_elem_proto;
case BPF_FUNC_map_peek_elem:
return &bpf_map_peek_elem_proto;
+ case BPF_FUNC_map_lookup_percpu_elem:
+ return &bpf_map_lookup_percpu_elem_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 813f6ee80419..67ac0b047caf 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6138,6 +6138,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
goto error;
break;
+ case BPF_FUNC_map_lookup_percpu_elem:
+ if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
+ goto error;
+ break;
case BPF_FUNC_sk_storage_get:
case BPF_FUNC_sk_storage_delete:
if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
@@ -6751,7 +6757,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
func_id != BPF_FUNC_map_pop_elem &&
func_id != BPF_FUNC_map_peek_elem &&
func_id != BPF_FUNC_for_each_map_elem &&
- func_id != BPF_FUNC_redirect_map)
+ func_id != BPF_FUNC_redirect_map &&
+ func_id != BPF_FUNC_map_lookup_percpu_elem)
return 0;
if (map == NULL) {
@@ -13811,7 +13818,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
insn->imm == BPF_FUNC_map_pop_elem ||
insn->imm == BPF_FUNC_map_peek_elem ||
insn->imm == BPF_FUNC_redirect_map ||
- insn->imm == BPF_FUNC_for_each_map_elem)) {
+ insn->imm == BPF_FUNC_for_each_map_elem ||
+ insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
aux = &env->insn_aux_data[i + delta];
if (bpf_map_ptr_poisoned(aux))
goto patch_call_imm;
@@ -13860,6 +13868,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
bpf_callback_t callback_fn,
void *callback_ctx,
u64 flags))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
+ (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
patch_map_ops_generic:
switch (insn->imm) {
@@ -13887,6 +13897,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
case BPF_FUNC_for_each_map_elem:
insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
continue;
+ case BPF_FUNC_map_lookup_percpu_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
+ continue;
}
goto patch_call_imm;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f15b826f9899..af4125407c20 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1182,6 +1182,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_map_pop_elem_proto;
case BPF_FUNC_map_peek_elem:
return &bpf_map_peek_elem_proto;
+ case BPF_FUNC_map_lookup_percpu_elem:
+ return &bpf_map_lookup_percpu_elem_proto;
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_ktime_get_boot_ns:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 444fe6f1cf35..024fb9f319a8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5154,6 +5154,14 @@ union bpf_attr {
* if not NULL, is a reference which must be released using its
* corresponding release function, or moved into a BPF map before
* program exit.
+ *
+ * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
+ * Description
+ * Perform a lookup in *percpu map* for an entry associated to
+ * *key* on *cpu*.
+ * Return
+ * Map value associated to *key* on *cpu*, or **NULL** if no entry
+ * was found or *cpu* is invalid.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5351,6 +5359,7 @@ union bpf_attr {
FN(skb_set_tstamp), \
FN(ima_file_hash), \
FN(kptr_xchg), \
+ FN(map_lookup_percpu_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
--
2.20.1
On Fri, May 6, 2022 at 7:49 PM Feng zhou <zhoufeng.zf@bytedance.com> wrote: > > From: Feng Zhou <zhoufeng.zf@bytedance.com> > > Trace some functions, such as enqueue_task_fair, need to access the > corresponding cpu, not the current cpu, and bpf_map_lookup_elem percpu map > cannot do it. So add bpf_map_lookup_percpu_elem to accomplish it for > percpu_array_map, percpu_hash_map, lru_percpu_hash_map. > > The implementation method is relatively simple, refer to the implementation > method of map_lookup_elem of percpu map, increase the parameters of cpu, and > obtain it according to the specified cpu. > I don't think it's safe in general to access per-cpu data from another CPU. I'd suggest just having either a ARRAY_OF_MAPS or adding CPU ID as part of the key, if you need such a custom access pattern. > Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com> > --- > include/linux/bpf.h | 2 ++ > include/uapi/linux/bpf.h | 9 +++++++++ > kernel/bpf/arraymap.c | 15 +++++++++++++++ > kernel/bpf/core.c | 1 + > kernel/bpf/hashtab.c | 32 ++++++++++++++++++++++++++++++++ > kernel/bpf/helpers.c | 18 ++++++++++++++++++ > kernel/bpf/verifier.c | 17 +++++++++++++++-- > kernel/trace/bpf_trace.c | 2 ++ > tools/include/uapi/linux/bpf.h | 9 +++++++++ > 9 files changed, 103 insertions(+), 2 deletions(-) > [...]
On Mon, May 9, 2022 at 5:34 PM Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > On Fri, May 6, 2022 at 7:49 PM Feng zhou <zhoufeng.zf@bytedance.com> wrote: > > > > From: Feng Zhou <zhoufeng.zf@bytedance.com> > > > > Trace some functions, such as enqueue_task_fair, need to access the > > corresponding cpu, not the current cpu, and bpf_map_lookup_elem percpu map > > cannot do it. So add bpf_map_lookup_percpu_elem to accomplish it for > > percpu_array_map, percpu_hash_map, lru_percpu_hash_map. > > > > The implementation method is relatively simple, refer to the implementation > > method of map_lookup_elem of percpu map, increase the parameters of cpu, and > > obtain it according to the specified cpu. > > > > I don't think it's safe in general to access per-cpu data from another > CPU. I'd suggest just having either a ARRAY_OF_MAPS or adding CPU ID > as part of the key, if you need such a custom access pattern. I actually just sent an RFC patch series containing a similar patch for the exact same purpose. There are instances in the kernel where per-cpu data is accessed from other cpus (e.g. mem_cgroup_css_rstat_flush()). I believe, like any other variable, percpu data can be safe or not safe to access, based on the access pattern. It is up to the user to coordinate accesses to the variable. For example, in my use case, one of the accessors only reads percpu values of different cpus, so it should be safe. If a user accesses percpu data of another cpu without guaranteeing safety, they corrupt their own data. I understand that the main purpose of percpu data is lockless (and therefore fast) access, but in some use cases the user may be able to safely (and locklessly) access the data concurrently. 
> > > Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com> > > --- > > include/linux/bpf.h | 2 ++ > > include/uapi/linux/bpf.h | 9 +++++++++ > > kernel/bpf/arraymap.c | 15 +++++++++++++++ > > kernel/bpf/core.c | 1 + > > kernel/bpf/hashtab.c | 32 ++++++++++++++++++++++++++++++++ > > kernel/bpf/helpers.c | 18 ++++++++++++++++++ > > kernel/bpf/verifier.c | 17 +++++++++++++++-- > > kernel/trace/bpf_trace.c | 2 ++ > > tools/include/uapi/linux/bpf.h | 9 +++++++++ > > 9 files changed, 103 insertions(+), 2 deletions(-) > > > > [...]
在 2022/5/10 上午9:04, Yosry Ahmed 写道: > On Mon, May 9, 2022 at 5:34 PM Andrii Nakryiko > <andrii.nakryiko@gmail.com> wrote: >> On Fri, May 6, 2022 at 7:49 PM Feng zhou <zhoufeng.zf@bytedance.com> wrote: >>> From: Feng Zhou <zhoufeng.zf@bytedance.com> >>> >>> Trace some functions, such as enqueue_task_fair, need to access the >>> corresponding cpu, not the current cpu, and bpf_map_lookup_elem percpu map >>> cannot do it. So add bpf_map_lookup_percpu_elem to accomplish it for >>> percpu_array_map, percpu_hash_map, lru_percpu_hash_map. >>> >>> The implementation method is relatively simple, refer to the implementation >>> method of map_lookup_elem of percpu map, increase the parameters of cpu, and >>> obtain it according to the specified cpu. >>> >> I don't think it's safe in general to access per-cpu data from another >> CPU. I'd suggest just having either a ARRAY_OF_MAPS or adding CPU ID >> as part of the key, if you need such a custom access pattern. > I actually just sent an RFC patch series containing a similar patch > for the exact same purpose. There are instances in the kernel where > per-cpu data is accessed from other cpus (e.g. > mem_cgroup_css_rstat_flush()). I believe, like any other variable, > percpu data can be safe or not safe to access, based on the access > pattern. It is up to the user to coordinate accesses to the variable. > > For example, in my use case, one of the accessors only reads percpu > values of different cpus, so it should be safe. If a user accesses > percpu data of another cpu without guaranteeing safety, they corrupt > their own data. I understand that the main purpose of percpu data is > lockless (and therefore fast) access, but in some use cases the user > may be able to safely (and locklessly) access the data concurrently. 
> Regarding data security, I think users need to consider before using it, such as hook enqueue_task_fair, the function itself takes the rq lock of the corresponding cpu, there is no problem, and the kernel only provides a method, like bpf_per_cpu_ptr and bpf_this_cpu_ptr, data security needs to be guaranteed by users in different scenarios, such as using bpf_spin_lock. >>> Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com> >>> --- >>> include/linux/bpf.h | 2 ++ >>> include/uapi/linux/bpf.h | 9 +++++++++ >>> kernel/bpf/arraymap.c | 15 +++++++++++++++ >>> kernel/bpf/core.c | 1 + >>> kernel/bpf/hashtab.c | 32 ++++++++++++++++++++++++++++++++ >>> kernel/bpf/helpers.c | 18 ++++++++++++++++++ >>> kernel/bpf/verifier.c | 17 +++++++++++++++-- >>> kernel/trace/bpf_trace.c | 2 ++ >>> tools/include/uapi/linux/bpf.h | 9 +++++++++ >>> 9 files changed, 103 insertions(+), 2 deletions(-) >>> >> [...]
On Mon, May 9, 2022 at 7:41 PM Feng Zhou <zhoufeng.zf@bytedance.com> wrote: > > 在 2022/5/10 上午9:04, Yosry Ahmed 写道: > > On Mon, May 9, 2022 at 5:34 PM Andrii Nakryiko > > <andrii.nakryiko@gmail.com> wrote: > >> On Fri, May 6, 2022 at 7:49 PM Feng zhou <zhoufeng.zf@bytedance.com> wrote: > >>> From: Feng Zhou <zhoufeng.zf@bytedance.com> > >>> > >>> Trace some functions, such as enqueue_task_fair, need to access the > >>> corresponding cpu, not the current cpu, and bpf_map_lookup_elem percpu map > >>> cannot do it. So add bpf_map_lookup_percpu_elem to accomplish it for > >>> percpu_array_map, percpu_hash_map, lru_percpu_hash_map. > >>> > >>> The implementation method is relatively simple, refer to the implementation > >>> method of map_lookup_elem of percpu map, increase the parameters of cpu, and > >>> obtain it according to the specified cpu. > >>> > >> I don't think it's safe in general to access per-cpu data from another > >> CPU. I'd suggest just having either a ARRAY_OF_MAPS or adding CPU ID > >> as part of the key, if you need such a custom access pattern. > > I actually just sent an RFC patch series containing a similar patch > > for the exact same purpose. There are instances in the kernel where > > per-cpu data is accessed from other cpus (e.g. > > mem_cgroup_css_rstat_flush()). I believe, like any other variable, > > percpu data can be safe or not safe to access, based on the access > > pattern. It is up to the user to coordinate accesses to the variable. > > > > For example, in my use case, one of the accessors only reads percpu > > values of different cpus, so it should be safe. If a user accesses > > percpu data of another cpu without guaranteeing safety, they corrupt > > their own data. I understand that the main purpose of percpu data is > > lockless (and therefore fast) access, but in some use cases the user > > may be able to safely (and locklessly) access the data concurrently. 
> > > > Regarding data security, I think users need to consider before using it, > such > as hook enqueue_task_fair, the function itself takes the rq lock of the > corresponding cpu, there is no problem, and the kernel only provides a > method, > like bpf_per_cpu_ptr and bpf_this_cpu_ptr, data security needs to be > guaranteed > by users in different scenarios, such as using bpf_spin_lock. Right. The new helper looks useful and is safe. Please add a selftest and respin.
在 2022/5/10 上午11:15, Alexei Starovoitov 写道: > On Mon, May 9, 2022 at 7:41 PM Feng Zhou <zhoufeng.zf@bytedance.com> wrote: >> 在 2022/5/10 上午9:04, Yosry Ahmed 写道: >>> On Mon, May 9, 2022 at 5:34 PM Andrii Nakryiko >>> <andrii.nakryiko@gmail.com> wrote: >>>> On Fri, May 6, 2022 at 7:49 PM Feng zhou <zhoufeng.zf@bytedance.com> wrote: >>>>> From: Feng Zhou <zhoufeng.zf@bytedance.com> >>>>> >>>>> Trace some functions, such as enqueue_task_fair, need to access the >>>>> corresponding cpu, not the current cpu, and bpf_map_lookup_elem percpu map >>>>> cannot do it. So add bpf_map_lookup_percpu_elem to accomplish it for >>>>> percpu_array_map, percpu_hash_map, lru_percpu_hash_map. >>>>> >>>>> The implementation method is relatively simple, refer to the implementation >>>>> method of map_lookup_elem of percpu map, increase the parameters of cpu, and >>>>> obtain it according to the specified cpu. >>>>> >>>> I don't think it's safe in general to access per-cpu data from another >>>> CPU. I'd suggest just having either a ARRAY_OF_MAPS or adding CPU ID >>>> as part of the key, if you need such a custom access pattern. >>> I actually just sent an RFC patch series containing a similar patch >>> for the exact same purpose. There are instances in the kernel where >>> per-cpu data is accessed from other cpus (e.g. >>> mem_cgroup_css_rstat_flush()). I believe, like any other variable, >>> percpu data can be safe or not safe to access, based on the access >>> pattern. It is up to the user to coordinate accesses to the variable. >>> >>> For example, in my use case, one of the accessors only reads percpu >>> values of different cpus, so it should be safe. If a user accesses >>> percpu data of another cpu without guaranteeing safety, they corrupt >>> their own data. I understand that the main purpose of percpu data is >>> lockless (and therefore fast) access, but in some use cases the user >>> may be able to safely (and locklessly) access the data concurrently. 
>>> >> Regarding data security, I think users need to consider before using it, >> such >> as hook enqueue_task_fair, the function itself takes the rq lock of the >> corresponding cpu, there is no problem, and the kernel only provides a >> method, >> like bpf_per_cpu_ptr and bpf_this_cpu_ptr, data security needs to be >> guaranteed >> by users in different scenarios, such as using bpf_spin_lock. > Right. The new helper looks useful and is safe. > Please add a selftest and respin. Ok, will do. Thanks.
© 2016 - 2026 Red Hat, Inc.