[PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF

Donglin Peng posted 13 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Donglin Peng 1 month, 3 weeks ago
From: pengdonglin <pengdonglin@xiaomi.com>

Introduce a binary search optimization for BTF type lookups when the
BTF instance contains sorted types.

The optimization significantly improves performance when searching for
types in large BTF instances with sorted types. For unsorted BTF, the
implementation falls back to the original linear search.
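
For illustration, a minimal caller sketch using existing public libbpf
APIs (the BTF path and type name below are placeholders, not part of
this patch):

  #include <bpf/btf.h>
  #include <stdio.h>

  int main(void)
  {
          struct btf *btf;
          __s32 id;

          /* placeholder path; any raw BTF file works */
          btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
          if (!btf)
                  return 1;

          /* served by the binary search when the loaded BTF is sorted,
           * by the original linear scan otherwise
           */
          id = btf__find_by_name_kind(btf, "task_struct", BTF_KIND_STRUCT);
          printf("task_struct: id %d\n", id);

          btf__free(btf);
          return 0;
  }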

Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Ihor Solodrai <ihor.solodrai@linux.dev>
Cc: Xiaoqin Zhang <zhangxiaoqin@xiaomi.com>
Signed-off-by: pengdonglin <pengdonglin@xiaomi.com>
---
 tools/lib/bpf/btf.c | 103 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 80 insertions(+), 23 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index ab204ca403dc..2facb57d7e5f 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -92,6 +92,8 @@ struct btf {
 	 *   - for split BTF counts number of types added on top of base BTF.
 	 */
 	__u32 nr_types;
+	/* the start ID of named types in sorted BTF */
+	int sorted_start_id;
 	/* if not NULL, points to the base BTF on top of which the current
 	 * split BTF is based
 	 */
@@ -897,46 +899,98 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
 	return type_id;
 }
 
-__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+static __s32 btf_find_by_name_bsearch(const struct btf *btf, const char *name,
+						__s32 start_id, __s32 end_id)
 {
-	__u32 i, nr_types = btf__type_cnt(btf);
-
-	if (!strcmp(type_name, "void"))
-		return 0;
-
-	for (i = 1; i < nr_types; i++) {
-		const struct btf_type *t = btf__type_by_id(btf, i);
-		const char *name = btf__name_by_offset(btf, t->name_off);
-
-		if (name && !strcmp(type_name, name))
-			return i;
+	const struct btf_type *t;
+	const char *tname;
+	__s32 l, r, m, lmost = -ENOENT;
+	int ret;
+
+	l = start_id;
+	r = end_id;
+	while (l <= r) {
+		m = l + (r - l) / 2;
+		t = btf_type_by_id(btf, m);
+		tname = btf__str_by_offset(btf, t->name_off);
+		ret = strcmp(tname, name);
+		if (ret < 0) {
+			l = m + 1;
+		} else {
+			if (ret == 0)
+				lmost = m;
+			r = m - 1;
+		}
 	}
 
-	return libbpf_err(-ENOENT);
+	return lmost;
 }
 
 static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
 				   const char *type_name, __u32 kind)
 {
-	__u32 i, nr_types = btf__type_cnt(btf);
+	const struct btf_type *t;
+	const char *tname;
+	__s32 idx;
+
+	if (start_id < btf->start_id) {
+		idx = btf_find_by_name_kind(btf->base_btf, start_id,
+					    type_name, kind);
+		if (idx >= 0)
+			return idx;
+		start_id = btf->start_id;
+	}
 
-	if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
+	if (kind == BTF_KIND_UNKN || strcmp(type_name, "void") == 0)
 		return 0;
 
-	for (i = start_id; i < nr_types; i++) {
-		const struct btf_type *t = btf__type_by_id(btf, i);
-		const char *name;
+	if (btf->sorted_start_id > 0 && type_name[0]) {
+		__s32 end_id = btf__type_cnt(btf) - 1;
+
+		/* skip anonymous types */
+		start_id = max(start_id, btf->sorted_start_id);
+		idx = btf_find_by_name_bsearch(btf, type_name, start_id, end_id);
+		if (unlikely(idx < 0))
+			return libbpf_err(-ENOENT);
+
+		if (unlikely(kind == -1))
+			return idx;
+
+		t = btf_type_by_id(btf, idx);
+		if (likely(BTF_INFO_KIND(t->info) == kind))
+			return idx;
+
+		for (idx++; idx <= end_id; idx++) {
+			t = btf__type_by_id(btf, idx);
+			tname = btf__str_by_offset(btf, t->name_off);
+			if (strcmp(tname, type_name) != 0)
+				return libbpf_err(-ENOENT);
+			if (btf_kind(t) == kind)
+				return idx;
+		}
+	} else {
+		__u32 i, total;
 
-		if (btf_kind(t) != kind)
-			continue;
-		name = btf__name_by_offset(btf, t->name_off);
-		if (name && !strcmp(type_name, name))
-			return i;
+		total = btf__type_cnt(btf);
+		for (i = start_id; i < total; i++) {
+			t = btf_type_by_id(btf, i);
+			if (kind != -1 && btf_kind(t) != kind)
+				continue;
+			tname = btf__str_by_offset(btf, t->name_off);
+			if (strcmp(tname, type_name) == 0)
+				return i;
+		}
 	}
 
 	return libbpf_err(-ENOENT);
 }
 
+/* the kind value of -1 indicates that kind matching should be skipped */
+__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+	return btf_find_by_name_kind(btf, btf->start_id, type_name, -1);
+}
+
 __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
 				 __u32 kind)
 {
@@ -1006,6 +1060,7 @@ static struct btf *btf_new_empty(struct btf *base_btf)
 	btf->fd = -1;
 	btf->ptr_sz = sizeof(void *);
 	btf->swapped_endian = false;
+	btf->sorted_start_id = 0;
 
 	if (base_btf) {
 		btf->base_btf = base_btf;
@@ -1057,6 +1112,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
 	btf->start_id = 1;
 	btf->start_str_off = 0;
 	btf->fd = -1;
+	btf->sorted_start_id = 0;
 
 	if (base_btf) {
 		btf->base_btf = base_btf;
@@ -1715,6 +1771,7 @@ static void btf_invalidate_raw_data(struct btf *btf)
 		free(btf->raw_data_swapped);
 		btf->raw_data_swapped = NULL;
 	}
+	btf->sorted_start_id = 0;
 }
 
 /* Ensure BTF is ready to be modified (by splitting into a three memory
-- 
2.34.1
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Andrii Nakryiko 1 month, 3 weeks ago
On Thu, Dec 18, 2025 at 3:31 AM Donglin Peng <dolinux.peng@gmail.com> wrote:
>
> [...]

> +       l = start_id;
> +       r = end_id;
> +       while (l <= r) {
> +               m = l + (r - l) / 2;
> +               t = btf_type_by_id(btf, m);
> +               tname = btf__str_by_offset(btf, t->name_off);
> +               ret = strcmp(tname, name);
> +               if (ret < 0) {
> +                       l = m + 1;
> +               } else {
> +                       if (ret == 0)
> +                               lmost = m;
> +                       r = m - 1;
> +               }
>         }

this differs from what we discussed in [0], you said you'll use that
approach. Can you please elaborate on why you didn't?

  [0] https://lore.kernel.org/bpf/CAEf4Bzb3Eu0J83O=Y4KA-LkzBMjtx7cbonxPzkiduzZ1Pedajg@mail.gmail.com/

>
> -       return libbpf_err(-ENOENT);
> +       return lmost;
>  }
>
>  static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
>                                    const char *type_name, __u32 kind)

kind is defined as u32 but you expect caller to pass -1 to ignore the
kind. Use int here.

>  {
> -       __u32 i, nr_types = btf__type_cnt(btf);
> +       const struct btf_type *t;
> +       const char *tname;
> +       __s32 idx;
> +
> +       if (start_id < btf->start_id) {
> +               idx = btf_find_by_name_kind(btf->base_btf, start_id,
> +                                           type_name, kind);
> +               if (idx >= 0)
> +                       return idx;
> +               start_id = btf->start_id;
> +       }
>
> -       if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
> +       if (kind == BTF_KIND_UNKN || strcmp(type_name, "void") == 0)
>                 return 0;
>
> -       for (i = start_id; i < nr_types; i++) {
> -               const struct btf_type *t = btf__type_by_id(btf, i);
> -               const char *name;
> +       if (btf->sorted_start_id > 0 && type_name[0]) {
> +               __s32 end_id = btf__type_cnt(btf) - 1;
> +
> +               /* skip anonymous types */
> +               start_id = max(start_id, btf->sorted_start_id);

can sorted_start_id ever be smaller than start_id?

> +               idx = btf_find_by_name_bsearch(btf, type_name, start_id, end_id);

is there ever a time when btf_find_by_name_bsearch() will work with
different start_id and end_id? why is this not done inside the
btf_find_by_name_bsearch()?

> +               if (unlikely(idx < 0))
> +                       return libbpf_err(-ENOENT);

pass through error returned from btf_find_by_name_bsearch(), why redefining it?

> +
> +               if (unlikely(kind == -1))
> +                       return idx;
> +
> +               t = btf_type_by_id(btf, idx);
> +               if (likely(BTF_INFO_KIND(t->info) == kind))

use btf_kind(), but this whole extra check is just unnecessary, this
should be done in the loop below. We talked about all this already,
why do I feel like I'm being ignored?..

> +                       return idx;

drop all these likely and unlikely micro optimizations, please


> +
> +               for (idx++; idx <= end_id; idx++) {
> +                       t = btf__type_by_id(btf, idx);
> +                       tname = btf__str_by_offset(btf, t->name_off);
> +                       if (strcmp(tname, type_name) != 0)
> +                               return libbpf_err(-ENOENT);
> +                       if (btf_kind(t) == kind)
> +                               return idx;
> +               }
> +       } else {
> +               __u32 i, total;
>
> -               if (btf_kind(t) != kind)
> -                       continue;
> -               name = btf__name_by_offset(btf, t->name_off);
> -               if (name && !strcmp(type_name, name))
> -                       return i;
> +               total = btf__type_cnt(btf);
> +               for (i = start_id; i < total; i++) {
> +                       t = btf_type_by_id(btf, i);
> +                       if (kind != -1 && btf_kind(t) != kind)

nit: kind < 0, no need to hard-code -1

> +                               continue;
> +                       tname = btf__str_by_offset(btf, t->name_off);
> +                       if (strcmp(tname, type_name) == 0)
> +                               return i;
> +               }
>         }
>
>         return libbpf_err(-ENOENT);
>  }
>

[...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Donglin Peng 1 month, 3 weeks ago
On Fri, Dec 19, 2025 at 7:29 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Thu, Dec 18, 2025 at 3:31 AM Donglin Peng <dolinux.peng@gmail.com> wrote:
> > [...]
>
> > +       l = start_id;
> > +       r = end_id;
> > +       while (l <= r) {
> > +               m = l + (r - l) / 2;
> > +               t = btf_type_by_id(btf, m);
> > +               tname = btf__str_by_offset(btf, t->name_off);
> > +               ret = strcmp(tname, name);
> > +               if (ret < 0) {
> > +                       l = m + 1;
> > +               } else {
> > +                       if (ret == 0)
> > +                               lmost = m;
> > +                       r = m - 1;
> > +               }
> >         }
>
> this differs from what we discussed in [0], you said you'll use that
> approach. Can you please elaborate on why you didn't?
>
>   [0] https://lore.kernel.org/bpf/CAEf4Bzb3Eu0J83O=Y4KA-LkzBMjtx7cbonxPzkiduzZ1Pedajg@mail.gmail.com/

Yes. As mentioned in the v8 changelog [1], the binary search approach
you referenced was implemented in versions v6 and v7 [2]. However,
testing revealed a slight performance regression. The root cause was
an extra strcmp operation introduced in v7, as discussed in [3]. Therefore,
in v8, I reverted to the approach from v5 [4] and refactored it for clarity.

Benchmark results show that v8 achieves a 4.2% performance improvement
over v7. If we don't care about the performance gain, I will revert to the
v7 approach in the next version.

[1] https://lore.kernel.org/bpf/20251126085025.784288-1-dolinux.peng@gmail.com/
[2] https://lore.kernel.org/all/20251119031531.1817099-1-dolinux.peng@gmail.com/
[3] https://lore.kernel.org/all/CAEf4BzaqEPD46LddJHO1-k5KPGyVWf6d=duDAxG1q=jykJkMBg@mail.gmail.com/
[4] https://lore.kernel.org/all/20251106131956.1222864-4-dolinux.peng@gmail.com/

>
> >
> > -       return libbpf_err(-ENOENT);
> > +       return lmost;
> >  }
> >
> >  static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
> >                                    const char *type_name, __u32 kind)
>
> kind is defined as u32 but you expect caller to pass -1 to ignore the
> kind. Use int here.

Thanks, I will fix it.

>
> >  {
> > -       __u32 i, nr_types = btf__type_cnt(btf);
> > +       const struct btf_type *t;
> > +       const char *tname;
> > +       __s32 idx;
> > +
> > +       if (start_id < btf->start_id) {
> > +               idx = btf_find_by_name_kind(btf->base_btf, start_id,
> > +                                           type_name, kind);
> > +               if (idx >= 0)
> > +                       return idx;
> > +               start_id = btf->start_id;
> > +       }
> >
> > -       if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
> > +       if (kind == BTF_KIND_UNKN || strcmp(type_name, "void") == 0)
> >                 return 0;
> >
> > -       for (i = start_id; i < nr_types; i++) {
> > -               const struct btf_type *t = btf__type_by_id(btf, i);
> > -               const char *name;
> > +       if (btf->sorted_start_id > 0 && type_name[0]) {
> > +               __s32 end_id = btf__type_cnt(btf) - 1;
> > +
> > +               /* skip anonymous types */
> > +               start_id = max(start_id, btf->sorted_start_id);
>
> can sorted_start_id ever be smaller than start_id?
>
> > +               idx = btf_find_by_name_bsearch(btf, type_name, start_id, end_id);
>
> is there ever a time when btf_find_by_name_bsearch() will work with
> different start_id and end_id? why is this not done inside the
> btf_find_by_name_bsearch()?

Because the start_id could be specified by the caller.

>
> > +               if (unlikely(idx < 0))
> > +                       return libbpf_err(-ENOENT);
>
> pass through error returned from btf_find_by_name_bsearch(), why redefining it?

Thanks, I will fix it.

>
> > +
> > +               if (unlikely(kind == -1))
> > +                       return idx;
> > +
> > +               t = btf_type_by_id(btf, idx);
> > +               if (likely(BTF_INFO_KIND(t->info) == kind))
>
> use btf_kind(), but this whole extra check is just unnecessary, this

Thanks, I will do it.

> should be done in the loop below. We talked about all this already,
> why do I feel like I'm being ignored?..

Sorry for the confusion; I am absolutely not ignoring you.

>
> > +                       return idx;
>
> drop all these likely and unlikely micro optimizations, please

Thanks, I will do it.

>
>
> > [...]
> > +                       if (kind != -1 && btf_kind(t) != kind)
>
> nit: kind < 0, no need to hard-code -1

Good, I will fix it.

> [...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Andrii Nakryiko 1 month, 3 weeks ago
On Thu, Dec 18, 2025 at 6:53 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
>
> On Fri, Dec 19, 2025 at 7:29 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Thu, Dec 18, 2025 at 3:31 AM Donglin Peng <dolinux.peng@gmail.com> wrote:
> > > [...]
> >
> > this differs from what we discussed in [0], you said you'll use that
> > approach. Can you please elaborate on why you didn't?
> >
> >   [0] https://lore.kernel.org/bpf/CAEf4Bzb3Eu0J83O=Y4KA-LkzBMjtx7cbonxPzkiduzZ1Pedajg@mail.gmail.com/
>
> Yes. As mentioned in the v8 changelog [1], the binary search approach
> you referenced was implemented in versions v6 and v7 [2]. However,
> testing revealed a slight performance regression. The root cause was
> an extra strcmp operation introduced in v7, as discussed in [3]. Therefore,
> in v8, I reverted to the approach from v5 [4] and refactored it for clarity.

If you keep oscillating like that this patch set will never land. A 4%
(500us) gain on an artificial and unrealistic micro-benchmark is
meaningless and irrelevant; you are just adding more work for yourself
and for reviewers by constantly changing your implementation between
revisions for no good reason.


> [...]
>
> > > +               idx = btf_find_by_name_bsearch(btf, type_name, start_id, end_id);
> >
> > is there ever a time when btf_find_by_name_bsearch() will work with
> > different start_id and end_id? why is this not done inside the
> > btf_find_by_name_bsearch()?
>
> Because the start_id could be specified by the caller.

Right, start_id has to be passed in. But end_id is always the same, so
maybe determine it internally instead? And let's not return -ENOENT
from btf_find_by_name_bsearch(); as I mentioned before, it would be
more streamlined to return btf__type_cnt(btf) if the search failed.

>
> >
> > > +               if (unlikely(idx < 0))
> > > +                       return libbpf_err(-ENOENT);
> >
> > pass through error returned from btf_find_by_name_bsearch(), why redefining it?
>
> Thanks, I will fix it.
>

see above: by returning btf__type_cnt() you won't even have this error
handling; you'll just go through the normal loop checking for a match,
won't find anything, and then return -ENOENT.
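
Roughly, the shape I have in mind (just a sketch, reusing names from
the patch; not tested):

	idx = btf_find_by_name_bsearch(btf, type_name, start_id);
	for (; idx < btf__type_cnt(btf); idx++) {
		t = btf_type_by_id(btf, idx);
		tname = btf__str_by_offset(btf, t->name_off);
		/* went past the run of types sharing the matching name */
		if (strcmp(tname, type_name) != 0)
			break;
		if (kind < 0 || btf_kind(t) == kind)
			return idx;
	}
	return libbpf_err(-ENOENT); /* a bsearch miss also lands here */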

> >
> > > +
> > > +               if (unlikely(kind == -1))
> > > +                       return idx;
> > > +
> > > +               t = btf_type_by_id(btf, idx);
> > > +               if (likely(BTF_INFO_KIND(t->info) == kind))
> >
> > use btf_kind(), but this whole extra check is just unnecessary, this
>
> Thanks, I will do it.
>
> > should be done in the loop below. We talked about all this already,
> > why do I feel like I'm being ignored?..
>
> Sorry for the confusion; I am absolutely not ignoring you.
>

If you decide to change the implementation due to some unforeseen factor
(like concern about a 4% microbenchmark improvement), it would be
helpful to call this out in a reply to the original discussion. A line
somewhere in the cover letter changelog is way too easy to miss, and
that doesn't give me an opportunity to stop you before you go and
produce another revision that I'll then be rejecting.

> >
> > > +                       return idx;
> >
> > drop all these likely and unlikely micro optimizations, please
>
> Thanks, I will do it.
>
> >
> >
> > > +
> > > +               for (idx++; idx <= end_id; idx++) {
> > > +                       t = btf__type_by_id(btf, idx);
> > > +                       tname = btf__str_by_offset(btf, t->name_off);
> > > +                       if (strcmp(tname, type_name) != 0)
> > > +                               return libbpf_err(-ENOENT);
> > > +                       if (btf_kind(t) == kind)
> > > +                               return idx;
> > > +               }
> > > +       } else {
> > > +               __u32 i, total;
> > >
> > > -               if (btf_kind(t) != kind)
> > > -                       continue;
> > > -               name = btf__name_by_offset(btf, t->name_off);
> > > -               if (name && !strcmp(type_name, name))
> > > -                       return i;
> > > +               total = btf__type_cnt(btf);
> > > +               for (i = start_id; i < total; i++) {
> > > +                       t = btf_type_by_id(btf, i);
> > > +                       if (kind != -1 && btf_kind(t) != kind)
> >
> > nit: kind < 0, no need to hard-code -1
>
> Good, I will fix it.
>
> >
> > > +                               continue;
> > > +                       tname = btf__str_by_offset(btf, t->name_off);
> > > +                       if (strcmp(tname, type_name) == 0)
> > > +                               return i;
> > > +               }
> > >         }
> > >
> > >         return libbpf_err(-ENOENT);
> > >  }
> > >
> >
> > [...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Donglin Peng 1 month, 2 weeks ago
On Sat, Dec 20, 2025 at 1:28 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
> [...]
>
> If you keep oscillating like that this patch set will never land. A 4%
> (500us) gain on an artificial and unrealistic micro-benchmark is
> meaningless and irrelevant; you are just adding more work for yourself
> and for reviewers by constantly changing your implementation between
> revisions for no good reason.

Thank you, I understand and will learn from it. I think the performance
gain makes sense, and I'd like to share a specific real-world case where
this optimization could matter: the `btf_find_by_name_kind()` function is
indeed infrequently used by the BPF subsystem, but it is heavily relied
upon by ftrace features such as `func-args`, `funcgraph-args` [1], and
the upcoming `funcgraph-retval` [2]. These features invoke the function
nearly once per trace line during output, with a call frequency that can
reach 100 kHz in intensive tracing workloads.

In such scenarios, the extra `strcmp` operations translate to ~100,000
additional string comparisons per second. While this might seem
negligible in isolation, the overhead accumulates under high-frequency
tracing, potentially impacting latency for users relying on detailed
function argument/return value tracing.

Thanks again for pushing for rigor; it helps make the code cleaner and
more robust.

[1] https://lore.kernel.org/all/20250227185822.639418500@goodmis.org/
[2] https://lore.kernel.org/all/20251215034153.2367756-1-dolinux.peng@gmail.com/

> [...]
>
> Right, start_id has to be passed in. But end_id is always the same, so
> maybe determine it internally instead? And let's not return -ENOENT

Thanks, I agree and will determine end_id inside btf_find_by_name_bsearch.

> from btf_find_by_name_bsearch(); as I mentioned before, it would be
> more streamlined to return btf__type_cnt(btf) if the search failed.

Thanks, I agree.

>
> > [...]
>
> see above: by returning btf__type_cnt() you won't even have this error
> handling; you'll just go through the normal loop checking for a match,
> won't find anything, and then return -ENOENT.

Thanks, I agree.

>
> > [...]
>
> If you decide to change the implementation due to some unforeseen factor
> (like concern about a 4% microbenchmark improvement), it would be
> helpful to call this out in a reply to the original discussion. A line
> somewhere in the cover letter changelog is way too easy to miss, and
> that doesn't give me an opportunity to stop you before you go and
> produce another revision that I'll then be rejecting.

I will learn from it and thank you for the suggestion.

>
> [...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Donglin Peng 1 month, 2 weeks ago
On Sat, Dec 20, 2025 at 5:38 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
>
> [...]

Hi Andrii,

I think we can refactor the code based on your suggestion like this:

1. If the binary search finds a type with a matching name, return its
   index; otherwise return btf__type_cnt(btf). This streamlines the code.
2. Skip the name check on the first loop iteration to eliminate the
   extra strcmp.

What do you think?

static __s32 btf_find_by_name_bsearch(const struct btf *btf, const char *name,
                                      __s32 start_id)
{
        const struct btf_type *t;
        const char *tname;
        __s32 end_id = btf__type_cnt(btf) - 1;
        __s32 l, r, m, lmost = end_id + 1;
        int ret;

        l = start_id;
        r = end_id;
        while (l <= r) {
                m = l + (r - l) / 2;
                t = btf_type_by_id(btf, m);
                tname = btf__str_by_offset(btf, t->name_off);
                ret = strcmp(tname, name);
                if (ret < 0) {
                        l = m + 1;
                } else {
                        if (ret == 0)
                                lmost = m;
                        r = m - 1;
                }
        }

        return lmost;
}

static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
                                   const char *type_name, __u32 kind)
{
       ......
       if (btf_is_sorted(btf) && type_name[0]) {
                bool first_loop = true;

                start_id = max(start_id, btf_sorted_start_id(btf));
                idx = btf_find_by_name_bsearch(btf, type_name, start_id);
                for (; idx < btf__type_cnt(btf); idx++) {
                        t = btf__type_by_id(btf, idx);
                        tname = btf__str_by_offset(btf, t->name_off);
                        if (!first_loop && strcmp(tname, type_name) != 0)
                                return libbpf_err(-ENOENT);
                        if (kind == -1 || btf_kind(t) == kind)
                                return idx;
                        if (first_loop)
                                first_loop = false;
                }
        } else {
                ......
        }

        return libbpf_err(-ENOENT);
}
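
With this shape, a failed search returns btf__type_cnt(btf) from
btf_find_by_name_bsearch(), the loop body is never entered, and we fall
through to the shared -ENOENT return, as you suggested.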

>
> [...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Andrii Nakryiko 1 month ago
On Sun, Dec 21, 2025 at 5:58 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
>
> [...]
>
> static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
>                                    const char *type_name, __u32 kind)
> {
>        ......
>        if (btf_is_sorted(btf) && type_name[0]) {
>                 bool first_loop = true;
>
>                 start_id = max(start_id, btf_sorted_start_id(btf));
>                 idx = btf_find_by_name_bsearch(btf, type_name, start_id);
>                 for (; idx < btf__type_cnt(btf); idx++) {
>                         t = btf__type_by_id(btf, idx);
>                         tname = btf__str_by_offset(btf, t->name_off);
>                         if (!first_loop && strcmp(tname, type_name) != 0)
>                                 return libbpf_err(-ENOENT);

no, let's keep it simple; please revert to the previous implementation
we agreed upon

>
> [...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Donglin Peng 1 month ago
On Tue, Jan 6, 2026 at 8:38 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> [...]
>
> no, let's keep it simple, please revert to previous implementation we
> agreed upon

Okay. I will do it.

> [...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Andrii Nakryiko 1 month ago
On Sat, Dec 20, 2025 at 1:38 AM Donglin Peng <dolinux.peng@gmail.com> wrote:
>
> [...]
> >
> > If you keep oscillating like that this patch set will never land. 4%
> > (500us) gain on an artificial and unrealistic micro-benchmark is
> > meaningless and irrelevant, you are just adding more work for yourself
> > and for reviewers by constantly changing your implementation between
> > revisions for no good reason.
>
> Thank you, I understand and will learn from it. I think the performance gain
> makes sense. I’d like to share a specific real-world case where this
> optimization could matter: the `btf_find_by_name_kind()` function is
> indeed infrequently
> used by the BPF subsystem, but it’s heavily relied upon by the ftrace
> subsystem’s features like `func-args`, `funcgraph-args` [1], and the upcoming
> `funcgraph-retval` [2]. These features invoke the function nearly once per
> trace line when outputting, with a call frequency that can reach **100 kHz**
> in intensive tracing workloads.
>
> In such scenarios, the extra `strcmp` operations translate to ~100,000
> additional string comparisons per second. While this might seem
> negligible in isolation,

I'm pretty confident it is quite negligible compared to all other
*useful* work you'll have to do to use that BTF type to format those
arguments. Yes, 100 kHz is pretty frequent, but it won't be anywhere
close to 4% if you profile the entire end-to-end overhead of emitting
arguments using func-args.
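
As a rough back-of-envelope (the per-call cost here is an assumption,
not a measurement): even at ~100ns per extra strcmp, 100,000 calls per
second is about 10ms of CPU time per second, i.e. ~1% of a single core
as an upper bound, and a much smaller share of the full
lookup-plus-format path that each trace line pays anyway.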

> the overhead accumulates under high-frequency tracing—potentially impacting
> latency for users relying on detailed function argument/return value tracing.
>
> Thanks again for pushing for rigor—it helps make the code cleaner
> and more robust.
>
> [1] https://lore.kernel.org/all/20250227185822.639418500@goodmis.org/
> [2] https://lore.kernel.org/all/20251215034153.2367756-1-dolinux.peng@gmail.com/
>
> >
> >
> > >
> > > Benchmark results show that v8 achieves a 4.2% performance improvement
> > > over v7. If we don't care about the performance gain, I will revert to the approach
> > > in v7 in the next version.
> > >
> > > [1] https://lore.kernel.org/bpf/20251126085025.784288-1-dolinux.peng@gmail.com/
> > > [2] https://lore.kernel.org/all/20251119031531.1817099-1-dolinux.peng@gmail.com/
> > > [3] https://lore.kernel.org/all/CAEf4BzaqEPD46LddJHO1-k5KPGyVWf6d=duDAxG1q=jykJkMBg@mail.gmail.com/
> > > [4] https://lore.kernel.org/all/20251106131956.1222864-4-dolinux.peng@gmail.com/
> > >

[...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Donglin Peng 1 month ago
On Tue, Jan 6, 2026 at 8:36 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> [...]
> >
> > In such scenarios, the extra `strcmp` operations translate to ~100,000
> > additional string comparisons per second. While this might seem
> > negligible in isolation,
>
> I'm pretty confident it is quite negligible compared to all other
> *useful* work you'll have to do to use that BTF type to format those
> arguments. Yes, 100 kHz is pretty frequent, but it won't be anywhere
> close to 4% if you profile the entire end-to-end overhead of emitting
> arguments using func-args.

Thanks, I agree.

> [...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Eduard Zingerman 1 month, 3 weeks ago
On Thu, 2025-12-18 at 15:29 -0800, Andrii Nakryiko wrote:

[...]

> >  static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
> >                                    const char *type_name, __u32 kind)
> 
> kind is defined as u32 but you expect the caller to pass -1 to ignore
> the kind. Use int here.
> 
> >  {
> > -       __u32 i, nr_types = btf__type_cnt(btf);
> > +       const struct btf_type *t;
> > +       const char *tname;
> > +       __s32 idx;
> > +
> > +       if (start_id < btf->start_id) {
> > +               idx = btf_find_by_name_kind(btf->base_btf, start_id,
> > +                                           type_name, kind);
> > +               if (idx >= 0)
> > +                       return idx;
> > +               start_id = btf->start_id;
> > +       }
> > 
> > -       if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
> > +       if (kind == BTF_KIND_UNKN || strcmp(type_name, "void") == 0)
> >                 return 0;
> > 
> > -       for (i = start_id; i < nr_types; i++) {
> > -               const struct btf_type *t = btf__type_by_id(btf, i);
> > -               const char *name;
> > +       if (btf->sorted_start_id > 0 && type_name[0]) {
> > +               __s32 end_id = btf__type_cnt(btf) - 1;
> > +
> > +               /* skip anonymous types */
> > +               start_id = max(start_id, btf->sorted_start_id);
> 
> can sorted_start_id ever be smaller than start_id?

sorted_start_id can be zero; at the two callsites of this function,
start_id is passed as btf->start_id and 1.
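
For reference, the two callsites look roughly like this (quoting from
memory, so treat it as a sketch):

    __s32 btf__find_by_name_kind_own(const struct btf *btf,
                                     const char *type_name, __u32 kind)
    {
            return btf_find_by_name_kind(btf, btf->start_id, type_name, kind);
    }

    __s32 btf__find_by_name_kind(const struct btf *btf,
                                 const char *type_name, __u32 kind)
    {
            return btf_find_by_name_kind(btf, 1, type_name, kind);
    }

so an "own" search starts from btf->start_id (skipping the base BTF),
while the full search starts from 1 and reaches the base BTF through
the start_id < btf->start_id recursion quoted above.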

> 
> > +               idx = btf_find_by_name_bsearch(btf, type_name, start_id, end_id);
> 
> is there ever a time when btf_find_by_name_bsearch() will work with
> different start_id and end_id? why is this not done inside the
> btf_find_by_name_bsearch()?
> 

[...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Andrii Nakryiko 1 month, 3 weeks ago
On Thu, Dec 18, 2025 at 4:13 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Thu, 2025-12-18 at 15:29 -0800, Andrii Nakryiko wrote:
>
> [...]
>
> > > [...]
> > > +       if (btf->sorted_start_id > 0 && type_name[0]) {
> > > +               __s32 end_id = btf__type_cnt(btf) - 1;
> > > +
> > > +               /* skip anonymous types */
> > > +               start_id = max(start_id, btf->sorted_start_id);
> >
> > can sorted_start_id ever be smaller than start_id?
>
> sorted_start_id can be zero; at the two callsites of this function,
> start_id is passed as btf->start_id and 1.

Can it with the check above?

  if (btf->sorted_start_id > 0 && type_name[0]) {


This branch is a known sorted case. That's why all these start_id
manipulations look weird and sloppy.

>
> [...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Eduard Zingerman 1 month, 3 weeks ago
On Thu, 2025-12-18 at 16:19 -0800, Andrii Nakryiko wrote:
> [...]
> > > > +       if (btf->sorted_start_id > 0 && type_name[0]) {
> > > > +               __s32 end_id = btf__type_cnt(btf) - 1;
> > > > +
> > > > +               /* skip anonymous types */
> > > > +               start_id = max(start_id, btf->sorted_start_id);
> > > 
> > > can sorted_start_id ever be smaller than start_id?
> > 
> > sorted_start_id can be zero; at the two callsites of this function,
> > start_id is passed as btf->start_id and 1.
> 
> Can it with the check above?
> 
>   if (btf->sorted_start_id > 0 && type_name[0]) {
> 
> 
> This branch is a known sorted case. That's why all these start_id
> manipulations look weird and sloppy.

Oops, it cannot.
But still it feels strange to pass a 'start_id' parameter to a
function and rely on the exact values passed at the callsites. Replace
the parameter with a boolean 'own'?
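
I.e., something like this (just a sketch of the signature change):

    static __s32 btf_find_by_name_kind(const struct btf *btf, bool own,
                                       const char *type_name, int kind);

with the callers passing true/false instead of btf->start_id and 1.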

> > [...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Andrii Nakryiko 1 month, 3 weeks ago
On Thu, Dec 18, 2025 at 4:25 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> [...]
> > > > can sorted_start_id ever be smaller than start_id?
> > >
> > > sorted_start_id can be zero; at the two callsites of this function,
> > > start_id is passed as btf->start_id and 1.
> >
> > Can it with the check above?
> >
> >   if (btf->sorted_start_id > 0 && type_name[0]) {
> >
> >
> > This branch is a known sorted case. That's why all these start_id
> > manipulations look weird and sloppy.
>
> Oops, it cannot.
> But still it feels strange to pass a 'start_id' parameter to a
> function and rely on the exact values passed at the callsites. Replace
> the parameter with a boolean 'own'?

hm.. looking around a bit more, it seems like passed-in start_id might
be useful for iterator-like searches. We don't use that right now, but
maybe we should preserve this behavior? And then max() does make sense
(though "skip anonymous types" is a bit too brief, I'd mention that
start_id might be within the named types intentionally and we need to
jump forward)
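
Something along these lines (the wording is only a sketch):

    /* start_id may intentionally point into the named (sorted) region
     * when resuming an iterator-like search; otherwise jump forward
     * past the anonymous types that precede the sorted names
     */
    start_id = max(start_id, btf->sorted_start_id);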

>
> [...]
Re: [PATCH bpf-next v10 04/13] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Eduard Zingerman 1 month, 3 weeks ago
On Thu, 2025-12-18 at 19:30 +0800, Donglin Peng wrote:
> [...]

Acked-by: Eduard Zingerman <eddyz87@gmail.com>

[...]