[RFC PATCH v7 4/7] libbpf: Optimize type lookup with binary search for sorted BTF

Donglin Peng posted 7 patches 1 week, 5 days ago
There is a newer version of this series
[RFC PATCH v7 4/7] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Donglin Peng 1 week, 5 days ago
From: Donglin Peng <pengdonglin@xiaomi.com>

This patch introduces binary search optimization for BTF type lookups
when the BTF instance contains sorted types.

The optimization significantly improves performance when searching for
types in large BTF instances with sorted type names. For unsorted BTF
or when nr_sorted_types is zero, the implementation falls back to
the original linear search algorithm.

Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Song Liu <song@kernel.org>
Cc: Xiaoqin Zhang <zhangxiaoqin@xiaomi.com>
Signed-off-by: Donglin Peng <pengdonglin@xiaomi.com>
---
 tools/lib/bpf/btf.c | 104 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 81 insertions(+), 23 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index ab95ff19fde3..1d19d95da1d0 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -92,6 +92,12 @@ struct btf {
 	 *   - for split BTF counts number of types added on top of base BTF.
 	 */
 	__u32 nr_types;
+	/* number of sorted and named types in this BTF instance:
+	 *   - doesn't include special [0] void type;
+	 *   - for split BTF counts number of sorted and named types added on
+	 *     top of base BTF.
+	 */
+	__u32 nr_sorted_types;
 	/* if not NULL, points to the base BTF on top of which the current
 	 * split BTF is based
 	 */
@@ -897,44 +903,93 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
 	return type_id;
 }
 
-__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+static __s32 btf_find_type_by_name_bsearch(const struct btf *btf, const char *name,
+						__s32 start_id, __s32 end_id)
 {
-	__u32 i, nr_types = btf__type_cnt(btf);
+	const struct btf_type *t;
+	const char *tname;
+	__s32 l, r, m;
+
+	l = start_id;
+	r = end_id;
+	while (l <= r) {
+		m = l + (r - l) / 2;
+		t = btf_type_by_id(btf, m);
+		tname = btf__str_by_offset(btf, t->name_off);
+		if (strcmp(tname, name) >= 0) {
+			if (l == r)
+				return r;
+			r = m;
+		} else {
+			l = m + 1;
+		}
+	}
 
-	if (!strcmp(type_name, "void"))
-		return 0;
+	return btf__type_cnt(btf);
+}
 
-	for (i = 1; i < nr_types; i++) {
-		const struct btf_type *t = btf__type_by_id(btf, i);
-		const char *name = btf__name_by_offset(btf, t->name_off);
+static __s32 btf_find_type_by_name_kind(const struct btf *btf, int start_id,
+				   const char *type_name, __u32 kind)
+{
+	const struct btf_type *t;
+	const char *tname;
+	int err = -ENOENT;
+
+	if (start_id < btf->start_id) {
+		err = btf_find_type_by_name_kind(btf->base_btf, start_id,
+			type_name, kind);
+		if (err > 0)
+			goto out;
+		start_id = btf->start_id;
+	}
+
+	if (btf->nr_sorted_types > 0) {
+		/* binary search */
+		__s32 end_id;
+		int idx;
+
+		end_id = btf->start_id + btf->nr_sorted_types - 1;
+		idx = btf_find_type_by_name_bsearch(btf, type_name, start_id, end_id);
+		for (; idx <= end_id; idx++) {
+			t = btf__type_by_id(btf, idx);
+			tname = btf__str_by_offset(btf, t->name_off);
+			if (strcmp(tname, type_name))
+				goto out;
+			if (kind == -1 || btf_kind(t) == kind)
+				return idx;
+		}
+	} else {
+		/* linear search */
+		__u32 i, total;
 
-		if (name && !strcmp(type_name, name))
-			return i;
+		total = btf__type_cnt(btf);
+		for (i = start_id; i < total; i++) {
+			t = btf_type_by_id(btf, i);
+			if (kind != -1 && btf_kind(t) != kind)
+				continue;
+			tname = btf__str_by_offset(btf, t->name_off);
+			if (tname && !strcmp(tname, type_name))
+				return i;
+		}
 	}
 
-	return libbpf_err(-ENOENT);
+out:
+	return err;
 }
 
 static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
 				   const char *type_name, __u32 kind)
 {
-	__u32 i, nr_types = btf__type_cnt(btf);
-
 	if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
 		return 0;
 
-	for (i = start_id; i < nr_types; i++) {
-		const struct btf_type *t = btf__type_by_id(btf, i);
-		const char *name;
-
-		if (btf_kind(t) != kind)
-			continue;
-		name = btf__name_by_offset(btf, t->name_off);
-		if (name && !strcmp(type_name, name))
-			return i;
-	}
+	return libbpf_err(btf_find_type_by_name_kind(btf, start_id, type_name, kind));
+}
 
-	return libbpf_err(-ENOENT);
+/* the kind value of -1 indicates that kind matching should be skipped */
+__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+	return btf_find_by_name_kind(btf, btf->start_id, type_name, -1);
 }
 
 __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
@@ -1006,6 +1061,7 @@ static struct btf *btf_new_empty(struct btf *base_btf)
 	btf->fd = -1;
 	btf->ptr_sz = sizeof(void *);
 	btf->swapped_endian = false;
+	btf->nr_sorted_types = 0;
 
 	if (base_btf) {
 		btf->base_btf = base_btf;
@@ -1057,6 +1113,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
 	btf->start_id = 1;
 	btf->start_str_off = 0;
 	btf->fd = -1;
+	btf->nr_sorted_types = 0;
 
 	if (base_btf) {
 		btf->base_btf = base_btf;
@@ -1715,6 +1772,7 @@ static void btf_invalidate_raw_data(struct btf *btf)
 		free(btf->raw_data_swapped);
 		btf->raw_data_swapped = NULL;
 	}
+	btf->nr_sorted_types = 0;
 }
 
 /* Ensure BTF is ready to be modified (by splitting into a three memory
-- 
2.34.1
Re: [RFC PATCH v7 4/7] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Andrii Nakryiko 1 week, 5 days ago
On Tue, Nov 18, 2025 at 7:21 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
>
> From: Donglin Peng <pengdonglin@xiaomi.com>
>
> This patch introduces binary search optimization for BTF type lookups
> when the BTF instance contains sorted types.
>
> The optimization significantly improves performance when searching for
> types in large BTF instances with sorted type names. For unsorted BTF
> or when nr_sorted_types is zero, the implementation falls back to
> the original linear search algorithm.
>
> Cc: Eduard Zingerman <eddyz87@gmail.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
> Cc: Alan Maguire <alan.maguire@oracle.com>
> Cc: Song Liu <song@kernel.org>
> Cc: Xiaoqin Zhang <zhangxiaoqin@xiaomi.com>
> Signed-off-by: Donglin Peng <pengdonglin@xiaomi.com>
> ---
>  tools/lib/bpf/btf.c | 104 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 81 insertions(+), 23 deletions(-)
>

[...]

> +       const struct btf_type *t;
> +       const char *tname;
> +       int err = -ENOENT;
> +
> +       if (start_id < btf->start_id) {
> +               err = btf_find_type_by_name_kind(btf->base_btf, start_id,
> +                       type_name, kind);

nit: align wrapped args on the second line

also, we expect that err will be set to -ENOENT if we didn't find a
match in the base BTF, right? I'm a bit uneasy about this, I'd rather
do explicit err = -ENOENT setting for each goto out

> +               if (err > 0)
> +                       goto out;
> +               start_id = btf->start_id;
> +       }
> +
> +       if (btf->nr_sorted_types > 0) {
> +               /* binary search */
> +               __s32 end_id;
> +               int idx;
> +
> +               end_id = btf->start_id + btf->nr_sorted_types - 1;
> +               idx = btf_find_type_by_name_bsearch(btf, type_name, start_id, end_id);
> +               for (; idx <= end_id; idx++) {
> +                       t = btf__type_by_id(btf, idx);
> +                       tname = btf__str_by_offset(btf, t->name_off);
> +                       if (strcmp(tname, type_name))

nit: please add explicit != 0 here

also, why not just `return -ENOENT;`?

> +                               goto out;
> +                       if (kind == -1 || btf_kind(t) == kind)
> +                               return idx;
> +               }
> +       } else {
> +               /* linear search */
> +               __u32 i, total;
>
> -               if (name && !strcmp(type_name, name))
> -                       return i;
> +               total = btf__type_cnt(btf);
> +               for (i = start_id; i < total; i++) {
> +                       t = btf_type_by_id(btf, i);
> +                       if (kind != -1 && btf_kind(t) != kind)
> +                               continue;
> +                       tname = btf__str_by_offset(btf, t->name_off);
> +                       if (tname && !strcmp(tname, type_name))

nit: let's do explicit == 0 for strcmp, please

> +                               return i;
> +               }
>         }
>
> -       return libbpf_err(-ENOENT);
> +out:
> +       return err;
>  }
>
>  static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
>                                    const char *type_name, __u32 kind)
>  {
> -       __u32 i, nr_types = btf__type_cnt(btf);
> -
>         if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
>                 return 0;

this is the only thing that btf_find_by_name_kind() does on top of
what btf_find_type_by_name_kind(), right? Any reason we can't merge
those and keep only btf_find_by_name_kind()?

>
> -       for (i = start_id; i < nr_types; i++) {
> -               const struct btf_type *t = btf__type_by_id(btf, i);
> -               const char *name;
> -
> -               if (btf_kind(t) != kind)
> -                       continue;
> -               name = btf__name_by_offset(btf, t->name_off);
> -               if (name && !strcmp(type_name, name))
> -                       return i;
> -       }
> +       return libbpf_err(btf_find_type_by_name_kind(btf, start_id, type_name, kind));
> +}
>

[...]
Re: [RFC PATCH v7 4/7] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Donglin Peng 1 week, 4 days ago
On Thu, Nov 20, 2025 at 3:47 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Nov 18, 2025 at 7:21 PM Donglin Peng <dolinux.peng@gmail.com> wrote:
> >
> > From: Donglin Peng <pengdonglin@xiaomi.com>
> >
> > This patch introduces binary search optimization for BTF type lookups
> > when the BTF instance contains sorted types.
> >
> > The optimization significantly improves performance when searching for
> > types in large BTF instances with sorted type names. For unsorted BTF
> > or when nr_sorted_types is zero, the implementation falls back to
> > the original linear search algorithm.
> >
> > Cc: Eduard Zingerman <eddyz87@gmail.com>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
> > Cc: Alan Maguire <alan.maguire@oracle.com>
> > Cc: Song Liu <song@kernel.org>
> > Cc: Xiaoqin Zhang <zhangxiaoqin@xiaomi.com>
> > Signed-off-by: Donglin Peng <pengdonglin@xiaomi.com>
> > ---
> >  tools/lib/bpf/btf.c | 104 ++++++++++++++++++++++++++++++++++----------
> >  1 file changed, 81 insertions(+), 23 deletions(-)
> >
>
> [...]
>
> > +       const struct btf_type *t;
> > +       const char *tname;
> > +       int err = -ENOENT;
> > +
> > +       if (start_id < btf->start_id) {
> > +               err = btf_find_type_by_name_kind(btf->base_btf, start_id,
> > +                       type_name, kind);
>
> nit: align wrapped args on the second line
>
> also, we expect that err will be set to -ENOENT if we didn't find a
> match in the base BTF, right? I'm a bit uneasy about this, I'd rather
> do explicit err = -ENOENT setting for each goto out

Thanks, I will refactor it.

>
> > +               if (err > 0)
> > +                       goto out;
> > +               start_id = btf->start_id;
> > +       }
> > +
> > +       if (btf->nr_sorted_types > 0) {
> > +               /* binary search */
> > +               __s32 end_id;
> > +               int idx;
> > +
> > +               end_id = btf->start_id + btf->nr_sorted_types - 1;
> > +               idx = btf_find_type_by_name_bsearch(btf, type_name, start_id, end_id);
> > +               for (; idx <= end_id; idx++) {
> > +                       t = btf__type_by_id(btf, idx);
> > +                       tname = btf__str_by_offset(btf, t->name_off);
> > +                       if (strcmp(tname, type_name))
>
> nit: please add explicit != 0 here

Thanks. I will fix it.

>
> also, why not just `return -ENOENT;`?

I thought about that, but I feel the function should return from
one place. Frankly, just returning -ENOENT is cleaner

>
> > +                               goto out;
> > +                       if (kind == -1 || btf_kind(t) == kind)
> > +                               return idx;
> > +               }
> > +       } else {
> > +               /* linear search */
> > +               __u32 i, total;
> >
> > -               if (name && !strcmp(type_name, name))
> > -                       return i;
> > +               total = btf__type_cnt(btf);
> > +               for (i = start_id; i < total; i++) {
> > +                       t = btf_type_by_id(btf, i);
> > +                       if (kind != -1 && btf_kind(t) != kind)
> > +                               continue;
> > +                       tname = btf__str_by_offset(btf, t->name_off);
> > +                       if (tname && !strcmp(tname, type_name))
>
> nit: let's do explicit == 0 for strcmp, please

Thanks, I will fix it.

>
> > +                               return i;
> > +               }
> >         }
> >
> > -       return libbpf_err(-ENOENT);
> > +out:
> > +       return err;
> >  }
> >
> >  static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
> >                                    const char *type_name, __u32 kind)
> >  {
> > -       __u32 i, nr_types = btf__type_cnt(btf);
> > -
> >         if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
> >                 return 0;
>
> this is the only thing that btf_find_by_name_kind() does on top of
> what btf_find_type_by_name_kind(), right? Any reason we can't merge
> those and keep only btf_find_by_name_kind()?

Thanks. The check exists in the original btf_find_by_name_kind.
Because btf_find_type_by_name_kind uses recursion, adding the
check there would cause it to run multiple times. I'm open to merging
the functions if the overhead is acceptable.

>
> >
> > -       for (i = start_id; i < nr_types; i++) {
> > -               const struct btf_type *t = btf__type_by_id(btf, i);
> > -               const char *name;
> > -
> > -               if (btf_kind(t) != kind)
> > -                       continue;
> > -               name = btf__name_by_offset(btf, t->name_off);
> > -               if (name && !strcmp(type_name, name))
> > -                       return i;
> > -       }
> > +       return libbpf_err(btf_find_type_by_name_kind(btf, start_id, type_name, kind));
> > +}
> >
>
> [...]
Re: [RFC PATCH v7 4/7] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by bot+bpf-ci@kernel.org 1 week, 5 days ago
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 71d464690..555703b37 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c

[ ... ]

> @@ -897,44 +903,93 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
>  	return type_id;
>  }
>
> -__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
> +static __s32 btf_find_type_by_name_bsearch(const struct btf *btf, const char *name,
> +						__s32 start_id, __s32 end_id)
>  {
> -	__u32 i, nr_types = btf__type_cnt(btf);
> +	const struct btf_type *t;
> +	const char *tname;
> +	__s32 l, r, m;
> +
> +	l = start_id;
> +	r = end_id;
> +	while (l <= r) {
> +		m = l + (r - l) / 2;
> +		t = btf_type_by_id(btf, m);
> +		tname = btf__str_by_offset(btf, t->name_off);
> +		if (strcmp(tname, name) >= 0) {
                    ^^^^^^
Can strcmp() handle a NULL tname here? The btf__str_by_offset()
function returns NULL when offset is out of range (line 1674 returns
"errno = EINVAL, NULL"), but strcmp() requires non-NULL arguments.

The original linear search code at line 971 checks "if (tname &&
!strcmp(tname, type_name))" before calling strcmp(). Should the binary
search path also check for NULL?

> +			if (l == r)
> +				return r;
> +			r = m;
> +		} else {
> +			l = m + 1;
> +		}
> +	}
> +
> +	return btf__type_cnt(btf);
> +}
> +
> +static __s32 btf_find_type_by_name_kind(const struct btf *btf, int start_id,
> +				   const char *type_name, __u32 kind)
> +{
> +	const struct btf_type *t;
> +	const char *tname;
> +	int err = -ENOENT;
> +
> +	if (start_id < btf->start_id) {
> +		err = btf_find_type_by_name_kind(btf->base_btf, start_id,
> +			type_name, kind);
> +		if (err > 0)
> +			goto out;
> +		start_id = btf->start_id;
> +	}
> +
> +	if (btf->nr_sorted_types > 0) {
> +		/* binary search */
> +		__s32 end_id;
> +		int idx;
> +
> +		end_id = btf->start_id + btf->nr_sorted_types - 1;
> +		idx = btf_find_type_by_name_bsearch(btf, type_name, start_id, end_id);
> +		for (; idx <= end_id; idx++) {
> +			t = btf__type_by_id(btf, idx);
> +			tname = btf__str_by_offset(btf, t->name_off);
> +			if (strcmp(tname, type_name))
                            ^^^^^^
Same question here - can tname be NULL at this point?

> +				goto out;
> +			if (kind == -1 || btf_kind(t) == kind)
> +				return idx;
> +		}
> +	} else {
> +		/* linear search */
> +		__u32 i, total;
> +
> +		total = btf__type_cnt(btf);
> +		for (i = start_id; i < total; i++) {
> +			t = btf_type_by_id(btf, i);
> +			if (kind != -1 && btf_kind(t) != kind)
> +				continue;
> +			tname = btf__str_by_offset(btf, t->name_off);
> +			if (tname && !strcmp(tname, type_name))
> +				return i;
> +		}
> +	}

[ ... ]


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/19488781158
Re: [RFC PATCH v7 4/7] libbpf: Optimize type lookup with binary search for sorted BTF
Posted by Donglin Peng 1 week, 5 days ago
On Wed, Nov 19, 2025 at 12:11 PM <bot+bpf-ci@kernel.org> wrote:
>
> > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > index 71d464690..555703b37 100644
> > --- a/tools/lib/bpf/btf.c
> > +++ b/tools/lib/bpf/btf.c
>
> [ ... ]
>
> > @@ -897,44 +903,93 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
> >       return type_id;
> >  }
> >
> > -__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
> > +static __s32 btf_find_type_by_name_bsearch(const struct btf *btf, const char *name,
> > +                                             __s32 start_id, __s32 end_id)
> >  {
> > -     __u32 i, nr_types = btf__type_cnt(btf);
> > +     const struct btf_type *t;
> > +     const char *tname;
> > +     __s32 l, r, m;
> > +
> > +     l = start_id;
> > +     r = end_id;
> > +     while (l <= r) {
> > +             m = l + (r - l) / 2;
> > +             t = btf_type_by_id(btf, m);
> > +             tname = btf__str_by_offset(btf, t->name_off);
> > +             if (strcmp(tname, name) >= 0) {
>                     ^^^^^^
> Can strcmp() handle a NULL tname here? The btf__str_by_offset()
> function returns NULL when offset is out of range (line 1674 returns
> "errno = EINVAL, NULL"), but strcmp() requires non-NULL arguments.
>
> The original linear search code at line 971 checks "if (tname &&
> !strcmp(tname, type_name))" before calling strcmp(). Should the binary
> search path also check for NULL?

Thank you. It is unnecessary to check for a NULL type name in this context
because the btf_check_sorted function introduced in the subsequent patch
ensures that all types accessed via binary search have non-NULL names.

Furthermore, in the current patch, nr_sorted_types is always zero, meaning
the binary search path is never exercised.

>
> > +                     if (l == r)
> > +                             return r;
> > +                     r = m;
> > +             } else {
> > +                     l = m + 1;
> > +             }
> > +     }
> > +
> > +     return btf__type_cnt(btf);
> > +}
> > +
> > +static __s32 btf_find_type_by_name_kind(const struct btf *btf, int start_id,
> > +                                const char *type_name, __u32 kind)
> > +{
> > +     const struct btf_type *t;
> > +     const char *tname;
> > +     int err = -ENOENT;
> > +
> > +     if (start_id < btf->start_id) {
> > +             err = btf_find_type_by_name_kind(btf->base_btf, start_id,
> > +                     type_name, kind);
> > +             if (err > 0)
> > +                     goto out;
> > +             start_id = btf->start_id;
> > +     }
> > +
> > +     if (btf->nr_sorted_types > 0) {
> > +             /* binary search */
> > +             __s32 end_id;
> > +             int idx;
> > +
> > +             end_id = btf->start_id + btf->nr_sorted_types - 1;
> > +             idx = btf_find_type_by_name_bsearch(btf, type_name, start_id, end_id);
> > +             for (; idx <= end_id; idx++) {
> > +                     t = btf__type_by_id(btf, idx);
> > +                     tname = btf__str_by_offset(btf, t->name_off);
> > +                     if (strcmp(tname, type_name))
>                             ^^^^^^
> Same question here - can tname be NULL at this point?

Ditto.

>
> > +                             goto out;
> > +                     if (kind == -1 || btf_kind(t) == kind)
> > +                             return idx;
> > +             }
> > +     } else {
> > +             /* linear search */
> > +             __u32 i, total;
> > +
> > +             total = btf__type_cnt(btf);
> > +             for (i = start_id; i < total; i++) {
> > +                     t = btf_type_by_id(btf, i);
> > +                     if (kind != -1 && btf_kind(t) != kind)
> > +                             continue;
> > +                     tname = btf__str_by_offset(btf, t->name_off);
> > +                     if (tname && !strcmp(tname, type_name))
> > +                             return i;
> > +             }
> > +     }
>
> [ ... ]
>
>
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
>
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/19488781158