include/linux/find.h | 35 +++++++++++++++++++++++++++++++++++ lib/bitmap.c | 2 +- lib/find_bit.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-)
Replacing find_next_bit() with find_last_bit_range()
can improve performance by an average of 50%.
===========
Test result:
cnt old_a_cnt new_a_cnt cnt_ratio old_time(ns) new_time(ns) time_ratio
test1 8 71 34 52.1% 51357 25019 51.3%
test2 8 1 1 0% 1150 1153 around 0%
test1 32 81925 10402 87.3% 23103730 2910315 87.4%
test2 32 1 1 0% 434 434 around 0%
test1 128 82166 2572 96.9% 23054634 731453 96.8%
test2 128 1 1 0% 434 438 around 0%
test1 1024 81620 321 99.6% 23035192 234330 99%
test2 1024 14 7 50% 4257 2257 47%
test1 4096 80923 81 99.9% 22700265 57861 99.7%
test2 4096 648 92 85.8% 192854 27177 85.9%
============
Test result explanation:
@test1: The bitmap is filled with random numbers,
so the bitmap is very messy.
@test2: Sparse bitmap.
@cnt: The expected number of consecutive clear bits.
@old_a_cnt: Total number of "goto again" when
using find_next_bit().
@new_a_cnt: Total number of "goto again" when
using find_last_bit_range().
Finding @cnt consecutive clear bits in the bitmap
may require multiple attempts.
The number of repetitions should be recorded.
@cnt_ratio = (old_a_cnt - new_a_cnt) / old_a_cnt.
@old_time(ns): The total time consumed by
bitmap_find_next_zero_area_off() when
using find_next_bit().
@new_time(ns): The total time consumed by
bitmap_find_next_zero_area_off() when
using find_last_bit_range().
@time_ratio = (old_time - new_time) / old_time.
==============
Test case(refer to lib/find_bit_benchmark.c):
define BITMAP_LEN (4096UL * 8 * 10)
define SPARSE 500
static DECLARE_BITMAP(bitmap, BITMAP_LEN);
static void test_main()
{
unsigned long nbits = BITMAP_LEN / SPARSE;
//test1
get_random_bytes(bitmap, sizeof(bitmap));
__test_all();
//test2
bitmap_zero(bitmap, BITMAP_LEN);
while (nbits--)
__set_bit(get_random_u32_below(BITMAP_LEN), bitmap);
__test_all();
}
static void __test_all()
{
//Expected number of consecutive clear bits.
u32 cnt = 8;
//Ignore the results of this test.
__test_new(cnt);
//To mitigate the impact of caching,
//we will use the results of this test.
__test_new(cnt);
//Ignore the results of this test.
__test_old(cnt);
//To mitigate the impact of caching,
//we will use the results of this test.
__test_old(cnt);
}
//Add time-consuming statistics to bitmap_find_next_zero_area_off().
static ktime_t __test_old/__test_new(u32 nr)
{
unsigned long *map = bitmap;
unsigned long size = BITMAP_LEN;
unsigned long start = 0;
unsigned long align_mask = 0;
unsigned long align_offset = 0;
unsigned long index, end, i, again_cnt = 0;
//Here add time-consuming statistics.
ktime_t time = ktime_get();
again:
again_cnt++;
index = find_next_zero_bit(map, size, start);
/* Align allocation */
index = __ALIGN_MASK(index +
align_offset, align_mask) - align_offset;
end = index + nr;
if (end > size) {
//Here add time-consuming statistics.
time = ktime_get() - time;
return time;
}
//__test_old() use this.
i = find_next_bit(map, end, index);
//__test_new() use this.
i = find_last_bit_range(map, end, index);
if (i < end) {
start = i + 1;
goto again;
}
//Here add time-consuming statistics.
time = ktime_get() - time;
return time;
}
Yi Sun (2):
lib: bitmap: add find_last_bit_range() and _find_last_bit_range()
lib: bitmap: reduce the number of goto again in
bitmap_find_next_zero_area_off()
include/linux/find.h | 35 +++++++++++++++++++++++++++++++++++
lib/bitmap.c | 2 +-
lib/find_bit.c | 30 ++++++++++++++++++++++++++++++
3 files changed, 66 insertions(+), 1 deletion(-)
--
2.34.1
On Tue, May 12, 2026 at 12:06:57PM +0800, Yi Sun wrote:
> Replacing find_next_bit() with find_last_bit_range()
> can improve performance by an average of 50%.
>
> ===========
>
> Test result:
> cnt old_a_cnt new_a_cnt cnt_ratio old_time(ns) new_time(ns) time_ratio
> test1 8 71 34 52.1% 51357 25019 51.3%
> test2 8 1 1 0% 1150 1153 around 0%
>
> test1 32 81925 10402 87.3% 23103730 2910315 87.4%
> test2 32 1 1 0% 434 434 around 0%
>
> test1 128 82166 2572 96.9% 23054634 731453 96.8%
> test2 128 1 1 0% 434 438 around 0%
>
> test1 1024 81620 321 99.6% 23035192 234330 99%
> test2 1024 14 7 50% 4257 2257 47%
>
> test1 4096 80923 81 99.9% 22700265 57861 99.7%
> test2 4096 648 92 85.8% 192854 27177 85.9%
>
> ============
>
> Test result explanation:
> @test1: The bitmap is filled with random numbers,
> so the bitmap is very messy.
> @test2: Sparse bitmap.
>
> @cnt: The expected number of consecutive clear bits.
>
> @old_a_cnt: Total number of "goto again" when
> using find_next_bit().
> @new_a_cnt: Total number of "goto again" when
> using find_last_bit_range().
> Finding @cnt consecutive clear bits in the bitmap
> may require multiple attempts.
> The number of repetitions should be recorded.
> @cnt_ratio = (old_a_cnt - new_a_cnt) / old_a_cnt.
>
> @old_time(ns): The total time consumed by
> bitmap_find_next_zero_area_off() when
> using find_next_bit().
> @new_time(ns): The total time consumed by
> bitmap_find_next_zero_area_off() when
> using find_last_bit_range().
> @time_ratio = (old_time - new_time) / old_time.
>
> ==============
>
> Test case(refer to lib/find_bit_benchmark.c):
>
> define BITMAP_LEN (4096UL * 8 * 10)
> define SPARSE 500
> static DECLARE_BITMAP(bitmap, BITMAP_LEN);
>
> static void test_main()
> {
> unsigned long nbits = BITMAP_LEN / SPARSE;
>
> //test1
> get_random_bytes(bitmap, sizeof(bitmap));
> __test_all();
>
> //test2
> bitmap_zero(bitmap, BITMAP_LEN);
> while (nbits--)
> __set_bit(get_random_u32_below(BITMAP_LEN), bitmap);
> __test_all();
> }
>
> static void __test_all()
> {
> //Expected number of consecutive clear bits.
> u32 cnt = 8;
>
> //Ignore the results of this test.
> __test_new(cnt);
>
> //To mitigate the impact of caching,
> //we will use the results of this test.
> __test_new(cnt);
>
> //Ignore the results of this test.
> __test_old(cnt);
>
> //To mitigate the impact of caching,
> //we will use the results of this test.
> __test_old(cnt);
> }
>
> //Add time-consuming statistics to bitmap_find_next_zero_area_off().
> static ktime_t __test_old/__test_new(u32 nr)
> {
> unsigned long *map = bitmap;
> unsigned long size = BITMAP_LEN;
> unsigned long start = 0;
> unsigned long align_mask = 0;
> unsigned long align_offset = 0;
>
> unsigned long index, end, i, again_cnt = 0;
> //Here add time-consuming statistics.
> ktime_t time = ktime_get();
>
> again:
> again_cnt++;
> index = find_next_zero_bit(map, size, start);
> /* Align allocation */
> index = __ALIGN_MASK(index +
> align_offset, align_mask) - align_offset;
> end = index + nr;
> if (end > size) {
> //Here add time-consuming statistics.
> time = ktime_get() - time;
> return time;
> }
>
> //__test_old() use this.
> i = find_next_bit(map, end, index);
>
> //__test_new() use this.
> i = find_last_bit_range(map, end, index);
>
> if (i < end) {
> start = i + 1;
> goto again;
> }
>
> //Here add time-consuming statistics.
> time = ktime_get() - time;
> return time;
> }
Please check the lib/find_bit_benchmark.c and extend it with your
scenario. Please make sure you're printing and everything is aligned
with the existing format.
> Yi Sun (2):
> lib: bitmap: add find_last_bit_range() and _find_last_bit_range()
> lib: bitmap: reduce the number of goto again in
> bitmap_find_next_zero_area_off()
>
> include/linux/find.h | 35 +++++++++++++++++++++++++++++++++++
> lib/bitmap.c | 2 +-
> lib/find_bit.c | 30 ++++++++++++++++++++++++++++++
> 3 files changed, 66 insertions(+), 1 deletion(-)
>
> --
> 2.34.1
© 2016 - 2026 Red Hat, Inc.