[PATCH 0/2] Improve the performance of bitmap_find_next_zero_area_off()

Yi Sun posted 2 patches 1 week, 6 days ago
There is a newer version of this series
include/linux/find.h | 35 +++++++++++++++++++++++++++++++++++
lib/bitmap.c         |  2 +-
lib/find_bit.c       | 30 ++++++++++++++++++++++++++++++
3 files changed, 66 insertions(+), 1 deletion(-)
[PATCH 0/2] Improve the performance of bitmap_find_next_zero_area_off()
Posted by Yi Sun 1 week, 6 days ago
Replacing find_next_bit() with find_last_bit_range()
can improve performance by an average of 50%.

===========

Test result:
	cnt	old_a_cnt	new_a_cnt	cnt_ratio	old_time(ns)	new_time(ns)	time_ratio
test1	8	71		34		52.1%		51357		25019		51.3%
test2	8	1		1		0%		1150		1153		around 0%

test1	32	81925		10402		87.3%		23103730	2910315		87.4%
test2	32	1		1		0%		434		434		around 0%

test1	128	82166		2572		96.9%		23054634	731453		96.8%
test2	128	1		1		0%		434		438		around 0%

test1	1024	81620		321		99.6%		23035192	234330		99%
test2   1024	14		7		50%		4257		2257		47%

test1	4096	80923		81		99.9%		22700265	57861		99.7%
test2	4096	648		92		85.8%		192854		27177		85.9%

============

Test result explanation:
@test1: The bitmap is filled with random numbers,
so the bitmap is very messy.
@test2: Sparse bitmap.

@cnt: The expected number of consecutive clear bits.

@old_a_cnt: Total number of "goto again" when
using find_next_bit().
@new_a_cnt: Total number of "goto again" when
using find_last_bit_range().
Finding @cnt consecutive clear bits in the bitmap
may require multiple attempts.
The number of repetitions should be recorded.
@cnt_ratio = (old_a_cnt - new_a_cnt) / old_a_cnt.

@old_time(ns): The total time consumed by
bitmap_find_next_zero_area_off() when
using find_next_bit().
@new_time(ns): The total time consumed by
bitmap_find_next_zero_area_off() when
using find_last_bit_range().
@time_ratio = (old_time - new_time) / old_time.

==============

Test case(refer to lib/find_bit_benchmark.c):

define BITMAP_LEN      (4096UL * 8 * 10)
define SPARSE          500
static DECLARE_BITMAP(bitmap, BITMAP_LEN);

static void test_main()
{
	unsigned long nbits = BITMAP_LEN / SPARSE;

	//test1
	get_random_bytes(bitmap, sizeof(bitmap));
	__test_all();

	//test2
	bitmap_zero(bitmap, BITMAP_LEN);
        while (nbits--)
		__set_bit(get_random_u32_below(BITMAP_LEN), bitmap);
	__test_all();
}

static void __test_all()
{
	//Expected number of consecutive clear bits.
	u32 cnt = 8;

	//Ignore the results of this test.
	__test_new(cnt);

	//To mitigate the impact of caching,
	//we will use the results of this test.
	__test_new(cnt);

	//Ignore the results of this test.
	__test_old(cnt);

	//To mitigate the impact of caching,
	//we will use the results of this test.
	__test_old(cnt);
}

//Add time-consuming statistics to bitmap_find_next_zero_area_off().
static ktime_t __test_old/__test_new(u32 nr)
{
        unsigned long *map = bitmap;
        unsigned long size = BITMAP_LEN;
        unsigned long start = 0;
        unsigned long align_mask = 0;
        unsigned long align_offset = 0;

        unsigned long index, end, i, again_cnt = 0;
        //Here add time-consuming statistics.
        ktime_t time = ktime_get();

again:
        again_cnt++;
        index = find_next_zero_bit(map, size, start);
        /* Align allocation */
        index = __ALIGN_MASK(index +
			align_offset, align_mask) - align_offset;
        end = index + nr;
        if (end > size) {
                //Here add time-consuming statistics.
                time = ktime_get() - time;
                return time;
        }

        //__test_old() use this.
        i = find_next_bit(map, end, index);

        //__test_new() use this.
        i = find_last_bit_range(map, end, index);

        if (i < end) {
                start = i + 1;
                goto again;
        }

        //Here add time-consuming statistics.
        time = ktime_get() - time;
        return time;
}


Yi Sun (2):
  lib: bitmap: add find_last_bit_range() and _find_last_bit_range()
  lib: bitmap: reduce the number of goto again in
    bitmap_find_next_zero_area_off()

 include/linux/find.h | 35 +++++++++++++++++++++++++++++++++++
 lib/bitmap.c         |  2 +-
 lib/find_bit.c       | 30 ++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)

-- 
2.34.1
Re: [PATCH 0/2] Improve the performance of bitmap_find_next_zero_area_off()
Posted by Yury Norov 1 week, 6 days ago
On Tue, May 12, 2026 at 12:06:57PM +0800, Yi Sun wrote:
> Replacing find_next_bit() with find_last_bit_range()
> can improve performance by an average of 50%.
>
> ===========
> 
> Test result:
> 	cnt	old_a_cnt	new_a_cnt	cnt_ratio	old_time(ns)	new_time(ns)	time_ratio
> test1	8	71		34		52.1%		51357		25019		51.3%
> test2	8	1		1		0%		1150		1153		around 0%
> 
> test1	32	81925		10402		87.3%		23103730	2910315		87.4%
> test2	32	1		1		0%		434		434		around 0%
> 
> test1	128	82166		2572		96.9%		23054634	731453		96.8%
> test2	128	1		1		0%		434		438		around 0%
> 
> test1	1024	81620		321		99.6%		23035192	234330		99%
> test2   1024	14		7		50%		4257		2257		47%
> 
> test1	4096	80923		81		99.9%		22700265	57861		99.7%
> test2	4096	648		92		85.8%		192854		27177		85.9%
> 
> ============
> 
> Test result explanation:
> @test1: The bitmap is filled with random numbers,
> so the bitmap is very messy.
> @test2: Sparse bitmap.
> 
> @cnt: The expected number of consecutive clear bits.
> 
> @old_a_cnt: Total number of "goto again" when
> using find_next_bit().
> @new_a_cnt: Total number of "goto again" when
> using find_last_bit_range().
> Finding @cnt consecutive clear bits in the bitmap
> may require multiple attempts.
> The number of repetitions should be recorded.
> @cnt_ratio = (old_a_cnt - new_a_cnt) / old_a_cnt.
> 
> @old_time(ns): The total time consumed by
> bitmap_find_next_zero_area_off() when
> using find_next_bit().
> @new_time(ns): The total time consumed by
> bitmap_find_next_zero_area_off() when
> using find_last_bit_range().
> @time_ratio = (old_time - new_time) / old_time.
> 
> ==============
> 
> Test case(refer to lib/find_bit_benchmark.c):
> 
> define BITMAP_LEN      (4096UL * 8 * 10)
> define SPARSE          500
> static DECLARE_BITMAP(bitmap, BITMAP_LEN);
> 
> static void test_main()
> {
> 	unsigned long nbits = BITMAP_LEN / SPARSE;
> 
> 	//test1
> 	get_random_bytes(bitmap, sizeof(bitmap));
> 	__test_all();
> 
> 	//test2
> 	bitmap_zero(bitmap, BITMAP_LEN);
>         while (nbits--)
> 		__set_bit(get_random_u32_below(BITMAP_LEN), bitmap);
> 	__test_all();
> }
> 
> static void __test_all()
> {
> 	//Expected number of consecutive clear bits.
> 	u32 cnt = 8;
> 
> 	//Ignore the results of this test.
> 	__test_new(cnt);
> 
> 	//To mitigate the impact of caching,
> 	//we will use the results of this test.
> 	__test_new(cnt);
> 
> 	//Ignore the results of this test.
> 	__test_old(cnt);
> 
> 	//To mitigate the impact of caching,
> 	//we will use the results of this test.
> 	__test_old(cnt);
> }
> 
> //Add time-consuming statistics to bitmap_find_next_zero_area_off().
> static ktime_t __test_old/__test_new(u32 nr)
> {
>         unsigned long *map = bitmap;
>         unsigned long size = BITMAP_LEN;
>         unsigned long start = 0;
>         unsigned long align_mask = 0;
>         unsigned long align_offset = 0;
> 
>         unsigned long index, end, i, again_cnt = 0;
>         //Here add time-consuming statistics.
>         ktime_t time = ktime_get();
> 
> again:
>         again_cnt++;
>         index = find_next_zero_bit(map, size, start);
>         /* Align allocation */
>         index = __ALIGN_MASK(index +
> 			align_offset, align_mask) - align_offset;
>         end = index + nr;
>         if (end > size) {
>                 //Here add time-consuming statistics.
>                 time = ktime_get() - time;
>                 return time;
>         }
> 
>         //__test_old() use this.
>         i = find_next_bit(map, end, index);
> 
>         //__test_new() use this.
>         i = find_last_bit_range(map, end, index);
> 
>         if (i < end) {
>                 start = i + 1;
>                 goto again;
>         }
> 
>         //Here add time-consuming statistics.
>         time = ktime_get() - time;
>         return time;
> }

Please check the lib/find_bit_benchmark.c and extend it with your
scenario. Please make sure you're printing and everything is aligned
with the existing format.

> Yi Sun (2):
>   lib: bitmap: add find_last_bit_range() and _find_last_bit_range()
>   lib: bitmap: reduce the number of goto again in
>     bitmap_find_next_zero_area_off()
> 
>  include/linux/find.h | 35 +++++++++++++++++++++++++++++++++++
>  lib/bitmap.c         |  2 +-
>  lib/find_bit.c       | 30 ++++++++++++++++++++++++++++++
>  3 files changed, 66 insertions(+), 1 deletion(-)
> 
> -- 
> 2.34.1