Benchmark the following bitmap find functions applying binary operations
on sets of two bitmaps:
- find_first_andnot_bit,
- find_first_nor_bit,
- find_next_andnot_bit,
- find_next_nor_bit,
- find_next_or_bit.
Note that find_first_or_bit is not part of the current API, so it is not
covered.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Yury Norov <yury.norov@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
---
lib/find_bit_benchmark.c | 93 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 93 insertions(+)
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
index aee2ebb6b3cd..3b16254dec23 100644
--- a/lib/find_bit_benchmark.c
+++ b/lib/find_bit_benchmark.c
@@ -70,6 +70,44 @@ static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, uns
return 0;
}
+static int __init test_find_first_andnot_bit(void *bitmap, const void *bitmap2, unsigned long len)
+{
+ static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata;
+ unsigned long i, cnt;
+ ktime_t time;
+
+ bitmap_copy(cp, bitmap, BITMAP_LEN);
+
+ time = ktime_get();
+ for (cnt = i = 0; i < len; cnt++) {
+ i = find_first_andnot_bit(cp, bitmap2, len);
+ __clear_bit(i, cp);
+ }
+ time = ktime_get() - time;
+ pr_err("find_first_andnot_bit: %18llu ns, %6ld iterations\n", time, cnt);
+
+ return 0;
+}
+
+static int __init test_find_first_nor_bit(void *bitmap, const void *bitmap2, unsigned long len)
+{
+ static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata;
+ unsigned long i, cnt;
+ ktime_t time;
+
+ bitmap_copy(cp, bitmap, BITMAP_LEN);
+
+ time = ktime_get();
+ for (cnt = i = 0; i < len; cnt++) {
+ i = find_first_nor_bit(cp, bitmap2, len);
+ __set_bit(i, cp);
+ }
+ time = ktime_get() - time;
+ pr_err("find_first_nor_bit: %18llu ns, %6ld iterations\n", time, cnt);
+
+ return 0;
+}
+
static int __init test_find_next_bit(const void *bitmap, unsigned long len)
{
unsigned long i, cnt;
@@ -148,6 +186,51 @@ static int __init test_find_next_and_bit(const void *bitmap,
return 0;
}
+static int __init test_find_next_andnot_bit(const void *bitmap,
+ const void *bitmap2, unsigned long len)
+{
+ unsigned long i, cnt;
+ ktime_t time;
+
+ time = ktime_get();
+ for (cnt = i = 0; i < BITMAP_LEN; cnt++)
+ i = find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
+ time = ktime_get() - time;
+ pr_err("find_next_andnot_bit: %18llu ns, %6ld iterations\n", time, cnt);
+
+ return 0;
+}
+
+static int __init test_find_next_nor_bit(const void *bitmap,
+ const void *bitmap2, unsigned long len)
+{
+ unsigned long i, cnt;
+ ktime_t time;
+
+ time = ktime_get();
+ for (cnt = i = 0; i < BITMAP_LEN; cnt++)
+ i = find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
+ time = ktime_get() - time;
+ pr_err("find_next_nor_bit: %18llu ns, %6ld iterations\n", time, cnt);
+
+ return 0;
+}
+
+static int __init test_find_next_or_bit(const void *bitmap,
+ const void *bitmap2, unsigned long len)
+{
+ unsigned long i, cnt;
+ ktime_t time;
+
+ time = ktime_get();
+ for (cnt = i = 0; i < BITMAP_LEN; cnt++)
+ i = find_next_or_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
+ time = ktime_get() - time;
+ pr_err("find_next_or_bit: %18llu ns, %6ld iterations\n", time, cnt);
+
+ return 0;
+}
+
static int __init find_bit_test(void)
{
unsigned long nbits = BITMAP_LEN / SPARSE;
@@ -169,6 +252,11 @@ static int __init find_bit_test(void)
test_find_first_bit(bitmap, BITMAP_LEN / 10);
test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
+ test_find_first_andnot_bit(bitmap, bitmap2, BITMAP_LEN / 2);
+ test_find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
+ test_find_first_nor_bit(bitmap, bitmap2, BITMAP_LEN / 2);
+ test_find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN);
+ test_find_next_or_bit(bitmap, bitmap2, BITMAP_LEN);
pr_err("\nStart testing find_bit() with sparse bitmap\n");
@@ -187,6 +275,11 @@ static int __init find_bit_test(void)
test_find_first_bit(bitmap, BITMAP_LEN);
test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
+ test_find_first_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
+ test_find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
+ test_find_first_nor_bit(bitmap, bitmap2, BITMAP_LEN);
+ test_find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN);
+ test_find_next_or_bit(bitmap, bitmap2, BITMAP_LEN);
/*
* Everything is OK. Return error just to let user run benchmark
--
2.39.2
On Thu, Aug 29, 2024 at 09:59:25AM -0400, Mathieu Desnoyers wrote:
> Benchmark the following bitmap find functions applying binary operations
> on sets of two bitmaps:
>
> - find_first_andnot_bit,
> - find_first_nor_bit,
> - find_next_andnot_bit,
> - find_next_nor_bit,
> - find_next_or_bit.
>
> Note that find_first_or_bit is not part of the current API, so it is not
> covered.
Can you please show how the test output looks on your system now? I'll
add that in commit message.
>
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Yury Norov <yury.norov@gmail.com>
> Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
> ---
> lib/find_bit_benchmark.c | 93 ++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 93 insertions(+)
>
> diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
> index aee2ebb6b3cd..3b16254dec23 100644
> --- a/lib/find_bit_benchmark.c
> +++ b/lib/find_bit_benchmark.c
> @@ -70,6 +70,44 @@ static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, uns
> return 0;
> }
>
> +static int __init test_find_first_andnot_bit(void *bitmap, const void *bitmap2, unsigned long len)
> +{
> + static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata;
> + unsigned long i, cnt;
> + ktime_t time;
> +
> + bitmap_copy(cp, bitmap, BITMAP_LEN);
> +
> + time = ktime_get();
> + for (cnt = i = 0; i < len; cnt++) {
> + i = find_first_andnot_bit(cp, bitmap2, len);
> + __clear_bit(i, cp);
> + }
> + time = ktime_get() - time;
> + pr_err("find_first_andnot_bit: %18llu ns, %6ld iterations\n", time, cnt);
> +
> + return 0;
> +}
> +
> +static int __init test_find_first_nor_bit(void *bitmap, const void *bitmap2, unsigned long len)
> +{
> + static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata;
> + unsigned long i, cnt;
> + ktime_t time;
> +
> + bitmap_copy(cp, bitmap, BITMAP_LEN);
> +
> + time = ktime_get();
> + for (cnt = i = 0; i < len; cnt++) {
> + i = find_first_nor_bit(cp, bitmap2, len);
> + __set_bit(i, cp);
> + }
> + time = ktime_get() - time;
> + pr_err("find_first_nor_bit: %18llu ns, %6ld iterations\n", time, cnt);
> +
> + return 0;
> +}
> +
> static int __init test_find_next_bit(const void *bitmap, unsigned long len)
> {
> unsigned long i, cnt;
> @@ -148,6 +186,51 @@ static int __init test_find_next_and_bit(const void *bitmap,
> return 0;
> }
>
> +static int __init test_find_next_andnot_bit(const void *bitmap,
> + const void *bitmap2, unsigned long len)
> +{
> + unsigned long i, cnt;
> + ktime_t time;
> +
> + time = ktime_get();
> + for (cnt = i = 0; i < BITMAP_LEN; cnt++)
> + i = find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
> + time = ktime_get() - time;
> + pr_err("find_next_andnot_bit: %18llu ns, %6ld iterations\n", time, cnt);
> +
> + return 0;
> +}
> +
> +static int __init test_find_next_nor_bit(const void *bitmap,
> + const void *bitmap2, unsigned long len)
> +{
> + unsigned long i, cnt;
> + ktime_t time;
> +
> + time = ktime_get();
> + for (cnt = i = 0; i < BITMAP_LEN; cnt++)
> + i = find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
> + time = ktime_get() - time;
> + pr_err("find_next_nor_bit: %18llu ns, %6ld iterations\n", time, cnt);
> +
> + return 0;
> +}
> +
> +static int __init test_find_next_or_bit(const void *bitmap,
> + const void *bitmap2, unsigned long len)
> +{
> + unsigned long i, cnt;
> + ktime_t time;
> +
> + time = ktime_get();
> + for (cnt = i = 0; i < BITMAP_LEN; cnt++)
> + i = find_next_or_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
> + time = ktime_get() - time;
> + pr_err("find_next_or_bit: %18llu ns, %6ld iterations\n", time, cnt);
> +
> + return 0;
> +}
> +
> static int __init find_bit_test(void)
> {
> unsigned long nbits = BITMAP_LEN / SPARSE;
> @@ -169,6 +252,11 @@ static int __init find_bit_test(void)
> test_find_first_bit(bitmap, BITMAP_LEN / 10);
> test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2);
> test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
> + test_find_first_andnot_bit(bitmap, bitmap2, BITMAP_LEN / 2);
> + test_find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
> + test_find_first_nor_bit(bitmap, bitmap2, BITMAP_LEN / 2);
> + test_find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN);
> + test_find_next_or_bit(bitmap, bitmap2, BITMAP_LEN);
>
> pr_err("\nStart testing find_bit() with sparse bitmap\n");
>
> @@ -187,6 +275,11 @@ static int __init find_bit_test(void)
> test_find_first_bit(bitmap, BITMAP_LEN);
> test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN);
> test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
> + test_find_first_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
> + test_find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
> + test_find_first_nor_bit(bitmap, bitmap2, BITMAP_LEN);
> + test_find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN);
> + test_find_next_or_bit(bitmap, bitmap2, BITMAP_LEN);
>
> /*
> * Everything is OK. Return error just to let user run benchmark
> --
> 2.39.2
On 2024-08-30 17:48, Yury Norov wrote:
> On Thu, Aug 29, 2024 at 09:59:25AM -0400, Mathieu Desnoyers wrote:
>> Benchmark the following bitmap find functions applying binary operations
>> on sets of two bitmaps:
>>
>> - find_first_andnot_bit,
>> - find_first_nor_bit,
>> - find_next_andnot_bit,
>> - find_next_nor_bit,
>> - find_next_or_bit.
>>
>> Note that find_first_or_bit is not part of the current API, so it is not
>> covered.
>
> Can you please show how the test output looks on your system now? I'll
> add that in commit message.
Start testing find_bit() with random-filled bitmap
find_next_bit: 576314 ns, 163810 iterations
find_next_zero_bit: 626847 ns, 163871 iterations
find_last_bit: 465050 ns, 163810 iterations
find_nth_bit: 2720718 ns, 16329 iterations
find_first_bit: 1409431 ns, 16330 iterations
find_first_and_bit: 15216406 ns, 40975 iterations
find_next_and_bit: 324624 ns, 81708 iterations
find_first_andnot_bit: 23856039 ns, 40955 iterations
find_next_andnot_bit: 327734 ns, 82103 iterations
find_first_nor_bit: 21911075 ns, 40956 iterations
find_next_nor_bit: 345315 ns, 81919 iterations
find_next_or_bit: 886338 ns, 245762 iterations
Start testing find_bit() with sparse bitmap
find_next_bit: 8870 ns, 656 iterations
find_next_zero_bit: 1188951 ns, 327025 iterations
find_last_bit: 8380 ns, 656 iterations
find_nth_bit: 1110068 ns, 655 iterations
find_first_bit: 455799 ns, 656 iterations
find_first_and_bit: 6521 ns, 2 iterations
find_next_and_bit: 3540 ns, 2 iterations
find_first_andnot_bit: 785844 ns, 655 iterations
find_next_andnot_bit: 8950 ns, 655 iterations
find_first_nor_bit: 338646832 ns, 326373 iterations
find_next_nor_bit: 1264144 ns, 326372 iterations
find_next_or_bit: 14020 ns, 1309 iterations
Relevant lscpu output:
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Address sizes: 52 bits physical, 57 bits virtual
Byte Order: Little Endian
CPU(s): 384
On-line CPU(s) list: 0-383
Vendor ID: AuthenticAMD
Model name: AMD EPYC 9654 96-Core Processor
CPU family: 25
Model: 17
Thread(s) per core: 2
Core(s) per socket: 96
Socket(s): 2
Stepping: 1
Frequency boost: enabled
CPU(s) scaling MHz: 100%
CPU max MHz: 3709.0000
CPU min MHz: 400.0000
BogoMIPS: 4799.80
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext
fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good amd_lbr_v2 nopl xtopology nonstop_tsc cpuid extd_apicid aperfmperf rap
l pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_leg
acy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb
bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba perfmon_v2 ibrs ibpb stibp ibrs_enhanced vmmcall fsgsbase
bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb avx512cd sha_ni
avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local user_shstk avx512_b
f16 clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushby
asid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif x2avic v_spec_ctrl vnmi avx512vbmi umip pku ospke
avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq la57 rdpid overflow_recov succor smca fsrm
flush_l1d debug_swap
Virtualization features:
Virtualization: AMD-V
Caches (sum of all):
L1d: 6 MiB (192 instances)
L1i: 6 MiB (192 instances)
L2: 192 MiB (192 instances)
L3: 768 MiB (24 instances)
NUMA:
NUMA node(s): 24
NUMA node0 CPU(s): 0-7,192-199
NUMA node1 CPU(s): 8-15,200-207
NUMA node2 CPU(s): 16-23,208-215
NUMA node3 CPU(s): 24-31,216-223
NUMA node4 CPU(s): 32-39,224-231
NUMA node5 CPU(s): 40-47,232-239
NUMA node6 CPU(s): 48-55,240-247
NUMA node7 CPU(s): 56-63,248-255
NUMA node8 CPU(s): 64-71,256-263
NUMA node9 CPU(s): 72-79,264-271
NUMA node10 CPU(s): 80-87,272-279
NUMA node11 CPU(s): 88-95,280-287
NUMA node12 CPU(s): 96-103,288-295
NUMA node13 CPU(s): 104-111,296-303
NUMA node14 CPU(s): 112-119,304-311
NUMA node15 CPU(s): 120-127,312-319
NUMA node16 CPU(s): 128-135,320-327
NUMA node17 CPU(s): 136-143,328-335
NUMA node18 CPU(s): 144-151,336-343
NUMA node19 CPU(s): 152-159,344-351
NUMA node20 CPU(s): 160-167,352-359
NUMA node21 CPU(s): 168-175,360-367
NUMA node22 CPU(s): 176-183,368-375
NUMA node23 CPU(s): 184-191,376-383
Vulnerabilities:
Gather data sampling: Not affected
Itlb multihit: Not affected
L1tf: Not affected
Mds: Not affected
Meltdown: Not affected
Mmio stale data: Not affected
Reg file data sampling: Not affected
Retbleed: Not affected
Spec rstack overflow: Vulnerable
Spec store bypass: Vulnerable
Spectre v1: Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers
Spectre v2: Vulnerable; IBPB: disabled; STIBP: disabled; PBRSB-eIBRS: Not affected; BHI: Not affected
Srbds: Not affected
Tsx async abort: Not affected
>
>>
>> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>> Cc: Yury Norov <yury.norov@gmail.com>
>> Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
>> ---
>> lib/find_bit_benchmark.c | 93 ++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 93 insertions(+)
>>
>> diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
>> index aee2ebb6b3cd..3b16254dec23 100644
>> --- a/lib/find_bit_benchmark.c
>> +++ b/lib/find_bit_benchmark.c
>> @@ -70,6 +70,44 @@ static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, uns
>> return 0;
>> }
>>
>> +static int __init test_find_first_andnot_bit(void *bitmap, const void *bitmap2, unsigned long len)
>> +{
>> + static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata;
>> + unsigned long i, cnt;
>> + ktime_t time;
>> +
>> + bitmap_copy(cp, bitmap, BITMAP_LEN);
>> +
>> + time = ktime_get();
>> + for (cnt = i = 0; i < len; cnt++) {
>> + i = find_first_andnot_bit(cp, bitmap2, len);
>> + __clear_bit(i, cp);
>> + }
>> + time = ktime_get() - time;
>> + pr_err("find_first_andnot_bit: %18llu ns, %6ld iterations\n", time, cnt);
>> +
>> + return 0;
>> +}
>> +
>> +static int __init test_find_first_nor_bit(void *bitmap, const void *bitmap2, unsigned long len)
>> +{
>> + static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata;
>> + unsigned long i, cnt;
>> + ktime_t time;
>> +
>> + bitmap_copy(cp, bitmap, BITMAP_LEN);
>> +
>> + time = ktime_get();
>> + for (cnt = i = 0; i < len; cnt++) {
>> + i = find_first_nor_bit(cp, bitmap2, len);
>> + __set_bit(i, cp);
>> + }
>> + time = ktime_get() - time;
>> + pr_err("find_first_nor_bit: %18llu ns, %6ld iterations\n", time, cnt);
>> +
>> + return 0;
>> +}
>> +
>> static int __init test_find_next_bit(const void *bitmap, unsigned long len)
>> {
>> unsigned long i, cnt;
>> @@ -148,6 +186,51 @@ static int __init test_find_next_and_bit(const void *bitmap,
>> return 0;
>> }
>>
>> +static int __init test_find_next_andnot_bit(const void *bitmap,
>> + const void *bitmap2, unsigned long len)
>> +{
>> + unsigned long i, cnt;
>> + ktime_t time;
>> +
>> + time = ktime_get();
>> + for (cnt = i = 0; i < BITMAP_LEN; cnt++)
>> + i = find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
>> + time = ktime_get() - time;
>> + pr_err("find_next_andnot_bit: %18llu ns, %6ld iterations\n", time, cnt);
>> +
>> + return 0;
>> +}
>> +
>> +static int __init test_find_next_nor_bit(const void *bitmap,
>> + const void *bitmap2, unsigned long len)
>> +{
>> + unsigned long i, cnt;
>> + ktime_t time;
>> +
>> + time = ktime_get();
>> + for (cnt = i = 0; i < BITMAP_LEN; cnt++)
>> + i = find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
>> + time = ktime_get() - time;
>> + pr_err("find_next_nor_bit: %18llu ns, %6ld iterations\n", time, cnt);
>> +
>> + return 0;
>> +}
>> +
>> +static int __init test_find_next_or_bit(const void *bitmap,
>> + const void *bitmap2, unsigned long len)
>> +{
>> + unsigned long i, cnt;
>> + ktime_t time;
>> +
>> + time = ktime_get();
>> + for (cnt = i = 0; i < BITMAP_LEN; cnt++)
>> + i = find_next_or_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
>> + time = ktime_get() - time;
>> + pr_err("find_next_or_bit: %18llu ns, %6ld iterations\n", time, cnt);
>> +
>> + return 0;
>> +}
>> +
>> static int __init find_bit_test(void)
>> {
>> unsigned long nbits = BITMAP_LEN / SPARSE;
>> @@ -169,6 +252,11 @@ static int __init find_bit_test(void)
>> test_find_first_bit(bitmap, BITMAP_LEN / 10);
>> test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2);
>> test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
>> + test_find_first_andnot_bit(bitmap, bitmap2, BITMAP_LEN / 2);
>> + test_find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
>> + test_find_first_nor_bit(bitmap, bitmap2, BITMAP_LEN / 2);
>> + test_find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN);
>> + test_find_next_or_bit(bitmap, bitmap2, BITMAP_LEN);
>>
>> pr_err("\nStart testing find_bit() with sparse bitmap\n");
>>
>> @@ -187,6 +275,11 @@ static int __init find_bit_test(void)
>> test_find_first_bit(bitmap, BITMAP_LEN);
>> test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN);
>> test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
>> + test_find_first_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
>> + test_find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
>> + test_find_first_nor_bit(bitmap, bitmap2, BITMAP_LEN);
>> + test_find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN);
>> + test_find_next_or_bit(bitmap, bitmap2, BITMAP_LEN);
>>
>> /*
>> * Everything is OK. Return error just to let user run benchmark
>> --
>> 2.39.2
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Fri, Aug 30, 2024 at 12:07:53PM -0400, Mathieu Desnoyers wrote:
> On 2024-08-30 17:48, Yury Norov wrote:
> > On Thu, Aug 29, 2024 at 09:59:25AM -0400, Mathieu Desnoyers wrote:
> > > Benchmark the following bitmap find functions applying binary operations
> > > on sets of two bitmaps:
> > >
> > > - find_first_andnot_bit,
> > > - find_first_nor_bit,
> > > - find_next_andnot_bit,
> > > - find_next_nor_bit,
> > > - find_next_or_bit.
> > >
> > > Note that find_first_or_bit is not part of the current API, so it is not
> > > covered.
> >
> > Can you please show how the test output looks on your system now? I'll
> > add that in commit message.
>
> Start testing find_bit() with random-filled bitmap
> find_next_bit: 576314 ns, 163810 iterations
> find_next_zero_bit: 626847 ns, 163871 iterations
> find_last_bit: 465050 ns, 163810 iterations
> find_nth_bit: 2720718 ns, 16329 iterations
> find_first_bit: 1409431 ns, 16330 iterations
> find_first_and_bit: 15216406 ns, 40975 iterations
> find_next_and_bit: 324624 ns, 81708 iterations
> find_first_andnot_bit: 23856039 ns, 40955 iterations
> find_next_andnot_bit: 327734 ns, 82103 iterations
> find_first_nor_bit: 21911075 ns, 40956 iterations
> find_next_nor_bit: 345315 ns, 81919 iterations
> find_next_or_bit: 886338 ns, 245762 iterations
>
> Start testing find_bit() with sparse bitmap
> find_next_bit: 8870 ns, 656 iterations
> find_next_zero_bit: 1188951 ns, 327025 iterations
> find_last_bit: 8380 ns, 656 iterations
> find_nth_bit: 1110068 ns, 655 iterations
> find_first_bit: 455799 ns, 656 iterations
> find_first_and_bit: 6521 ns, 2 iterations
> find_next_and_bit: 3540 ns, 2 iterations
> find_first_andnot_bit: 785844 ns, 655 iterations
> find_next_andnot_bit: 8950 ns, 655 iterations
> find_first_nor_bit: 338646832 ns, 326373 iterations
0.3 sec is too much. Consider a slower CPU where it may be 10
times slower... Can you keep that in a range of few milliseconds?
It's more than enough to grab stable perf results.
> find_next_nor_bit: 1264144 ns, 326372 iterations
> find_next_or_bit: 14020 ns, 1309 iterations
Before the columns were all aligned. Can you please send a v3
with this fixed, and address the other comments?
Thanks,
Yury
>
> Relevant lscpu output:
>
> Architecture: x86_64
> CPU op-mode(s): 32-bit, 64-bit
> Address sizes: 52 bits physical, 57 bits virtual
> Byte Order: Little Endian
> CPU(s): 384
> On-line CPU(s) list: 0-383
> Vendor ID: AuthenticAMD
> Model name: AMD EPYC 9654 96-Core Processor
> CPU family: 25
> Model: 17
> Thread(s) per core: 2
> Core(s) per socket: 96
> Socket(s): 2
> Stepping: 1
> Frequency boost: enabled
> CPU(s) scaling MHz: 100%
> CPU max MHz: 3709.0000
> CPU min MHz: 400.0000
> BogoMIPS: 4799.80
> Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext
> fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good amd_lbr_v2 nopl xtopology nonstop_tsc cpuid extd_apicid aperfmperf rap
> l pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_leg
> acy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb
> bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba perfmon_v2 ibrs ibpb stibp ibrs_enhanced vmmcall fsgsbase
> bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb avx512cd sha_ni
> avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local user_shstk avx512_b
> f16 clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushby
> asid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif x2avic v_spec_ctrl vnmi avx512vbmi umip pku ospke
> avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq la57 rdpid overflow_recov succor smca fsrm
> flush_l1d debug_swap
> Virtualization features:
> Virtualization: AMD-V
> Caches (sum of all):
> L1d: 6 MiB (192 instances)
> L1i: 6 MiB (192 instances)
> L2: 192 MiB (192 instances)
> L3: 768 MiB (24 instances)
> NUMA:
> NUMA node(s): 24
> NUMA node0 CPU(s): 0-7,192-199
> NUMA node1 CPU(s): 8-15,200-207
> NUMA node2 CPU(s): 16-23,208-215
> NUMA node3 CPU(s): 24-31,216-223
> NUMA node4 CPU(s): 32-39,224-231
> NUMA node5 CPU(s): 40-47,232-239
> NUMA node6 CPU(s): 48-55,240-247
> NUMA node7 CPU(s): 56-63,248-255
> NUMA node8 CPU(s): 64-71,256-263
> NUMA node9 CPU(s): 72-79,264-271
> NUMA node10 CPU(s): 80-87,272-279
> NUMA node11 CPU(s): 88-95,280-287
> NUMA node12 CPU(s): 96-103,288-295
> NUMA node13 CPU(s): 104-111,296-303
> NUMA node14 CPU(s): 112-119,304-311
> NUMA node15 CPU(s): 120-127,312-319
> NUMA node16 CPU(s): 128-135,320-327
> NUMA node17 CPU(s): 136-143,328-335
> NUMA node18 CPU(s): 144-151,336-343
> NUMA node19 CPU(s): 152-159,344-351
> NUMA node20 CPU(s): 160-167,352-359
> NUMA node21 CPU(s): 168-175,360-367
> NUMA node22 CPU(s): 176-183,368-375
> NUMA node23 CPU(s): 184-191,376-383
> Vulnerabilities:
> Gather data sampling: Not affected
> Itlb multihit: Not affected
> L1tf: Not affected
> Mds: Not affected
> Meltdown: Not affected
> Mmio stale data: Not affected
> Reg file data sampling: Not affected
> Retbleed: Not affected
> Spec rstack overflow: Vulnerable
> Spec store bypass: Vulnerable
> Spectre v1: Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers
> Spectre v2: Vulnerable; IBPB: disabled; STIBP: disabled; PBRSB-eIBRS: Not affected; BHI: Not affected
> Srbds: Not affected
> Tsx async abort: Not affected
>
>
>
> >
> > >
> > > Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > > Cc: Yury Norov <yury.norov@gmail.com>
> > > Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
> > > ---
> > > lib/find_bit_benchmark.c | 93 ++++++++++++++++++++++++++++++++++++++++
> > > 1 file changed, 93 insertions(+)
> > >
> > > diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
> > > index aee2ebb6b3cd..3b16254dec23 100644
> > > --- a/lib/find_bit_benchmark.c
> > > +++ b/lib/find_bit_benchmark.c
> > > @@ -70,6 +70,44 @@ static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, uns
> > > return 0;
> > > }
> > > +static int __init test_find_first_andnot_bit(void *bitmap, const void *bitmap2, unsigned long len)
> > > +{
> > > + static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata;
> > > + unsigned long i, cnt;
> > > + ktime_t time;
> > > +
> > > + bitmap_copy(cp, bitmap, BITMAP_LEN);
> > > +
> > > + time = ktime_get();
> > > + for (cnt = i = 0; i < len; cnt++) {
> > > + i = find_first_andnot_bit(cp, bitmap2, len);
> > > + __clear_bit(i, cp);
> > > + }
> > > + time = ktime_get() - time;
> > > + pr_err("find_first_andnot_bit: %18llu ns, %6ld iterations\n", time, cnt);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int __init test_find_first_nor_bit(void *bitmap, const void *bitmap2, unsigned long len)
> > > +{
> > > + static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata;
> > > + unsigned long i, cnt;
> > > + ktime_t time;
> > > +
> > > + bitmap_copy(cp, bitmap, BITMAP_LEN);
> > > +
> > > + time = ktime_get();
> > > + for (cnt = i = 0; i < len; cnt++) {
> > > + i = find_first_nor_bit(cp, bitmap2, len);
> > > + __set_bit(i, cp);
> > > + }
> > > + time = ktime_get() - time;
> > > + pr_err("find_first_nor_bit: %18llu ns, %6ld iterations\n", time, cnt);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > static int __init test_find_next_bit(const void *bitmap, unsigned long len)
> > > {
> > > unsigned long i, cnt;
> > > @@ -148,6 +186,51 @@ static int __init test_find_next_and_bit(const void *bitmap,
> > > return 0;
> > > }
> > > +static int __init test_find_next_andnot_bit(const void *bitmap,
> > > + const void *bitmap2, unsigned long len)
> > > +{
> > > + unsigned long i, cnt;
> > > + ktime_t time;
> > > +
> > > + time = ktime_get();
> > > + for (cnt = i = 0; i < BITMAP_LEN; cnt++)
> > > + i = find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
> > > + time = ktime_get() - time;
> > > + pr_err("find_next_andnot_bit: %18llu ns, %6ld iterations\n", time, cnt);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int __init test_find_next_nor_bit(const void *bitmap,
> > > + const void *bitmap2, unsigned long len)
> > > +{
> > > + unsigned long i, cnt;
> > > + ktime_t time;
> > > +
> > > + time = ktime_get();
> > > + for (cnt = i = 0; i < BITMAP_LEN; cnt++)
> > > + i = find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
> > > + time = ktime_get() - time;
> > > + pr_err("find_next_nor_bit: %18llu ns, %6ld iterations\n", time, cnt);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int __init test_find_next_or_bit(const void *bitmap,
> > > + const void *bitmap2, unsigned long len)
> > > +{
> > > + unsigned long i, cnt;
> > > + ktime_t time;
> > > +
> > > + time = ktime_get();
> > > + for (cnt = i = 0; i < BITMAP_LEN; cnt++)
> > > + i = find_next_or_bit(bitmap, bitmap2, BITMAP_LEN, i + 1);
> > > + time = ktime_get() - time;
> > > + pr_err("find_next_or_bit: %18llu ns, %6ld iterations\n", time, cnt);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > static int __init find_bit_test(void)
> > > {
> > > unsigned long nbits = BITMAP_LEN / SPARSE;
> > > @@ -169,6 +252,11 @@ static int __init find_bit_test(void)
> > > test_find_first_bit(bitmap, BITMAP_LEN / 10);
> > > test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2);
> > > test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
> > > + test_find_first_andnot_bit(bitmap, bitmap2, BITMAP_LEN / 2);
> > > + test_find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
> > > + test_find_first_nor_bit(bitmap, bitmap2, BITMAP_LEN / 2);
> > > + test_find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN);
> > > + test_find_next_or_bit(bitmap, bitmap2, BITMAP_LEN);
> > > pr_err("\nStart testing find_bit() with sparse bitmap\n");
> > > @@ -187,6 +275,11 @@ static int __init find_bit_test(void)
> > > test_find_first_bit(bitmap, BITMAP_LEN);
> > > test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN);
> > > test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
> > > + test_find_first_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
> > > + test_find_next_andnot_bit(bitmap, bitmap2, BITMAP_LEN);
> > > + test_find_first_nor_bit(bitmap, bitmap2, BITMAP_LEN);
> > > + test_find_next_nor_bit(bitmap, bitmap2, BITMAP_LEN);
> > > + test_find_next_or_bit(bitmap, bitmap2, BITMAP_LEN);
> > > /*
> > > * Everything is OK. Return error just to let user run benchmark
> > > --
> > > 2.39.2
>
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> https://www.efficios.com
© 2016 - 2026 Red Hat, Inc.