[v1] selftests/bpf: Run test_perf_branches more than 1 time

[PATCH bpf] selftests/bpf: Run test_perf_branches more than 1 time

Posted by Maya Matuszczyk 2 months, 1 week ago

test_perf_branches bpf program was only being ran once, and
perf_branches test would fail if the only sample that was passed to
test_perf_branches didn't have branch data.
Fix those failures by running it a few more times and waiting until a valid
sample shows up, and leave the perf event only enabled for the duration of
test spins.

Signed-off-by: Maya Matuszczyk <mayamatuszczyk@google.com>
---
 .../selftests/bpf/prog_tests/perf_branches.c  | 57 +++++++++++++------
 .../selftests/bpf/progs/test_perf_branches.c  | 11 ++--
 2 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index 0a7ef770c487..1dcb7c70f545 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -15,10 +15,6 @@ static void check_good_sample(struct test_perf_branches *skel)
 	int pbe_size = sizeof(struct perf_branch_entry);
 	int duration = 0;
 
-	if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
-		  "checked sample validity before prog run"))
-		return;
-
 	if (CHECK(!skel->bss->valid, "output not valid",
 		 "no valid sample from prog"))
 		return;
@@ -49,10 +45,6 @@ static void check_bad_sample(struct test_perf_branches *skel)
 	int written_stack = skel->bss->written_stack_out;
 	int duration = 0;
 
-	if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
-		  "checked sample validity before prog run"))
-		return;
-
 	if (CHECK(!skel->bss->valid, "output not valid",
 		 "no valid sample from prog"))
 		return;
@@ -73,7 +65,6 @@ static void test_perf_branches_common(int perf_fd,
 	bool detached = false;
 	struct bpf_link *link;
 	volatile int j = 0;
-	cpu_set_t cpu_set;
 
 	skel = test_perf_branches__open_and_load();
 	if (CHECK(!skel, "test_perf_branches_load",
@@ -85,19 +76,35 @@ static void test_perf_branches_common(int perf_fd,
 	if (!ASSERT_OK_PTR(link, "attach_perf_event"))
 		goto out_destroy_skel;
 
-	/* generate some branches on cpu 0 */
-	CPU_ZERO(&cpu_set);
-	CPU_SET(0, &cpu_set);
-	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
-	if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
+	err = ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0);
+	if (CHECK(err == -1, "perf event reset", "ioctl err %d", errno))
 		goto out_destroy;
 
-	/* Spin the loop for a while by using a high iteration count, and by
-	 * checking whether the specific run count marker has been explicitly
-	 * incremented at least once by the backing perf_event BPF program.
+	err = ioctl(perf_fd, PERF_EVENT_IOC_REFRESH, 10);
+	if (CHECK(err == -1, "perf event refresh", "ioctl err %d", errno))
+		goto out_destroy;
+
+	/* Make up a bunch of branch events until we time out or bpf prog sets a flag
+	 * that it caught a valid event. To be completely sure that required_size_out,
+	 * written_stack_out or written_global_out writes and reads won't get
+	 * reordered by anyone in a way that would fail this test, use atomic
+	 * operations
 	 */
-	for (i = 0; i < 100000000 && !*(volatile int *)&skel->bss->run_cnt; ++i)
+	for (i = 0; i < 100000000; ++i) {
+		/* This technically should be an atomic load with acquire
+		 * ordering, but we don't want to clog up memory bus while
+		 * spinning
+		 */
+		if (READ_ONCE(skel->bss->valid) > 0)
+			break;
 		++j;
+	}
+
+	__atomic_thread_fence(__ATOMIC_ACQUIRE);
+
+	err = ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, 0);
+	if (CHECK(err == -1, "perf event disable", "ioctl err %d", errno))
+		goto out_destroy;
 
 	test_perf_branches__detach(skel);
 	detached = true;
@@ -121,6 +128,7 @@ static void test_perf_branches_hw(void)
 	attr.size = sizeof(attr);
 	attr.type = PERF_TYPE_HARDWARE;
 	attr.config = PERF_COUNT_HW_CPU_CYCLES;
+	attr.disabled = 1;
 	attr.freq = 1;
 	attr.sample_freq = 1000;
 	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
@@ -162,6 +170,7 @@ static void test_perf_branches_no_hw(void)
 	attr.size = sizeof(attr);
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.config = PERF_COUNT_SW_CPU_CLOCK;
+	attr.disabled = 1;
 	attr.freq = 1;
 	attr.sample_freq = 1000;
 	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
@@ -175,6 +184,18 @@ static void test_perf_branches_no_hw(void)
 
 void test_perf_branches(void)
 {
+	int err, duration = 0;
+	cpu_set_t cpu_set;
+
+	/* Pin ourselves to cpu0 so that all branches we generate would be
+	 * also on cpu0
+	 */
+	CPU_ZERO(&cpu_set);
+	CPU_SET(0, &cpu_set);
+	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+	if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
+		return;
+
 	if (test__start_subtest("perf_branches_hw"))
 		test_perf_branches_hw();
 	if (test__start_subtest("perf_branches_no_hw"))
diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c
index 05ac9410cd68..9806414cebc4 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_branches.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c
@@ -8,7 +8,6 @@
 #include <bpf/bpf_tracing.h>
 
 int valid = 0;
-int run_cnt = 0;
 int required_size_out = 0;
 int written_stack_out = 0;
 int written_global_out = 0;
@@ -25,13 +24,11 @@ int perf_branches(void *ctx)
 	__u64 entries[4 * 3] = {0};
 	int required_size, written_stack, written_global;
 
-	++run_cnt;
-
 	/* write to stack */
 	written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
 	/* ignore spurious events */
 	if (!written_stack)
-		return 1;
+		return 0;
 
 	/* get required size */
 	required_size = bpf_read_branch_records(ctx, NULL, 0,
@@ -40,14 +37,14 @@ int perf_branches(void *ctx)
 	written_global = bpf_read_branch_records(ctx, fpbe, sizeof(fpbe), 0);
 	/* ignore spurious events */
 	if (!written_global)
-		return 1;
+		return 0;
 
 	required_size_out = required_size;
 	written_stack_out = written_stack;
 	written_global_out = written_global;
-	valid = 1;
+	__atomic_add_fetch(&valid, 1, __ATOMIC_RELEASE);
 
-	return 0;
+	return 1;
 }
 
 char _license[] SEC("license") = "GPL";
-- 
2.53.0.1213.gd9a14994de-goog

Re: [PATCH bpf] selftests/bpf: Run test_perf_branches more than 1 time

Posted by Alexei Starovoitov 2 months ago

On Wed, Apr 8, 2026 at 2:57 AM Maya Matuszczyk
<mayamatuszczyk@google.com> wrote:
>
> test_perf_branches bpf program was only being ran once, and
> perf_branches test would fail if the only sample that was passed to
> test_perf_branches didn't have branch data.
> Fix those failures by running it a few more times and waiting until a valid
> sample shows up, and leave the perf event only enabled for the duration of
> test spins.
>
> Signed-off-by: Maya Matuszczyk <mayamatuszczyk@google.com>
> ---
>  .../selftests/bpf/prog_tests/perf_branches.c  | 57 +++++++++++++------
>  .../selftests/bpf/progs/test_perf_branches.c  | 11 ++--
>  2 files changed, 43 insertions(+), 25 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
> index 0a7ef770c487..1dcb7c70f545 100644
> --- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
> +++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
> @@ -15,10 +15,6 @@ static void check_good_sample(struct test_perf_branches *skel)
>         int pbe_size = sizeof(struct perf_branch_entry);
>         int duration = 0;
>
> -       if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
> -                 "checked sample validity before prog run"))
> -               return;
> -
>         if (CHECK(!skel->bss->valid, "output not valid",
>                  "no valid sample from prog"))
>                 return;
> @@ -49,10 +45,6 @@ static void check_bad_sample(struct test_perf_branches *skel)
>         int written_stack = skel->bss->written_stack_out;
>         int duration = 0;
>
> -       if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
> -                 "checked sample validity before prog run"))
> -               return;
> -
>         if (CHECK(!skel->bss->valid, "output not valid",
>                  "no valid sample from prog"))
>                 return;
> @@ -73,7 +65,6 @@ static void test_perf_branches_common(int perf_fd,
>         bool detached = false;
>         struct bpf_link *link;
>         volatile int j = 0;
> -       cpu_set_t cpu_set;
>
>         skel = test_perf_branches__open_and_load();
>         if (CHECK(!skel, "test_perf_branches_load",
> @@ -85,19 +76,35 @@ static void test_perf_branches_common(int perf_fd,
>         if (!ASSERT_OK_PTR(link, "attach_perf_event"))
>                 goto out_destroy_skel;
>
> -       /* generate some branches on cpu 0 */
> -       CPU_ZERO(&cpu_set);
> -       CPU_SET(0, &cpu_set);
> -       err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
> -       if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
> +       err = ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0);
> +       if (CHECK(err == -1, "perf event reset", "ioctl err %d", errno))
>                 goto out_destroy;

Since you're touching this part, please use ASSERT*() macros,
since CHECK() is deprecated.

sashiko thinks the same.