[PATCH v1] perf test: Add a runs-per-test flag

Ian Rogers posted 1 patch 2 weeks ago
tools/perf/tests/builtin-test.c | 38 ++++++++++++++++++++-------------
1 file changed, 23 insertions(+), 15 deletions(-)
[PATCH v1] perf test: Add a runs-per-test flag
Posted by Ian Rogers 2 weeks ago
To detect flakes it is useful to run tests more than once. Add a
runs-per-test flag that will run each test multiple times.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/tests/builtin-test.c | 38 ++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index d2cabaa8ad92..574fbd5caff0 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -42,6 +42,8 @@
 static bool dont_fork;
 /* Fork the tests in parallel and wait for their completion. */
 static bool sequential;
+/* Number of times each test is run. */
+static unsigned int runs_per_test = 1;
 const char *dso_to_test;
 const char *test_objdump_path = "objdump";
 
@@ -490,10 +492,10 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
 				len = strlen(test_description(*t, subi));
 				if (width < len)
 					width = len;
-				num_tests++;
+				num_tests += runs_per_test;
 			}
 		} else {
-			num_tests++;
+			num_tests += runs_per_test;
 		}
 	}
 	child_tests = calloc(num_tests, sizeof(*child_tests));
@@ -556,21 +558,25 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
 			}
 
 			if (!has_subtests(*t)) {
-				err = start_test(*t, curr, -1, &child_tests[child_test_num++],
-						 width, pass);
-				if (err)
-					goto err_out;
+				for (unsigned int run = 0; run < runs_per_test; run++) {
+					err = start_test(*t, curr, -1, &child_tests[child_test_num++],
+							width, pass);
+					if (err)
+						goto err_out;
+				}
 				continue;
 			}
-			for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
-				if (!perf_test__matches(test_description(*t, subi),
-							curr, argc, argv))
-					continue;
-
-				err = start_test(*t, curr, subi, &child_tests[child_test_num++],
-						 width, pass);
-				if (err)
-					goto err_out;
+			for (unsigned int run = 0; run < runs_per_test; run++) {
+				for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
+					if (!perf_test__matches(test_description(*t, subi),
+									curr, argc, argv))
+						continue;
+
+					err = start_test(*t, curr, subi, &child_tests[child_test_num++],
+							width, pass);
+					if (err)
+						goto err_out;
+				}
 			}
 		}
 		if (!sequential) {
@@ -714,6 +720,8 @@ int cmd_test(int argc, const char **argv)
 		    "Do not fork for testcase"),
 	OPT_BOOLEAN('S', "sequential", &sequential,
 		    "Run the tests one after another rather than in parallel"),
+	OPT_UINTEGER('r', "runs-per-test", &runs_per_test,
+		     "Run each test the given number of times, default 1"),
 	OPT_STRING('w', "workload", &workload, "work", "workload to run for testing, use '--list-workloads' to list the available ones."),
 	OPT_BOOLEAN(0, "list-workloads", &list_workloads, "List the available builtin workloads to use with -w/--workload"),
 	OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"),
-- 
2.47.0.277.g8800431eea-goog
Re: [PATCH v1] perf test: Add a runs-per-test flag
Posted by Liang, Kan 1 week, 5 days ago

On 2024-11-09 11:02 a.m., Ian Rogers wrote:
> To detect flakes it is useful to run tests more than once. Add a
> runs-per-test flag that will run each test multiple times.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/tests/builtin-test.c | 38 ++++++++++++++++++++-------------
>  1 file changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> index d2cabaa8ad92..574fbd5caff0 100644
> --- a/tools/perf/tests/builtin-test.c
> +++ b/tools/perf/tests/builtin-test.c
> @@ -42,6 +42,8 @@
>  static bool dont_fork;
>  /* Fork the tests in parallel and wait for their completion. */
>  static bool sequential;
> +/* Numer of times each test is run. */
> +static unsigned int runs_per_test = 1;
>  const char *dso_to_test;
>  const char *test_objdump_path = "objdump";
>  
> @@ -490,10 +492,10 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
>  				len = strlen(test_description(*t, subi));
>  				if (width < len)
>  					width = len;
> -				num_tests++;
> +				num_tests += runs_per_test;
>  			}
>  		} else {
> -			num_tests++;
> +			num_tests += runs_per_test;
>  		}
>  	}

Seems we just need to calculate num_tests once at the end of each
loop. Something like the below may work. (not tested)

@@ -482,20 +490,19 @@ static int __cmd_test(struct test_suite **suites,
int argc, const char *argv[],

        for (struct test_suite **t = suites; *t; t++) {
                int len = strlen(test_description(*t, -1));
+               int subi = 0, subn = 1;

                if (width < len)
                        width = len;

                if (has_subtests(*t)) {
-                       for (int subi = 0, subn = num_subtests(*t); subi
< subn; subi++) {
+                       for (subn = num_subtests(*t); subi < subn; subi++) {
                                len = strlen(test_description(*t, subi));
                                if (width < len)
                                        width = len;
-                               num_tests++;
                        }
-               } else {
-                       num_tests++;
                }
+               num_tests += subn * runs_per_test;
        }
        child_tests = calloc(num_tests, sizeof(*child_tests));
        if (!child_tests)




>  	child_tests = calloc(num_tests, sizeof(*child_tests));
> @@ -556,21 +558,25 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
>  			}
>  
>  			if (!has_subtests(*t)) {
> -				err = start_test(*t, curr, -1, &child_tests[child_test_num++],
> -						 width, pass);
> -				if (err)
> -					goto err_out;
> +				for (unsigned int run = 0; run < runs_per_test; run++) {
> +					err = start_test(*t, curr, -1, &child_tests[child_test_num++],
> +							width, pass);
> +					if (err)
> +						goto err_out;
> +				}
>  				continue;
>  			}
> -			for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
> -				if (!perf_test__matches(test_description(*t, subi),
> -							curr, argc, argv))
> -					continue;
> -
> -				err = start_test(*t, curr, subi, &child_tests[child_test_num++],
> -						 width, pass);
> -				if (err)
> -					goto err_out;
> +			for (unsigned int run = 0; run < runs_per_test; run++) {
> +				for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
> +					if (!perf_test__matches(test_description(*t, subi),
> +									curr, argc, argv))
> +						continue;
> +
> +					err = start_test(*t, curr, subi, &child_tests[child_test_num++],
> +							width, pass);
> +					if (err)
> +						goto err_out;
> +				}

Can we add a wrapper for start_test()? Something similar to the below?
It avoids adding the loop in every place that uses start_test.

+static int start_test(struct test_suite *test, int i, int subi, struct
child_test **child,
+               int width, int pass)
+{
+       for (unsigned int run = 0; run < runs_per_test; run++) {
+               __start_test();
+       }
+}

Thanks,
Kan

>  			}
>  		}
>  		if (!sequential) {
> @@ -714,6 +720,8 @@ int cmd_test(int argc, const char **argv)
>  		    "Do not fork for testcase"),
>  	OPT_BOOLEAN('S', "sequential", &sequential,
>  		    "Run the tests one after another rather than in parallel"),
> +	OPT_UINTEGER('r', "runs-per-test", &runs_per_test,
> +		     "Run each test the given number of times, default 1"),
>  	OPT_STRING('w', "workload", &workload, "work", "workload to run for testing, use '--list-workloads' to list the available ones."),
>  	OPT_BOOLEAN(0, "list-workloads", &list_workloads, "List the available builtin workloads to use with -w/--workload"),
>  	OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"),
Re: [PATCH v1] perf test: Add a runs-per-test flag
Posted by Ian Rogers 1 week, 5 days ago
On Mon, Nov 11, 2024 at 7:52 AM Liang, Kan <kan.liang@linux.intel.com> wrote:
>
>
>
> On 2024-11-09 11:02 a.m., Ian Rogers wrote:
> > To detect flakes it is useful to run tests more than once. Add a
> > runs-per-test flag that will run each test multiple times.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/tests/builtin-test.c | 38 ++++++++++++++++++++-------------
> >  1 file changed, 23 insertions(+), 15 deletions(-)
> >
> > diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> > index d2cabaa8ad92..574fbd5caff0 100644
> > --- a/tools/perf/tests/builtin-test.c
> > +++ b/tools/perf/tests/builtin-test.c
> > @@ -42,6 +42,8 @@
> >  static bool dont_fork;
> >  /* Fork the tests in parallel and wait for their completion. */
> >  static bool sequential;
> > +/* Numer of times each test is run. */
> > +static unsigned int runs_per_test = 1;
> >  const char *dso_to_test;
> >  const char *test_objdump_path = "objdump";
> >
> > @@ -490,10 +492,10 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
> >                               len = strlen(test_description(*t, subi));
> >                               if (width < len)
> >                                       width = len;
> > -                             num_tests++;
> > +                             num_tests += runs_per_test;
> >                       }
> >               } else {
> > -                     num_tests++;
> > +                     num_tests += runs_per_test;
> >               }
> >       }
>
> Seems we just need to calculate the num_tests once at the end for each
> loop. Something as below may works. (not tested)
>
> @@ -482,20 +490,19 @@ static int __cmd_test(struct test_suite **suites,
> int argc, const char *argv[],
>
>         for (struct test_suite **t = suites; *t; t++) {
>                 int len = strlen(test_description(*t, -1));
> +               int subi = 0, subn = 1;
>
>                 if (width < len)
>                         width = len;
>
>                 if (has_subtests(*t)) {
> -                       for (int subi = 0, subn = num_subtests(*t); subi
> < subn; subi++) {
> +                       for (subn = num_subtests(*t); subi < subn; subi++) {
>                                 len = strlen(test_description(*t, subi));
>                                 if (width < len)
>                                         width = len;
> -                               num_tests++;
>                         }
> -               } else {
> -                       num_tests++;
>                 }
> +               num_tests += subn * runs_per_test;
>         }
>         child_tests = calloc(num_tests, sizeof(*child_tests));
>         if (!child_tests)

It's basically the same thing, instead of doing increments and then
multiplying by runs_per_test you just add on runs_per_test and avoid
the multiply.

> >       child_tests = calloc(num_tests, sizeof(*child_tests));
> > @@ -556,21 +558,25 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
> >                       }
> >
> >                       if (!has_subtests(*t)) {
> > -                             err = start_test(*t, curr, -1, &child_tests[child_test_num++],
> > -                                              width, pass);
> > -                             if (err)
> > -                                     goto err_out;
> > +                             for (unsigned int run = 0; run < runs_per_test; run++) {
> > +                                     err = start_test(*t, curr, -1, &child_tests[child_test_num++],
> > +                                                     width, pass);
> > +                                     if (err)
> > +                                             goto err_out;
> > +                             }
> >                               continue;
> >                       }
> > -                     for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
> > -                             if (!perf_test__matches(test_description(*t, subi),
> > -                                                     curr, argc, argv))
> > -                                     continue;
> > -
> > -                             err = start_test(*t, curr, subi, &child_tests[child_test_num++],
> > -                                              width, pass);
> > -                             if (err)
> > -                                     goto err_out;
> > +                     for (unsigned int run = 0; run < runs_per_test; run++) {
> > +                             for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
> > +                                     if (!perf_test__matches(test_description(*t, subi),
> > +                                                                     curr, argc, argv))
> > +                                             continue;
> > +
> > +                                     err = start_test(*t, curr, subi, &child_tests[child_test_num++],
> > +                                                     width, pass);
> > +                                     if (err)
> > +                                             goto err_out;
> > +                             }
>
> Can we add a wrapper for the start_test()? Something similar to below?
> It avoids adding the loop for every places using the start_test.
>
> +static int start_test(struct test_suite *test, int i, int subi, struct
> child_test **child,
> +               int width, int pass)
> +{
> +       for (unsigned int run = 0; run < runs_per_test; run++) {
> +               __start_test();
> +       }
> +}

I think the issue is the code has become overly indented. Having a
start_test function that starts some number of tests feels less than
intention revealing. Perhaps (in the future I'd like to tackle other
things for now, such as new TMAs :-) ) we can create all the child
tests in one pass, then just have start_test and finish_test work with
the child tests. (Off topic) Something else I'd like is to move the
slower running tests to the end of the list of tests so you can see
the earlier results while waiting.

Thanks,
Ian


> >                       }
> >               }
> >               if (!sequential) {
> > @@ -714,6 +720,8 @@ int cmd_test(int argc, const char **argv)
> >                   "Do not fork for testcase"),
> >       OPT_BOOLEAN('S', "sequential", &sequential,
> >                   "Run the tests one after another rather than in parallel"),
> > +     OPT_UINTEGER('r', "runs-per-test", &runs_per_test,
> > +                  "Run each test the given number of times, default 1"),
> >       OPT_STRING('w', "workload", &workload, "work", "workload to run for testing, use '--list-workloads' to list the available ones."),
> >       OPT_BOOLEAN(0, "list-workloads", &list_workloads, "List the available builtin workloads to use with -w/--workload"),
> >       OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"),
>
Re: [PATCH v1] perf test: Add a runs-per-test flag
Posted by Liang, Kan 1 week, 5 days ago

On 2024-11-11 11:10 a.m., Ian Rogers wrote:
> On Mon, Nov 11, 2024 at 7:52 AM Liang, Kan <kan.liang@linux.intel.com> wrote:
>>
>>
>>
>> On 2024-11-09 11:02 a.m., Ian Rogers wrote:
>>> To detect flakes it is useful to run tests more than once. Add a
>>> runs-per-test flag that will run each test multiple times.
>>>
>>> Signed-off-by: Ian Rogers <irogers@google.com>
>>> ---
>>>  tools/perf/tests/builtin-test.c | 38 ++++++++++++++++++++-------------
>>>  1 file changed, 23 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
>>> index d2cabaa8ad92..574fbd5caff0 100644
>>> --- a/tools/perf/tests/builtin-test.c
>>> +++ b/tools/perf/tests/builtin-test.c
>>> @@ -42,6 +42,8 @@
>>>  static bool dont_fork;
>>>  /* Fork the tests in parallel and wait for their completion. */
>>>  static bool sequential;
>>> +/* Numer of times each test is run. */
>>> +static unsigned int runs_per_test = 1;
>>>  const char *dso_to_test;
>>>  const char *test_objdump_path = "objdump";
>>>
>>> @@ -490,10 +492,10 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
>>>                               len = strlen(test_description(*t, subi));
>>>                               if (width < len)
>>>                                       width = len;
>>> -                             num_tests++;
>>> +                             num_tests += runs_per_test;
>>>                       }
>>>               } else {
>>> -                     num_tests++;
>>> +                     num_tests += runs_per_test;
>>>               }
>>>       }
>>
>> Seems we just need to calculate the num_tests once at the end for each
>> loop. Something as below may works. (not tested)
>>
>> @@ -482,20 +490,19 @@ static int __cmd_test(struct test_suite **suites,
>> int argc, const char *argv[],
>>
>>         for (struct test_suite **t = suites; *t; t++) {
>>                 int len = strlen(test_description(*t, -1));
>> +               int subi = 0, subn = 1;
>>
>>                 if (width < len)
>>                         width = len;
>>
>>                 if (has_subtests(*t)) {
>> -                       for (int subi = 0, subn = num_subtests(*t); subi
>> < subn; subi++) {
>> +                       for (subn = num_subtests(*t); subi < subn; subi++) {
>>                                 len = strlen(test_description(*t, subi));
>>                                 if (width < len)
>>                                         width = len;
>> -                               num_tests++;
>>                         }
>> -               } else {
>> -                       num_tests++;
>>                 }
>> +               num_tests += subn * runs_per_test;
>>         }
>>         child_tests = calloc(num_tests, sizeof(*child_tests));
>>         if (!child_tests)
> 
> It's basically the same thing, instead of doing increments and then
> multiplying by runs_per_test you just add on runs_per_test and avoid
> the multiply.

The "else" shouldn't be necessary either. But the above is just a nit.

> 
>>>       child_tests = calloc(num_tests, sizeof(*child_tests));
>>> @@ -556,21 +558,25 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
>>>                       }
>>>
>>>                       if (!has_subtests(*t)) {
>>> -                             err = start_test(*t, curr, -1, &child_tests[child_test_num++],
>>> -                                              width, pass);
>>> -                             if (err)
>>> -                                     goto err_out;
>>> +                             for (unsigned int run = 0; run < runs_per_test; run++) {
>>> +                                     err = start_test(*t, curr, -1, &child_tests[child_test_num++],
>>> +                                                     width, pass);
>>> +                                     if (err)
>>> +                                             goto err_out;
>>> +                             }
>>>                               continue;
>>>                       }
>>> -                     for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
>>> -                             if (!perf_test__matches(test_description(*t, subi),
>>> -                                                     curr, argc, argv))
>>> -                                     continue;
>>> -
>>> -                             err = start_test(*t, curr, subi, &child_tests[child_test_num++],
>>> -                                              width, pass);
>>> -                             if (err)
>>> -                                     goto err_out;
>>> +                     for (unsigned int run = 0; run < runs_per_test; run++) {
>>> +                             for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
>>> +                                     if (!perf_test__matches(test_description(*t, subi),
>>> +                                                                     curr, argc, argv))
>>> +                                             continue;
>>> +
>>> +                                     err = start_test(*t, curr, subi, &child_tests[child_test_num++],
>>> +                                                     width, pass);
>>> +                                     if (err)
>>> +                                             goto err_out;
>>> +                             }
>>
>> Can we add a wrapper for the start_test()? Something similar to below?
>> It avoids adding the loop for every places using the start_test.
>>
>> +static int start_test(struct test_suite *test, int i, int subi, struct
>> child_test **child,
>> +               int width, int pass)
>> +{
>> +       for (unsigned int run = 0; run < runs_per_test; run++) {
>> +               __start_test();
>> +       }
>> +}
> 
> I think the issue is the code has become overly indented. 

And duplication.

> Having a
> start_test function that starts some number of tests feels less than
> intention revealing. Perhaps (in the future I'd like to tackle other
> things for now, such as new TMAs :-) ) we can create all the child
> tests in one pass, then just have start_test and finish_test work with
> the child tests. 

It may be easier to understand if we have both start_mul/all_tests() and
start_single_test().

> (Off topic) Something else I'd like is to move the
> slower running tests to the end of the list of tests so you can see
> the earlier results while waiting.

I'm not sure how useful it is. But for me, I always wait for all the
tests to complete, no matter how fast the results of the first several
cases show.

Thanks,
Kan
> 
> Thanks,
> Ian
> 
> 
>>>                       }
>>>               }
>>>               if (!sequential) {
>>> @@ -714,6 +720,8 @@ int cmd_test(int argc, const char **argv)
>>>                   "Do not fork for testcase"),
>>>       OPT_BOOLEAN('S', "sequential", &sequential,
>>>                   "Run the tests one after another rather than in parallel"),
>>> +     OPT_UINTEGER('r', "runs-per-test", &runs_per_test,
>>> +                  "Run each test the given number of times, default 1"),
>>>       OPT_STRING('w', "workload", &workload, "work", "workload to run for testing, use '--list-workloads' to list the available ones."),
>>>       OPT_BOOLEAN(0, "list-workloads", &list_workloads, "List the available builtin workloads to use with -w/--workload"),
>>>       OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"),
>>
> 

Re: [PATCH v1] perf test: Add a runs-per-test flag
Posted by Ian Rogers 1 week, 5 days ago
On Mon, Nov 11, 2024 at 9:14 AM Liang, Kan <kan.liang@linux.intel.com> wrote:
>
>
>
> On 2024-11-11 11:10 a.m., Ian Rogers wrote:
> > On Mon, Nov 11, 2024 at 7:52 AM Liang, Kan <kan.liang@linux.intel.com> wrote:
> >>
> >>
> >>
> >> On 2024-11-09 11:02 a.m., Ian Rogers wrote:
> >>> To detect flakes it is useful to run tests more than once. Add a
> >>> runs-per-test flag that will run each test multiple times.
> >>>
> >>> Signed-off-by: Ian Rogers <irogers@google.com>
> >>> ---
> >>>  tools/perf/tests/builtin-test.c | 38 ++++++++++++++++++++-------------
> >>>  1 file changed, 23 insertions(+), 15 deletions(-)
> >>>
> >>> diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
> >>> index d2cabaa8ad92..574fbd5caff0 100644
> >>> --- a/tools/perf/tests/builtin-test.c
> >>> +++ b/tools/perf/tests/builtin-test.c
> >>> @@ -42,6 +42,8 @@
> >>>  static bool dont_fork;
> >>>  /* Fork the tests in parallel and wait for their completion. */
> >>>  static bool sequential;
> >>> +/* Numer of times each test is run. */
> >>> +static unsigned int runs_per_test = 1;
> >>>  const char *dso_to_test;
> >>>  const char *test_objdump_path = "objdump";
> >>>
> >>> @@ -490,10 +492,10 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
> >>>                               len = strlen(test_description(*t, subi));
> >>>                               if (width < len)
> >>>                                       width = len;
> >>> -                             num_tests++;
> >>> +                             num_tests += runs_per_test;
> >>>                       }
> >>>               } else {
> >>> -                     num_tests++;
> >>> +                     num_tests += runs_per_test;
> >>>               }
> >>>       }
> >>
> >> Seems we just need to calculate the num_tests once at the end for each
> >> loop. Something as below may works. (not tested)
> >>
> >> @@ -482,20 +490,19 @@ static int __cmd_test(struct test_suite **suites,
> >> int argc, const char *argv[],
> >>
> >>         for (struct test_suite **t = suites; *t; t++) {
> >>                 int len = strlen(test_description(*t, -1));
> >> +               int subi = 0, subn = 1;
> >>
> >>                 if (width < len)
> >>                         width = len;
> >>
> >>                 if (has_subtests(*t)) {
> >> -                       for (int subi = 0, subn = num_subtests(*t); subi
> >> < subn; subi++) {
> >> +                       for (subn = num_subtests(*t); subi < subn; subi++) {
> >>                                 len = strlen(test_description(*t, subi));
> >>                                 if (width < len)
> >>                                         width = len;
> >> -                               num_tests++;
> >>                         }
> >> -               } else {
> >> -                       num_tests++;
> >>                 }
> >> +               num_tests += subn * runs_per_test;
> >>         }
> >>         child_tests = calloc(num_tests, sizeof(*child_tests));
> >>         if (!child_tests)
> >
> > It's basically the same thing, instead of doing increments and then
> > multiplying by runs_per_test you just add on runs_per_test and avoid
> > the multiply.
>
> The "else" should be unnecessary either. But the above is just a nit.
>
> >
> >>>       child_tests = calloc(num_tests, sizeof(*child_tests));
> >>> @@ -556,21 +558,25 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
> >>>                       }
> >>>
> >>>                       if (!has_subtests(*t)) {
> >>> -                             err = start_test(*t, curr, -1, &child_tests[child_test_num++],
> >>> -                                              width, pass);
> >>> -                             if (err)
> >>> -                                     goto err_out;
> >>> +                             for (unsigned int run = 0; run < runs_per_test; run++) {
> >>> +                                     err = start_test(*t, curr, -1, &child_tests[child_test_num++],
> >>> +                                                     width, pass);
> >>> +                                     if (err)
> >>> +                                             goto err_out;
> >>> +                             }
> >>>                               continue;
> >>>                       }
> >>> -                     for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
> >>> -                             if (!perf_test__matches(test_description(*t, subi),
> >>> -                                                     curr, argc, argv))
> >>> -                                     continue;
> >>> -
> >>> -                             err = start_test(*t, curr, subi, &child_tests[child_test_num++],
> >>> -                                              width, pass);
> >>> -                             if (err)
> >>> -                                     goto err_out;
> >>> +                     for (unsigned int run = 0; run < runs_per_test; run++) {
> >>> +                             for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
> >>> +                                     if (!perf_test__matches(test_description(*t, subi),
> >>> +                                                                     curr, argc, argv))
> >>> +                                             continue;
> >>> +
> >>> +                                     err = start_test(*t, curr, subi, &child_tests[child_test_num++],
> >>> +                                                     width, pass);
> >>> +                                     if (err)
> >>> +                                             goto err_out;
> >>> +                             }
> >>
> >> Can we add a wrapper for the start_test()? Something similar to below?
> >> It avoids adding the loop for every places using the start_test.
> >>
> >> +static int start_test(struct test_suite *test, int i, int subi, struct
> >> child_test **child,
> >> +               int width, int pass)
> >> +{
> >> +       for (unsigned int run = 0; run < runs_per_test; run++) {
> >> +               __start_test();
> >> +       }
> >> +}
> >
> > I think the issue is the code has become overly indented.
>
> And duplication.

Agreed. Do we want the runs-per-test feature without me overhauling
all of this? I'm not going to have time any time soon.

> > Having a
> > start_test function that starts some number of tests feels less than
> > intention revealing. Perhaps (in the future I'd like to tackle other
> > things for now, such as new TMAs :-) ) we can create all the child
> > tests in one pass, then just have start_test and finish_test work with
> > the child tests.
>
> It may be easier to understand if we have both start_mul/all_tests() and
> start_single_test().
>
> > (Off topic) Something else I'd like is to move the
> > slower running tests to the end of the list of tests so you can see
> > the earlier results while waiting.
>
> I'm not sure how useful it is. But for me, I always wait for all the
> tests complete, no matter how fast the results of the first several
> cases shows.

Agreed. The issue for me is I tend to run with `perf test -v`, the
single verbose means report the error from failing tests only. Some
tests, like testing all metrics, generate 100s of lines of output and
you may lose test output if you don't have enough shell history. It's
nice to see in cases like that, that the regular tests passed. Perhaps
we should also capture test output to a file. That reminds me that I
wish `perf test list` wrote to stdout rather than stderr, but no doubt
"fixing" that would lead to complaints about breaking compatibility
:-/

Thanks,
Ian