[PATCH v3] perf test: Allow tolerance for leader sampling test

Thomas Richter posted 1 patch 3 weeks, 4 days ago
There is a newer version of this series
tools/perf/tests/shell/record.sh | 33 ++++++++++++++++++++++++++------
1 file changed, 27 insertions(+), 6 deletions(-)
[PATCH v3] perf test: Allow tolerance for leader sampling test
Posted by Thomas Richter 3 weeks, 4 days ago
V3: Added check for missing samples as suggested by Chun-Tse.
V2: Changed bc invocation to return 0 on success and 1 on error.

There is a known issue that the leader sampling is inconsistent, since
throttle only affect leader, not the slave. The detail is in [1]. To
maintain test coverage, this patch sets a tolerance rate of 80% to
accommodate the throttled samples and prevent test failures due to
throttling.

[1] lore.kernel.org/20250328182752.769662-1-ctshao@google.com

Signed-off-by: Chun-Tse Shao <ctshao@google.com>
Suggested-by: Ian Rogers <irogers@google.com>
Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/tests/shell/record.sh | 33 ++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index ba8d873d3ca7..0075ffe783ad 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -238,22 +238,43 @@ test_leader_sampling() {
     err=1
     return
   fi
+  perf script -i "${perfdata}" | grep brstack > $script_output
+  # Check if the two instruction counts are equal in each record.
+  # However, the throttling code doesn't consider event grouping. During throttling, only the
+  # leader is stopped, causing the slave's counts significantly higher. To temporarily solve this,
+  # let's set the tolerance rate to 80%.
+  # TODO: Revert the code for tolerance once the throttling mechanism is fixed.
   index=0
-  perf script -i "${perfdata}" > $script_output
+  valid_counts=0
+  invalid_counts=0
+  tolerance_rate=0.8
   while IFS= read -r line
   do
-    # Check if the two instruction counts are equal in each record
     cycles=$(echo $line | awk '{for(i=1;i<=NF;i++) if($i=="cycles:") print $(i-1)}')
     if [ $(($index%2)) -ne 0 ] && [ ${cycles}x != ${prev_cycles}x ]
     then
-      echo "Leader sampling [Failed inconsistent cycles count]"
-      err=1
-      return
+      invalid_counts=$(($invalid_counts+1))
+    else
+      valid_counts=$(($valid_counts+1))
     fi
     index=$(($index+1))
     prev_cycles=$cycles
   done < $script_output
-  echo "Basic leader sampling test [Success]"
+  total_counts=$(bc <<< "$invalid_counts+$valid_counts")
+  if (( $(bc <<< "$total_counts <= 0") ))
+  then
+    echo "Leader sampling [No sample generated]"
+    err=1
+    return
+  fi
+  isok=$(bc <<< "scale=2; if (($invalid_counts/$total_counts) < (1-$tolerance_rate)) { 0 } else { 1 };")
+  if [ $isok -eq 1 ]
+  then
+     echo "Leader sampling [Failed inconsistent cycles count]"
+     err=1
+  else
+    echo "Basic leader sampling test [Success]"
+  fi
 }
 
 test_topdown_leader_sampling() {
-- 
2.49.0
Re: [PATCH v3] perf test: Allow tolerance for leader sampling test
Posted by Namhyung Kim 3 weeks, 4 days ago
Hello,

On Thu, Apr 10, 2025 at 10:55:22AM +0200, Thomas Richter wrote:
> V3: Added check for missing samples as suggested by Chun-Tse.
> V2: Changed bc invocation to return 0 on success and 1 on error.
> 
> There is a known issue that the leader sampling is inconsistent, since
> throttle only affect leader, not the slave. The detail is in [1]. To
> maintain test coverage, this patch sets a tolerance rate of 80% to
> accommodate the throttled samples and prevent test failures due to
> throttling.
> 
> [1] lore.kernel.org/20250328182752.769662-1-ctshao@google.com
> 
> Signed-off-by: Chun-Tse Shao <ctshao@google.com>
> Suggested-by: Ian Rogers <irogers@google.com>
> Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
> Tested-by: Thomas Richter <tmricht@linux.ibm.com>
> Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
> ---
>  tools/perf/tests/shell/record.sh | 33 ++++++++++++++++++++++++++------
>  1 file changed, 27 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
> index ba8d873d3ca7..0075ffe783ad 100755
> --- a/tools/perf/tests/shell/record.sh
> +++ b/tools/perf/tests/shell/record.sh
> @@ -238,22 +238,43 @@ test_leader_sampling() {
>      err=1
>      return
>    fi
> +  perf script -i "${perfdata}" | grep brstack > $script_output
> +  # Check if the two instruction counts are equal in each record.
> +  # However, the throttling code doesn't consider event grouping. During throttling, only the
> +  # leader is stopped, causing the slave's counts significantly higher. To temporarily solve this,
> +  # let's set the tolerance rate to 80%.
> +  # TODO: Revert the code for tolerance once the throttling mechanism is fixed.
>    index=0
> -  perf script -i "${perfdata}" > $script_output
> +  valid_counts=0
> +  invalid_counts=0
> +  tolerance_rate=0.8
>    while IFS= read -r line
>    do
> -    # Check if the two instruction counts are equal in each record
>      cycles=$(echo $line | awk '{for(i=1;i<=NF;i++) if($i=="cycles:") print $(i-1)}')
>      if [ $(($index%2)) -ne 0 ] && [ ${cycles}x != ${prev_cycles}x ]
>      then
> -      echo "Leader sampling [Failed inconsistent cycles count]"
> -      err=1
> -      return
> +      invalid_counts=$(($invalid_counts+1))
> +    else
> +      valid_counts=$(($valid_counts+1))
>      fi
>      index=$(($index+1))
>      prev_cycles=$cycles
>    done < $script_output
> -  echo "Basic leader sampling test [Success]"
> +  total_counts=$(bc <<< "$invalid_counts+$valid_counts")
> +  if (( $(bc <<< "$total_counts <= 0") ))
> +  then
> +    echo "Leader sampling [No sample generated]"
> +    err=1
> +    return
> +  fi
> +  isok=$(bc <<< "scale=2; if (($invalid_counts/$total_counts) < (1-$tolerance_rate)) { 0 } else { 1 };")

Is 'scale=2' really needed?  Does something similar to the above like

  if (( $(bc <<< "($invalid_counts / $total_counts) < (1 - $tolerance_rate)") ))

work?

Thanks,
Namhyung


> +  if [ $isok -eq 1 ]
> +  then
> +     echo "Leader sampling [Failed inconsistent cycles count]"
> +     err=1
> +  else
> +    echo "Basic leader sampling test [Success]"
> +  fi
>  }
>  
>  test_topdown_leader_sampling() {
> -- 
> 2.49.0
>
Re: [PATCH v3] perf test: Allow tolerance for leader sampling test
Posted by Thomas Richter 3 weeks, 3 days ago
On 4/11/25 02:36, Namhyung Kim wrote:
> Hello,
> 
> On Thu, Apr 10, 2025 at 10:55:22AM +0200, Thomas Richter wrote:
>> V3: Added check for missing samples as suggested by Chun-Tse.
>> V2: Changed bc invocation to return 0 on success and 1 on error.
>>
>> There is a known issue that the leader sampling is inconsistent, since
>> throttle only affect leader, not the slave. The detail is in [1]. To
>> maintain test coverage, this patch sets a tolerance rate of 80% to
>> accommodate the throttled samples and prevent test failures due to
>> throttling.
>>
>> [1] lore.kernel.org/20250328182752.769662-1-ctshao@google.com
>>
>> Signed-off-by: Chun-Tse Shao <ctshao@google.com>
>> Suggested-by: Ian Rogers <irogers@google.com>
>> Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
>> Tested-by: Thomas Richter <tmricht@linux.ibm.com>
>> Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
>> ---
>>  tools/perf/tests/shell/record.sh | 33 ++++++++++++++++++++++++++------
>>  1 file changed, 27 insertions(+), 6 deletions(-)
>>
>> diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
>> index ba8d873d3ca7..0075ffe783ad 100755
>> --- a/tools/perf/tests/shell/record.sh
>> +++ b/tools/perf/tests/shell/record.sh
>> @@ -238,22 +238,43 @@ test_leader_sampling() {
>>      err=1
>>      return
>>    fi
>> +  perf script -i "${perfdata}" | grep brstack > $script_output
>> +  # Check if the two instruction counts are equal in each record.
>> +  # However, the throttling code doesn't consider event grouping. During throttling, only the
>> +  # leader is stopped, causing the slave's counts significantly higher. To temporarily solve this,
>> +  # let's set the tolerance rate to 80%.
>> +  # TODO: Revert the code for tolerance once the throttling mechanism is fixed.
>>    index=0
>> -  perf script -i "${perfdata}" > $script_output
>> +  valid_counts=0
>> +  invalid_counts=0
>> +  tolerance_rate=0.8
>>    while IFS= read -r line
>>    do
>> -    # Check if the two instruction counts are equal in each record
>>      cycles=$(echo $line | awk '{for(i=1;i<=NF;i++) if($i=="cycles:") print $(i-1)}')
>>      if [ $(($index%2)) -ne 0 ] && [ ${cycles}x != ${prev_cycles}x ]
>>      then
>> -      echo "Leader sampling [Failed inconsistent cycles count]"
>> -      err=1
>> -      return
>> +      invalid_counts=$(($invalid_counts+1))
>> +    else
>> +      valid_counts=$(($valid_counts+1))
>>      fi
>>      index=$(($index+1))
>>      prev_cycles=$cycles
>>    done < $script_output
>> -  echo "Basic leader sampling test [Success]"
>> +  total_counts=$(bc <<< "$invalid_counts+$valid_counts")
>> +  if (( $(bc <<< "$total_counts <= 0") ))
>> +  then
>> +    echo "Leader sampling [No sample generated]"
>> +    err=1
>> +    return
>> +  fi
>> +  isok=$(bc <<< "scale=2; if (($invalid_counts/$total_counts) < (1-$tolerance_rate)) { 0 } else { 1 };")
> 
> Is 'scale=2' really needed?  Does something similar to the above like
> 
>   if (( $(bc <<< "($invalid_counts / $total_counts) < (1 - $tolerance_rate)") ))
> 
> work?
> 
> Thanks,
> Namhyung
> 
> 

From the man page of bc:


NUMBERS
       The most basic element in bc is the number.  Numbers are arbitrary precision numbers.   This
       precision  is both in the integer part and the fractional part.  All numbers are represented
       internally in decimal and all computation is done in decimal.  (This version  truncates  re‐
       sults from divide and multiply operations.)
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

This can be proved with:
# bc <<< "2/27"
0
# bc <<< "scale=2;2/27"
.07
#

Without scale there is no fractional part and integer arithmetic will lead to wrong results.

I think scale=2 is necessary or we need to use something different like awk.

Thanks
-- 
Thomas Richter, Dept 3303, IBM s390 Linux Development, Boeblingen, Germany
--
IBM Deutschland Research & Development GmbH

Vorsitzender des Aufsichtsrats: Wolfgang Wendt

Geschäftsführung: David Faller

Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
Re: [PATCH v3] perf test: Allow tolerance for leader sampling test
Posted by Namhyung Kim 3 weeks, 3 days ago
On Fri, Apr 11, 2025 at 08:58:45AM +0200, Thomas Richter wrote:
> On 4/11/25 02:36, Namhyung Kim wrote:
> > Hello,
> > 
> > On Thu, Apr 10, 2025 at 10:55:22AM +0200, Thomas Richter wrote:
> >> V3: Added check for missing samples as suggested by Chun-Tse.
> >> V2: Changed bc invocation to return 0 on success and 1 on error.
> >>
> >> There is a known issue that the leader sampling is inconsistent, since
> >> throttle only affect leader, not the slave. The detail is in [1]. To
> >> maintain test coverage, this patch sets a tolerance rate of 80% to
> >> accommodate the throttled samples and prevent test failures due to
> >> throttling.
> >>
> >> [1] lore.kernel.org/20250328182752.769662-1-ctshao@google.com
> >>
> >> Signed-off-by: Chun-Tse Shao <ctshao@google.com>
> >> Suggested-by: Ian Rogers <irogers@google.com>
> >> Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
> >> Tested-by: Thomas Richter <tmricht@linux.ibm.com>
> >> Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
> >> ---
> >>  tools/perf/tests/shell/record.sh | 33 ++++++++++++++++++++++++++------
> >>  1 file changed, 27 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
> >> index ba8d873d3ca7..0075ffe783ad 100755
> >> --- a/tools/perf/tests/shell/record.sh
> >> +++ b/tools/perf/tests/shell/record.sh
> >> @@ -238,22 +238,43 @@ test_leader_sampling() {
> >>      err=1
> >>      return
> >>    fi
> >> +  perf script -i "${perfdata}" | grep brstack > $script_output
> >> +  # Check if the two instruction counts are equal in each record.
> >> +  # However, the throttling code doesn't consider event grouping. During throttling, only the
> >> +  # leader is stopped, causing the slave's counts significantly higher. To temporarily solve this,
> >> +  # let's set the tolerance rate to 80%.
> >> +  # TODO: Revert the code for tolerance once the throttling mechanism is fixed.
> >>    index=0
> >> -  perf script -i "${perfdata}" > $script_output
> >> +  valid_counts=0
> >> +  invalid_counts=0
> >> +  tolerance_rate=0.8
> >>    while IFS= read -r line
> >>    do
> >> -    # Check if the two instruction counts are equal in each record
> >>      cycles=$(echo $line | awk '{for(i=1;i<=NF;i++) if($i=="cycles:") print $(i-1)}')
> >>      if [ $(($index%2)) -ne 0 ] && [ ${cycles}x != ${prev_cycles}x ]
> >>      then
> >> -      echo "Leader sampling [Failed inconsistent cycles count]"
> >> -      err=1
> >> -      return
> >> +      invalid_counts=$(($invalid_counts+1))
> >> +    else
> >> +      valid_counts=$(($valid_counts+1))
> >>      fi
> >>      index=$(($index+1))
> >>      prev_cycles=$cycles
> >>    done < $script_output
> >> -  echo "Basic leader sampling test [Success]"
> >> +  total_counts=$(bc <<< "$invalid_counts+$valid_counts")
> >> +  if (( $(bc <<< "$total_counts <= 0") ))
> >> +  then
> >> +    echo "Leader sampling [No sample generated]"
> >> +    err=1
> >> +    return
> >> +  fi
> >> +  isok=$(bc <<< "scale=2; if (($invalid_counts/$total_counts) < (1-$tolerance_rate)) { 0 } else { 1 };")
> > 
> > Is 'scale=2' really needed?  Does something similar to the above like
> > 
> >   if (( $(bc <<< "($invalid_counts / $total_counts) < (1 - $tolerance_rate)") ))
> > 
> > work?
> > 
> > Thanks,
> > Namhyung
> > 
> > 
> 
> From the man page of bc:
> 
> 
> NUMBERS
>        The most basic element in bc is the number.  Numbers are arbitrary precision numbers.   This
>        precision  is both in the integer part and the fractional part.  All numbers are represented
>        internally in decimal and all computation is done in decimal.  (This version  truncates  re‐
>        sults from divide and multiply operations.)
>        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> This can be proved with:
> # bc <<< "2/27"
> 0
> # bc <<< "scale=2;2/27"
> .07
> #
> 
> Without scale there is no fractional part and integer arithmetic will lead to wrong results.
> 
> I think scale=2 is necessary or we need to use something different like awk.

Ok, thanks for checking it.  Right, the scale=2 is necessary.

  $ bc <<< '(1 / 10) < (1 - 0.8)'
  1

  $ bc <<< '(3 / 10) < (1 - 0.8)'
  1

  $ bc <<< 'scale=2; (1 / 10) < (1 - 0.8)'
  1

  $ bc <<< 'scale=2; (3 / 10) < (1 - 0.8)'
  0

Thanks,
Namhyung

Re: [PATCH v3] perf test: Allow tolerance for leader sampling test
Posted by Chun-Tse Shao 3 weeks, 4 days ago
On Thu, Apr 10, 2025 at 1:55 AM Thomas Richter <tmricht@linux.ibm.com> wrote:
>
> V3: Added check for missing samples as suggested by Chun-Tse.
> V2: Changed bc invocation to return 0 on success and 1 on error.
>
> There is a known issue that the leader sampling is inconsistent, since
> throttle only affect leader, not the slave. The detail is in [1]. To
> maintain test coverage, this patch sets a tolerance rate of 80% to
> accommodate the throttled samples and prevent test failures due to
> throttling.
>
> [1] lore.kernel.org/20250328182752.769662-1-ctshao@google.com
>
> Signed-off-by: Chun-Tse Shao <ctshao@google.com>
> Suggested-by: Ian Rogers <irogers@google.com>
> Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
> Tested-by: Thomas Richter <tmricht@linux.ibm.com>
> Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Tested-by: Chun-Tse Shao <ctshao@google.com>
> ---
>  tools/perf/tests/shell/record.sh | 33 ++++++++++++++++++++++++++------
>  1 file changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
> index ba8d873d3ca7..0075ffe783ad 100755
> --- a/tools/perf/tests/shell/record.sh
> +++ b/tools/perf/tests/shell/record.sh
> @@ -238,22 +238,43 @@ test_leader_sampling() {
>      err=1
>      return
>    fi
> +  perf script -i "${perfdata}" | grep brstack > $script_output
> +  # Check if the two instruction counts are equal in each record.
> +  # However, the throttling code doesn't consider event grouping. During throttling, only the
> +  # leader is stopped, causing the slave's counts significantly higher. To temporarily solve this,
> +  # let's set the tolerance rate to 80%.
> +  # TODO: Revert the code for tolerance once the throttling mechanism is fixed.
>    index=0
> -  perf script -i "${perfdata}" > $script_output
> +  valid_counts=0
> +  invalid_counts=0
> +  tolerance_rate=0.8
>    while IFS= read -r line
>    do
> -    # Check if the two instruction counts are equal in each record
>      cycles=$(echo $line | awk '{for(i=1;i<=NF;i++) if($i=="cycles:") print $(i-1)}')
>      if [ $(($index%2)) -ne 0 ] && [ ${cycles}x != ${prev_cycles}x ]
>      then
> -      echo "Leader sampling [Failed inconsistent cycles count]"
> -      err=1
> -      return
> +      invalid_counts=$(($invalid_counts+1))
> +    else
> +      valid_counts=$(($valid_counts+1))
>      fi
>      index=$(($index+1))
>      prev_cycles=$cycles
>    done < $script_output
> -  echo "Basic leader sampling test [Success]"
> +  total_counts=$(bc <<< "$invalid_counts+$valid_counts")
> +  if (( $(bc <<< "$total_counts <= 0") ))
> +  then
> +    echo "Leader sampling [No sample generated]"
> +    err=1
> +    return
> +  fi
> +  isok=$(bc <<< "scale=2; if (($invalid_counts/$total_counts) < (1-$tolerance_rate)) { 0 } else { 1 };")
> +  if [ $isok -eq 1 ]
> +  then
> +     echo "Leader sampling [Failed inconsistent cycles count]"
> +     err=1
> +  else
> +    echo "Basic leader sampling test [Success]"
> +  fi
>  }
>
>  test_topdown_leader_sampling() {
> --
> 2.49.0
>