[PATCH 1/2] tracing: Add .percent suffix option to histogram values

Masami Hiramatsu (Google) posted 2 patches 3 years, 8 months ago
There is a newer version of this series
[PATCH 1/2] tracing: Add .percent suffix option to histogram values
Posted by Masami Hiramatsu (Google) 3 years, 8 months ago
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Add .percent suffix option to show the histogram values in percentage.
This feature is useful when we need yo undersntand the overall trend
for the histograms of large values.
E.g. this shows the runtime percentage for each tasks.

------
  # cd /sys/kernel/debug/tracing/
  # echo hist:keys=pid:vals=hitcount,runtime.percent:sort=pid > \
    events/sched/sched_stat_runtime/trigger
  # sleep 10
  # cat events/sched/sched_stat_runtime/hist
 # event histogram
 #
 # trigger info: hist:keys=pid:vals=hitcount,runtime.percent:sort=pid:size=2048 [active]
 #

 { pid:         14 } hitcount:          9  runtime:       2.48
 { pid:         16 } hitcount:         38  runtime:       5.11
 { pid:         59 } hitcount:         30  runtime:      10.30
 { pid:         61 } hitcount:         73  runtime:      13.19
 { pid:         64 } hitcount:          1  runtime:       0.22
 { pid:         65 } hitcount:         13  runtime:       2.53
 { pid:         67 } hitcount:         11  runtime:       2.35
 { pid:         69 } hitcount:          8  runtime:       1.40
 { pid:         77 } hitcount:          7  runtime:       1.83
 { pid:        145 } hitcount:         41  runtime:      33.03
 { pid:        152 } hitcount:          8  runtime:      11.90
 { pid:        153 } hitcount:          6  runtime:       8.09
 { pid:        154 } hitcount:          5  runtime:       7.50

 Totals:
     Hits: 250
     Entries: 13
     Dropped: 0
-----

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/trace.c             |    3 +-
 kernel/trace/trace_events_hist.c |   66 ++++++++++++++++++++++++++++++++++----
 2 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7eb5bce62500..4f54f2494370 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5700,7 +5700,8 @@ static const char readme_msg[] =
 	"\t            .syscall    display a syscall id as a syscall name\n"
 	"\t            .log2       display log2 value rather than raw number\n"
 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
-	"\t            .usecs      display a common_timestamp in microseconds\n\n"
+	"\t            .usecs      display a common_timestamp in microseconds\n"
+	"\t            .percent    display a number of percentage value\n\n"
 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
 	"\t    trigger or to start a hist trigger but not log any events\n"
 	"\t    until told to do so.  'continue' can be used to start or\n"
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index fdf784620c28..1103b9eb0a74 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -477,6 +477,7 @@ enum hist_field_flags {
 	HIST_FIELD_FL_ALIAS		= 1 << 16,
 	HIST_FIELD_FL_BUCKET		= 1 << 17,
 	HIST_FIELD_FL_CONST		= 1 << 18,
+	HIST_FIELD_FL_PERCENT		= 1 << 19,
 };
 
 struct var_defs {
@@ -1682,6 +1683,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field)
 		flags_str = "buckets";
 	else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
 		flags_str = "usecs";
+	else if (hist_field->flags & HIST_FIELD_FL_PERCENT)
+		flags_str = "percent";
 
 	return flags_str;
 }
@@ -2290,6 +2293,10 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
 			if (ret || !(*buckets))
 				goto error;
 			*flags |= HIST_FIELD_FL_BUCKET;
+		} else if (strncmp(modifier, "percent", 7) == 0) {
+			if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY))
+				goto error;
+			*flags |= HIST_FIELD_FL_PERCENT;
 		} else {
  error:
 			hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier));
@@ -4254,8 +4261,13 @@ static int create_val_fields(struct hist_trigger_data *hist_data,
 		if (!field_str)
 			break;
 
-		if (strcmp(field_str, "hitcount") == 0)
+		if (strncmp(field_str, "hitcount", 8) == 0 &&
+		    (field_str[8] == '.' || field_str[8] == '\0')) {
+			if (strncmp(field_str + 8, ".percent", 8) == 0)
+				hist_data->fields[HITCOUNT_IDX]->flags |=
+					HIST_FIELD_FL_PERCENT;
 			continue;
+		}
 
 		ret = create_val_field(hist_data, j++, file, field_str);
 		if (ret)
@@ -5190,18 +5202,34 @@ static void hist_trigger_print_key(struct seq_file *m,
 	seq_puts(m, "}");
 }
 
+/* Get the 100 times of the percentage of @val in @total */
+static inline unsigned int __get_percentage(u64 val, u64 total)
+{
+	if (val < (U64_MAX / 10000))
+		return (unsigned int)(val * 10000 / total);
+	else
+		return val / (total / 10000);
+}
+
 static void hist_trigger_entry_print(struct seq_file *m,
 				     struct hist_trigger_data *hist_data,
+				     u64 *totals,
 				     void *key,
 				     struct tracing_map_elt *elt)
 {
 	const char *field_name;
-	unsigned int i;
+	unsigned int i, pc;
+	u64 val;
 
 	hist_trigger_print_key(m, hist_data, key, elt);
 
-	seq_printf(m, " hitcount: %10llu",
-		   tracing_map_read_sum(elt, HITCOUNT_IDX));
+	i = HITCOUNT_IDX;
+	val = tracing_map_read_sum(elt, i);
+	if (hist_data->fields[i]->flags & HIST_FIELD_FL_PERCENT) {
+		pc = __get_percentage(val, totals[i]);
+		seq_printf(m, " hitcount: %7u.%02u", pc / 100, pc % 100);
+	} else
+		seq_printf(m, " hitcount: %10llu", val);
 
 	for (i = 1; i < hist_data->n_vals; i++) {
 		field_name = hist_field_name(hist_data->fields[i], 0);
@@ -5210,7 +5238,12 @@ static void hist_trigger_entry_print(struct seq_file *m,
 		    hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR)
 			continue;
 
-		if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
+		if (hist_data->fields[i]->flags & HIST_FIELD_FL_PERCENT) {
+			val = tracing_map_read_sum(elt, i);
+			pc = __get_percentage(val, totals[i]);
+			seq_printf(m, "  %s: %7u.%02u", field_name,
+				   pc / 100, pc % 100);
+		} else if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
 			seq_printf(m, "  %s: %10llx", field_name,
 				   tracing_map_read_sum(elt, i));
 		} else {
@@ -5229,7 +5262,8 @@ static int print_entries(struct seq_file *m,
 {
 	struct tracing_map_sort_entry **sort_entries = NULL;
 	struct tracing_map *map = hist_data->map;
-	int i, n_entries;
+	int i, j, n_entries;
+	u64 *totals = NULL;
 
 	n_entries = tracing_map_sort_entries(map, hist_data->sort_keys,
 					     hist_data->n_sort_keys,
@@ -5237,11 +5271,29 @@ static int print_entries(struct seq_file *m,
 	if (n_entries < 0)
 		return n_entries;
 
+	for (j = 0; j < hist_data->n_vals; j++) {
+		if (!(hist_data->fields[j]->flags & HIST_FIELD_FL_PERCENT))
+			continue;
+		if (!totals) {
+			totals = kcalloc(hist_data->n_vals, sizeof(u64),
+					 GFP_KERNEL);
+			if (!totals) {
+				n_entries = -ENOMEM;
+				goto out;
+			}
+		}
+		for (i = 0; i < n_entries; i++)
+			totals[j] += tracing_map_read_sum(
+					sort_entries[i]->elt, j);
+	}
+
 	for (i = 0; i < n_entries; i++)
-		hist_trigger_entry_print(m, hist_data,
+		hist_trigger_entry_print(m, hist_data, totals,
 					 sort_entries[i]->key,
 					 sort_entries[i]->elt);
 
+	kfree(totals);
+out:
 	tracing_map_destroy_sort_entries(sort_entries, n_entries);
 
 	return n_entries;
Re: [PATCH 1/2] tracing: Add .percent suffix option to histogram values
Posted by kernel test robot 3 years, 8 months ago
Hi "Masami,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on rostedt-trace/for-next]
[also build test ERROR on linus/master v5.19 next-20220728]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Masami-Hiramatsu-Google/tracing-hist-Add-percentage-histogram-suffixes/20220801-110217
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git for-next
config: i386-randconfig-a001-20220801 (https://download.01.org/0day-ci/archive/20220802/202208021438.2r5RXlo9-lkp@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 52cd00cabf479aa7eb6dbb063b7ba41ea57bce9e)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/aef5f602ee3aa9ca07402c1d3da0642e932c1b3a
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Masami-Hiramatsu-Google/tracing-hist-Add-percentage-histogram-suffixes/20220801-110217
        git checkout aef5f602ee3aa9ca07402c1d3da0642e932c1b3a
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> ld.lld: error: undefined symbol: __udivdi3
   >>> referenced by trace_events_hist.c:5211 (kernel/trace/trace_events_hist.c:5211)
   >>>               trace/trace_events_hist.o:(hist_show) in archive kernel/built-in.a
   >>> referenced by trace_events_hist.c:0 (kernel/trace/trace_events_hist.c:0)
   >>>               trace/trace_events_hist.o:(hist_show) in archive kernel/built-in.a
   >>> referenced by trace_events_hist.c:5211 (kernel/trace/trace_events_hist.c:5211)
   >>>               trace/trace_events_hist.o:(hist_show) in archive kernel/built-in.a
   >>> referenced 1 more times

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp
Re: [PATCH 1/2] tracing: Add .percent suffix option to histogram values
Posted by Steven Rostedt 3 years, 8 months ago
On Tue, 2 Aug 2022 14:49:36 +0800
kernel test robot <lkp@intel.com> wrote:

> All errors (new ones prefixed by >>):
> 
> >> ld.lld: error: undefined symbol: __udivdi3  

This is due to this:

> @@ -5190,18 +5202,34 @@ static void hist_trigger_print_key(struct seq_file *m,
>  	seq_puts(m, "}");
>  }
>  
> +/* Get the 100 times of the percentage of @val in @total */
> +static inline unsigned int __get_percentage(u64 val, u64 total)
> +{
> +	if (val < (U64_MAX / 10000))
> +		return (unsigned int)(val * 10000 / total);
> +	else
> +		return val / (total / 10000);
> +}
> +

You can't use '/' on u64 values. You have to use div64*(). Otherwise 32 bit
architectures may use floating point operations or glibc helpers.

See the other divisions in trace_events_hist.c that do so too.

-- Steve


>    >>> referenced by trace_events_hist.c:5211 (kernel/trace/trace_events_hist.c:5211)
>    >>>               trace/trace_events_hist.o:(hist_show) in archive kernel/built-in.a
>    >>> referenced by trace_events_hist.c:0 (kernel/trace/trace_events_hist.c:0)
>    >>>               trace/trace_events_hist.o:(hist_show) in archive kernel/built-in.a
>    >>> referenced by trace_events_hist.c:5211 (kernel/trace/trace_events_hist.c:5211)
>    >>>               trace/trace_events_hist.o:(hist_show) in archive kernel/built-in.a
>    >>> referenced 1 more times
Re: [PATCH 1/2] tracing: Add .percent suffix option to histogram values
Posted by Masami Hiramatsu (Google) 3 years, 8 months ago
On Tue, 2 Aug 2022 10:56:46 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Tue, 2 Aug 2022 14:49:36 +0800
> kernel test robot <lkp@intel.com> wrote:
> 
> > All errors (new ones prefixed by >>):
> > 
> > >> ld.lld: error: undefined symbol: __udivdi3  
> 
> This is due to this:
> 
> > @@ -5190,18 +5202,34 @@ static void hist_trigger_print_key(struct seq_file *m,
> >  	seq_puts(m, "}");
> >  }
> >  
> > +/* Get the 100 times of the percentage of @val in @total */
> > +static inline unsigned int __get_percentage(u64 val, u64 total)
> > +{
> > +	if (val < (U64_MAX / 10000))
> > +		return (unsigned int)(val * 10000 / total);
> > +	else
> > +		return val / (total / 10000);
> > +}
> > +
> 
> You can't use '/' on u64 values. You have to use div64*(). Otherwise 32 bit
> architectures may use floating point operations or glibc helpers.

Yeah, I forgot that. And also I have to check "total != 0" here. 

> 
> See the other divisions in trace_events_hist.c that do so too.

Thanks!

> 
> -- Steve
> 
> 
> >    >>> referenced by trace_events_hist.c:5211 (kernel/trace/trace_events_hist.c:5211)
> >    >>>               trace/trace_events_hist.o:(hist_show) in archive kernel/built-in.a
> >    >>> referenced by trace_events_hist.c:0 (kernel/trace/trace_events_hist.c:0)
> >    >>>               trace/trace_events_hist.o:(hist_show) in archive kernel/built-in.a
> >    >>> referenced by trace_events_hist.c:5211 (kernel/trace/trace_events_hist.c:5211)
> >    >>>               trace/trace_events_hist.o:(hist_show) in archive kernel/built-in.a
> >    >>> referenced 1 more times  
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>