[PATCH v5] perf utilities: cln_size header

Ricky Ringler posted 1 patch 4 days, 14 hours ago
tools/perf/builtin-inject.c |  1 +
tools/perf/util/env.h       |  1 +
tools/perf/util/header.c    | 33 +++++++++++++++++++++++++++++
tools/perf/util/header.h    |  1 +
tools/perf/util/sort.c      | 41 +++++++++++++++++++++++++++----------
5 files changed, 66 insertions(+), 11 deletions(-)
[PATCH v5] perf utilities: cln_size header
Posted by Ricky Ringler 4 days, 14 hours ago
Store cacheline size during perf record in header, so
that cacheline size can be used for other features, like
sort.

V5: Namhyung feedback
V4: Ian feedback
V3: Rebase off perf-tools-next round two
V2: Rebase off perf-tools-next

Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"

Testing:
- Built perf
- Ran record + report with feat enabled
- Ran record + report with feat disabled

Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
---
 tools/perf/builtin-inject.c |  1 +
 tools/perf/util/env.h       |  1 +
 tools/perf/util/header.c    | 33 +++++++++++++++++++++++++++++
 tools/perf/util/header.h    |  1 +
 tools/perf/util/sort.c      | 41 +++++++++++++++++++++++++++----------
 5 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 5b29f4296861..11ac7c8c4be3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
 	case HEADER_HYBRID_TOPOLOGY:
 	case HEADER_PMU_CAPS:
 	case HEADER_CPU_DOMAIN_INFO:
+	case HEADER_CLN_SIZE:
 		return true;
 	/* Information that can be updated */
 	case HEADER_BUILD_ID:
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index a4501cbca375..c7052ac1f856 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -112,6 +112,7 @@ struct perf_env {
 	struct cpu_cache_level	*caches;
 	struct cpu_domain_map	**cpu_domain;
 	int			 caches_cnt;
+	unsigned int		cln_size;
 	u32			comp_ratio;
 	u32			comp_ver;
 	u32			comp_type;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9142a8ba4019..4d852bd4ca9a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -54,6 +54,7 @@
 #include "bpf-event.h"
 #include "bpf-utils.h"
 #include "clockid.h"
+#include "cacheline.h"
 
 #include <linux/ctype.h>
 #include <internal/lib.h>
@@ -1304,6 +1305,22 @@ static int write_cache(struct feat_fd *ff,
 	return ret;
 }
 
+#define DEFAULT_CACHELINE_SIZE 64
+
+/* Record the cacheline size; fall back to the default if it is unknown. */
+static int write_cln_size(struct feat_fd *ff,
+			  struct evlist *evlist __maybe_unused)
+{
+	int cln_size = cacheline_size();
+
+	if (cln_size <= 0)
+		cln_size = DEFAULT_CACHELINE_SIZE;
+
+	ff->ph->env.cln_size = cln_size;
+
+	return do_write(ff, &cln_size, sizeof(cln_size));
+}
+
 static int write_stat(struct feat_fd *ff __maybe_unused,
 		      struct evlist *evlist __maybe_unused)
 {
@@ -2261,6 +2278,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
 	}
 }
 
+static void print_cln_size(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
+}
+
 static void print_compressed(struct feat_fd *ff, FILE *fp)
 {
 	fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
@@ -3154,6 +3176,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
 	return -1;
 }
 
+/* Read one u32 from the feature data into env.cln_size. */
+static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_env *target = &ff->ph->env;
+	int err = do_read_u32(ff, &target->cln_size);
+
+	/* Any read failure is reported to the caller as -1. */
+	return err ? -1 : 0;
+}
+
 static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
 {
 	struct perf_session *session;
@@ -3763,6 +3795,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPR(PMU_CAPS,	pmu_caps,	false),
 	FEAT_OPR(CPU_DOMAIN_INFO,	cpu_domain_info,	true),
 	FEAT_OPR(E_MACHINE,	e_machine,	false),
+	FEAT_OPR(CLN_SIZE,	cln_size,	false),
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index cc40ac796f52..be315040727f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -55,6 +55,7 @@ enum {
 	HEADER_PMU_CAPS,
 	HEADER_CPU_DOMAIN_INFO,
 	HEADER_E_MACHINE,
+	HEADER_CLN_SIZE,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 42d5cd7ef4e2..5f617cf03d5d 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -30,6 +30,7 @@
 #include "time-utils.h"
 #include "cgroup.h"
 #include "machine.h"
+#include "session.h"
 #include "trace-event.h"
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -2474,7 +2475,30 @@ struct sort_entry sort_type_offset = {
 
 /* --sort typecln */
 
-#define DEFAULT_CACHELINE_SIZE 64
+/* Cacheline size from the session header; falls back to 64 (never 0). */
+static int
+hist_entry__cln_size(struct hist_entry *he)
+{
+	struct perf_session *session = NULL;
+	int cln_size = 0;
+
+	if (he && he->hists) {
+		struct evsel *evsel = hists_to_evsel(he->hists);
+
+		if (evsel)
+			session = evsel__session(evsel);
+	}
+
+	/* The evsel may not be attached to a session; do not dereference NULL. */
+	if (session)
+		cln_size = session->header.env.cln_size;
+
+	/* Value missing from the header or not positive: avoid div/0 later. */
+	if (cln_size < 1)
+		cln_size = 64;
+
+	return cln_size;
+}
 
 static int64_t
 sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
@@ -2482,11 +2506,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
 	struct annotated_data_type *left_type = left->mem_type;
 	struct annotated_data_type *right_type = right->mem_type;
 	int64_t left_cln, right_cln;
+	int64_t cln_size_left = hist_entry__cln_size(left);
+	int64_t cln_size_right = hist_entry__cln_size(right);
 	int64_t ret;
-	int cln_size = cacheline_size();
-
-	if (cln_size == 0)
-		cln_size = DEFAULT_CACHELINE_SIZE;
 
 	if (!left_type) {
 		sort__type_init(left);
@@ -2502,8 +2524,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
 	if (ret)
 		return ret;
 
-	left_cln = left->mem_type_off / cln_size;
-	right_cln = right->mem_type_off / cln_size;
+	left_cln = left->mem_type_off / cln_size_left;
+	right_cln = right->mem_type_off / cln_size_right;
 	return left_cln - right_cln;
 }
 
@@ -2511,10 +2533,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
 				     size_t size, unsigned int width __maybe_unused)
 {
 	struct annotated_data_type *he_type = he->mem_type;
-	int cln_size = cacheline_size();
-
-	if (cln_size == 0)
-		cln_size = DEFAULT_CACHELINE_SIZE;
+	int cln_size = hist_entry__cln_size(he);
 
 	return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
 			       he->mem_type_off / cln_size);
-- 
2.53.0
Re: [PATCH v5] perf utilities: cln_size header
Posted by Namhyung Kim 2 days, 4 hours ago
On Sat, Mar 28, 2026 at 08:04:52PM +0000, Ricky Ringler wrote:
> Store cacheline size during perf record in header, so
> that cacheline size can be used for other features, like
> sort.
> 
> V5: Namhyung feedback
> V4: Ian feedback
> V3: Rebase off perf-tools-next round two
> V2: Rebase off perf-tools-next
> 
> Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
> 
> Testing:
> - Built perf
> - Ran record + report with feat enabled
> - Ran record + report with feat disabled

It'd be nice if you can add an example output like:

  $ perf report --header-only | grep cacheline

> 
> Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
> ---
>  tools/perf/builtin-inject.c |  1 +
>  tools/perf/util/env.h       |  1 +
>  tools/perf/util/header.c    | 33 +++++++++++++++++++++++++++++
>  tools/perf/util/header.h    |  1 +
>  tools/perf/util/sort.c      | 41 +++++++++++++++++++++++++++----------
>  5 files changed, 66 insertions(+), 11 deletions(-)
> 
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 5b29f4296861..11ac7c8c4be3 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
>  	case HEADER_HYBRID_TOPOLOGY:
>  	case HEADER_PMU_CAPS:
>  	case HEADER_CPU_DOMAIN_INFO:
> +	case HEADER_CLN_SIZE:
>  		return true;
>  	/* Information that can be updated */
>  	case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index a4501cbca375..c7052ac1f856 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -112,6 +112,7 @@ struct perf_env {
>  	struct cpu_cache_level	*caches;
>  	struct cpu_domain_map	**cpu_domain;
>  	int			 caches_cnt;
> +	unsigned int		cln_size;
>  	u32			comp_ratio;
>  	u32			comp_ver;
>  	u32			comp_type;
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 9142a8ba4019..4d852bd4ca9a 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -54,6 +54,7 @@
>  #include "bpf-event.h"
>  #include "bpf-utils.h"
>  #include "clockid.h"
> +#include "cacheline.h"
>  
>  #include <linux/ctype.h>
>  #include <internal/lib.h>
> @@ -1304,6 +1305,22 @@ static int write_cache(struct feat_fd *ff,
>  	return ret;
>  }
>  
> +#define DEFAULT_CACHELINE_SIZE = 64

Please move this into a header (after removing '=') ...

> +
> +static int write_cln_size(struct feat_fd *ff,
> +		       struct evlist *evlist __maybe_unused)
> +{
> +	int cln_size = cacheline_size();
> +
> +
> +	if(!cln_size)
> +		cln_size = DEFAULT_CACHELINE_SIZE;
> +
> +	ff->ph->env.cln_size = cln_size;
> +
> +	return do_write(ff, &cln_size, sizeof(cln_size));
> +}
> +
>  static int write_stat(struct feat_fd *ff __maybe_unused,
>  		      struct evlist *evlist __maybe_unused)
>  {
> @@ -2261,6 +2278,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
>  	}
>  }
>  
> +static void print_cln_size(struct feat_fd *ff, FILE *fp)
> +{
> +	fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
> +}
> +
>  static void print_compressed(struct feat_fd *ff, FILE *fp)
>  {
>  	fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
> @@ -3154,6 +3176,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
>  	return -1;
>  }
>  
> +static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
> +{
> +	struct perf_env *env = &ff->ph->env;
> +
> +	if (do_read_u32(ff, &env->cln_size))
> +		return -1;
> +
> +	return 0;
> +}
> +
>  static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
>  {
>  	struct perf_session *session;
> @@ -3763,6 +3795,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
>  	FEAT_OPR(PMU_CAPS,	pmu_caps,	false),
>  	FEAT_OPR(CPU_DOMAIN_INFO,	cpu_domain_info,	true),
>  	FEAT_OPR(E_MACHINE,	e_machine,	false),
> +	FEAT_OPR(CLN_SIZE,	cln_size,	false),
>  };
>  
>  struct header_print_data {
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index cc40ac796f52..be315040727f 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -55,6 +55,7 @@ enum {
>  	HEADER_PMU_CAPS,
>  	HEADER_CPU_DOMAIN_INFO,
>  	HEADER_E_MACHINE,
> +	HEADER_CLN_SIZE,
>  	HEADER_LAST_FEATURE,
>  	HEADER_FEAT_BITS	= 256,
>  };
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 42d5cd7ef4e2..5f617cf03d5d 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -30,6 +30,7 @@
>  #include "time-utils.h"
>  #include "cgroup.h"
>  #include "machine.h"
> +#include "session.h"
>  #include "trace-event.h"
>  #include <linux/kernel.h>
>  #include <linux/string.h>
> @@ -2474,7 +2475,30 @@ struct sort_entry sort_type_offset = {
>  
>  /* --sort typecln */
>  
> -#define DEFAULT_CACHELINE_SIZE 64
> +static int
> +hist_entry__cln_size(struct hist_entry *he)
> +{
> +	int ret = 0;
> +
> +	if (he && he->hists) {
> +		struct evsel *evsel = hists_to_evsel(he->hists);
> +
> +
> +		if (evsel) {
> +			struct perf_session *session = evsel__session(evsel);
> +
> +			ret = session->header.env.cln_size;
> +		}
> +	}
> +
> +	if (!ret || ret < 1) {
> +		int default_cacheline_size = 64; // avoid div/0 later
> +
> +		ret = default_cacheline_size;

... and use it here as well.

Thanks,
Namhyung


> +	}
> +
> +	return ret;
> +}
>  
>  static int64_t
>  sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> @@ -2482,11 +2506,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
>  	struct annotated_data_type *left_type = left->mem_type;
>  	struct annotated_data_type *right_type = right->mem_type;
>  	int64_t left_cln, right_cln;
> +	int64_t cln_size_left = hist_entry__cln_size(left);
> +	int64_t cln_size_right = hist_entry__cln_size(right);
>  	int64_t ret;
> -	int cln_size = cacheline_size();
> -
> -	if (cln_size == 0)
> -		cln_size = DEFAULT_CACHELINE_SIZE;
>  
>  	if (!left_type) {
>  		sort__type_init(left);
> @@ -2502,8 +2524,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
>  	if (ret)
>  		return ret;
>  
> -	left_cln = left->mem_type_off / cln_size;
> -	right_cln = right->mem_type_off / cln_size;
> +	left_cln = left->mem_type_off / cln_size_left;
> +	right_cln = right->mem_type_off / cln_size_right;
>  	return left_cln - right_cln;
>  }
>  
> @@ -2511,10 +2533,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
>  				     size_t size, unsigned int width __maybe_unused)
>  {
>  	struct annotated_data_type *he_type = he->mem_type;
> -	int cln_size = cacheline_size();
> -
> -	if (cln_size == 0)
> -		cln_size = DEFAULT_CACHELINE_SIZE;
> +	int cln_size = hist_entry__cln_size(he);
>  
>  	return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
>  			       he->mem_type_off / cln_size);
> -- 
> 2.53.0
> 
>
Re: [PATCH v5] perf utilities: cln_size header
Posted by kernel test robot 3 days, 3 hours ago
Hi Ricky,

kernel test robot noticed the following build errors:

[auto build test ERROR on perf-tools-next/perf-tools-next]
[also build test ERROR on tip/perf/core perf-tools/perf-tools next-20260327]
[cannot apply to acme/perf/core linus/master v6.16-rc1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Ricky-Ringler/perf-utilities-cln_size-header/20260329-205729
base:   https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git perf-tools-next
patch link:    https://lore.kernel.org/r/20260328200442.134489-1-ricky.ringler%40proton.me
patch subject: [PATCH v5] perf utilities: cln_size header
config: arm64-allnoconfig-bpf (https://download.01.org/0day-ci/archive/20260329/202603291618.3giyFism-lkp@intel.com/config)
compiler: aarch64-linux-gnu-gcc (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260329/202603291618.3giyFism-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/r/202603291618.3giyFism-lkp@intel.com/

All errors (new ones prefixed by >>):

   Makefile.config:576: No elfutils/debuginfod.h found, no debuginfo server support, please install libdebuginfod-dev/elfutils-debuginfod-client-devel or equivalent
   Makefile.config:963: No libllvm 13+ found, slower source file resolution, please install llvm-devel/llvm-dev
   Makefile.config:1159: Rust is not found. Test workloads with rust are disabled.
     PERF_VERSION = 7.0.rc4.ga971d40d4a8d
   util/header.c: In function 'write_cln_size':
>> util/header.c:1308:32: error: expected expression before '=' token
    1308 | #define DEFAULT_CACHELINE_SIZE = 64
         |                                ^
   util/header.c:1317:28: note: in expansion of macro 'DEFAULT_CACHELINE_SIZE'
    1317 |                 cln_size = DEFAULT_CACHELINE_SIZE;
         |                            ^~~~~~~~~~~~~~~~~~~~~~
   make[4]: *** [tools/build/Makefile.build:95: util/header.o] Error 1
   make[4]: *** Waiting for unfinished jobs....
   make[3]: *** [tools/build/Makefile.build:158: util] Error 2
   make[2]: *** [Makefile.perf:797: perf-util-in.o] Error 2
   make[2]: *** Waiting for unfinished jobs....
   make[1]: *** [Makefile.perf:289: sub-make] Error 2

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki