[PATCH v2] perf cpumap: Reduce cpu size from int to int16_t

Ian Rogers posted 1 patch 12 months ago
There is a newer version of this series
tools/lib/perf/include/perf/cpumap.h |  3 ++-
tools/perf/util/cpumap.c             | 13 ++++++++-----
tools/perf/util/env.c                |  2 +-
3 files changed, 11 insertions(+), 7 deletions(-)
[PATCH v2] perf cpumap: Reduce cpu size from int to int16_t
Posted by Ian Rogers 12 months ago
Fewer than 32k logical CPUs are currently supported by perf. A cpumap
is indexed by an integer (see perf_cpu_map__cpu) yielding a perf_cpu
that wraps a 4-byte int for the logical CPU - the wrapping is done
deliberately to avoid confusing a logical CPU with an index into a
cpumap. Using a 4-byte int within the perf_cpu is larger than required
so this patch reduces it to the 2-byte int16_t. For a cpumap
containing 16 entries this will reduce the array size from 64 to 32
bytes. For very large servers with lots of logical CPUs the size
savings will be greater.

Signed-off-by: Ian Rogers <irogers@google.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
---
v2. Rebase and tweak commit message.
---
 tools/lib/perf/include/perf/cpumap.h |  3 ++-
 tools/perf/util/cpumap.c             | 13 ++++++++-----
 tools/perf/util/env.c                |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 188a667babc6..8c1ab0f9194e 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -4,10 +4,11 @@
 
 #include <perf/core.h>
 #include <stdbool.h>
+#include <stdint.h>
 
 /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
 struct perf_cpu {
-	int cpu;
+	int16_t cpu;
 };
 
 struct perf_cache {
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 27094211edd8..85e224d8631b 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -427,7 +427,7 @@ static void set_max_cpu_num(void)
 {
 	const char *mnt;
 	char path[PATH_MAX];
-	int ret = -1;
+	int max, ret = -1;
 
 	/* set up default */
 	max_cpu_num.cpu = 4096;
@@ -444,10 +444,12 @@ static void set_max_cpu_num(void)
 		goto out;
 	}
 
-	ret = get_max_num(path, &max_cpu_num.cpu);
+	ret = get_max_num(path, &max);
 	if (ret)
 		goto out;
 
+	max_cpu_num.cpu = max;
+
 	/* get the highest present cpu number for a sparse allocation */
 	ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
 	if (ret >= PATH_MAX) {
@@ -455,8 +457,9 @@ static void set_max_cpu_num(void)
 		goto out;
 	}
 
-	ret = get_max_num(path, &max_present_cpu_num.cpu);
-
+	ret = get_max_num(path, &max);
+	if (!ret)
+		max_present_cpu_num.cpu = max;
 out:
 	if (ret)
 		pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
@@ -606,7 +609,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
 #define COMMA first ? "" : ","
 
 	for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
-		struct perf_cpu cpu = { .cpu = INT_MAX };
+		struct perf_cpu cpu = { .cpu = INT16_MAX };
 		bool last = i == perf_cpu_map__nr(map);
 
 		if (!last)
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 610c57da5b37..961a92545039 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -543,7 +543,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
 
 		for (i = 0; i < env->nr_numa_nodes; i++) {
 			nn = &env->numa_nodes[i];
-			nr = max(nr, perf_cpu_map__max(nn->map).cpu);
+			nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
 		}
 
 		nr++;
-- 
2.47.1.613.gc27f4b7a9f-goog
Re: [PATCH v2] perf cpumap: Reduce cpu size from int to int16_t
Posted by Leo Yan 11 months, 2 weeks ago
On Fri, Dec 20, 2024 at 10:52:07AM -0800, Ian Rogers wrote:
> 
> Fewer than 32k logical CPUs are currently supported by perf. A cpumap
> is indexed by an integer (see perf_cpu_map__cpu) yielding a perf_cpu
> that wraps a 4-byte int for the logical CPU - the wrapping is done
> deliberately to avoid confusing a logical CPU with an index into a
> cpumap. Using a 4-byte int within the perf_cpu is larger than required
> so this patch reduces it to the 2-byte int16_t. For a cpumap
> containing 16 entries this will reduce the array size from 64 to 32
> bytes. For very large servers with lots of logical CPUs the size
> savings will be greater.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> ---
> v2. Rebase and tweak commit message.
> ---
>  tools/lib/perf/include/perf/cpumap.h |  3 ++-
>  tools/perf/util/cpumap.c             | 13 ++++++++-----
>  tools/perf/util/env.c                |  2 +-
>  3 files changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> index 188a667babc6..8c1ab0f9194e 100644
> --- a/tools/lib/perf/include/perf/cpumap.h
> +++ b/tools/lib/perf/include/perf/cpumap.h
> @@ -4,10 +4,11 @@
> 
>  #include <perf/core.h>
>  #include <stdbool.h>
> +#include <stdint.h>
> 
>  /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
>  struct perf_cpu {
> -       int cpu;
> +       int16_t cpu;
>  };
> 
>  struct perf_cache {
> diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> index 27094211edd8..85e224d8631b 100644
> --- a/tools/perf/util/cpumap.c
> +++ b/tools/perf/util/cpumap.c
> @@ -427,7 +427,7 @@ static void set_max_cpu_num(void)
>  {
>         const char *mnt;
>         char path[PATH_MAX];
> -       int ret = -1;
> +       int max, ret = -1;
> 
>         /* set up default */
>         max_cpu_num.cpu = 4096;
> @@ -444,10 +444,12 @@ static void set_max_cpu_num(void)
>                 goto out;
>         }
> 
> -       ret = get_max_num(path, &max_cpu_num.cpu);
> +       ret = get_max_num(path, &max);
>         if (ret)
>                 goto out;
> 
> +       max_cpu_num.cpu = max;

I am concerned for the data conversion from int type to int16_t type.

The GCC option "-Wconversion" is not enabled in perf Makefile, unsafe
data conversion is allowed.  A better way is to update argument type for
get_max_num() for reading CPU number with int16_t type.

Thanks,
Leo

> +
>         /* get the highest present cpu number for a sparse allocation */
>         ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
>         if (ret >= PATH_MAX) {
> @@ -455,8 +457,9 @@ static void set_max_cpu_num(void)
>                 goto out;
>         }
> 
> -       ret = get_max_num(path, &max_present_cpu_num.cpu);
> -
> +       ret = get_max_num(path, &max);
> +       if (!ret)
> +               max_present_cpu_num.cpu = max;
>  out:
>         if (ret)
>                 pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
> @@ -606,7 +609,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
>  #define COMMA first ? "" : ","
> 
>         for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
> -               struct perf_cpu cpu = { .cpu = INT_MAX };
> +               struct perf_cpu cpu = { .cpu = INT16_MAX };
>                 bool last = i == perf_cpu_map__nr(map);
> 
>                 if (!last)
> diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
> index 610c57da5b37..961a92545039 100644
> --- a/tools/perf/util/env.c
> +++ b/tools/perf/util/env.c
> @@ -543,7 +543,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
> 
>                 for (i = 0; i < env->nr_numa_nodes; i++) {
>                         nn = &env->numa_nodes[i];
> -                       nr = max(nr, perf_cpu_map__max(nn->map).cpu);
> +                       nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
>                 }
> 
>                 nr++;
> --
> 2.47.1.613.gc27f4b7a9f-goog
>
Re: [PATCH v2] perf cpumap: Reduce cpu size from int to int16_t
Posted by David Laight 11 months, 2 weeks ago
On Fri, 3 Jan 2025 18:25:32 +0000
Leo Yan <leo.yan@arm.com> wrote:

> On Fri, Dec 20, 2024 at 10:52:07AM -0800, Ian Rogers wrote:
> > 
> > Fewer than 32k logical CPUs are currently supported by perf. A cpumap
> > is indexed by an integer (see perf_cpu_map__cpu) yielding a perf_cpu
> > that wraps a 4-byte int for the logical CPU - the wrapping is done
> > deliberately to avoid confusing a logical CPU with an index into a
> > cpumap. Using a 4-byte int within the perf_cpu is larger than required
> > so this patch reduces it to the 2-byte int16_t. For a cpumap
> > containing 16 entries this will reduce the array size from 64 to 32
> > bytes. For very large servers with lots of logical CPUs the size
> > savings will be greater.
> > 
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> > ---
> > v2. Rebase and tweak commit message.
> > ---
> >  tools/lib/perf/include/perf/cpumap.h |  3 ++-
> >  tools/perf/util/cpumap.c             | 13 ++++++++-----
> >  tools/perf/util/env.c                |  2 +-
> >  3 files changed, 11 insertions(+), 7 deletions(-)
> > 
> > diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> > index 188a667babc6..8c1ab0f9194e 100644
> > --- a/tools/lib/perf/include/perf/cpumap.h
> > +++ b/tools/lib/perf/include/perf/cpumap.h
> > @@ -4,10 +4,11 @@
> > 
> >  #include <perf/core.h>
> >  #include <stdbool.h>
> > +#include <stdint.h>
> > 
> >  /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
> >  struct perf_cpu {
> > -       int cpu;
> > +       int16_t cpu;
> >  };
> > 
> >  struct perf_cache {
> > diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> > index 27094211edd8..85e224d8631b 100644
> > --- a/tools/perf/util/cpumap.c
> > +++ b/tools/perf/util/cpumap.c
> > @@ -427,7 +427,7 @@ static void set_max_cpu_num(void)
> >  {
> >         const char *mnt;
> >         char path[PATH_MAX];
> > -       int ret = -1;
> > +       int max, ret = -1;
> > 
> >         /* set up default */
> >         max_cpu_num.cpu = 4096;
> > @@ -444,10 +444,12 @@ static void set_max_cpu_num(void)
> >                 goto out;
> >         }
> > 
> > -       ret = get_max_num(path, &max_cpu_num.cpu);
> > +       ret = get_max_num(path, &max);
> >         if (ret)
> >                 goto out;
> > 
> > +       max_cpu_num.cpu = max;  
> 
> I am concerned for the data conversion from int type to int16_t type.
> 
> The GCC option "-Wconversion" is not enabled in perf Makefile, unsafe
> data conversion is allowed.  A better way is to update argument type for
> get_max_num() for reading CPU number with int16_t type.

Or just avoid passing &int_var by using the return value instead.
It'll generate smaller, faster code.

	David
Re: [PATCH v2] perf cpumap: Reduce cpu size from int to int16_t
Posted by Ian Rogers 11 months, 2 weeks ago
On Fri, Jan 3, 2025 at 2:45 PM David Laight
<david.laight.linux@gmail.com> wrote:
>
> On Fri, 3 Jan 2025 18:25:32 +0000
> Leo Yan <leo.yan@arm.com> wrote:
>
> > On Fri, Dec 20, 2024 at 10:52:07AM -0800, Ian Rogers wrote:
> > >
> > > Fewer than 32k logical CPUs are currently supported by perf. A cpumap
> > > is indexed by an integer (see perf_cpu_map__cpu) yielding a perf_cpu
> > > that wraps a 4-byte int for the logical CPU - the wrapping is done
> > > deliberately to avoid confusing a logical CPU with an index into a
> > > cpumap. Using a 4-byte int within the perf_cpu is larger than required
> > > so this patch reduces it to the 2-byte int16_t. For a cpumap
> > > containing 16 entries this will reduce the array size from 64 to 32
> > > bytes. For very large servers with lots of logical CPUs the size
> > > savings will be greater.
> > >
> > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> > > ---
> > > v2. Rebase and tweak commit message.
> > > ---
> > >  tools/lib/perf/include/perf/cpumap.h |  3 ++-
> > >  tools/perf/util/cpumap.c             | 13 ++++++++-----
> > >  tools/perf/util/env.c                |  2 +-
> > >  3 files changed, 11 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> > > index 188a667babc6..8c1ab0f9194e 100644
> > > --- a/tools/lib/perf/include/perf/cpumap.h
> > > +++ b/tools/lib/perf/include/perf/cpumap.h
> > > @@ -4,10 +4,11 @@
> > >
> > >  #include <perf/core.h>
> > >  #include <stdbool.h>
> > > +#include <stdint.h>
> > >
> > >  /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
> > >  struct perf_cpu {
> > > -       int cpu;
> > > +       int16_t cpu;
> > >  };
> > >
> > >  struct perf_cache {
> > > diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> > > index 27094211edd8..85e224d8631b 100644
> > > --- a/tools/perf/util/cpumap.c
> > > +++ b/tools/perf/util/cpumap.c
> > > @@ -427,7 +427,7 @@ static void set_max_cpu_num(void)
> > >  {
> > >         const char *mnt;
> > >         char path[PATH_MAX];
> > > -       int ret = -1;
> > > +       int max, ret = -1;
> > >
> > >         /* set up default */
> > >         max_cpu_num.cpu = 4096;
> > > @@ -444,10 +444,12 @@ static void set_max_cpu_num(void)
> > >                 goto out;
> > >         }
> > >
> > > -       ret = get_max_num(path, &max_cpu_num.cpu);
> > > +       ret = get_max_num(path, &max);
> > >         if (ret)
> > >                 goto out;
> > >
> > > +       max_cpu_num.cpu = max;
> >
> > I am concerned for the data conversion from int type to int16_t type.
> >
> > The GCC option "-Wconversion" is not enabled in perf Makefile, unsafe
> > data conversion is allowed.  A better way is to update argument type for
> > get_max_num() for reading CPU number with int16_t type.
>
> Or just avoid passing &int_var by using the return value instead.
> It'll generate smaller, faster code.

Thanks Leo and David. Changing the argument to get_max_num isn't
possible as the function is used by set_max_node_num and the %d
modifier would need to be different for each, etc. Changing the return
type would have implications as the function uses the int encoding
either 0 as success or a negative value encoding -errno, so there's
the potential for collision between read values from the path and
error numbers. Performance isn't hugely critical here too. I'll send
out a v3 to add a bound check to the int to int16_t assignment, which
hopefully addresses Leo's Wconversion worry.

Thanks,
Ian