tools/lib/perf/include/perf/cpumap.h | 3 ++- tools/perf/util/cpumap.c | 13 ++++++++----- tools/perf/util/env.c | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-)
Fewer than 32k CPUs are currently supported by perf. A cpumap stores
an int per CPU, so its size is 4 times the number of CPUs in the
cpumap. We can reduce the size of the int to an int16_t, saving 2
bytes per CPU in the map.
Signed-off-by: Ian Rogers <irogers@google.com>
---
This change is on top of:
https://lore.kernel.org/lkml/20241206044035.1062032-1-irogers@google.com/
---
tools/lib/perf/include/perf/cpumap.h | 3 ++-
tools/perf/util/cpumap.c | 13 ++++++++-----
tools/perf/util/env.c | 2 +-
3 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index cbb65e55fc67..760a9aae9884 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -4,10 +4,11 @@
#include <perf/core.h>
#include <stdbool.h>
+#include <stdint.h>
/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
struct perf_cpu {
- int cpu;
+ int16_t cpu;
};
struct perf_cache {
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 27094211edd8..85e224d8631b 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -427,7 +427,7 @@ static void set_max_cpu_num(void)
{
const char *mnt;
char path[PATH_MAX];
- int ret = -1;
+ int max, ret = -1;
/* set up default */
max_cpu_num.cpu = 4096;
@@ -444,10 +444,12 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_cpu_num.cpu);
+ ret = get_max_num(path, &max);
if (ret)
goto out;
+ max_cpu_num.cpu = max;
+
/* get the highest present cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
if (ret >= PATH_MAX) {
@@ -455,8 +457,9 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_present_cpu_num.cpu);
-
+ ret = get_max_num(path, &max);
+ if (!ret)
+ max_present_cpu_num.cpu = max;
out:
if (ret)
pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
@@ -606,7 +609,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
#define COMMA first ? "" : ","
for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
- struct perf_cpu cpu = { .cpu = INT_MAX };
+ struct perf_cpu cpu = { .cpu = INT16_MAX };
bool last = i == perf_cpu_map__nr(map);
if (!last)
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index e2843ca2edd9..f1d7d22e7e98 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -531,7 +531,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
for (i = 0; i < env->nr_numa_nodes; i++) {
nn = &env->numa_nodes[i];
- nr = max(nr, perf_cpu_map__max(nn->map).cpu);
+ nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
}
nr++;
--
2.47.0.338.g60cca15819-goog
On Fri, 2024-12-06 at 21:21 -0800, Ian Rogers wrote:
> Fewer than 32k CPUs are currently supported by perf.
>
Minor suggestions for the commit log. Perhaps it is clearer to say
Perf does not support more than 32k CPUs.
> A cpumap stores
> an int per CPU, so its size is 4 times the number of CPUs in the
> cpumap.
>
Perf cpumap currently uses a 4 byte int to index CPU, where
a 2 byte int is sufficient to cover the CPU range needed.
Otherwise
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Tim
> We can reduce the size of the int to an int16_t, saving 2
> bytes per CPU in the map.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> This change is on top of:
> https://lore.kernel.org/lkml/20241206044035.1062032-1-irogers@google.com/
> ---
> tools/lib/perf/include/perf/cpumap.h | 3 ++-
> tools/perf/util/cpumap.c | 13 ++++++++-----
> tools/perf/util/env.c | 2 +-
> 3 files changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> index cbb65e55fc67..760a9aae9884 100644
> --- a/tools/lib/perf/include/perf/cpumap.h
> +++ b/tools/lib/perf/include/perf/cpumap.h
> @@ -4,10 +4,11 @@
>
> #include <perf/core.h>
> #include <stdbool.h>
> +#include <stdint.h>
>
> /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
> struct perf_cpu {
> - int cpu;
> + int16_t cpu;
> };
>
> struct perf_cache {
> diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> index 27094211edd8..85e224d8631b 100644
> --- a/tools/perf/util/cpumap.c
> +++ b/tools/perf/util/cpumap.c
> @@ -427,7 +427,7 @@ static void set_max_cpu_num(void)
> {
> const char *mnt;
> char path[PATH_MAX];
> - int ret = -1;
> + int max, ret = -1;
>
> /* set up default */
> max_cpu_num.cpu = 4096;
> @@ -444,10 +444,12 @@ static void set_max_cpu_num(void)
> goto out;
> }
>
> - ret = get_max_num(path, &max_cpu_num.cpu);
> + ret = get_max_num(path, &max);
> if (ret)
> goto out;
>
> + max_cpu_num.cpu = max;
> +
> /* get the highest present cpu number for a sparse allocation */
> ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
> if (ret >= PATH_MAX) {
> @@ -455,8 +457,9 @@ static void set_max_cpu_num(void)
> goto out;
> }
>
> - ret = get_max_num(path, &max_present_cpu_num.cpu);
> -
> + ret = get_max_num(path, &max);
> + if (!ret)
> + max_present_cpu_num.cpu = max;
> out:
> if (ret)
> pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
> @@ -606,7 +609,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
> #define COMMA first ? "" : ","
>
> for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
> - struct perf_cpu cpu = { .cpu = INT_MAX };
> + struct perf_cpu cpu = { .cpu = INT16_MAX };
> bool last = i == perf_cpu_map__nr(map);
>
> if (!last)
> diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
> index e2843ca2edd9..f1d7d22e7e98 100644
> --- a/tools/perf/util/env.c
> +++ b/tools/perf/util/env.c
> @@ -531,7 +531,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
>
> for (i = 0; i < env->nr_numa_nodes; i++) {
> nn = &env->numa_nodes[i];
> - nr = max(nr, perf_cpu_map__max(nn->map).cpu);
> + nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
> }
>
> nr++;
On Mon, Dec 9, 2024 at 10:16 AM Tim Chen <tim.c.chen@linux.intel.com> wrote:
>
> On Fri, 2024-12-06 at 21:21 -0800, Ian Rogers wrote:
> > Fewer than 32k CPUs are currently supported by perf.
> >
> Minor suggestions for the commit log. Perhaps it is clearer to say
>
> Perf does not support more than 32k CPUs.
>
> > A cpumap stores
> > an int per CPU, so its size is 4 times the number of CPUs in the
> > cpumap.
> >
> Perf cpumap currently uses a 4 byte int to index CPU, where
> a 2 byte int is sufficient to cover the CPU range needed.
>
> Otherwise
>
> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Thanks for the review. The commit message tweak you provide is
confusing. A cpumap doesn't use an int, a cpumap has an array of
perf_cpus that are ints; these are the things being reduced in size
here. The index into a cpumap is regularly used as a way of densely
encoding accesses into things like counters into uncore PMUs, so an
index is a distinct thing from a CPU and a cpumap. I'll see what I can
do to the commit message given both your and Leo's feedback but what
was originally written was the most technically accurate so it'll end
up likely rhyming with it.
Thanks,
Ian
> Tim
> > We can reduce the size of the int to an int16_t, saving 2
> > bytes per CPU in the map.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > This change is on top of:
> > https://lore.kernel.org/lkml/20241206044035.1062032-1-irogers@google.com/
> > ---
> > tools/lib/perf/include/perf/cpumap.h | 3 ++-
> > tools/perf/util/cpumap.c | 13 ++++++++-----
> > tools/perf/util/env.c | 2 +-
> > 3 files changed, 11 insertions(+), 7 deletions(-)
> >
> > diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> > index cbb65e55fc67..760a9aae9884 100644
> > --- a/tools/lib/perf/include/perf/cpumap.h
> > +++ b/tools/lib/perf/include/perf/cpumap.h
> > @@ -4,10 +4,11 @@
> >
> > #include <perf/core.h>
> > #include <stdbool.h>
> > +#include <stdint.h>
> >
> > /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
> > struct perf_cpu {
> > - int cpu;
> > + int16_t cpu;
> > };
> >
> > struct perf_cache {
> > diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> > index 27094211edd8..85e224d8631b 100644
> > --- a/tools/perf/util/cpumap.c
> > +++ b/tools/perf/util/cpumap.c
> > @@ -427,7 +427,7 @@ static void set_max_cpu_num(void)
> > {
> > const char *mnt;
> > char path[PATH_MAX];
> > - int ret = -1;
> > + int max, ret = -1;
> >
> > /* set up default */
> > max_cpu_num.cpu = 4096;
> > @@ -444,10 +444,12 @@ static void set_max_cpu_num(void)
> > goto out;
> > }
> >
> > - ret = get_max_num(path, &max_cpu_num.cpu);
> > + ret = get_max_num(path, &max);
> > if (ret)
> > goto out;
> >
> > + max_cpu_num.cpu = max;
> > +
> > /* get the highest present cpu number for a sparse allocation */
> > ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
> > if (ret >= PATH_MAX) {
> > @@ -455,8 +457,9 @@ static void set_max_cpu_num(void)
> > goto out;
> > }
> >
> > - ret = get_max_num(path, &max_present_cpu_num.cpu);
> > -
> > + ret = get_max_num(path, &max);
> > + if (!ret)
> > + max_present_cpu_num.cpu = max;
> > out:
> > if (ret)
> > pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
> > @@ -606,7 +609,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
> > #define COMMA first ? "" : ","
> >
> > for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
> > - struct perf_cpu cpu = { .cpu = INT_MAX };
> > + struct perf_cpu cpu = { .cpu = INT16_MAX };
> > bool last = i == perf_cpu_map__nr(map);
> >
> > if (!last)
> > diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
> > index e2843ca2edd9..f1d7d22e7e98 100644
> > --- a/tools/perf/util/env.c
> > +++ b/tools/perf/util/env.c
> > @@ -531,7 +531,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
> >
> > for (i = 0; i < env->nr_numa_nodes; i++) {
> > nn = &env->numa_nodes[i];
> > - nr = max(nr, perf_cpu_map__max(nn->map).cpu);
> > + nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
> > }
> >
> > nr++;
>
Hi Ian,
On Fri, Dec 06, 2024 at 09:21:33PM -0800, Ian Rogers wrote:
>
> Fewer than 32k CPUs are currently supported by perf. A cpumap stores
> an int per CPU, so its size is 4 times the number of CPUs in the
> cpumap.
Maybe I have a stupid question. An int value has 4 bytes; on the other
hand, we need 2 bytes to store a 32k value (even 4096 needs 2 bytes
for storing the value).
How can we conclude "its size is 4 times the number of CPUs"?
> We can reduce the size of the int to an int16_t, saving 2
> bytes per CPU in the map.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/lib/perf/include/perf/cpumap.h | 3 ++-
> tools/perf/util/cpumap.c | 13 ++++++++-----
> tools/perf/util/env.c | 2 +-
> 3 files changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> index cbb65e55fc67..760a9aae9884 100644
> --- a/tools/lib/perf/include/perf/cpumap.h
> +++ b/tools/lib/perf/include/perf/cpumap.h
> @@ -4,10 +4,11 @@
>
> #include <perf/core.h>
> #include <stdbool.h>
> +#include <stdint.h>
>
> /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
> struct perf_cpu {
> - int cpu;
> + int16_t cpu;
> };
>
> struct perf_cache {
> diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> index 27094211edd8..85e224d8631b 100644
> --- a/tools/perf/util/cpumap.c
> +++ b/tools/perf/util/cpumap.c
> @@ -427,7 +427,7 @@ static void set_max_cpu_num(void)
> {
> const char *mnt;
> char path[PATH_MAX];
> - int ret = -1;
> + int max, ret = -1;
>
> /* set up default */
> max_cpu_num.cpu = 4096;
> @@ -444,10 +444,12 @@ static void set_max_cpu_num(void)
> goto out;
> }
>
> - ret = get_max_num(path, &max_cpu_num.cpu);
> + ret = get_max_num(path, &max);
> if (ret)
> goto out;
>
> + max_cpu_num.cpu = max;
> +
> /* get the highest present cpu number for a sparse allocation */
> ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
> if (ret >= PATH_MAX) {
> @@ -455,8 +457,9 @@ static void set_max_cpu_num(void)
> goto out;
> }
>
> - ret = get_max_num(path, &max_present_cpu_num.cpu);
> -
> + ret = get_max_num(path, &max);
> + if (!ret)
> + max_present_cpu_num.cpu = max;
This is an improvement for max CPU number, but it is irrelevant to
changing the CPU type to int16_t. It is better to split it into a new
patch.
If we get an error for max present CPU number, should we roll back to 4096
for both max_cpu_num and max_present_cpu_num?
Thanks,
Leo
> out:
> if (ret)
> pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
> @@ -606,7 +609,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
> #define COMMA first ? "" : ","
>
> for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
> - struct perf_cpu cpu = { .cpu = INT_MAX };
> + struct perf_cpu cpu = { .cpu = INT16_MAX };
> bool last = i == perf_cpu_map__nr(map);
>
> if (!last)
> diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
> index e2843ca2edd9..f1d7d22e7e98 100644
> --- a/tools/perf/util/env.c
> +++ b/tools/perf/util/env.c
> @@ -531,7 +531,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
>
> for (i = 0; i < env->nr_numa_nodes; i++) {
> nn = &env->numa_nodes[i];
> - nr = max(nr, perf_cpu_map__max(nn->map).cpu);
> + nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
> }
>
> nr++;
> --
> 2.47.0.338.g60cca15819-goog
>
On Mon, Dec 9, 2024 at 12:55 AM Leo Yan <leo.yan@arm.com> wrote:
>
> Hi Ian,
>
> On Fri, Dec 06, 2024 at 09:21:33PM -0800, Ian Rogers wrote:
> >
> > Fewer than 32k CPUs are currently supported by perf. A cpumap stores
> > an int per CPU, so its size is 4 times the number of CPUs in the
> > cpumap.
>
> Maybe I have a stupid question. An int value has 4 bytes; on the other
> hand, we need 2 bytes to store a 32k value (even 4096 needs 2 bytes
> for storing the value).
>
> How can we conclude "its size is 4 times the number of CPUs"?
I'm just trying to say "sizeof(int) == 4" and we have as many of them
as we have logical CPUs.
> > We can reduce the size of the int to an int16_t, saving 2
> > bytes per CPU in the map.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > tools/lib/perf/include/perf/cpumap.h | 3 ++-
> > tools/perf/util/cpumap.c | 13 ++++++++-----
> > tools/perf/util/env.c | 2 +-
> > 3 files changed, 11 insertions(+), 7 deletions(-)
> >
> > diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> > index cbb65e55fc67..760a9aae9884 100644
> > --- a/tools/lib/perf/include/perf/cpumap.h
> > +++ b/tools/lib/perf/include/perf/cpumap.h
> > @@ -4,10 +4,11 @@
> >
> > #include <perf/core.h>
> > #include <stdbool.h>
> > +#include <stdint.h>
> >
> > /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
> > struct perf_cpu {
> > - int cpu;
> > + int16_t cpu;
> > };
> >
> > struct perf_cache {
> > diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> > index 27094211edd8..85e224d8631b 100644
> > --- a/tools/perf/util/cpumap.c
> > +++ b/tools/perf/util/cpumap.c
> > @@ -427,7 +427,7 @@ static void set_max_cpu_num(void)
> > {
> > const char *mnt;
> > char path[PATH_MAX];
> > - int ret = -1;
> > + int max, ret = -1;
> >
> > /* set up default */
> > max_cpu_num.cpu = 4096;
> > @@ -444,10 +444,12 @@ static void set_max_cpu_num(void)
> > goto out;
> > }
> >
> > - ret = get_max_num(path, &max_cpu_num.cpu);
> > + ret = get_max_num(path, &max);
> > if (ret)
> > goto out;
> >
> > + max_cpu_num.cpu = max;
> > +
> > /* get the highest present cpu number for a sparse allocation */
> > ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
> > if (ret >= PATH_MAX) {
> > @@ -455,8 +457,9 @@ static void set_max_cpu_num(void)
> > goto out;
> > }
> >
> > - ret = get_max_num(path, &max_present_cpu_num.cpu);
> > -
> > + ret = get_max_num(path, &max);
> > + if (!ret)
> > + max_present_cpu_num.cpu = max;
>
> This is an improvement for max CPU number, but it is irrelevant to
> changing the CPU type to int16_t. It is better to split it into a new
> patch.
The change is done this way as passing an int16_t* to a function
expecting an int* causes compiler warnings.
> If we get an error for max present CPU number, should we roll back to 4096
> for both max_cpu_num and max_present_cpu_num?
We could. It is probably safest to keep the existing behavior.
Thanks,
Ian
> Thanks,
> Leo
>
> > out:
> > if (ret)
> > pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
> > @@ -606,7 +609,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
> > #define COMMA first ? "" : ","
> >
> > for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
> > - struct perf_cpu cpu = { .cpu = INT_MAX };
> > + struct perf_cpu cpu = { .cpu = INT16_MAX };
> > bool last = i == perf_cpu_map__nr(map);
> >
> > if (!last)
> > diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
> > index e2843ca2edd9..f1d7d22e7e98 100644
> > --- a/tools/perf/util/env.c
> > +++ b/tools/perf/util/env.c
> > @@ -531,7 +531,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
> >
> > for (i = 0; i < env->nr_numa_nodes; i++) {
> > nn = &env->numa_nodes[i];
> > - nr = max(nr, perf_cpu_map__max(nn->map).cpu);
> > + nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
> > }
> >
> > nr++;
> > --
> > 2.47.0.338.g60cca15819-goog
> >
© 2016 - 2025 Red Hat, Inc.