[PATCH v3 20/50] perf record: Be lazier in allocating lost samples buffer

Ian Rogers posted 50 patches 2 years, 2 months ago
There is a newer version of this series
[PATCH v3 20/50] perf record: Be lazier in allocating lost samples buffer
Posted by Ian Rogers 2 years, 2 months ago
Wait until a lost sample occurs to allocate the lost samples buffer,
often the buffer isn't necessary. This saves a 64kb allocation and
5.3kb of peak memory consumption.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-record.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 9b4f3805ca92..b6c8c1371b39 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1924,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
 static void record__read_lost_samples(struct record *rec)
 {
 	struct perf_session *session = rec->session;
-	struct perf_record_lost_samples *lost;
+	struct perf_record_lost_samples *lost = NULL;
 	struct evsel *evsel;
 
 	/* there was an error during record__open */
 	if (session->evlist == NULL)
 		return;
 
-	lost = zalloc(PERF_SAMPLE_MAX_SIZE);
-	if (lost == NULL) {
-		pr_debug("Memory allocation failed\n");
-		return;
-	}
-
-	lost->header.type = PERF_RECORD_LOST_SAMPLES;
-
 	evlist__for_each_entry(session->evlist, evsel) {
 		struct xyarray *xy = evsel->core.sample_id;
 		u64 lost_count;
@@ -1961,6 +1953,14 @@ static void record__read_lost_samples(struct record *rec)
 				}
 
 				if (count.lost) {
+					if (!lost) {
+						lost = zalloc(PERF_SAMPLE_MAX_SIZE);
+						if (!lost) {
+							pr_debug("Memory allocation failed\n");
+							return;
+						}
+						lost->header.type = PERF_RECORD_LOST_SAMPLES;
+					}
 					__record__save_lost_samples(rec, evsel, lost,
 								    x, y, count.lost, 0);
 				}
@@ -1968,9 +1968,18 @@ static void record__read_lost_samples(struct record *rec)
 		}
 
 		lost_count = perf_bpf_filter__lost_count(evsel);
-		if (lost_count)
+		if (lost_count) {
+			if (!lost) {
+				lost = zalloc(PERF_SAMPLE_MAX_SIZE);
+				if (!lost) {
+					pr_debug("Memory allocation failed\n");
+					return;
+				}
+				lost->header.type = PERF_RECORD_LOST_SAMPLES;
+			}
 			__record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
 						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
+		}
 	}
 out:
 	free(lost);
-- 
2.42.0.758.gaed0368e0e-goog
Re: [PATCH v3 20/50] perf record: Be lazier in allocating lost samples buffer
Posted by Namhyung Kim 2 years, 2 months ago
On Tue, Oct 24, 2023 at 3:25 PM Ian Rogers <irogers@google.com> wrote:
>
> Wait until a lost sample occurs to allocate the lost samples buffer,
> often the buffer isn't necessary. This saves a 64kb allocation and
> 5.3kb of peak memory consumption.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/builtin-record.c | 29 +++++++++++++++++++----------
>  1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 9b4f3805ca92..b6c8c1371b39 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1924,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
>  static void record__read_lost_samples(struct record *rec)
>  {
>         struct perf_session *session = rec->session;
> -       struct perf_record_lost_samples *lost;
> +       struct perf_record_lost_samples *lost = NULL;
>         struct evsel *evsel;
>
>         /* there was an error during record__open */
>         if (session->evlist == NULL)
>                 return;
>
> -       lost = zalloc(PERF_SAMPLE_MAX_SIZE);

Sorry for naively choosing the max size. :-p
It could be sizeof(*lost) + machine->id_hdr_size.

Thanks,
Namhyung


> -       if (lost == NULL) {
> -               pr_debug("Memory allocation failed\n");
> -               return;
> -       }
> -
> -       lost->header.type = PERF_RECORD_LOST_SAMPLES;
> -
>         evlist__for_each_entry(session->evlist, evsel) {
>                 struct xyarray *xy = evsel->core.sample_id;
>                 u64 lost_count;
> @@ -1961,6 +1953,14 @@ static void record__read_lost_samples(struct record *rec)
>                                 }
>
>                                 if (count.lost) {
> +                                       if (!lost) {
> +                                               lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> +                                               if (!lost) {
> +                                                       pr_debug("Memory allocation failed\n");
> +                                                       return;
> +                                               }
> +                                               lost->header.type = PERF_RECORD_LOST_SAMPLES;
> +                                       }
>                                         __record__save_lost_samples(rec, evsel, lost,
>                                                                     x, y, count.lost, 0);
>                                 }
> @@ -1968,9 +1968,18 @@ static void record__read_lost_samples(struct record *rec)
>                 }
>
>                 lost_count = perf_bpf_filter__lost_count(evsel);
> -               if (lost_count)
> +               if (lost_count) {
> +                       if (!lost) {
> +                               lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> +                               if (!lost) {
> +                                       pr_debug("Memory allocation failed\n");
> +                                       return;
> +                               }
> +                               lost->header.type = PERF_RECORD_LOST_SAMPLES;
> +                       }
>                         __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
>                                                     PERF_RECORD_MISC_LOST_SAMPLES_BPF);
> +               }
>         }
>  out:
>         free(lost);
> --
> 2.42.0.758.gaed0368e0e-goog
>
Re: [PATCH v3 20/50] perf record: Be lazier in allocating lost samples buffer
Posted by Yang Jihong 2 years, 2 months ago
Hello,

On 2023/10/25 6:23, Ian Rogers wrote:
> Wait until a lost sample occurs to allocate the lost samples buffer,
> often the buffer isn't necessary. This saves a 64kb allocation and
> 5.3kb of peak memory consumption.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>   tools/perf/builtin-record.c | 29 +++++++++++++++++++----------
>   1 file changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 9b4f3805ca92..b6c8c1371b39 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1924,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
>   static void record__read_lost_samples(struct record *rec)
>   {
>   	struct perf_session *session = rec->session;
> -	struct perf_record_lost_samples *lost;
> +	struct perf_record_lost_samples *lost = NULL;
>   	struct evsel *evsel;
>   
>   	/* there was an error during record__open */
>   	if (session->evlist == NULL)
>   		return;
>   
> -	lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> -	if (lost == NULL) {
> -		pr_debug("Memory allocation failed\n");
> -		return;
> -	}
> -
> -	lost->header.type = PERF_RECORD_LOST_SAMPLES;
> -
>   	evlist__for_each_entry(session->evlist, evsel) {
>   		struct xyarray *xy = evsel->core.sample_id;
>   		u64 lost_count;
> @@ -1961,6 +1953,14 @@ static void record__read_lost_samples(struct record *rec)
>   				}
>   
>   				if (count.lost) {
> +					if (!lost) {
> +						lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> +						if (!lost) {
> +							pr_debug("Memory allocation failed\n");
> +							return;
> +						}
> +						lost->header.type = PERF_RECORD_LOST_SAMPLES;
> +					}
>   					__record__save_lost_samples(rec, evsel, lost,
>   								    x, y, count.lost, 0);
>   				}
> @@ -1968,9 +1968,18 @@ static void record__read_lost_samples(struct record *rec)
>   		}
>   
>   		lost_count = perf_bpf_filter__lost_count(evsel);
> -		if (lost_count)
> +		if (lost_count) {
> +			if (!lost) {
> +				lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> +				if (!lost) {
> +					pr_debug("Memory allocation failed\n");
> +					return;
> +				}
> +				lost->header.type = PERF_RECORD_LOST_SAMPLES;
> +			}
>   			__record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
>   						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
> +		}
>   	}

Can zalloc for `lost` be moved to __record__save_lost_samples?
This simplifies the code.


diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index dcf288a4fb9a..8d2eb746031a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1888,14 +1888,25 @@ record__switch_output(struct record *rec, bool 
at_exit)
  }

  static void __record__save_lost_samples(struct record *rec, struct 
evsel *evsel,
-                                       struct perf_record_lost_samples 
*lost,
+                                       struct perf_record_lost_samples 
**plost,
                                         int cpu_idx, int thread_idx, 
u64 lost_count,
                                         u16 misc_flag)
  {
         struct perf_sample_id *sid;
         struct perf_sample sample = {};
+       struct perf_record_lost_samples *lost = *plost;
         int id_hdr_size;

+       if (!lost) {
+               lost = zalloc(PERF_SAMPLE_MAX_SIZE);
+               if (!lost) {
+                       pr_debug("Memory allocation failed\n");
+                       return;
+               }
+               lost->header.type = PERF_RECORD_LOST_SAMPLES;
+               *plost = lost;
+       }
+
         lost->lost = lost_count;
         if (evsel->core.ids) {
                 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, 
thread_idx);
@@ -1912,21 +1923,13 @@ static void __record__save_lost_samples(struct 
record *rec, struct evsel *evsel,
  static void record__read_lost_samples(struct record *rec)
  {
         struct perf_session *session = rec->session;
-       struct perf_record_lost_samples *lost;
+       struct perf_record_lost_samples *lost = NULL;
         struct evsel *evsel;

         /* there was an error during record__open */
         if (session->evlist == NULL)
                 return;

-       lost = zalloc(PERF_SAMPLE_MAX_SIZE);
-       if (lost == NULL) {
-               pr_debug("Memory allocation failed\n");
-               return;
-       }
-
-       lost->header.type = PERF_RECORD_LOST_SAMPLES;
-
         evlist__for_each_entry(session->evlist, evsel) {
                 struct xyarray *xy = evsel->core.sample_id;
                 u64 lost_count;
@@ -1949,7 +1952,7 @@ static void record__read_lost_samples(struct 
record *rec)
                                 }

                                 if (count.lost) {
-                                       __record__save_lost_samples(rec, 
evsel, lost,
+                                       __record__save_lost_samples(rec, 
evsel, &lost,
                                                                     x, 
y, count.lost, 0);
                                 }
                         }
@@ -1957,11 +1960,12 @@ static void record__read_lost_samples(struct 
record *rec)

                 lost_count = perf_bpf_filter__lost_count(evsel);
                 if (lost_count)
-                       __record__save_lost_samples(rec, evsel, lost, 0, 
0, lost_count,
+                       __record__save_lost_samples(rec, evsel, &lost, 
0, 0, lost_count,
 
PERF_RECORD_MISC_LOST_SAMPLES_BPF);
         }
  out:
-       free(lost);
+       if (lost)
+               free(lost);
  }

  static volatile sig_atomic_t workload_exec_errno;


Thanks,
Yang
Re: [PATCH v3 20/50] perf record: Be lazier in allocating lost samples buffer
Posted by Ian Rogers 2 years, 2 months ago
On Tue, Oct 24, 2023 at 8:44 PM Yang Jihong <yangjihong1@huawei.com> wrote:
>
> Hello,
>
> On 2023/10/25 6:23, Ian Rogers wrote:
> > Wait until a lost sample occurs to allocate the lost samples buffer,
> > often the buffer isn't necessary. This saves a 64kb allocation and
> > 5.3kb of peak memory consumption.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >   tools/perf/builtin-record.c | 29 +++++++++++++++++++----------
> >   1 file changed, 19 insertions(+), 10 deletions(-)
> >
> > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > index 9b4f3805ca92..b6c8c1371b39 100644
> > --- a/tools/perf/builtin-record.c
> > +++ b/tools/perf/builtin-record.c
> > @@ -1924,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
> >   static void record__read_lost_samples(struct record *rec)
> >   {
> >       struct perf_session *session = rec->session;
> > -     struct perf_record_lost_samples *lost;
> > +     struct perf_record_lost_samples *lost = NULL;
> >       struct evsel *evsel;
> >
> >       /* there was an error during record__open */
> >       if (session->evlist == NULL)
> >               return;
> >
> > -     lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> > -     if (lost == NULL) {
> > -             pr_debug("Memory allocation failed\n");
> > -             return;
> > -     }
> > -
> > -     lost->header.type = PERF_RECORD_LOST_SAMPLES;
> > -
> >       evlist__for_each_entry(session->evlist, evsel) {
> >               struct xyarray *xy = evsel->core.sample_id;
> >               u64 lost_count;
> > @@ -1961,6 +1953,14 @@ static void record__read_lost_samples(struct record *rec)
> >                               }
> >
> >                               if (count.lost) {
> > +                                     if (!lost) {
> > +                                             lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> > +                                             if (!lost) {
> > +                                                     pr_debug("Memory allocation failed\n");
> > +                                                     return;
> > +                                             }
> > +                                             lost->header.type = PERF_RECORD_LOST_SAMPLES;
> > +                                     }
> >                                       __record__save_lost_samples(rec, evsel, lost,
> >                                                                   x, y, count.lost, 0);
> >                               }
> > @@ -1968,9 +1968,18 @@ static void record__read_lost_samples(struct record *rec)
> >               }
> >
> >               lost_count = perf_bpf_filter__lost_count(evsel);
> > -             if (lost_count)
> > +             if (lost_count) {
> > +                     if (!lost) {
> > +                             lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> > +                             if (!lost) {
> > +                                     pr_debug("Memory allocation failed\n");
> > +                                     return;
> > +                             }
> > +                             lost->header.type = PERF_RECORD_LOST_SAMPLES;
> > +                     }
> >                       __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
> >                                                   PERF_RECORD_MISC_LOST_SAMPLES_BPF);
> > +             }
> >       }
>
> Can zalloc for `lost` be moved to __record__save_lost_samples?
> This simplifies the code.

Hmm.. seems marginal. This change makes the code not return in
record__read_lost_samples when the memory allocation fails, so I'm
50/50 on it.

Thanks,
Ian

>
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index dcf288a4fb9a..8d2eb746031a 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1888,14 +1888,25 @@ record__switch_output(struct record *rec, bool
> at_exit)
>   }
>
>   static void __record__save_lost_samples(struct record *rec, struct
> evsel *evsel,
> -                                       struct perf_record_lost_samples
> *lost,
> +                                       struct perf_record_lost_samples
> **plost,
>                                          int cpu_idx, int thread_idx,
> u64 lost_count,
>                                          u16 misc_flag)
>   {
>          struct perf_sample_id *sid;
>          struct perf_sample sample = {};
> +       struct perf_record_lost_samples *lost = *plost;
>          int id_hdr_size;
>
> +       if (!lost) {
> +               lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> +               if (!lost) {
> +                       pr_debug("Memory allocation failed\n");
> +                       return;
> +               }
> +               lost->header.type = PERF_RECORD_LOST_SAMPLES;
> +               *plost = lost;
> +       }
> +
>          lost->lost = lost_count;
>          if (evsel->core.ids) {
>                  sid = xyarray__entry(evsel->core.sample_id, cpu_idx,
> thread_idx);
> @@ -1912,21 +1923,13 @@ static void __record__save_lost_samples(struct
> record *rec, struct evsel *evsel,
>   static void record__read_lost_samples(struct record *rec)
>   {
>          struct perf_session *session = rec->session;
> -       struct perf_record_lost_samples *lost;
> +       struct perf_record_lost_samples *lost = NULL;
>          struct evsel *evsel;
>
>          /* there was an error during record__open */
>          if (session->evlist == NULL)
>                  return;
>
> -       lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> -       if (lost == NULL) {
> -               pr_debug("Memory allocation failed\n");
> -               return;
> -       }
> -
> -       lost->header.type = PERF_RECORD_LOST_SAMPLES;
> -
>          evlist__for_each_entry(session->evlist, evsel) {
>                  struct xyarray *xy = evsel->core.sample_id;
>                  u64 lost_count;
> @@ -1949,7 +1952,7 @@ static void record__read_lost_samples(struct
> record *rec)
>                                  }
>
>                                  if (count.lost) {
> -                                       __record__save_lost_samples(rec,
> evsel, lost,
> +                                       __record__save_lost_samples(rec,
> evsel, &lost,
>                                                                      x,
> y, count.lost, 0);
>                                  }
>                          }
> @@ -1957,11 +1960,12 @@ static void record__read_lost_samples(struct
> record *rec)
>
>                  lost_count = perf_bpf_filter__lost_count(evsel);
>                  if (lost_count)
> -                       __record__save_lost_samples(rec, evsel, lost, 0,
> 0, lost_count,
> +                       __record__save_lost_samples(rec, evsel, &lost,
> 0, 0, lost_count,
>
> PERF_RECORD_MISC_LOST_SAMPLES_BPF);
>          }
>   out:
> -       free(lost);
> +       if (lost)
> +               free(lost);
>   }
>
>   static volatile sig_atomic_t workload_exec_errno;
>
>
> Thanks,
> Yang
>
Re: [PATCH v3 20/50] perf record: Be lazier in allocating lost samples buffer
Posted by Namhyung Kim 2 years, 2 months ago
On Wed, Oct 25, 2023 at 10:01 AM Ian Rogers <irogers@google.com> wrote:
>
> On Tue, Oct 24, 2023 at 8:44 PM Yang Jihong <yangjihong1@huawei.com> wrote:
> >
> > Hello,
> >
> > On 2023/10/25 6:23, Ian Rogers wrote:
> > > Wait until a lost sample occurs to allocate the lost samples buffer,
> > > often the buffer isn't necessary. This saves a 64kb allocation and
> > > 5.3kb of peak memory consumption.
> > >
> > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > ---
> > >   tools/perf/builtin-record.c | 29 +++++++++++++++++++----------
> > >   1 file changed, 19 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > > index 9b4f3805ca92..b6c8c1371b39 100644
> > > --- a/tools/perf/builtin-record.c
> > > +++ b/tools/perf/builtin-record.c
> > > @@ -1924,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
> > >   static void record__read_lost_samples(struct record *rec)
> > >   {
> > >       struct perf_session *session = rec->session;
> > > -     struct perf_record_lost_samples *lost;
> > > +     struct perf_record_lost_samples *lost = NULL;
> > >       struct evsel *evsel;
> > >
> > >       /* there was an error during record__open */
> > >       if (session->evlist == NULL)
> > >               return;
> > >
> > > -     lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> > > -     if (lost == NULL) {
> > > -             pr_debug("Memory allocation failed\n");
> > > -             return;
> > > -     }
> > > -
> > > -     lost->header.type = PERF_RECORD_LOST_SAMPLES;
> > > -
> > >       evlist__for_each_entry(session->evlist, evsel) {
> > >               struct xyarray *xy = evsel->core.sample_id;
> > >               u64 lost_count;
> > > @@ -1961,6 +1953,14 @@ static void record__read_lost_samples(struct record *rec)
> > >                               }
> > >
> > >                               if (count.lost) {
> > > +                                     if (!lost) {
> > > +                                             lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> > > +                                             if (!lost) {
> > > +                                                     pr_debug("Memory allocation failed\n");
> > > +                                                     return;
> > > +                                             }
> > > +                                             lost->header.type = PERF_RECORD_LOST_SAMPLES;
> > > +                                     }
> > >                                       __record__save_lost_samples(rec, evsel, lost,
> > >                                                                   x, y, count.lost, 0);
> > >                               }
> > > @@ -1968,9 +1968,18 @@ static void record__read_lost_samples(struct record *rec)
> > >               }
> > >
> > >               lost_count = perf_bpf_filter__lost_count(evsel);
> > > -             if (lost_count)
> > > +             if (lost_count) {
> > > +                     if (!lost) {
> > > +                             lost = zalloc(PERF_SAMPLE_MAX_SIZE);
> > > +                             if (!lost) {
> > > +                                     pr_debug("Memory allocation failed\n");
> > > +                                     return;
> > > +                             }
> > > +                             lost->header.type = PERF_RECORD_LOST_SAMPLES;
> > > +                     }
> > >                       __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
> > >                                                   PERF_RECORD_MISC_LOST_SAMPLES_BPF);
> > > +             }
> > >       }
> >
> > Can zalloc for `lost` be moved to __record__save_lost_samples?
> > This simplifies the code.
>
> Hmm.. seems marginal. This change makes the code not return in
> record__read_lost_samples when the memory allocation fails, so I'm
> 50/50 on it.

Maybe you can make it return the failure.

> >
> >
> > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > index dcf288a4fb9a..8d2eb746031a 100644
> > --- a/tools/perf/builtin-record.c
> > +++ b/tools/perf/builtin-record.c
[SNIP]
> >   out:
> > -       free(lost);
> > +       if (lost)
> > +               free(lost);

This is unnecessary as free() can handle NULL pointers.

Thanks,
Namhyung


> >   }
> >
> >   static volatile sig_atomic_t workload_exec_errno;
> >
> >
> > Thanks,
> > Yang
> >