From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4A6902C2AA1; Wed, 30 Apr 2025 20:55:50 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046551; cv=none; b=eaolyptMEdw4ZEx34kil6KMBx6nVNAPap4jigEtn7kCQ6FQtfpc5rnj70i7LP7hQ4dq+c1rukjepCpyWrTkldr4t1TQ5pHFS8LQfjiHvb/N+0aYCV8IUsg/oiQBGHnP12w4OGzeY0tURIUm33qUFlef6qZTByFWFFz7fbO0BSqw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046551; c=relaxed/simple; bh=YuF+Y8lZeJUIfvxlu78X3rIBrk+bdi/gly1rdAM22iA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=T6ZZT3jg3Epexs7Fpw68pnlbfTh8/m6jK9+FoHYcfPvNY6pZIdt75/n7mXLrhGb1OpF1rJaz4+nsuJg/6KBWVgGi+zxLhCgKzoRycUOUtTt+1i1FYO99Ugu4MCQkL3NJtQLMECbqt+U7cBtvDGJRPPaPN6J4mlU779uugurYBVw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=lNsp71HH; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="lNsp71HH" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3EDF0C4CEED; Wed, 30 Apr 2025 20:55:50 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046550; bh=YuF+Y8lZeJUIfvxlu78X3rIBrk+bdi/gly1rdAM22iA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=lNsp71HHnYHBZ65kQLDdo16/9nhDVe9NbJ2kYeYWWgduY70k7w0lhdW4kxP16ekBX UJN7iOJvMEUIbELQIk5daL8TsyxfpouTQNAUADwd5A0B/delqPERQx6y6GIHSQQ+2G pvfRSyaln7Emfj3LA8bELNh+MxRzuWQze9cdB2d49aL/Y6i+G8GKHM+ucXSnsJpdlA rUEODn+RTKrqc/soEyZ1llJbqUfm+dD6k/aUPQrNDdH0yzFn3NjgKzo2tEz6++jla6 
kq/INArrvsi0i4i73p+pQMX2xFYnjqKlqZ9XIhGA+dIvStW3vIo0JvH/JywqEhCecM 0cyW3ZuplyDGQ== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 01/11] perf hist: Remove output field from sort-list properly Date: Wed, 30 Apr 2025 13:55:38 -0700 Message-ID: <20250430205548.789750-2-namhyung@kernel.org> X-Mailer: git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When it removes an output format for cancelled children or latency, it should delete itself from the sort list as well. Otherwise assertion in fmt_free() will fire. $ perf report -H --stdio perf: ui/hist.c:603: fmt_free: Assertion `!(!list_empty(&fmt->sort_list))= ' failed. Aborted (core dumped) Also convert to perf_hpp__column_unregister() for the same open codes. 
Fixes: dbd11b6bdab12f60 ("perf hist: Remove formats in hierarchy when cance= l children") Signed-off-by: Namhyung Kim --- tools/perf/ui/hist.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 3ffce69fc823e0bf..bc0689fceeb18bde 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -696,6 +696,7 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_= list *list, static void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { list_del_init(&format->list); + list_del_init(&format->sort_list); fmt_free(format); } =20 @@ -818,18 +819,12 @@ void perf_hpp__reset_output_field(struct perf_hpp_lis= t *list) struct perf_hpp_fmt *fmt, *tmp; =20 /* reset output fields */ - perf_hpp_list__for_each_format_safe(list, fmt, tmp) { - list_del_init(&fmt->list); - list_del_init(&fmt->sort_list); - fmt_free(fmt); - } + perf_hpp_list__for_each_format_safe(list, fmt, tmp) + perf_hpp__column_unregister(fmt); =20 /* reset sort keys */ - perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) { - list_del_init(&fmt->list); - list_del_init(&fmt->sort_list); - fmt_free(fmt); - } + perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) + perf_hpp__column_unregister(fmt); } =20 /* --=20 2.49.0.906.g1f30a19c02-goog From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 025062D0AC1; Wed, 30 Apr 2025 20:55:51 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046552; cv=none; b=LyAyWyLc+RPRJatWfyHyDcxfRUPIrDdUPUg/A8dKoamfNkhA5Fb1/u6N1fmXeTwDOF80impkqtrxMdVIIn2VYNzN3rIubED3Ea2bsr1NZ3MFlU620+FD8BScEdcu3+Br5rt1wWrqt+9rGIfp4FNvBqZBb6yzSuVZ8Z0t1uNtYhU= 
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046552; c=relaxed/simple; bh=v2DS2F47cm5vHlwHkpeETqDniZGIwCDv5zjC3kDKgeM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=gCOp7JIOO7ww3SMFHWfEEYM9nDoDCBRrvD90j5ftTyIrmI5yvAJpu8FZv/M62u/F+4fI/jTHNspcf1oRrmzBh4M7P0l4nTykyl7EloWz8pMOOFj7JMYU9qMBAnW4t6y7DOwfFvdPxYJVuDVtsNE+GTm4uebLN4VjeBUruSexdfE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=qx6dLuxB; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="qx6dLuxB" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E6DA4C4CEEA; Wed, 30 Apr 2025 20:55:50 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046551; bh=v2DS2F47cm5vHlwHkpeETqDniZGIwCDv5zjC3kDKgeM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=qx6dLuxBAXI3qKfUdDyf+iSTI8EAKyxhhb7awzsGRy/VLL62/T0isaigpVSY6A7If 0YRxmJPVA+kEDLPLWaXB69YacZUnP+HCV8MgAx9axW2DmI91h+kXvBdqrzvbW2R6lk 5wdnidaU2v4KfI4w/OsVl4u9LhTqxy5I00sSJluwVtmjLqupHnNWrddxHpypIFrnr/ z1UeOHn/hT5uFtA8x6v2L+ATgIFs75Awg2uAI9klOxMCjEAzw6uv+daJhHyelhUsaJ BwWJXHRhgNOq2pC0Uj4MObfRvFTKYv5+6CCgRoWo326WAtfMLd1tpYxZhED0MH3KoN cff741D3kAzqQ== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 02/11] perf record: Add --sample-mem-info option Date: Wed, 30 Apr 2025 13:55:39 -0700 Message-ID: <20250430205548.789750-3-namhyung@kernel.org> X-Mailer: git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: 
List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" There's no way to enable PERF_SAMPLE_DATA_SRC without PERF_SAMPLE_ADDR which brings a lot of overhead due to the number of MMAP[2] records. Let's add a new option to enable this information separately. Signed-off-by: Namhyung Kim --- tools/perf/Documentation/perf-record.txt | 7 ++++++- tools/perf/builtin-record.c | 6 ++++++ tools/perf/util/evsel.c | 2 +- tools/perf/util/record.h | 1 + 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Document= ation/perf-record.txt index c7fc1ba265e2755d..c59f1e79f2b4a6f8 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -340,7 +340,7 @@ OPTIONS =20 -d:: --data:: - Record the sample virtual addresses. + Record the sample virtual addresses. Implies --sample-mem-info. =20 --phys-data:: Record the sample physical addresses. @@ -368,6 +368,11 @@ OPTIONS the sample_type member of the struct perf_event_attr argument to the perf_event_open system call. =20 +--sample-mem-info:: + Record the sample data source information for memory operations. + It requires hardware support and may work only on specific events. + Please consider using 'perf mem record' instead if you're not sure. + -n:: --no-samples:: Don't sample. 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ba20bf7c011d7765..6637a3acb1f1295f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3436,6 +3436,8 @@ static struct option __record_options[] =3D { "Record the sampled data address data page size"), OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, "Record the sampled code address (ip) page size"), + OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src, + "Record the data source for memory operations"), OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample = cpu"), OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, "Record the sample identifier"), @@ -4130,6 +4132,10 @@ int cmd_record(int argc, const char **argv) goto out_opts; } =20 + /* For backward compatibility, -d implies --sample-mem-info */ + if (rec->opts.sample_address) + rec->opts.sample_data_src =3D true; + /* * Allow aliases to facilitate the lookup of symbols for address * filters. Refer to auxtrace_parse_filters(). 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1d79ffecd41f10ec..0f86df259c822799 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1425,7 +1425,7 @@ void evsel__config(struct evsel *evsel, struct record= _opts *opts, evsel__set_sample_bit(evsel, CPU); } =20 - if (opts->sample_address) + if (opts->sample_data_src) evsel__set_sample_bit(evsel, DATA_SRC); =20 if (opts->sample_phys_addr) diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index a6566134e09e5b19..f1956c4db3195070 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -28,6 +28,7 @@ struct record_opts { bool sample_time_set; bool sample_cpu; bool sample_identifier; + bool sample_data_src; bool period; bool period_set; bool running_time; --=20 2.49.0.906.g1f30a19c02-goog From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BFD0B2D0AD6; Wed, 30 Apr 2025 20:55:52 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046552; cv=none; b=cIQ/SLD8kqtBH5CPFQ1YHZJ5xWdxFZ9lPgiFxa8mU6oSiWwyNjacml9HWd1t2A1Fu8t+Eg8cNPoacoc6V8vm/JX0EnCVYFCuD7p1RUS/TfNOGQjFbwFjJvqSUyR2KP5hYbUKvAXZSViObHTKWfjoPzyXz/hxIIvz+ufSbdThoXM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046552; c=relaxed/simple; bh=L4TzTyAJccQj99m5SJjBS6xHZW5xZZiPDpLmCh83ayg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=PTNHNcqLNGKTV1eLFexAAt0fI4RccLb/i/V5vC7ZPmlYxw0zj3bXZvwNSgdKnG1T/R3yCm0qS8Dl07GQ9AxLDk2NLBSxxo55SOflPRTnaA/Ffkee8Ljsy8HIg3dNvVzV/y/BueeT5wpjxp2NgYYBZo7Iol44pL6t9nPdcQW0fA8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit 
key) header.d=kernel.org header.i=@kernel.org header.b=lKhr351z; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="lKhr351z" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9CD51C4CEF3; Wed, 30 Apr 2025 20:55:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046552; bh=L4TzTyAJccQj99m5SJjBS6xHZW5xZZiPDpLmCh83ayg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=lKhr351z4IH8+s7+u9gilJe7pTmsA5NwuVovzJhVKcKGMKyQuDIuL/glN8uUtk7PN c1uwKmeSJQUXQpP3GFeL1xYpRe2uZH+NevktJgM7/qf8GDjqunyTjfdAjGs3v+yhUx ReHWYJJJQ5V9rwrDN7IKbNEsFHCiBTb+Z1YQrPAUvgXtBw8S0n3VZWP0dAHtk/Hwkl C3OfZSlZ8kVq6fsTYxZlZrkfLfyVcp/zRrXUsfXhUP7HI3TARvcBepWqw5vmzsQyse kuy4trWuVApDH7LM+ocLyhDw969hUHHe4DZUPpFWrP6TXhcYIeQdCk85MEQ3jqBCRQ 4ekO3NcdS3GIQ== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 03/11] perf hist: Support multi-line header Date: Wed, 30 Apr 2025 13:55:40 -0700 Message-ID: <20250430205548.789750-4-namhyung@kernel.org> X-Mailer: git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This is a preparation to support multi-line headers in perf mem report. Normal sort keys and output fields that don't have contents for multi- line will print the header string at the last line only. As we don't use multi-line headers normally, it should not have any changes in the output. 
Signed-off-by: Namhyung Kim --- tools/perf/ui/browsers/hists.c | 24 +++++++++----- tools/perf/ui/hist.c | 9 ++++-- tools/perf/ui/stdio/hist.c | 57 +++++++++++++++++++++------------- tools/perf/util/sort.c | 8 +++-- 4 files changed, 64 insertions(+), 34 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index cf022e92d06b9b28..67cbdec90d0bf0ea 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1686,7 +1686,8 @@ hists_browser__scnprintf_headers(struct hist_browser = *browser, char *buf, return ret; } =20 -static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser = *browser, char *buf, size_t size) +static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser = *browser, + char *buf, size_t size, int line) { struct hists *hists =3D browser->hists; struct perf_hpp dummy_hpp =3D { @@ -1712,7 +1713,7 @@ static int hists_browser__scnprintf_hierarchy_headers= (struct hist_browser *brows if (column++ < browser->b.horiz_scroll) continue; =20 - ret =3D fmt->header(fmt, &dummy_hpp, hists, 0, NULL); + ret =3D fmt->header(fmt, &dummy_hpp, hists, line, NULL); if (advance_hpp_check(&dummy_hpp, ret)) break; =20 @@ -1723,6 +1724,9 @@ static int hists_browser__scnprintf_hierarchy_headers= (struct hist_browser *brows first_node =3D false; } =20 + if (line < hists->hpp_list->nr_header_lines - 1) + return ret; + if (!first_node) { ret =3D scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", indent * HIERARCHY_INDENT, ""); @@ -1753,7 +1757,7 @@ static int hists_browser__scnprintf_hierarchy_headers= (struct hist_browser *brows } first_col =3D false; =20 - ret =3D fmt->header(fmt, &dummy_hpp, hists, 0, NULL); + ret =3D fmt->header(fmt, &dummy_hpp, hists, line, NULL); dummy_hpp.buf[ret] =3D '\0'; =20 start =3D strim(dummy_hpp.buf); @@ -1772,14 +1776,18 @@ static int hists_browser__scnprintf_hierarchy_heade= rs(struct hist_browser *brows =20 static void hists_browser__hierarchy_headers(struct 
hist_browser *browser) { + struct perf_hpp_list *hpp_list =3D browser->hists->hpp_list; char headers[1024]; + int line; =20 - hists_browser__scnprintf_hierarchy_headers(browser, headers, - sizeof(headers)); + for (line =3D 0; line < hpp_list->nr_header_lines; line++) { + hists_browser__scnprintf_hierarchy_headers(browser, headers, + sizeof(headers), line); =20 - ui_browser__gotorc_title(&browser->b, 0, 0); - ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); - ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); + ui_browser__gotorc_title(&browser->b, line, 0); + ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); + ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); + } } =20 static void hists_browser__headers(struct hist_browser *browser) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index bc0689fceeb18bde..ec44633207aa3aba 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -321,11 +321,16 @@ static int hpp__width_fn(struct perf_hpp_fmt *fmt, } =20 static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists, int line __maybe_unused, + struct hists *hists, int line, int *span __maybe_unused) { int len =3D hpp__width_fn(fmt, hpp, hists); - return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name); + const char *hdr =3D ""; + + if (line =3D=3D hists->hpp_list->nr_header_lines - 1) + hdr =3D fmt->name; + + return scnprintf(hpp->buf, hpp->size, "%*s", len, hdr); } =20 int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...) 
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 7ac4b98e28bca82e..8c4c8925df2c22fc 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -643,45 +643,58 @@ static int hists__fprintf_hierarchy_headers(struct hi= sts *hists, unsigned header_width =3D 0; struct perf_hpp_fmt *fmt; struct perf_hpp_list_node *fmt_node; + struct perf_hpp_list *hpp_list =3D hists->hpp_list; const char *sep =3D symbol_conf.field_sep; =20 indent =3D hists->nr_hpp_node; =20 - /* preserve max indent depth for column headers */ - print_hierarchy_indent(sep, indent, " ", fp); - /* the first hpp_list_node is for overhead columns */ fmt_node =3D list_first_entry(&hists->hpp_formats, struct perf_hpp_list_node, list); =20 - perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { - fmt->header(fmt, hpp, hists, 0, NULL); - fprintf(fp, "%s%s", hpp->buf, sep ?: " "); - } + for (int line =3D 0; line < hpp_list->nr_header_lines; line++) { + /* first # is displayed one level up */ + if (line) + fprintf(fp, "# "); =20 - /* combine sort headers with ' / ' */ - first_node =3D true; - list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { - if (!first_node) - header_width +=3D fprintf(fp, " / "); - first_node =3D false; + /* preserve max indent depth for column headers */ + print_hierarchy_indent(sep, indent, " ", fp); =20 - first_col =3D true; perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { - if (perf_hpp__should_skip(fmt, hists)) - continue; + fmt->header(fmt, hpp, hists, line, NULL); + fprintf(fp, "%s%s", hpp->buf, sep ?: " "); + } =20 - if (!first_col) - header_width +=3D fprintf(fp, "+"); - first_col =3D false; + if (line < hpp_list->nr_header_lines - 1) + goto next_line; + + /* combine sort headers with ' / ' */ + first_node =3D true; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + if (!first_node) + header_width +=3D fprintf(fp, " / "); + first_node =3D false; =20 - fmt->header(fmt, hpp, hists, 0, NULL); + 
first_col =3D true; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (perf_hpp__should_skip(fmt, hists)) + continue; =20 - header_width +=3D fprintf(fp, "%s", strim(hpp->buf)); + if (!first_col) + header_width +=3D fprintf(fp, "+"); + first_col =3D false; + + fmt->header(fmt, hpp, hists, line, NULL); + + header_width +=3D fprintf(fp, "%s", strim(hpp->buf)); + } } + +next_line: + fprintf(fp, "\n"); } =20 - fprintf(fp, "\n# "); + fprintf(fp, "# "); =20 /* preserve max indent depth for initial dots */ print_hierarchy_indent(sep, indent, dots, fp); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 594b75ca95bf72b2..ae8b8ceb82f3d00b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2641,18 +2641,22 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt= *fmt, struct hists *hists) } =20 static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *h= pp, - struct hists *hists, int line __maybe_unused, + struct hists *hists, int line, int *span __maybe_unused) { struct hpp_sort_entry *hse; size_t len =3D fmt->user_len; + const char *hdr =3D ""; + + if (line =3D=3D hists->hpp_list->nr_header_lines - 1) + hdr =3D fmt->name; =20 hse =3D container_of(fmt, struct hpp_sort_entry, hpp); =20 if (!len) len =3D hists__col_len(hists, hse->se->se_width_idx); =20 - return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name); + return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, hdr); } =20 static int __sort__hpp_width(struct perf_hpp_fmt *fmt, --=20 2.49.0.906.g1f30a19c02-goog From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5BD682D1102; Wed, 30 Apr 2025 20:55:52 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; 
a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046553; cv=none; b=orwyO7/4cCwLRm//o1vIrLSgzU1uskm9/uzP3Rva+hkSvzelpRWeWeFcqP9wHuPoD9Zn+NjOPuETW9x8x+tlJbC4m8U53vRK4NiUBn2WJj1s0HbLinfOILWNn9YC+2ZBfgQpxjXiElruTLA8DkT5Z0kBKKAecxEA1GUbqL0dTuY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046553; c=relaxed/simple; bh=Od313OK1KaRM9Dc3kB1+B0iEWriL30mnhLfzK6N9vII=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=oxpC1RTacIvGHAZOS7S/sb0ZMhkubbsulmcNKSAXk2TRkUW3dRXj6Otx2ArEGgLrKDcQ3PNE1T1CLifMa4L5Yzb3Or+NB3K2etRtBvXKGJuu2YieYhQXelH2XwEYokSO1Vk1BmaKPYq55njrcTQzcYs1BE5SffnJ+Y/aX6rCglg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=fP5u5fPu; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="fP5u5fPu" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 51F70C4CEEC; Wed, 30 Apr 2025 20:55:52 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046552; bh=Od313OK1KaRM9Dc3kB1+B0iEWriL30mnhLfzK6N9vII=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=fP5u5fPuoRUawWmxv8/rx3GO0pbZNX0bbxtGLVnBUuQ8itZ3s/yb0xyUHx7EgEkam QfJw1AaeaZR+uSiSVvvnu26u6cp1ExObdUq1D8HyfxFLATDUVYmnHzd+SQ7r1nA/E7 TVviNngR9xlN2B/1oe6BAPhr8QzssBjPTr9TJilrgqFDXWP2jry27Z8KW1uYAX+nFh PQ/Z7p5XgXfCGaV2tObPD2p+rtBvGx8src9z8nsSMUdMafxO+LmdUgNlWU5bHaw3E+ v8BmE/esYoZoCGX/FMkRKq0IR/IKVqdny8eHsN/18/DU/8Y9O6Fm3YyANFtP2koh53 Z9rPMYZJ0tLHw== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 04/11] perf hist: Add struct he_mem_stat Date: Wed, 30 Apr 2025 13:55:41 -0700 Message-ID: <20250430205548.789750-5-namhyung@kernel.org> 
X-Mailer: git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The struct he_mem_stat is to save detailed information about memory instructions. It'll be used to show breakdown of various data from PERF_SAMPLE_DATA_SRC. Note that this structure is generic and the contents will be different depending on actual data it'll use later. The information about the actual data will be saved in struct hists and its length is in nr_mem_stats. This commit just lays the groundwork and does nothing since hists->nr_mem_stats is 0 for now. Signed-off-by: Namhyung Kim --- tools/perf/util/hist.c | 74 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/hist.h | 9 +++++ 2 files changed, 83 insertions(+) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index d65228c1141251fb..fcb9f0db0c92a229 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -336,6 +336,67 @@ static void he_stat__decay(struct he_stat *he_stat) he_stat->latency =3D (he_stat->latency * 7) / 8; } =20 +static int hists__update_mem_stat(struct hists *hists, struct hist_entry *= he, + struct mem_info *mi, u64 period) +{ + if (hists->nr_mem_stats =3D=3D 0) + return 0; + + if (he->mem_stat =3D=3D NULL) { + he->mem_stat =3D calloc(hists->nr_mem_stats, sizeof(*he->mem_stat)); + if (he->mem_stat =3D=3D NULL) + return -1; + } + + for (int i =3D 0; i < hists->nr_mem_stats; i++) { + int idx =3D 0; /* TODO: get correct index from mem info */ + + (void)mi; + he->mem_stat[i].entries[idx] +=3D period; + } + return 0; +} + +static void hists__add_mem_stat(struct hists *hists, struct hist_entry *ds= t, + struct hist_entry *src) +{ + if (hists->nr_mem_stats =3D=3D 0) + return; + + for (int i =3D 
0; i < hists->nr_mem_stats; i++) { + for (int k =3D 0; k < MEM_STAT_LEN; k++) + dst->mem_stat[i].entries[k] +=3D src->mem_stat[i].entries[k]; + } +} + +static int hists__clone_mem_stat(struct hists *hists, struct hist_entry *d= st, + struct hist_entry *src) +{ + if (hists->nr_mem_stats =3D=3D 0) + return 0; + + dst->mem_stat =3D calloc(hists->nr_mem_stats, sizeof(*dst->mem_stat)); + if (dst->mem_stat =3D=3D NULL) + return -1; + + for (int i =3D 0; i < hists->nr_mem_stats; i++) { + for (int k =3D 0; k < MEM_STAT_LEN; k++) + dst->mem_stat[i].entries[k] =3D src->mem_stat[i].entries[k]; + } + return 0; +} + +static void hists__decay_mem_stat(struct hists *hists, struct hist_entry *= he) +{ + if (hists->nr_mem_stats =3D=3D 0) + return; + + for (int i =3D 0; i < hists->nr_mem_stats; i++) { + for (int k =3D 0; k < MEM_STAT_LEN; k++) + he->mem_stat[i].entries[k] =3D (he->mem_stat[i].entries[k] * 7) / 8; + } +} + static void hists__delete_entry(struct hists *hists, struct hist_entry *he= ); =20 static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) @@ -350,6 +411,7 @@ static bool hists__decay_entry(struct hists *hists, str= uct hist_entry *he) if (symbol_conf.cumulate_callchain) he_stat__decay(he->stat_acc); decay_callchain(he->callchain); + hists__decay_mem_stat(hists, he); =20 if (!he->depth) { u64 period_diff =3D prev_period - he->stat.period; @@ -693,6 +755,10 @@ static struct hist_entry *hists__findnew_entry(struct = hists *hists, he_stat__add_cpumode_period(&he->stat, al->cpumode, period); if (symbol_conf.cumulate_callchain) he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period); + if (hists__update_mem_stat(hists, he, entry->mem_info, period) < 0) { + hist_entry__delete(he); + return NULL; + } return he; } =20 @@ -1423,6 +1489,7 @@ void hist_entry__delete(struct hist_entry *he) free_callchain(he->callchain); zfree(&he->trace_output); zfree(&he->raw_data); + zfree(&he->mem_stat); ops->free(he); } =20 @@ -1572,6 +1639,7 @@ static struct 
hist_entry *hierarchy_insert_entry(stru= ct hists *hists, cmp =3D hist_entry__collapse_hierarchy(hpp_list, iter, he); if (!cmp) { he_stat__add_stat(&iter->stat, &he->stat); + hists__add_mem_stat(hists, iter, he); return iter; } =20 @@ -1613,6 +1681,11 @@ static struct hist_entry *hierarchy_insert_entry(str= uct hists *hists, new->srcfile =3D NULL; } =20 + if (hists__clone_mem_stat(hists, new, he) < 0) { + hist_entry__delete(new); + return NULL; + } + rb_link_node(&new->rb_node_in, parent, p); rb_insert_color_cached(&new->rb_node_in, root, leftmost); return new; @@ -1695,6 +1768,7 @@ static int hists__collapse_insert_entry(struct hists = *hists, he_stat__add_stat(&iter->stat, &he->stat); if (symbol_conf.cumulate_callchain) he_stat__add_stat(iter->stat_acc, he->stat_acc); + hists__add_mem_stat(hists, iter, he); =20 if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) { struct callchain_cursor *cursor =3D get_tls_callchain_cursor(); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 76efd8952507a561..aba1d84ca074f27b 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -100,6 +100,13 @@ enum hist_column { struct thread; struct dso; =20 +#define MEM_STAT_LEN 8 + +struct he_mem_stat { + /* meaning of entries depends on enum mem_stat_type */ + u64 entries[MEM_STAT_LEN]; +}; + struct hists { struct rb_root_cached entries_in_array[2]; struct rb_root_cached *entries_in; @@ -125,6 +132,7 @@ struct hists { struct perf_hpp_list *hpp_list; struct list_head hpp_formats; int nr_hpp_node; + int nr_mem_stats; }; =20 #define hists__has(__h, __f) (__h)->hpp_list->__f @@ -232,6 +240,7 @@ struct hist_entry { } pairs; struct he_stat stat; struct he_stat *stat_acc; + struct he_mem_stat *mem_stat; struct map_symbol ms; struct thread *thread; struct comm *comm; --=20 2.49.0.906.g1f30a19c02-goog From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with 
cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B55382D1111; Wed, 30 Apr 2025 20:55:53 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046553; cv=none; b=asnHo31EcKIC/kNvLBoXvUYWsKJLHvqnJN73bvZPn+1ZSwHlO28H+VuXPEIGIl7+EbqJpBUweo3J7As7lGSdWqTKyK/Ue93KtFb7yjDUlASR/98V0GgTrD8BkPuYm0o/MwepXuv2RYrBttu/klgqtNojoAthO0gktKRq/R4BQ7c= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046553; c=relaxed/simple; bh=7sXQXwPPQhlYweuVtPWicgFAno3L7sNHip2o4AI7mXc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=vAR/gfh5E1GAeKdgeP/04lxKi/RwwHOFQvmFO7NvU1gvp/a9T89zhhc6+uElg8Ch8fCgRjYE2aCDCZbm0ugC5LccyfFv2BTmrAm0CpcLBd1b90EXw32ilqD1PzyZpfuqq9B6LPr//Pk4lwIuqA22zdVreyxS3OXscFCXkYQ3NG8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=RMcOS4AD; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="RMcOS4AD" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 02291C4CEEA; Wed, 30 Apr 2025 20:55:52 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046553; bh=7sXQXwPPQhlYweuVtPWicgFAno3L7sNHip2o4AI7mXc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=RMcOS4AD+GQlbBi2e2q67OHhvg/felLZ/Et4kTFhrsq1lkA2vS1vLhTEUmt3mFKwg LKhF+TFPtY4gGLq2Z+ZDrP8NFxMATdrgqfoUNNDKSHBFpuXQEpORUU2FbinQCjX4rT qZvLIwgg2uSqpo1IuJwj0rlhDadPKv+mcL0BM0/EWc4fwkG44IlwY/R0vSmq3f0Aiq ipzzWu/pLmD0VeZfIGJtUxUjfonzg9WAVTgAGFXjLu/mEPHEmt7RJyeSTQ3JE+SuhH M2D4lQGq+75B1TZhfx9bWLAF3meDrRyVIbKzXixYeCbHhwfo/ojB2ph1RTS4/I844V G1VXJGqgQfEOw== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan 
Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 05/11] perf hist: Basic support for mem_stat accounting Date: Wed, 30 Apr 2025 13:55:42 -0700 Message-ID: <20250430205548.789750-6-namhyung@kernel.org> X-Mailer: git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add logic to account he->mem_stat based on mem_stat_type in hists. Each mem_stat entry will have a different meaning based on the type so the index in the array is calculated at runtime using the corresponding value in the sample.data_src. The hists still have no mem_stat_types yet so this code does nothing for now. Later hists->mem_stat_types will be allocated based on what users actually want in the output. 
Signed-off-by: Namhyung Kim --- tools/perf/ui/hist.c | 39 ++++++++++++++++++++++++++++++++++++ tools/perf/util/hist.c | 6 ++++-- tools/perf/util/hist.h | 4 ++++ tools/perf/util/mem-events.c | 18 +++++++++++++++++ tools/perf/util/mem-events.h | 6 ++++++ tools/perf/util/sort.c | 4 ++++ 6 files changed, 75 insertions(+), 2 deletions(-) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index ec44633207aa3aba..2aad46bbd2ed4d93 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -11,6 +11,7 @@ #include "../util/sort.h" #include "../util/evsel.h" #include "../util/evlist.h" +#include "../util/mem-events.h" #include "../util/thread.h" #include "../util/util.h" =20 @@ -500,6 +501,12 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt _= _maybe_unused, return 0; } =20 +static bool perf_hpp__is_mem_stat_entry(struct perf_hpp_fmt *fmt) +{ + (void)fmt; + return false; +} + static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a) { return a->header =3D=3D hpp__header_fn; @@ -1022,3 +1029,35 @@ int perf_hpp__setup_hists_formats(struct perf_hpp_li= st *list, =20 return 0; } + +int perf_hpp__alloc_mem_stats(struct perf_hpp_list *list, struct evlist *e= vlist) +{ + struct perf_hpp_fmt *fmt; + struct evsel *evsel; + enum mem_stat_type mst[16]; + unsigned nr_mem_stats =3D 0; + + perf_hpp_list__for_each_format(list, fmt) { + if (!perf_hpp__is_mem_stat_entry(fmt)) + continue; + + assert(nr_mem_stats < ARRAY_SIZE(mst)); + mst[nr_mem_stats++] =3D PERF_MEM_STAT_UNKNOWN; + } + + if (nr_mem_stats =3D=3D 0) + return 0; + + evlist__for_each_entry(evlist, evsel) { + struct hists *hists =3D evsel__hists(evsel); + + hists->mem_stat_types =3D calloc(nr_mem_stats, + sizeof(*hists->mem_stat_types)); + if (hists->mem_stat_types =3D=3D NULL) + return -ENOMEM; + + memcpy(hists->mem_stat_types, mst, nr_mem_stats * sizeof(*mst)); + hists->nr_mem_stats =3D nr_mem_stats; + } + return 0; +} diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 
fcb9f0db0c92a229..7759c1818c1ad168 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -349,9 +349,10 @@ static int hists__update_mem_stat(struct hists *hists,= struct hist_entry *he, } =20 for (int i =3D 0; i < hists->nr_mem_stats; i++) { - int idx =3D 0; /* TODO: get correct index from mem info */ + int idx =3D mem_stat_index(hists->mem_stat_types[i], + mem_info__const_data_src(mi)->val); =20 - (void)mi; + assert(0 <=3D idx && idx < MEM_STAT_LEN); he->mem_stat[i].entries[idx] +=3D period; } return 0; @@ -3052,6 +3053,7 @@ static void hists_evsel__exit(struct evsel *evsel) struct perf_hpp_list_node *node, *tmp; =20 hists__delete_all_entries(hists); + zfree(&hists->mem_stat_types); =20 list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) { perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index aba1d84ca074f27b..509af09691b84e10 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -9,6 +9,7 @@ #include "events_stats.h" #include "evsel.h" #include "map_symbol.h" +#include "mem-events.h" #include "mutex.h" #include "sample.h" #include "spark.h" @@ -133,6 +134,7 @@ struct hists { struct list_head hpp_formats; int nr_hpp_node; int nr_mem_stats; + enum mem_stat_type *mem_stat_types; }; =20 #define hists__has(__h, __f) (__h)->hpp_list->__f @@ -597,6 +599,8 @@ void perf_hpp__reset_output_field(struct perf_hpp_list = *list); void perf_hpp__append_sort_keys(struct perf_hpp_list *list); int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, struct evlist *evlist); +int perf_hpp__alloc_mem_stats(struct perf_hpp_list *list, + struct evlist *evlist); =20 =20 bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 884d9aebce9199c0..1bc60ad3dc312542 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -799,3 +799,21 @@ void c2c_add_stats(struct c2c_stats 
*stats, struct c2c= _stats *add) stats->nomap +=3D add->nomap; stats->noparse +=3D add->noparse; } + +/* + * It returns an index in hist_entry->mem_stat array for the given val whi= ch + * represents a data-src based on the mem_stat_type. + * + * For example, when mst is about cache level, the index can be 1 for L1, = 2 for + * L2 and so on. + */ +int mem_stat_index(const enum mem_stat_type mst, const u64 val) +{ + switch (mst) { + case PERF_MEM_STAT_UNKNOWN: /* placeholder */ + default: + break; + } + (void)val; + return -1; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index a5c19d39ee37147b..2604464f985815f6 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -89,4 +89,10 @@ struct hist_entry; int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi); void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add); =20 +enum mem_stat_type { + PERF_MEM_STAT_UNKNOWN, /* placeholder */ +}; + +int mem_stat_index(const enum mem_stat_type mst, const u64 data_src); + #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ae8b8ceb82f3d00b..6024f588f66f3156 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -4163,6 +4163,10 @@ int setup_sorting(struct evlist *evlist) if (err < 0) return err; =20 + err =3D perf_hpp__alloc_mem_stats(&perf_hpp_list, evlist); + if (err < 0) + return err; + /* copy sort keys to output fields */ perf_hpp__setup_output_field(&perf_hpp_list); /* and then copy output fields to sort keys */ --=20 2.49.0.906.g1f30a19c02-goog From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D780F2D113A; Wed, 30 Apr 2025 20:55:54 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none 
smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046555; cv=none; b=ZlPLKT1VcP1bsV79Vb9SCry+GKIpFtpLSQ6IlxjBt0WpNItonOL42MyrI2NVTW9SKTOFXK+pFjQ1YteuIEmUfbYK/mTJiibLEVxLnY3eoWjlVyUCNAggGcw0gLyNeykK1RSuWDz9Etk2w9a4ydjvHXkoAfZbCBAXnzS314w2F60= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046555; c=relaxed/simple; bh=FXs7Rohm0pdrL2NM/hmA4w+IkGo+yWcS9CG3bdF8zvY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=oTygKChqmuMg/aSmUXoHmBeFt0USkuCDfZqQLleXVP+Flkr9a7ssmLqsnZT6uNVPjddEg+qjv4wwrj9Wm9l5rK90zRSxbw2CQ28CWiOwCUj6ehUlEMhviLIVgjOBT3bWPnR2EOWoJ+uf3/P+4wxCHCrNdTbDx6bWfHzIs9K3Ow0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=SV99qIUL; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="SV99qIUL" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A6F4EC4CEEE; Wed, 30 Apr 2025 20:55:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046554; bh=FXs7Rohm0pdrL2NM/hmA4w+IkGo+yWcS9CG3bdF8zvY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=SV99qIULKxts2NspPjNzYzoV2ZXwjzYgIgjjHKHrSOqPSrUaZVwXcZf1RDRH7mbcG kIecp0fAaNmzs/SHYlolFRphUZCebfjmr/gyOtWiCT5eClcyOMJ9A2u3YBtMiADIH+ 7aliaEvVqZhBDfmsW5RcJl7DNwT58V1QJzuo4f7RY4XgsDszhYUyXrUzxEI8JgHe5L XKeouwjkc19JmNHFu7TVthuzIo2fFvizeNx37x2NYDEgD6YJEQJzTr8q6QXHkQbXPu SyEflHCcCbVN1i3YYEEMx/pXccvbdolwahv2tzMMr3WPIcmPo/EkqK+BAWOW1yELBL xVEWgJewqEjfw== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 06/11] perf hist: Implement output fields for mem stats Date: Wed, 30 Apr 2025 13:55:43 -0700 
Message-ID: <20250430205548.789750-7-namhyung@kernel.org> X-Mailer: git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This is a preparation for later changes to support mem_stat output. The new fields will need two lines for the header - the first line will show the type of mem stat and the second line will show the name of each item, which is returned by mem_stat_name(). Each element in the mem_stat array will be printed as a percentage for the hist_entry and their sum would be 100%. Add a new output field dimension only for SORT_MODE__MEMORY using mem_stat. To handle a possible name conflict with existing sort keys, move the check of output field dimensions after the sort dimensions when it looks for sort keys. 
Signed-off-by: Namhyung Kim --- tools/perf/ui/browsers/hists.c | 11 +++ tools/perf/ui/hist.c | 158 ++++++++++++++++++++++++++++++++- tools/perf/util/hist.h | 4 + tools/perf/util/mem-events.c | 12 +++ tools/perf/util/mem-events.h | 3 + tools/perf/util/sort.c | 26 ++++-- 6 files changed, 202 insertions(+), 12 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 67cbdec90d0bf0ea..f6ab1310a0bdd6c4 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1266,6 +1266,16 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt = *fmt, \ _fmttype); \ } =20 +#define __HPP_COLOR_MEM_STAT_FN(_name, _type) \ +static int \ +hist_browser__hpp_color_mem_stat_##_name(struct perf_hpp_fmt *fmt, \ + struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + return hpp__fmt_mem_stat(fmt, hpp, he, PERF_MEM_STAT_##_type, \ + " %5.1f%%", __hpp__slsmg_color_printf);\ +} + __HPP_COLOR_PERCENT_FN(overhead, period, PERF_HPP_FMT_TYPE__PERCENT) __HPP_COLOR_PERCENT_FN(latency, latency, PERF_HPP_FMT_TYPE__LATENCY) __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys, PERF_HPP_FMT_TYPE__PERCEN= T) @@ -1277,6 +1287,7 @@ __HPP_COLOR_ACC_PERCENT_FN(latency_acc, latency, PERF= _HPP_FMT_TYPE__LATENCY) =20 #undef __HPP_COLOR_PERCENT_FN #undef __HPP_COLOR_ACC_PERCENT_FN +#undef __HPP_COLOR_MEM_STAT_FN =20 void hist_browser__init_hpp(void) { diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 2aad46bbd2ed4d93..2a5c9f2b328b2c5c 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -12,6 +12,7 @@ #include "../util/evsel.h" #include "../util/evlist.h" #include "../util/mem-events.h" +#include "../util/string2.h" #include "../util/thread.h" #include "../util/util.h" =20 @@ -151,6 +152,45 @@ int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf= _hpp *hpp, return hpp__fmt(fmt, hpp, he, get_field, fmtstr, print_fn, fmtype); } =20 +int hpp__fmt_mem_stat(struct perf_hpp_fmt *fmt __maybe_unused, struct perf= _hpp *hpp, + struct 
hist_entry *he, enum mem_stat_type mst, + const char *fmtstr, hpp_snprint_fn print_fn) +{ + struct hists *hists =3D he->hists; + int mem_stat_idx =3D -1; + char *buf =3D hpp->buf; + size_t size =3D hpp->size; + u64 total =3D 0; + int ret =3D 0; + + for (int i =3D 0; i < hists->nr_mem_stats; i++) { + if (hists->mem_stat_types[i] =3D=3D mst) { + mem_stat_idx =3D i; + break; + } + } + assert(mem_stat_idx !=3D -1); + + for (int i =3D 0; i < MEM_STAT_LEN; i++) + total +=3D he->mem_stat[mem_stat_idx].entries[i]; + assert(total !=3D 0); + + for (int i =3D 0; i < MEM_STAT_LEN; i++) { + u64 val =3D he->mem_stat[mem_stat_idx].entries[i]; + + ret +=3D hpp__call_print_fn(hpp, print_fn, fmtstr, 100.0 * val / total); + } + + /* + * Restore original buf and size as it's where caller expects + * the result will be saved. + */ + hpp->buf =3D buf; + hpp->size =3D size; + + return ret; +} + static int field_cmp(u64 field_a, u64 field_b) { if (field_a > field_b) @@ -295,6 +335,23 @@ static int __hpp__sort_acc(struct hist_entry *a, struc= t hist_entry *b, return ret; } =20 +static bool perf_hpp__is_mem_stat_entry(struct perf_hpp_fmt *fmt); + +static enum mem_stat_type hpp__mem_stat_type(struct perf_hpp_fmt *fmt) +{ + if (!perf_hpp__is_mem_stat_entry(fmt)) + return -1; + + pr_debug("Should not reach here\n"); + return -1; +} + +static int64_t hpp__sort_mem_stat(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *a, struct hist_entry *b) +{ + return a->stat.period - b->stat.period; +} + static int hpp__width_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, struct hists *hists) @@ -334,6 +391,45 @@ static int hpp__header_fn(struct perf_hpp_fmt *fmt, st= ruct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", len, hdr); } =20 +static int hpp__header_mem_stat_fn(struct perf_hpp_fmt *fmt, struct perf_h= pp *hpp, + struct hists *hists, int line, + int *span __maybe_unused) +{ + char *buf =3D hpp->buf; + int ret =3D 0; + int len; + enum mem_stat_type mst =3D 
hpp__mem_stat_type(fmt); + + (void)hists; + if (line =3D=3D 0) { + int left, right; + + len =3D fmt->len; + left =3D (len - strlen(fmt->name)) / 2 - 1; + right =3D len - left - strlen(fmt->name) - 2; + + if (left < 0) + left =3D 0; + if (right < 0) + right =3D 0; + + return scnprintf(hpp->buf, hpp->size, "%.*s %s %.*s", + left, graph_dotted_line, fmt->name, right, graph_dotted_line); + } + + len =3D hpp->size; + for (int i =3D 0; i < MEM_STAT_LEN; i++) { + int printed; + + printed =3D scnprintf(buf, len, "%*s", MEM_STAT_PRINT_LEN, + mem_stat_name(mst, i)); + ret +=3D printed; + buf +=3D printed; + len -=3D printed; + } + return ret; +} + int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...) { va_list args; @@ -459,6 +555,23 @@ static int64_t hpp__sort_##_type(struct perf_hpp_fmt *= fmt __maybe_unused, \ return __hpp__sort(a, b, he_get_##_field); \ } =20 +#define __HPP_COLOR_MEM_STAT_FN(_name, _type) \ +static int hpp__color_mem_stat_##_name(struct perf_hpp_fmt *fmt, \ + struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + return hpp__fmt_mem_stat(fmt, hpp, he, PERF_MEM_STAT_##_type, \ + " %5.1f%%", hpp_color_scnprintf); \ +} + +#define __HPP_ENTRY_MEM_STAT_FN(_name, _type) \ +static int hpp__entry_mem_stat_##_name(struct perf_hpp_fmt *fmt, \ + struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + return hpp__fmt_mem_stat(fmt, hpp, he, PERF_MEM_STAT_##_type, \ + " %5.1f%%", hpp_entry_scnprintf); \ +} =20 #define HPP_PERCENT_FNS(_type, _field, _fmttype) \ __HPP_COLOR_PERCENT_FN(_type, _field, _fmttype) \ @@ -478,6 +591,10 @@ __HPP_SORT_RAW_FN(_type, _field) __HPP_ENTRY_AVERAGE_FN(_type, _field) \ __HPP_SORT_AVERAGE_FN(_type, _field) =20 +#define HPP_MEM_STAT_FNS(_name, _type) \ +__HPP_COLOR_MEM_STAT_FN(_name, _type) \ +__HPP_ENTRY_MEM_STAT_FN(_name, _type) + HPP_PERCENT_FNS(overhead, period, PERF_HPP_FMT_TYPE__PERCENT) HPP_PERCENT_FNS(latency, latency, PERF_HPP_FMT_TYPE__LATENCY) HPP_PERCENT_FNS(overhead_sys, period_sys, 
PERF_HPP_FMT_TYPE__PERCENT) @@ -494,6 +611,8 @@ HPP_AVERAGE_FNS(weight1, weight1) HPP_AVERAGE_FNS(weight2, weight2) HPP_AVERAGE_FNS(weight3, weight3) =20 +HPP_MEM_STAT_FNS(unknown, UNKNOWN) /* placeholder */ + static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *a __maybe_unused, struct hist_entry *b __maybe_unused) @@ -503,8 +622,7 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __= maybe_unused, =20 static bool perf_hpp__is_mem_stat_entry(struct perf_hpp_fmt *fmt) { - (void)fmt; - return false; + return fmt->sort =3D=3D hpp__sort_mem_stat; } =20 static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a) @@ -520,6 +638,14 @@ static bool hpp__equal(struct perf_hpp_fmt *a, struct = perf_hpp_fmt *b) return a->idx =3D=3D b->idx; } =20 +static bool hpp__equal_mem_stat(struct perf_hpp_fmt *a, struct perf_hpp_fm= t *b) +{ + if (!perf_hpp__is_mem_stat_entry(a) || !perf_hpp__is_mem_stat_entry(b)) + return false; + + return a->entry =3D=3D b->entry; +} + #define HPP__COLOR_PRINT_FNS(_name, _fn, _idx) \ { \ .name =3D _name, \ @@ -561,6 +687,20 @@ static bool hpp__equal(struct perf_hpp_fmt *a, struct = perf_hpp_fmt *b) .equal =3D hpp__equal, \ } =20 +#define HPP__MEM_STAT_PRINT_FNS(_name, _fn, _type) \ + { \ + .name =3D _name, \ + .header =3D hpp__header_mem_stat_fn, \ + .width =3D hpp__width_fn, \ + .color =3D hpp__color_mem_stat_ ## _fn, \ + .entry =3D hpp__entry_mem_stat_ ## _fn, \ + .cmp =3D hpp__nop_cmp, \ + .collapse =3D hpp__nop_cmp, \ + .sort =3D hpp__sort_mem_stat, \ + .idx =3D PERF_HPP__MEM_STAT_ ## _type, \ + .equal =3D hpp__equal_mem_stat, \ + } + struct perf_hpp_fmt perf_hpp__format[] =3D { HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD), HPP__COLOR_PRINT_FNS("Latency", latency, LATENCY), @@ -575,6 +715,7 @@ struct perf_hpp_fmt perf_hpp__format[] =3D { HPP__PRINT_FNS("Weight1", weight1, WEIGHT1), HPP__PRINT_FNS("Weight2", weight2, WEIGHT2), HPP__PRINT_FNS("Weight3", weight3, WEIGHT3), + 
HPP__MEM_STAT_PRINT_FNS("Unknown", unknown, UNKNOWN), /* placeholder */ }; =20 struct perf_hpp_list perf_hpp_list =3D { @@ -586,11 +727,13 @@ struct perf_hpp_list perf_hpp_list =3D { #undef HPP__COLOR_PRINT_FNS #undef HPP__COLOR_ACC_PRINT_FNS #undef HPP__PRINT_FNS +#undef HPP__MEM_STAT_PRINT_FNS =20 #undef HPP_PERCENT_FNS #undef HPP_PERCENT_ACC_FNS #undef HPP_RAW_FNS #undef HPP_AVERAGE_FNS +#undef HPP_MEM_STAT_FNS =20 #undef __HPP_HEADER_FN #undef __HPP_WIDTH_FN @@ -600,6 +743,9 @@ struct perf_hpp_list perf_hpp_list =3D { #undef __HPP_ENTRY_ACC_PERCENT_FN #undef __HPP_ENTRY_RAW_FN #undef __HPP_ENTRY_AVERAGE_FN +#undef __HPP_COLOR_MEM_STAT_FN +#undef __HPP_ENTRY_MEM_STAT_FN + #undef __HPP_SORT_FN #undef __HPP_SORT_ACC_FN #undef __HPP_SORT_RAW_FN @@ -924,6 +1070,10 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, = struct hists *hists) fmt->len =3D 8; break; =20 + case PERF_HPP__MEM_STAT_UNKNOWN: /* placeholder */ + fmt->len =3D MEM_STAT_LEN * MEM_STAT_PRINT_LEN; + break; + default: break; } @@ -1042,12 +1192,14 @@ int perf_hpp__alloc_mem_stats(struct perf_hpp_list = *list, struct evlist *evlist) continue; =20 assert(nr_mem_stats < ARRAY_SIZE(mst)); - mst[nr_mem_stats++] =3D PERF_MEM_STAT_UNKNOWN; + mst[nr_mem_stats++] =3D hpp__mem_stat_type(fmt); } =20 if (nr_mem_stats =3D=3D 0) return 0; =20 + list->nr_header_lines =3D 2; + evlist__for_each_entry(evlist, evsel) { struct hists *hists =3D evsel__hists(evsel); =20 diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 509af09691b84e10..18c696d8d568a9fa 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -587,6 +587,7 @@ enum { PERF_HPP__WEIGHT1, PERF_HPP__WEIGHT2, PERF_HPP__WEIGHT3, + PERF_HPP__MEM_STAT_UNKNOWN, /* placeholder */ =20 PERF_HPP__MAX_INDEX }; @@ -656,6 +657,9 @@ int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_= hpp *hpp, struct hist_entry *he, hpp_field_fn get_field, const char *fmtstr, hpp_snprint_fn print_fn, enum perf_hpp_fmt_type fmtype); +int 
hpp__fmt_mem_stat(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he, enum mem_stat_type mst, + const char *fmtstr, hpp_snprint_fn print_fn); =20 static inline void advance_hpp(struct perf_hpp *hpp, int inc) { diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 1bc60ad3dc312542..a4c1e42de30f8307 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -817,3 +817,15 @@ int mem_stat_index(const enum mem_stat_type mst, const= u64 val) (void)val; return -1; } + +/* To align output, returned string should be shorter than MEM_STAT_PRINT_= LEN */ +const char *mem_stat_name(const enum mem_stat_type mst, const int idx) +{ + switch (mst) { + case PERF_MEM_STAT_UNKNOWN: + default: + break; + } + (void)idx; + return "N/A"; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 2604464f985815f6..7aeb4c5fefc89698 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -93,6 +93,9 @@ enum mem_stat_type { PERF_MEM_STAT_UNKNOWN, /* placeholder */ }; =20 +#define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ + int mem_stat_index(const enum mem_stat_type mst, const u64 data_src); +const char *mem_stat_name(const enum mem_stat_type mst, const int idx); =20 #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6024f588f66f3156..7c669ea27af247e5 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2598,9 +2598,11 @@ struct hpp_dimension { struct perf_hpp_fmt *fmt; int taken; int was_taken; + int mem_mode; }; =20 #define DIM(d, n) { .name =3D n, .fmt =3D &perf_hpp__format[d], } +#define DIM_MEM(d, n) { .name =3D n, .fmt =3D &perf_hpp__format[d], .mem_m= ode =3D 1, } =20 static struct hpp_dimension hpp_sort_dimensions[] =3D { DIM(PERF_HPP__OVERHEAD, "overhead"), @@ -2620,8 +2622,11 @@ static struct hpp_dimension hpp_sort_dimensions[] = =3D { DIM(PERF_HPP__WEIGHT2, "ins_lat"), 
DIM(PERF_HPP__WEIGHT3, "retire_lat"), DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"), + /* used for output only when SORT_MODE__MEM */ + DIM_MEM(PERF_HPP__MEM_STAT_UNKNOWN, "unknown"), /* placeholder */ }; =20 +#undef DIM_MEM #undef DIM =20 struct hpp_sort_entry { @@ -3608,15 +3613,6 @@ int sort_dimension__add(struct perf_hpp_list *list, = const char *tok, return __sort_dimension__add(sd, list, level); } =20 - for (i =3D 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { - struct hpp_dimension *hd =3D &hpp_sort_dimensions[i]; - - if (strncasecmp(tok, hd->name, strlen(tok))) - continue; - - return __hpp_dimension__add(hd, list, level); - } - for (i =3D 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { struct sort_dimension *sd =3D &bstack_sort_dimensions[i]; =20 @@ -3658,6 +3654,15 @@ int sort_dimension__add(struct perf_hpp_list *list, = const char *tok, return 0; } =20 + for (i =3D 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { + struct hpp_dimension *hd =3D &hpp_sort_dimensions[i]; + + if (strncasecmp(tok, hd->name, strlen(tok))) + continue; + + return __hpp_dimension__add(hd, list, level); + } + if (!add_dynamic_entry(evlist, tok, level)) return 0; =20 @@ -4020,6 +4025,9 @@ int output_field_add(struct perf_hpp_list *list, cons= t char *tok, int *level) if (!strcasecmp(tok, "weight")) ui__warning("--fields weight shows the average value unlike in the --so= rt key.\n"); =20 + if (hd->mem_mode && sort__mode !=3D SORT_MODE__MEMORY) + continue; + return __hpp_dimension__add_output(list, hd, *level); } =20 --=20 2.49.0.906.g1f30a19c02-goog From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 975C72C032A; Wed, 30 Apr 2025 20:55:55 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; 
a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046555; cv=none; b=HcLDTBP00P4l+wWpbeQ8wx/e5kfeo/rvXTpMVFcAGCjyJTtz9t/5Uqblpnyj4Wh/5a6lm/lifSety5XMkqSHftVBexf0dkXAp/bEh/00mA2NCUOTPHujdKmd6mF5WQi24Nafgu4IUy2Gdrdb1vMhIYkHCEimb9YXU1UPuOOhHKY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046555; c=relaxed/simple; bh=1HgdoUcLZHaR7izvGv+l6i60L6/dassgp57VUa1iSlc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=mQ76vllqClulHSg83D13LggeAa++AfgBuOVXhiqtP6WmpheA7tiQB767fmPwAzrA4Ri5tdc6kf806wvzMo0ey8Ugh9JHTC8yBVSWuL04mwb2Y2shteNyEZBzvhfm2KpcN2Z3N6gihNl5cQGyIjIY/Vme6F6gMukakKhHo+nt0qg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=NxYoD6zI; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="NxYoD6zI" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 639ECC4CEEC; Wed, 30 Apr 2025 20:55:54 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046554; bh=1HgdoUcLZHaR7izvGv+l6i60L6/dassgp57VUa1iSlc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=NxYoD6zI73QFrUGaGRQk/SU2TmzRke7jKR3HPVNlGJZZsJRJs1oC7mHwxOd/U5Wb+ g1Rwe1lJxaPbQSTpX2zqaqdswmPRbFh5umyZBATQbSQWVaQ9s5AtQvRg18kIos1RNr 3MSFvDO3ZfSrCxnVVypLGAurmOerSBvN2P3xFd79xOFamdXh7TUwtPcBPwpBtGOtZi TXRhwX2cF+oz9ZLeBTqSRanbs1Jxfr9YzKeV2UWGroX8PSllwsE/P8OrDmZzNr0Psu 2eMS1i5LilaIKRNyI84ZAX0W9NAwwmseKqkrxYWCxTdDfN4VTZdncWERxNaxq4llEQ n+Le4yVCGp4YA== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 07/11] perf mem: Add 'op' output field Date: Wed, 30 Apr 2025 13:55:44 -0700 Message-ID: <20250430205548.789750-8-namhyung@kernel.org> X-Mailer: 
git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This is an actual example of the he_mem_stat based sample breakdown. It uses 'mem_op' field of union perf_mem_data_src which means memory operations. It'd have basically 'load' or 'store' which can be useful if PMU doesn't have separate events for them like IBS or SPE. In addition, there's an entry in case load and store happen at the same time. Also adds entries for prefetching and execution. $ perf mem report -F +op -s comm --stdio # To display the perf.data header info, please use --header/--header-only= options. # # # Total Lost Samples: 0 # # Samples: 4K of event 'ibs_op//' # Total weight : 9559 # Sort order : comm # # ------------------------ Mem Op ---------------= --------- # Overhead Samples Load Store Ld+St Pfetch Exec Other N= /A N/A Command # ........ ............ ...............................................= ......... ............... 
# 44.85% 4077 21.1% 30.7% 0.0% 0.0% 0.0% 48.3% 0.= 0% 0.0% swapper 26.82% 45 98.8% 0.3% 0.0% 0.0% 0.0% 0.9% 0.= 0% 0.0% netsli-prober 7.19% 442 51.7% 13.7% 0.0% 0.0% 0.0% 34.6% 0.= 0% 0.0% perf 5.81% 75 89.7% 2.2% 0.0% 0.0% 0.0% 8.1% 0.= 0% 0.0% qemu-system-ppc 4.77% 1 100.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.= 0% 0.0% notifications_c 1.77% 10 95.9% 1.2% 0.0% 0.0% 0.0% 3.0% 0.= 0% 0.0% MemoryReleaser 0.77% 32 71.6% 4.1% 0.0% 0.0% 0.0% 24.3% 0.= 0% 0.0% DefaultEventMan 0.19% 10 66.7% 22.2% 0.0% 0.0% 0.0% 11.1% 0.= 0% 0.0% gnome-shell Signed-off-by: Namhyung Kim --- tools/perf/ui/browsers/hists.c | 3 +++ tools/perf/ui/hist.c | 12 ++++++--- tools/perf/util/hist.h | 2 +- tools/perf/util/mem-events.c | 48 ++++++++++++++++++++++++++-------- tools/perf/util/mem-events.h | 11 +++++++- tools/perf/util/sort.c | 2 +- 6 files changed, 61 insertions(+), 17 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f6ab1310a0bdd6c4..66a4c769b2d76436 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1284,6 +1284,7 @@ __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_gue= st_sys, PERF_HPP_FMT_TYPE__ __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, PERF_HPP_FMT_TY= PE__PERCENT) __HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period, PERF_HPP_FMT_TYPE__PERCEN= T) __HPP_COLOR_ACC_PERCENT_FN(latency_acc, latency, PERF_HPP_FMT_TYPE__LATENC= Y) +__HPP_COLOR_MEM_STAT_FN(op, OP) =20 #undef __HPP_COLOR_PERCENT_FN #undef __HPP_COLOR_ACC_PERCENT_FN @@ -1307,6 +1308,8 @@ void hist_browser__init_hpp(void) hist_browser__hpp_color_overhead_acc; perf_hpp__format[PERF_HPP__LATENCY_ACC].color =3D hist_browser__hpp_color_latency_acc; + perf_hpp__format[PERF_HPP__MEM_STAT_OP].color =3D + hist_browser__hpp_color_mem_stat_op; =20 res_sample_init(); } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 2a5c9f2b328b2c5c..427ce687ad815a62 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -342,6 +342,12 @@ static 
enum mem_stat_type hpp__mem_stat_type(struct pe= rf_hpp_fmt *fmt) if (!perf_hpp__is_mem_stat_entry(fmt)) return -1; =20 + switch (fmt->idx) { + case PERF_HPP__MEM_STAT_OP: + return PERF_MEM_STAT_OP; + default: + break; + } pr_debug("Should not reach here\n"); return -1; } @@ -611,7 +617,7 @@ HPP_AVERAGE_FNS(weight1, weight1) HPP_AVERAGE_FNS(weight2, weight2) HPP_AVERAGE_FNS(weight3, weight3) =20 -HPP_MEM_STAT_FNS(unknown, UNKNOWN) /* placeholder */ +HPP_MEM_STAT_FNS(op, OP) =20 static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *a __maybe_unused, @@ -715,7 +721,7 @@ struct perf_hpp_fmt perf_hpp__format[] =3D { HPP__PRINT_FNS("Weight1", weight1, WEIGHT1), HPP__PRINT_FNS("Weight2", weight2, WEIGHT2), HPP__PRINT_FNS("Weight3", weight3, WEIGHT3), - HPP__MEM_STAT_PRINT_FNS("Unknown", unknown, UNKNOWN), /* placeholder */ + HPP__MEM_STAT_PRINT_FNS("Mem Op", op, OP), }; =20 struct perf_hpp_list perf_hpp_list =3D { @@ -1070,7 +1076,7 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, = struct hists *hists) fmt->len =3D 8; break; =20 - case PERF_HPP__MEM_STAT_UNKNOWN: /* placeholder */ + case PERF_HPP__MEM_STAT_OP: fmt->len =3D MEM_STAT_LEN * MEM_STAT_PRINT_LEN; break; =20 diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 18c696d8d568a9fa..3990cfc21b1615ae 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -587,7 +587,7 @@ enum { PERF_HPP__WEIGHT1, PERF_HPP__WEIGHT2, PERF_HPP__WEIGHT3, - PERF_HPP__MEM_STAT_UNKNOWN, /* placeholder */ + PERF_HPP__MEM_STAT_OP, =20 PERF_HPP__MAX_INDEX }; diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index a4c1e42de30f8307..1c44ccc026fe9974 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -303,15 +303,12 @@ int perf_mem_events__record_args(const char **rec_arg= v, int *argv_nr, char **eve } =20 if (cpu_map) { - struct perf_cpu_map *online =3D cpu_map__online(); - - if (!perf_cpu_map__equal(cpu_map, online)) { + if 
(!perf_cpu_map__equal(cpu_map, cpu_map__online())) { char buf[200]; =20 cpu_map__snprint(cpu_map, buf, sizeof(buf)); pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf); } - perf_cpu_map__put(online); perf_cpu_map__put(cpu_map); } =20 @@ -803,18 +800,32 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2= c_stats *add) /* * It returns an index in hist_entry->mem_stat array for the given val whi= ch * represents a data-src based on the mem_stat_type. - * - * For example, when mst is about cache level, the index can be 1 for L1, = 2 for - * L2 and so on. */ int mem_stat_index(const enum mem_stat_type mst, const u64 val) { + union perf_mem_data_src src =3D { + .val =3D val, + }; + switch (mst) { - case PERF_MEM_STAT_UNKNOWN: /* placeholder */ + case PERF_MEM_STAT_OP: + switch (src.mem_op) { + case PERF_MEM_OP_LOAD: + return MEM_STAT_OP_LOAD; + case PERF_MEM_OP_STORE: + return MEM_STAT_OP_STORE; + case PERF_MEM_OP_LOAD | PERF_MEM_OP_STORE: + return MEM_STAT_OP_LDST; + default: + if (src.mem_op & PERF_MEM_OP_PFETCH) + return MEM_STAT_OP_PFETCH; + if (src.mem_op & PERF_MEM_OP_EXEC) + return MEM_STAT_OP_EXEC; + return MEM_STAT_OP_OTHER; + } default: break; } - (void)val; return -1; } =20 @@ -822,10 +833,25 @@ int mem_stat_index(const enum mem_stat_type mst, cons= t u64 val) const char *mem_stat_name(const enum mem_stat_type mst, const int idx) { switch (mst) { - case PERF_MEM_STAT_UNKNOWN: + case PERF_MEM_STAT_OP: + switch (idx) { + case MEM_STAT_OP_LOAD: + return "Load"; + case MEM_STAT_OP_STORE: + return "Store"; + case MEM_STAT_OP_LDST: + return "Ld+St"; + case MEM_STAT_OP_PFETCH: + return "Pfetch"; + case MEM_STAT_OP_EXEC: + return "Exec"; + case MEM_STAT_OP_OTHER: + return "Other"; + default: + break; + } default: break; } - (void)idx; return "N/A"; } diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 7aeb4c5fefc89698..55e5e2607fb732b4 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ 
-90,7 +90,16 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem= _info *mi); void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add); =20 enum mem_stat_type { - PERF_MEM_STAT_UNKNOWN, /* placeholder */ + PERF_MEM_STAT_OP, +}; + +enum mem_stat_op { + MEM_STAT_OP_LOAD, + MEM_STAT_OP_STORE, + MEM_STAT_OP_LDST, + MEM_STAT_OP_PFETCH, + MEM_STAT_OP_EXEC, + MEM_STAT_OP_OTHER, }; =20 #define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7c669ea27af247e5..53fcb9191ea0cdc3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2623,7 +2623,7 @@ static struct hpp_dimension hpp_sort_dimensions[] =3D= { DIM(PERF_HPP__WEIGHT3, "retire_lat"), DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"), /* used for output only when SORT_MODE__MEM */ - DIM_MEM(PERF_HPP__MEM_STAT_UNKNOWN, "unknown"), /* placeholder */ + DIM_MEM(PERF_HPP__MEM_STAT_OP, "op"), }; =20 #undef DIM_MEM --=20 2.49.0.906.g1f30a19c02-goog From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 47AD22D2687; Wed, 30 Apr 2025 20:55:55 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046556; cv=none; b=BtllVtxEg8WOYDQFAOorIq5mKxlwbEZqddspMCgmYj8YT22iQk9lp9wuqhDMgubwqF5+8D8nOC3MXbz0EV0VUNTYVHlkQ+yB5VTsqpbLY4qbf8u+u1SKjMY6ax43fV2mOJkzhdP45a5yAUfhzKZ7351RLmA4xA1ncJh3kJhcN+g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046556; c=relaxed/simple; bh=bh0WBRo2CF2Dz1/hHYmwAtGgweG2Y4Ay0wrA8009ERQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=qFOLVVSxynGQk11qmYNlsDIxQxY5i+PNm2660A/uepeYGAeQPP4CwXKFt8wXqWk2KjXF4E5frsseR9p7vpaBr1H15CEhAxwyD4RuH5a0wi81s3iTpcvTZpQe38v9qy4yzFuJw5/4cjzEbtn+VIBq3QiFfFOtfNvoDUMaPOya4SE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=vDd5KLLk; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="vDd5KLLk" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 16B29C4CEEE; Wed, 30 Apr 2025 20:55:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046555; bh=bh0WBRo2CF2Dz1/hHYmwAtGgweG2Y4Ay0wrA8009ERQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=vDd5KLLk0tbOjiQUQ6owj2k+UBylL2+TrgtTYB7k2bOFu99ScfLhms25a4H9eL4H8 XLifZOLuE3C/PZyfUn2HqQScg2Yi2AzAnjv9hYsypvdc+uJtYSAqQU1cxgnT8ORHQc A/i/2Tdz2SnlxBkXbbojVGCTs3xkIz9GiJ6Ai3xOpPFuon3ICY6eYLcc7LUi3iBddx OqZnHdE6x7iI4KaRaCptE3zcGmmDSXtOFaSwkmBZhw+kPjLHMgr7Oi63QHXQWk/Swa HrN37/nbYTh7svtlFp2PCFmHNRbdTjBMGEMHBPSY2A4yn+RiZEgq1iW4I4IpKLFsLq sUibRRCYem8Zw== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 08/11] perf hist: Hide unused mem stat columns Date: Wed, 30 Apr 2025 13:55:45 -0700 Message-ID: <20250430205548.789750-9-namhyung@kernel.org> X-Mailer: git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Some mem_stat types don't use all 8 columns. 
And there are cases where samples are available only for certain kinds of mem_stat types. In that case, hide the columns which have no samples. The new output for the previous data would be: $ perf mem report -F overhead,op,comm --stdio ... # ------ Mem Op ------- # Overhead Load Store Other Command # ........ ..................... ............... # 44.85% 21.1% 30.7% 48.3% swapper 26.82% 98.8% 0.3% 0.9% netsli-prober 7.19% 51.7% 13.7% 34.6% perf 5.81% 89.7% 2.2% 8.1% qemu-system-ppc 4.77% 100.0% 0.0% 0.0% notifications_c 1.77% 95.9% 1.2% 3.0% MemoryReleaser 0.77% 71.6% 4.1% 24.3% DefaultEventMan 0.19% 66.7% 22.2% 11.1% gnome-shell ... On Intel machines, the event is only for loads or stores so it'll have only one column like below: # Mem Op # Overhead Load Command # ........ ....... ............... # 20.55% 100.0% swapper 17.13% 100.0% chrome 9.02% 100.0% data-loop.0 6.26% 100.0% pipewire-pulse 5.63% 100.0% threaded-ml 5.47% 100.0% GraphRunner 5.37% 100.0% AudioIP~allback 5.30% 100.0% Chrome_ChildIOT 3.17% 100.0% Isolated Web Co ... 
Signed-off-by: Namhyung Kim --- tools/perf/ui/hist.c | 35 +++++++++++++++++++++++++++++++++-- tools/perf/util/hist.c | 2 ++ tools/perf/util/hist.h | 1 + 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 427ce687ad815a62..661922c4d7863224 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -178,6 +178,9 @@ int hpp__fmt_mem_stat(struct perf_hpp_fmt *fmt __maybe_= unused, struct perf_hpp * for (int i =3D 0; i < MEM_STAT_LEN; i++) { u64 val =3D he->mem_stat[mem_stat_idx].entries[i]; =20 + if (hists->mem_stat_total[mem_stat_idx].entries[i] =3D=3D 0) + continue; + ret +=3D hpp__call_print_fn(hpp, print_fn, fmtstr, 100.0 * val / total); } =20 @@ -405,12 +408,31 @@ static int hpp__header_mem_stat_fn(struct perf_hpp_fm= t *fmt, struct perf_hpp *hp int ret =3D 0; int len; enum mem_stat_type mst =3D hpp__mem_stat_type(fmt); + int mem_stat_idx =3D -1; + + for (int i =3D 0; i < hists->nr_mem_stats; i++) { + if (hists->mem_stat_types[i] =3D=3D mst) { + mem_stat_idx =3D i; + break; + } + } + assert(mem_stat_idx !=3D -1); =20 - (void)hists; if (line =3D=3D 0) { int left, right; =20 - len =3D fmt->len; + len =3D 0; + /* update fmt->len for acutally used columns only */ + for (int i =3D 0; i < MEM_STAT_LEN; i++) { + if (hists->mem_stat_total[mem_stat_idx].entries[i]) + len +=3D MEM_STAT_PRINT_LEN; + } + fmt->len =3D len; + + /* print header directly if single column only */ + if (len =3D=3D MEM_STAT_PRINT_LEN) + return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name); + left =3D (len - strlen(fmt->name)) / 2 - 1; right =3D len - left - strlen(fmt->name) - 2; =20 @@ -423,10 +445,14 @@ static int hpp__header_mem_stat_fn(struct perf_hpp_fm= t *fmt, struct perf_hpp *hp left, graph_dotted_line, fmt->name, right, graph_dotted_line); } =20 + len =3D hpp->size; for (int i =3D 0; i < MEM_STAT_LEN; i++) { int printed; =20 + if (hists->mem_stat_total[mem_stat_idx].entries[i] =3D=3D 0) + continue; + printed =3D 
scnprintf(buf, len, "%*s", MEM_STAT_PRINT_LEN, mem_stat_name(mst, i)); ret +=3D printed; @@ -1214,6 +1240,11 @@ int perf_hpp__alloc_mem_stats(struct perf_hpp_list *= list, struct evlist *evlist) if (hists->mem_stat_types =3D=3D NULL) return -ENOMEM; =20 + hists->mem_stat_total =3D calloc(nr_mem_stats, + sizeof(*hists->mem_stat_total)); + if (hists->mem_stat_total =3D=3D NULL) + return -ENOMEM; + memcpy(hists->mem_stat_types, mst, nr_mem_stats * sizeof(*mst)); hists->nr_mem_stats =3D nr_mem_stats; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 7759c1818c1ad168..afc6855327ab0de6 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -354,6 +354,7 @@ static int hists__update_mem_stat(struct hists *hists, = struct hist_entry *he, =20 assert(0 <=3D idx && idx < MEM_STAT_LEN); he->mem_stat[i].entries[idx] +=3D period; + hists->mem_stat_total[i].entries[idx] +=3D period; } return 0; } @@ -3054,6 +3055,7 @@ static void hists_evsel__exit(struct evsel *evsel) =20 hists__delete_all_entries(hists); zfree(&hists->mem_stat_types); + zfree(&hists->mem_stat_total); =20 list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) { perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3990cfc21b1615ae..fa5e886e5b04ec9b 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -135,6 +135,7 @@ struct hists { int nr_hpp_node; int nr_mem_stats; enum mem_stat_type *mem_stat_types; + struct he_mem_stat *mem_stat_total; }; =20 #define hists__has(__h, __f) (__h)->hpp_list->__f --=20 2.49.0.906.g1f30a19c02-goog From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CEA9B2D269B; Wed, 30 Apr 2025 20:55:56 +0000 (UTC) Authentication-Results: 
smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046556; cv=none; b=klnAQoD0jaeSoZ6N24IUjI5FXvLv/GSO8Fm4882DMlhE92gZRkoxuh0GjjOFkemCPXpZuJw8qlM+hYTbkjAh+t8I2GAj5WvyLVAGM8QFiE29TEkIVYcL72KuTXEjlyOl5p5z3/QsvivlPkufYlsHcD+vPdyKbzFedr137NbP+T8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046556; c=relaxed/simple; bh=Vxirz+95BbhjXKvR+TeypzcT+/wB8PdmqOQqpeCxrTI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=ky4wxQGVto0AU9uEjdubszoFgxpO0U/C+oxlvk51aAW9/v9T+i21hgDbr/5Fmx/Otx5Co2HQvrAt8d6jTjHT4NgLHHQwDDFE92dyMgs8JikfMx4uuFzy5chI2dfMeKCn7n3KYJtUiTCk9zecHdmBfNIg/Yi8lJ7AGjp6OziCtPg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=LZ1gf74L; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="LZ1gf74L" Received: by smtp.kernel.org (Postfix) with ESMTPSA id BCFE2C4CEE7; Wed, 30 Apr 2025 20:55:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046556; bh=Vxirz+95BbhjXKvR+TeypzcT+/wB8PdmqOQqpeCxrTI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LZ1gf74Ltvo9V3BdsnaTLPNEoGXCqrdfyvHb8WlSe5saWLSHy7HCK9xkW9uP/OPRl doVNjf9AIlbMv7RPS8DLhPTn4YEbemAzFWmkOR7Zfbmm7pxtljACBGIxt+zFUOkNf+ 5Ole8weKc0paJR3jqmFYAaXicYEggCYqkZak6K9FB6qIbz/6F7dDlDr70XKdHKtbrn dxoQstH7PlWllBOLEzk0R0jQC3DM/ci330BeoDvrMUoagrpMGtB2TknNvCR/DsyBWI N+Il92SEojylRuarfnzmLhGxPWHJZNQYzU/jUO8lJTkxAnAIdE3H5D99u6unfA1HTO AyL5qYZ2DHwFg== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 09/11] perf mem: Add 'cache' and 'memory' output fields Date: Wed, 
30 Apr 2025 13:55:46 -0700 Message-ID: <20250430205548.789750-10-namhyung@kernel.org> X-Mailer: git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This is a breakdown of perf_mem_data_src.mem_lvl_num. But it's also divided into two parts because the combination is bigger than 8. Since there are many entries for different cache levels, 'cache' field focuses on them. I generalized buffers like LFB, MAB and MHB to L1-buf and L2-buf. The rest goes to 'memory' field which can be RAM, CXL, PMEM, IO, etc. $ perf mem report -F cache,mem,dso --stdio ... # # -------------- Cache -------------- --- Memory --- # L1 L2 L3 L1-buf Other RAM Other Shared Object # ................................... .............. ..................= .................. # 53.9% 3.6% 16.2% 21.6% 4.8% 4.8% 95.2% [kernel.kallsyms] 64.7% 1.7% 3.5% 17.4% 12.8% 12.8% 87.2% chrome (deleted) 78.3% 2.8% 0.0% 1.0% 17.9% 17.9% 82.1% libc.so.6 39.6% 1.5% 0.0% 5.7% 53.2% 53.2% 46.8% libxul.so 26.2% 0.0% 0.0% 0.0% 73.8% 73.8% 26.2% [unknown] 85.5% 0.0% 0.0% 14.5% 0.0% 0.0% 100.0% libspa-audioconver= t.so 66.3% 4.4% 0.0% 29.4% 0.0% 0.0% 100.0% libglib-2.0.so.0.8= 200.1 (deleted) 1.9% 0.0% 0.0% 0.0% 98.1% 98.1% 1.9% libmutter-cogl-15.= so.0.0.0 (deleted) 10.6% 0.0% 0.0% 89.4% 0.0% 0.0% 100.0% libpulsecommon-16.= 1.so 0.0% 0.0% 0.0% 100.0% 0.0% 0.0% 100.0% libfreeblpriv3.so = (deleted) ... 
Signed-off-by: Namhyung Kim --- tools/perf/ui/browsers/hists.c | 6 +++ tools/perf/ui/hist.c | 10 +++++ tools/perf/util/hist.h | 2 + tools/perf/util/mem-events.c | 71 +++++++++++++++++++++++++++++++++- tools/perf/util/mem-events.h | 24 +++++++++++- tools/perf/util/sort.c | 2 + 6 files changed, 113 insertions(+), 2 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 66a4c769b2d76436..675dd64067747126 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1285,6 +1285,8 @@ __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_gues= t_us, PERF_HPP_FMT_TYPE__PE __HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period, PERF_HPP_FMT_TYPE__PERCEN= T) __HPP_COLOR_ACC_PERCENT_FN(latency_acc, latency, PERF_HPP_FMT_TYPE__LATENC= Y) __HPP_COLOR_MEM_STAT_FN(op, OP) +__HPP_COLOR_MEM_STAT_FN(cache, CACHE) +__HPP_COLOR_MEM_STAT_FN(memory, MEMORY) =20 #undef __HPP_COLOR_PERCENT_FN #undef __HPP_COLOR_ACC_PERCENT_FN @@ -1310,6 +1312,10 @@ void hist_browser__init_hpp(void) hist_browser__hpp_color_latency_acc; perf_hpp__format[PERF_HPP__MEM_STAT_OP].color =3D hist_browser__hpp_color_mem_stat_op; + perf_hpp__format[PERF_HPP__MEM_STAT_CACHE].color =3D + hist_browser__hpp_color_mem_stat_cache; + perf_hpp__format[PERF_HPP__MEM_STAT_MEMORY].color =3D + hist_browser__hpp_color_mem_stat_memory; =20 res_sample_init(); } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 661922c4d7863224..7fc09c738ed02acb 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -348,6 +348,10 @@ static enum mem_stat_type hpp__mem_stat_type(struct pe= rf_hpp_fmt *fmt) switch (fmt->idx) { case PERF_HPP__MEM_STAT_OP: return PERF_MEM_STAT_OP; + case PERF_HPP__MEM_STAT_CACHE: + return PERF_MEM_STAT_CACHE; + case PERF_HPP__MEM_STAT_MEMORY: + return PERF_MEM_STAT_MEMORY; default: break; } @@ -644,6 +648,8 @@ HPP_AVERAGE_FNS(weight2, weight2) HPP_AVERAGE_FNS(weight3, weight3) =20 HPP_MEM_STAT_FNS(op, OP) +HPP_MEM_STAT_FNS(cache, CACHE) 
+HPP_MEM_STAT_FNS(memory, MEMORY) =20 static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *a __maybe_unused, @@ -748,6 +754,8 @@ struct perf_hpp_fmt perf_hpp__format[] =3D { HPP__PRINT_FNS("Weight2", weight2, WEIGHT2), HPP__PRINT_FNS("Weight3", weight3, WEIGHT3), HPP__MEM_STAT_PRINT_FNS("Mem Op", op, OP), + HPP__MEM_STAT_PRINT_FNS("Cache", cache, CACHE), + HPP__MEM_STAT_PRINT_FNS("Memory", memory, MEMORY), }; =20 struct perf_hpp_list perf_hpp_list =3D { @@ -1103,6 +1111,8 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, = struct hists *hists) break; =20 case PERF_HPP__MEM_STAT_OP: + case PERF_HPP__MEM_STAT_CACHE: + case PERF_HPP__MEM_STAT_MEMORY: fmt->len =3D MEM_STAT_LEN * MEM_STAT_PRINT_LEN; break; =20 diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index fa5e886e5b04ec9b..9de50d929ad1268c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -589,6 +589,8 @@ enum { PERF_HPP__WEIGHT2, PERF_HPP__WEIGHT3, PERF_HPP__MEM_STAT_OP, + PERF_HPP__MEM_STAT_CACHE, + PERF_HPP__MEM_STAT_MEMORY, =20 PERF_HPP__MAX_INDEX }; diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 1c44ccc026fe9974..6822815278a4b213 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -823,6 +823,40 @@ int mem_stat_index(const enum mem_stat_type mst, const= u64 val) return MEM_STAT_OP_EXEC; return MEM_STAT_OP_OTHER; } + case PERF_MEM_STAT_CACHE: + switch (src.mem_lvl_num) { + case PERF_MEM_LVLNUM_L1: + return MEM_STAT_CACHE_L1; + case PERF_MEM_LVLNUM_L2: + return MEM_STAT_CACHE_L2; + case PERF_MEM_LVLNUM_L3: + return MEM_STAT_CACHE_L3; + case PERF_MEM_LVLNUM_L4: + return MEM_STAT_CACHE_L4; + case PERF_MEM_LVLNUM_LFB: + return MEM_STAT_CACHE_L1_BUF; + case PERF_MEM_LVLNUM_L2_MHB: + return MEM_STAT_CACHE_L2_BUF; + default: + return MEM_STAT_CACHE_OTHER; + } + case PERF_MEM_STAT_MEMORY: + switch (src.mem_lvl_num) { + case PERF_MEM_LVLNUM_MSC: + return MEM_STAT_MEMORY_MSC; + case 
PERF_MEM_LVLNUM_RAM: + return MEM_STAT_MEMORY_RAM; + case PERF_MEM_LVLNUM_UNC: + return MEM_STAT_MEMORY_UNC; + case PERF_MEM_LVLNUM_CXL: + return MEM_STAT_MEMORY_CXL; + case PERF_MEM_LVLNUM_IO: + return MEM_STAT_MEMORY_IO; + case PERF_MEM_LVLNUM_PMEM: + return MEM_STAT_MEMORY_PMEM; + default: + return MEM_STAT_MEMORY_OTHER; + } default: break; } @@ -846,9 +880,44 @@ const char *mem_stat_name(const enum mem_stat_type mst= , const int idx) case MEM_STAT_OP_EXEC: return "Exec"; case MEM_STAT_OP_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_CACHE: + switch (idx) { + case MEM_STAT_CACHE_L1: + return "L1"; + case MEM_STAT_CACHE_L2: + return "L2"; + case MEM_STAT_CACHE_L3: + return "L3"; + case MEM_STAT_CACHE_L4: + return "L4"; + case MEM_STAT_CACHE_L1_BUF: + return "L1-buf"; + case MEM_STAT_CACHE_L2_BUF: + return "L2-buf"; + case MEM_STAT_CACHE_OTHER: + default: return "Other"; + } + case PERF_MEM_STAT_MEMORY: + switch (idx) { + case MEM_STAT_MEMORY_RAM: + return "RAM"; + case MEM_STAT_MEMORY_MSC: + return "MSC"; + case MEM_STAT_MEMORY_UNC: + return "Uncach"; + case MEM_STAT_MEMORY_CXL: + return "CXL"; + case MEM_STAT_MEMORY_IO: + return "IO"; + case MEM_STAT_MEMORY_PMEM: + return "PMEM"; + case MEM_STAT_MEMORY_OTHER: default: - break; + return "Other"; } default: break; diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 55e5e2607fb732b4..002e2772400e3dda 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -91,8 +91,12 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_s= tats *add); =20 enum mem_stat_type { PERF_MEM_STAT_OP, + PERF_MEM_STAT_CACHE, + PERF_MEM_STAT_MEMORY, }; =20 +#define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ + enum mem_stat_op { MEM_STAT_OP_LOAD, MEM_STAT_OP_STORE, @@ -102,7 +106,25 @@ enum mem_stat_op { MEM_STAT_OP_OTHER, }; =20 -#define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ +enum mem_stat_cache { + MEM_STAT_CACHE_L1, + 
MEM_STAT_CACHE_L2, + MEM_STAT_CACHE_L3, + MEM_STAT_CACHE_L4, + MEM_STAT_CACHE_L1_BUF, + MEM_STAT_CACHE_L2_BUF, + MEM_STAT_CACHE_OTHER, +}; + +enum mem_stat_memory { + MEM_STAT_MEMORY_RAM, + MEM_STAT_MEMORY_MSC, + MEM_STAT_MEMORY_UNC, + MEM_STAT_MEMORY_CXL, + MEM_STAT_MEMORY_IO, + MEM_STAT_MEMORY_PMEM, + MEM_STAT_MEMORY_OTHER, +}; =20 int mem_stat_index(const enum mem_stat_type mst, const u64 data_src); const char *mem_stat_name(const enum mem_stat_type mst, const int idx); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 53fcb9191ea0cdc3..2ad88f7de95a2247 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2624,6 +2624,8 @@ static struct hpp_dimension hpp_sort_dimensions[] =3D= { DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"), /* used for output only when SORT_MODE__MEM */ DIM_MEM(PERF_HPP__MEM_STAT_OP, "op"), + DIM_MEM(PERF_HPP__MEM_STAT_CACHE, "cache"), + DIM_MEM(PERF_HPP__MEM_STAT_MEMORY, "memory"), }; =20 #undef DIM_MEM --=20 2.49.0.906.g1f30a19c02-goog From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2A2B22D26A9; Wed, 30 Apr 2025 20:55:57 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046557; cv=none; b=NXz6XyZwtiqd+TQ3WwVxxBMg5cb+XQ8Ew10eiTVCfSpgRejftTEcDH1uz5u/4APQvSwbTyC36q/IVvINIEGowy5M7mlo4IkLiqJHMmyPWfV9MeTvOcRcIQyyj9j+r52jlZ7fJ1aosfrfZTuqQeWZyPV+O1C6iwE16mvp5Q1Z+ic= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046557; c=relaxed/simple; bh=WZ+nSGlzHsTS0xbr4sBolxWqId6PC7bwCnrZ6y/7vkQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=MlN0IImHH1UXDvKVOUAkC8p9YvwVhp6Y9XJ3ycXzIgDoMYYZw82q2FzCRb+GR3jgVh9uAwdQ69kRtqy4uvVxC4E0nFEiijR0GOtVX5eKUSkK3ynm2VS4VrWB5QWxKA1S/np6S7NFnN9jw2XXfkjwIiEXV7v8/OGBDJRvqXBSQzc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=KnvR8D2o; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="KnvR8D2o" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6D2F1C4CEED; Wed, 30 Apr 2025 20:55:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046556; bh=WZ+nSGlzHsTS0xbr4sBolxWqId6PC7bwCnrZ6y/7vkQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=KnvR8D2o/D0i49Q4qg+6FG3UouBW7GSW9j127i4+XCw5nL8vJnDHzUV36CJRV7VFz 8Lp2mrdMaPS0F5PxnR/D21BLcym/rt06rL0W1iphRuiqEakecq4lYidOTyuJnmYREi z9ZbNkPwGhHHCsqlieCJOvWSlf/Ez4CY72OzLj/1rbK2fM/Y06OtjtAE9zfflgN6Z2 JC3rpwz7HHWLe1nDwwAbkhegFWPHBdhWVNBNHnKxwuc0jn/TqqJCtJrjMU24d+KPTi eOFrXSDVZQunARZw309zxoL5Rtt66gfal3q8XFSQDlp//S9Aty83fo7gTgJCz5ZCSw Y++CoiClfsv/g== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 10/11] perf mem: Add 'snoop' output field Date: Wed, 30 Apr 2025 13:55:47 -0700 Message-ID: <20250430205548.789750-11-namhyung@kernel.org> X-Mailer: git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This is a breakdown of perf_mem_data_src.mem_snoop values. 
For now, it doesn't use mem_snoopx values like FWD and PEER. $ perf mem report -F overhead,snoop,comm --stdio ... # ---------- Snoop ----------- # Overhead Hit HitM Miss Other Command # ........ ............................ ............... # 34.24% 0.6% 0.0% 0.0% 99.4% gnome-shell 12.02% 1.0% 0.0% 0.0% 99.0% chrome 9.32% 1.0% 0.0% 0.3% 98.7% Isolated Web Co 6.85% 1.0% 0.3% 0.0% 98.6% swapper 6.30% 0.8% 0.8% 0.0% 98.5% Xorg 3.02% 2.4% 0.0% 0.0% 97.6% VizCompositorTh 2.35% 0.0% 0.0% 0.0% 100.0% firefox-esr 2.04% 0.0% 0.0% 0.0% 100.0% JS Helper 1.51% 3.2% 0.0% 0.0% 96.8% threaded-ml 1.44% 0.0% 0.0% 0.0% 100.0% AudioIP~allback ... Signed-off-by: Namhyung Kim --- tools/perf/ui/browsers/hists.c | 3 +++ tools/perf/ui/hist.c | 5 +++++ tools/perf/util/hist.h | 1 + tools/perf/util/mem-events.c | 23 +++++++++++++++++++++++ tools/perf/util/mem-events.h | 8 ++++++++ tools/perf/util/sort.c | 1 + 6 files changed, 41 insertions(+) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 675dd64067747126..5b080f5062440246 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1287,6 +1287,7 @@ __HPP_COLOR_ACC_PERCENT_FN(latency_acc, latency, PERF= _HPP_FMT_TYPE__LATENCY) __HPP_COLOR_MEM_STAT_FN(op, OP) __HPP_COLOR_MEM_STAT_FN(cache, CACHE) __HPP_COLOR_MEM_STAT_FN(memory, MEMORY) +__HPP_COLOR_MEM_STAT_FN(snoop, SNOOP) =20 #undef __HPP_COLOR_PERCENT_FN #undef __HPP_COLOR_ACC_PERCENT_FN @@ -1316,6 +1317,8 @@ void hist_browser__init_hpp(void) hist_browser__hpp_color_mem_stat_cache; perf_hpp__format[PERF_HPP__MEM_STAT_MEMORY].color =3D hist_browser__hpp_color_mem_stat_memory; + perf_hpp__format[PERF_HPP__MEM_STAT_SNOOP].color =3D + hist_browser__hpp_color_mem_stat_snoop; =20 res_sample_init(); } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 7fc09c738ed02acb..94024dfa8dccf9ba 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -352,6 +352,8 @@ static enum mem_stat_type hpp__mem_stat_type(struct per= 
f_hpp_fmt *fmt) return PERF_MEM_STAT_CACHE; case PERF_HPP__MEM_STAT_MEMORY: return PERF_MEM_STAT_MEMORY; + case PERF_HPP__MEM_STAT_SNOOP: + return PERF_MEM_STAT_SNOOP; default: break; } @@ -650,6 +652,7 @@ HPP_AVERAGE_FNS(weight3, weight3) HPP_MEM_STAT_FNS(op, OP) HPP_MEM_STAT_FNS(cache, CACHE) HPP_MEM_STAT_FNS(memory, MEMORY) +HPP_MEM_STAT_FNS(snoop, SNOOP) =20 static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *a __maybe_unused, @@ -756,6 +759,7 @@ struct perf_hpp_fmt perf_hpp__format[] =3D { HPP__MEM_STAT_PRINT_FNS("Mem Op", op, OP), HPP__MEM_STAT_PRINT_FNS("Cache", cache, CACHE), HPP__MEM_STAT_PRINT_FNS("Memory", memory, MEMORY), + HPP__MEM_STAT_PRINT_FNS("Snoop", snoop, SNOOP), }; =20 struct perf_hpp_list perf_hpp_list =3D { @@ -1113,6 +1117,7 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, = struct hists *hists) case PERF_HPP__MEM_STAT_OP: case PERF_HPP__MEM_STAT_CACHE: case PERF_HPP__MEM_STAT_MEMORY: + case PERF_HPP__MEM_STAT_SNOOP: fmt->len =3D MEM_STAT_LEN * MEM_STAT_PRINT_LEN; break; =20 diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 9de50d929ad1268c..c2d286c4ba395674 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -591,6 +591,7 @@ enum { PERF_HPP__MEM_STAT_OP, PERF_HPP__MEM_STAT_CACHE, PERF_HPP__MEM_STAT_MEMORY, + PERF_HPP__MEM_STAT_SNOOP, =20 PERF_HPP__MAX_INDEX }; diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 6822815278a4b213..ddcfc6500d77a9e6 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -857,6 +857,17 @@ int mem_stat_index(const enum mem_stat_type mst, const= u64 val) default: return MEM_STAT_MEMORY_OTHER; } + case PERF_MEM_STAT_SNOOP: + switch (src.mem_snoop) { + case PERF_MEM_SNOOP_HIT: + return MEM_STAT_SNOOP_HIT; + case PERF_MEM_SNOOP_HITM: + return MEM_STAT_SNOOP_HITM; + case PERF_MEM_SNOOP_MISS: + return MEM_STAT_SNOOP_MISS; + default: + return MEM_STAT_SNOOP_OTHER; + } default: break; } @@ 
-919,6 +930,18 @@ const char *mem_stat_name(const enum mem_stat_type mst= , const int idx) default: return "Other"; } + case PERF_MEM_STAT_SNOOP: + switch (idx) { + case MEM_STAT_SNOOP_HIT: + return "Hit"; + case MEM_STAT_SNOOP_HITM: + return "HitM"; + case MEM_STAT_SNOOP_MISS: + return "Miss"; + case MEM_STAT_SNOOP_OTHER: + default: + return "Other"; + } default: break; } diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 002e2772400e3dda..4d8f18583af42550 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -93,6 +93,7 @@ enum mem_stat_type { PERF_MEM_STAT_OP, PERF_MEM_STAT_CACHE, PERF_MEM_STAT_MEMORY, + PERF_MEM_STAT_SNOOP, }; =20 #define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ @@ -126,6 +127,13 @@ enum mem_stat_memory { MEM_STAT_MEMORY_OTHER, }; =20 +enum mem_stat_snoop { + MEM_STAT_SNOOP_HIT, + MEM_STAT_SNOOP_HITM, + MEM_STAT_SNOOP_MISS, + MEM_STAT_SNOOP_OTHER, +}; + int mem_stat_index(const enum mem_stat_type mst, const u64 data_src); const char *mem_stat_name(const enum mem_stat_type mst, const int idx); =20 diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2ad88f7de95a2247..51a210d874327d3a 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2626,6 +2626,7 @@ static struct hpp_dimension hpp_sort_dimensions[] =3D= { DIM_MEM(PERF_HPP__MEM_STAT_OP, "op"), DIM_MEM(PERF_HPP__MEM_STAT_CACHE, "cache"), DIM_MEM(PERF_HPP__MEM_STAT_MEMORY, "memory"), + DIM_MEM(PERF_HPP__MEM_STAT_SNOOP, "snoop"), }; =20 #undef DIM_MEM --=20 2.49.0.906.g1f30a19c02-goog From nobody Fri Dec 19 06:32:44 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 501592D5D0B; Wed, 30 Apr 2025 20:55:57 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none 
smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046558; cv=none; b=WfYP/IK5fms2nTSBkj25MipZw/99Uc/ie1+gLYkwL+0qCZZWAFNrFVv7mWhBUCRJSlGgV2bWJqVblPwBigWV6nceVTAd2AVqBKapi7AkLLwwFjjwdPYs2353zRIF3NMD/sou3GRdMjHNKVzPABFxvf31AsWTVBMunmI6hoD9WAA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1746046558; c=relaxed/simple; bh=mwpBnuaInsvGExuka4jT+916LzxQ4kQdYADqsxQz6WQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=iyO0Iy3u31um/MoFiFVscbXSJ4HaHj55vzV9aJkKffoC86zVuOQx0zgSuV0iSl2MA0vd6ir2KAbycxV4dGpFD+VcpX25LyPGEeA+8vA3+AhO6yx2j0uzxs3cxd2NzLQkKG+mpKNDgv0SOr6GivI0s+V+hfIV2eyvwxB9JKvJd58= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=D6UIF3IE; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="D6UIF3IE" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 1FBD0C4CEEC; Wed, 30 Apr 2025 20:55:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1746046557; bh=mwpBnuaInsvGExuka4jT+916LzxQ4kQdYADqsxQz6WQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=D6UIF3IE+0m9WJX0ztFawqwEYr2suMbfFj5mOD7zb924RezYUktXScGBcPCPxhn0B 8CZIiBphvYHJ0OSso3PHHYJtam+ED5jqkrJ30ybTdDnHXdi10eM0dW4cOtT3TTtUCa 5mulqwn9a6jytw5eg2O/U59RZXfGE/pETtQwkM3bBM3ncB3WmUgVgOdCSdE+Rn0A7C j9pqifPUaoApW48N7elhiMiFqIGfjLVHjzmkkzJAVZ726P00CycsE8kb1FZupZzcWg IYrmkpdsm5gOTfYGZCC1CUSrXGuuiioDvw2ubARPnpmKnK1QHH6D3EI5JOrQuks5LW cE3U7K3iusIsA== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Ravi Bangoria , Leo Yan Subject: [PATCH 11/11] perf mem: Add 'dtlb' output field Date: Wed, 30 Apr 2025 13:55:48 -0700 Message-ID: 
<20250430205548.789750-12-namhyung@kernel.org> X-Mailer: git-send-email 2.49.0.906.g1f30a19c02-goog In-Reply-To: <20250430205548.789750-1-namhyung@kernel.org> References: <20250430205548.789750-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This is a breakdown of perf_mem_data_src.mem_dtlb values. It assumes PMU drivers set the PERF_MEM_TLB_HIT bit together with the appropriate level bit. Having PERF_MEM_TLB_MISS set means that it failed to find an entry in any level of the TLB. For now, it doesn't use the PERF_MEM_TLB_{WK,OS} bits. Also, it seems Intel machines don't distinguish between L1 and L2 precisely, so I added ANY_HIT (printed as "L?-Hit") to handle that case. $ perf mem report -F overhead,dtlb,dso --stdio ... # --- D-TLB ---- # Overhead L?-Hit Miss Shared Object # ........ .............. ................. # 67.03% 99.5% 0.5% [unknown] 31.23% 99.2% 0.8% [kernel.kallsyms] 1.08% 97.8% 2.2% [i915] 0.36% 100.0% 0.0% [JIT] tid 6853 0.12% 100.0% 0.0% [drm] 0.05% 100.0% 0.0% [drm_kms_helper] 0.05% 100.0% 0.0% [ext4] 0.02% 100.0% 0.0% [aesni_intel] 0.02% 100.0% 0.0% [crc32c_intel] 0.02% 100.0% 0.0% [dm_crypt] ... 
Signed-off-by: Namhyung Kim --- tools/perf/ui/browsers/hists.c | 3 +++ tools/perf/ui/hist.c | 5 +++++ tools/perf/util/hist.h | 1 + tools/perf/util/mem-events.c | 27 +++++++++++++++++++++++++++ tools/perf/util/mem-events.h | 9 +++++++++ tools/perf/util/sort.c | 1 + 6 files changed, 46 insertions(+) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 5b080f5062440246..d26b925e3d7f46af 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1288,6 +1288,7 @@ __HPP_COLOR_MEM_STAT_FN(op, OP) __HPP_COLOR_MEM_STAT_FN(cache, CACHE) __HPP_COLOR_MEM_STAT_FN(memory, MEMORY) __HPP_COLOR_MEM_STAT_FN(snoop, SNOOP) +__HPP_COLOR_MEM_STAT_FN(dtlb, DTLB) =20 #undef __HPP_COLOR_PERCENT_FN #undef __HPP_COLOR_ACC_PERCENT_FN @@ -1319,6 +1320,8 @@ void hist_browser__init_hpp(void) hist_browser__hpp_color_mem_stat_memory; perf_hpp__format[PERF_HPP__MEM_STAT_SNOOP].color =3D hist_browser__hpp_color_mem_stat_snoop; + perf_hpp__format[PERF_HPP__MEM_STAT_DTLB].color =3D + hist_browser__hpp_color_mem_stat_dtlb; =20 res_sample_init(); } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 94024dfa8dccf9ba..ed5c40ebd906f076 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -354,6 +354,8 @@ static enum mem_stat_type hpp__mem_stat_type(struct per= f_hpp_fmt *fmt) return PERF_MEM_STAT_MEMORY; case PERF_HPP__MEM_STAT_SNOOP: return PERF_MEM_STAT_SNOOP; + case PERF_HPP__MEM_STAT_DTLB: + return PERF_MEM_STAT_DTLB; default: break; } @@ -653,6 +655,7 @@ HPP_MEM_STAT_FNS(op, OP) HPP_MEM_STAT_FNS(cache, CACHE) HPP_MEM_STAT_FNS(memory, MEMORY) HPP_MEM_STAT_FNS(snoop, SNOOP) +HPP_MEM_STAT_FNS(dtlb, DTLB) =20 static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *a __maybe_unused, @@ -760,6 +763,7 @@ struct perf_hpp_fmt perf_hpp__format[] =3D { HPP__MEM_STAT_PRINT_FNS("Cache", cache, CACHE), HPP__MEM_STAT_PRINT_FNS("Memory", memory, MEMORY), HPP__MEM_STAT_PRINT_FNS("Snoop", snoop, SNOOP), + 
HPP__MEM_STAT_PRINT_FNS("D-TLB", dtlb, DTLB), }; =20 struct perf_hpp_list perf_hpp_list =3D { @@ -1118,6 +1122,7 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, = struct hists *hists) case PERF_HPP__MEM_STAT_CACHE: case PERF_HPP__MEM_STAT_MEMORY: case PERF_HPP__MEM_STAT_SNOOP: + case PERF_HPP__MEM_STAT_DTLB: fmt->len =3D MEM_STAT_LEN * MEM_STAT_PRINT_LEN; break; =20 diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index c2d286c4ba395674..355198fd70281f43 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -592,6 +592,7 @@ enum { PERF_HPP__MEM_STAT_CACHE, PERF_HPP__MEM_STAT_MEMORY, PERF_HPP__MEM_STAT_SNOOP, + PERF_HPP__MEM_STAT_DTLB, =20 PERF_HPP__MAX_INDEX }; diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ddcfc6500d77a9e6..3e9131e05348a996 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -868,6 +868,19 @@ int mem_stat_index(const enum mem_stat_type mst, const= u64 val) default: return MEM_STAT_SNOOP_OTHER; } + case PERF_MEM_STAT_DTLB: + switch (src.mem_dtlb) { + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_L1_HIT; + case PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_L2_HIT; + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_ANY_HIT; + default: + if (src.mem_dtlb & PERF_MEM_TLB_MISS) + return MEM_STAT_DTLB_MISS; + return MEM_STAT_DTLB_OTHER; + } default: break; } @@ -942,6 +955,20 @@ const char *mem_stat_name(const enum mem_stat_type mst= , const int idx) default: return "Other"; } + case PERF_MEM_STAT_DTLB: + switch (idx) { + case MEM_STAT_DTLB_L1_HIT: + return "L1-Hit"; + case MEM_STAT_DTLB_L2_HIT: + return "L2-Hit"; + case MEM_STAT_DTLB_ANY_HIT: + return "L?-Hit"; + case MEM_STAT_DTLB_MISS: + return "Miss"; + case MEM_STAT_DTLB_OTHER: + default: + return "Other"; + } default: break; } diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 4d8f18583af42550..5b98076904b0b689 
100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -94,6 +94,7 @@ enum mem_stat_type { PERF_MEM_STAT_CACHE, PERF_MEM_STAT_MEMORY, PERF_MEM_STAT_SNOOP, + PERF_MEM_STAT_DTLB, }; =20 #define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ @@ -134,6 +135,14 @@ enum mem_stat_snoop { MEM_STAT_SNOOP_OTHER, }; =20 +enum mem_stat_dtlb { + MEM_STAT_DTLB_L1_HIT, + MEM_STAT_DTLB_L2_HIT, + MEM_STAT_DTLB_ANY_HIT, + MEM_STAT_DTLB_MISS, + MEM_STAT_DTLB_OTHER, +}; + int mem_stat_index(const enum mem_stat_type mst, const u64 data_src); const char *mem_stat_name(const enum mem_stat_type mst, const int idx); =20 diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 51a210d874327d3a..8efafa7c10822ee9 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2627,6 +2627,7 @@ static struct hpp_dimension hpp_sort_dimensions[] =3D= { DIM_MEM(PERF_HPP__MEM_STAT_CACHE, "cache"), DIM_MEM(PERF_HPP__MEM_STAT_MEMORY, "memory"), DIM_MEM(PERF_HPP__MEM_STAT_SNOOP, "snoop"), + DIM_MEM(PERF_HPP__MEM_STAT_DTLB, "dtlb"), }; =20 #undef DIM_MEM --=20 2.49.0.906.g1f30a19c02-goog