From nobody Sat Feb 7 17:09:43 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A37C03207; Mon, 29 Jul 2024 00:41:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722213689; cv=none; b=dwtxo0uKqXx4ZFa8ptrtwzfGW3WpLQPvweLq2yqF6YvahLXeVncMzUdrM7hPd7Y1xvf+dp7W6gCrmbngQKMjcG/3QXDSQDfD0Lf+HdFx4LYUWp3aw8XYdyGF1fut0tVYNFdRrvIPfOKYchP5IPZ3dnHa4o4/3FwNn6Fxt2Adbzc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722213689; c=relaxed/simple; bh=+4xhnYSijGDgv46dB8kyW6bhDjEdx5UE3zvs3CKI3sI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=O1gfVHeoSILZ7unVgoOycnmGX/842O0pMJ80ZVlj9zutcrEGo5jL1DX/b/ia3QTo/P54UVKLBfweV/DGfC8QvKmoSpBanP4wWlI+lYOxD73pXWg4JDKmwq6LcwElttJj7T41qmaj5yz22Nm1Ai95nbcjqmgaVUsEiu0l5Rb3StE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=nFT7VcIZ; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="nFT7VcIZ" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C42F5C4AF10; Mon, 29 Jul 2024 00:41:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722213689; bh=+4xhnYSijGDgv46dB8kyW6bhDjEdx5UE3zvs3CKI3sI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=nFT7VcIZW5fXveVxXbVw9r43AK3qh6yI6aqrqnlu6dEh4HiTFQvZQCN37ogUdcihD 7BbUKtauzMp84hQ5C1KLapwFcjwBXYDLWzQa0el9asE3e8l2iay4Dv+l0D5s2o715u k4/kmgj/M+8run/3iIWFqWXHlz5t4x6LhOlZJTjZVGAQL49rT2yWeA8Id+vkjYRYQ9 H06WiPy0EMHJGXIyr9YNB5Ew4UqWwyKH7PQCvCIG5RM6SEDkJSNhuUiI18ih+gqKqe TWkAoSUvO79j3dCRbqXXSoSWhTMS8EUujCBjS8zcPjhMQOzp9B8IHXP62L1HTR+EbI 52MjZUaxNIUZA== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Steven Rostedt , Changbin Du Subject: [PATCH 1/4] perf ftrace: Add 'tail' option to --graph-opts Date: Sun, 28 Jul 2024 17:41:24 -0700 Message-ID: <20240729004127.238611-2-namhyung@kernel.org> X-Mailer: git-send-email 2.46.0.rc1.232.g9752f9e123-goog In-Reply-To: <20240729004127.238611-1-namhyung@kernel.org> References: <20240729004127.238611-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The 'graph-tail' option is to print function name as a comment at the end. This is useful when a large function is mixed with other functions (possibly from different CPUs). For example, $ sudo perf ftrace -- perf stat true ... 1) | get_unused_fd_flags() { 1) | alloc_fd() { 1) 0.178 us | _raw_spin_lock(); 1) 0.187 us | expand_files(); 1) 0.169 us | _raw_spin_unlock(); 1) 1.211 us | } 1) 1.503 us | } $ sudo perf ftrace --graph-opts tail -- perf stat true ... 1) | get_unused_fd_flags() { 1) | alloc_fd() { 1) 0.099 us | _raw_spin_lock(); 1) 0.083 us | expand_files(); 1) 0.081 us | _raw_spin_unlock(); 1) 0.601 us | } /* alloc_fd */ 1) 0.751 us | } /* get_unused_fd_flags */ Signed-off-by: Namhyung Kim Reviewed-by: Ian Rogers --- tools/perf/Documentation/perf-ftrace.txt | 1 + tools/perf/builtin-ftrace.c | 18 ++++++++++++++++++ tools/perf/util/ftrace.h | 1 + 3 files changed, 20 insertions(+) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Document= ation/perf-ftrace.txt index d780b93fcf87..7ea1645a13cf 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -125,6 +125,7 @@ OPTIONS for 'perf ftrace trace' - verbose - Show process names, PIDs, timestamps, etc. - thresh=3D - Setup trace duration threshold in microseconds. - depth=3D - Set max depth for function graph tracer to follow. + - tail - Print function name at the end. =20 =20 OPTIONS for 'perf ftrace latency' diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index eb30c8eca488..33c52ebda2fd 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -228,6 +228,7 @@ static void reset_tracing_options(struct perf_ftrace *f= trace __maybe_unused) write_tracing_option_file("funcgraph-irqs", "1"); write_tracing_option_file("funcgraph-proc", "0"); write_tracing_option_file("funcgraph-abstime", "0"); + write_tracing_option_file("funcgraph-tail", "0"); write_tracing_option_file("latency-format", "0"); write_tracing_option_file("irq-info", "0"); } @@ -464,6 +465,17 @@ static int set_tracing_funcgraph_verbose(struct perf_f= trace *ftrace) return 0; } =20 +static int set_tracing_funcgraph_tail(struct perf_ftrace *ftrace) +{ + if (!ftrace->graph_tail) + return 0; + + if (write_tracing_option_file("funcgraph-tail", "1") < 0) + return -1; + + return 0; +} + static int set_tracing_thresh(struct perf_ftrace *ftrace) { int ret; @@ -540,6 +552,11 @@ static int set_tracing_options(struct perf_ftrace *ftr= ace) return -1; } =20 + if (set_tracing_funcgraph_tail(ftrace) < 0) { + pr_err("failed to set tracing option funcgraph-tail\n"); + return -1; + } + return 0; } =20 @@ -1099,6 +1116,7 @@ static int parse_graph_tracer_opts(const struct optio= n *opt, { .name =3D "verbose", .value_ptr =3D &ftrace->graph_verbose }, { .name =3D "thresh", .value_ptr =3D &ftrace->graph_thresh }, { .name =3D "depth", .value_ptr =3D &ftrace->graph_depth }, + { .name =3D "tail", .value_ptr =3D &ftrace->graph_tail }, { .name =3D NULL, } }; =20 diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h index 558efcb98d25..2515e841c64c 100644 --- a/tools/perf/util/ftrace.h +++ b/tools/perf/util/ftrace.h @@ -25,6 +25,7 @@ struct perf_ftrace { int graph_noirqs; int graph_verbose; int graph_thresh; + int graph_tail; }; =20 struct filter_entry { --=20 2.46.0.rc1.232.g9752f9e123-goog From nobody Sat Feb 7 17:09:43 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DE96B4436; Mon, 29 Jul 2024 00:41:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722213690; cv=none; b=GmH+Xla87KuBzMIyA7EozMw8CXGL24aTvpFk8qnR4G8+KFiykcQy2U60pcNARIN2mYYaKOqCP7arV2NhJ4+2GL+rIAkCkhmiao4TuplJjcEWR3nwwxAn540e+X86uFyFBixSM9L2fOlTVnmG1NNudBnDIqrhisi8ftPThWUTtfA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722213690; c=relaxed/simple; bh=FzOBSK/8f/GP8yiHn951JyTfNI4xmMPYoNwpO6Dn0o4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=c1ogFO6Lw44sFNyT/8cC6L18D8uhGNVTQ6DG3plFH40L83NyKYl+NUG6yN1GTlFP25yQx0MG9gPG+rIAO98uOBssvM6dYxXdZJUu81q//7eOBjdupZ1682EyZdhxrxlaNd76cnnefKIpxuSeVs8KEsXkGck0Qi+t4HAcUtoX7CY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=vNWYuHpM; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="vNWYuHpM" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5747BC4AF12; Mon, 29 Jul 2024 00:41:29 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722213689; bh=FzOBSK/8f/GP8yiHn951JyTfNI4xmMPYoNwpO6Dn0o4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=vNWYuHpMgYTC5hIJDitvvTkkEnawofUo69FCmFkqH1q3CWXiGJ7tN044HdfzouDsg fmLHZfcdr3H4XGfR7UiTHEMwsP67k5DCYUvGq6VsCYfHjTfZE7eFJLuwu+N/nRRMH/ mNoDSdSwCnlaMtRz6o8i2fPjzxot9x7ySkQ4QKcfjtIfCF2b5dfkIlrG8Jp3OddBk2 RzofH+HqjPJmeb7UxAXdycGevtbzrmTMyJtXG8hbFOd2ZFcmdXNH4KFeBnNCg39eZs Yk5YCnv4dYMg4Pt01qmmS9pCVoMws3XYsUBiC5iC7G4vBbwson3hxPDfX6WaCWLIvi ug3NNnyH0Aw1g== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Steven Rostedt , Changbin Du Subject: [PATCH 2/4] perf ftrace: Factor out check_ftrace_capable() Date: Sun, 28 Jul 2024 17:41:25 -0700 Message-ID: <20240729004127.238611-3-namhyung@kernel.org> X-Mailer: git-send-email 2.46.0.rc1.232.g9752f9e123-goog In-Reply-To: <20240729004127.238611-1-namhyung@kernel.org> References: <20240729004127.238611-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The check is a common part of the ftrace commands, let's move it out. Signed-off-by: Namhyung Kim Reviewed-by: Ian Rogers --- tools/perf/builtin-ftrace.c | 44 +++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 33c52ebda2fd..978608ecd89c 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -59,6 +59,22 @@ static void ftrace__workload_exec_failed_signal(int sign= o __maybe_unused, done =3D true; } =20 +static int check_ftrace_capable(void) +{ + if (!(perf_cap__capable(CAP_PERFMON) || + perf_cap__capable(CAP_SYS_ADMIN))) { + pr_err("ftrace only works for %s!\n", +#ifdef HAVE_LIBCAP_SUPPORT + "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" +#else + "root" +#endif + ); + return -1; + } + return 0; +} + static int __write_tracing_file(const char *name, const char *val, bool ap= pend) { char *file; @@ -586,18 +602,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace) .events =3D POLLIN, }; =20 - if (!(perf_cap__capable(CAP_PERFMON) || - perf_cap__capable(CAP_SYS_ADMIN))) { - pr_err("ftrace only works for %s!\n", -#ifdef HAVE_LIBCAP_SUPPORT - "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" -#else - "root" -#endif - ); - return -1; - } - select_tracer(ftrace); =20 if (reset_tracing_files(ftrace) < 0) { @@ -902,18 +906,6 @@ static int __cmd_latency(struct perf_ftrace *ftrace) }; int buckets[NUM_BUCKET] =3D { }; =20 - if (!(perf_cap__capable(CAP_PERFMON) || - perf_cap__capable(CAP_SYS_ADMIN))) { - pr_err("ftrace only works for %s!\n", -#ifdef HAVE_LIBCAP_SUPPORT - "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" -#else - "root" -#endif - ); - return -1; - } - trace_fd =3D prepare_func_latency(ftrace); if (trace_fd < 0) goto out; @@ -1220,6 +1212,10 @@ int cmd_ftrace(int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGPIPE, sig_handler); =20 + ret =3D check_ftrace_capable(); + if (ret < 0) + return -1; + ret =3D perf_config(perf_ftrace_config, &ftrace); if (ret < 0) return -1; --=20 2.46.0.rc1.232.g9752f9e123-goog From nobody Sat Feb 7 17:09:43 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E20F98F72; Mon, 29 Jul 2024 00:41:30 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722213691; cv=none; b=CzZcqCNMwovdGbPtil7mtGROkH0m3SXayZAS7xKA6Cf+Fhf83kwReAvoIpLMl+jddWkjhbTb360pR/jH6Bor+xc2R+C6L2XIHhfVRS9klyvYLRXLMku2Aj7kUYF6bmluWwhzsTyxkmqsF2/OmVqhJJB05ioFYZs7YBlsk7U9Nuo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722213691; c=relaxed/simple; bh=1Mmp/w3vfm1R4mAz1PWP95agTnfr8xNHOGqUFP4LtVs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=feztMe8KXjzfyn+87b6Quz4qKnIppAMIW6XpEEd0MWBq6o9SoHIntSMcJRxKFXKvv05aP5qRppLR+c/VXo6yfM+wbgwGUN6EuhbgyvF4h0j7u851TJ5JlhVRTUjyfmMxQRiT7I5RY4Uh81FViJ7ifF0x5TAY3GoAwqtjRo2WOPQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=pc3Kb0XZ; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="pc3Kb0XZ" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E4DC9C4AF10; Mon, 29 Jul 2024 00:41:29 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722213690; bh=1Mmp/w3vfm1R4mAz1PWP95agTnfr8xNHOGqUFP4LtVs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=pc3Kb0XZw5yw/NiNjK8yI01Hgdr4Z+OVDzsDvJ4QyZCazu37CGOJHjYgvopz3n2bF c7OLf3tz3ydJzBUFCITZw+aNYnmPHVF9PH/7eUjU71eM2M7gbTykIRM+NMc38f0pid 4wV6vl/0c5s2XxrsygNBHGzDaCxcpFOWRq5G6gOnADfi6jRNd6xckDX9FFS7BK+lRA 6eMb7E3CfundZMnpcAhPx/YiAFsSKJfJ1pDzJfSKR7JXWU3+/ROSiBmiH6KuNCujNl pPmDYC3BVn0pUHFZ6c0TdGiqUL6KRHCa6pOT6ewKhkr8Eu5zHa02qQgfuIx3DxxKOs 0czqlbTwr4xCA== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Steven Rostedt , Changbin Du Subject: [PATCH 3/4] perf ftrace: Add 'profile' command Date: Sun, 28 Jul 2024 17:41:26 -0700 Message-ID: <20240729004127.238611-4-namhyung@kernel.org> X-Mailer: git-send-email 2.46.0.rc1.232.g9752f9e123-goog In-Reply-To: <20240729004127.238611-1-namhyung@kernel.org> References: <20240729004127.238611-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The 'perf ftrace profile' command is to get function execution profiles using function-graph tracer so that users can see the total, average, max execution time as well as the number of invocation easily. The following is a profile for perf_event_open syscall. $ sudo perf ftrace profile -G __x64_sys_perf_event_open -- \ perf stat -e cycles -C1 true 2> /dev/null | head # Total (us) Avg (us) Max (us) Count Function 65.611 65.611 65.611 1 __x64_sys_perf_event_open 30.527 30.527 30.527 1 anon_inode_getfile 30.260 30.260 30.260 1 __anon_inode_getfile 29.700 29.700 29.700 1 alloc_file_pseudo 17.578 17.578 17.578 1 d_alloc_pseudo 17.382 17.382 17.382 1 __d_alloc 16.738 16.738 16.738 1 kmem_cache_alloc_lru 15.686 15.686 15.686 1 perf_event_alloc 14.012 7.006 11.264 2 obj_cgroup_charge Signed-off-by: Namhyung Kim Reviewed-by: Ian Rogers --- tools/perf/Documentation/perf-ftrace.txt | 42 ++- tools/perf/builtin-ftrace.c | 318 ++++++++++++++++++++++- tools/perf/util/ftrace.h | 2 + 3 files changed, 359 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Document= ation/perf-ftrace.txt index 7ea1645a13cf..33f32467f287 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -9,7 +9,7 @@ perf-ftrace - simple wrapper for kernel's ftrace functional= ity SYNOPSIS -------- [verse] -'perf ftrace' {trace|latency} +'perf ftrace' {trace|latency|profile} =20 DESCRIPTION ----------- @@ -23,6 +23,9 @@ kernel's ftrace infrastructure. 'perf ftrace latency' calculates execution latency of a given function (optionally with BPF) and display it as a histogram. =20 + 'perf ftrace profile' show a execution profile for each function includi= ng + total, average, max time and the number of calls. + The following options apply to perf ftrace. =20 COMMON OPTIONS @@ -146,6 +149,43 @@ OPTIONS for 'perf ftrace latency' Use nano-second instead of micro-second as a base unit of the histogram. =20 =20 +OPTIONS for 'perf ftrace profile' +--------------------------------- + +-T:: +--trace-funcs=3D:: + Set function filter on the given function (or a glob pattern). + Multiple functions can be given by using this option more than once. + The function argument also can be a glob pattern. It will be passed + to 'set_ftrace_filter' in tracefs. + +-N:: +--notrace-funcs=3D:: + Do not trace functions given by the argument. Like -T option, this + can be used more than once to specify multiple functions (or glob + patterns). It will be passed to 'set_ftrace_notrace' in tracefs. + +-G:: +--graph-funcs=3D:: + Set graph filter on the given function (or a glob pattern). This is + useful to trace for functions executed from the given function. This + can be used more than once to specify multiple functions. It will be + passed to 'set_graph_function' in tracefs. + +-g:: +--nograph-funcs=3D:: + Set graph notrace filter on the given function (or a glob pattern). + Like -G option, this is useful for the function_graph tracer only and + disables tracing for function executed from the given function. This + can be used more than once to specify multiple functions. It will be + passed to 'set_graph_notrace' in tracefs. + +-m:: +--buffer-size:: + Set the size of per-cpu tracing buffer, is expected to + be a number with appended unit character - B/K/M/G. + + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-trace[1] diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 978608ecd89c..ae9389435d1b 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -22,15 +23,18 @@ #include "debug.h" #include #include +#include #include #include "evlist.h" #include "target.h" #include "cpumap.h" +#include "hashmap.h" #include "thread_map.h" #include "strfilter.h" #include "util/cap.h" #include "util/config.h" #include "util/ftrace.h" +#include "util/stat.h" #include "util/units.h" #include "util/parse-sublevel-options.h" =20 @@ -959,6 +963,294 @@ static int __cmd_latency(struct perf_ftrace *ftrace) return (done && !workload_exec_errno) ? 0 : -1; } =20 +static size_t profile_hash(long func, void *ctx __maybe_unused) +{ + return str_hash((char *)func); +} + +static bool profile_equal(long func1, long func2, void *ctx __maybe_unused) +{ + return !strcmp((char *)func1, (char *)func2); +} + +static int prepare_func_profile(struct perf_ftrace *ftrace) +{ + ftrace->tracer =3D "function_graph"; + ftrace->graph_tail =3D 1; + + ftrace->profile_hash =3D hashmap__new(profile_hash, profile_equal, NULL); + if (ftrace->profile_hash =3D=3D NULL) + return -ENOMEM; + + return 0; +} + +/* This is saved in a hashmap keyed by the function name */ +struct ftrace_profile_data { + struct stats st; +}; + +static int add_func_duration(struct perf_ftrace *ftrace, char *func, doubl= e time_ns) +{ + struct ftrace_profile_data *prof =3D NULL; + + if (!hashmap__find(ftrace->profile_hash, func, &prof)) { + char *key =3D strdup(func); + + if (key =3D=3D NULL) + return -ENOMEM; + + prof =3D zalloc(sizeof(*prof)); + if (prof =3D=3D NULL) { + free(key); + return -ENOMEM; + } + + init_stats(&prof->st); + hashmap__add(ftrace->profile_hash, key, prof); + } + + update_stats(&prof->st, time_ns); + return 0; +} + +/* + * The ftrace function_graph text output normally looks like below: + * + * CPU DURATION FUNCTION + * + * 0) | syscall_trace_enter.isra.0() { + * 0) | __audit_syscall_entry() { + * 0) | auditd_test_task() { + * 0) 0.271 us | __rcu_read_lock(); + * 0) 0.275 us | __rcu_read_unlock(); + * 0) 1.254 us | } /\* auditd_test_task *\/ + * 0) 0.279 us | ktime_get_coarse_real_ts64(); + * 0) 2.227 us | } /\* __audit_syscall_entry *\/ + * 0) 2.713 us | } /\* syscall_trace_enter.isra.0 *\/ + * + * Parse the line and get the duration and function name. + */ +static int parse_func_duration(struct perf_ftrace *ftrace, char *line, siz= e_t len) +{ + char *p; + char *func; + double duration; + + /* skip CPU */ + p =3D strchr(line, ')'); + if (p =3D=3D NULL) + return 0; + + /* get duration */ + p =3D skip_spaces(p + 1); + + /* no duration? */ + if (p =3D=3D NULL || *p =3D=3D '|') + return 0; + + /* skip markers like '*' or '!' for longer than ms */ + if (!isdigit(*p)) + p++; + + duration =3D strtod(p, &p); + + if (strncmp(p, " us", 3)) { + pr_debug("non-usec time found.. ignoring\n"); + return 0; + } + + /* + * profile stat keeps the max and min values as integer, + * convert to nsec time so that we can have accurate max. + */ + duration *=3D 1000; + + /* skip to the pipe */ + while (p < line + len && *p !=3D '|') + p++; + + if (*p++ !=3D '|') + return -EINVAL; + + /* get function name */ + func =3D skip_spaces(p); + + /* skip the closing bracket and the start of comment */ + if (*func =3D=3D '}') + func +=3D 5; + + /* remove semi-colon or end of comment at the end */ + p =3D line + len - 1; + while (!isalnum(*p) && *p !=3D ']') { + *p =3D '\0'; + --p; + } + + return add_func_duration(ftrace, func, duration); +} + +static int cmp_profile_data(const void *a, const void *b) +{ + const struct hashmap_entry *e1 =3D *(const struct hashmap_entry **)a; + const struct hashmap_entry *e2 =3D *(const struct hashmap_entry **)b; + struct ftrace_profile_data *p1 =3D e1->pvalue; + struct ftrace_profile_data *p2 =3D e2->pvalue; + + /* compare by total time */ + if ((p1->st.n * p1->st.mean) > (p2->st.n * p2->st.mean)) + return -1; + else + return 1; +} + +static void print_profile_result(struct perf_ftrace *ftrace) +{ + struct hashmap_entry *entry, **profile; + size_t i, nr, bkt; + + nr =3D hashmap__size(ftrace->profile_hash); + if (nr =3D=3D 0) + return; + + profile =3D calloc(nr, sizeof(*profile)); + if (profile =3D=3D NULL) { + pr_err("failed to allocate memory for the result\n"); + return; + } + + i =3D 0; + hashmap__for_each_entry(ftrace->profile_hash, entry, bkt) + profile[i++] =3D entry; + + assert(i =3D=3D nr); + + //cmp_profile_data(profile[0], profile[1]); + qsort(profile, nr, sizeof(*profile), cmp_profile_data); + + printf("# %10s %10s %10s %10s %s\n", + "Total (us)", "Avg (us)", "Max (us)", "Count", "Function"); + + for (i =3D 0; i < nr; i++) { + const char *name =3D profile[i]->pkey; + struct ftrace_profile_data *p =3D profile[i]->pvalue; + + printf("%12.3f %10.3f %6"PRIu64".%03"PRIu64" %10.0f %s\n", + p->st.n * p->st.mean / 1000, p->st.mean / 1000, + p->st.max / 1000, p->st.max % 1000, p->st.n, name); + } + + free(profile); + + hashmap__for_each_entry(ftrace->profile_hash, entry, bkt) { + free((char *)entry->pkey); + free(entry->pvalue); + } + + hashmap__free(ftrace->profile_hash); + ftrace->profile_hash =3D NULL; +} + +static int __cmd_profile(struct perf_ftrace *ftrace) +{ + char *trace_file; + int trace_fd; + char buf[4096]; + struct io io; + char *line =3D NULL; + size_t line_len =3D 0; + + if (prepare_func_profile(ftrace) < 0) { + pr_err("failed to prepare func profiler\n"); + goto out; + } + + if (reset_tracing_files(ftrace) < 0) { + pr_err("failed to reset ftrace\n"); + goto out; + } + + /* reset ftrace buffer */ + if (write_tracing_file("trace", "0") < 0) + goto out; + + if (set_tracing_options(ftrace) < 0) + return -1; + + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { + pr_err("failed to set current_tracer to %s\n", ftrace->tracer); + goto out_reset; + } + + setup_pager(); + + trace_file =3D get_tracing_file("trace_pipe"); + if (!trace_file) { + pr_err("failed to open trace_pipe\n"); + goto out_reset; + } + + trace_fd =3D open(trace_file, O_RDONLY); + + put_tracing_file(trace_file); + + if (trace_fd < 0) { + pr_err("failed to open trace_pipe\n"); + goto out_reset; + } + + fcntl(trace_fd, F_SETFL, O_NONBLOCK); + + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + goto out_close_fd; + } + + evlist__start_workload(ftrace->evlist); + + io__init(&io, trace_fd, buf, sizeof(buf)); + io.timeout_ms =3D -1; + + while (!done && !io.eof) { + if (io__getline(&io, &line, &line_len) < 0) + break; + + if (parse_func_duration(ftrace, line, line_len) < 0) + break; + } + + write_tracing_file("tracing_on", "0"); + + if (workload_exec_errno) { + const char *emsg =3D str_error_r(workload_exec_errno, buf, sizeof(buf)); + /* flush stdout first so below error msg appears at the end. */ + fflush(stdout); + pr_err("workload failed: %s\n", emsg); + goto out_free_line; + } + + /* read remaining buffer contents */ + io.timeout_ms =3D 0; + while (!io.eof) { + if (io__getline(&io, &line, &line_len) < 0) + break; + + if (parse_func_duration(ftrace, line, line_len) < 0) + break; + } + + print_profile_result(ftrace); + +out_free_line: + free(line); +out_close_fd: + close(trace_fd); +out_reset: + reset_tracing_files(ftrace); +out: + return (done && !workload_exec_errno) ? 0 : -1; +} + static int perf_ftrace_config(const char *var, const char *value, void *cb) { struct perf_ftrace *ftrace =3D cb; @@ -1126,6 +1418,7 @@ enum perf_ftrace_subcommand { PERF_FTRACE_NONE, PERF_FTRACE_TRACE, PERF_FTRACE_LATENCY, + PERF_FTRACE_PROFILE, }; =20 int cmd_ftrace(int argc, const char **argv) @@ -1191,13 +1484,28 @@ int cmd_ftrace(int argc, const char **argv) "Use nano-second histogram"), OPT_PARENT(common_options), }; + const struct option profile_options[] =3D { + OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", + "Trace given functions using function tracer", + parse_filter_func), + OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func", + "Do not trace given functions", parse_filter_func), + OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func", + "Trace given functions using function_graph tracer", + parse_filter_func), + OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func", + "Set nograph filter on given functions", parse_filter_func), + OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", + "Size of per cpu buffer, needs to use a B, K, M or G suffix.", pars= e_buffer_size), + OPT_PARENT(common_options), + }; const struct option *options =3D ftrace_options; =20 const char * const ftrace_usage[] =3D { "perf ftrace [] []", "perf ftrace [] -- [] []", - "perf ftrace {trace|latency} [] []", - "perf ftrace {trace|latency} [] -- [] []", + "perf ftrace {trace|latency|profile} [] []", + "perf ftrace {trace|latency|profile} [] -- [] []", NULL }; enum perf_ftrace_subcommand subcmd =3D PERF_FTRACE_NONE; @@ -1226,6 +1534,9 @@ int cmd_ftrace(int argc, const char **argv) } else if (!strcmp(argv[1], "latency")) { subcmd =3D PERF_FTRACE_LATENCY; options =3D latency_options; + } else if (!strcmp(argv[1], "profile")) { + subcmd =3D PERF_FTRACE_PROFILE; + options =3D profile_options; } =20 if (subcmd !=3D PERF_FTRACE_NONE) { @@ -1261,6 +1572,9 @@ int cmd_ftrace(int argc, const char **argv) } cmd_func =3D __cmd_latency; break; + case PERF_FTRACE_PROFILE: + cmd_func =3D __cmd_profile; + break; case PERF_FTRACE_NONE: default: pr_err("Invalid subcommand\n"); diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h index 2515e841c64c..bae649ef50e8 100644 --- a/tools/perf/util/ftrace.h +++ b/tools/perf/util/ftrace.h @@ -6,6 +6,7 @@ #include "target.h" =20 struct evlist; +struct hashamp; =20 struct perf_ftrace { struct evlist *evlist; @@ -15,6 +16,7 @@ struct perf_ftrace { struct list_head notrace; struct list_head graph_funcs; struct list_head nograph_funcs; + struct hashmap *profile_hash; unsigned long percpu_buffer_size; bool inherit; bool use_nsec; --=20 2.46.0.rc1.232.g9752f9e123-goog From nobody Sat Feb 7 17:09:43 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0DE5AB661; Mon, 29 Jul 2024 00:41:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722213691; cv=none; b=DiH/lJg3BUTNMrNGpDPR+Da0dJ8Uh9pro+4towUauIAsBlXHCULtMvrxsXsCs6UKyNKgoPnUsUOUnndDAgID8RuyKSy+Gr8pyhZWwZ7qLowISqx5sUNrmY21t49x4bIWikVCCzG6NJFXPk43oTsW5l8brrOlVAxfMJVcpYHhxnY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1722213691; c=relaxed/simple; bh=fsiX2Lm79R2xAO3++1r1+9dNsJhIjv83GPnsKTWgmlE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=YO50CbBweDBFwr6Zpcj8j0Txx9L8AOJoaJnVG4OFzaQHltdDwCL5f2D2L8lZdK9J2bUA+ZXfqJiORqvS+CpJQSs8okq1Mk5cRHMP2ZKTW9VY9ODUC/0be5A7YVmTt4CmtPqp3xke++FeL7h8EwLzZl9M/Wm+vrsBh+2eVlEcch4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Hq8Ie3lf; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Hq8Ie3lf" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7AF21C4AF4D; Mon, 29 Jul 2024 00:41:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1722213690; bh=fsiX2Lm79R2xAO3++1r1+9dNsJhIjv83GPnsKTWgmlE=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Hq8Ie3lfiCv2SW6TdlBjMomqQNY90gLUyN0iZ0vpg86gbxe+43vm2OLQS16QYA2xQ aFz1XOm/XeN7Ok4FkTwVzS+P6AdIoHejqAFoHusIZpiMWF7bQotMZuVm5yChSyDz1X 51gPIW1jlhTIRUeGWw8PPwBbNAfdd0c9m2DRabk424QFP50ZYroN5dXyY5y7LUuB+L 0drImTD6WPikSSSGlaWlSa4DIiUBNQhRrJVOyDWKsKRSRfMJSq5Xyvsmwd/W8dSasA 31u/CPEAmz/7TC8YEjaD4QeZlcTFgHcL1xWiXLHEbwo2XTkiUnkg7lvvaLd11B52Il 82qB1KT/QDGxA== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Steven Rostedt , Changbin Du Subject: [PATCH 4/4] perf ftrace: Add -s/--sort option to profile sub-command Date: Sun, 28 Jul 2024 17:41:27 -0700 Message-ID: <20240729004127.238611-5-namhyung@kernel.org> X-Mailer: git-send-email 2.46.0.rc1.232.g9752f9e123-goog In-Reply-To: <20240729004127.238611-1-namhyung@kernel.org> References: <20240729004127.238611-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The -s/--sort option is to sort the output by given column. $ sudo perf ftrace profile -s max sync | head # Total (us) Avg (us) Max (us) Count Function 6301.811 6301.811 6301.811 1 __do_sys_sync 6301.328 6301.328 6301.328 1 ksys_sync 5320.300 1773.433 2858.819 3 iterate_supers 2755.875 17.012 2610.633 162 sync_fs_one_sb 2728.351 682.088 2610.413 4 ext4_sync_fs [ext4] 2603.654 2603.654 2603.654 1 jbd2_log_wait_commit [jbd= 2] 4750.615 593.827 2597.427 8 schedule 2164.986 26.728 2115.673 81 sync_inodes_one_sb 2143.842 26.467 2115.438 81 sync_inodes_sb Signed-off-by: Namhyung Kim Reviewed-by: Ian Rogers --- tools/perf/Documentation/perf-ftrace.txt | 5 ++ tools/perf/builtin-ftrace.c | 63 +++++++++++++++++++++++- 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Document= ation/perf-ftrace.txt index 33f32467f287..eaec8253be68 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -185,6 +185,11 @@ OPTIONS for 'perf ftrace profile' Set the size of per-cpu tracing buffer, is expected to be a number with appended unit character - B/K/M/G. =20 +-s:: +--sort=3D:: + Sort the result by the given field. Available values are: + total, avg, max, count, name. Default is 'total'. + =20 SEE ALSO -------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index ae9389435d1b..a615c405d98f 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -1090,15 +1090,47 @@ static int parse_func_duration(struct perf_ftrace *= ftrace, char *line, size_t le return add_func_duration(ftrace, func, duration); } =20 +enum perf_ftrace_profile_sort_key { + PFP_SORT_TOTAL =3D 0, + PFP_SORT_AVG, + PFP_SORT_MAX, + PFP_SORT_COUNT, + PFP_SORT_NAME, +}; + +static enum perf_ftrace_profile_sort_key profile_sort =3D PFP_SORT_TOTAL; + static int cmp_profile_data(const void *a, const void *b) { const struct hashmap_entry *e1 =3D *(const struct hashmap_entry **)a; const struct hashmap_entry *e2 =3D *(const struct hashmap_entry **)b; struct ftrace_profile_data *p1 =3D e1->pvalue; struct ftrace_profile_data *p2 =3D e2->pvalue; + double v1, v2; + + switch (profile_sort) { + case PFP_SORT_NAME: + return strcmp(e1->pkey, e2->pkey); + case PFP_SORT_AVG: + v1 =3D p1->st.mean; + v2 =3D p2->st.mean; + break; + case PFP_SORT_MAX: + v1 =3D p1->st.max; + v2 =3D p2->st.max; + break; + case PFP_SORT_COUNT: + v1 =3D p1->st.n; + v2 =3D p2->st.n; + break; + case PFP_SORT_TOTAL: + default: + v1 =3D p1->st.n * p1->st.mean; + v2 =3D p2->st.n * p2->st.mean; + break; + } =20 - /* compare by total time */ - if ((p1->st.n * p1->st.mean) > (p2->st.n * p2->st.mean)) + if (v1 > v2) return -1; else return 1; @@ -1414,6 +1446,30 @@ static int parse_graph_tracer_opts(const struct opti= on *opt, return 0; } =20 +static int parse_sort_key(const struct option *opt, const char *str, int u= nset) +{ + enum perf_ftrace_profile_sort_key *key =3D (void *)opt->value; + + if (unset) + return 0; + + if (!strcmp(str, "total")) + *key =3D PFP_SORT_TOTAL; + else if (!strcmp(str, "avg")) + *key =3D PFP_SORT_AVG; + else if (!strcmp(str, "max")) + *key =3D PFP_SORT_MAX; + else if (!strcmp(str, "count")) + *key =3D PFP_SORT_COUNT; + else if (!strcmp(str, "name")) + *key =3D PFP_SORT_NAME; + else { + pr_err("Unknown sort key: %s\n", str); + return -1; + } + return 0; +} + enum perf_ftrace_subcommand { PERF_FTRACE_NONE, PERF_FTRACE_TRACE, @@ -1497,6 +1553,9 @@ int cmd_ftrace(int argc, const char **argv) "Set nograph filter on given functions", parse_filter_func), OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size", "Size of per cpu buffer, needs to use a B, K, M or G suffix.", pars= e_buffer_size), + OPT_CALLBACK('s', "sort", &profile_sort, "key", + "Sort result by key: total (default), avg, max, count, name.", + parse_sort_key), OPT_PARENT(common_options), }; const struct option *options =3D ftrace_options; --=20 2.46.0.rc1.232.g9752f9e123-goog