And add the missing feature detection logic to clear the flag on old
kernels.
$ perf record -g -vv true
...
------------------------------------------------------------
perf_event_attr:
type 0 (PERF_TYPE_HARDWARE)
size 136
config 0 (PERF_COUNT_HW_CPU_CYCLES)
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|CALLCHAIN|PERIOD
read_format ID|LOST
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
mmap2 1
comm_exec 1
ksymbol 1
bpf_event 1
defer_callchain 1
defer_output 1
------------------------------------------------------------
sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open failed, error -22
switching off deferred callchain support
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/util/evsel.c | 24 ++++++++++++++++++++++++
tools/perf/util/evsel.h | 1 +
2 files changed, 25 insertions(+)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 244b3e44d090d413..f5652d00b457d096 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1061,6 +1061,14 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
}
}
+ if (param->record_mode == CALLCHAIN_FP && !attr->exclude_callchain_user) {
+ /*
+ * Enable deferred callchains optimistically. It'll be switched
+ * off later if the kernel doesn't support it.
+ */
+ attr->defer_callchain = 1;
+ }
+
if (function) {
pr_info("Disabling user space callchains for function trace event.\n");
attr->exclude_callchain_user = 1;
@@ -1511,6 +1519,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
attr->build_id = track && opts->build_id;
+ attr->defer_output = track;
/*
* ksymbol is tracked separately with text poke because it needs to be
@@ -2199,6 +2208,10 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
static void evsel__disable_missing_features(struct evsel *evsel)
{
+ if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain)
+ evsel->core.attr.defer_callchain = 0;
+ if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output)
+ evsel->core.attr.defer_output = 0;
if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
(evsel->core.attr.sample_type & PERF_SAMPLE_READ))
evsel->core.attr.inherit = 0;
@@ -2473,6 +2486,13 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
/* Please add new feature detection here. */
+ attr.defer_callchain = true;
+ if (has_attr_feature(&attr, /*flags=*/0))
+ goto found;
+ perf_missing_features.defer_callchain = true;
+ pr_debug2("switching off deferred callchain support\n");
+ attr.defer_callchain = false;
+
attr.inherit = true;
attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
if (has_attr_feature(&attr, /*flags=*/0))
@@ -2584,6 +2604,10 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
errno = old_errno;
check:
+ if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) &&
+ perf_missing_features.defer_callchain)
+ return true;
+
if (evsel->core.attr.inherit &&
(evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
perf_missing_features.inherit_sample_read)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3ae4ac8f9a37e009..a08130ff2e47a887 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -221,6 +221,7 @@ struct perf_missing_features {
bool branch_counters;
bool aux_action;
bool inherit_sample_read;
+ bool defer_callchain;
};
extern struct perf_missing_features perf_missing_features;
--
2.52.0.rc1.455.g30608eb744-goog
On Thu, Nov 13, 2025 at 11:01 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> And add the missing feature detection logic to clear the flag on old
> kernels.
>
> $ perf record -g -vv true
> ...
> ------------------------------------------------------------
> perf_event_attr:
> type 0 (PERF_TYPE_HARDWARE)
> size 136
> config 0 (PERF_COUNT_HW_CPU_CYCLES)
> { sample_period, sample_freq } 4000
> sample_type IP|TID|TIME|CALLCHAIN|PERIOD
> read_format ID|LOST
> disabled 1
> inherit 1
> mmap 1
> comm 1
> freq 1
> enable_on_exec 1
> task 1
> sample_id_all 1
> mmap2 1
> comm_exec 1
> ksymbol 1
> bpf_event 1
> defer_callchain 1
> defer_output 1
> ------------------------------------------------------------
> sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8
> sys_perf_event_open failed, error -22
> switching off deferred callchain support
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> tools/perf/util/evsel.c | 24 ++++++++++++++++++++++++
> tools/perf/util/evsel.h | 1 +
> 2 files changed, 25 insertions(+)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 244b3e44d090d413..f5652d00b457d096 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1061,6 +1061,14 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
> }
> }
>
> + if (param->record_mode == CALLCHAIN_FP && !attr->exclude_callchain_user) {
> + /*
> + * Enable deferred callchains optimistically. It'll be switched
> + * off later if the kernel doesn't support it.
> + */
> + attr->defer_callchain = 1;
> + }
If a user has requested frame pointer call chains, why would they want
deferred call chains? The point of deferral, to my understanding, is to
allow the paging in of debug data, but frame pointers don't need that,
as the stack should be in the page cache.
Is this being done for code coverage reasons, so that deferral is known
to work for the later addition of SFrames? If so, this should be
opt-in, not the default behavior. When there is a record_mode of
CALLCHAIN_SFRAME, then making deferral the default for that mode makes
sense, but not for frame pointers IMO.
Thanks,
Ian
> +
> if (function) {
> pr_info("Disabling user space callchains for function trace event.\n");
> attr->exclude_callchain_user = 1;
> @@ -1511,6 +1519,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
> attr->mmap2 = track && !perf_missing_features.mmap2;
> attr->comm = track;
> attr->build_id = track && opts->build_id;
> + attr->defer_output = track;
>
> /*
> * ksymbol is tracked separately with text poke because it needs to be
> @@ -2199,6 +2208,10 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>
> static void evsel__disable_missing_features(struct evsel *evsel)
> {
> + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain)
> + evsel->core.attr.defer_callchain = 0;
> + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output)
> + evsel->core.attr.defer_output = 0;
> if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
> (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
> evsel->core.attr.inherit = 0;
> @@ -2473,6 +2486,13 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
>
> /* Please add new feature detection here. */
>
> + attr.defer_callchain = true;
> + if (has_attr_feature(&attr, /*flags=*/0))
> + goto found;
> + perf_missing_features.defer_callchain = true;
> + pr_debug2("switching off deferred callchain support\n");
> + attr.defer_callchain = false;
> +
> attr.inherit = true;
> attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
> if (has_attr_feature(&attr, /*flags=*/0))
> @@ -2584,6 +2604,10 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
> errno = old_errno;
>
> check:
> + if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) &&
> + perf_missing_features.defer_callchain)
> + return true;
> +
> if (evsel->core.attr.inherit &&
> (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
> perf_missing_features.inherit_sample_read)
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 3ae4ac8f9a37e009..a08130ff2e47a887 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -221,6 +221,7 @@ struct perf_missing_features {
> bool branch_counters;
> bool aux_action;
> bool inherit_sample_read;
> + bool defer_callchain;
> };
>
> extern struct perf_missing_features perf_missing_features;
> --
> 2.52.0.rc1.455.g30608eb744-goog
>
>
On Fri, Nov 14, 2025 at 9:59 AM Ian Rogers <irogers@google.com> wrote:
>
> On Thu, Nov 13, 2025 at 11:01 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > And add the missing feature detection logic to clear the flag on old
> > kernels.
> >
> > $ perf record -g -vv true
> > ...
> > ------------------------------------------------------------
> > perf_event_attr:
> > type 0 (PERF_TYPE_HARDWARE)
> > size 136
> > config 0 (PERF_COUNT_HW_CPU_CYCLES)
> > { sample_period, sample_freq } 4000
> > sample_type IP|TID|TIME|CALLCHAIN|PERIOD
> > read_format ID|LOST
> > disabled 1
> > inherit 1
> > mmap 1
> > comm 1
> > freq 1
> > enable_on_exec 1
> > task 1
> > sample_id_all 1
> > mmap2 1
> > comm_exec 1
> > ksymbol 1
> > bpf_event 1
> > defer_callchain 1
> > defer_output 1
> > ------------------------------------------------------------
> > sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8
> > sys_perf_event_open failed, error -22
> > switching off deferred callchain support
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> > tools/perf/util/evsel.c | 24 ++++++++++++++++++++++++
> > tools/perf/util/evsel.h | 1 +
> > 2 files changed, 25 insertions(+)
> >
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index 244b3e44d090d413..f5652d00b457d096 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -1061,6 +1061,14 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
> > }
> > }
> >
> > + if (param->record_mode == CALLCHAIN_FP && !attr->exclude_callchain_user) {
> > + /*
> > + * Enable deferred callchains optimistically. It'll be switched
> > + * off later if the kernel doesn't support it.
> > + */
> > + attr->defer_callchain = 1;
> > + }
>
> If a user has requested frame pointer call chains why would they want
> deferred call chains? The point of deferral to my understanding is to
> allow the paging in of debug data, but frame pointers don't need that
> as the stack should be in the page cache.
>
> Is this being done for code coverage reasons so that deferral is known
> to work for later addition of SFrames? In which case this should be an
> opt-in not default behavior. When there is a record_mode of
> CALLCHAIN_SFRAME then making deferral the default for that mode makes
> sense, but not for frame pointers IMO.
Just to be clear, I don't think the behavior of using frame pointers
should change. Deferral has downsides, for example:
$ perf record -g -a sleep 1
Without deferral, kernel stack traces will contain both kernel and user
traces. With deferral, the user stack trace is only generated when the
system call returns, so there is a chance for kernel stack traces
to be missing their user part. That is an obvious behavioral change. I think
for what you are doing here we can have an option, something like:
$ perf record --call-graph fp-deferred -a sleep 1
This would need a man page update, etc. What is happening with the
other call-graph modes and deferral? Could the option be something
like `--call-graph fp,deferred`, so that the option is a common one and,
say, stack snapshots for dwarf could somehow be improved too?
Thanks,
Ian
> Thanks,
> Ian
>
> > +
> > if (function) {
> > pr_info("Disabling user space callchains for function trace event.\n");
> > attr->exclude_callchain_user = 1;
> > @@ -1511,6 +1519,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
> > attr->mmap2 = track && !perf_missing_features.mmap2;
> > attr->comm = track;
> > attr->build_id = track && opts->build_id;
> > + attr->defer_output = track;
> >
> > /*
> > * ksymbol is tracked separately with text poke because it needs to be
> > @@ -2199,6 +2208,10 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> >
> > static void evsel__disable_missing_features(struct evsel *evsel)
> > {
> > + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain)
> > + evsel->core.attr.defer_callchain = 0;
> > + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output)
> > + evsel->core.attr.defer_output = 0;
> > if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
> > (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
> > evsel->core.attr.inherit = 0;
> > @@ -2473,6 +2486,13 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
> >
> > /* Please add new feature detection here. */
> >
> > + attr.defer_callchain = true;
> > + if (has_attr_feature(&attr, /*flags=*/0))
> > + goto found;
> > + perf_missing_features.defer_callchain = true;
> > + pr_debug2("switching off deferred callchain support\n");
> > + attr.defer_callchain = false;
> > +
> > attr.inherit = true;
> > attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
> > if (has_attr_feature(&attr, /*flags=*/0))
> > @@ -2584,6 +2604,10 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
> > errno = old_errno;
> >
> > check:
> > + if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) &&
> > + perf_missing_features.defer_callchain)
> > + return true;
> > +
> > if (evsel->core.attr.inherit &&
> > (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
> > perf_missing_features.inherit_sample_read)
> > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > index 3ae4ac8f9a37e009..a08130ff2e47a887 100644
> > --- a/tools/perf/util/evsel.h
> > +++ b/tools/perf/util/evsel.h
> > @@ -221,6 +221,7 @@ struct perf_missing_features {
> > bool branch_counters;
> > bool aux_action;
> > bool inherit_sample_read;
> > + bool defer_callchain;
> > };
> >
> > extern struct perf_missing_features perf_missing_features;
> > --
> > 2.52.0.rc1.455.g30608eb744-goog
> >
> >
On Fri, 14 Nov 2025 10:09:26 -0800 Ian Rogers <irogers@google.com> wrote:
> Just to be clear. I don't think the behavior of using frame pointers
> should change. Deferral has downsides, for example:
>
> $ perf record -g -a sleep 1
The biggest advantage of the deferred callstack is that there's much less
duplication of data in the ring buffer. Especially when you have deep
stacks and long system calls.
Now, if we have frame pointers enabled, we could possibly add a feature to
the deferred unwinder where it could try to do the deferred immediately and
if it faults it then waits until going back to user space. This means that
the frame pointer version should work (unless the user space stack was
swapped out).
>
> Without deferral kernel stack traces will contain both kernel and user
> traces. With deferral the user stack trace is only generated when the
> system call returns and so there is a chance for kernel stack traces
> to be missing their user part. An obvious behavioral change. I think
> for what you are doing here we can have an option something like:
>
> $ perf record --call-graph fp-deferred -a sleep 1
I would be OK with this but I would prefer a much shorter name. Adding 20
characters to the command line will likely keep people from using it.
-- Steve
On Fri, Nov 14, 2025 at 01:30:09PM -0500, Steven Rostedt wrote:
> On Fri, 14 Nov 2025 10:09:26 -0800
> Ian Rogers <irogers@google.com> wrote:
>
> > Just to be clear. I don't think the behavior of using frame pointers
> > should change. Deferral has downsides, for example:
> >
> > $ perf record -g -a sleep 1
>
> The biggest advantage of the deferred callstack is that there's much less
> duplication of data in the ring buffer. Especially when you have deep
> stacks and long system calls.
>
> Now, if we have frame pointers enabled, we could possibly add a feature to
> the deferred unwinder where it could try to do the deferred immediately and
> if it faults it then waits until going back to user space.
This would be great if it can share the callstack with later samples
before going to user space.
> This means that
> the frame pointer version should work (unless the user space stack was
> swapped out).
>
> >
> > Without deferral kernel stack traces will contain both kernel and user
> > traces. With deferral the user stack trace is only generated when the
> > system call returns and so there is a chance for kernel stack traces
> > to be missing their user part. An obvious behavioral change.
Right, this is one of my concerns too. For system-wide profiling, the
chances are high it can have some tasks sleeping in the kernel and perf
finishes the profiling before they return to user space.
Thanks,
Namhyung
> > I think
> > for what you are doing here we can have an option something like:
> >
> > $ perf record --call-graph fp-deferred -a sleep 1
>
> I would be OK with this but I would prefer a much shorter name. Adding 20
> characters to the command line will likely keep people from using it.
>
> -- Steve
On Fri, Nov 14, 2025 at 10:29 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 14 Nov 2025 10:09:26 -0800
> Ian Rogers <irogers@google.com> wrote:
>
> > Just to be clear. I don't think the behavior of using frame pointers
> > should change. Deferral has downsides, for example:
> >
> > $ perf record -g -a sleep 1
>
> The biggest advantage of the deferred callstack is that there's much less
> duplication of data in the ring buffer. Especially when you have deep
> stacks and long system calls.
I've never had anybody raise this as a concern with fp stack traces,
especially given the stack snapshot approach being far more space
consuming - but okay.
> Now, if we have frame pointers enabled, we could possibly add a feature to
> the deferred unwinder where it could try to do the deferred immediately and
> if it faults it then waits until going back to user space. This means that
> the frame pointer version should work (unless the user space stack was
> swapped out).
>
> >
> > Without deferral kernel stack traces will contain both kernel and user
> > traces. With deferral the user stack trace is only generated when the
> > system call returns and so there is a chance for kernel stack traces
> > to be missing their user part. An obvious behavioral change. I think
> > for what you are doing here we can have an option something like:
> >
> > $ perf record --call-graph fp-deferred -a sleep 1
>
> I would be OK with this but I would prefer a much shorter name. Adding 20
> characters to the command line will likely keep people from using it.
Fwiw, with buildid-mmap we just (v6.18) flipped the default when the
kernel has the feature to use it. The kernel feature was added in v5.12.
https://lore.kernel.org/r/20250724163302.596743-9-irogers@google.com
I don't oppose a shorter name, callchain option, .. Unfortunately with
`perf record` -d is taken for saying record data mmaps, -D is taken for
a start-up delay option, and -G is a cgroup option.
Perhaps '-f' for "frame" and have it mirror '-g' except that deferred
is default true rather than false.
Thanks,
Ian
> -- Steve
On Fri, Nov 14, 2025 at 10:09 AM Ian Rogers <irogers@google.com> wrote:
>
> On Fri, Nov 14, 2025 at 9:59 AM Ian Rogers <irogers@google.com> wrote:
> >
> > On Thu, Nov 13, 2025 at 11:01 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > >
> > > And add the missing feature detection logic to clear the flag on old
> > > kernels.
> > >
> > > $ perf record -g -vv true
> > > ...
> > > ------------------------------------------------------------
> > > perf_event_attr:
> > > type 0 (PERF_TYPE_HARDWARE)
> > > size 136
> > > config 0 (PERF_COUNT_HW_CPU_CYCLES)
> > > { sample_period, sample_freq } 4000
> > > sample_type IP|TID|TIME|CALLCHAIN|PERIOD
> > > read_format ID|LOST
> > > disabled 1
> > > inherit 1
> > > mmap 1
> > > comm 1
> > > freq 1
> > > enable_on_exec 1
> > > task 1
> > > sample_id_all 1
> > > mmap2 1
> > > comm_exec 1
> > > ksymbol 1
> > > bpf_event 1
> > > defer_callchain 1
> > > defer_output 1
> > > ------------------------------------------------------------
> > > sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8
> > > sys_perf_event_open failed, error -22
> > > switching off deferred callchain support
> > >
> > > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > > ---
> > > tools/perf/util/evsel.c | 24 ++++++++++++++++++++++++
> > > tools/perf/util/evsel.h | 1 +
> > > 2 files changed, 25 insertions(+)
> > >
> > > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > > index 244b3e44d090d413..f5652d00b457d096 100644
> > > --- a/tools/perf/util/evsel.c
> > > +++ b/tools/perf/util/evsel.c
> > > @@ -1061,6 +1061,14 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
> > > }
> > > }
> > >
> > > + if (param->record_mode == CALLCHAIN_FP && !attr->exclude_callchain_user) {
> > > + /*
> > > + * Enable deferred callchains optimistically. It'll be switched
> > > + * off later if the kernel doesn't support it.
> > > + */
> > > + attr->defer_callchain = 1;
> > > + }
> >
> > If a user has requested frame pointer call chains why would they want
> > deferred call chains? The point of deferral to my understanding is to
> > allow the paging in of debug data, but frame pointers don't need that
> > as the stack should be in the page cache.
> >
> > Is this being done for code coverage reasons so that deferral is known
> > to work for later addition of SFrames? In which case this should be an
> > opt-in not default behavior. When there is a record_mode of
> > CALLCHAIN_SFRAME then making deferral the default for that mode makes
> > sense, but not for frame pointers IMO.
>
> Just to be clear. I don't think the behavior of using frame pointers
> should change. Deferral has downsides, for example:
>
> $ perf record -g -a sleep 1
>
> Without deferral kernel stack traces will contain both kernel and user
> traces. With deferral the user stack trace is only generated when the
> system call returns and so there is a chance for kernel stack traces
> to be missing their user part. An obvious behavioral change. I think
> for what you are doing here we can have an option something like:
>
> $ perf record --call-graph fp-deferred -a sleep 1
>
> Which would need a man page update, etc. What is happening with the
> other call-graph modes and deferral? Could the option be something
> like `--call-graph fp,deferred` so that the option is a common one and
> say stack snapshots for dwarf be somehow improved?
Also, making deferral the norm will generate new perf events that
tools other than perf, when processing perf.data files, will fail to
consume. This change would therefore break quite a lot of stuff, so it
should not simply be made the default.
Thanks,
Ian
> Thanks,
> Ian
>
> > Thanks,
> > Ian
> >
> > > +
> > > if (function) {
> > > pr_info("Disabling user space callchains for function trace event.\n");
> > > attr->exclude_callchain_user = 1;
> > > @@ -1511,6 +1519,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
> > > attr->mmap2 = track && !perf_missing_features.mmap2;
> > > attr->comm = track;
> > > attr->build_id = track && opts->build_id;
> > > + attr->defer_output = track;
> > >
> > > /*
> > > * ksymbol is tracked separately with text poke because it needs to be
> > > @@ -2199,6 +2208,10 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
> > >
> > > static void evsel__disable_missing_features(struct evsel *evsel)
> > > {
> > > + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain)
> > > + evsel->core.attr.defer_callchain = 0;
> > > + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output)
> > > + evsel->core.attr.defer_output = 0;
> > > if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
> > > (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
> > > evsel->core.attr.inherit = 0;
> > > @@ -2473,6 +2486,13 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
> > >
> > > /* Please add new feature detection here. */
> > >
> > > + attr.defer_callchain = true;
> > > + if (has_attr_feature(&attr, /*flags=*/0))
> > > + goto found;
> > > + perf_missing_features.defer_callchain = true;
> > > + pr_debug2("switching off deferred callchain support\n");
> > > + attr.defer_callchain = false;
> > > +
> > > attr.inherit = true;
> > > attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
> > > if (has_attr_feature(&attr, /*flags=*/0))
> > > @@ -2584,6 +2604,10 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu
> > > errno = old_errno;
> > >
> > > check:
> > > + if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) &&
> > > + perf_missing_features.defer_callchain)
> > > + return true;
> > > +
> > > if (evsel->core.attr.inherit &&
> > > (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
> > > perf_missing_features.inherit_sample_read)
> > > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > > index 3ae4ac8f9a37e009..a08130ff2e47a887 100644
> > > --- a/tools/perf/util/evsel.h
> > > +++ b/tools/perf/util/evsel.h
> > > @@ -221,6 +221,7 @@ struct perf_missing_features {
> > > bool branch_counters;
> > > bool aux_action;
> > > bool inherit_sample_read;
> > > + bool defer_callchain;
> > > };
> > >
> > > extern struct perf_missing_features perf_missing_features;
> > > --
> > > 2.52.0.rc1.455.g30608eb744-goog
> > >
> > >
On Fri, Nov 14, 2025 at 10:12:34AM -0800, Ian Rogers wrote:
> On Fri, Nov 14, 2025 at 10:09 AM Ian Rogers <irogers@google.com> wrote:
> >
> > On Fri, Nov 14, 2025 at 9:59 AM Ian Rogers <irogers@google.com> wrote:
> > >
> > > On Thu, Nov 13, 2025 at 11:01 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > > >
> > > > And add the missing feature detection logic to clear the flag on old
> > > > kernels.
> > > >
> > > > $ perf record -g -vv true
> > > > ...
> > > > ------------------------------------------------------------
> > > > perf_event_attr:
> > > > type 0 (PERF_TYPE_HARDWARE)
> > > > size 136
> > > > config 0 (PERF_COUNT_HW_CPU_CYCLES)
> > > > { sample_period, sample_freq } 4000
> > > > sample_type IP|TID|TIME|CALLCHAIN|PERIOD
> > > > read_format ID|LOST
> > > > disabled 1
> > > > inherit 1
> > > > mmap 1
> > > > comm 1
> > > > freq 1
> > > > enable_on_exec 1
> > > > task 1
> > > > sample_id_all 1
> > > > mmap2 1
> > > > comm_exec 1
> > > > ksymbol 1
> > > > bpf_event 1
> > > > defer_callchain 1
> > > > defer_output 1
> > > > ------------------------------------------------------------
> > > > sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8
> > > > sys_perf_event_open failed, error -22
> > > > switching off deferred callchain support
> > > >
> > > > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > > > ---
> > > > tools/perf/util/evsel.c | 24 ++++++++++++++++++++++++
> > > > tools/perf/util/evsel.h | 1 +
> > > > 2 files changed, 25 insertions(+)
> > > >
> > > > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > > > index 244b3e44d090d413..f5652d00b457d096 100644
> > > > --- a/tools/perf/util/evsel.c
> > > > +++ b/tools/perf/util/evsel.c
> > > > @@ -1061,6 +1061,14 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
> > > > }
> > > > }
> > > >
> > > > + if (param->record_mode == CALLCHAIN_FP && !attr->exclude_callchain_user) {
> > > > + /*
> > > > + * Enable deferred callchains optimistically. It'll be switched
> > > > + * off later if the kernel doesn't support it.
> > > > + */
> > > > + attr->defer_callchain = 1;
> > > > + }
> > >
> > > If a user has requested frame pointer call chains why would they want
> > > deferred call chains? The point of deferral to my understanding is to
> > > allow the paging in of debug data, but frame pointers don't need that
> > > as the stack should be in the page cache.
> > >
> > > Is this being done for code coverage reasons so that deferral is known
> > > to work for later addition of SFrames? In which case this should be an
> > > opt-in not default behavior. When there is a record_mode of
> > > CALLCHAIN_SFRAME then making deferral the default for that mode makes
> > > sense, but not for frame pointers IMO.
> >
> > Just to be clear. I don't think the behavior of using frame pointers
> > should change. Deferral has downsides, for example:
> >
> > $ perf record -g -a sleep 1
> >
> > Without deferral kernel stack traces will contain both kernel and user
> > traces. With deferral the user stack trace is only generated when the
> > system call returns and so there is a chance for kernel stack traces
> > to be missing their user part. An obvious behavioral change. I think
> > for what you are doing here we can have an option something like:
> >
> > $ perf record --call-graph fp-deferred -a sleep 1
> >
> > Which would need a man page update, etc. What is happening with the
> > other call-graph modes and deferral? Could the option be something
> > like `--call-graph fp,deferred` so that the option is a common one and
> > say stack snapshots for dwarf be somehow improved?
>
> Also, making deferral the norm will generate new perf events that
> tools, other than perf, processing perf.data files will fail to
> consume. So this change would break quite a lot of stuff, so it should
> not just be made the default.
Thanks a lot for your input! Yeah, I agree it'd be better to make it
optional. Having a separate `--call-graph fp,defer` option sounds good.
I can add a config option to control deferred callchains as well.
Thanks,
Namhyung
© 2016 - 2026 Red Hat, Inc.