Perf only looks at attr.config when determining what was programmed into
ETMCR. These bits could theoretically be in any of the config fields.
Add a generic helper to find the value of any named format field in any
config field and then use it to get the attributes relevant to ETMCR.
The kernel will also stop publishing the ETMCR register bits in a header
[1] so preempt that by defining them here.
[1]: https://lore.kernel.org/linux-arm-kernel/20251128-james-cs-syncfreq-v8-10-4d319764cc58@linaro.org/
Signed-off-by: James Clark <james.clark@linaro.org>
---
tools/perf/arch/arm/util/cs-etm.c | 35 ++++++++++++++++++++++++++++++++++-
tools/perf/util/evsel.h | 2 ++
tools/perf/util/pmu.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 69 insertions(+), 1 deletion(-)
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 22c6272e8c36..414cafb21c98 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -68,6 +68,12 @@ static const char * const metadata_ete_ro[] = {
enum cs_etm_version { CS_NOT_PRESENT, CS_ETMV3, CS_ETMV4, CS_ETE };
+
+/* ETMv3 ETMCR register bits; defined here as the kernel will stop exporting them */
+#define ETMCR_CYC_ACC BIT(12)
+#define ETMCR_TIMESTAMP_EN BIT(28)
+#define ETMCR_RETURN_STACK BIT(29)
+
static bool cs_etm_is_ete(struct perf_pmu *cs_etm_pmu, struct perf_cpu cpu);
static int cs_etm_get_ro(struct perf_pmu *pmu, struct perf_cpu cpu, const char *path, __u64 *val);
static bool cs_etm_pmu_path_exists(struct perf_pmu *pmu, struct perf_cpu cpu, const char *path);
@@ -487,6 +493,33 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
return err;
}
+/*
+ * Build an approximation of ETMCR from the options the cs_etm event was
+ * opened with. The result is only an estimate of what the kernel programs:
+ * it does not need to be complete or exact. Returns 0 when no cs_etm evsel
+ * is found in the recording's evlist.
+ */
+static u64 cs_etm_guess_etmcr(struct auxtrace_record *itr)
+{
+	/* Format field name paired with the ETMCR bit it switches on. */
+	static const struct {
+		const char *field;
+		u64 bit;
+	} etmcr_map[] = {
+		{ "cycacc", ETMCR_CYC_ACC },
+		{ "timestamp", ETMCR_TIMESTAMP_EN },
+		{ "retstack", ETMCR_RETURN_STACK },
+	};
+	struct cs_etm_recording *rec =
+		container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *pmu = rec->cs_etm_pmu;
+	struct evsel *evsel = cs_etm_get_evsel(rec->evlist, pmu);
+	u64 reg = 0;
+	u64 field_val;
+	unsigned int i;
+
+	if (evsel == NULL)
+		return reg;
+
+	for (i = 0; i < sizeof(etmcr_map) / sizeof(etmcr_map[0]); i++) {
+		/* A failed lookup leaves the corresponding bit clear. */
+		if (evsel__get_config_val(pmu, evsel, etmcr_map[i].field,
+					  &field_val))
+			continue;
+		if (field_val)
+			reg |= etmcr_map[i].bit;
+	}
+
+	return reg;
+}
+
static u64 cs_etm_get_config(struct auxtrace_record *itr)
{
struct cs_etm_recording *ptr =
@@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu cpu, u32 *offset,
case CS_ETMV3:
magic = __perf_cs_etmv3_magic;
/* Get configuration register */
- info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
+ info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr);
/* traceID set to legacy value in case new perf running on old system */
info->priv[*offset + CS_ETM_ETMTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
/* Get read-only information from sysFS */
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3ae4ac8f9a37..1c567cc70a82 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -574,6 +574,8 @@ void evsel__uniquify_counter(struct evsel *counter);
((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
u64 evsel__bitfield_swap_branch_flags(u64 value);
+int evsel__get_config_val(struct perf_pmu *pmu, struct evsel *evsel,
+ const char *config_name, u64 *val);
void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel,
const char *config_name, u64 val);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index c8968cddc0a9..5501b0230097 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1384,6 +1384,39 @@ pmu_find_format(const struct list_head *formats, const char *name)
return NULL;
}
+/*
+ * Gather the bits of @config selected by @mask into a contiguous value,
+ * lowest selected bit first. For a contiguous @mask this is the same as
+ * shifting the field down to bit 0; unlike FIELD_GET() it also copes with
+ * masks that have gaps and with masks only known at run time.
+ */
+static u64 pmu_format_gather_bits(u64 mask, u64 config)
+{
+	u64 out = 0;
+	unsigned int src, dst = 0;
+
+	for (src = 0; src < 64; src++) {
+		if (!(mask & (1ULL << src)))
+			continue;
+		if (config & (1ULL << src))
+			out |= 1ULL << dst;
+		dst++;
+	}
+
+	return out;
+}
+
+/*
+ * Read the value of the format field @config_name out of @evsel's attr.
+ *
+ * The field is looked up in @pmu's format list, which says both which
+ * attr.config* word holds it and which bits of that word it occupies.
+ *
+ * Returns 0 and stores the field value in @val on success. Returns -EINVAL
+ * and stores 0 in @val if the format name is unknown or has no bits, or if
+ * the format refers to a config word this function does not handle.
+ */
+int evsel__get_config_val(struct perf_pmu *pmu, struct evsel *evsel,
+			  const char *config_name, u64 *val)
+{
+	struct perf_pmu_format *format = pmu_find_format(&pmu->format, config_name);
+	u64 bits = perf_pmu__format_bits(pmu, config_name);
+	u64 config;
+
+	/* Never hand back an indeterminate value, even on failure. */
+	*val = 0;
+
+	if (!format || !bits) {
+		pr_err("Unknown/empty format name: %s\n", config_name);
+		return -EINVAL;
+	}
+
+	switch (format->value) {
+	case PERF_PMU_FORMAT_VALUE_CONFIG:
+		config = evsel->core.attr.config;
+		break;
+	case PERF_PMU_FORMAT_VALUE_CONFIG1:
+		config = evsel->core.attr.config1;
+		break;
+	case PERF_PMU_FORMAT_VALUE_CONFIG2:
+		config = evsel->core.attr.config2;
+		break;
+	case PERF_PMU_FORMAT_VALUE_CONFIG3:
+		config = evsel->core.attr.config3;
+		break;
+	case PERF_PMU_FORMAT_VALUE_CONFIG4:
+		config = evsel->core.attr.config4;
+		break;
+	default:
+		pr_err("Unknown format value: %d\n", format->value);
+		return -EINVAL;
+	}
+
+	*val = pmu_format_gather_bits(bits, config);
+	return 0;
+}
+
/*
* Set @config_name to @val as long as the user hasn't already set or cleared it
* by passing a config term on the command line.
--
2.34.1
On Mon, Dec 01, 2025 at 04:41:07PM +0000, Coresight ML wrote: [...] > @@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu cpu, u32 *offset, > case CS_ETMV3: > magic = __perf_cs_etmv3_magic; > /* Get configuration register */ > - info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr); > + info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr); I still think cs_etm_get_config() is better than cs_etm_guess_etmcr(). For ETMv3, we directly pass CONFIG to the kernel, and after validation in the driver, the value will be set to ETMCR. If we already know the config value is consistent between user space and kernel, why introduce a redundant "guess" operation here? Thanks, Leo
On 02/12/2025 11:43 am, Leo Yan wrote: > On Mon, Dec 01, 2025 at 04:41:07PM +0000, Coresight ML wrote: > > [...] > >> @@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu cpu, u32 *offset, >> case CS_ETMV3: >> magic = __perf_cs_etmv3_magic; >> /* Get configuration register */ >> - info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr); >> + info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr); > > I still think cs_etm_get_config() is better than cs_etm_guess_etmcr(). > > For ETMv3, we directly pass CONFIG to the kernel, and after validation > in the dirver, then the value will be set to ETMCR. If we already know > the config value is consistent between user space and kernel, why > introduce a redundant "guess" operation here? > > Thanks, > Leo Because userspace doesn't always come up with the same value as the driver. For example right now in ETM3, ETMCR_RETURN_STACK isn't set depending on certain conditions that userspace doesn't know about. ETM4 has the same for TRCCONFIGR_RS and maybe some others. In the future, other versions of the driver could do different things as long as we don't break decoding. I didn't want the function name to imply it was doing something it wasn't as that confused me a little bit. It's definitely not "getting" the value. Maybe "guess" isn't the best it could be, but it's not far off.
On 02/12/2025 11:53 am, James Clark wrote: > > > On 02/12/2025 11:43 am, Leo Yan wrote: >> On Mon, Dec 01, 2025 at 04:41:07PM +0000, Coresight ML wrote: >> >> [...] >> >>> @@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu >>> cpu, u32 *offset, >>> case CS_ETMV3: >>> magic = __perf_cs_etmv3_magic; >>> /* Get configuration register */ >>> - info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr); >>> + info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr); >> >> I still think cs_etm_get_config() is better than cs_etm_guess_etmcr(). >> >> For ETMv3, we directly pass CONFIG to the kernel, and after validation >> in the dirver, then the value will be set to ETMCR. If we already know >> the config value is consistent between user space and kernel, why One other note is that since moving the timestamp field, this is no longer true either. The value in attr.config isn't directly put into ETMCR. >> introduce a redundant "guess" operation here? >> >> Thanks, >> Leo > > Because userspace doesn't always come up with the same value as the > driver. For example right now in ETM3, ETMCR_RETURN_STACK isn't set > depending on certain conditions that userspace doesn't know about. ETM4 > has the same for TRCCONFIGR_RS and maybe some others. In the future, > other versions of the driver could do different things as long as we > don't break decoding. > > I didn't want the function name to imply it was doing something it > wasn't as that confused me a little bit. It's definitely not "getting" > the value. Maybe "guess" isn't the best it could be, but it's not far off. >
Hi, On Thu, 4 Dec 2025 at 10:55, James Clark <james.clark@linaro.org> wrote: > > > > On 02/12/2025 11:53 am, James Clark wrote: > > > > > > On 02/12/2025 11:43 am, Leo Yan wrote: > >> On Mon, Dec 01, 2025 at 04:41:07PM +0000, Coresight ML wrote: > >> > >> [...] > >> > >>> @@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu > >>> cpu, u32 *offset, > >>> case CS_ETMV3: > >>> magic = __perf_cs_etmv3_magic; > >>> /* Get configuration register */ > >>> - info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr); > >>> + info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr); > >> > >> I still think cs_etm_get_config() is better than cs_etm_guess_etmcr(). > >> > >> For ETMv3, we directly pass CONFIG to the kernel, and after validation > >> in the dirver, then the value will be set to ETMCR. If we already know > >> the config value is consistent between user space and kernel, why > > One other note is that since moving the timestamp field, this is no > longer true either. The value in attr.config isn't directly put into ETMCR. > > >> introduce a redundant "guess" operation here? > >> > >> Thanks, > >> Leo > > > > Because userspace doesn't always come up with the same value as the > > driver. For example right now in ETM3, ETMCR_RETURN_STACK isn't set > > depending on certain conditions that userspace doesn't know about. ETM4 > > has the same for TRCCONFIGR_RS and maybe some others. In the future, > > other versions of the driver could do different things as long as we > > don't break decoding. > > > > I didn't want the function name to imply it was doing something it > > wasn't as that confused me a little bit. It's definitely not "getting" > > the value. Maybe "guess" isn't the best it could be, but it's not far off. > > > Perhaps cs_etm_synth_etmcr()? We cannot read it directly as it has not been set at the time of creating these headers. (unlike the sets of static read only IDR regs that we do read). 
When in perf mode the only configuration bits set in the ConfigR for either ETM3 or 4 are those generated or implied by parameters on the perf command line. This info has to pass from perf to the driver somehow. Evidently many years ago, when only ETMv3/PTM existed, the easy way was perf.config == etm.configr; now that is no longer feasible. As long as perf and the drivers interpret the command line attributes in the same way - all is well. As James says, the actual configr can differ from the synth one - the key is the bits that control the trace format - e.g. cyclecounts, rather than trace filtering e.g. userspace/kernel that affects the driver's configr but not the synthesized value in perf. Decode cares about format, not about filtering. Additionally some things - like return-stack - are implementation dependent - optional on PTM, not at all on ETMv3. If the trace unit does not support it then the drivers ignore this. The only effect on the trace output is less compression if retstack cannot be used. Generally decode needs to know about things that affect format and function, rather than filtering. Mike -- Mike Leach Principal Engineer, ARM Ltd. Manchester Design Centre. UK
On 04/12/2025 1:45 pm, Mike Leach wrote: > Hi, > > On Thu, 4 Dec 2025 at 10:55, James Clark <james.clark@linaro.org> wrote: >> >> >> >> On 02/12/2025 11:53 am, James Clark wrote: >>> >>> >>> On 02/12/2025 11:43 am, Leo Yan wrote: >>>> On Mon, Dec 01, 2025 at 04:41:07PM +0000, Coresight ML wrote: >>>> >>>> [...] >>>> >>>>> @@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu >>>>> cpu, u32 *offset, >>>>> case CS_ETMV3: >>>>> magic = __perf_cs_etmv3_magic; >>>>> /* Get configuration register */ >>>>> - info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr); >>>>> + info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr); >>>> >>>> I still think cs_etm_get_config() is better than cs_etm_guess_etmcr(). >>>> >>>> For ETMv3, we directly pass CONFIG to the kernel, and after validation >>>> in the dirver, then the value will be set to ETMCR. If we already know >>>> the config value is consistent between user space and kernel, why >> >> One other note is that since moving the timestamp field, this is no >> longer true either. The value in attr.config isn't directly put into ETMCR. >> >>>> introduce a redundant "guess" operation here? >>>> >>>> Thanks, >>>> Leo >>> >>> Because userspace doesn't always come up with the same value as the >>> driver. For example right now in ETM3, ETMCR_RETURN_STACK isn't set >>> depending on certain conditions that userspace doesn't know about. ETM4 >>> has the same for TRCCONFIGR_RS and maybe some others. In the future, >>> other versions of the driver could do different things as long as we >>> don't break decoding. >>> >>> I didn't want the function name to imply it was doing something it >>> wasn't as that confused me a little bit. It's definitely not "getting" >>> the value. Maybe "guess" isn't the best it could be, but it's not far off. >>> >> > > Perhaps cs_etm_synth_etmcr()? We cannot read it directly as it has not synth is a good name, I can use that. > been set at the time of creating these headers. 
(unlike the sets of > static read only IDR regs that we do read). > > When in perf mode the only configuration bits set in the ConfigR for > either ETM3 or 4 are those generated or implied by parameters on the > perf command line. > This info has to pass from perf to the driver somehow. Evidently many > years ago, when only ETMv3/PTM existed the easy way was perf.config == > etm.configr, now that is no longer feasible. > As long as perf and the drivers interpret the command line attributes > in the same way - all is well. > > As James says, the actual configr can differ from the synth one - the > key is the bits that control the trace format - e.g. cyclecounts, > rather than trace filtering e.g. userspace/kernel that affects the > drivers configr but not the synthesized value in perf. > Decode cares about format, not about filtering. Additionally some > things - like return-stack are implementation dependent - optional on > PTM, not at all on ETMv3. If the trace unit does not support it then > the drivers ignore this. the only effect on the trace output is less > compression if retstack cannot be used. > > Generally decode needs to know about things that affect format and > function, rather than filtering. > > Mike
© 2016 - 2026 Red Hat, Inc.