[PATCH 4/7] perf cs-etm: Don't use hard coded config bits when setting up ETMCR

James Clark posted 7 patches 2 months, 1 week ago
There is a newer version of this series
[PATCH 4/7] perf cs-etm: Don't use hard coded config bits when setting up ETMCR
Posted by James Clark 2 months, 1 week ago
Perf only looks at attr.config when determining what was programmed into
ETMCR. These bits could theoretically be in any of the config fields.
Add a generic helper to find the value of any named format field in any
config field and then use it to get the attributes relevant to ETMCR.

The kernel will also stop publishing the ETMCR register bits in a header
[1] so preempt that by defining them here.

[1]: https://lore.kernel.org/linux-arm-kernel/20251128-james-cs-syncfreq-v8-10-4d319764cc58@linaro.org/
Signed-off-by: James Clark <james.clark@linaro.org>
---
 tools/perf/arch/arm/util/cs-etm.c | 35 ++++++++++++++++++++++++++++++++++-
 tools/perf/util/evsel.h           |  2 ++
 tools/perf/util/pmu.c             | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 22c6272e8c36..414cafb21c98 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -68,6 +68,12 @@ static const char * const metadata_ete_ro[] = {
 
 enum cs_etm_version { CS_NOT_PRESENT, CS_ETMV3, CS_ETMV4, CS_ETE };
 
+
+/* ETMv3 ETMCR register bits */
+#define ETMCR_CYC_ACC		BIT(12)
+#define ETMCR_TIMESTAMP_EN	BIT(28)
+#define ETMCR_RETURN_STACK	BIT(29)
+
 static bool cs_etm_is_ete(struct perf_pmu *cs_etm_pmu, struct perf_cpu cpu);
 static int cs_etm_get_ro(struct perf_pmu *pmu, struct perf_cpu cpu, const char *path, __u64 *val);
 static bool cs_etm_pmu_path_exists(struct perf_pmu *pmu, struct perf_cpu cpu, const char *path);
@@ -487,6 +493,33 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
 	return err;
 }
 
+static u64 cs_etm_guess_etmcr(struct auxtrace_record *itr)
+{
+	struct cs_etm_recording *ptr =
+		container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+	struct evsel *evsel = cs_etm_get_evsel(ptr->evlist, cs_etm_pmu);
+	u64 etmcr = 0;
+	u64 val;
+
+	if (!evsel)
+		return 0;
+
+	/*
+	 * Roughly guess what the kernel programmed into ETMCR based on
+	 * what options the event was opened with. This doesn't have to be
+	 * complete or 100% accurate, not all bits used by OpenCSD anyway.
+	 */
+	if (!evsel__get_config_val(cs_etm_pmu, evsel, "cycacc", &val) && val)
+		etmcr |= ETMCR_CYC_ACC;
+	if (!evsel__get_config_val(cs_etm_pmu, evsel, "timestamp", &val) && val)
+		etmcr |= ETMCR_TIMESTAMP_EN;
+	if (!evsel__get_config_val(cs_etm_pmu, evsel, "retstack", &val) && val)
+		etmcr |= ETMCR_RETURN_STACK;
+
+	return etmcr;
+}
+
 static u64 cs_etm_get_config(struct auxtrace_record *itr)
 {
 	struct cs_etm_recording *ptr =
@@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu cpu, u32 *offset,
 	case CS_ETMV3:
 		magic = __perf_cs_etmv3_magic;
 		/* Get configuration register */
-		info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
+		info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr);
 		/* traceID set to legacy value in case new perf running on old system */
 		info->priv[*offset + CS_ETM_ETMTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
 		/* Get read-only information from sysFS */
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3ae4ac8f9a37..1c567cc70a82 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -574,6 +574,8 @@ void evsel__uniquify_counter(struct evsel *counter);
 	((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
 
 u64 evsel__bitfield_swap_branch_flags(u64 value);
+int evsel__get_config_val(struct perf_pmu *pmu, struct evsel *evsel,
+			  const char *config_name, u64 *val);
 void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel,
 				const char *config_name, u64 val);
 
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index c8968cddc0a9..5501b0230097 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1384,6 +1384,39 @@ pmu_find_format(const struct list_head *formats, const char *name)
 	return NULL;
 }
 
+int evsel__get_config_val(struct perf_pmu *pmu, struct evsel *evsel,
+			  const char *config_name, u64 *val)
+{
+	struct perf_pmu_format *format = pmu_find_format(&pmu->format, config_name);
+	u64 bits = perf_pmu__format_bits(pmu, config_name);
+
+	if (!format || !bits) {
+		pr_err("Unknown/empty format name: %s\n", config_name);
+		return -EINVAL;
+	}
+
+	switch (format->value) {
+	case PERF_PMU_FORMAT_VALUE_CONFIG:
+		*val = FIELD_GET(bits, evsel->core.attr.config);
+		return 0;
+	case PERF_PMU_FORMAT_VALUE_CONFIG1:
+		*val = FIELD_GET(bits, evsel->core.attr.config1);
+		return 0;
+	case PERF_PMU_FORMAT_VALUE_CONFIG2:
+		*val = FIELD_GET(bits, evsel->core.attr.config2);
+		return 0;
+	case PERF_PMU_FORMAT_VALUE_CONFIG3:
+		*val = FIELD_GET(bits, evsel->core.attr.config3);
+		return 0;
+	case PERF_PMU_FORMAT_VALUE_CONFIG4:
+		*val = FIELD_GET(bits, evsel->core.attr.config4);
+		return 0;
+	default:
+		pr_err("Unknown format value: %d\n", format->value);
+		return -EINVAL;
+	}
+}
+
 /*
  * Set @config_name to @val as long as the user hasn't already set or cleared it
  * by passing a config term on the command line.

-- 
2.34.1
Re: [PATCH 4/7] perf cs-etm: Don't use hard coded config bits when setting up ETMCR
Posted by Leo Yan 2 months, 1 week ago
On Mon, Dec 01, 2025 at 04:41:07PM +0000, Coresight ML wrote:

[...]

> @@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu cpu, u32 *offset,
>  	case CS_ETMV3:
>  		magic = __perf_cs_etmv3_magic;
>  		/* Get configuration register */
> -		info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
> +		info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr);

I still think cs_etm_get_config() is better than cs_etm_guess_etmcr().

For ETMv3, we directly pass CONFIG to the kernel, and after validation
in the driver, the value is then written to ETMCR.  If we already know
the config value is consistent between user space and kernel, why
introduce a redundant "guess" operation here?

Thanks,
Leo
Re: [PATCH 4/7] perf cs-etm: Don't use hard coded config bits when setting up ETMCR
Posted by James Clark 2 months, 1 week ago

On 02/12/2025 11:43 am, Leo Yan wrote:
> On Mon, Dec 01, 2025 at 04:41:07PM +0000, Coresight ML wrote:
> 
> [...]
> 
>> @@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu cpu, u32 *offset,
>>   	case CS_ETMV3:
>>   		magic = __perf_cs_etmv3_magic;
>>   		/* Get configuration register */
>> -		info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
>> +		info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr);
> 
> I still think cs_etm_get_config() is better than cs_etm_guess_etmcr().
> 
> For ETMv3, we directly pass CONFIG to the kernel, and after validation
> in the dirver, then the value will be set to ETMCR.  If we already know
> the config value is consistent between user space and kernel, why
> introduce a redundant "guess" operation here?
> 
> Thanks,
> Leo

Because userspace doesn't always come up with the same value as the
driver. For example, right now in ETM3, ETMCR_RETURN_STACK may not be set,
depending on certain conditions that userspace doesn't know about.  ETM4
has the same for TRCCONFIGR_RS and maybe some others. In the future,
other versions of the driver could do different things as long as we
don't break decoding.

I didn't want the function name to imply it was doing something it 
wasn't as that confused me a little bit. It's definitely not "getting" 
the value. Maybe "guess" isn't the best it could be, but it's not far off.
Re: [PATCH 4/7] perf cs-etm: Don't use hard coded config bits when setting up ETMCR
Posted by James Clark 2 months, 1 week ago

On 02/12/2025 11:53 am, James Clark wrote:
> 
> 
> On 02/12/2025 11:43 am, Leo Yan wrote:
>> On Mon, Dec 01, 2025 at 04:41:07PM +0000, Coresight ML wrote:
>>
>> [...]
>>
>>> @@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu 
>>> cpu, u32 *offset,
>>>       case CS_ETMV3:
>>>           magic = __perf_cs_etmv3_magic;
>>>           /* Get configuration register */
>>> -        info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
>>> +        info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr);
>>
>> I still think cs_etm_get_config() is better than cs_etm_guess_etmcr().
>>
>> For ETMv3, we directly pass CONFIG to the kernel, and after validation
>> in the dirver, then the value will be set to ETMCR.  If we already know
>> the config value is consistent between user space and kernel, why

One other note is that since moving the timestamp field, this is no 
longer true either. The value in attr.config isn't directly put into ETMCR.

>> introduce a redundant "guess" operation here?
>>
>> Thanks,
>> Leo
> 
> Because userspace doesn't always come up with the same value as the 
> driver. For example right now in ETM3, ETMCR_RETURN_STACK isn't set 
> depending on certain conditions that userspace doesn't know about.  ETM4 
> has the same for TRCCONFIGR_RS and maybe some others. In the future, 
> other versions of the driver could do different things as long as we 
> don't break decoding.
> 
> I didn't want the function name to imply it was doing something it 
> wasn't as that confused me a little bit. It's definitely not "getting" 
> the value. Maybe "guess" isn't the best it could be, but it's not far off.
> 

Re: [PATCH 4/7] perf cs-etm: Don't use hard coded config bits when setting up ETMCR
Posted by Mike Leach 2 months ago
Hi,

On Thu, 4 Dec 2025 at 10:55, James Clark <james.clark@linaro.org> wrote:
>
>
>
> On 02/12/2025 11:53 am, James Clark wrote:
> >
> >
> > On 02/12/2025 11:43 am, Leo Yan wrote:
> >> On Mon, Dec 01, 2025 at 04:41:07PM +0000, Coresight ML wrote:
> >>
> >> [...]
> >>
> >>> @@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu
> >>> cpu, u32 *offset,
> >>>       case CS_ETMV3:
> >>>           magic = __perf_cs_etmv3_magic;
> >>>           /* Get configuration register */
> >>> -        info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
> >>> +        info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr);
> >>
> >> I still think cs_etm_get_config() is better than cs_etm_guess_etmcr().
> >>
> >> For ETMv3, we directly pass CONFIG to the kernel, and after validation
> >> in the dirver, then the value will be set to ETMCR.  If we already know
> >> the config value is consistent between user space and kernel, why
>
> One other note is that since moving the timestamp field, this is no
> longer true either. The value in attr.config isn't directly put into ETMCR.
>
> >> introduce a redundant "guess" operation here?
> >>
> >> Thanks,
> >> Leo
> >
> > Because userspace doesn't always come up with the same value as the
> > driver. For example right now in ETM3, ETMCR_RETURN_STACK isn't set
> > depending on certain conditions that userspace doesn't know about.  ETM4
> > has the same for TRCCONFIGR_RS and maybe some others. In the future,
> > other versions of the driver could do different things as long as we
> > don't break decoding.
> >
> > I didn't want the function name to imply it was doing something it
> > wasn't as that confused me a little bit. It's definitely not "getting"
> > the value. Maybe "guess" isn't the best it could be, but it's not far off.
> >
>

Perhaps cs_etm_synth_etmcr()? We cannot read it directly, as it has not
been set at the time of creating these headers (unlike the sets of
static read-only IDR regs that we do read).

When in perf mode, the only configuration bits set in the ConfigR for
either ETM3 or 4 are those generated or implied by parameters on the
perf command line.
This info has to pass from perf to the driver somehow. Evidently, many
years ago, when only ETMv3/PTM existed, the easy way was perf.config ==
etm.configr; now that is no longer feasible.
As long as perf and the drivers interpret the command line attributes
in the same way - all is well.

As James says, the actual configr can differ from the synth one - the
key is the bits that control the trace format, e.g. cyclecounts,
rather than trace filtering, e.g. userspace/kernel, which affects the
driver's configr but not the synthesized value in perf.
Decode cares about format, not about filtering. Additionally, some
things - like return-stack - are implementation dependent: optional on
PTM, not available at all on ETMv3. If the trace unit does not support
it then the drivers ignore this. The only effect on the trace output is
less compression if retstack cannot be used.

Generally decode needs to know about things that affect format and
function, rather than filtering.

Mike
-- 
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
Re: [PATCH 4/7] perf cs-etm: Don't use hard coded config bits when setting up ETMCR
Posted by James Clark 2 months ago

On 04/12/2025 1:45 pm, Mike Leach wrote:
> Hi,
> 
> On Thu, 4 Dec 2025 at 10:55, James Clark <james.clark@linaro.org> wrote:
>>
>>
>>
>> On 02/12/2025 11:53 am, James Clark wrote:
>>>
>>>
>>> On 02/12/2025 11:43 am, Leo Yan wrote:
>>>> On Mon, Dec 01, 2025 at 04:41:07PM +0000, Coresight ML wrote:
>>>>
>>>> [...]
>>>>
>>>>> @@ -746,7 +779,7 @@ static void cs_etm_get_metadata(struct perf_cpu
>>>>> cpu, u32 *offset,
>>>>>        case CS_ETMV3:
>>>>>            magic = __perf_cs_etmv3_magic;
>>>>>            /* Get configuration register */
>>>>> -        info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
>>>>> +        info->priv[*offset + CS_ETM_ETMCR] = cs_etm_guess_etmcr(itr);
>>>>
>>>> I still think cs_etm_get_config() is better than cs_etm_guess_etmcr().
>>>>
>>>> For ETMv3, we directly pass CONFIG to the kernel, and after validation
>>>> in the dirver, then the value will be set to ETMCR.  If we already know
>>>> the config value is consistent between user space and kernel, why
>>
>> One other note is that since moving the timestamp field, this is no
>> longer true either. The value in attr.config isn't directly put into ETMCR.
>>
>>>> introduce a redundant "guess" operation here?
>>>>
>>>> Thanks,
>>>> Leo
>>>
>>> Because userspace doesn't always come up with the same value as the
>>> driver. For example right now in ETM3, ETMCR_RETURN_STACK isn't set
>>> depending on certain conditions that userspace doesn't know about.  ETM4
>>> has the same for TRCCONFIGR_RS and maybe some others. In the future,
>>> other versions of the driver could do different things as long as we
>>> don't break decoding.
>>>
>>> I didn't want the function name to imply it was doing something it
>>> wasn't as that confused me a little bit. It's definitely not "getting"
>>> the value. Maybe "guess" isn't the best it could be, but it's not far off.
>>>
>>
> 
> Perhaps cs_etm_synth_etmcr()? We cannot read it directly as it has not

synth is a good name, I can use that.

> been set at the time of creating these headers. (unlike the sets of
> static read only IDR regs that we do read).
> 
> When in perf mode the only configuration bits set in the ConfigR for
> either ETM3 or 4 are those generated or implied by parameters on the
> perf command line.
> This info has to pass from perf to the driver somehow. Evidently many
> years ago, when only ETMv3/PTM existed the easy way was perf.config ==
> etm.configr, now that is no longer feasible.
> As long as perf and the drivers interpret the command line attributes
> in the same way - all is well.
> 
> As James says, the actual configr can differ from the synth one - the
> key is the bits that control the trace format - e.g. cyclecounts,
> rather than trace filtering e.g. userspace/kernel that affects the
> drivers configr but not the synthesized value in perf.
> Decode cares about format, not about filtering. Additionally some
> things - like return-stack are implementation dependent - optional on
> PTM, not at all on ETMv3. If the trace unit does not support it then
> the drivers ignore this. the only effect on the trace output is less
> compression if retstack cannot be used.
> 
> Generally decode needs to know about things that affect format and
> function, rather than filtering.
> 
> Mike