[PATCH v2 2/2] perf arm-spe: Add support for SPE Data Source packet on AmpereOne

Ilkka Koskinen posted 2 patches 3 weeks, 3 days ago
There is a newer version of this series
[PATCH v2 2/2] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
Posted by Ilkka Koskinen 3 weeks, 3 days ago
Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.

Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
---
 .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
 tools/perf/util/arm-spe.c                     | 65 +++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 358c611eeddb..4bcd627e859f 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -67,6 +67,15 @@ enum arm_spe_common_data_source {
 	ARM_SPE_COMMON_DS_DRAM		= 0xe,
 };
 
+enum arm_spe_ampereone_data_source {
+	ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE    = 0x0,
+	ARM_SPE_AMPEREONE_SLC                           = 0x3,
+	ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE             = 0x5,
+	ARM_SPE_AMPEREONE_DDR                           = 0x7,
+	ARM_SPE_AMPEREONE_L1D                           = 0x8,
+	ARM_SPE_AMPEREONE_L2D                           = 0x9,
+};
+
 struct arm_spe_record {
 	enum arm_spe_sample_type type;
 	int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 9586416be30a..700d4bc8d8ec 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -103,6 +103,30 @@ struct arm_spe_queue {
 	u32				flags;
 };
 
+struct arm_spe_source_mapping {
+	u16 source;
+	enum arm_spe_common_data_source common_src;
+};
+
+#define MAP_SOURCE(src, common)				\
+	{						\
+		.source = ARM_SPE_##src,		\
+		.common_src = ARM_SPE_COMMON_##common,  \
+	}
+
+static int arm_spe__map_to_common_source(u16 source,
+					 struct arm_spe_source_mapping *tbl,
+					 int nr_sources)
+{
+	while (nr_sources--) {
+		if (tbl->source == source)
+			return tbl->common_src;
+		tbl++;
+	}
+
+	return -1;
+}
+
 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
 			 unsigned char *buf, size_t len)
 {
@@ -443,6 +467,11 @@ static const struct midr_range common_ds_encoding_cpus[] = {
 	{},
 };
 
+static const struct midr_range ampereone_ds_encoding_cpus[] = {
+	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+	{},
+};
+
 static void arm_spe__sample_flags(struct arm_spe_queue *speq)
 {
 	const struct arm_spe_record *record = &speq->decoder->record;
@@ -532,6 +561,38 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor
 	}
 }
 
+static struct arm_spe_source_mapping ampereone_sources[] = {
+	MAP_SOURCE(AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE, DS_PEER_CORE),
+	MAP_SOURCE(AMPEREONE_SLC, DS_SYS_CACHE),
+	MAP_SOURCE(AMPEREONE_REMOTE_CHIP_CACHE, DS_REMOTE),
+	MAP_SOURCE(AMPEREONE_DDR, DS_DRAM),
+	MAP_SOURCE(AMPEREONE_L1D, DS_L1D),
+	MAP_SOURCE(AMPEREONE_L2D, DS_L2),
+};
+
+/*
+ * Source is IMPDEF. Here we convert the source code used on AmpereOne cores
+ * to the common (Neoverse, Cortex) to avoid duplicating the decoding code.
+ */
+static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
+						 union perf_mem_data_src *data_src)
+{
+	int common_src;
+	struct arm_spe_record common_record;
+
+	common_src = arm_spe__map_to_common_source(record->source,
+						   ampereone_sources,
+						   ARRAY_SIZE(ampereone_sources));
+	if (common_src < 0)
+		 /* Assign a bogus value that's not used for common coding */
+		common_record.source = 0xfff;
+	else
+		common_record.source = common_src;
+
+	common_record.op = record->op;
+	arm_spe__synth_data_source_common(&common_record, data_src);
+}
+
 static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
 					union perf_mem_data_src *data_src)
 {
@@ -606,6 +667,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
 	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
 	bool is_common = arm_spe__is_ds_encoding_supported(speq,
 						common_ds_encoding_cpus);
+	bool is_ampereone = arm_spe__is_ds_encoding_supported(speq,
+						ampereone_ds_encoding_cpus);
 
 	if (record->op & ARM_SPE_OP_LD)
 		data_src.mem_op = PERF_MEM_OP_LOAD;
@@ -616,6 +679,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
 
 	if (is_common)
 		arm_spe__synth_data_source_common(record, &data_src);
+	else if (is_ampereone)
+		arm_spe__synth_data_source_ampereone(record, &data_src);
 	else
 		arm_spe__synth_memory_level(record, &data_src);
 
-- 
2.47.0
Re: [PATCH v2 2/2] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
Posted by James Clark 2 weeks, 6 days ago

On 31/10/2024 9:35 pm, Ilkka Koskinen wrote:
> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
> 
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> ---
>   .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>   tools/perf/util/arm-spe.c                     | 65 +++++++++++++++++++
>   2 files changed, 74 insertions(+)
> 
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 358c611eeddb..4bcd627e859f 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -67,6 +67,15 @@ enum arm_spe_common_data_source {
>   	ARM_SPE_COMMON_DS_DRAM		= 0xe,
>   };
>   
> +enum arm_spe_ampereone_data_source {
> +	ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE    = 0x0,
> +	ARM_SPE_AMPEREONE_SLC                           = 0x3,
> +	ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE             = 0x5,
> +	ARM_SPE_AMPEREONE_DDR                           = 0x7,
> +	ARM_SPE_AMPEREONE_L1D                           = 0x8,
> +	ARM_SPE_AMPEREONE_L2D                           = 0x9,
> +};
> +
>   struct arm_spe_record {
>   	enum arm_spe_sample_type type;
>   	int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 9586416be30a..700d4bc8d8ec 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -103,6 +103,30 @@ struct arm_spe_queue {
>   	u32				flags;
>   };
>   
> +struct arm_spe_source_mapping {
> +	u16 source;
> +	enum arm_spe_common_data_source common_src;
> +};
> +
> +#define MAP_SOURCE(src, common)				\
> +	{						\
> +		.source = ARM_SPE_##src,		\
> +		.common_src = ARM_SPE_COMMON_##common,  \
> +	}
> +
> +static int arm_spe__map_to_common_source(u16 source,
> +					 struct arm_spe_source_mapping *tbl,
> +					 int nr_sources)
> +{
> +	while (nr_sources--) {
> +		if (tbl->source == source)
> +			return tbl->common_src;
> +		tbl++;
> +	}
> +

Hi Ilkka,

I think a simple switch statement here would be easier to follow than 
the loop, custom macro and then having the mappings in some other place:

switch(source)
case 0x0: /* AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE */
   return DS_PEER_CORE;

etc...

> +	return -1;

And the default case can return 0xfff directly, which avoids the if else 
later only to convert this -1 back into 0xfff.

> +}
> +
>   static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
>   			 unsigned char *buf, size_t len)
>   {
> @@ -443,6 +467,11 @@ static const struct midr_range common_ds_encoding_cpus[] = {
>   	{},
>   };
>   
> +static const struct midr_range ampereone_ds_encoding_cpus[] = {
> +	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
> +	{},
> +};
> +
>   static void arm_spe__sample_flags(struct arm_spe_queue *speq)
>   {
>   	const struct arm_spe_record *record = &speq->decoder->record;
> @@ -532,6 +561,38 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor
>   	}
>   }
>   
> +static struct arm_spe_source_mapping ampereone_sources[] = {
> +	MAP_SOURCE(AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE, DS_PEER_CORE),
> +	MAP_SOURCE(AMPEREONE_SLC, DS_SYS_CACHE),
> +	MAP_SOURCE(AMPEREONE_REMOTE_CHIP_CACHE, DS_REMOTE),
> +	MAP_SOURCE(AMPEREONE_DDR, DS_DRAM),
> +	MAP_SOURCE(AMPEREONE_L1D, DS_L1D),
> +	MAP_SOURCE(AMPEREONE_L2D, DS_L2),
> +};
> +
> +/*
> + * Source is IMPDEF. Here we convert the source code used on AmpereOne cores
> + * to the common (Neoverse, Cortex) to avoid duplicating the decoding code.
> + */
> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
> +						 union perf_mem_data_src *data_src)
> +{
> +	int common_src;
> +	struct arm_spe_record common_record;
> +
> +	common_src = arm_spe__map_to_common_source(record->source,
> +						   ampereone_sources,
> +						   ARRAY_SIZE(ampereone_sources));
> +	if (common_src < 0)
> +		 /* Assign a bogus value that's not used for common coding */
> +		common_record.source = 0xfff;
> +	else
> +		common_record.source = common_src;
> +
> +	common_record.op = record->op;
> +	arm_spe__synth_data_source_common(&common_record, data_src);
> +}
> +
>   static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
>   					union perf_mem_data_src *data_src)
>   {
> @@ -606,6 +667,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
>   	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
>   	bool is_common = arm_spe__is_ds_encoding_supported(speq,
>   						common_ds_encoding_cpus);
> +	bool is_ampereone = arm_spe__is_ds_encoding_supported(speq,
> +						ampereone_ds_encoding_cpus);

I know this probably already works, but we don't really need is_common 
is_ampere etc, it will only grow anyway. All we need is a list of midrs 
and function pairs. That also avoids doing is_ampereone even after we 
already know is_common == true.

static const struct data_src[] = {
...
	DS(MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), common_ds),
	DS(MIDR_ALL_VERSIONS(MIDR_AMPERE1A), ampere_ds),
	{},
...
};

"arm_spe__is_ds_encoding_supported" then becomes a direct call to 
"arm_spe__synth_ds" and we can drop the is_ampereone and is_common vars. 
Then adding new ones doesn't require changing the function anymore.

>   
>   	if (record->op & ARM_SPE_OP_LD)
>   		data_src.mem_op = PERF_MEM_OP_LOAD;
> @@ -616,6 +679,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
>   
>   	if (is_common)
>   		arm_spe__synth_data_source_common(record, &data_src);
> +	else if (is_ampereone)
> +		arm_spe__synth_data_source_ampereone(record, &data_src);
>   	else
>   		arm_spe__synth_memory_level(record, &data_src);
>
Re: [PATCH v2 2/2] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
Posted by Ilkka Koskinen 2 weeks, 4 days ago
Hi James,

On Mon, 4 Nov 2024, James Clark wrote:
> On 31/10/2024 9:35 pm, Ilkka Koskinen wrote:
>> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
>> 
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
>> ---
>>   .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>>   tools/perf/util/arm-spe.c                     | 65 +++++++++++++++++++
>>   2 files changed, 74 insertions(+)
>> 
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h 
>> b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> index 358c611eeddb..4bcd627e859f 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> @@ -67,6 +67,15 @@ enum arm_spe_common_data_source {
>>   	ARM_SPE_COMMON_DS_DRAM		= 0xe,
>>   };
>>   +enum arm_spe_ampereone_data_source {
>> +	ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE    = 0x0,
>> +	ARM_SPE_AMPEREONE_SLC                           = 0x3,
>> +	ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE             = 0x5,
>> +	ARM_SPE_AMPEREONE_DDR                           = 0x7,
>> +	ARM_SPE_AMPEREONE_L1D                           = 0x8,
>> +	ARM_SPE_AMPEREONE_L2D                           = 0x9,
>> +};
>> +
>>   struct arm_spe_record {
>>   	enum arm_spe_sample_type type;
>>   	int err;
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 9586416be30a..700d4bc8d8ec 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -103,6 +103,30 @@ struct arm_spe_queue {
>>   	u32				flags;
>>   };
>>   +struct arm_spe_source_mapping {
>> +	u16 source;
>> +	enum arm_spe_common_data_source common_src;
>> +};
>> +
>> +#define MAP_SOURCE(src, common)				\
>> +	{						\
>> +		.source = ARM_SPE_##src,		\
>> +		.common_src = ARM_SPE_COMMON_##common,  \
>> +	}
>> +
>> +static int arm_spe__map_to_common_source(u16 source,
>> +					 struct arm_spe_source_mapping *tbl,
>> +					 int nr_sources)
>> +{
>> +	while (nr_sources--) {
>> +		if (tbl->source == source)
>> +			return tbl->common_src;
>> +		tbl++;
>> +	}
>> +
>
> Hi Ilkka,
>
> I think a simple switch statement here would be easier to follow than the 
> loop, custom macro and then having the mappings in some other place:
>
> switch(source)
> case 0x0: /* AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE */
>  return DS_PEER_CORE;
>
> etc...
>
>> +	return -1;
>
> And the default case can return 0xfff directly, which avoids the if else 
> later only to convert this -1 back into 0xfff.

I can surely do that.


>
>> +}
>> +
>>   static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
>>   			 unsigned char *buf, size_t len)
>>   {
>> @@ -443,6 +467,11 @@ static const struct midr_range 
>> common_ds_encoding_cpus[] = {
>>   	{},
>>   };
>>   +static const struct midr_range ampereone_ds_encoding_cpus[] = {
>> +	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
>> +	{},
>> +};
>> +
>>   static void arm_spe__sample_flags(struct arm_spe_queue *speq)
>>   {
>>   	const struct arm_spe_record *record = &speq->decoder->record;
>> @@ -532,6 +561,38 @@ static void arm_spe__synth_data_source_common(const 
>> struct arm_spe_record *recor
>>   	}
>>   }
>>   +static struct arm_spe_source_mapping ampereone_sources[] = {
>> +	MAP_SOURCE(AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE, DS_PEER_CORE),
>> +	MAP_SOURCE(AMPEREONE_SLC, DS_SYS_CACHE),
>> +	MAP_SOURCE(AMPEREONE_REMOTE_CHIP_CACHE, DS_REMOTE),
>> +	MAP_SOURCE(AMPEREONE_DDR, DS_DRAM),
>> +	MAP_SOURCE(AMPEREONE_L1D, DS_L1D),
>> +	MAP_SOURCE(AMPEREONE_L2D, DS_L2),
>> +};
>> +
>> +/*
>> + * Source is IMPDEF. Here we convert the source code used on AmpereOne 
>> cores
>> + * to the common (Neoverse, Cortex) to avoid duplicating the decoding 
>> code.
>> + */
>> +static void arm_spe__synth_data_source_ampereone(const struct 
>> arm_spe_record *record,
>> +						 union perf_mem_data_src 
>> *data_src)
>> +{
>> +	int common_src;
>> +	struct arm_spe_record common_record;
>> +
>> +	common_src = arm_spe__map_to_common_source(record->source,
>> +						   ampereone_sources,
>> + 
>> ARRAY_SIZE(ampereone_sources));
>> +	if (common_src < 0)
>> +		 /* Assign a bogus value that's not used for common coding */
>> +		common_record.source = 0xfff;
>> +	else
>> +		common_record.source = common_src;
>> +
>> +	common_record.op = record->op;
>> +	arm_spe__synth_data_source_common(&common_record, data_src);
>> +}
>> +
>>   static void arm_spe__synth_memory_level(const struct arm_spe_record 
>> *record,
>>   					union perf_mem_data_src *data_src)
>>   {
>> @@ -606,6 +667,8 @@ static u64 arm_spe__synth_data_source(struct 
>> arm_spe_queue *speq,
>>   	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
>>   	bool is_common = arm_spe__is_ds_encoding_supported(speq,
>>   						common_ds_encoding_cpus);
>> +	bool is_ampereone = arm_spe__is_ds_encoding_supported(speq,
>> +						ampereone_ds_encoding_cpus);
>
> I know this probably already works, but we don't really need is_common 
> is_ampere etc, it will only grow anyway. All we need is a list of midrs and 
> function pairs. That also avoids doing is_ampereone even after we already 
> know is_common == true.
>
> static const struct data_src[] = {
> ...
> 	DS(MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), common_ds),
> 	DS(MIDR_ALL_VERSIONS(MIDR_AMPERE1A), ampere_ds),
> 	{},
> ...
> };
>
> "arm_spe__is_ds_encoding_supported" then becomes a direct call to 
> "arm_spe__synth_ds" and we can drop the is_ampereone and is_common vars. Then 
> adding new ones doesn't require changing the function anymore.

To be honest, I'm not a big fan of the is_xyz() thing but I just didn't 
want to change it. Anyway, I'll change it for the next version.

Cheers, Ilkka


>
>>     	if (record->op & ARM_SPE_OP_LD)
>>   		data_src.mem_op = PERF_MEM_OP_LOAD;
>> @@ -616,6 +679,8 @@ static u64 arm_spe__synth_data_source(struct 
>> arm_spe_queue *speq,
>>     	if (is_common)
>>   		arm_spe__synth_data_source_common(record, &data_src);
>> +	else if (is_ampereone)
>> +		arm_spe__synth_data_source_ampereone(record, &data_src);
>>   	else
>>   		arm_spe__synth_memory_level(record, &data_src);
>> 
>
>