[PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne

Ilkka Koskinen posted 1 patch 1 month ago
There is a newer version of this series
.../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
2 files changed, 70 insertions(+)
[PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
Posted by Ilkka Koskinen 1 month ago
Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.

Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
---
 .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
 tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 1443c28545a9..e4115b1e92b2 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
 	ARM_SPE_NV_DRAM		 = 0xe,
 };
 
+enum arm_spe_ampereone_data_source {
+	ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE	= 0x0,
+	ARM_SPE_AMPEREONE_SLC				= 0x3,
+	ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE		= 0x5,
+	ARM_SPE_AMPEREONE_DDR				= 0x7,
+	ARM_SPE_AMPEREONE_L1D				= 0x8,
+	ARM_SPE_AMPEREONE_L2D				= 0x9,
+};
+
 struct arm_spe_record {
 	enum arm_spe_sample_type type;
 	int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 138ffc71b32d..04bd21ad7ea8 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
 		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
 }
 
+static const struct midr_range ampereone_source_spe[] = {
+	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+	{},
+};
+
+static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
+						 union perf_mem_data_src *data_src,
+						 u64 midr)
+{
+	if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
+		arm_spe__synth_data_source_generic(record, data_src);
+		return;
+	}
+
+	if (record->op & ARM_SPE_OP_ST) {
+		data_src->mem_lvl = PERF_MEM_LVL_NA;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+		return;
+	}
+
+	switch (record->source) {
+	case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
+		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_AMPEREONE_SLC:
+		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+		break;
+	case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
+		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_AMPEREONE_DDR:
+		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_AMPEREONE_L1D:
+		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_AMPEREONE_L2D:
+		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	default:
+		break;
+	}
+}
+
 static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
 {
 	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
 	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
+	bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
 
 	if (record->op & ARM_SPE_OP_LD)
 		data_src.mem_op = PERF_MEM_OP_LOAD;
@@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
 
 	if (is_neoverse)
 		arm_spe__synth_data_source_neoverse(record, &data_src);
+	else if (is_ampereone)
+		arm_spe__synth_data_source_ampereone(record, &data_src, midr);
 	else
 		arm_spe__synth_data_source_generic(record, &data_src);
 
-- 
2.47.0
Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
Posted by Leo Yan 1 month ago
On Thu, Oct 24, 2024 at 11:30:35PM +0000, Ilkka Koskinen wrote:
> 
> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
> 
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> ---
>  .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>  tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
>  2 files changed, 70 insertions(+)
> 
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 1443c28545a9..e4115b1e92b2 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>         ARM_SPE_NV_DRAM          = 0xe,
>  };
> 
> +enum arm_spe_ampereone_data_source {
> +       ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE    = 0x0,
> +       ARM_SPE_AMPEREONE_SLC                           = 0x3,
> +       ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE             = 0x5,
> +       ARM_SPE_AMPEREONE_DDR                           = 0x7,
> +       ARM_SPE_AMPEREONE_L1D                           = 0x8,
> +       ARM_SPE_AMPEREONE_L2D                           = 0x9,
> +};
> +
>  struct arm_spe_record {
>         enum arm_spe_sample_type type;
>         int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 138ffc71b32d..04bd21ad7ea8 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
>                 data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>  }
> 
> +static const struct midr_range ampereone_source_spe[] = {
> +       MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
> +       {},
> +};
> +
> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
> +                                                union perf_mem_data_src *data_src,
> +                                                u64 midr)
> +{
> +       if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
> +               arm_spe__synth_data_source_generic(record, data_src);
> +               return;
> +       }

With James' suggestion, I don't think here need to check the CPU
variant again.  All generic data source generating should run in the 
arm_spe__synth_data_source() function.

> +
> +       if (record->op & ARM_SPE_OP_ST) {
> +               data_src->mem_lvl = PERF_MEM_LVL_NA;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NA;
> +               return;
> +       }
> +
> +       switch (record->source) {
> +       case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
> +               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> +               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +               break;
> +       case ARM_SPE_AMPEREONE_SLC:
> +               data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
> +               break;
> +       case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
> +               data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
> +               data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> +               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
> +               break;
> +       case ARM_SPE_AMPEREONE_DDR:
> +               data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +               break;
> +       case ARM_SPE_AMPEREONE_L1D:
> +               data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +               break;
> +       case ARM_SPE_AMPEREONE_L2D:
> +               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
> +               break;

We have another way to do this.  If convert the SoC specific data source
to common data source values, e.g.

  ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_NV_PEER_CORE
  ARM_SPE_AMPEREONE_SLC -> ARM_SPE_NV_SYS_CACHE
  ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE -> ARM_SPE_NV_REMOTE
  ARM_SPE_AMPEREONE_DDR -> ARM_SPE_NV_DRAM
  ...

Then we don't need to maintain two functions with almost same setting.

I have no strong opinion for this. A dedicated function for Ampere CPU
might give a bit flexiblity for later tweaking. It is up to you.

Last thing, please work on the the latest perf-tools-next branch:

  https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git
  branch: perf-tools-next

Recently we have Arm SPE data source refactoring, please rebase on it.

Thanks,
Leo

> +       default:
> +               break;
> +       }
> +}
> +
>  static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
>  {
>         union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
>         bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
> +       bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
> 
>         if (record->op & ARM_SPE_OP_LD)
>                 data_src.mem_op = PERF_MEM_OP_LOAD;
> @@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
> 
>         if (is_neoverse)
>                 arm_spe__synth_data_source_neoverse(record, &data_src);
> +       else if (is_ampereone)
> +               arm_spe__synth_data_source_ampereone(record, &data_src, midr);
>         else
>                 arm_spe__synth_data_source_generic(record, &data_src);
> 
> --
> 2.47.0
> 
>
Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
Posted by Ilkka Koskinen 3 weeks, 6 days ago
Hi Leo,

On Fri, 25 Oct 2024, Leo Yan wrote:
> On Thu, Oct 24, 2024 at 11:30:35PM +0000, Ilkka Koskinen wrote:
>>
>> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
>>
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
>> ---
>>  .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>>  tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
>>  2 files changed, 70 insertions(+)
>>
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> index 1443c28545a9..e4115b1e92b2 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>>         ARM_SPE_NV_DRAM          = 0xe,
>>  };
>>
>> +enum arm_spe_ampereone_data_source {
>> +       ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE    = 0x0,
>> +       ARM_SPE_AMPEREONE_SLC                           = 0x3,
>> +       ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE             = 0x5,
>> +       ARM_SPE_AMPEREONE_DDR                           = 0x7,
>> +       ARM_SPE_AMPEREONE_L1D                           = 0x8,
>> +       ARM_SPE_AMPEREONE_L2D                           = 0x9,
>> +};
>> +
>>  struct arm_spe_record {
>>         enum arm_spe_sample_type type;
>>         int err;
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 138ffc71b32d..04bd21ad7ea8 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
>>                 data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>>  }
>>
>> +static const struct midr_range ampereone_source_spe[] = {
>> +       MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
>> +       {},
>> +};
>> +
>> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
>> +                                                union perf_mem_data_src *data_src,
>> +                                                u64 midr)
>> +{
>> +       if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
>> +               arm_spe__synth_data_source_generic(record, data_src);
>> +               return;
>> +       }
>
> With James' suggestion, I don't think here need to check the CPU
> variant again.  All generic data source generating should run in the
> arm_spe__synth_data_source() function.

Yep, checking the implementor ID was just wrong and unnecessary.
I fix that.

>
>> +
>> +       if (record->op & ARM_SPE_OP_ST) {
>> +               data_src->mem_lvl = PERF_MEM_LVL_NA;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
>> +               data_src->mem_snoop = PERF_MEM_SNOOP_NA;
>> +               return;
>> +       }
>> +
>> +       switch (record->source) {
>> +       case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
>> +               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
>> +               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> +               break;
>> +       case ARM_SPE_AMPEREONE_SLC:
>> +               data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
>> +               data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
>> +               break;
>> +       case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
>> +               data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
>> +               data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
>> +               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
>> +               break;
>> +       case ARM_SPE_AMPEREONE_DDR:
>> +               data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
>> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> +               break;
>> +       case ARM_SPE_AMPEREONE_L1D:
>> +               data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
>> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> +               break;
>> +       case ARM_SPE_AMPEREONE_L2D:
>> +               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
>> +               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
>> +               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
>> +               break;
>
> We have another way to do this.  If convert the SoC specific data source
> to common data source values, e.g.
>
>  ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE -> ARM_SPE_NV_PEER_CORE
>  ARM_SPE_AMPEREONE_SLC -> ARM_SPE_NV_SYS_CACHE
>  ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE -> ARM_SPE_NV_REMOTE
>  ARM_SPE_AMPEREONE_DDR -> ARM_SPE_NV_DRAM
>  ...
>
> Then we don't need to maintain two functions with almost same setting.
>
> I have no strong opinion for this. A dedicated function for Ampere CPU
> might give a bit flexiblity for later tweaking. It is up to you.

Let me think about it and see how it would look like.

>
> Last thing, please work on the the latest perf-tools-next branch:
>
>  https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git
>  branch: perf-tools-next
>
> Recently we have Arm SPE data source refactoring, please rebase on it.

Uh, for some reason I rebased it on top of Will's arm64 tree. Sorry about 
that.

Cheers, Ilkka

>
> Thanks,
> Leo
>
>> +       default:
>> +               break;
>> +       }
>> +}
>> +
>>  static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
>>  {
>>         union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
>>         bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
>> +       bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
>>
>>         if (record->op & ARM_SPE_OP_LD)
>>                 data_src.mem_op = PERF_MEM_OP_LOAD;
>> @@ -529,6 +588,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m
>>
>>         if (is_neoverse)
>>                 arm_spe__synth_data_source_neoverse(record, &data_src);
>> +       else if (is_ampereone)
>> +               arm_spe__synth_data_source_ampereone(record, &data_src, midr);
>>         else
>>                 arm_spe__synth_data_source_generic(record, &data_src);
>>
>> --
>> 2.47.0
>>
>>
>
Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
Posted by James Clark 1 month ago

On 25/10/2024 12:30 am, Ilkka Koskinen wrote:
> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
> 
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> ---
>   .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>   tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
>   2 files changed, 70 insertions(+)
> 
> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> index 1443c28545a9..e4115b1e92b2 100644
> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>   	ARM_SPE_NV_DRAM		 = 0xe,
>   };
>   
> +enum arm_spe_ampereone_data_source {
> +	ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE	= 0x0,
> +	ARM_SPE_AMPEREONE_SLC				= 0x3,
> +	ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE		= 0x5,
> +	ARM_SPE_AMPEREONE_DDR				= 0x7,
> +	ARM_SPE_AMPEREONE_L1D				= 0x8,
> +	ARM_SPE_AMPEREONE_L2D				= 0x9,
> +};
> +
>   struct arm_spe_record {
>   	enum arm_spe_sample_type type;
>   	int err;
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 138ffc71b32d..04bd21ad7ea8 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
>   		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>   }
>   
> +static const struct midr_range ampereone_source_spe[] = {
> +	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
> +	{},
> +};
> +
> +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
> +						 union perf_mem_data_src *data_src,
> +						 u64 midr)
> +{
> +	if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
> +		arm_spe__synth_data_source_generic(record, data_src);
> +		return;
> +	}
[...]
>   static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
>   {
>   	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
>   	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
> +	bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
>   

Hi Ilkka,

I think this read_cpuid_implementor() is for the device that's running 
report, rather than record. You need to use the midr that's saved into 
the file.

But it looks like you've done that for is_midr_in_range_list(midr, 
ampereone_source_spe) above. Is it possible to just do that and not 
read_cpuid_implementor() and then it's done the same way as neoverse and 
also works off target?

Thanks

James
Re: [PATCH] perf arm-spe: Add support for SPE Data Source packet on AmpereOne
Posted by Ilkka Koskinen 3 weeks, 6 days ago

Hi James,

On Fri, 25 Oct 2024, James Clark wrote:
> On 25/10/2024 12:30 am, Ilkka Koskinen wrote:
>> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
>> 
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
>> ---
>>   .../util/arm-spe-decoder/arm-spe-decoder.h    |  9 +++
>>   tools/perf/util/arm-spe.c                     | 61 +++++++++++++++++++
>>   2 files changed, 70 insertions(+)
>> 
>> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h 
>> b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> index 1443c28545a9..e4115b1e92b2 100644
>> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
>> @@ -67,6 +67,15 @@ enum arm_spe_neoverse_data_source {
>>   	ARM_SPE_NV_DRAM		 = 0xe,
>>   };
>>   +enum arm_spe_ampereone_data_source {
>> +	ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE	= 0x0,
>> +	ARM_SPE_AMPEREONE_SLC				= 0x3,
>> +	ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE		= 0x5,
>> +	ARM_SPE_AMPEREONE_DDR				= 0x7,
>> +	ARM_SPE_AMPEREONE_L1D				= 0x8,
>> +	ARM_SPE_AMPEREONE_L2D				= 0x9,
>> +};
>> +
>>   struct arm_spe_record {
>>   	enum arm_spe_sample_type type;
>>   	int err;
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 138ffc71b32d..04bd21ad7ea8 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -515,10 +515,69 @@ static void arm_spe__synth_data_source_generic(const 
>> struct arm_spe_record *reco
>>   		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
>>   }
>>   +static const struct midr_range ampereone_source_spe[] = {
>> +	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
>> +	{},
>> +};
>> +
>> +static void arm_spe__synth_data_source_ampereone(const struct 
>> arm_spe_record *record,
>> +						 union perf_mem_data_src 
>> *data_src,
>> +						 u64 midr)
>> +{
>> +	if (!is_midr_in_range_list(midr, ampereone_source_spe)) {
>> +		arm_spe__synth_data_source_generic(record, data_src);
>> +		return;
>> +	}
> [...]
>>   static u64 arm_spe__synth_data_source(const struct arm_spe_record 
>> *record, u64 midr)
>>   {
>>   	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
>>   	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
>> +	bool is_ampereone = (read_cpuid_implementor() == ARM_CPU_IMP_AMPERE);
>> 
>
> Hi Ilkka,
>
> I think this read_cpuid_implementor() is for the device that's running 
> report, rather than record. You need to use the midr that's saved into the 
> file.

Uh, that didn't come to my mind at all.

> But it looks like you've done that for is_midr_in_range_list(midr, 
> ampereone_source_spe) above. Is it possible to just do that and not 
> read_cpuid_implementor() and then it's done the same way as neoverse and also 
> works off target?

You're right, it's wrong and there's no even need for separate implementor 
id check. I fix it.

Cheers, Ilkka

>
> Thanks
>
> James
>
>