Decode SPE Data Source packets on AmpereOne. The field is IMPDEF.
Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
---
.../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++
tools/perf/util/arm-spe.c | 65 +++++++++++++++++++
2 files changed, 74 insertions(+)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 358c611eeddb..4bcd627e859f 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -67,6 +67,15 @@ enum arm_spe_common_data_source {
ARM_SPE_COMMON_DS_DRAM = 0xe,
};
+enum arm_spe_ampereone_data_source {
+ ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0,
+ ARM_SPE_AMPEREONE_SLC = 0x3,
+ ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5,
+ ARM_SPE_AMPEREONE_DDR = 0x7,
+ ARM_SPE_AMPEREONE_L1D = 0x8,
+ ARM_SPE_AMPEREONE_L2D = 0x9,
+};
+
struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 9586416be30a..700d4bc8d8ec 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -103,6 +103,30 @@ struct arm_spe_queue {
u32 flags;
};
+struct arm_spe_source_mapping {
+ u16 source;
+ enum arm_spe_common_data_source common_src;
+};
+
+#define MAP_SOURCE(src, common) \
+ { \
+ .source = ARM_SPE_##src, \
+ .common_src = ARM_SPE_COMMON_##common, \
+ }
+
+static int arm_spe__map_to_common_source(u16 source,
+ struct arm_spe_source_mapping *tbl,
+ int nr_sources)
+{
+ while (nr_sources--) {
+ if (tbl->source == source)
+ return tbl->common_src;
+ tbl++;
+ }
+
+ return -1;
+}
+
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
unsigned char *buf, size_t len)
{
@@ -443,6 +467,11 @@ static const struct midr_range common_ds_encoding_cpus[] = {
{},
};
+static const struct midr_range ampereone_ds_encoding_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+
static void arm_spe__sample_flags(struct arm_spe_queue *speq)
{
const struct arm_spe_record *record = &speq->decoder->record;
@@ -532,6 +561,38 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor
}
}
+static struct arm_spe_source_mapping ampereone_sources[] = {
+ MAP_SOURCE(AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE, DS_PEER_CORE),
+ MAP_SOURCE(AMPEREONE_SLC, DS_SYS_CACHE),
+ MAP_SOURCE(AMPEREONE_REMOTE_CHIP_CACHE, DS_REMOTE),
+ MAP_SOURCE(AMPEREONE_DDR, DS_DRAM),
+ MAP_SOURCE(AMPEREONE_L1D, DS_L1D),
+ MAP_SOURCE(AMPEREONE_L2D, DS_L2),
+};
+
+/*
+ * Source is IMPDEF. Here we convert the source code used on AmpereOne cores
+ * to the common (Neoverse, Cortex) to avoid duplicating the decoding code.
+ */
+static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ int common_src;
+ struct arm_spe_record common_record;
+
+ common_src = arm_spe__map_to_common_source(record->source,
+ ampereone_sources,
+ ARRAY_SIZE(ampereone_sources));
+ if (common_src < 0)
+ /* Assign a bogus value that's not used for common coding */
+ common_record.source = 0xfff;
+ else
+ common_record.source = common_src;
+
+ common_record.op = record->op;
+ arm_spe__synth_data_source_common(&common_record, data_src);
+}
+
static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
@@ -606,6 +667,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
bool is_common = arm_spe__is_ds_encoding_supported(speq,
common_ds_encoding_cpus);
+ bool is_ampereone = arm_spe__is_ds_encoding_supported(speq,
+ ampereone_ds_encoding_cpus);
if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
@@ -616,6 +679,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
if (is_common)
arm_spe__synth_data_source_common(record, &data_src);
+ else if (is_ampereone)
+ arm_spe__synth_data_source_ampereone(record, &data_src);
else
arm_spe__synth_memory_level(record, &data_src);
--
2.47.0
On 31/10/2024 9:35 pm, Ilkka Koskinen wrote: > Decode SPE Data Source packets on AmpereOne. The field is IMPDEF. > > Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> > --- > .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++ > tools/perf/util/arm-spe.c | 65 +++++++++++++++++++ > 2 files changed, 74 insertions(+) > > diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > index 358c611eeddb..4bcd627e859f 100644 > --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h > @@ -67,6 +67,15 @@ enum arm_spe_common_data_source { > ARM_SPE_COMMON_DS_DRAM = 0xe, > }; > > +enum arm_spe_ampereone_data_source { > + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, > + ARM_SPE_AMPEREONE_SLC = 0x3, > + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, > + ARM_SPE_AMPEREONE_DDR = 0x7, > + ARM_SPE_AMPEREONE_L1D = 0x8, > + ARM_SPE_AMPEREONE_L2D = 0x9, > +}; > + > struct arm_spe_record { > enum arm_spe_sample_type type; > int err; > diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c > index 9586416be30a..700d4bc8d8ec 100644 > --- a/tools/perf/util/arm-spe.c > +++ b/tools/perf/util/arm-spe.c > @@ -103,6 +103,30 @@ struct arm_spe_queue { > u32 flags; > }; > > +struct arm_spe_source_mapping { > + u16 source; > + enum arm_spe_common_data_source common_src; > +}; > + > +#define MAP_SOURCE(src, common) \ > + { \ > + .source = ARM_SPE_##src, \ > + .common_src = ARM_SPE_COMMON_##common, \ > + } > + > +static int arm_spe__map_to_common_source(u16 source, > + struct arm_spe_source_mapping *tbl, > + int nr_sources) > +{ > + while (nr_sources--) { > + if (tbl->source == source) > + return tbl->common_src; > + tbl++; > + } > + Hi Ilkka, I think a simple switch statement here would be easier to follow than the loop, custom macro and then having the mappings in some other place: switch(source) case 0x0: /* AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE */ return DS_PEER_CORE; etc... > + return -1; And the default case can return 0xfff directly, which avoids the if else later only to convert this -1 back into 0xfff. > +} > + > static void arm_spe_dump(struct arm_spe *spe __maybe_unused, > unsigned char *buf, size_t len) > { > @@ -443,6 +467,11 @@ static const struct midr_range common_ds_encoding_cpus[] = { > {}, > }; > > +static const struct midr_range ampereone_ds_encoding_cpus[] = { > + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), > + {}, > +}; > + > static void arm_spe__sample_flags(struct arm_spe_queue *speq) > { > const struct arm_spe_record *record = &speq->decoder->record; > @@ -532,6 +561,38 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor > } > } > > +static struct arm_spe_source_mapping ampereone_sources[] = { > + MAP_SOURCE(AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE, DS_PEER_CORE), > + MAP_SOURCE(AMPEREONE_SLC, DS_SYS_CACHE), > + MAP_SOURCE(AMPEREONE_REMOTE_CHIP_CACHE, DS_REMOTE), > + MAP_SOURCE(AMPEREONE_DDR, DS_DRAM), > + MAP_SOURCE(AMPEREONE_L1D, DS_L1D), > + MAP_SOURCE(AMPEREONE_L2D, DS_L2), > +}; > + > +/* > + * Source is IMPDEF. Here we convert the source code used on AmpereOne cores > + * to the common (Neoverse, Cortex) to avoid duplicating the decoding code. > + */ > +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, > + union perf_mem_data_src *data_src) > +{ > + int common_src; > + struct arm_spe_record common_record; > + > + common_src = arm_spe__map_to_common_source(record->source, > + ampereone_sources, > + ARRAY_SIZE(ampereone_sources)); > + if (common_src < 0) > + /* Assign a bogus value that's not used for common coding */ > + common_record.source = 0xfff; > + else > + common_record.source = common_src; > + > + common_record.op = record->op; > + arm_spe__synth_data_source_common(&common_record, data_src); > +} > + > static void arm_spe__synth_memory_level(const struct arm_spe_record *record, > union perf_mem_data_src *data_src) > { > @@ -606,6 +667,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq, > union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; > bool is_common = arm_spe__is_ds_encoding_supported(speq, > common_ds_encoding_cpus); > + bool is_ampereone = arm_spe__is_ds_encoding_supported(speq, > + ampereone_ds_encoding_cpus); I know this probably already works, but we don't really need is_common is_ampere etc, it will only grow anyway. All we need is a list of midrs and function pairs. That also avoids doing is_ampereone even after we already know is_common == true. static const struct data_src[] = { ... DS(MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), common_ds), DS(MIDR_ALL_VERSIONS(MIDR_AMPERE1A), ampere_ds), {}, ... }; "arm_spe__is_ds_encoding_supported" then becomes a direct call to "arm_spe__synth_ds" and we can drop the is_ampereone and is_common vars. Then adding new ones doesn't require changing the function anymore. > > if (record->op & ARM_SPE_OP_LD) > data_src.mem_op = PERF_MEM_OP_LOAD; > @@ -616,6 +679,8 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq, > > if (is_common) > arm_spe__synth_data_source_common(record, &data_src); > + else if (is_ampereone) > + arm_spe__synth_data_source_ampereone(record, &data_src); > else > arm_spe__synth_memory_level(record, &data_src); >
Hi James, On Mon, 4 Nov 2024, James Clark wrote: > On 31/10/2024 9:35 pm, Ilkka Koskinen wrote: >> Decode SPE Data Source packets on AmpereOne. The field is IMPDEF. >> >> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> >> --- >> .../util/arm-spe-decoder/arm-spe-decoder.h | 9 +++ >> tools/perf/util/arm-spe.c | 65 +++++++++++++++++++ >> 2 files changed, 74 insertions(+) >> >> diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> index 358c611eeddb..4bcd627e859f 100644 >> --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h >> @@ -67,6 +67,15 @@ enum arm_spe_common_data_source { >> ARM_SPE_COMMON_DS_DRAM = 0xe, >> }; >> +enum arm_spe_ampereone_data_source { >> + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, >> + ARM_SPE_AMPEREONE_SLC = 0x3, >> + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, >> + ARM_SPE_AMPEREONE_DDR = 0x7, >> + ARM_SPE_AMPEREONE_L1D = 0x8, >> + ARM_SPE_AMPEREONE_L2D = 0x9, >> +}; >> + >> struct arm_spe_record { >> enum arm_spe_sample_type type; >> int err; >> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c >> index 9586416be30a..700d4bc8d8ec 100644 >> --- a/tools/perf/util/arm-spe.c >> +++ b/tools/perf/util/arm-spe.c >> @@ -103,6 +103,30 @@ struct arm_spe_queue { >> u32 flags; >> }; >> +struct arm_spe_source_mapping { >> + u16 source; >> + enum arm_spe_common_data_source common_src; >> +}; >> + >> +#define MAP_SOURCE(src, common) \ >> + { \ >> + .source = ARM_SPE_##src, \ >> + .common_src = ARM_SPE_COMMON_##common, \ >> + } >> + >> +static int arm_spe__map_to_common_source(u16 source, >> + struct arm_spe_source_mapping *tbl, >> + int nr_sources) >> +{ >> + while (nr_sources--) { >> + if (tbl->source == source) >> + return tbl->common_src; >> + tbl++; >> + } >> + > > Hi Ilkka, > > I think a simple switch statement here would be easier to follow than the > loop, custom macro and then having the mappings in some other place: > > switch(source) > case 0x0: /* AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE */ > return DS_PEER_CORE; > > etc... > >> + return -1; > > And the default case can return 0xfff directly, which avoids the if else > later only to convert this -1 back into 0xfff. I can surely do that. > >> +} >> + >> static void arm_spe_dump(struct arm_spe *spe __maybe_unused, >> unsigned char *buf, size_t len) >> { >> @@ -443,6 +467,11 @@ static const struct midr_range >> common_ds_encoding_cpus[] = { >> {}, >> }; >> +static const struct midr_range ampereone_ds_encoding_cpus[] = { >> + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), >> + {}, >> +}; >> + >> static void arm_spe__sample_flags(struct arm_spe_queue *speq) >> { >> const struct arm_spe_record *record = &speq->decoder->record; >> @@ -532,6 +561,38 @@ static void arm_spe__synth_data_source_common(const >> struct arm_spe_record *recor >> } >> } >> +static struct arm_spe_source_mapping ampereone_sources[] = { >> + MAP_SOURCE(AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE, DS_PEER_CORE), >> + MAP_SOURCE(AMPEREONE_SLC, DS_SYS_CACHE), >> + MAP_SOURCE(AMPEREONE_REMOTE_CHIP_CACHE, DS_REMOTE), >> + MAP_SOURCE(AMPEREONE_DDR, DS_DRAM), >> + MAP_SOURCE(AMPEREONE_L1D, DS_L1D), >> + MAP_SOURCE(AMPEREONE_L2D, DS_L2), >> +}; >> + >> +/* >> + * Source is IMPDEF. Here we convert the source code used on AmpereOne >> cores >> + * to the common (Neoverse, Cortex) to avoid duplicating the decoding >> code. >> + */ >> +static void arm_spe__synth_data_source_ampereone(const struct >> arm_spe_record *record, >> + union perf_mem_data_src >> *data_src) >> +{ >> + int common_src; >> + struct arm_spe_record common_record; >> + >> + common_src = arm_spe__map_to_common_source(record->source, >> + ampereone_sources, >> + >> ARRAY_SIZE(ampereone_sources)); >> + if (common_src < 0) >> + /* Assign a bogus value that's not used for common coding */ >> + common_record.source = 0xfff; >> + else >> + common_record.source = common_src; >> + >> + common_record.op = record->op; >> + arm_spe__synth_data_source_common(&common_record, data_src); >> +} >> + >> static void arm_spe__synth_memory_level(const struct arm_spe_record >> *record, >> union perf_mem_data_src *data_src) >> { >> @@ -606,6 +667,8 @@ static u64 arm_spe__synth_data_source(struct >> arm_spe_queue *speq, >> union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; >> bool is_common = arm_spe__is_ds_encoding_supported(speq, >> common_ds_encoding_cpus); >> + bool is_ampereone = arm_spe__is_ds_encoding_supported(speq, >> + ampereone_ds_encoding_cpus); > > I know this probably already works, but we don't really need is_common > is_ampere etc, it will only grow anyway. All we need is a list of midrs and > function pairs. That also avoids doing is_ampereone even after we already > know is_common == true. > > static const struct data_src[] = { > ... > DS(MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), common_ds), > DS(MIDR_ALL_VERSIONS(MIDR_AMPERE1A), ampere_ds), > {}, > ... > }; > > "arm_spe__is_ds_encoding_supported" then becomes a direct call to > "arm_spe__synth_ds" and we can drop the is_ampereone and is_common vars. Then > adding new ones doesn't require changing the function anymore. To be honest, I'm not a big fan of the is_xyz() thing but I just didn't want to change it. Anyway, I'll change it for the next version. Cheers, Ilkka > >> if (record->op & ARM_SPE_OP_LD) >> data_src.mem_op = PERF_MEM_OP_LOAD; >> @@ -616,6 +679,8 @@ static u64 arm_spe__synth_data_source(struct >> arm_spe_queue *speq, >> if (is_common) >> arm_spe__synth_data_source_common(record, &data_src); >> + else if (is_ampereone) >> + arm_spe__synth_data_source_ampereone(record, &data_src); >> else >> arm_spe__synth_memory_level(record, &data_src); >> > >
© 2016 - 2024 Red Hat, Inc.