[PATCH v3 07/10] perf: arm_spe: Add support for filtering on data source

James Clark posted 10 patches 6 months, 2 weeks ago
There is a newer version of this series
[PATCH v3 07/10] perf: arm_spe: Add support for filtering on data source
Posted by James Clark 6 months, 2 weeks ago
SPE_FEAT_FDS adds the ability to filter on the data source of packets.
Like the other existing filters, enable filtering with PMSFCR_EL1.FDS
when any of the filter bits are set.

Each bit maps to data sources 0-63 described by bits[0:5] in the data
source packet (although the full range of data source is 16 bits so
higher value data sources can't be filtered on). The filter is an OR of
all the bits, so for example setting bits 0 and 3 filters packets from
data sources 0 OR 3.

Reviewed-by: Leo Yan <leo.yan@arm.com>
Tested-by: Leo Yan <leo.yan@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
 drivers/perf/arm_spe_pmu.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 9309b846f642..d04318411f77 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -87,6 +87,7 @@ struct arm_spe_pmu {
 #define SPE_PMU_FEAT_INV_FILT_EVT		(1UL << 6)
 #define SPE_PMU_FEAT_DISCARD			(1UL << 7)
 #define SPE_PMU_FEAT_EFT			(1UL << 8)
+#define SPE_PMU_FEAT_FDS			(1UL << 9)
 #define SPE_PMU_FEAT_DEV_PROBED			(1UL << 63)
 	u64					features;
 
@@ -232,6 +233,10 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
 #define ATTR_CFG_FLD_inv_event_filter_LO	0
 #define ATTR_CFG_FLD_inv_event_filter_HI	63
 
+#define ATTR_CFG_FLD_data_src_filter_CFG	config4	/* PMSDSFR_EL1 */
+#define ATTR_CFG_FLD_data_src_filter_LO	0
+#define ATTR_CFG_FLD_data_src_filter_HI	63
+
 GEN_PMU_FORMAT_ATTR(ts_enable);
 GEN_PMU_FORMAT_ATTR(pa_enable);
 GEN_PMU_FORMAT_ATTR(pct_enable);
@@ -248,6 +253,7 @@ GEN_PMU_FORMAT_ATTR(float_filter);
 GEN_PMU_FORMAT_ATTR(float_filter_mask);
 GEN_PMU_FORMAT_ATTR(event_filter);
 GEN_PMU_FORMAT_ATTR(inv_event_filter);
+GEN_PMU_FORMAT_ATTR(data_src_filter);
 GEN_PMU_FORMAT_ATTR(min_latency);
 GEN_PMU_FORMAT_ATTR(discard);
 
@@ -268,6 +274,7 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
 	&format_attr_float_filter_mask.attr,
 	&format_attr_event_filter.attr,
 	&format_attr_inv_event_filter.attr,
+	&format_attr_data_src_filter.attr,
 	&format_attr_min_latency.attr,
 	&format_attr_discard.attr,
 	NULL,
@@ -286,6 +293,9 @@ static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj,
 	if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
 		return 0;
 
+	if (attr == &format_attr_data_src_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_FDS))
+		return 0;
+
 	if ((attr == &format_attr_branch_filter_mask.attr ||
 	     attr == &format_attr_load_filter_mask.attr ||
 	     attr == &format_attr_store_filter_mask.attr ||
@@ -406,6 +416,9 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
 	if (ATTR_CFG_GET_FLD(attr, inv_event_filter))
 		reg |= PMSFCR_EL1_FnE;
 
+	if (ATTR_CFG_GET_FLD(attr, data_src_filter))
+		reg |= PMSFCR_EL1_FDS;
+
 	if (ATTR_CFG_GET_FLD(attr, min_latency))
 		reg |= PMSFCR_EL1_FL;
 
@@ -430,6 +443,12 @@ static u64 arm_spe_event_to_pmslatfr(struct perf_event *event)
 	return FIELD_PREP(PMSLATFR_EL1_MINLAT, ATTR_CFG_GET_FLD(attr, min_latency));
 }
 
+static u64 arm_spe_event_to_pmsdsfr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	return ATTR_CFG_GET_FLD(attr, data_src_filter);
+}
+
 static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len)
 {
 	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
@@ -788,6 +807,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
 	if (arm_spe_event_to_pmsnevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
 		return -EOPNOTSUPP;
 
+	if (arm_spe_event_to_pmsdsfr(event) &&
+	    !(spe_pmu->features & SPE_PMU_FEAT_FDS))
+		return -EOPNOTSUPP;
+
 	if (attr->exclude_idle)
 		return -EOPNOTSUPP;
 
@@ -857,6 +880,11 @@ static void arm_spe_pmu_start(struct perf_event *event, int flags)
 		write_sysreg_s(reg, SYS_PMSNEVFR_EL1);
 	}
 
+	if (spe_pmu->features & SPE_PMU_FEAT_FDS) {
+		reg = arm_spe_event_to_pmsdsfr(event);
+		write_sysreg_s(reg, SYS_PMSDSFR_EL1);
+	}
+
 	reg = arm_spe_event_to_pmslatfr(event);
 	write_sysreg_s(reg, SYS_PMSLATFR_EL1);
 
@@ -1116,6 +1144,9 @@ static void __arm_spe_pmu_dev_probe(void *info)
 	if (FIELD_GET(PMSIDR_EL1_EFT, reg))
 		spe_pmu->features |= SPE_PMU_FEAT_EFT;
 
+	if (FIELD_GET(PMSIDR_EL1_FDS, reg))
+		spe_pmu->features |= SPE_PMU_FEAT_FDS;
+
 	/* This field has a spaced out encoding, so just use a look-up */
 	fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg);
 	switch (fld) {

-- 
2.34.1
Re: [PATCH v3 07/10] perf: arm_spe: Add support for filtering on data source
Posted by Will Deacon 5 months, 1 week ago
On Thu, Jun 05, 2025 at 11:49:05AM +0100, James Clark wrote:
> SPE_FEAT_FDS adds the ability to filter on the data source of packets.
> Like the other existing filters, enable filtering with PMSFCR_EL1.FDS
> when any of the filter bits are set.
> 
> Each bit maps to data sources 0-63 described by bits[0:5] in the data
> source packet (although the full range of data source is 16 bits so
> higher value data sources can't be filtered on). The filter is an OR of
> all the bits, so for example setting bits 0 and 3 filters packets from
> data sources 0 OR 3.
> 
> Reviewed-by: Leo Yan <leo.yan@arm.com>
> Tested-by: Leo Yan <leo.yan@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
>  drivers/perf/arm_spe_pmu.c | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)
> 
> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
> index 9309b846f642..d04318411f77 100644
> --- a/drivers/perf/arm_spe_pmu.c
> +++ b/drivers/perf/arm_spe_pmu.c
> @@ -87,6 +87,7 @@ struct arm_spe_pmu {
>  #define SPE_PMU_FEAT_INV_FILT_EVT		(1UL << 6)
>  #define SPE_PMU_FEAT_DISCARD			(1UL << 7)
>  #define SPE_PMU_FEAT_EFT			(1UL << 8)
> +#define SPE_PMU_FEAT_FDS			(1UL << 9)
>  #define SPE_PMU_FEAT_DEV_PROBED			(1UL << 63)
>  	u64					features;
>  
> @@ -232,6 +233,10 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
>  #define ATTR_CFG_FLD_inv_event_filter_LO	0
>  #define ATTR_CFG_FLD_inv_event_filter_HI	63
>  
> +#define ATTR_CFG_FLD_data_src_filter_CFG	config4	/* PMSDSFR_EL1 */
> +#define ATTR_CFG_FLD_data_src_filter_LO	0
> +#define ATTR_CFG_FLD_data_src_filter_HI	63
> +
>  GEN_PMU_FORMAT_ATTR(ts_enable);
>  GEN_PMU_FORMAT_ATTR(pa_enable);
>  GEN_PMU_FORMAT_ATTR(pct_enable);
> @@ -248,6 +253,7 @@ GEN_PMU_FORMAT_ATTR(float_filter);
>  GEN_PMU_FORMAT_ATTR(float_filter_mask);
>  GEN_PMU_FORMAT_ATTR(event_filter);
>  GEN_PMU_FORMAT_ATTR(inv_event_filter);
> +GEN_PMU_FORMAT_ATTR(data_src_filter);
>  GEN_PMU_FORMAT_ATTR(min_latency);
>  GEN_PMU_FORMAT_ATTR(discard);
>  
> @@ -268,6 +274,7 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
>  	&format_attr_float_filter_mask.attr,
>  	&format_attr_event_filter.attr,
>  	&format_attr_inv_event_filter.attr,
> +	&format_attr_data_src_filter.attr,
>  	&format_attr_min_latency.attr,
>  	&format_attr_discard.attr,
>  	NULL,
> @@ -286,6 +293,9 @@ static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj,
>  	if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
>  		return 0;
>  
> +	if (attr == &format_attr_data_src_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_FDS))
> +		return 0;
> +
>  	if ((attr == &format_attr_branch_filter_mask.attr ||
>  	     attr == &format_attr_load_filter_mask.attr ||
>  	     attr == &format_attr_store_filter_mask.attr ||
> @@ -406,6 +416,9 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
>  	if (ATTR_CFG_GET_FLD(attr, inv_event_filter))
>  		reg |= PMSFCR_EL1_FnE;
>  
> +	if (ATTR_CFG_GET_FLD(attr, data_src_filter))
> +		reg |= PMSFCR_EL1_FDS;

Is the polarity correct here? The description of PMSDSFR_EL1.S<m> suggests
that setting bits to 1 _excludes_ the FDS filtering.

>  	if (ATTR_CFG_GET_FLD(attr, min_latency))
>  		reg |= PMSFCR_EL1_FL;
>  
> @@ -430,6 +443,12 @@ static u64 arm_spe_event_to_pmslatfr(struct perf_event *event)
>  	return FIELD_PREP(PMSLATFR_EL1_MINLAT, ATTR_CFG_GET_FLD(attr, min_latency));
>  }
>  
> +static u64 arm_spe_event_to_pmsdsfr(struct perf_event *event)
> +{
> +	struct perf_event_attr *attr = &event->attr;
> +	return ATTR_CFG_GET_FLD(attr, data_src_filter);
> +}
> +
>  static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len)
>  {
>  	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
> @@ -788,6 +807,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>  	if (arm_spe_event_to_pmsnevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
>  		return -EOPNOTSUPP;
>  
> +	if (arm_spe_event_to_pmsdsfr(event) &&
> +	    !(spe_pmu->features & SPE_PMU_FEAT_FDS))
> +		return -EOPNOTSUPP;

Same question here.

Will
Re: [PATCH v3 07/10] perf: arm_spe: Add support for filtering on data source
Posted by James Clark 5 months, 1 week ago

On 14/07/2025 3:04 pm, Will Deacon wrote:
> On Thu, Jun 05, 2025 at 11:49:05AM +0100, James Clark wrote:
>> SPE_FEAT_FDS adds the ability to filter on the data source of packets.
>> Like the other existing filters, enable filtering with PMSFCR_EL1.FDS
>> when any of the filter bits are set.
>>
>> Each bit maps to data sources 0-63 described by bits[0:5] in the data
>> source packet (although the full range of data source is 16 bits so
>> higher value data sources can't be filtered on). The filter is an OR of
>> all the bits, so for example setting bits 0 and 3 filters packets from
>> data sources 0 OR 3.
>>
>> Reviewed-by: Leo Yan <leo.yan@arm.com>
>> Tested-by: Leo Yan <leo.yan@arm.com>
>> Signed-off-by: James Clark <james.clark@linaro.org>
>> ---
>>   drivers/perf/arm_spe_pmu.c | 31 +++++++++++++++++++++++++++++++
>>   1 file changed, 31 insertions(+)
>>
>> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
>> index 9309b846f642..d04318411f77 100644
>> --- a/drivers/perf/arm_spe_pmu.c
>> +++ b/drivers/perf/arm_spe_pmu.c
>> @@ -87,6 +87,7 @@ struct arm_spe_pmu {
>>   #define SPE_PMU_FEAT_INV_FILT_EVT		(1UL << 6)
>>   #define SPE_PMU_FEAT_DISCARD			(1UL << 7)
>>   #define SPE_PMU_FEAT_EFT			(1UL << 8)
>> +#define SPE_PMU_FEAT_FDS			(1UL << 9)
>>   #define SPE_PMU_FEAT_DEV_PROBED			(1UL << 63)
>>   	u64					features;
>>   
>> @@ -232,6 +233,10 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
>>   #define ATTR_CFG_FLD_inv_event_filter_LO	0
>>   #define ATTR_CFG_FLD_inv_event_filter_HI	63
>>   
>> +#define ATTR_CFG_FLD_data_src_filter_CFG	config4	/* PMSDSFR_EL1 */
>> +#define ATTR_CFG_FLD_data_src_filter_LO	0
>> +#define ATTR_CFG_FLD_data_src_filter_HI	63
>> +
>>   GEN_PMU_FORMAT_ATTR(ts_enable);
>>   GEN_PMU_FORMAT_ATTR(pa_enable);
>>   GEN_PMU_FORMAT_ATTR(pct_enable);
>> @@ -248,6 +253,7 @@ GEN_PMU_FORMAT_ATTR(float_filter);
>>   GEN_PMU_FORMAT_ATTR(float_filter_mask);
>>   GEN_PMU_FORMAT_ATTR(event_filter);
>>   GEN_PMU_FORMAT_ATTR(inv_event_filter);
>> +GEN_PMU_FORMAT_ATTR(data_src_filter);
>>   GEN_PMU_FORMAT_ATTR(min_latency);
>>   GEN_PMU_FORMAT_ATTR(discard);
>>   
>> @@ -268,6 +274,7 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
>>   	&format_attr_float_filter_mask.attr,
>>   	&format_attr_event_filter.attr,
>>   	&format_attr_inv_event_filter.attr,
>> +	&format_attr_data_src_filter.attr,
>>   	&format_attr_min_latency.attr,
>>   	&format_attr_discard.attr,
>>   	NULL,
>> @@ -286,6 +293,9 @@ static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj,
>>   	if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
>>   		return 0;
>>   
>> +	if (attr == &format_attr_data_src_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_FDS))
>> +		return 0;
>> +
>>   	if ((attr == &format_attr_branch_filter_mask.attr ||
>>   	     attr == &format_attr_load_filter_mask.attr ||
>>   	     attr == &format_attr_store_filter_mask.attr ||
>> @@ -406,6 +416,9 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
>>   	if (ATTR_CFG_GET_FLD(attr, inv_event_filter))
>>   		reg |= PMSFCR_EL1_FnE;
>>   
>> +	if (ATTR_CFG_GET_FLD(attr, data_src_filter))
>> +		reg |= PMSFCR_EL1_FDS;
> 
> Is the polarity correct here? The description of PMSDSFR_EL1.S<m> suggests
> that setting bits to 1 _excludes_ the FDS filtering.
> 

Setting filter bits to 1 means that samples matching are included. 
Setting bits to 0 means that they are excluded. And PMSFCR_EL1.FDS 
enables filtering as a whole, so if the user sets any filter bit to 1 we 
want to enable filtering:

   PMSDSFR_EL1.S<m>

   0b0  If PMSFCR_EL1.FDS is 1, do not record load operations that have
        bits [5:0] of the Data Source packet set to <m>.

   0b1  Load operations with Data Source <m> are unaffected by
        PMSFCR_EL1.FDS.

I think it's all the right way around and it ends up being the same as 
the other filters in SPE. Because we're using any bit being set to 
enable the filtering, the only thing you can't do is enable filtering 
with a 0 filter, but I didn't think that was useful. See the previous 
discussion on this here: 
https://lore.kernel.org/all/5752f039-51c1-4452-b5df-03ff06da7be3@linaro.org/

Reading the "Data source filtering" section in the docs change at the 
end might help too.

>>   	if (ATTR_CFG_GET_FLD(attr, min_latency))
>>   		reg |= PMSFCR_EL1_FL;
>>   
>> @@ -430,6 +443,12 @@ static u64 arm_spe_event_to_pmslatfr(struct perf_event *event)
>>   	return FIELD_PREP(PMSLATFR_EL1_MINLAT, ATTR_CFG_GET_FLD(attr, min_latency));
>>   }
>>   
>> +static u64 arm_spe_event_to_pmsdsfr(struct perf_event *event)
>> +{
>> +	struct perf_event_attr *attr = &event->attr;
>> +	return ATTR_CFG_GET_FLD(attr, data_src_filter);
>> +}
>> +
>>   static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len)
>>   {
>>   	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
>> @@ -788,6 +807,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
>>   	if (arm_spe_event_to_pmsnevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
>>   		return -EOPNOTSUPP;
>>   
>> +	if (arm_spe_event_to_pmsdsfr(event) &&
>> +	    !(spe_pmu->features & SPE_PMU_FEAT_FDS))
>> +		return -EOPNOTSUPP;
> 
> Same question here.
> 
> Will
Re: [PATCH v3 07/10] perf: arm_spe: Add support for filtering on data source
Posted by Will Deacon 5 months ago
On Tue, Jul 15, 2025 at 02:04:18PM +0100, James Clark wrote:
> On 14/07/2025 3:04 pm, Will Deacon wrote:
> > On Thu, Jun 05, 2025 at 11:49:05AM +0100, James Clark wrote:
> > > @@ -406,6 +416,9 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
> > >   	if (ATTR_CFG_GET_FLD(attr, inv_event_filter))
> > >   		reg |= PMSFCR_EL1_FnE;
> > > +	if (ATTR_CFG_GET_FLD(attr, data_src_filter))
> > > +		reg |= PMSFCR_EL1_FDS;
> > 
> > Is the polarity correct here? The description of PMSDSFR_EL1.S<m> suggests
> > that setting bits to 1 _excludes_ the FDS filtering.
> > 
> 
> Setting filter bits to 1 means that samples matching are included. Setting
> bits to 0 means that they are excluded. And PMSFCR_EL1.FDS enables filtering
> as a whole, so if the user sets any filter bit to 1 we want to enable
> filtering:
> 
>   PMSDSFR_EL1.S<m>
> 
>   0b0  If PMSFCR_EL1.FDS is 1, do not record load operations that have
>        bits [5:0] of the Data Source packet set to <m>.
> 
>   0b1  Load operations with Data Source <m> are unaffected by
>        PMSFCR_EL1.FDS.
> 
> I think it's all the right way around and it ends up being the same as the
> other filters in SPE. Because we're using any bit being set to enable the
> filtering, the only thing you can't do is enable filtering with a 0 filter,
> but I didn't think that was useful. See the previous discussion on this
> here:
> https://lore.kernel.org/all/5752f039-51c1-4452-b5df-03ff06da7be3@linaro.org/
> 
> Reading the "Data source filtering" section in the docs change at the end
> might help too.

Sorry, but I still don't get it :/

afaict, if any of the bits in 'data_src_filter' are _zero_ then we
should set PMSFCR_EL1.FDS. That also means that a mask of zero means all
loads are filtered, which is what the architecture says and is what we
should provide to userspace.

Will
Re: [PATCH v3 07/10] perf: arm_spe: Add support for filtering on data source
Posted by James Clark 5 months ago

On 17/07/2025 3:29 pm, Will Deacon wrote:
> On Tue, Jul 15, 2025 at 02:04:18PM +0100, James Clark wrote:
>> On 14/07/2025 3:04 pm, Will Deacon wrote:
>>> On Thu, Jun 05, 2025 at 11:49:05AM +0100, James Clark wrote:
>>>> @@ -406,6 +416,9 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
>>>>    	if (ATTR_CFG_GET_FLD(attr, inv_event_filter))
>>>>    		reg |= PMSFCR_EL1_FnE;
>>>> +	if (ATTR_CFG_GET_FLD(attr, data_src_filter))
>>>> +		reg |= PMSFCR_EL1_FDS;
>>>
>>> Is the polarity correct here? The description of PMSDSFR_EL1.S<m> suggests
>>> that setting bits to 1 _excludes_ the FDS filtering.
>>>
>>
>> Setting filter bits to 1 means that samples matching are included. Setting
>> bits to 0 means that they are excluded. And PMSFCR_EL1.FDS enables filtering
>> as a whole, so if the user sets any filter bit to 1 we want to enable
>> filtering:
>>
>>    PMSDSFR_EL1.S<m>
>>
>>    0b0  If PMSFCR_EL1.FDS is 1, do not record load operations that have
>>         bits [5:0] of the Data Source packet set to <m>.
>>
>>    0b1  Load operations with Data Source <m> are unaffected by
>>         PMSFCR_EL1.FDS.
>>
>> I think it's all the right way around and it ends up being the same as the
>> other filters in SPE. Because we're using any bit being set to enable the
>> filtering, the only thing you can't do is enable filtering with a 0 filter,
>> but I didn't think that was useful. See the previous discussion on this
>> here:
>> https://lore.kernel.org/all/5752f039-51c1-4452-b5df-03ff06da7be3@linaro.org/
>>
>> Reading the "Data source filtering" section in the docs change at the end
>> might help too.
> 
> Sorry, but I still don't get it :/
> 
> afaict, if any of the bits in 'data_src_filter' are _zero_ then we
> should set PMSFCR_EL1.FDS. That also means that a mask of zero means all
> loads are filtered, which is what the architecture says and is what we
> should provide to userspace.
> 
> Will

We'd have to add another format flag to enable data source filtering 
then, because otherwise the default would be zero and people's samples 
would disappear.

But the only use cases I could think of were more like "I want to see 
samples from data source 1":

   -e arm_spe/data_src_filter=0x1/

Or "I want to see all data sources except 1":

   -e arm_spe/data_src_filter=0xfffffffe/

Filtering out all samples with any data source didn't seem to make sense 
to me, and I think you can already do that with the other filters 
(remove loads etc).

It would be a shame to be inconsistent and to add an enable flag just 
for that one case because the other filters in SPE are auto enabled for 
non-zero values. Although to be fair for PMSFCR.FT and others, zero 
filters are explicitly not allowed:

   If this field is set to 1 and the PMSFCR_EL1.{ST, LD, B} bits are all
   set to zero, it is CONSTRAINED UNPREDICTABLE whether no samples are
   recorded or the PE behaves as if PMSFCR_EL1.FT is set to 0

Seems like FDS doesn't end up as neat as the others, but IMO I can't see 
anyone needing a zero filter. I did discuss it with Leo and we decided 
that we could always add the enable flag at a later date if a use case 
turned up and it wouldn't be a breaking change.

But if you think it's there so it should be exposed I can add it.

James
Re: [PATCH v3 07/10] perf: arm_spe: Add support for filtering on data source
Posted by Will Deacon 5 months ago
On Thu, Jul 17, 2025 at 04:16:32PM +0100, James Clark wrote:
> 
> 
> On 17/07/2025 3:29 pm, Will Deacon wrote:
> > On Tue, Jul 15, 2025 at 02:04:18PM +0100, James Clark wrote:
> > > On 14/07/2025 3:04 pm, Will Deacon wrote:
> > > > On Thu, Jun 05, 2025 at 11:49:05AM +0100, James Clark wrote:
> > > > > @@ -406,6 +416,9 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
> > > > >    	if (ATTR_CFG_GET_FLD(attr, inv_event_filter))
> > > > >    		reg |= PMSFCR_EL1_FnE;
> > > > > +	if (ATTR_CFG_GET_FLD(attr, data_src_filter))
> > > > > +		reg |= PMSFCR_EL1_FDS;
> > > > 
> > > > Is the polarity correct here? The description of PMSDSFR_EL1.S<m> suggests
> > > > that setting bits to 1 _excludes_ the FDS filtering.
> > > > 
> > > 
> > > Setting filter bits to 1 means that samples matching are included. Setting
> > > bits to 0 means that they are excluded. And PMSFCR_EL1.FDS enables filtering
> > > as a whole, so if the user sets any filter bit to 1 we want to enable
> > > filtering:
> > > 
> > >    PMSDSFR_EL1.S<m>
> > > 
> > >    0b0  If PMSFCR_EL1.FDS is 1, do not record load operations that have
> > >         bits [5:0] of the Data Source packet set to <m>.
> > > 
> > >    0b1  Load operations with Data Source <m> are unaffected by
> > >         PMSFCR_EL1.FDS.
> > > 
> > > I think it's all the right way around and it ends up being the same as the
> > > other filters in SPE. Because we're using any bit being set to enable the
> > > filtering, the only thing you can't do is enable filtering with a 0 filter,
> > > but I didn't think that was useful. See the previous discussion on this
> > > here:
> > > https://lore.kernel.org/all/5752f039-51c1-4452-b5df-03ff06da7be3@linaro.org/
> > > 
> > > Reading the "Data source filtering" section in the docs change at the end
> > > might help too.
> > 
> > Sorry, but I still don't get it :/
> > 
> > afaict, if any of the bits in 'data_src_filter' are _zero_ then we
> > should set PMSFCR_EL1.FDS. That also means that a mask of zero means all
> > loads are filtered, which is what the architecture says and is what we
> > should provide to userspace.
> > 
> > Will
> 
> We'd have to add another format flag to enable data source filtering then,
> because otherwise the default would be zero and people's samples would
> disappear.
> 
> But the only use cases I could think of were more like "I want to see
> samples from data source 1":
> 
>   -e arm_spe/data_src_filter=0x1/
> 
> Or "I want to see all data sources except 1":
> 
>   -e arm_spe/data_src_filter=0xfffffffe/
> 
> Filtering out all samples with any data source didn't seem to make sense to
> me, and I think you can already do that with the other filters (remove loads
> etc).
> 
> It would be a shame to be inconsistent and to add an enable flag just for
> that one case because the other filters in SPE are auto enabled for non-zero
> values. Although to be fair for PMSFCR.FT and others, zero filters are
> explicitly not allowed:
> 
>   If this field is set to 1 and the PMSFCR_EL1.{ST, LD, B} bits are all
>   set to zero, it is CONSTRAINED UNPREDICTABLE whether no samples are
>   recorded or the PE behaves as if PMSFCR_EL1.FT is set to 0
> 
> Seems like FDS doesn't end up as neat as the others, but IMO I can't see
> anyone needing a zero filter. I did discuss it with Leo and we decided that
> we could always add the enable flag at a later date if a use case turned up
> and it wouldn't be a breaking change.
> 
> But if you think it's there so it should be exposed I can add it.

What about if we expose the inverse of PMSDSFR_EL1 to userspace instead?

Will
Re: [PATCH v3 07/10] perf: arm_spe: Add support for filtering on data source
Posted by James Clark 5 months ago

On 17/07/2025 4:27 pm, Will Deacon wrote:
> On Thu, Jul 17, 2025 at 04:16:32PM +0100, James Clark wrote:
>>
>>
>> On 17/07/2025 3:29 pm, Will Deacon wrote:
>>> On Tue, Jul 15, 2025 at 02:04:18PM +0100, James Clark wrote:
>>>> On 14/07/2025 3:04 pm, Will Deacon wrote:
>>>>> On Thu, Jun 05, 2025 at 11:49:05AM +0100, James Clark wrote:
>>>>>> @@ -406,6 +416,9 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
>>>>>>     	if (ATTR_CFG_GET_FLD(attr, inv_event_filter))
>>>>>>     		reg |= PMSFCR_EL1_FnE;
>>>>>> +	if (ATTR_CFG_GET_FLD(attr, data_src_filter))
>>>>>> +		reg |= PMSFCR_EL1_FDS;
>>>>>
>>>>> Is the polarity correct here? The description of PMSDSFR_EL1.S<m> suggests
>>>>> that setting bits to 1 _excludes_ the FDS filtering.
>>>>>
>>>>
>>>> Setting filter bits to 1 means that samples matching are included. Setting
>>>> bits to 0 means that they are excluded. And PMSFCR_EL1.FDS enables filtering
>>>> as a whole, so if the user sets any filter bit to 1 we want to enable
>>>> filtering:
>>>>
>>>>     PMSDSFR_EL1.S<m>
>>>>
>>>>     0b0  If PMSFCR_EL1.FDS is 1, do not record load operations that have
>>>>          bits [5:0] of the Data Source packet set to <m>.
>>>>
>>>>     0b1  Load operations with Data Source <m> are unaffected by
>>>>          PMSFCR_EL1.FDS.
>>>>
>>>> I think it's all the right way around and it ends up being the same as the
>>>> other filters in SPE. Because we're using any bit being set to enable the
>>>> filtering, the only thing you can't do is enable filtering with a 0 filter,
>>>> but I didn't think that was useful. See the previous discussion on this
>>>> here:
>>>> https://lore.kernel.org/all/5752f039-51c1-4452-b5df-03ff06da7be3@linaro.org/
>>>>
>>>> Reading the "Data source filtering" section in the docs change at the end
>>>> might help too.
>>>
>>> Sorry, but I still don't get it :/
>>>
>>> afaict, if any of the bits in 'data_src_filter' are _zero_ then we
>>> should set PMSFCR_EL1.FDS. That also means that a mask of zero means all
>>> loads are filtered, which is what the architecture says and is what we
>>> should provide to userspace.
>>>
>>> Will
>>
>> We'd have to add another format flag to enable data source filtering then,
>> because otherwise the default would be zero and people's samples would
>> disappear.
>>
>> But the only use cases I could think of were more like "I want to see
>> samples from data source 1":
>>
>>    -e arm_spe/data_src_filter=0x1/
>>
>> Or "I want to see all data sources except 1":
>>
>>    -e arm_spe/data_src_filter=0xfffffffe/
>>
>> Filtering out all samples with any data source didn't seem to make sense to
>> me, and I think you can already do that with the other filters (remove loads
>> etc).
>>
>> It would be a shame to be inconsistent and to add an enable flag just for
>> that one case because the other filters in SPE are auto enabled for non-zero
>> values. Although to be fair for PMSFCR.FT and others, zero filters are
>> explicitly not allowed:
>>
>>    If this field is set to 1 and the PMSFCR_EL1.{ST, LD, B} bits are all
>>    set to zero, it is CONSTRAINED UNPREDICTABLE whether no samples are
>>    recorded or the PE behaves as if PMSFCR_EL1.FT is set to 0
>>
>> Seems like FDS doesn't end up as neat as the others, but IMO I can't see
>> anyone needing a zero filter. I did discuss it with Leo and we decided that
>> we could always add the enable flag at a later date if a use case turned up
>> and it wouldn't be a breaking change.
>>
>> But if you think it's there so it should be exposed I can add it.
> 
> What about if we expose the inverse of PMSDSFR_EL1 to userspace instead?
> 
> Will

That's exactly what I was thinking. It does seem a bit weird, but I can 
call it inv_data_src_filter and document it appropriately so it should 
be fine.