[v10] x86/resctrl : Support AMD Assignable Bandwidth Monitoring Counters (ABMC)

[PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Babu Moger 1 year, 1 month ago

The mbm_cntr_assign mode offers several counters that can be assigned
to an RMID, event pair and monitor the bandwidth as long as it is
assigned.

Counters are managed at the domain level. Introduce the interface to
allocate/free/assign the counters.

If the user requests assignments across all domains, some domains may
fail if they run out of counters. Ensure assignments continue in other
domains wherever possible.

Signed-off-by: Babu Moger <babu.moger@amd.com>
---
v10: Patch changed completely.
     Counters are managed at the domain based on the discussion.
     https://lore.kernel.org/lkml/CALPaoCj+zWq1vkHVbXYP0znJbe6Ke3PXPWjtri5AFgD9cQDCUg@mail.gmail.com/
     Reset non-architectural MBM state.
     Commit message update.

v9: Introduced new function resctrl_config_cntr to assign the counter, update
    the bitmap and reset the architectural state.
    Taken care of error handling(freeing the counter) when assignment fails.
    Moved mbm_cntr_assigned_to_domain here as it used in this patch.
    Minor text changes.

v8: Renamed rdtgroup_assign_cntr() to rdtgroup_assign_cntr_event().
    Added the code to return the error if rdtgroup_assign_cntr_event fails.
    Moved definition of MBM_EVENT_ARRAY_INDEX to resctrl/internal.h.
    Updated typo in the comments.

v7: New patch. Moved all the FS code here.
    Merged rdtgroup_assign_cntr and rdtgroup_alloc_cntr.
    Adde new #define MBM_EVENT_ARRAY_INDEX.
---
 arch/x86/kernel/cpu/resctrl/internal.h |   5 +-
 arch/x86/kernel/cpu/resctrl/monitor.c  |   4 +-
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 110 +++++++++++++++++++++++++
 3 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 849bcfe4ea5b..70d2577fc377 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -704,5 +704,8 @@ unsigned int mon_event_config_index_get(u32 evtid);
 int resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
 			     enum resctrl_event_id evtid, u32 rmid, u32 closid,
 			     u32 cntr_id, bool assign);
-
+int rdtgroup_assign_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
+			       struct rdt_mon_domain *d, enum resctrl_event_id evtid);
+struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
+				u32 rmid, enum resctrl_event_id evtid);
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f857af361af1..8823cd97ff1f 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -575,8 +575,8 @@ void free_rmid(u32 closid, u32 rmid)
 		list_add_tail(&entry->list, &rmid_free_lru);
 }
 
-static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
-				       u32 rmid, enum resctrl_event_id evtid)
+struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
+				u32 rmid, enum resctrl_event_id evtid)
 {
 	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
 
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index e895d2415f22..1c8694a68cf4 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1927,6 +1927,116 @@ int resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
 	return 0;
 }
 
+/*
+ * Configure the counter for the event, RMID pair for the domain.
+ */
+static int resctrl_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+			       enum resctrl_event_id evtid, u32 rmid, u32 closid,
+			       u32 cntr_id, bool assign)
+{
+	struct mbm_state *m;
+	int ret;
+
+	ret = resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
+	if (ret)
+		return ret;
+
+	m = get_mbm_state(d, closid, rmid, evtid);
+	if (m)
+		memset(m, 0, sizeof(struct mbm_state));
+
+	return ret;
+}
+
+static bool mbm_cntr_assigned(struct rdt_resource *r, struct rdt_mon_domain *d,
+			      struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
+{
+	int cntr_id;
+
+	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
+		if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
+		    d->cntr_cfg[cntr_id].evtid == evtid)
+			return true;
+	}
+
+	return false;
+}
+
+static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d,
+			  struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
+{
+	int cntr_id;
+
+	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
+		if (!d->cntr_cfg[cntr_id].rdtgrp) {
+			d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
+			d->cntr_cfg[cntr_id].evtid = evtid;
+			return cntr_id;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static void mbm_cntr_free(struct rdt_resource *r, struct rdt_mon_domain *d,
+			  struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
+{
+	int cntr_id;
+
+	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
+		if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
+		    d->cntr_cfg[cntr_id].evtid == evtid)
+			memset(&d->cntr_cfg[cntr_id], 0, sizeof(struct mbm_cntr_cfg));
+	}
+}
+
+/*
+ * Assign a hardware counter to event @evtid of group @rdtgrp.
+ * Counter will be assigned to all the domains if rdt_mon_domain is NULL
+ * else the counter will be assigned to specific domain.
+ */
+int rdtgroup_assign_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
+			       struct rdt_mon_domain *d, enum resctrl_event_id evtid)
+{
+	int cntr_id, ret = 0;
+
+	if (!d) {
+		list_for_each_entry(d, &r->mon_domains, hdr.list) {
+			if (mbm_cntr_assigned(r, d, rdtgrp, evtid))
+				continue;
+
+			cntr_id = mbm_cntr_alloc(r, d, rdtgrp, evtid);
+			if (cntr_id <  0) {
+				rdt_last_cmd_puts("Domain Out of MBM assignable counters\n");
+				continue;
+			}
+
+			ret = resctrl_config_cntr(r, d, evtid, rdtgrp->mon.rmid,
+						  rdtgrp->closid, cntr_id, true);
+			if (ret)
+				goto out_done_assign;
+		}
+	} else {
+		if (mbm_cntr_assigned(r, d, rdtgrp, evtid))
+			goto out_done_assign;
+
+		cntr_id = mbm_cntr_alloc(r, d, rdtgrp, evtid);
+		if (cntr_id <  0) {
+			rdt_last_cmd_puts("Domain Out of MBM assignable counters\n");
+			goto out_done_assign;
+		}
+
+		ret = resctrl_config_cntr(r, d, evtid, rdtgrp->mon.rmid,
+					  rdtgrp->closid, cntr_id, true);
+	}
+
+out_done_assign:
+	if (ret)
+		mbm_cntr_free(r, d, rdtgrp, evtid);
+
+	return ret;
+}
+
 /* rdtgroup information files for one cache resource. */
 static struct rftype res_common_files[] = {
 	{
-- 
2.34.1

Re: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Reinette Chatre 1 year, 1 month ago

Hi Babu,

On 12/12/24 12:15 PM, Babu Moger wrote:
> The mbm_cntr_assign mode offers several counters that can be assigned
> to an RMID, event pair and monitor the bandwidth as long as it is
> assigned.
> 
> Counters are managed at the domain level. Introduce the interface to
> allocate/free/assign the counters.

Changelog of previous patch also claimed to "Provide the interface to assign the
counter ids to RMID." Please let changelogs describe the change more accurately.

(This still does not provide a user interface so what is meant by interface is
unclear)


> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 849bcfe4ea5b..70d2577fc377 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -704,5 +704,8 @@ unsigned int mon_event_config_index_get(u32 evtid);
>  int resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
>  			     enum resctrl_event_id evtid, u32 rmid, u32 closid,
>  			     u32 cntr_id, bool assign);
> -
> +int rdtgroup_assign_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
> +			       struct rdt_mon_domain *d, enum resctrl_event_id evtid);

Could you please be consistent in the ordering of parameters?

int rdtgroup_assign_cntr_event(struct rdt_resource *r, struct rdt_mon_domain *d, 
			       struct rdtgroup *rdtgrp, enum resctrl_event_id evtid);

> +struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
> +				u32 rmid, enum resctrl_event_id evtid);
>  #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index f857af361af1..8823cd97ff1f 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -575,8 +575,8 @@ void free_rmid(u32 closid, u32 rmid)
>  		list_add_tail(&entry->list, &rmid_free_lru);
>  }
>  
> -static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
> -				       u32 rmid, enum resctrl_event_id evtid)
> +struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
> +				u32 rmid, enum resctrl_event_id evtid)
>  {
>  	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
>  
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index e895d2415f22..1c8694a68cf4 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -1927,6 +1927,116 @@ int resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
>  	return 0;
>  }
>  
> +/*
> + * Configure the counter for the event, RMID pair for the domain.

This description can be more helpful ... it essentially just re-writes function
header.

> + */
> +static int resctrl_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
> +			       enum resctrl_event_id evtid, u32 rmid, u32 closid,
> +			       u32 cntr_id, bool assign)
> +{
> +	struct mbm_state *m;
> +	int ret;
> +
> +	ret = resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
> +	if (ret)
> +		return ret;
> +
> +	m = get_mbm_state(d, closid, rmid, evtid);
> +	if (m)
> +		memset(m, 0, sizeof(struct mbm_state));
> +
> +	return ret;
> +}
> +
> +static bool mbm_cntr_assigned(struct rdt_resource *r, struct rdt_mon_domain *d,
> +			      struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
> +{
> +	int cntr_id;
> +
> +	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
> +		if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
> +		    d->cntr_cfg[cntr_id].evtid == evtid)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> +static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d,
> +			  struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
> +{
> +	int cntr_id;
> +
> +	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
> +		if (!d->cntr_cfg[cntr_id].rdtgrp) {
> +			d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
> +			d->cntr_cfg[cntr_id].evtid = evtid;
> +			return cntr_id;
> +		}
> +	}
> +
> +	return -EINVAL;

This can be -ENOSPC

> +}
> +
> +static void mbm_cntr_free(struct rdt_resource *r, struct rdt_mon_domain *d,
> +			  struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
> +{
> +	int cntr_id;
> +
> +	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
> +		if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
> +		    d->cntr_cfg[cntr_id].evtid == evtid)
> +			memset(&d->cntr_cfg[cntr_id], 0, sizeof(struct mbm_cntr_cfg));
> +	}
> +}

From what I can tell the counter ID is always available when the counter is freed so
it can just be freed directly without looping over array?

> +
> +/*
> + * Assign a hardware counter to event @evtid of group @rdtgrp.
> + * Counter will be assigned to all the domains if rdt_mon_domain is NULL

(to be consistent) "if rdt_mon_domain is NULL" -> "if @d is NULL"

> + * else the counter will be assigned to specific domain.

"will be assigned to specific domain" -> "will be assigned to @d"

Reinette

Re: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Moger, Babu 1 year, 1 month ago

Hi Reinette,


On 12/19/2024 5:22 PM, Reinette Chatre wrote:
> Hi Babu,
> 
> On 12/12/24 12:15 PM, Babu Moger wrote:
>> The mbm_cntr_assign mode offers several counters that can be assigned
>> to an RMID, event pair and monitor the bandwidth as long as it is
>> assigned.
>>
>> Counters are managed at the domain level. Introduce the interface to
>> allocate/free/assign the counters.
> 
> Changelog of previous patch also claimed to "Provide the interface to assign the
> counter ids to RMID." Please let changelogs describe the change more accurately.
> 
> (This still does not provide a user interface so what is meant by interface is
> unclear)

How about this?

The mbm_cntr_assign mode offers several counters that can be assigned
to an RMID, event pair and monitor the bandwidth as long as it is
assigned.

Counters are managed at the domain level. Introduce the functionality to 
allocate/free/assign the counters.

If the user requests assignments across all domains, assignments will 
abort on first failure. The error will be logged in 
/sys/fs/resctrl/info/last_cmd_status.


> 
> 
>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index 849bcfe4ea5b..70d2577fc377 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -704,5 +704,8 @@ unsigned int mon_event_config_index_get(u32 evtid);
>>   int resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
>>   			     enum resctrl_event_id evtid, u32 rmid, u32 closid,
>>   			     u32 cntr_id, bool assign);
>> -
>> +int rdtgroup_assign_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
>> +			       struct rdt_mon_domain *d, enum resctrl_event_id evtid);
> 
> Could you please be consistent in the ordering of parameters?
> 
> int rdtgroup_assign_cntr_event(struct rdt_resource *r, struct rdt_mon_domain *d,
> 			       struct rdtgroup *rdtgrp, enum resctrl_event_id evtid);

Sure.

> 
>> +struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
>> +				u32 rmid, enum resctrl_event_id evtid);
>>   #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index f857af361af1..8823cd97ff1f 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -575,8 +575,8 @@ void free_rmid(u32 closid, u32 rmid)
>>   		list_add_tail(&entry->list, &rmid_free_lru);
>>   }
>>   
>> -static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
>> -				       u32 rmid, enum resctrl_event_id evtid)
>> +struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
>> +				u32 rmid, enum resctrl_event_id evtid)
>>   {
>>   	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
>>   
>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> index e895d2415f22..1c8694a68cf4 100644
>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> @@ -1927,6 +1927,116 @@ int resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
>>   	return 0;
>>   }
>>   
>> +/*
>> + * Configure the counter for the event, RMID pair for the domain.
> 
> This description can be more helpful ... it essentially just re-writes function
> header.

I can drop this. There is not much to explain here. Code seems easy to 
follow.

> 
>> + */
>> +static int resctrl_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
>> +			       enum resctrl_event_id evtid, u32 rmid, u32 closid,
>> +			       u32 cntr_id, bool assign)
>> +{
>> +	struct mbm_state *m;
>> +	int ret;
>> +
>> +	ret = resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
>> +	if (ret)
>> +		return ret;
>> +
>> +	m = get_mbm_state(d, closid, rmid, evtid);
>> +	if (m)
>> +		memset(m, 0, sizeof(struct mbm_state));
>> +
>> +	return ret;
>> +}
>> +
>> +static bool mbm_cntr_assigned(struct rdt_resource *r, struct rdt_mon_domain *d,
>> +			      struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
>> +{
>> +	int cntr_id;
>> +
>> +	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
>> +		if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
>> +		    d->cntr_cfg[cntr_id].evtid == evtid)
>> +			return true;
>> +	}
>> +
>> +	return false;
>> +}
>> +
>> +static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d,
>> +			  struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
>> +{
>> +	int cntr_id;
>> +
>> +	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
>> +		if (!d->cntr_cfg[cntr_id].rdtgrp) {
>> +			d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
>> +			d->cntr_cfg[cntr_id].evtid = evtid;
>> +			return cntr_id;
>> +		}
>> +	}
>> +
>> +	return -EINVAL;
> 
> This can be -ENOSPC

Sure.
> 
>> +}
>> +
>> +static void mbm_cntr_free(struct rdt_resource *r, struct rdt_mon_domain *d,
>> +			  struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
>> +{
>> +	int cntr_id;
>> +
>> +	for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
>> +		if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
>> +		    d->cntr_cfg[cntr_id].evtid == evtid)
>> +			memset(&d->cntr_cfg[cntr_id], 0, sizeof(struct mbm_cntr_cfg));
>> +	}
>> +}
> 
>>From what I can tell the counter ID is always available when the counter is freed so
> it can just be freed directly without looping over array?

Yes. With immediate return on every individual failure, we can do this 
easily.

> 
>> +
>> +/*
>> + * Assign a hardware counter to event @evtid of group @rdtgrp.
>> + * Counter will be assigned to all the domains if rdt_mon_domain is NULL
> 
> (to be consistent) "if rdt_mon_domain is NULL" -> "if @d is NULL"

Sure.

> 
>> + * else the counter will be assigned to specific domain.
> 
> "will be assigned to specific domain" -> "will be assigned to @d"

Sure.
> 
> Reinette
> 
>

Re: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Luck, Tony 1 year, 1 month ago

On Thu, Dec 12, 2024 at 02:15:19PM -0600, Babu Moger wrote:
> +/*
> + * Assign a hardware counter to event @evtid of group @rdtgrp.
> + * Counter will be assigned to all the domains if rdt_mon_domain is NULL
> + * else the counter will be assigned to specific domain.
> + */
> +int rdtgroup_assign_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
> +			       struct rdt_mon_domain *d, enum resctrl_event_id evtid)
> +{
> +	int cntr_id, ret = 0;
> +
> +	if (!d) {
> +		list_for_each_entry(d, &r->mon_domains, hdr.list) {
> +			if (mbm_cntr_assigned(r, d, rdtgrp, evtid))
> +				continue;
> +
> +			cntr_id = mbm_cntr_alloc(r, d, rdtgrp, evtid);
> +			if (cntr_id <  0) {
> +				rdt_last_cmd_puts("Domain Out of MBM assignable counters\n");

Message could be more helpful by including the domain number.

> +				continue;

Not sure whether continuing is the right thing to do here. Sure the
other domains may have available counters, but now you may have a
confused status where some requested operations succeeded and others
failed.

> +			}
> +
> +			ret = resctrl_config_cntr(r, d, evtid, rdtgrp->mon.rmid,
> +						  rdtgrp->closid, cntr_id, true);
> +			if (ret)
> +				goto out_done_assign;
> +		}
> +	} else {
> +		if (mbm_cntr_assigned(r, d, rdtgrp, evtid))
> +			goto out_done_assign;
> +
> +		cntr_id = mbm_cntr_alloc(r, d, rdtgrp, evtid);
> +		if (cntr_id <  0) {
> +			rdt_last_cmd_puts("Domain Out of MBM assignable counters\n");

Ditto helpful to include domain number.

> +			goto out_done_assign;

When you run out of counters here, you still return 0 from this
function. This means that updating via write to the "mbm_assign_control"
file may return success, even though the operation failed.

E.g. with no counters available:

# cat available_mbm_cntrs
0=0;1=0

Try to set a monitor domain to record local bandwidth:

# echo 'c1/m94/0=l;1=_;' > mbm_assign_control
# echo $?
0

Looks like it worked!

But it didn't.

# cat ../last_cmd_status
Domain Out of MBM assignable counters

rdtgroup_assign_cntr_event() does say that it didn't if you think to
check here.

> +		}
> +
> +		ret = resctrl_config_cntr(r, d, evtid, rdtgrp->mon.rmid,
> +					  rdtgrp->closid, cntr_id, true);
> +	}
> +
> +out_done_assign:
> +	if (ret)
> +		mbm_cntr_free(r, d, rdtgrp, evtid);
> +
> +	return ret;
> +}

-Tony

Re: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Moger, Babu 1 year, 1 month ago

Hi Tony,

On 12/12/2024 5:37 PM, Luck, Tony wrote:
> On Thu, Dec 12, 2024 at 02:15:19PM -0600, Babu Moger wrote:
>> +/*
>> + * Assign a hardware counter to event @evtid of group @rdtgrp.
>> + * Counter will be assigned to all the domains if rdt_mon_domain is NULL
>> + * else the counter will be assigned to specific domain.
>> + */
>> +int rdtgroup_assign_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
>> +			       struct rdt_mon_domain *d, enum resctrl_event_id evtid)
>> +{
>> +	int cntr_id, ret = 0;
>> +
>> +	if (!d) {
>> +		list_for_each_entry(d, &r->mon_domains, hdr.list) {
>> +			if (mbm_cntr_assigned(r, d, rdtgrp, evtid))
>> +				continue;
>> +
>> +			cntr_id = mbm_cntr_alloc(r, d, rdtgrp, evtid);
>> +			if (cntr_id <  0) {
>> +				rdt_last_cmd_puts("Domain Out of MBM assignable counters\n");
> 
> Message could be more helpful by including the domain number.

Yes. We can do that. I will to use rdt_last_cmd_printf().

> 
>> +				continue;
> 
> Not sure whether continuing is the right thing to do here. Sure the
> other domains may have available counters, but now you may have a
> confused status where some requested operations succeeded and others
> failed.
> 
>> +			}
>> +
>> +			ret = resctrl_config_cntr(r, d, evtid, rdtgrp->mon.rmid,
>> +						  rdtgrp->closid, cntr_id, true);
>> +			if (ret)
>> +				goto out_done_assign;
>> +		}
>> +	} else {
>> +		if (mbm_cntr_assigned(r, d, rdtgrp, evtid))
>> +			goto out_done_assign;
>> +
>> +		cntr_id = mbm_cntr_alloc(r, d, rdtgrp, evtid);
>> +		if (cntr_id <  0) {
>> +			rdt_last_cmd_puts("Domain Out of MBM assignable counters\n");
> 
> Ditto helpful to include domain number.

Sure.

> 
>> +			goto out_done_assign;
> 
> When you run out of counters here, you still return 0 from this
> function. This means that updating via write to the "mbm_assign_control"
> file may return success, even though the operation failed.
> 
> E.g. with no counters available:
> 
> # cat available_mbm_cntrs
> 0=0;1=0
> 
> Try to set a monitor domain to record local bandwidth:
> 
> # echo 'c1/m94/0=l;1=_;' > mbm_assign_control
> # echo $?
> 0
> 
> Looks like it worked!
> 
> But it didn't.
> 
> # cat ../last_cmd_status
> Domain Out of MBM assignable counters
> 
> rdtgroup_assign_cntr_event() does say that it didn't if you think to
> check here.

Yes. Agree.

It is right thing to continue assignment if one of the domain is out of 
counters. In that case how about we save the error(say error_domain) and 
continue. And finally return success if both ret and error_domain are zeros.

    return ret ?  ret : error_domain:


> 
>> +		}
>> +
>> +		ret = resctrl_config_cntr(r, d, evtid, rdtgrp->mon.rmid,
>> +					  rdtgrp->closid, cntr_id, true);
>> +	}
>> +
>> +out_done_assign:
>> +	if (ret)
>> +		mbm_cntr_free(r, d, rdtgrp, evtid);
>> +
>> +	return ret;
>> +}
> 
> -Tony
> 


thanks
Babu

RE: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Luck, Tony 1 year, 1 month ago

> It is right thing to continue assignment if one of the domain is out of 
> counters. In that case how about we save the error(say error_domain) and 
> continue. And finally return success if both ret and error_domain are zeros.
>
>    return ret ?  ret : error_domain:

If there are many domains, then you might have 3 succeed and 5 fail.

I think the best you can do is return success if everything succeeded
and an error if any failed.

You have the same issue if someone tries to update multiple things
with a single write to mbm_assign_control:

# cat > mbm_assign_control << EOF
c1/m78/0=t;1=l;
c1/m79/0=t;1=l
c1/m80/0=t;1=l;
c1/m81/0=t;1=l;
EOF

Those get processed in order, some may succeed, but once a domain
is out of counters the rest for that domain will fail.

Updates to schemata are handled in multiple passes to either have
all succeed or all fail. But the only problems that can occur are user
syntax/range issues. So it's a lot simpler.

For writes to mbm_assign_control I think it's okay to document that
some requests may have been applied even though the whole request
reports failure. The user can always read the file to check status.

-Tony

Re: RE: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Moger, Babu 1 year, 1 month ago

Hi Tony,

On 12/13/2024 10:24 AM, Luck, Tony wrote:
>> It is right thing to continue assignment if one of the domain is out of
>> counters. In that case how about we save the error(say error_domain) and
>> continue. And finally return success if both ret and error_domain are zeros.
>>
>>     return ret ?  ret : error_domain:
> 
> If there are many domains, then you might have 3 succeed and 5 fail.
> 
> I think the best you can do is return success if everything succeeded
> and an error if any failed.

Yes. The above check should take care of this case.

> 
> You have the same issue if someone tries to update multiple things
> with a single write to mbm_assign_control:
> 
> # cat > mbm_assign_control << EOF
> c1/m78/0=t;1=l;
> c1/m79/0=t;1=l
> c1/m80/0=t;1=l;
> c1/m81/0=t;1=l;
> EOF
> 
> Those get processed in order, some may succeed, but once a domain
> is out of counters the rest for that domain will fail.

Yes. I see the similar type of processing for schemata.
It is processed sequentially. If one fails, it returns immediately.

ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
if (ret)
       goto out;

I feel it is ok to keep same level of processing.

> 
> Updates to schemata are handled in multiple passes to either have
> all succeed or all fail. But the only problems that can occur are user
> syntax/range issues. So it's a lot simpler.
> 
> For writes to mbm_assign_control I think it's okay to document that
> some requests may have been applied even though the whole request
> reports failure. The user can always read the file to check status.

Yes. We can document this.
> 
> -Tony

Re: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Reinette Chatre 1 year, 1 month ago

On 12/13/24 8:54 AM, Moger, Babu wrote:
> On 12/13/2024 10:24 AM, Luck, Tony wrote:
>>> It is right thing to continue assignment if one of the domain is out of
>>> counters. In that case how about we save the error(say error_domain) and
>>> continue. And finally return success if both ret and error_domain are zeros.
>>>
>>>     return ret ?  ret : error_domain:
>>
>> If there are many domains, then you might have 3 succeed and 5 fail.
>>
>> I think the best you can do is return success if everything succeeded
>> and an error if any failed.
> 
> Yes. The above check should take care of this case.
> 

If I understand correctly "error_domain" can capture the ID of 
a single failing domain. If there are multiple failing domains like
in Tony's example then "error_domain" will not be accurate and thus
can never be trusted. Instead of a single check of a failure user
space is then forced to parse the more complex "mbm_assign_control"
file to learn what succeeded and failed.

Would it not be simpler to process sequentially and then fail on
first error encountered with detailed error message? With that
user space can determine exactly which portion of request
succeeded and which portion failed. 

>>
>> You have the same issue if someone tries to update multiple things
>> with a single write to mbm_assign_control:
>>
>> # cat > mbm_assign_control << EOF
>> c1/m78/0=t;1=l;
>> c1/m79/0=t;1=l
>> c1/m80/0=t;1=l;
>> c1/m81/0=t;1=l;
>> EOF
>>
>> Those get processed in order, some may succeed, but once a domain
>> is out of counters the rest for that domain will fail.
> 
> Yes. I see the similar type of processing for schemata.
> It is processed sequentially. If one fails, it returns immediately.
> 
> ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
> if (ret)
>       goto out;
> 
> I feel it is ok to keep same level of processing.
> 

resctrl also does sequential processing when, for example, the user requests
move of several tasks. resctrl returns with failure right away with error message
containing failing PID. This gives clear information to user what
portion of request succeeded without requiring user space to
do additional queries.

Reinette

Re: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Moger, Babu 1 year, 1 month ago

Hi Reinette,

On 12/18/2024 4:01 PM, Reinette Chatre wrote:
> 
> 
> On 12/13/24 8:54 AM, Moger, Babu wrote:
>> On 12/13/2024 10:24 AM, Luck, Tony wrote:
>>>> It is right thing to continue assignment if one of the domain is out of
>>>> counters. In that case how about we save the error(say error_domain) and
>>>> continue. And finally return success if both ret and error_domain are zeros.
>>>>
>>>>      return ret ?  ret : error_domain:
>>>
>>> If there are many domains, then you might have 3 succeed and 5 fail.
>>>
>>> I think the best you can do is return success if everything succeeded
>>> and an error if any failed.
>>
>> Yes. The above check should take care of this case.
>>
> 
> If I understand correctly "error_domain" can capture the ID of
> a single failing domain. If there are multiple failing domains like
> in Tony's example then "error_domain" will not be accurate and thus
> can never be trusted. Instead of a single check of a failure user
> space is then forced to parse the more complex "mbm_assign_control"
> file to learn what succeeded and failed.
> 
> Would it not be simpler to process sequentially and then fail on
> first error encountered with detailed error message? With that
> user space can determine exactly which portion of request
> succeeded and which portion failed.

One more option is to print the error for each failure and continue. And 
finally return error.

"Group mon1, domain:1 Out of MBM counters"

We have the error information as well as the convenience of assignment 
on domains where counters are available when user is working with 
"*"(all domains).

Note: I will be out of office starting next week Until Jan 10.

> 
>>>
>>> You have the same issue if someone tries to update multiple things
>>> with a single write to mbm_assign_control:
>>>
>>> # cat > mbm_assign_control << EOF
>>> c1/m78/0=t;1=l;
>>> c1/m79/0=t;1=l
>>> c1/m80/0=t;1=l;
>>> c1/m81/0=t;1=l;
>>> EOF
>>>
>>> Those get processed in order, some may succeed, but once a domain
>>> is out of counters the rest for that domain will fail.
>>
>> Yes. I see the similar type of processing for schemata.
>> It is processed sequentially. If one fails, it returns immediately.
>>
>> ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
>> if (ret)
>>        goto out;
>>
>> I feel it is ok to keep same level of processing.
>>
> 
> resctrl also does sequential processing when, for example, the user requests
> move of several tasks. resctrl returns with failure right away with error message
> containing failing PID. This gives clear information to user what
> portion of request succeeded without requiring user space to
> do additional queries.
> 
> 
> Reinette
>

Re: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Reinette Chatre 1 year, 1 month ago

Hi Babu,

On 12/19/24 11:45 AM, Moger, Babu wrote:
> Hi Reinette,
> 
> On 12/18/2024 4:01 PM, Reinette Chatre wrote:
>>
>>
>> On 12/13/24 8:54 AM, Moger, Babu wrote:
>>> On 12/13/2024 10:24 AM, Luck, Tony wrote:
>>>>> It is right thing to continue assignment if one of the domain is out of
>>>>> counters. In that case how about we save the error(say error_domain) and
>>>>> continue. And finally return success if both ret and error_domain are zeros.
>>>>>
>>>>>      return ret ?  ret : error_domain:
>>>>
>>>> If there are many domains, then you might have 3 succeed and 5 fail.
>>>>
>>>> I think the best you can do is return success if everything succeeded
>>>> and an error if any failed.
>>>
>>> Yes. The above check should take care of this case.
>>>
>>
>> If I understand correctly "error_domain" can capture the ID of
>> a single failing domain. If there are multiple failing domains like
>> in Tony's example then "error_domain" will not be accurate and thus
>> can never be trusted. Instead of a single check of a failure user
>> space is then forced to parse the more complex "mbm_assign_control"
>> file to learn what succeeded and failed.
>>
>> Would it not be simpler to process sequentially and then fail on
>> first error encountered with detailed error message? With that
>> user space can determine exactly which portion of request
>> succeeded and which portion failed.
> 
> One more option is to print the error for each failure and continue. And finally return error.
> 
> "Group mon1, domain:1 Out of MBM counters"
> 
> We have the error information as well as the convenience of assignment on domains where counters are available when user is working with "*"(all domains).

This may be possible. Please keep in mind that any errors have to be
easily consumed in an automated way to support the user space tools
that interact with resctrl. I do not think we have thus far focused
on the "last_cmd_status" buffer as part of the user space ABI so this opens
up more considerations.

At this time the error handling of "all domains" does not seem to be
consistent and obvious to user space. From what I can tell the
implementation continues on to the next domain if one domain is out
of counters but it exits immediately if a counter cannot be configured
on a particular domain. 

> 
> Note: I will be out of office starting next week Until Jan 10.

Thank you for letting me know. I am currently reviewing this series
and will post feedback by tomorrow.

Reinette

Re: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Moger, Babu 1 year, 1 month ago

Hi Reinette,

On 12/19/2024 3:12 PM, Reinette Chatre wrote:
> Hi Babu,
> 
> On 12/19/24 11:45 AM, Moger, Babu wrote:
>> Hi Reinette,
>>
>> On 12/18/2024 4:01 PM, Reinette Chatre wrote:
>>>
>>>
>>> On 12/13/24 8:54 AM, Moger, Babu wrote:
>>>> On 12/13/2024 10:24 AM, Luck, Tony wrote:
>>>>>> It is right thing to continue assignment if one of the domain is out of
>>>>>> counters. In that case how about we save the error(say error_domain) and
>>>>>> continue. And finally return success if both ret and error_domain are zeros.
>>>>>>
>>>>>>       return ret ?  ret : error_domain:
>>>>>
>>>>> If there are many domains, then you might have 3 succeed and 5 fail.
>>>>>
>>>>> I think the best you can do is return success if everything succeeded
>>>>> and an error if any failed.
>>>>
>>>> Yes. The above check should take care of this case.
>>>>
>>>
>>> If I understand correctly "error_domain" can capture the ID of
>>> a single failing domain. If there are multiple failing domains like
>>> in Tony's example then "error_domain" will not be accurate and thus
>>> can never be trusted. Instead of a single check of a failure user
>>> space is then forced to parse the more complex "mbm_assign_control"
>>> file to learn what succeeded and failed.
>>>
>>> Would it not be simpler to process sequentially and then fail on
>>> first error encountered with detailed error message? With that
>>> user space can determine exactly which portion of request
>>> succeeded and which portion failed.
>>
>> One more option is to print the error for each failure and continue. And finally return error.
>>
>> "Group mon1, domain:1 Out of MBM counters"
>>
>> We have the error information as well as the convenience of assignment on domains where counters are available when user is working with "*"(all domains).
> 
> This may be possible. Please keep in mind that any errors have to be
> easily consumed in an automated way to support the user space tools
> that interact with resctrl. I do not think we have thus far focused
> on the "last_cmd_status" buffer as part of the user space ABI so this opens
> up more considerations.
> 
> At this time the error handling of "all domains" does not seem to be
> consistent and obvious to user space. From what I can tell the
> implementation continues on to the next domain if one domain is out
> of counters but it exits immediately if a counter cannot be configured
> on a particular domain.

Yes. We can handle both the errors in the same way.

> 
>>
>> Note: I will be out of office starting next week Until Jan 10.
> 
> Thank you for letting me know. I am currently reviewing this series
> and will post feedback by tomorrow.

Sure. Thanks. I will try to get to some of it at least. The review 
comments which needs investigation may have to wait. Lets see.

Thanks
Babu

RE: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Luck, Tony 1 year, 1 month ago

> >>>>>> It is right thing to continue assignment if one of the domain is out of
> >>>>>> counters. In that case how about we save the error(say error_domain) and
> >>>>>> continue. And finally return success if both ret and error_domain are zeros.
> >>>>>>
> >>>>>>       return ret ?  ret : error_domain:
> >>>>>
> >>>>> If there are many domains, then you might have 3 succeed and 5 fail.
> >>>>>
> >>>>> I think the best you can do is return success if everything succeeded
> >>>>> and an error if any failed.
> >>>>
> >>>> Yes. The above check should take care of this case.
> >>>>
> >>>
> >>> If I understand correctly "error_domain" can capture the ID of
> >>> a single failing domain. If there are multiple failing domains like
> >>> in Tony's example then "error_domain" will not be accurate and thus
> >>> can never be trusted. Instead of a single check of a failure user
> >>> space is then forced to parse the more complex "mbm_assign_control"
> >>> file to learn what succeeded and failed.
> >>>
> >>> Would it not be simpler to process sequentially and then fail on
> >>> first error encountered with detailed error message? With that
> >>> user space can determine exactly which portion of request
> >>> succeeded and which portion failed.
> >>
> >> One more option is to print the error for each failure and continue. And finally return error.

There's limited space allocated for use by last_cmd_*() messages:

static char last_cmd_status_buf[512];

        seq_buf_init(&last_cmd_status, last_cmd_status_buf,
                     sizeof(last_cmd_status_buf));

If you keep parsing and trying to apply changes from user input you will
quickly hit that limit.


> >>
> >> "Group mon1, domain:1 Out of MBM counters"
> >>
> >> We have the error information as well as the convenience of assignment on domains where counters are available when user is working with "*"(all domains).
> >
> > This may be possible. Please keep in mind that any errors have to be
> > easily consumed in an automated way to support the user space tools
> > that interact with resctrl. I do not think we have thus far focused
> > on the "last_cmd_status" buffer as part of the user space ABI so this opens
> > up more considerations.
> >
> > At this time the error handling of "all domains" does not seem to be
> > consistent and obvious to user space. From what I can tell the
> > implementation continues on to the next domain if one domain is out
> > of counters but it exits immediately if a counter cannot be configured
> > on a particular domain.
>
> Yes. We can handle both the errors in the same way.

I think it is simplest to make the "same way" be "fail on first error".

>
> >
> >>
> >> Note: I will be out of office starting next week Until Jan 10.
> >
> > Thank you for letting me know. I am currently reviewing this series
> > and will post feedback by tomorrow.
>
> Sure. Thanks. I will try to get to some of it at least. The review
> comments which needs investigation may have to wait. Lets see.

-Tony

Re: RE: [PATCH v10 16/24] x86/resctrl: Add interface to the assign counter

Posted by Moger, Babu 1 year, 1 month ago

Hi Tony,

On 12/19/2024 3:45 PM, Luck, Tony wrote:
>>>>>>>> It is right thing to continue assignment if one of the domain is out of
>>>>>>>> counters. In that case how about we save the error(say error_domain) and
>>>>>>>> continue. And finally return success if both ret and error_domain are zeros.
>>>>>>>>
>>>>>>>>        return ret ?  ret : error_domain:
>>>>>>>
>>>>>>> If there are many domains, then you might have 3 succeed and 5 fail.
>>>>>>>
>>>>>>> I think the best you can do is return success if everything succeeded
>>>>>>> and an error if any failed.
>>>>>>
>>>>>> Yes. The above check should take care of this case.
>>>>>>
>>>>>
>>>>> If I understand correctly "error_domain" can capture the ID of
>>>>> a single failing domain. If there are multiple failing domains like
>>>>> in Tony's example then "error_domain" will not be accurate and thus
>>>>> can never be trusted. Instead of a single check of a failure user
>>>>> space is then forced to parse the more complex "mbm_assign_control"
>>>>> file to learn what succeeded and failed.
>>>>>
>>>>> Would it not be simpler to process sequentially and then fail on
>>>>> first error encountered with detailed error message? With that
>>>>> user space can determine exactly which portion of request
>>>>> succeeded and which portion failed.
>>>>
>>>> One more option is to print the error for each failure and continue. And finally return error.
> 
> There's limited space allocated for use by last_cmd_*() messages:
> 
> static char last_cmd_status_buf[512];
> 
>          seq_buf_init(&last_cmd_status, last_cmd_status_buf,
>                       sizeof(last_cmd_status_buf));
> 
> If you keep parsing and trying to apply changes from user input you will
> quickly hit that limit.

oh. ok. Good to know.
> 
> 
>>>>
>>>> "Group mon1, domain:1 Out of MBM counters"
>>>>
>>>> We have the error information as well as the convenience of assignment on domains where counters are available when user is working with "*"(all domains).
>>>
>>> This may be possible. Please keep in mind that any errors have to be
>>> easily consumed in an automated way to support the user space tools
>>> that interact with resctrl. I do not think we have thus far focused
>>> on the "last_cmd_status" buffer as part of the user space ABI so this opens
>>> up more considerations.
>>>
>>> At this time the error handling of "all domains" does not seem to be
>>> consistent and obvious to user space. From what I can tell the
>>> implementation continues on to the next domain if one domain is out
>>> of counters but it exits immediately if a counter cannot be configured
>>> on a particular domain.
>>
>> Yes. We can handle both the errors in the same way.
> 
> I think it is simplest to make the "same way" be "fail on first error".

Ok. Sure.  Will do thanks.

-Babu