[PATCH v4 02/24] x86/resctrl: Access per-rmid structures by index

James Morse posted 24 patches 2 years, 8 months ago
There is a newer version of this series
[PATCH v4 02/24] x86/resctrl: Access per-rmid structures by index
Posted by James Morse 2 years, 8 months ago
Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
monitors, RMID values on arm64 are not unique unless the CLOSID is
also included. Bitmaps like rmid_busy_llc need to be sized by the
number of unique entries for this resource.

Add helpers to encode/decode the CLOSID and RMID to an index. The
domain's rmid_busy_llc and rmid_ptrs[] are then sized by index,
as are the domain mbm_local and mbm_total arrays.
On x86, the index is always just the RMID, so all these structures
remain the same size.

The index gives resctrl a unique value it can use to store monitor
values, and allows MPAM to decode the CLOSID when reading the hardware
counters.

Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
Changes since v1:
 * Added X86_BAD_CLOSID macro to make it clear what this value means
 * Added second WARN_ON() for closid checking, and made both _ONCE()

Changes since v2:
 * Added RESCTRL_RESERVED_CLOSID
 * Removed a newline
 * Repharsed some comments
 * Renamed a variable 'ignore'd
 * Moved X86_RESCTRL_BAD_CLOSID to a previous patch

Changes since v3:
 * Changed a variable name
 * Fixed various typos
---
 arch/x86/include/asm/resctrl.h         | 17 ++++++
 arch/x86/kernel/cpu/resctrl/core.c     |  2 +-
 arch/x86/kernel/cpu/resctrl/internal.h |  1 +
 arch/x86/kernel/cpu/resctrl/monitor.c  | 84 +++++++++++++++++---------
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |  7 ++-
 include/linux/resctrl.h                |  3 +
 6 files changed, 83 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index e906070285fb..dd9b638d43c8 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -101,6 +101,23 @@ static inline void resctrl_sched_in(struct task_struct *tsk)
 		__resctrl_sched_in(tsk);
 }
 
+static inline u32 resctrl_arch_system_num_rmid_idx(void)
+{
+	/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
+	return boot_cpu_data.x86_cache_max_rmid + 1;
+}
+
+static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
+{
+	*rmid = idx;
+	*closid = X86_RESCTRL_BAD_CLOSID;
+}
+
+static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid)
+{
+	return rmid;
+}
+
 void resctrl_cpu_detect(struct cpuinfo_x86 *c);
 
 #else
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 030d3b409768..4bea032d072e 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -600,7 +600,7 @@ static void clear_closid_rmid(int cpu)
 	state->default_rmid = 0;
 	state->cur_closid = 0;
 	state->cur_rmid = 0;
-	wrmsr(MSR_IA32_PQR_ASSOC, 0, 0);
+	wrmsr(MSR_IA32_PQR_ASSOC, 0, RESCTRL_RESERVED_CLOSID);
 }
 
 static int resctrl_online_cpu(unsigned int cpu)
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index f2da908bb079..d571da4848a4 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -7,6 +7,7 @@
 #include <linux/kernfs.h>
 #include <linux/fs_context.h>
 #include <linux/jump_label.h>
+#include <asm/resctrl.h>
 
 #define L3_QOS_CDP_ENABLE		0x01ULL
 
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 86574adedd64..bcc25f5339c0 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -142,12 +142,29 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
 	return val;
 }
 
-static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
+/*
+ * x86 and arm64 differ in their handling of monitoring.
+ * x86's RMID are an independent number, there is only one source of traffic
+ * with an RMID value of '1'.
+ * arm64's PMG extend the PARTID/CLOSID space, there are multiple sources of
+ * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
+ * value is no longer unique.
+ * To account for this, resctrl uses an index. On x86 this is just the RMID,
+ * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
+ *
+ * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The arch code
+ * must accept an attempt to read every index.
+ */
+static inline struct rmid_entry *__rmid_entry(u32 idx)
 {
 	struct rmid_entry *entry;
+	u32 closid, rmid;
 
-	entry = &rmid_ptrs[rmid];
-	WARN_ON(entry->rmid != rmid);
+	entry = &rmid_ptrs[idx];
+	resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
+
+	WARN_ON_ONCE(entry->closid != closid);
+	WARN_ON_ONCE(entry->rmid != rmid);
 
 	return entry;
 }
@@ -277,8 +294,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
 void __check_limbo(struct rdt_domain *d, bool force_free)
 {
 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
 	struct rmid_entry *entry;
-	u32 crmid = 1, nrmid;
+	u32 idx, cur_idx = 1;
 	bool rmid_dirty;
 	u64 val = 0;
 
@@ -289,12 +307,11 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
 	 * RMID and move it to the free list when the counter reaches 0.
 	 */
 	for (;;) {
-		nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
-		if (nrmid >= r->num_rmid)
+		idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
+		if (idx >= idx_limit)
 			break;
 
-		entry = __rmid_entry(X86_RESCTRL_BAD_CLOSID, nrmid);// temporary
-
+		entry = __rmid_entry(idx);
 		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
 					   QOS_L3_OCCUP_EVENT_ID, &val)) {
 			rmid_dirty = true;
@@ -303,19 +320,21 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
 		}
 
 		if (force_free || !rmid_dirty) {
-			clear_bit(entry->rmid, d->rmid_busy_llc);
+			clear_bit(idx, d->rmid_busy_llc);
 			if (!--entry->busy) {
 				rmid_limbo_count--;
 				list_add_tail(&entry->list, &rmid_free_lru);
 			}
 		}
-		crmid = nrmid + 1;
+		cur_idx = idx + 1;
 	}
 }
 
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
 {
-	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
+	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+
+	return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
 }
 
 /*
@@ -345,6 +364,9 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 	struct rdt_domain *d;
 	int cpu, err;
 	u64 val = 0;
+	u32 idx;
+
+	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
 
 	entry->busy = 0;
 	cpu = get_cpu();
@@ -364,7 +386,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 		 */
 		if (!has_busy_rmid(r, d))
 			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
-		set_bit(entry->rmid, d->rmid_busy_llc);
+		set_bit(idx, d->rmid_busy_llc);
 		entry->busy++;
 	}
 	put_cpu();
@@ -377,14 +399,16 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 
 void free_rmid(u32 closid, u32 rmid)
 {
+	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
 	struct rmid_entry *entry;
 
-	if (!rmid)
-		return;
-
 	lockdep_assert_held(&rdtgroup_mutex);
 
-	entry = __rmid_entry(closid, rmid);
+	/* do not allow the default rmid to be free'd */
+	if (!idx)
+		return;
+
+	entry = __rmid_entry(idx);
 
 	if (is_llc_occupancy_enabled())
 		add_rmid_to_limbo(entry);
@@ -395,11 +419,13 @@ void free_rmid(u32 closid, u32 rmid)
 static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
 				       u32 rmid, enum resctrl_event_id evtid)
 {
+	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+
 	switch (evtid) {
 	case QOS_L3_MBM_TOTAL_EVENT_ID:
-		return &d->mbm_total[rmid];
+		return &d->mbm_total[idx];
 	case QOS_L3_MBM_LOCAL_EVENT_ID:
-		return &d->mbm_local[rmid];
+		return &d->mbm_local[idx];
 	default:
 		return NULL;
 	}
@@ -441,7 +467,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
  */
 static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
 {
-	struct mbm_state *m = &rr->d->mbm_local[rmid];
+	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+	struct mbm_state *m = &rr->d->mbm_local[idx];
 	u64 cur_bw, bytes, cur_bytes;
 
 	cur_bytes = rr->val;
@@ -531,7 +558,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
 {
 	u32 closid, rmid, cur_msr_val, new_msr_val;
 	struct mbm_state *pmbm_data, *cmbm_data;
-	u32 cur_bw, delta_bw, user_bw;
+	u32 cur_bw, delta_bw, user_bw, idx;
 	struct rdt_resource *r_mba;
 	struct rdt_domain *dom_mba;
 	struct list_head *head;
@@ -544,7 +571,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
 
 	closid = rgrp->closid;
 	rmid = rgrp->mon.rmid;
-	pmbm_data = &dom_mbm->mbm_local[rmid];
+	idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+	pmbm_data = &dom_mbm->mbm_local[idx];
 
 	dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
 	if (!dom_mba) {
@@ -727,19 +755,20 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
 
 static int dom_data_init(struct rdt_resource *r)
 {
+	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
 	struct rmid_entry *entry = NULL;
-	int i, nr_rmids;
+	u32 idx;
+	int i;
 
-	nr_rmids = r->num_rmid;
-	rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
+	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
 	if (!rmid_ptrs)
 		return -ENOMEM;
 
-	for (i = 0; i < nr_rmids; i++) {
+	for (i = 0; i < idx_limit; i++) {
 		entry = &rmid_ptrs[i];
 		INIT_LIST_HEAD(&entry->list);
 
-		entry->rmid = i;
+		resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
 		list_add_tail(&entry->list, &rmid_free_lru);
 	}
 
@@ -748,7 +777,8 @@ static int dom_data_init(struct rdt_resource *r)
 	 * default_rdtgroup control group, which will be setup later. See
 	 * rdtgroup_setup_root().
 	 */
-	entry = __rmid_entry(0, 0);
+	idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, 0);
+	entry = __rmid_entry(idx);
 	list_del(&entry->list);
 
 	return 0;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index ff9ccfcd18bd..023eae69f29e 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3604,16 +3604,17 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
 
 static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
 {
+	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
 	size_t tsize;
 
 	if (is_llc_occupancy_enabled()) {
-		d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
+		d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
 		if (!d->rmid_busy_llc)
 			return -ENOMEM;
 	}
 	if (is_mbm_total_enabled()) {
 		tsize = sizeof(*d->mbm_total);
-		d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+		d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
 		if (!d->mbm_total) {
 			bitmap_free(d->rmid_busy_llc);
 			return -ENOMEM;
@@ -3621,7 +3622,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
 	}
 	if (is_mbm_local_enabled()) {
 		tsize = sizeof(*d->mbm_local);
-		d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+		d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
 		if (!d->mbm_local) {
 			bitmap_free(d->rmid_busy_llc);
 			kfree(d->mbm_total);
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 7d80bae05f59..ff7452f644e4 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -6,6 +6,9 @@
 #include <linux/list.h>
 #include <linux/pid.h>
 
+/* CLOSID value used by the default control group */
+#define RESCTRL_RESERVED_CLOSID		0
+
 #ifdef CONFIG_PROC_CPU_RESCTRL
 
 int proc_resctrl_show(struct seq_file *m,
-- 
2.39.2
Re: [PATCH v4 02/24] x86/resctrl: Access per-rmid structures by index
Posted by Reinette Chatre 2 years, 7 months ago
Hi James,

On 5/25/2023 11:01 AM, James Morse wrote:
> Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
> monitors, RMID values on arm64 are not unique unless the CLOSID is

I find the above a bit confusing ... the theme seems to be "RMID values
on arm64 are not unique because they are different from Intel".
Compare to: "One of the differences between Intel RDT/AMD QoS and
Arm's MPAM monitors is that RMID values on arm64 are not unique unless
the CLOSID is also included."

> also included. Bitmaps like rmid_busy_llc need to be sized by the
> number of unique entries for this resource.
> 
> Add helpers to encode/decode the CLOSID and RMID to an index. The
> domain's rmid_busy_llc and rmid_ptrs[] are then sized by index,
> as are the domain mbm_local and mbm_total arrays.

You can use "[]" to indicate an array.

> On x86, the index is always just the RMID, so all these structures
> remain the same size.

I do not consider this accurate considering that the previous
patch increased the size of each element to support this change.

> The index gives resctrl a unique value it can use to store monitor
> values, and allows MPAM to decode the CLOSID when reading the hardware
> counters.
> 
> Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v1:
>  * Added X86_BAD_CLOSID macro to make it clear what this value means
>  * Added second WARN_ON() for closid checking, and made both _ONCE()
> 
> Changes since v2:
>  * Added RESCTRL_RESERVED_CLOSID
>  * Removed a newline
>  * Repharsed some comments
>  * Renamed a variable 'ignore'd
>  * Moved X86_RESCTRL_BAD_CLOSID to a previous patch
> 
> Changes since v3:
>  * Changed a variable name
>  * Fixed various typos
> ---
>  arch/x86/include/asm/resctrl.h         | 17 ++++++
>  arch/x86/kernel/cpu/resctrl/core.c     |  2 +-
>  arch/x86/kernel/cpu/resctrl/internal.h |  1 +
>  arch/x86/kernel/cpu/resctrl/monitor.c  | 84 +++++++++++++++++---------
>  arch/x86/kernel/cpu/resctrl/rdtgroup.c |  7 ++-
>  include/linux/resctrl.h                |  3 +
>  6 files changed, 83 insertions(+), 31 deletions(-)
> 

...

> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index 86574adedd64..bcc25f5339c0 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -142,12 +142,29 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
>  	return val;
>  }
>  
> -static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
> +/*
> + * x86 and arm64 differ in their handling of monitoring.
> + * x86's RMID are an independent number, there is only one source of traffic
> + * with an RMID value of '1'.
> + * arm64's PMG extend the PARTID/CLOSID space, there are multiple sources of
> + * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
> + * value is no longer unique.
> + * To account for this, resctrl uses an index. On x86 this is just the RMID,
> + * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
> + *
> + * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The arch code

rmid_ptrs[]

> + * must accept an attempt to read every index.
> + */
> +static inline struct rmid_entry *__rmid_entry(u32 idx)
>  {
>  	struct rmid_entry *entry;
> +	u32 closid, rmid;
>  
> -	entry = &rmid_ptrs[rmid];
> -	WARN_ON(entry->rmid != rmid);
> +	entry = &rmid_ptrs[idx];
> +	resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
> +
> +	WARN_ON_ONCE(entry->closid != closid);
> +	WARN_ON_ONCE(entry->rmid != rmid);
>  
>  	return entry;
>  }

...

> @@ -377,14 +399,16 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>  
>  void free_rmid(u32 closid, u32 rmid)
>  {
> +	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
>  	struct rmid_entry *entry;
>  
> -	if (!rmid)
> -		return;
> -
>  	lockdep_assert_held(&rdtgroup_mutex);
>  
> -	entry = __rmid_entry(closid, rmid);
> +	/* do not allow the default rmid to be free'd */
> +	if (!idx)
> +		return;
> +

The interface seem to become blurry here. There are new
architecture specific encode/decode callbacks while at the same
time there are a few requirements:
- closid 0 and rmid 0 are reserved
- closid 0 and rmid 0 must map to index 0 (the architecture
callbacks thus do not have must freedom here ... they must
return index 0 for closid 0 and rmid 0, no?).

It does seem a bit strange that the one layer provides values (0,0)
to other layer while requiring a specific answer (0).

What if RESCTRL_RESERVED_RMID is also introduced and before encoding
the CLOSID and RMID the core first checks if it is a reserved entry
being freed and exit early if this is the case?


> +	entry = __rmid_entry(idx);
>  
>  	if (is_llc_occupancy_enabled())
>  		add_rmid_to_limbo(entry);

...

> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 7d80bae05f59..ff7452f644e4 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -6,6 +6,9 @@
>  #include <linux/list.h>
>  #include <linux/pid.h>
>  
> +/* CLOSID value used by the default control group */
> +#define RESCTRL_RESERVED_CLOSID		0
> +

#define RESCTRL_RESERVED_RMID		0 ?

>  #ifdef CONFIG_PROC_CPU_RESCTRL
>  
>  int proc_resctrl_show(struct seq_file *m,


Reinette
Re: [PATCH v4 02/24] x86/resctrl: Access per-rmid structures by index
Posted by James Morse 2 years, 6 months ago
Hi Reinette,

On 15/06/2023 23:03, Reinette Chatre wrote:
> On 5/25/2023 11:01 AM, James Morse wrote:
>> Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
>> monitors, RMID values on arm64 are not unique unless the CLOSID is
> 
> I find the above a bit confusing ... the theme seems to be "RMID values
> on arm64 are not unique because they are different from Intel".
> Compare to: "One of the differences between Intel RDT/AMD QoS and
> Arm's MPAM monitors is that RMID values on arm64 are not unique unless
> the CLOSID is also included."

I'm not too sure what is confusing. I'll pad it out with an example.
Is this clearer?:
--------------%<--------------
x86 systems identify traffic using the CLOSID and RMID. The CLOSID is
used to lookup the control policy, the RMID is used for monitoring. For
x86 these are independent numbers.
Arm's MPAM has equivalent features PARTID and PMG, where the PARTID is
used to lookup the control policy. The PMG in contrast is a small number
of bits that are used to subdivide PARTID when monitoring. The
cache-occupancy monitors require the PARTID to be specified when monitoring.

This means MPAM's PMG field is not unique. There are multiple PMG-0, one
per allocated CLOSID/PARTID. If PMG is treated as equivalent to RMID, it
cannot be allocated as an independent number. Bitmaps like rmid_busy_llc
need to be sized by the number of unique entries for this resource.

Treat the combined CLOSID and RMID as an index, and provide architecture
helpers to pack and unpack an index. This makes the MPAM values unique.
The domain's rmid_busy_llc and rmid_ptrs[] are then sized by index, as
are domain mbm_local[] and mbm_total[].

x86 can ignore the CLOSID field when packing and unpacking an index, and
report as many indexes as RMID.
--------------%<--------------

>> also included. Bitmaps like rmid_busy_llc need to be sized by the
>> number of unique entries for this resource.
>>
>> Add helpers to encode/decode the CLOSID and RMID to an index. The
>> domain's rmid_busy_llc and rmid_ptrs[] are then sized by index,
>> as are the domain mbm_local and mbm_total arrays.
> 
> You can use "[]" to indicate an array.

>> On x86, the index is always just the RMID, so all these structures
>> remain the same size.
> 
> I do not consider this accurate considering that the previous
> patch increased the size of each element to support this change.

That is a different patch... My point here is that x86's array sizes don't get multiplied
by num_closid, as only arm64 needs that.

I've brushed that wording under the carpet in the text above.


>> The index gives resctrl a unique value it can use to store monitor
>> values, and allows MPAM to decode the CLOSID when reading the hardware
>> counters.

>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index 86574adedd64..bcc25f5339c0 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c

>> @@ -377,14 +399,16 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>>  
>>  void free_rmid(u32 closid, u32 rmid)
>>  {
>> +	u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
>>  	struct rmid_entry *entry;
>>  
>> -	if (!rmid)
>> -		return;
>> -
>>  	lockdep_assert_held(&rdtgroup_mutex);
>>  
>> -	entry = __rmid_entry(closid, rmid);
>> +	/* do not allow the default rmid to be free'd */
>> +	if (!idx)
>> +		return;
>> +
> 
> The interface seem to become blurry here. There are new
> architecture specific encode/decode callbacks while at the same
> time there are a few requirements:
> - closid 0 and rmid 0 are reserved

> - closid 0 and rmid 0 must map to index 0 (the architecture
> callbacks thus do not have must freedom here ... they must
> return index 0 for closid 0 and rmid 0, no?).

It's not supposed to matter, but I agree this check in free_rmid() doesn't decode the
index to check.


> It does seem a bit strange that the one layer provides values (0,0)
> to other layer while requiring a specific answer (0).
> 
> What if RESCTRL_RESERVED_RMID is also introduced and before encoding
> the CLOSID and RMID the core first checks if it is a reserved entry
> being freed and exit early if this is the case?

Sure, that makes this cleaner.


Thanks,

James
RE: [PATCH v4 02/24] x86/resctrl: Access per-rmid structures by index
Posted by Shaopeng Tan (Fujitsu) 2 years, 8 months ago
Hello James,

> Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
> monitors, RMID values on arm64 are not unique unless the CLOSID is also
> included. Bitmaps like rmid_busy_llc need to be sized by the number of unique
> entries for this resource.
> 
> Add helpers to encode/decode the CLOSID and RMID to an index. The
> domain's rmid_busy_llc and rmid_ptrs[] are then sized by index, as are the
> domain mbm_local and mbm_total arrays.
> On x86, the index is always just the RMID, so all these structures remain the
> same size.
> 
> The index gives resctrl a unique value it can use to store monitor values, and
> allows MPAM to decode the CLOSID when reading the hardware counters.
> 
> Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v1:
>  * Added X86_BAD_CLOSID macro to make it clear what this value means
>  * Added second WARN_ON() for closid checking, and made both _ONCE()
> 
> Changes since v2:
>  * Added RESCTRL_RESERVED_CLOSID
>  * Removed a newline
>  * Repharsed some comments
>  * Renamed a variable 'ignore'd
>  * Moved X86_RESCTRL_BAD_CLOSID to a previous patch
> 
> Changes since v3:
>  * Changed a variable name
>  * Fixed various typos
> ---
>  arch/x86/include/asm/resctrl.h         | 17 ++++++
>  arch/x86/kernel/cpu/resctrl/core.c     |  2 +-
>  arch/x86/kernel/cpu/resctrl/internal.h |  1 +
> arch/x86/kernel/cpu/resctrl/monitor.c  | 84
> +++++++++++++++++---------
> arch/x86/kernel/cpu/resctrl/rdtgroup.c |  7 ++-
>  include/linux/resctrl.h                |  3 +
>  6 files changed, 83 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
> index e906070285fb..dd9b638d43c8 100644
> --- a/arch/x86/include/asm/resctrl.h
> +++ b/arch/x86/include/asm/resctrl.h
> @@ -101,6 +101,23 @@ static inline void resctrl_sched_in(struct task_struct
> *tsk)
>  		__resctrl_sched_in(tsk);
>  }
> 
> +static inline u32 resctrl_arch_system_num_rmid_idx(void)
> +{
> +	/* RMID are independent numbers for x86. num_rmid_idx ==
> num_rmid */
> +	return boot_cpu_data.x86_cache_max_rmid + 1; }
> +
> +static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid,
> +u32 *rmid) {
> +	*rmid = idx;
> +	*closid = X86_RESCTRL_BAD_CLOSID;
> +}
> +
> +static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid) {
> +	return rmid;
> +}
> +
>  void resctrl_cpu_detect(struct cpuinfo_x86 *c);
> 
>  #else
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c
> b/arch/x86/kernel/cpu/resctrl/core.c
> index 030d3b409768..4bea032d072e 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -600,7 +600,7 @@ static void clear_closid_rmid(int cpu)
>  	state->default_rmid = 0;
>  	state->cur_closid = 0;
>  	state->cur_rmid = 0;
> -	wrmsr(MSR_IA32_PQR_ASSOC, 0, 0);
> +	wrmsr(MSR_IA32_PQR_ASSOC, 0, RESCTRL_RESERVED_CLOSID);
>  }
> 
>  static int resctrl_online_cpu(unsigned int cpu) diff --git
> a/arch/x86/kernel/cpu/resctrl/internal.h
> b/arch/x86/kernel/cpu/resctrl/internal.h
> index f2da908bb079..d571da4848a4 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -7,6 +7,7 @@
>  #include <linux/kernfs.h>
>  #include <linux/fs_context.h>
>  #include <linux/jump_label.h>
> +#include <asm/resctrl.h>
> 
>  #define L3_QOS_CDP_ENABLE		0x01ULL
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c
> b/arch/x86/kernel/cpu/resctrl/monitor.c
> index 86574adedd64..bcc25f5339c0 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -142,12 +142,29 @@ static inline u64 get_corrected_mbm_count(u32 rmid,
> unsigned long val)
>  	return val;
>  }
> 
> -static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
> +/*
> + * x86 and arm64 differ in their handling of monitoring.
> + * x86's RMID are an independent number, there is only one source of
> +traffic
> + * with an RMID value of '1'.
> + * arm64's PMG extend the PARTID/CLOSID space, there are multiple
> +sources of
> + * traffic with a PMG value of '1', one for each CLOSID, meaning the
> +RMID
> + * value is no longer unique.
> + * To account for this, resctrl uses an index. On x86 this is just the
> +RMID,
> + * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
> + *
> + * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The
> +arch code
> + * must accept an attempt to read every index.
> + */
> +static inline struct rmid_entry *__rmid_entry(u32 idx)
>  {
>  	struct rmid_entry *entry;
> +	u32 closid, rmid;
> 
> -	entry = &rmid_ptrs[rmid];
> -	WARN_ON(entry->rmid != rmid);
> +	entry = &rmid_ptrs[idx];
> +	resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
> +
> +	WARN_ON_ONCE(entry->closid != closid);
> +	WARN_ON_ONCE(entry->rmid != rmid);
> 
>  	return entry;
>  }
> @@ -277,8 +294,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct
> rdt_domain *d,  void __check_limbo(struct rdt_domain *d, bool force_free)  {
>  	struct rdt_resource *r =
> &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> +	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
>  	struct rmid_entry *entry;
> -	u32 crmid = 1, nrmid;
> +	u32 idx, cur_idx = 1;
>  	bool rmid_dirty;
>  	u64 val = 0;
> 
> @@ -289,12 +307,11 @@ void __check_limbo(struct rdt_domain *d, bool
> force_free)
>  	 * RMID and move it to the free list when the counter reaches 0.
>  	 */
>  	for (;;) {
> -		nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid,
> crmid);
> -		if (nrmid >= r->num_rmid)
> +		idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
> +		if (idx >= idx_limit)
>  			break;
> 
> -		entry = __rmid_entry(X86_RESCTRL_BAD_CLOSID, nrmid);//
> temporary
> -
> +		entry = __rmid_entry(idx);
>  		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
>  					   QOS_L3_OCCUP_EVENT_ID,
> &val)) {
>  			rmid_dirty = true;
> @@ -303,19 +320,21 @@ void __check_limbo(struct rdt_domain *d, bool
> force_free)
>  		}
> 
>  		if (force_free || !rmid_dirty) {
> -			clear_bit(entry->rmid, d->rmid_busy_llc);
> +			clear_bit(idx, d->rmid_busy_llc);
>  			if (!--entry->busy) {
>  				rmid_limbo_count--;
>  				list_add_tail(&entry->list, &rmid_free_lru);
>  			}
>  		}
> -		crmid = nrmid + 1;
> +		cur_idx = idx + 1;
>  	}
>  }
> 
>  bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)  {
> -	return find_first_bit(d->rmid_busy_llc, r->num_rmid) !=
> r->num_rmid;
> +	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> +
> +	return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
>  }

"struct rdt_resource *r" is not used in this function.


Best regards,
Shaopeng TAN