[PATCH v4 10/13] x86/resctrl: Add the sysfs interface to read the event configuration

Babu Moger posted 13 patches 3 years, 7 months ago
[PATCH v4 10/13] x86/resctrl: Add the sysfs interface to read the event configuration
Posted by Babu Moger 3 years, 7 months ago
The current event configuration can be viewed by the user by reading
the sysfs configuration file.

Following are the types of events supported:

====  ===========================================================
Bits   Description
====  ===========================================================
6      Dirty Victims from the QOS domain to all types of memory
5      Reads to slow memory in the non-local NUMA domain
4      Reads to slow memory in the local NUMA domain
3      Non-temporal writes to non-local NUMA domain
2      Non-temporal writes to local NUMA domain
1      Reads to memory in the non-local NUMA domain
0      Reads to memory in the local NUMA domain
====  ===========================================================

By default, the mbm_total_bytes configuration is set to 0x7F to count
all the event types and the mbm_local_bytes configuration is set to
0x15 to count all the local memory events.

$ cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_total_config
0x7f

$ cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_config
0x15

Signed-off-by: Babu Moger <babu.moger@amd.com>
---
 arch/x86/kernel/cpu/resctrl/internal.h |   24 ++++++++++
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |   77 ++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 45923eb4022f..96f439324d78 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -15,6 +15,7 @@
 #define MSR_IA32_MBA_THRTL_BASE		0xd50
 #define MSR_IA32_MBA_BW_BASE		0xc0000200
 #define MSR_IA32_SMBA_BW_BASE		0xc0000280
+#define MSR_IA32_EVT_CFG_BASE		0xc0000400
 
 #define MSR_IA32_QM_CTR			0x0c8e
 #define MSR_IA32_QM_EVTSEL		0x0c8d
@@ -50,6 +51,29 @@
  */
 #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
 
+/* Reads to memory in the local NUMA domain */
+#define READS_TO_LOCAL_MEM		BIT(0)
+
+/* Reads to memory in the non-local NUMA domain */
+#define READS_TO_REMOTE_MEM		BIT(1)
+
+/* Non-temporal writes to local NUMA domain */
+#define NON_TEMP_WRITE_TO_LOCAL_MEM	BIT(2)
+
+/* Non-temporal writes to non-local NUMA domain */
+#define NON_TEMP_WRITE_TO_REMOTE_MEM	BIT(3)
+
+/* Reads to slow memory in the local NUMA domain */
+#define READS_TO_LOCAL_S_MEM		BIT(4)
+
+/* Reads to slow memory in the non-local NUMA domain */
+#define READS_TO_REMOTE_S_MEM		BIT(5)
+
+/* Dirty victims from the QOS domain to all types of memory */
+#define  DIRTY_VICTIMS_TO_ALL_MEM	BIT(6)
+
+/* Mask of all supported event configuration bits (bits 6:0) */
+#define MAX_EVT_CONFIG_BITS		GENMASK(6, 0)
 
 struct rdt_fs_context {
 	struct kernfs_fs_context	kfc;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index da11fdad204d..6f067c1ac7c1 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -254,8 +254,85 @@ static const struct kernfs_ops kf_mondata_ops = {
 	.seq_show		= rdtgroup_mondata_show,
 };
 
+struct mon_config_info {
+	u32 evtid;		/* event whose configuration is requested */
+	u32 mon_config;		/* configuration value read from the MSR */
+};
+
+/*
+ * IPI callback, run via mondata_config_read() below: reads the event
+ * configuration (not the CQM/MBM counters) for mon_info->evtid.
+ */
+void mon_event_config_read(void *info)
+{
+	struct mon_config_info *mon_info = info;
+	u32 h, msr_index;
+
+	switch (mon_info->evtid) {
+	case QOS_L3_MBM_TOTAL_EVENT_ID:
+		msr_index = 0;
+		break;
+	case QOS_L3_MBM_LOCAL_EVENT_ID:
+		msr_index = 1;
+		break;
+	default:
+		/* Not expected to come here; mon_config is left unwritten */
+		return;
+	}
+
+	rdmsr(MSR_IA32_EVT_CFG_BASE + msr_index, mon_info->mon_config, h);
+}
+
+void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
+{
+	smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
+}
+
+/*
+ * Show, as a hex value, the configuration of the monitoring event that
+ * the kernfs node encodes in its priv data (resource, domain, event).
+ * Returns 0 on success, or -ENOENT when rdtgroup_kn_lock_live() fails
+ * or the domain cannot be found.
+ */
+int rdtgroup_mondata_config_show(struct seq_file *m, void *arg)
+{
+	struct kernfs_open_file *of = m->private;
+	struct mon_config_info mon_info = {0};
+	struct rdtgroup *rdtgrp;
+	struct rdt_resource *r;
+	union mon_data_bits md;
+	struct rdt_domain *d;
+	int ret = 0;
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	md.priv = of->kn->priv;
+	r = &rdt_resources_all[md.u.rid].r_resctrl;
+
+	d = rdt_find_domain(r, md.u.domid, NULL);
+	if (IS_ERR_OR_NULL(d)) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	/* mon_info is zeroed: an unhandled evtid leaves mon_config at 0 */
+	mon_info.evtid = md.u.evtid;
+	mondata_config_read(d, &mon_info);
+
+	seq_printf(m, "0x%x\n", mon_info.mon_config);
+
+out:
+	rdtgroup_kn_unlock(of->kn);
+	return ret;
+}
+
 static const struct kernfs_ops kf_mondata_config_ops = {
 	.atomic_write_len       = PAGE_SIZE,
+	.seq_show               = rdtgroup_mondata_config_show,
 };
 
 static bool is_cpu_list(struct kernfs_open_file *of)

Re: [PATCH v4 10/13] x86/resctrl: Add the sysfs interface to read the event configuration
Posted by Reinette Chatre 3 years, 6 months ago
Hi Babu,

On 9/7/2022 11:01 AM, Babu Moger wrote:
> The current event configuration can be viewed by the user by reading
> the sysfs configuration file.
> 
> Following are the types of events supported:
> 
> ====  ===========================================================
> Bits   Description
> ====  ===========================================================
> 6      Dirty Victims from the QOS domain to all types of memory
> 5      Reads to slow memory in the non-local NUMA domain
> 4      Reads to slow memory in the local NUMA domain
> 3      Non-temporal writes to non-local NUMA domain
> 2      Non-temporal writes to local NUMA domain
> 1      Reads to memory in the non-local NUMA domain
> 0      Reads to memory in the local NUMA domain
> ====  ===========================================================
> 

...

> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 45923eb4022f..96f439324d78 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -15,6 +15,7 @@
>  #define MSR_IA32_MBA_THRTL_BASE		0xd50
>  #define MSR_IA32_MBA_BW_BASE		0xc0000200
>  #define MSR_IA32_SMBA_BW_BASE		0xc0000280
> +#define MSR_IA32_EVT_CFG_BASE		0xc0000400
>  
>  #define MSR_IA32_QM_CTR			0x0c8e
>  #define MSR_IA32_QM_EVTSEL		0x0c8d
> @@ -50,6 +51,29 @@
>   */
>  #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
>  
> +/* Reads to Local DRAM Memory */

What prompted the terminology switch between the
changelog ("local NUMA")  and the comments ("DRAM")?

> +#define READS_TO_LOCAL_MEM		BIT(0)
> +
> +/* Reads to Remote DRAM Memory */
> +#define READS_TO_REMOTE_MEM		BIT(1)
> +
> +/* Non-Temporal Writes to Local Memory */
> +#define NON_TEMP_WRITE_TO_LOCAL_MEM	BIT(2)
> +
> +/* Non-Temporal Writes to Remote Memory */
> +#define NON_TEMP_WRITE_TO_REMOTE_MEM	BIT(3)
> +
> +/* Reads to Local Memory the system identifies as "Slow Memory" */
> +#define READS_TO_LOCAL_S_MEM		BIT(4)
> +
> +/* Reads to Remote Memory the system identifies as "Slow Memory" */
> +#define READS_TO_REMOTE_S_MEM		BIT(5)
> +
> +/* Dirty Victims to All Types of Memory */
> +#define  DIRTY_VICTIMS_TO_ALL_MEM	BIT(6)
> +

Could you please fixup the comments to only capitalize 
the first word of each sentence (unless it is an acronym
or required for some other reason)?

> +/* Max event bits supported */
> +#define MAX_EVT_CONFIG_BITS		GENMASK(6, 0)
>  
>  struct rdt_fs_context {
>  	struct kernfs_fs_context	kfc;
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index da11fdad204d..6f067c1ac7c1 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -254,8 +254,85 @@ static const struct kernfs_ops kf_mondata_ops = {
>  	.seq_show		= rdtgroup_mondata_show,
>  };
>  
> +struct mon_config_info {
> +	u32 evtid;
> +	u32 mon_config;
> +};
> +
> +/*
> + * This is called via IPI to read the CQM/MBM counters
> + * in a domain.

This comment does not seem accurate - it is not reading the
actual counters but the configuration of the counters?

> + */
> +void mon_event_config_read(void *info)
> +{
> +	struct mon_config_info *mon_info = info;
> +	u32 h, msr_index;
> +
> +	switch (mon_info->evtid) {
> +	case QOS_L3_MBM_TOTAL_EVENT_ID:
> +		msr_index = 0;
> +		break;
> +	case QOS_L3_MBM_LOCAL_EVENT_ID:
> +		msr_index = 1;
> +		break;
> +	default:
> +		/* Not expected to come here */
> +		return;
> +	}
> +
> +	rdmsr(MSR_IA32_EVT_CFG_BASE + msr_index, mon_info->mon_config, h);
> +}
> +
> +void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
> +{
> +	smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
> +}
> +
> +int rdtgroup_mondata_config_show(struct seq_file *m, void *arg)
> +{
> +	struct kernfs_open_file *of = m->private;
> +	struct mon_config_info mon_info;

Could you please initialize this struct? I think this is important considering
that there is an (albeit unlikely) chance that uninitialized data can be returned
to user space.

> +	struct rdt_hw_resource *hw_res;
> +	u32 resid, evtid, domid;
> +	struct rdtgroup *rdtgrp;
> +	struct rdt_resource *r;
> +	union mon_data_bits md;
> +	struct rdt_domain *d;
> +	int ret = 0;
> +
> +	rdtgrp = rdtgroup_kn_lock_live(of->kn);
> +	if (!rdtgrp) {
> +		ret = -ENOENT;
> +		goto out;
> +	}
> +
> +	md.priv = of->kn->priv;
> +	resid = md.u.rid;
> +	domid = md.u.domid;
> +	evtid = md.u.evtid;
> +
> +	hw_res = &rdt_resources_all[resid];
> +	r = &hw_res->r_resctrl;
> +
> +	d = rdt_find_domain(r, domid, NULL);
> +	if (IS_ERR_OR_NULL(d)) {
> +		ret = -ENOENT;
> +		goto out;
> +	}
> +
> +	mon_info.evtid = evtid;
> +	mondata_config_read(d, &mon_info);
> +
> +	seq_printf(m, "0x%x\n", mon_info.mon_config);
> +
> +out:
> +	rdtgroup_kn_unlock(of->kn);
> +	return ret;
> +}
> +
>  static const struct kernfs_ops kf_mondata_config_ops = {
>  	.atomic_write_len       = PAGE_SIZE,
> +	.seq_show               = rdtgroup_mondata_config_show,
>  };
  
>  static bool is_cpu_list(struct kernfs_open_file *of)
> 
> 

Reinette
Re: [PATCH v4 10/13] x86/resctrl: Add the sysfs interface to read the event configuration
Posted by Moger, Babu 3 years, 6 months ago
Hi Reinette,


On 9/16/22 10:59, Reinette Chatre wrote:
> Hi Babu,
>
> On 9/7/2022 11:01 AM, Babu Moger wrote:
>> The current event configuration can be viewed by the user by reading
>> the sysfs configuration file.
>>
>> Following are the types of events supported:
>>
>> ====  ===========================================================
>> Bits   Description
>> ====  ===========================================================
>> 6      Dirty Victims from the QOS domain to all types of memory
>> 5      Reads to slow memory in the non-local NUMA domain
>> 4      Reads to slow memory in the local NUMA domain
>> 3      Non-temporal writes to non-local NUMA domain
>> 2      Non-temporal writes to local NUMA domain
>> 1      Reads to memory in the non-local NUMA domain
>> 0      Reads to memory in the local NUMA domain
>> ====  ===========================================================
>>
> ...
>
>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index 45923eb4022f..96f439324d78 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -15,6 +15,7 @@
>>  #define MSR_IA32_MBA_THRTL_BASE		0xd50
>>  #define MSR_IA32_MBA_BW_BASE		0xc0000200
>>  #define MSR_IA32_SMBA_BW_BASE		0xc0000280
>> +#define MSR_IA32_EVT_CFG_BASE		0xc0000400
>>  
>>  #define MSR_IA32_QM_CTR			0x0c8e
>>  #define MSR_IA32_QM_EVTSEL		0x0c8d
>> @@ -50,6 +51,29 @@
>>   */
>>  #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
>>  
>> +/* Reads to Local DRAM Memory */
> What prompted the terminology switch between the
> changelog ("local NUMA")  and the comments ("DRAM")?
oh. ok. Will change it.
>
>> +#define READS_TO_LOCAL_MEM		BIT(0)
>> +
>> +/* Reads to Remote DRAM Memory */
>> +#define READS_TO_REMOTE_MEM		BIT(1)
>> +
>> +/* Non-Temporal Writes to Local Memory */
>> +#define NON_TEMP_WRITE_TO_LOCAL_MEM	BIT(2)
>> +
>> +/* Non-Temporal Writes to Remote Memory */
>> +#define NON_TEMP_WRITE_TO_REMOTE_MEM	BIT(3)
>> +
>> +/* Reads to Local Memory the system identifies as "Slow Memory" */
>> +#define READS_TO_LOCAL_S_MEM		BIT(4)
>> +
>> +/* Reads to Remote Memory the system identifies as "Slow Memory" */
>> +#define READS_TO_REMOTE_S_MEM		BIT(5)
>> +
>> +/* Dirty Victims to All Types of Memory */
>> +#define  DIRTY_VICTIMS_TO_ALL_MEM	BIT(6)
>> +
> Could you please fixup the comments to only capitalize 
> the first word of each sentence (unless it is an acronym
> or required for some other reason)?
Sure.
>
>> +/* Max event bits supported */
>> +#define MAX_EVT_CONFIG_BITS		GENMASK(6, 0)
>>  
>>  struct rdt_fs_context {
>>  	struct kernfs_fs_context	kfc;
>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> index da11fdad204d..6f067c1ac7c1 100644
>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> @@ -254,8 +254,85 @@ static const struct kernfs_ops kf_mondata_ops = {
>>  	.seq_show		= rdtgroup_mondata_show,
>>  };
>>  
>> +struct mon_config_info {
>> +	u32 evtid;
>> +	u32 mon_config;
>> +};
>> +
>> +/*
>> + * This is called via IPI to read the CQM/MBM counters
>> + * in a domain.
> This comment does not seem accurate - it is not reading the
> actual counters but the configuration of the counters?
Yes, That is correct.
>
>> + */
>> +void mon_event_config_read(void *info)
>> +{
>> +	struct mon_config_info *mon_info = info;
>> +	u32 h, msr_index;
>> +
>> +	switch (mon_info->evtid) {
>> +	case QOS_L3_MBM_TOTAL_EVENT_ID:
>> +		msr_index = 0;
>> +		break;
>> +	case QOS_L3_MBM_LOCAL_EVENT_ID:
>> +		msr_index = 1;
>> +		break;
>> +	default:
>> +		/* Not expected to come here */
>> +		return;
>> +	}
>> +
>> +	rdmsr(MSR_IA32_EVT_CFG_BASE + msr_index, mon_info->mon_config, h);
>> +}
>> +
>> +void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
>> +{
>> +	smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
>> +}
>> +
>> +int rdtgroup_mondata_config_show(struct seq_file *m, void *arg)
>> +{
>> +	struct kernfs_open_file *of = m->private;
>> +	struct mon_config_info mon_info;
> Could you please initialize this struct? I think this is important considering
> that there is an (albeit unlikely) chance that uninitialized data can be returned
> to user space.

Sure.

Thanks

Babu

>> +	struct rdt_hw_resource *hw_res;
>> +	u32 resid, evtid, domid;
>> +	struct rdtgroup *rdtgrp;
>> +	struct rdt_resource *r;
>> +	union mon_data_bits md;
>> +	struct rdt_domain *d;
>> +	int ret = 0;
>> +
>> +	rdtgrp = rdtgroup_kn_lock_live(of->kn);
>> +	if (!rdtgrp) {
>> +		ret = -ENOENT;
>> +		goto out;
>> +	}
>> +
>> +	md.priv = of->kn->priv;
>> +	resid = md.u.rid;
>> +	domid = md.u.domid;
>> +	evtid = md.u.evtid;
>> +
>> +	hw_res = &rdt_resources_all[resid];
>> +	r = &hw_res->r_resctrl;
>> +
>> +	d = rdt_find_domain(r, domid, NULL);
>> +	if (IS_ERR_OR_NULL(d)) {
>> +		ret = -ENOENT;
>> +		goto out;
>> +	}
>> +
>> +	mon_info.evtid = evtid;
>> +	mondata_config_read(d, &mon_info);
>> +
>> +	seq_printf(m, "0x%x\n", mon_info.mon_config);
>> +
>> +out:
>> +	rdtgroup_kn_unlock(of->kn);
>> +	return ret;
>> +}
>> +
>>  static const struct kernfs_ops kf_mondata_config_ops = {
>>  	.atomic_write_len       = PAGE_SIZE,
>> +	.seq_show               = rdtgroup_mondata_config_show,
>>  };
>   
>>  static bool is_cpu_list(struct kernfs_open_file *of)
>>
>>
> Reinette

-- 
Thanks
Babu Moger