[PATCH v6 14/30] x86,fs/resctrl: Support binary fixed point event counters

Tony Luck posted 30 patches 3 months, 1 week ago
There is a newer version of this series
[PATCH v6 14/30] x86,fs/resctrl: Support binary fixed point event counters
Posted by Tony Luck 3 months, 1 week ago
Resctrl was written with the assumption that all monitor events can be
displayed as unsigned decimal integers.

Hardware architecture counters may provide some telemetry events with
greater precision where the event is not a simple count, but is a
measurement of some sort (e.g. Joules for energy consumed).

Add a new argument to resctrl_enable_mon_event() for architecture code
to inform the file system that the value for a counter is a fixed-point
value with a specific number of binary places.  The file system will
only allow architecture to use floating point format on events that it
marked with mon_evt::is_floating_point.

Fixed point values are displayed with values rounded to an appropriate
number of decimal places for the precision of the number of binary places
provided. In general one extra decimal place is added for every three
additional binary places. There are some exceptions for low precision
binary values where exact representation is possible:

  1 binary place is 0.0 or 0.5.			=> 1 decimal place
  2 binary places is 0.0. 0.25, 0.5, 0.75	=> 2 decimal places
  3 binary places is 0.0, 0.125, etc.		=> 3 decimal places

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/resctrl.h            |  4 +-
 fs/resctrl/internal.h              |  4 ++
 arch/x86/kernel/cpu/resctrl/core.c |  6 +-
 fs/resctrl/ctrlmondata.c           | 91 +++++++++++++++++++++++++++++-
 fs/resctrl/monitor.c               | 10 +++-
 5 files changed, 108 insertions(+), 7 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index e05a1abb25d4..1060a54cc9fa 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -379,7 +379,9 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
 u32 resctrl_arch_system_num_rmid_idx(void);
 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
 
-void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu);
+#define MAX_BINARY_BITS	27
+
+void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, u32 binary_bits);
 
 bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid);
 
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index f51d10d6a510..4dc678af005c 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -58,6 +58,8 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
  * @name:		name of the event
  * @configurable:	true if the event is configurable
  * @any_cpu:		true if the event can be read from any CPU
+ * @is_floating_point:	event values may be displayed in floating point format
+ * @binary_bits:	number of fixed-point binary bits from architecture
  * @enabled:		true if the event is enabled
  */
 struct mon_evt {
@@ -66,6 +68,8 @@ struct mon_evt {
 	char			*name;
 	bool			configurable;
 	bool			any_cpu;
+	bool			is_floating_point;
+	int			binary_bits;
 	bool			enabled;
 };
 
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index b83861ab504f..2b6c6b61707d 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -887,15 +887,15 @@ static __init bool get_rdt_mon_resources(void)
 	bool ret = false;
 
 	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) {
-		resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false);
+		resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false, 0);
 		ret = true;
 	}
 	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
-		resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false);
+		resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false, 0);
 		ret = true;
 	}
 	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
-		resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false);
+		resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false, 0);
 		ret = true;
 	}
 
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
index 2e65fddc3408..29de0e380ccc 100644
--- a/fs/resctrl/ctrlmondata.c
+++ b/fs/resctrl/ctrlmondata.c
@@ -590,6 +590,93 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
 	resctrl_arch_mon_ctx_free(r, evt->evtid, rr->arch_mon_ctx);
 }
 
+/**
+ * struct fixed_params - parameters to decode a binary fixed point value
+ * @decplaces:	Number of decimal places for this number of binary places.
+ * @pow10:	Multiplier (10 ^ decimal places).
+ */
+struct fixed_params {
+	int	decplaces;
+	int	pow10;
+};
+
+static struct fixed_params fixed_params[MAX_BINARY_BITS + 1] = {
+	[1]  = { .decplaces = 1, .pow10 = 10 },
+	[2]  = { .decplaces = 2, .pow10 = 100 },
+	[3]  = { .decplaces = 3, .pow10 = 1000 },
+	[4]  = { .decplaces = 3, .pow10 = 1000 },
+	[5]  = { .decplaces = 3, .pow10 = 1000 },
+	[6]  = { .decplaces = 3, .pow10 = 1000 },
+	[7]  = { .decplaces = 3, .pow10 = 1000 },
+	[8]  = { .decplaces = 3, .pow10 = 1000 },
+	[9]  = { .decplaces = 3, .pow10 = 1000 },
+	[10] = { .decplaces = 4, .pow10 = 10000 },
+	[11] = { .decplaces = 4, .pow10 = 10000 },
+	[12] = { .decplaces = 4, .pow10 = 10000 },
+	[13] = { .decplaces = 5, .pow10 = 100000 },
+	[14] = { .decplaces = 5, .pow10 = 100000 },
+	[15] = { .decplaces = 5, .pow10 = 100000 },
+	[16] = { .decplaces = 6, .pow10 = 1000000 },
+	[17] = { .decplaces = 6, .pow10 = 1000000 },
+	[18] = { .decplaces = 6, .pow10 = 1000000 },
+	[19] = { .decplaces = 7, .pow10 = 10000000 },
+	[20] = { .decplaces = 7, .pow10 = 10000000 },
+	[21] = { .decplaces = 7, .pow10 = 10000000 },
+	[22] = { .decplaces = 8, .pow10 = 100000000 },
+	[23] = { .decplaces = 8, .pow10 = 100000000 },
+	[24] = { .decplaces = 8, .pow10 = 100000000 },
+	[25] = { .decplaces = 9, .pow10 = 1000000000 },
+	[26] = { .decplaces = 9, .pow10 = 1000000000 },
+	[27] = { .decplaces = 9, .pow10 = 1000000000 }
+};
+
+static void print_event_value(struct seq_file *m, int binary_bits, u64 val)
+{
+	struct fixed_params *fp = &fixed_params[binary_bits];
+	unsigned long long frac;
+	char buf[10];
+
+	/* Mask off the integer part of the fixed-point value. */
+	frac = val & GENMASK_ULL(binary_bits, 0);
+
+	/*
+	 * Multiply by 10^{desired decimal places}. The
+	 * integer part of the fixed point value is now
+	 * almost what is needed.
+	 */
+	frac *= fp->pow10;
+
+	/*
+	 * Round to nearest by adding a value that
+	 * would be a "1" in the binary_bit + 1 place.
+	 * Integer part of fixed point value is now
+	 * the needed value.
+	 */
+	frac += 1 << (binary_bits - 1);
+
+	/*
+	 * Extract the integer part of the value. This
+	 * is the decimal representation of the original
+	 * fixed-point fractional value.
+	 */
+	frac >>= binary_bits;
+
+	/*
+	 * "frac" is now in the range [0 .. fp->pow10).
+	 * I.e. string representation will fit into
+	 * fp->decplaces.
+	 */
+	sprintf(buf, "%0*llu", fp->decplaces, frac);
+
+	/* Trim trailing zeroes */
+	for (int i = fp->decplaces - 1; i > 0; i--) {
+		if (buf[i] != '0')
+			break;
+		buf[i] = '\0';
+	}
+	seq_printf(m, "%llu.%s\n", val >> binary_bits, buf);
+}
+
 int rdtgroup_mondata_show(struct seq_file *m, void *arg)
 {
 	struct kernfs_open_file *of = m->private;
@@ -666,8 +753,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
 		seq_puts(m, "Error\n");
 	else if (rr.err == -EINVAL)
 		seq_puts(m, "Unavailable\n");
-	else
+	else if (evt->binary_bits == 0)
 		seq_printf(m, "%llu\n", rr.val);
+	else
+		print_event_value(m, evt->binary_bits, rr.val);
 
 out:
 	rdtgroup_kn_unlock(of->kn);
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index aec26457d82c..076c0cc6e53a 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -897,16 +897,22 @@ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
 	},
 };
 
-void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu)
+void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, u32 binary_bits)
 {
-	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS))
+	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS) ||
+			 binary_bits > MAX_BINARY_BITS)
 		return;
 	if (mon_event_all[eventid].enabled) {
 		pr_warn("Duplicate enable for event %d\n", eventid);
 		return;
 	}
+	if (binary_bits && !mon_event_all[eventid].is_floating_point) {
+		pr_warn("Event %d may not be floating point\n", eventid);
+		return;
+	}
 
 	mon_event_all[eventid].any_cpu = any_cpu;
+	mon_event_all[eventid].binary_bits = binary_bits;
 	mon_event_all[eventid].enabled = true;
 }
 
-- 
2.49.0
Re: [PATCH v6 14/30] x86,fs/resctrl: Support binary fixed point event counters
Posted by Reinette Chatre 3 months ago
Hi Tony,

On 6/26/25 9:49 AM, Tony Luck wrote:
> Resctrl was written with the assumption that all monitor events can be
> displayed as unsigned decimal integers.
> 
> Hardware architecture counters may provide some telemetry events with
> greater precision where the event is not a simple count, but is a
> measurement of some sort (e.g. Joules for energy consumed).
> 
> Add a new argument to resctrl_enable_mon_event() for architecture code
> to inform the file system that the value for a counter is a fixed-point
> value with a specific number of binary places.  The file system will
> only allow architecture to use floating point format on events that it
> marked with mon_evt::is_floating_point.
> 
> Fixed point values are displayed with values rounded to an appropriate
> number of decimal places for the precision of the number of binary places
> provided. In general one extra decimal place is added for every three
> additional binary places. There are some exceptions for low precision
> binary values where exact representation is possible:
> 
>   1 binary place is 0.0 or 0.5.			=> 1 decimal place
>   2 binary places is 0.0. 0.25, 0.5, 0.75	=> 2 decimal places
>   3 binary places is 0.0, 0.125, etc.		=> 3 decimal places
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  include/linux/resctrl.h            |  4 +-
>  fs/resctrl/internal.h              |  4 ++
>  arch/x86/kernel/cpu/resctrl/core.c |  6 +-
>  fs/resctrl/ctrlmondata.c           | 91 +++++++++++++++++++++++++++++-
>  fs/resctrl/monitor.c               | 10 +++-
>  5 files changed, 108 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index e05a1abb25d4..1060a54cc9fa 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -379,7 +379,9 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
>  u32 resctrl_arch_system_num_rmid_idx(void);
>  int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
>  
> -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu);
> +#define MAX_BINARY_BITS	27
> +
> +void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, u32 binary_bits);
>  
>  bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid);
>  
> diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
> index f51d10d6a510..4dc678af005c 100644
> --- a/fs/resctrl/internal.h
> +++ b/fs/resctrl/internal.h
> @@ -58,6 +58,8 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
>   * @name:		name of the event
>   * @configurable:	true if the event is configurable
>   * @any_cpu:		true if the event can be read from any CPU
> + * @is_floating_point:	event values may be displayed in floating point format

To help be specific and match user interface doc in patch #30 (and supported with a change
to this patch, more below):
"event values may be displayed" -> "event values are displayed"

> + * @binary_bits:	number of fixed-point binary bits from architecture

Please append "only valid if @is_floating_point is true".

>   * @enabled:		true if the event is enabled
>   */
>  struct mon_evt {
> @@ -66,6 +68,8 @@ struct mon_evt {
>  	char			*name;
>  	bool			configurable;
>  	bool			any_cpu;
> +	bool			is_floating_point;
> +	int			binary_bits;

hmmm ... first hunk of this patch uses "u32" as type for binary_bits and
this hunk uses "int", this mix of types is not clear at this point.

Since "binary_bits" is used as index into array I do not think "int" is
appropriate. How about just unsigned int throughout?

>  	bool			enabled;
>  };
>  
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index b83861ab504f..2b6c6b61707d 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -887,15 +887,15 @@ static __init bool get_rdt_mon_resources(void)
>  	bool ret = false;
>  
>  	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) {
> -		resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false);
> +		resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false, 0);
>  		ret = true;
>  	}
>  	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
> -		resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false);
> +		resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false, 0);
>  		ret = true;
>  	}
>  	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
> -		resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false);
> +		resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false, 0);
>  		ret = true;
>  	}
>  
> diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
> index 2e65fddc3408..29de0e380ccc 100644
> --- a/fs/resctrl/ctrlmondata.c
> +++ b/fs/resctrl/ctrlmondata.c
> @@ -590,6 +590,93 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
>  	resctrl_arch_mon_ctx_free(r, evt->evtid, rr->arch_mon_ctx);
>  }
>  
> +/**
> + * struct fixed_params - parameters to decode a binary fixed point value
> + * @decplaces:	Number of decimal places for this number of binary places.
> + * @pow10:	Multiplier (10 ^ decimal places).

To help be specific:
* @pow10:	Multiplier (10 ^ @decplaces).

... but I wonder if this cannot just use int_pow() to avoid this hardcoding?

> + */
> +struct fixed_params {
> +	int	decplaces;
> +	int	pow10;
> +};
> +
> +static struct fixed_params fixed_params[MAX_BINARY_BITS + 1] = {
> +	[1]  = { .decplaces = 1, .pow10 = 10 },
> +	[2]  = { .decplaces = 2, .pow10 = 100 },
> +	[3]  = { .decplaces = 3, .pow10 = 1000 },
> +	[4]  = { .decplaces = 3, .pow10 = 1000 },
> +	[5]  = { .decplaces = 3, .pow10 = 1000 },
> +	[6]  = { .decplaces = 3, .pow10 = 1000 },
> +	[7]  = { .decplaces = 3, .pow10 = 1000 },
> +	[8]  = { .decplaces = 3, .pow10 = 1000 },
> +	[9]  = { .decplaces = 3, .pow10 = 1000 },
> +	[10] = { .decplaces = 4, .pow10 = 10000 },
> +	[11] = { .decplaces = 4, .pow10 = 10000 },
> +	[12] = { .decplaces = 4, .pow10 = 10000 },
> +	[13] = { .decplaces = 5, .pow10 = 100000 },
> +	[14] = { .decplaces = 5, .pow10 = 100000 },
> +	[15] = { .decplaces = 5, .pow10 = 100000 },
> +	[16] = { .decplaces = 6, .pow10 = 1000000 },
> +	[17] = { .decplaces = 6, .pow10 = 1000000 },
> +	[18] = { .decplaces = 6, .pow10 = 1000000 },
> +	[19] = { .decplaces = 7, .pow10 = 10000000 },
> +	[20] = { .decplaces = 7, .pow10 = 10000000 },
> +	[21] = { .decplaces = 7, .pow10 = 10000000 },
> +	[22] = { .decplaces = 8, .pow10 = 100000000 },
> +	[23] = { .decplaces = 8, .pow10 = 100000000 },
> +	[24] = { .decplaces = 8, .pow10 = 100000000 },
> +	[25] = { .decplaces = 9, .pow10 = 1000000000 },
> +	[26] = { .decplaces = 9, .pow10 = 1000000000 },
> +	[27] = { .decplaces = 9, .pow10 = 1000000000 }
> +};
> +
> +static void print_event_value(struct seq_file *m, int binary_bits, u64 val)
> +{
> +	struct fixed_params *fp = &fixed_params[binary_bits];
> +	unsigned long long frac;
> +	char buf[10];
> +
> +	/* Mask off the integer part of the fixed-point value. */
> +	frac = val & GENMASK_ULL(binary_bits, 0);
> +
> +	/*
> +	 * Multiply by 10^{desired decimal places}. The
> +	 * integer part of the fixed point value is now
> +	 * almost what is needed.
> +	 */
> +	frac *= fp->pow10;
> +
> +	/*
> +	 * Round to nearest by adding a value that
> +	 * would be a "1" in the binary_bit + 1 place.
> +	 * Integer part of fixed point value is now
> +	 * the needed value.
> +	 */
> +	frac += 1 << (binary_bits - 1);

The static checker I tried pointed out that since the right side
does "int" math that is assigned to "unsigned long long" this risks
an "overflow before widen" issue. You can avoid overflow by casting
1 to "unsigned long long."

> +
> +	/*
> +	 * Extract the integer part of the value. This
> +	 * is the decimal representation of the original
> +	 * fixed-point fractional value.
> +	 */
> +	frac >>= binary_bits;
> +
> +	/*
> +	 * "frac" is now in the range [0 .. fp->pow10).
> +	 * I.e. string representation will fit into
> +	 * fp->decplaces.
> +	 */
> +	sprintf(buf, "%0*llu", fp->decplaces, frac);

Please use snprintf() to handle changes to fixed_params[].

> +
> +	/* Trim trailing zeroes */
> +	for (int i = fp->decplaces - 1; i > 0; i--) {
> +		if (buf[i] != '0')
> +			break;
> +		buf[i] = '\0';
> +	}
> +	seq_printf(m, "%llu.%s\n", val >> binary_bits, buf);
> +}
> +
>  int rdtgroup_mondata_show(struct seq_file *m, void *arg)
>  {
>  	struct kernfs_open_file *of = m->private;
> @@ -666,8 +753,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
>  		seq_puts(m, "Error\n");
>  	else if (rr.err == -EINVAL)
>  		seq_puts(m, "Unavailable\n");
> -	else
> +	else if (evt->binary_bits == 0)
>  		seq_printf(m, "%llu\n", rr.val);
> +	else
> +		print_event_value(m, evt->binary_bits, rr.val);
>  

At this time I understand that it will be clear for which
events user space expects floating point numbers. If the architecture in
turn does not support any "binary bits" then I think resctrl
should still print a floating point number ("x.0") to match user space
expectation.

>  out:
>  	rdtgroup_kn_unlock(of->kn);
> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
> index aec26457d82c..076c0cc6e53a 100644
> --- a/fs/resctrl/monitor.c
> +++ b/fs/resctrl/monitor.c
> @@ -897,16 +897,22 @@ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
>  	},
>  };
>  
> -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu)
> +void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, u32 binary_bits)
>  {
> -	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS))
> +	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS) ||
> +			 binary_bits > MAX_BINARY_BITS)

This alignment is off.

>  		return;
>  	if (mon_event_all[eventid].enabled) {
>  		pr_warn("Duplicate enable for event %d\n", eventid);
>  		return;
>  	}
> +	if (binary_bits && !mon_event_all[eventid].is_floating_point) {
> +		pr_warn("Event %d may not be floating point\n", eventid);
> +		return;
> +	}
>  
>  	mon_event_all[eventid].any_cpu = any_cpu;
> +	mon_event_all[eventid].binary_bits = binary_bits;
>  	mon_event_all[eventid].enabled = true;
>  }
>  

Reinette
Re: [PATCH v6 14/30] x86,fs/resctrl: Support binary fixed point event counters
Posted by Luck, Tony 3 months ago
On Tue, Jul 08, 2025 at 02:46:00PM -0700, Reinette Chatre wrote:
> > -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu)
> > +void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, u32 binary_bits)
> >  {
> > -	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS))
> > +	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS) ||
> > +			 binary_bits > MAX_BINARY_BITS)
> 
> This alignment is off.

I think the problem is location of the parentheses. An invalid
"binary_bits" value should trigger the WARN, just as an invalid
eventid.

So I'll change to:

	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS ||
			 binary_bits > MAX_BINARY_BITS))

-Tony
Re: [PATCH v6 14/30] x86,fs/resctrl: Support binary fixed point event counters
Posted by Fenghua Yu 3 months, 1 week ago
Hi, Tony,

On 6/26/25 09:49, Tony Luck wrote:
> Resctrl was written with the assumption that all monitor events can be
> displayed as unsigned decimal integers.
>
> Hardware architecture counters may provide some telemetry events with
> greater precision where the event is not a simple count, but is a
> measurement of some sort (e.g. Joules for energy consumed).
>
> Add a new argument to resctrl_enable_mon_event() for architecture code
> to inform the file system that the value for a counter is a fixed-point
> value with a specific number of binary places.  The file system will
> only allow architecture to use floating point format on events that it
> marked with mon_evt::is_floating_point.
>
> Fixed point values are displayed with values rounded to an appropriate
> number of decimal places for the precision of the number of binary places
> provided. In general one extra decimal place is added for every three
> additional binary places. There are some exceptions for low precision
> binary values where exact representation is possible:
>
>    1 binary place is 0.0 or 0.5.			=> 1 decimal place
>    2 binary places is 0.0. 0.25, 0.5, 0.75	=> 2 decimal places

nit. s/0.0./0.0,/

[SNIP]

Thanks.

-Fenghua
Re: [PATCH v6 14/30] x86,fs/resctrl: Support binary fixed point event counters
Posted by Fenghua Yu 3 months, 1 week ago
Hi, Tony,

On 6/26/25 09:49, Tony Luck wrote:
> Resctrl was written with the assumption that all monitor events can be
> displayed as unsigned decimal integers.
>
> Hardware architecture counters may provide some telemetry events with
> greater precision where the event is not a simple count, but is a
> measurement of some sort (e.g. Joules for energy consumed).
>
> Add a new argument to resctrl_enable_mon_event() for architecture code
> to inform the file system that the value for a counter is a fixed-point
> value with a specific number of binary places.  The file system will
> only allow architecture to use floating point format on events that it
> marked with mon_evt::is_floating_point.

User app needs to know if a number is a floating pointer value or an 
integer value. I see you document the energy and activity events as 
floating point values and all others are integer values.

Is it better to show the value types in info directory?

e.g. create an info file "events_floating" which shows all events with 
floating point values. Events not in this info are integer by default.

This may have two benefits:

1. An app can query the type info to parse the values accordingly 
without hard coding event types.

2. Any future floating point events can be added here without changing 
the document.

> Fixed point values are displayed with values rounded to an appropriate
> number of decimal places for the precision of the number of binary places
> provided. In general one extra decimal place is added for every three
> additional binary places. There are some exceptions for low precision
> binary values where exact representation is possible:
>
>    1 binary place is 0.0 or 0.5.			=> 1 decimal place
>    2 binary places is 0.0. 0.25, 0.5, 0.75	=> 2 decimal places
>    3 binary places is 0.0, 0.125, etc.		=> 3 decimal places
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>   include/linux/resctrl.h            |  4 +-
>   fs/resctrl/internal.h              |  4 ++
>   arch/x86/kernel/cpu/resctrl/core.c |  6 +-
>   fs/resctrl/ctrlmondata.c           | 91 +++++++++++++++++++++++++++++-
>   fs/resctrl/monitor.c               | 10 +++-
>   5 files changed, 108 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index e05a1abb25d4..1060a54cc9fa 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -379,7 +379,9 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
>   u32 resctrl_arch_system_num_rmid_idx(void);
>   int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
>   
> -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu);
> +#define MAX_BINARY_BITS	27
> +
> +void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, u32 binary_bits);
>   
>   bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid);
>   
> diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
> index f51d10d6a510..4dc678af005c 100644
> --- a/fs/resctrl/internal.h
> +++ b/fs/resctrl/internal.h
> @@ -58,6 +58,8 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
>    * @name:		name of the event
>    * @configurable:	true if the event is configurable
>    * @any_cpu:		true if the event can be read from any CPU
> + * @is_floating_point:	event values may be displayed in floating point format
> + * @binary_bits:	number of fixed-point binary bits from architecture
>    * @enabled:		true if the event is enabled
>    */
>   struct mon_evt {
> @@ -66,6 +68,8 @@ struct mon_evt {
>   	char			*name;
>   	bool			configurable;
>   	bool			any_cpu;
> +	bool			is_floating_point;
> +	int			binary_bits;
>   	bool			enabled;
>   };
>   
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index b83861ab504f..2b6c6b61707d 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -887,15 +887,15 @@ static __init bool get_rdt_mon_resources(void)
>   	bool ret = false;
>   
>   	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) {
> -		resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false);
> +		resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false, 0);
>   		ret = true;
>   	}
>   	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
> -		resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false);
> +		resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false, 0);
>   		ret = true;
>   	}
>   	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
> -		resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false);
> +		resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false, 0);
>   		ret = true;
>   	}
>   
> diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
> index 2e65fddc3408..29de0e380ccc 100644
> --- a/fs/resctrl/ctrlmondata.c
> +++ b/fs/resctrl/ctrlmondata.c
> @@ -590,6 +590,93 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
>   	resctrl_arch_mon_ctx_free(r, evt->evtid, rr->arch_mon_ctx);
>   }
>   
> +/**
> + * struct fixed_params - parameters to decode a binary fixed point value
> + * @decplaces:	Number of decimal places for this number of binary places.
> + * @pow10:	Multiplier (10 ^ decimal places).
> + */
> +struct fixed_params {
> +	int	decplaces;
> +	int	pow10;
> +};
> +
> +static struct fixed_params fixed_params[MAX_BINARY_BITS + 1] = {
> +	[1]  = { .decplaces = 1, .pow10 = 10 },
> +	[2]  = { .decplaces = 2, .pow10 = 100 },
> +	[3]  = { .decplaces = 3, .pow10 = 1000 },
> +	[4]  = { .decplaces = 3, .pow10 = 1000 },
> +	[5]  = { .decplaces = 3, .pow10 = 1000 },
> +	[6]  = { .decplaces = 3, .pow10 = 1000 },
> +	[7]  = { .decplaces = 3, .pow10 = 1000 },
> +	[8]  = { .decplaces = 3, .pow10 = 1000 },
> +	[9]  = { .decplaces = 3, .pow10 = 1000 },
> +	[10] = { .decplaces = 4, .pow10 = 10000 },
> +	[11] = { .decplaces = 4, .pow10 = 10000 },
> +	[12] = { .decplaces = 4, .pow10 = 10000 },
> +	[13] = { .decplaces = 5, .pow10 = 100000 },
> +	[14] = { .decplaces = 5, .pow10 = 100000 },
> +	[15] = { .decplaces = 5, .pow10 = 100000 },
> +	[16] = { .decplaces = 6, .pow10 = 1000000 },
> +	[17] = { .decplaces = 6, .pow10 = 1000000 },
> +	[18] = { .decplaces = 6, .pow10 = 1000000 },
> +	[19] = { .decplaces = 7, .pow10 = 10000000 },
> +	[20] = { .decplaces = 7, .pow10 = 10000000 },
> +	[21] = { .decplaces = 7, .pow10 = 10000000 },
> +	[22] = { .decplaces = 8, .pow10 = 100000000 },
> +	[23] = { .decplaces = 8, .pow10 = 100000000 },
> +	[24] = { .decplaces = 8, .pow10 = 100000000 },
> +	[25] = { .decplaces = 9, .pow10 = 1000000000 },
> +	[26] = { .decplaces = 9, .pow10 = 1000000000 },
> +	[27] = { .decplaces = 9, .pow10 = 1000000000 }
> +};
> +
> +static void print_event_value(struct seq_file *m, int binary_bits, u64 val)
> +{
> +	struct fixed_params *fp = &fixed_params[binary_bits];

Is it worth to have a boundary check here like? I'm afraid without the 
hardening check, a future caller may give a wrong value and cause hard 
debugged failure.

if (WARN_ON_ONCE(binary_bits >=MAX_BINARY_BITS))

     return;

> +	unsigned long long frac;
> +	char buf[10];
> +
> +	/* Mask off the integer part of the fixed-point value. */
> +	frac = val & GENMASK_ULL(binary_bits, 0);
> +
> +	/*
> +	 * Multiply by 10^{desired decimal places}. The
> +	 * integer part of the fixed point value is now
> +	 * almost what is needed.
> +	 */
> +	frac *= fp->pow10;
> +
> +	/*
> +	 * Round to nearest by adding a value that
> +	 * would be a "1" in the binary_bit + 1 place.
> +	 * Integer part of fixed point value is now
> +	 * the needed value.
> +	 */
> +	frac += 1 << (binary_bits - 1);
> +
> +	/*
> +	 * Extract the integer part of the value. This
> +	 * is the decimal representation of the original
> +	 * fixed-point fractional value.
> +	 */
> +	frac >>= binary_bits;
> +
> +	/*
> +	 * "frac" is now in the range [0 .. fp->pow10).
> +	 * I.e. string representation will fit into
> +	 * fp->decplaces.
> +	 */
> +	sprintf(buf, "%0*llu", fp->decplaces, frac);
> +
> +	/* Trim trailing zeroes */
> +	for (int i = fp->decplaces - 1; i > 0; i--) {
> +		if (buf[i] != '0')
> +			break;
> +		buf[i] = '\0';
> +	}
> +	seq_printf(m, "%llu.%s\n", val >> binary_bits, buf);
> +}
> +
>   int rdtgroup_mondata_show(struct seq_file *m, void *arg)
>   {
>   	struct kernfs_open_file *of = m->private;
> @@ -666,8 +753,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
>   		seq_puts(m, "Error\n");
>   	else if (rr.err == -EINVAL)
>   		seq_puts(m, "Unavailable\n");
> -	else
> +	else if (evt->binary_bits == 0)
>   		seq_printf(m, "%llu\n", rr.val);
> +	else
> +		print_event_value(m, evt->binary_bits, rr.val);
>   
>   out:
>   	rdtgroup_kn_unlock(of->kn);
> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
> index aec26457d82c..076c0cc6e53a 100644
> --- a/fs/resctrl/monitor.c
> +++ b/fs/resctrl/monitor.c
> @@ -897,16 +897,22 @@ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
>   	},
>   };
>   
> -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu)
> +void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, u32 binary_bits)
>   {
> -	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS))
> +	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS) ||
> +			 binary_bits > MAX_BINARY_BITS)
>   		return;
>   	if (mon_event_all[eventid].enabled) {
>   		pr_warn("Duplicate enable for event %d\n", eventid);
>   		return;
>   	}
> +	if (binary_bits && !mon_event_all[eventid].is_floating_point) {
> +		pr_warn("Event %d may not be floating point\n", eventid);
> +		return;
> +	}
>   
>   	mon_event_all[eventid].any_cpu = any_cpu;
> +	mon_event_all[eventid].binary_bits = binary_bits;
>   	mon_event_all[eventid].enabled = true;
>   }
>   

Thanks.

-Fenghua

Re: [PATCH v6 14/30] x86,fs/resctrl: Support binary fixed point event counters
Posted by Luck, Tony 3 months, 1 week ago
On Fri, Jun 27, 2025 at 02:22:18PM -0700, Fenghua Yu wrote:
> Hi, Tony,
> 
> On 6/26/25 09:49, Tony Luck wrote:
> > Resctrl was written with the assumption that all monitor events can be
> > displayed as unsigned decimal integers.
> > 
> > Hardware architecture counters may provide some telemetry events with
> > greater precision where the event is not a simple count, but is a
> > measurement of some sort (e.g. Joules for energy consumed).
> > 
> > Add a new argument to resctrl_enable_mon_event() for architecture code
> > to inform the file system that the value for a counter is a fixed-point
> > value with a specific number of binary places.  The file system will
> > only allow architecture to use floating point format on events that it
> > marked with mon_evt::is_floating_point.
> 
> User app needs to know if a number is a floating pointer value or an integer
> value. I see you document the energy and activity events as floating point
> values and all others are integer values.
> 
> Is it better to show the value types in info directory?
> 
> e.g. create an info file "events_floating" which shows all events with
> floating point values. Events not in this info are integer by default.
> 
> This may have two benefits:
> 
> 1. An app can query the type info to parse the values accordingly without
> hard coding event types.
> 
> 2. Any future floating point events can be added here without changing the
> document.

Maybe. It's obvious which are floating point because the values
have a "." in them.  Some apps may not care about the difference
and just read everything as if they are floating point. Maybe
likely since the next step is to compute the rate with:
	(current_value - previous_value) / delta_t
which will be done as a floating point calculation with
microsecond timestamps.

But it wouldn't be hard to add an info file that lists which are
in floating point (maybe also to provide the precision as
suggested by Dave Martin).

[snip]

> > +static void print_event_value(struct seq_file *m, int binary_bits, u64 val)
> > +{
> > +	struct fixed_params *fp = &fixed_params[binary_bits];
> 
> Is it worth to have a boundary check here like? I'm afraid without the
> hardening check, a future caller may give a wrong value and cause hard
> debugged failure.
> 
> if (WARN_ON_ONCE(binary_bits >=MAX_BINARY_BITS))
> 
>     return;

Seems like belt and braces overkill. resctrl_enable_mon_event()
already has a check for MAX_BINARY_BITS and will not enable
an event if architecture provides a too-large value.

-Tony