arch/x86/events/intel/core.c | 20 +++++++++++++++++++- arch/x86/events/perf_event.h | 1 + arch/x86/include/asm/perf_event.h | 4 ++++ tools/perf/Documentation/topdown.txt | 9 +++++++-- 4 files changed, 31 insertions(+), 3 deletions(-)
From: Kan Liang <kan.liang@linux.intel.com>
The new RDPMC enhancement, metrics clear mode, clears the
PERF_METRICS-related resources as well as the fixed-function performance
monitoring counter 3 after the read is performed. It is available to
ring 3. The feature is enumerated by
IA32_PERF_CAPABILITIES.RDPMC_CLEAR_METRICS[bit 19]. To enable the
feature, IA32_FIXED_CTR_CTRL.METRICS_CLEAR_EN[bit 14] must be set.
Two approaches were considered for enabling the feature.
- Expose a global knob in sysfs. One user changing the knob may affect
the measurements of other users, so this solution was
dropped.
- Introduce a new event format, metrics_clear, for the slots event to
disable/enable the feature only for the current process. Users can
utilize the feature as needed.
The latter solution is implemented in this patch.
KVM does not currently support perf metrics. For
virtualization, the feature can be enabled separately later.
Update the perf metrics documentation accordingly.
Suggested-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
The original V4 can be found at
https://lore.kernel.org/lkml/20240731143835.771618-6-kan.liang@linux.intel.com/
This patch was one of the PMU features in the LNL enabling patch set.
The other feature is now blocked, so this patch is sent separately.
arch/x86/events/intel/core.c | 20 +++++++++++++++++++-
arch/x86/events/perf_event.h | 1 +
arch/x86/include/asm/perf_event.h | 4 ++++
tools/perf/Documentation/topdown.txt | 9 +++++++--
4 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 5182075e111b..342f8b1a2f93 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2816,6 +2816,9 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
return;
idx = INTEL_PMC_IDX_FIXED_SLOTS;
+
+ if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR)
+ bits |= INTEL_FIXED_3_METRICS_CLEAR;
}
intel_set_masks(event, idx);
@@ -4067,7 +4070,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
* is used in a metrics group, it too cannot support sampling.
*/
if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
- if (event->attr.config1 || event->attr.config2)
+ /* The metrics_clear can only be set for the slots event */
+ if (event->attr.config1 &&
+ (!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR)))
+ return -EINVAL;
+
+ if (event->attr.config2)
return -EINVAL;
/*
@@ -4676,6 +4684,8 @@ PMU_FORMAT_ATTR(in_tx, "config:32" );
PMU_FORMAT_ATTR(in_tx_cp, "config:33" );
PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */
+PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+
static ssize_t umask2_show(struct device *dev,
struct device_attribute *attr,
char *page)
@@ -4695,6 +4705,7 @@ static struct device_attribute format_attr_umask2 =
static struct attribute *format_evtsel_ext_attrs[] = {
&format_attr_umask2.attr,
&format_attr_eq.attr,
+ &format_attr_metrics_clear.attr,
NULL
};
@@ -4719,6 +4730,13 @@ evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
if (i == 1)
return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;
+ /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+ if (i == 2) {
+ union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap);
+
+ return intel_cap.rdpmc_metrics_clear ? attr->mode : 0;
+ }
+
return 0;
}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 82c6f45ce975..31c2771545a6 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -624,6 +624,7 @@ union perf_capabilities {
u64 pebs_output_pt_available:1;
u64 pebs_timing_info:1;
u64 anythread_deprecated:1;
+ u64 rdpmc_metrics_clear:1;
};
u64 capabilities;
};
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 91b73571412f..e3b5e8e96fb3 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -41,6 +41,7 @@
#define INTEL_FIXED_0_USER (1ULL << 1)
#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2)
#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3)
+#define INTEL_FIXED_3_METRICS_CLEAR (1ULL << 2)
#define HSW_IN_TX (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
@@ -372,6 +373,9 @@ static inline bool use_fixed_pseudo_encoding(u64 code)
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND
#define INTEL_TD_METRIC_NUM 8
+#define INTEL_TD_CFG_METRIC_CLEAR_BIT 0
+#define INTEL_TD_CFG_METRIC_CLEAR BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT)
+
static inline bool is_metric_idx(int idx)
{
return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt
index ae0aee86844f..f36c8ca1dc53 100644
--- a/tools/perf/Documentation/topdown.txt
+++ b/tools/perf/Documentation/topdown.txt
@@ -280,8 +280,13 @@ with no longer interval than a few seconds
perf stat -I 1000 --topdown ...
-For user programs using RDPMC directly the counter can
-be reset explicitly using ioctl:
+Starting from the Lunar Lake p-core, an RDPMC metrics clear mode is
+introduced. The metrics and the fixed counter 3 are automatically
+cleared after the read is performed. It is recommended to always enable
+the mode. To enable the mode, set config1 of the slots event to 1.
+
+On the previous platforms, for user programs using RDPMC directly, the
+counter has to be reset explicitly using ioctl:
ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0);
--
2.38.1
Hi Peter, Gentle ping. Please let me know if you have any comments. Thanks, Kan On 2024-09-26 2:45 p.m., kan.liang@linux.intel.com wrote: > From: Kan Liang <kan.liang@linux.intel.com> > > The new RDPMC enhancement, metrics clear mode, is to clear the > PERF_METRICS-related resources as well as the fixed-function performance > monitoring counter 3 after the read is performed. It is available for > ring 3. The feature is enumerated by the > IA32_PERF_CAPABILITIES.RDPMC_CLEAR_METRICS[bit 19]. To enable the > feature, the IA32_FIXED_CTR_CTRL.METRICS_CLEAR_EN[bit 14] must be set. > > Two ways were considered to enable the feature. > - Expose a knob in the sysfs globally. One user may affect the > measurement of other users when changing the knob. The solution is > dropped. > - Introduce a new event format, metrics_clear, for the slots event to > disable/enable the feature only for the current process. Users can > utilize the feature as needed. > The latter solution is implemented in the patch. > > The current KVM doesn't support the perf metrics yet. For > virtualization, the feature can be enabled later separately. > > Update the document of perf metrics. > > Suggested-by: Andi Kleen <ak@linux.intel.com> > Reviewed-by: Andi Kleen <ak@linux.intel.com> > Reviewed-by: Ian Rogers <irogers@google.com> > Signed-off-by: Kan Liang <kan.liang@linux.intel.com> > --- > > The original V4 can be found at > https://lore.kernel.org/lkml/20240731143835.771618-6-kan.liang@linux.intel.com/ > > The patch was one of the PMU features in the LNL enabling patch set. > The other feature is now blocked. Send the patch separately. 
> > arch/x86/events/intel/core.c | 20 +++++++++++++++++++- > arch/x86/events/perf_event.h | 1 + > arch/x86/include/asm/perf_event.h | 4 ++++ > tools/perf/Documentation/topdown.txt | 9 +++++++-- > 4 files changed, 31 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c > index 5182075e111b..342f8b1a2f93 100644 > --- a/arch/x86/events/intel/core.c > +++ b/arch/x86/events/intel/core.c > @@ -2816,6 +2816,9 @@ static void intel_pmu_enable_fixed(struct perf_event *event) > return; > > idx = INTEL_PMC_IDX_FIXED_SLOTS; > + > + if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR) > + bits |= INTEL_FIXED_3_METRICS_CLEAR; > } > > intel_set_masks(event, idx); > @@ -4067,7 +4070,12 @@ static int intel_pmu_hw_config(struct perf_event *event) > * is used in a metrics group, it too cannot support sampling. > */ > if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) { > - if (event->attr.config1 || event->attr.config2) > + /* The metrics_clear can only be set for the slots event */ > + if (event->attr.config1 && > + (!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR))) > + return -EINVAL; > + > + if (event->attr.config2) > return -EINVAL; > > /* > @@ -4676,6 +4684,8 @@ PMU_FORMAT_ATTR(in_tx, "config:32" ); > PMU_FORMAT_ATTR(in_tx_cp, "config:33" ); > PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */ > > +PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */ > + > static ssize_t umask2_show(struct device *dev, > struct device_attribute *attr, > char *page) > @@ -4695,6 +4705,7 @@ static struct device_attribute format_attr_umask2 = > static struct attribute *format_evtsel_ext_attrs[] = { > &format_attr_umask2.attr, > &format_attr_eq.attr, > + &format_attr_metrics_clear.attr, > NULL > }; > > @@ -4719,6 +4730,13 @@ evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i) > if (i == 1) > return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? 
attr->mode : 0; > > + /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */ > + if (i == 2) { > + union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap); > + > + return intel_cap.rdpmc_metrics_clear ? attr->mode : 0; > + } > + > return 0; > } > > diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h > index 82c6f45ce975..31c2771545a6 100644 > --- a/arch/x86/events/perf_event.h > +++ b/arch/x86/events/perf_event.h > @@ -624,6 +624,7 @@ union perf_capabilities { > u64 pebs_output_pt_available:1; > u64 pebs_timing_info:1; > u64 anythread_deprecated:1; > + u64 rdpmc_metrics_clear:1; > }; > u64 capabilities; > }; > diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h > index 91b73571412f..e3b5e8e96fb3 100644 > --- a/arch/x86/include/asm/perf_event.h > +++ b/arch/x86/include/asm/perf_event.h > @@ -41,6 +41,7 @@ > #define INTEL_FIXED_0_USER (1ULL << 1) > #define INTEL_FIXED_0_ANYTHREAD (1ULL << 2) > #define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3) > +#define INTEL_FIXED_3_METRICS_CLEAR (1ULL << 2) > > #define HSW_IN_TX (1ULL << 32) > #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) > @@ -372,6 +373,9 @@ static inline bool use_fixed_pseudo_encoding(u64 code) > #define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND > #define INTEL_TD_METRIC_NUM 8 > > +#define INTEL_TD_CFG_METRIC_CLEAR_BIT 0 > +#define INTEL_TD_CFG_METRIC_CLEAR BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT) > + > static inline bool is_metric_idx(int idx) > { > return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM; > diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt > index ae0aee86844f..f36c8ca1dc53 100644 > --- a/tools/perf/Documentation/topdown.txt > +++ b/tools/perf/Documentation/topdown.txt > @@ -280,8 +280,13 @@ with no longer interval than a few seconds > > perf stat -I 1000 --topdown ... 
> > -For user programs using RDPMC directly the counter can > -be reset explicitly using ioctl: > +Starting from the Lunar Lake p-core, a RDPMC metrics clear mode is > +introduced. The metrics and the fixed counter 3 are automatically > +cleared after the read is performed. It is recommended to always enable > +the mode. To enable the mode, the config1 of slots event is set to 1. > + > +On the previous platforms, for user programs using RDPMC directly, the > +counter has to be reset explicitly using ioctl: > > ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0); >
Hi Peter, Ping. Could you please let me know if you have any comments. Thanks, Kan On 2024-10-09 3:31 p.m., Liang, Kan wrote: > Hi Peter, > > Gentle ping. Please let me know if you have any comments. > > Thanks, > Kan > > On 2024-09-26 2:45 p.m., kan.liang@linux.intel.com wrote: >> From: Kan Liang <kan.liang@linux.intel.com> >> >> The new RDPMC enhancement, metrics clear mode, is to clear the >> PERF_METRICS-related resources as well as the fixed-function performance >> monitoring counter 3 after the read is performed. It is available for >> ring 3. The feature is enumerated by the >> IA32_PERF_CAPABILITIES.RDPMC_CLEAR_METRICS[bit 19]. To enable the >> feature, the IA32_FIXED_CTR_CTRL.METRICS_CLEAR_EN[bit 14] must be set. >> >> Two ways were considered to enable the feature. >> - Expose a knob in the sysfs globally. One user may affect the >> measurement of other users when changing the knob. The solution is >> dropped. >> - Introduce a new event format, metrics_clear, for the slots event to >> disable/enable the feature only for the current process. Users can >> utilize the feature as needed. >> The latter solution is implemented in the patch. >> >> The current KVM doesn't support the perf metrics yet. For >> virtualization, the feature can be enabled later separately. >> >> Update the document of perf metrics. >> >> Suggested-by: Andi Kleen <ak@linux.intel.com> >> Reviewed-by: Andi Kleen <ak@linux.intel.com> >> Reviewed-by: Ian Rogers <irogers@google.com> >> Signed-off-by: Kan Liang <kan.liang@linux.intel.com> >> --- >> >> The original V4 can be found at >> https://lore.kernel.org/lkml/20240731143835.771618-6-kan.liang@linux.intel.com/ >> >> The patch was one of the PMU features in the LNL enabling patch set. >> The other feature is now blocked. Send the patch separately. 
>> >> arch/x86/events/intel/core.c | 20 +++++++++++++++++++- >> arch/x86/events/perf_event.h | 1 + >> arch/x86/include/asm/perf_event.h | 4 ++++ >> tools/perf/Documentation/topdown.txt | 9 +++++++-- >> 4 files changed, 31 insertions(+), 3 deletions(-) >> >> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c >> index 5182075e111b..342f8b1a2f93 100644 >> --- a/arch/x86/events/intel/core.c >> +++ b/arch/x86/events/intel/core.c >> @@ -2816,6 +2816,9 @@ static void intel_pmu_enable_fixed(struct perf_event *event) >> return; >> >> idx = INTEL_PMC_IDX_FIXED_SLOTS; >> + >> + if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR) >> + bits |= INTEL_FIXED_3_METRICS_CLEAR; >> } >> >> intel_set_masks(event, idx); >> @@ -4067,7 +4070,12 @@ static int intel_pmu_hw_config(struct perf_event *event) >> * is used in a metrics group, it too cannot support sampling. >> */ >> if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) { >> - if (event->attr.config1 || event->attr.config2) >> + /* The metrics_clear can only be set for the slots event */ >> + if (event->attr.config1 && >> + (!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR))) >> + return -EINVAL; >> + >> + if (event->attr.config2) >> return -EINVAL; >> >> /* >> @@ -4676,6 +4684,8 @@ PMU_FORMAT_ATTR(in_tx, "config:32" ); >> PMU_FORMAT_ATTR(in_tx_cp, "config:33" ); >> PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */ >> >> +PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */ >> + >> static ssize_t umask2_show(struct device *dev, >> struct device_attribute *attr, >> char *page) >> @@ -4695,6 +4705,7 @@ static struct device_attribute format_attr_umask2 = >> static struct attribute *format_evtsel_ext_attrs[] = { >> &format_attr_umask2.attr, >> &format_attr_eq.attr, >> + &format_attr_metrics_clear.attr, >> NULL >> }; >> >> @@ -4719,6 +4730,13 @@ evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i) >> if (i 
== 1) >> return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0; >> >> + /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */ >> + if (i == 2) { >> + union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap); >> + >> + return intel_cap.rdpmc_metrics_clear ? attr->mode : 0; >> + } >> + >> return 0; >> } >> >> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h >> index 82c6f45ce975..31c2771545a6 100644 >> --- a/arch/x86/events/perf_event.h >> +++ b/arch/x86/events/perf_event.h >> @@ -624,6 +624,7 @@ union perf_capabilities { >> u64 pebs_output_pt_available:1; >> u64 pebs_timing_info:1; >> u64 anythread_deprecated:1; >> + u64 rdpmc_metrics_clear:1; >> }; >> u64 capabilities; >> }; >> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h >> index 91b73571412f..e3b5e8e96fb3 100644 >> --- a/arch/x86/include/asm/perf_event.h >> +++ b/arch/x86/include/asm/perf_event.h >> @@ -41,6 +41,7 @@ >> #define INTEL_FIXED_0_USER (1ULL << 1) >> #define INTEL_FIXED_0_ANYTHREAD (1ULL << 2) >> #define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3) >> +#define INTEL_FIXED_3_METRICS_CLEAR (1ULL << 2) >> >> #define HSW_IN_TX (1ULL << 32) >> #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) >> @@ -372,6 +373,9 @@ static inline bool use_fixed_pseudo_encoding(u64 code) >> #define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND >> #define INTEL_TD_METRIC_NUM 8 >> >> +#define INTEL_TD_CFG_METRIC_CLEAR_BIT 0 >> +#define INTEL_TD_CFG_METRIC_CLEAR BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT) >> + >> static inline bool is_metric_idx(int idx) >> { >> return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM; >> diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt >> index ae0aee86844f..f36c8ca1dc53 100644 >> --- a/tools/perf/Documentation/topdown.txt >> +++ b/tools/perf/Documentation/topdown.txt >> @@ -280,8 +280,13 @@ with no longer interval than a few seconds >> >> perf stat -I 1000 --topdown ... 
>> >> -For user programs using RDPMC directly the counter can >> -be reset explicitly using ioctl: >> +Starting from the Lunar Lake p-core, a RDPMC metrics clear mode is >> +introduced. The metrics and the fixed counter 3 are automatically >> +cleared after the read is performed. It is recommended to always enable >> +the mode. To enable the mode, the config1 of slots event is set to 1. >> + >> +On the previous platforms, for user programs using RDPMC directly, the >> +counter has to be reset explicitly using ioctl: >> >> ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0); >> >
© 2016 - 2024 Red Hat, Inc.