[PATCH] x86/resctrl: Allow AET to use PMT/TPMI as loadable modules

Tony Luck posted 1 patch 1 week, 3 days ago
There is a newer version of this series
arch/x86/kernel/cpu/resctrl/intel_aet.c    | 27 +++++++++++++++++++---
drivers/platform/x86/intel/pmt/telemetry.c |  4 ++--
arch/x86/Kconfig                           |  2 +-
3 files changed, 27 insertions(+), 6 deletions(-)
[PATCH] x86/resctrl: Allow AET to use PMT/TPMI as loadable modules
Posted by Tony Luck 1 week, 3 days ago
The resctrl subsystem is always built into the base kernel. Currently,
enumerating Application Event Tracing (AET) features requires functions
from INTEL_PMT_TELEMETRY and INTEL_TPMI. Because resctrl makes direct calls
to these functions, it enforces a strict dependency requiring both PMT and
TPMI to be built-in.

This is overly restrictive. Use the symbol_get() mechanism to allow resctrl
to resolve these symbols at runtime, whether they reside in the base kernel
or in loadable modules.

Update the exports for intel_pmt_get_regions_by_feature() and
intel_pmt_put_feature_group() to be accessible via symbol_get(). Replace
the direct calls with indirect calls using function pointers.

Finally, adjust the Kconfig dependencies to allow X86_CPU_RESCTRL_INTEL_AET
to be enabled even when INTEL_PMT_TELEMETRY and INTEL_TPMI are configured
as modules.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/intel_aet.c    | 27 +++++++++++++++++++---
 drivers/platform/x86/intel/pmt/telemetry.c |  4 ++--
 arch/x86/Kconfig                           |  2 +-
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 89b8b619d5d5..b2a47774157f 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -23,6 +23,7 @@
 #include <linux/intel_vsec.h>
 #include <linux/io.h>
 #include <linux/minmax.h>
+#include <linux/module.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -304,37 +305,57 @@ static enum pmt_feature_id lookup_pfid(const char *pfname)
  */
 bool intel_aet_get_events(void)
 {
+	struct pmt_feature_group *(*get_feature)(enum pmt_feature_id id);
+	void (*put_feature)(struct pmt_feature_group *p);
 	struct pmt_feature_group *p;
 	enum pmt_feature_id pfid;
 	struct event_group **peg;
 	bool ret = false;
 
+	get_feature = symbol_get(intel_pmt_get_regions_by_feature);
+	if (!get_feature)
+		return ret;
+	put_feature = symbol_get(intel_pmt_put_feature_group);
+	if (!put_feature) {
+		symbol_put(intel_pmt_get_regions_by_feature);
+		return ret;
+	}
+
 	for_each_event_group(peg) {
 		pfid = lookup_pfid((*peg)->pfname);
-		p = intel_pmt_get_regions_by_feature(pfid);
+		p = get_feature(pfid);
 		if (IS_ERR_OR_NULL(p))
 			continue;
 		if (enable_events(*peg, p)) {
 			(*peg)->pfg = p;
 			ret = true;
 		} else {
-			intel_pmt_put_feature_group(p);
+			put_feature(p);
 		}
 	}
 
+	symbol_put(intel_pmt_get_regions_by_feature);
+	symbol_put(intel_pmt_put_feature_group);
+
 	return ret;
 }
 
 void __exit intel_aet_exit(void)
 {
+	void (*put_feature)(struct pmt_feature_group *p);
 	struct event_group **peg;
 
+	put_feature = symbol_get(intel_pmt_put_feature_group);
+	if (!put_feature)
+		return;
+
 	for_each_event_group(peg) {
 		if ((*peg)->pfg) {
-			intel_pmt_put_feature_group((*peg)->pfg);
+			put_feature((*peg)->pfg);
 			(*peg)->pfg = NULL;
 		}
 	}
+	symbol_put(intel_pmt_put_feature_group);
 }
 
 #define DATA_VALID	BIT_ULL(63)
diff --git a/drivers/platform/x86/intel/pmt/telemetry.c b/drivers/platform/x86/intel/pmt/telemetry.c
index a52803bfe124..4504fb9fd83c 100644
--- a/drivers/platform/x86/intel/pmt/telemetry.c
+++ b/drivers/platform/x86/intel/pmt/telemetry.c
@@ -287,13 +287,13 @@ struct pmt_feature_group *intel_pmt_get_regions_by_feature(enum pmt_feature_id i
 
 	return no_free_ptr(feature_group);
 }
-EXPORT_SYMBOL(intel_pmt_get_regions_by_feature);
+EXPORT_SYMBOL_GPL(intel_pmt_get_regions_by_feature);
 
 void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group)
 {
 	kref_put(&feature_group->kref, pmt_feature_group_release);
 }
-EXPORT_SYMBOL(intel_pmt_put_feature_group);
+EXPORT_SYMBOL_GPL(intel_pmt_put_feature_group);
 
 int pmt_telem_read(struct telem_endpoint *ep, u32 id, u64 *data, u32 count)
 {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e2df1b147184..fb3e40fc1e03 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -542,7 +542,7 @@ config X86_CPU_RESCTRL
 
 config X86_CPU_RESCTRL_INTEL_AET
 	bool "Intel Application Energy Telemetry"
-	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL && INTEL_PMT_TELEMETRY=y && INTEL_TPMI=y
+	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL && INTEL_PMT_TELEMETRY && INTEL_TPMI
 	help
 	  Enable per-RMID telemetry events in resctrl.
 
-- 
2.53.0
[PATCH v3 0/7] Allow AET to use PMT/TPMI as loadable modules
Posted by Tony Luck 6 days, 15 hours ago
The resctrl subsystem is always built into the base kernel. Currently,
enumerating Application Event Tracing (AET) features requires functions
from INTEL_PMT_TELEMETRY and INTEL_TPMI. Because resctrl makes direct calls
to these functions, it enforces a strict dependency requiring both PMT and
TPMI to be built-in.

This is overly restrictive. Use the symbol_get() mechanism to allow resctrl
to resolve these symbols at runtime, whether they reside in the base kernel
or in loadable modules.

Update the exports for intel_pmt_get_regions_by_feature() and
intel_pmt_put_feature_group() to be accessible via symbol_get(). Replace
the direct calls with indirect calls using function pointers.

Add extra hooks from filesystem to architecture code so that every
mount/unmount can be tracked. AET now does a complete cleanup on
unmount and a full enumeration on each mount.

Finally, adjust the Kconfig dependencies to allow X86_CPU_RESCTRL_INTEL_AET
to be enabled even when INTEL_PMT_TELEMETRY and INTEL_TPMI are configured
as modules.

Signed-off-by: Tony Luck <tony.luck@intel.com>
AI-review-of-v2: https://sashiko.dev/#/patchset/20260325171738.12207-1-tony.luck%40intel.com

---
Changes since version 2:
Link: https://lore.kernel.org/all/20260325171738.12207-1-tony.luck@intel.com/

Refactor so that AET can completely cleanup on unmount. This fixes the
existing minor problem, that the pmt_feature_group structures allocated
by INTEL_PMT are never freed. But it solves the bigger issue introduced
in v2 of this patch that the hold on the INTEL_PMT module is never
released.

Tony Luck (7):
  x86/resctrl: Drop setting of event_group::force_off when insufficient
    RMIDs
  fs/resctrl: Add interface to disable a monitor event
  fs,x86/resctrl: Add architecture hooks for every mount/unmount
  platform/x86/intel/pmt: Export PMT enumeration functions as GPL
  x86/resctrl: Allow AET to use PMT/TPMI as loadable modules
  x86/resctrl: Delete intel_aet_exit()
  x86/resctrl: Downgrade dependency of AET on INTEL_PMT

 include/linux/resctrl.h                    |  12 ++-
 arch/x86/kernel/cpu/resctrl/internal.h     |  10 +-
 arch/x86/kernel/cpu/resctrl/core.c         |  16 +++-
 arch/x86/kernel/cpu/resctrl/intel_aet.c    | 106 ++++++++++++++++++---
 drivers/platform/x86/intel/pmt/telemetry.c |   4 +-
 fs/resctrl/monitor.c                       |  12 +++
 fs/resctrl/rdtgroup.c                      |  20 +++-
 arch/x86/Kconfig                           |   2 +-
 8 files changed, 155 insertions(+), 27 deletions(-)


base-commit: c369299895a591d96745d6492d4888259b004a9e
-- 
2.53.0
[PATCH v3 1/7] x86/resctrl: Drop setting of event_group::force_off when insufficient RMIDs
Posted by Tony Luck 6 days, 15 hours ago
Setting the force_off flag does not matter when AET features are only
enumerated once (on first mount).

In preparation for enumeration on every mount, drop this because it
overrides user request with rdt= boot option to force enable a feature.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/intel_aet.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 89b8b619d5d5..015627401ed2 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -214,10 +214,8 @@ static bool all_regions_have_sufficient_rmid(struct event_group *e, struct pmt_f
 		if (!p->regions[i].addr)
 			continue;
 		tr = &p->regions[i];
-		if (tr->num_rmids < e->num_rmid) {
-			e->force_off = true;
+		if (tr->num_rmids < e->num_rmid)
 			return false;
-		}
 	}
 
 	return true;
-- 
2.53.0
[PATCH v3 2/7] fs/resctrl: Add interface to disable a monitor event
Posted by Tony Luck 6 days, 15 hours ago
Architecture code can ask file system code to enable events. But there
is no way to clean up and disable events.

Add resctrl_disable_mon_event().

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/resctrl.h |  1 +
 fs/resctrl/monitor.c    | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 006e57fd7ca5..b312aaf76974 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -416,6 +416,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
 
 bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
 			      unsigned int binary_bits, void *arch_priv);
+void resctrl_disable_mon_event(enum resctrl_event_id eventid);
 
 bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid);
 
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index 49f3f6b846b2..0def41c26edc 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -1010,6 +1010,18 @@ bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
 	return true;
 }
 
+void resctrl_disable_mon_event(enum resctrl_event_id eventid)
+{
+	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS))
+		return;
+	if (!mon_event_all[eventid].enabled) {
+		pr_warn("Repeat disable for event %d\n", eventid);
+		return;
+	}
+
+	mon_event_all[eventid].enabled = false;
+}
+
 bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
 {
 	return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS &&
-- 
2.53.0
[PATCH v3 3/7] fs,x86/resctrl: Add architecture hooks for every mount/unmount
Posted by Tony Luck 6 days, 15 hours ago
Add hooks for every mount/unmount of the resctrl file system so that
architecture code can allocate on mount and free on unmount.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/resctrl.h                 | 11 +++++++++--
 arch/x86/kernel/cpu/resctrl/internal.h  |  8 ++++++--
 arch/x86/kernel/cpu/resctrl/core.c      | 14 +++++++++++++-
 arch/x86/kernel/cpu/resctrl/intel_aet.c | 13 +++++++++++++
 fs/resctrl/rdtgroup.c                   | 20 ++++++++++++++++----
 5 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index b312aaf76974..fd16256d01a7 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -518,11 +518,18 @@ void resctrl_online_cpu(unsigned int cpu);
 void resctrl_offline_cpu(unsigned int cpu);
 
 /*
- * Architecture hook called at beginning of first file system mount attempt.
- * No locks are held.
+ * Architecture hooks for resctrl mount/unmount. No locks are held.
  */
+
+/* Called at beginning of each file system mount attempt. */
 void resctrl_arch_pre_mount(void);
 
+/* Called to report success/failure of mount. */
+void resctrl_arch_mount_result(int ret);
+
+/* Called to report unmount. */
+void resctrl_arch_unmount(void);
+
 /**
  * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid
  *			      for this resource and domain.
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index e3cfa0c10e92..6f322818a9e6 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -234,14 +234,18 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
 void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r);
 
 #ifdef CONFIG_X86_CPU_RESCTRL_INTEL_AET
-bool intel_aet_get_events(void);
+bool intel_aet_pre_mount(void);
+void intel_aet_mount_result(int ret);
+void intel_aet_unmount(void);
 void __exit intel_aet_exit(void);
 int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val);
 void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
 				struct list_head *add_pos);
 bool intel_handle_aet_option(bool force_off, char *tok);
 #else
-static inline bool intel_aet_get_events(void) { return false; }
+static inline bool intel_aet_pre_mount(void) { return false; }
+static inline void intel_aet_mount_result(int ret) { }
+static inline void intel_aet_unmount(void) { }
 static inline void __exit intel_aet_exit(void) { }
 static inline int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val)
 {
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 7667cf7c4e94..162eca2cfcdb 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -769,8 +769,10 @@ void resctrl_arch_pre_mount(void)
 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;
 	int cpu;
 
-	if (!intel_aet_get_events())
+	if (!intel_aet_pre_mount()) {
+		r->mon_capable = false;
 		return;
+	}
 
 	/*
 	 * Late discovery of telemetry events means the domains for the
@@ -786,6 +788,16 @@ void resctrl_arch_pre_mount(void)
 	cpus_read_unlock();
 }
 
+void resctrl_arch_mount_result(int ret)
+{
+	intel_aet_mount_result(ret);
+}
+
+void resctrl_arch_unmount(void)
+{
+	intel_aet_unmount();
+}
+
 enum {
 	RDT_FLAG_CMT,
 	RDT_FLAG_MBM_TOTAL,
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 015627401ed2..743a1894fe9a 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -335,6 +335,19 @@ void __exit intel_aet_exit(void)
 	}
 }
 
+bool intel_aet_pre_mount(void)
+{
+	return false; // Temporary stub
+}
+
+void intel_aet_mount_result(int ret)
+{
+}
+
+void intel_aet_unmount(void)
+{
+}
+
 #define DATA_VALID	BIT_ULL(63)
 #define DATA_BITS	GENMASK_ULL(62, 0)
 
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 5da305bd36c9..edcee79de424 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -18,7 +18,6 @@
 #include <linux/fs_parser.h>
 #include <linux/sysfs.h>
 #include <linux/kernfs.h>
-#include <linux/once.h>
 #include <linux/resctrl.h>
 #include <linux/seq_buf.h>
 #include <linux/seq_file.h>
@@ -2788,8 +2787,6 @@ static int rdt_get_tree(struct fs_context *fc)
 	struct rdt_resource *r;
 	int ret;
 
-	DO_ONCE_SLEEPABLE(resctrl_arch_pre_mount);
-
 	cpus_read_lock();
 	mutex_lock(&rdtgroup_mutex);
 	/*
@@ -2900,6 +2897,19 @@ static int rdt_get_tree(struct fs_context *fc)
 	return ret;
 }
 
+static int rdt_get_tree_wrapper(struct fs_context *fc)
+{
+	int ret;
+
+	resctrl_arch_pre_mount();
+
+	ret = rdt_get_tree(fc);
+
+	resctrl_arch_mount_result(ret);
+
+	return ret;
+}
+
 enum rdt_param {
 	Opt_cdp,
 	Opt_cdpl2,
@@ -2959,7 +2969,7 @@ static void rdt_fs_context_free(struct fs_context *fc)
 static const struct fs_context_operations rdt_fs_context_ops = {
 	.free		= rdt_fs_context_free,
 	.parse_param	= rdt_parse_param,
-	.get_tree	= rdt_get_tree,
+	.get_tree	= rdt_get_tree_wrapper,
 };
 
 static int rdt_init_fs_context(struct fs_context *fc)
@@ -3187,6 +3197,8 @@ static void rdt_kill_sb(struct super_block *sb)
 	kernfs_kill_sb(sb);
 	mutex_unlock(&rdtgroup_mutex);
 	cpus_read_unlock();
+
+	resctrl_arch_unmount();
 }
 
 static struct file_system_type rdt_fs_type = {
-- 
2.53.0
Re: [PATCH v3 3/7] fs,x86/resctrl: Add architecture hooks for every mount/unmount
Posted by Chen, Yu C 5 days, 2 hours ago
Hi Tony,

On 3/28/2026 7:02 AM, Tony Luck wrote:

> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index e3cfa0c10e92..6f322818a9e6 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -234,14 +234,18 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
>   void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r);
>   
>   #ifdef CONFIG_X86_CPU_RESCTRL_INTEL_AET
> -bool intel_aet_get_events(void);
> +bool intel_aet_pre_mount(void);
> +void intel_aet_mount_result(int ret);
> +void intel_aet_unmount(void);
>   void __exit intel_aet_exit(void);
>   int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val);
>   void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
>   				struct list_head *add_pos);
>   bool intel_handle_aet_option(bool force_off, char *tok);
>   #else
> -static inline bool intel_aet_get_events(void) { return false; }

The declaration of intel_aet_get_events() was removed,
and the function definition still remains in intel_aet.c.
Not sure if this will trigger a warning like
"no user for intel_aet_get_events". Since intel_aet_get_events() is
renamed to static get_events() in a subsequent patch,
perhaps we can mark intel_aet_get_events() as static in
this patch too?

thanks,
Chenyu
RE: [PATCH v3 3/7] fs,x86/resctrl: Add architecture hooks for every mount/unmount
Posted by Luck, Tony 3 days, 22 hours ago
> > -static inline bool intel_aet_get_events(void) { return false; }
>
> The declaration of intel_aet_get_events() was removed,
> and the function definition still remains in intel_aet.c.
> Not sure if this will trigger a warning like
> "no user for intel_aet_get_events". Since intel_aet_get_events() is
> renamed to static get_events() in a subsequent patch,
> perhaps we can mark intel_aet_get_events() as static in
> this patch too?

I need to explain more in the commit message. Or find a better way to do this.

I wanted to split the file system changes from the architecture changes to make
review easier.  But doing the file system first means that architecture is now
called on every mount, and it isn't ready for that. So, I cheated by breaking
the enumeration of AET in this patch and fixing it in next patch. The kernel
build is ok, so anyone bisecting some other issue won't see a problem (other
than AET not enumerated).

-Tony
[PATCH v3 4/7] platform/x86/intel/pmt: Export PMT enumeration functions as GPL
Posted by Tony Luck 6 days, 15 hours ago
The symbol_get() function requires that objects be GPL licensed.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/platform/x86/intel/pmt/telemetry.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/intel/pmt/telemetry.c b/drivers/platform/x86/intel/pmt/telemetry.c
index a52803bfe124..4504fb9fd83c 100644
--- a/drivers/platform/x86/intel/pmt/telemetry.c
+++ b/drivers/platform/x86/intel/pmt/telemetry.c
@@ -287,13 +287,13 @@ struct pmt_feature_group *intel_pmt_get_regions_by_feature(enum pmt_feature_id i
 
 	return no_free_ptr(feature_group);
 }
-EXPORT_SYMBOL(intel_pmt_get_regions_by_feature);
+EXPORT_SYMBOL_GPL(intel_pmt_get_regions_by_feature);
 
 void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group)
 {
 	kref_put(&feature_group->kref, pmt_feature_group_release);
 }
-EXPORT_SYMBOL(intel_pmt_put_feature_group);
+EXPORT_SYMBOL_GPL(intel_pmt_put_feature_group);
 
 int pmt_telem_read(struct telem_endpoint *ep, u32 id, u64 *data, u32 count)
 {
-- 
2.53.0
[PATCH v3 5/7] x86/resctrl: Allow AET to use PMT/TPMI as loadable modules
Posted by Tony Luck 6 days, 15 hours ago
The resctrl subsystem is always built into the base kernel. Currently,
enumerating Application Event Tracing (AET) features requires functions
from INTEL_PMT_TELEMETRY and INTEL_TPMI. Because resctrl makes direct calls
to these functions, it enforces a strict dependency requiring both PMT and
TPMI to be built-in.

This is overly restrictive. Use the symbol_get() mechanism to allow resctrl
to resolve these symbols at runtime, whether they reside in the base kernel
or in loadable modules.

Use symbol_get() to obtain the addresses of intel_pmt_get_regions_by_feature()
and intel_pmt_put_feature_group() and use indirect calls using function
pointers.

Change the pre-mount hook from once-only on first mount to call the hook
every time. Add hooks for mount success/failure and unmount so that
the hold on the INTEL_PMT_TELEMETRY module is only while the resctrl
file system is mounted. Unmount now cleans up all AET structures and
disables AET events.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/intel_aet.c | 97 ++++++++++++++++++++++---
 1 file changed, 87 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 743a1894fe9a..2d5766904fd8 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -23,6 +23,8 @@
 #include <linux/intel_vsec.h>
 #include <linux/io.h>
 #include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -289,6 +291,9 @@ static enum pmt_feature_id lookup_pfid(const char *pfname)
 	return FEATURE_INVALID;
 }
 
+static struct pmt_feature_group *(*get_feature)(enum pmt_feature_id id);
+static void (*put_feature)(struct pmt_feature_group *p);
+
 /*
  * Request a copy of struct pmt_feature_group for each event group. If there is
  * one, the returned structure has an array of telemetry_region structures,
@@ -300,7 +305,7 @@ static enum pmt_feature_id lookup_pfid(const char *pfname)
  * struct pmt_feature_group to indicate that its events are successfully
  * enabled.
  */
-bool intel_aet_get_events(void)
+static bool get_events(void)
 {
 	struct pmt_feature_group *p;
 	enum pmt_feature_id pfid;
@@ -309,14 +314,14 @@ bool intel_aet_get_events(void)
 
 	for_each_event_group(peg) {
 		pfid = lookup_pfid((*peg)->pfname);
-		p = intel_pmt_get_regions_by_feature(pfid);
+		p = get_feature(pfid);
 		if (IS_ERR_OR_NULL(p))
 			continue;
 		if (enable_events(*peg, p)) {
 			(*peg)->pfg = p;
 			ret = true;
 		} else {
-			intel_pmt_put_feature_group(p);
+			put_feature(p);
 		}
 	}
 
@@ -325,27 +330,99 @@ bool intel_aet_get_events(void)
 
 void __exit intel_aet_exit(void)
 {
-	struct event_group **peg;
+}
 
-	for_each_event_group(peg) {
-		if ((*peg)->pfg) {
-			intel_pmt_put_feature_group((*peg)->pfg);
-			(*peg)->pfg = NULL;
-		}
+static bool get_pmt_references(void)
+{
+	get_feature = symbol_get(intel_pmt_get_regions_by_feature);
+	if (!get_feature)
+		return false;
+	put_feature = symbol_get(intel_pmt_put_feature_group);
+	if (!put_feature) {
+		symbol_put(intel_pmt_get_regions_by_feature);
+		get_feature = NULL;
+		return false;
+	}
+
+	return true;
+}
+
+static void put_pmt_references(void)
+{
+	if (get_feature) {
+		symbol_put(intel_pmt_get_regions_by_feature);
+		get_feature = NULL;
+	}
+	if (put_feature) {
+		symbol_put(intel_pmt_put_feature_group);
+		put_feature = NULL;
 	}
 }
 
+static DEFINE_MUTEX(pmt_lock);
+
+static enum {
+	AET_UNINITIALIZED,
+	AET_PRESENT,
+	AET_NOT_PRESENT
+} aet_state;
+
 bool intel_aet_pre_mount(void)
 {
-	return false; // Temporary stub
+	bool ret;
+
+	mutex_lock(&pmt_lock);
+
+	if (aet_state == AET_PRESENT)
+		return true;
+
+	if (aet_state == AET_NOT_PRESENT || !get_pmt_references())
+		return false;
+
+	ret = get_events();
+
+	if (ret) {
+		aet_state = AET_PRESENT;
+	} else {
+		aet_state = AET_NOT_PRESENT;
+		put_pmt_references();
+	}
+
+	return ret;
+}
+
+static void aet_cleanup(void)
+{
+	struct event_group **peg;
+
+	if (aet_state == AET_PRESENT) {
+		for_each_event_group(peg) {
+			if ((*peg)->pfg) {
+				struct event_group *e = *peg;
+
+				for (int j = 0; j < e->num_events; j++)
+					resctrl_disable_mon_event(e->evts[j].id);
+				put_feature((*peg)->pfg);
+				(*peg)->pfg = NULL;
+			}
+		}
+		put_pmt_references();
+		aet_state = AET_UNINITIALIZED;
+	}
 }
 
 void intel_aet_mount_result(int ret)
 {
+	if (ret && ret != -EBUSY)
+		aet_cleanup();
+	mutex_unlock(&pmt_lock);
 }
 
 void intel_aet_unmount(void)
 {
+	mutex_lock(&pmt_lock);
+	aet_cleanup();
+	mutex_unlock(&pmt_lock);
 }
 
 #define DATA_VALID	BIT_ULL(63)
-- 
2.53.0
[PATCH v3 6/7] x86/resctrl: Delete intel_aet_exit()
Posted by Tony Luck 6 days, 15 hours ago
All cleanup for AET is now handled during file system unmount.

Drop intel_aet_exit().

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/internal.h  | 2 --
 arch/x86/kernel/cpu/resctrl/core.c      | 2 --
 arch/x86/kernel/cpu/resctrl/intel_aet.c | 4 ----
 3 files changed, 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 6f322818a9e6..2230b64fa2ba 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -237,7 +237,6 @@ void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r);
 bool intel_aet_pre_mount(void);
 void intel_aet_mount_result(int ret);
 void intel_aet_unmount(void);
-void __exit intel_aet_exit(void);
 int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val);
 void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
 				struct list_head *add_pos);
@@ -246,7 +245,6 @@ bool intel_handle_aet_option(bool force_off, char *tok);
 static inline bool intel_aet_pre_mount(void) { return false; }
 static inline void intel_aet_mount_result(int ret) { }
 static inline void intel_aet_unmount(void) { }
-static inline void __exit intel_aet_exit(void) { }
 static inline int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val)
 {
 	return -EINVAL;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 162eca2cfcdb..1951df519dea 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -1172,8 +1172,6 @@ late_initcall(resctrl_arch_late_init);
 
 static void __exit resctrl_arch_exit(void)
 {
-	intel_aet_exit();
-
 	cpuhp_remove_state(rdt_online);
 
 	resctrl_exit();
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 2d5766904fd8..c6c213d28275 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -328,10 +328,6 @@ static bool get_events(void)
 	return ret;
 }
 
-void __exit intel_aet_exit(void)
-{
-}
-
 static bool get_pmt_references(void)
 {
 	get_feature = symbol_get(intel_pmt_get_regions_by_feature);
-- 
2.53.0
[PATCH v3 7/7] x86/resctrl: Downgrade dependency of AET on INTEL_PMT
Posted by Tony Luck 6 days, 15 hours ago
The INTEL_AET code now using symbol_get() to access the INTEL_PMT
enumeration functions so no requirement that they be built-in to
the base kernel.

Update the Kconfig dependencies.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e2df1b147184..1a60da62da33 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -542,7 +542,7 @@ config X86_CPU_RESCTRL
 
 config X86_CPU_RESCTRL_INTEL_AET
 	bool "Intel Application Energy Telemetry"
-	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL && INTEL_PMT_TELEMETRY=y && INTEL_TPMI=y
+	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL && INTEL_PMT_TELEMETRY != n && INTEL_TPMI != n
 	help
 	  Enable per-RMID telemetry events in resctrl.
 
-- 
2.53.0
[PATCH v2] x86/resctrl: Allow AET to use PMT/TPMI as loadable modules
Posted by Tony Luck 1 week, 1 day ago
The resctrl subsystem is always built into the base kernel. Currently,
enumerating Application Event Tracing (AET) features requires functions
from INTEL_PMT_TELEMETRY and INTEL_TPMI. Because resctrl makes direct calls
to these functions, it enforces a strict dependency requiring both PMT and
TPMI to be built-in.

This is overly restrictive. Use the symbol_get() mechanism to allow resctrl
to resolve these symbols at runtime, whether they reside in the base kernel
or in loadable modules.

Update the exports for intel_pmt_get_regions_by_feature() and
intel_pmt_put_feature_group() to be accessible via symbol_get(). Replace
the direct calls with indirect calls using function pointers.

Finally, adjust the Kconfig dependencies to allow X86_CPU_RESCTRL_INTEL_AET
to be enabled even when INTEL_PMT_TELEMETRY and INTEL_TPMI are configured
as modules.

Signed-off-by: Tony Luck <tony.luck@intel.com>
AI-Review-of-v1: https://sashiko.dev/#/patchset/20260323163452.25044-1-tony.luck%40intel.com
---

Sashiko reported three issues. I fixed the one real issue.

1) If the INTEL_PMT_TELEMETRY module is unloaded it will unmap the MMIO register
space and resctrl will page fault reading AET counters.

This patch fixes this issue by delaying symbol_put() calls to intel_aet_exit()
which holds module reference counts on INTEL_PMT_TELEMETRY.

2) Sashiko said that my change to Kconfig wouldn't work if INTEL_PMT_TELEMETRY
was configured as a module. It thought that Kconfig would try to force
X86_CPU_RESCTRL_INTEL_AET to be a module, which violates its "bool" definition.

By experiment, Sashiko is wrong about this. No changes for this.

3) In some deeper call analysis Sashiko pointed out that intel_aet_get_events()
is only called once on first mount. If the INTEL_PMT module isn't loaded when
that happens, AET won't be enabled and the user can't fix this by loading
the module.

By default (on Fedora) the INTEL_PMT_TELEMETRY module is loaded. So a user
could shoot themselves in the foot by unloading the module and then mounting
resctrl. But this doesn't seem worth defending against.

 arch/x86/kernel/cpu/resctrl/intel_aet.c    | 22 +++++++++++++++++++---
 drivers/platform/x86/intel/pmt/telemetry.c |  4 ++--
 arch/x86/Kconfig                           |  2 +-
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 6fdb2cfa445a..881e3914578c 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -23,6 +23,7 @@
 #include <linux/intel_vsec.h>
 #include <linux/io.h>
 #include <linux/minmax.h>
+#include <linux/module.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -380,6 +381,9 @@ static enum pmt_feature_id lookup_pfid(const char *pfname)
 	return FEATURE_INVALID;
 }
 
+static struct pmt_feature_group *(*get_feature)(enum pmt_feature_id id);
+static void (*put_feature)(struct pmt_feature_group *p);
+
 /*
  * Request a copy of struct pmt_feature_group for each event group. If there is
  * one, the returned structure has an array of telemetry_region structures,
@@ -398,16 +402,25 @@ bool intel_aet_get_events(void)
 	struct event_group **peg;
 	bool ret = false;
 
+	get_feature = symbol_get(intel_pmt_get_regions_by_feature);
+	if (!get_feature)
+		return ret;
+	put_feature = symbol_get(intel_pmt_put_feature_group);
+	if (!put_feature) {
+		symbol_put(intel_pmt_get_regions_by_feature);
+		return ret;
+	}
+
 	for_each_event_group(peg) {
 		pfid = lookup_pfid((*peg)->pfname);
-		p = intel_pmt_get_regions_by_feature(pfid);
+		p = get_feature(pfid);
 		if (IS_ERR_OR_NULL(p))
 			continue;
 		if (enable_events(*peg, p)) {
 			(*peg)->pfg = p;
 			ret = true;
 		} else {
-			intel_pmt_put_feature_group(p);
+			put_feature(p);
 		}
 	}
 
@@ -420,10 +433,13 @@ void __exit intel_aet_exit(void)
 
 	for_each_event_group(peg) {
 		if ((*peg)->pfg) {
-			intel_pmt_put_feature_group((*peg)->pfg);
+			put_feature((*peg)->pfg);
 			(*peg)->pfg = NULL;
 		}
 	}
+	symbol_put(intel_pmt_get_regions_by_feature);
+	symbol_put(intel_pmt_put_feature_group);
+
 }
 
 #define DATA_VALID	BIT_ULL(63)
diff --git a/drivers/platform/x86/intel/pmt/telemetry.c b/drivers/platform/x86/intel/pmt/telemetry.c
index a52803bfe124..4504fb9fd83c 100644
--- a/drivers/platform/x86/intel/pmt/telemetry.c
+++ b/drivers/platform/x86/intel/pmt/telemetry.c
@@ -287,13 +287,13 @@ struct pmt_feature_group *intel_pmt_get_regions_by_feature(enum pmt_feature_id i
 
 	return no_free_ptr(feature_group);
 }
-EXPORT_SYMBOL(intel_pmt_get_regions_by_feature);
+EXPORT_SYMBOL_GPL(intel_pmt_get_regions_by_feature);
 
 void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group)
 {
 	kref_put(&feature_group->kref, pmt_feature_group_release);
 }
-EXPORT_SYMBOL(intel_pmt_put_feature_group);
+EXPORT_SYMBOL_GPL(intel_pmt_put_feature_group);
 
 int pmt_telem_read(struct telem_endpoint *ep, u32 id, u64 *data, u32 count)
 {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e2df1b147184..fb3e40fc1e03 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -542,7 +542,7 @@ config X86_CPU_RESCTRL
 
 config X86_CPU_RESCTRL_INTEL_AET
 	bool "Intel Application Energy Telemetry"
-	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL && INTEL_PMT_TELEMETRY=y && INTEL_TPMI=y
+	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL && INTEL_PMT_TELEMETRY && INTEL_TPMI
 	help
 	  Enable per-RMID telemetry events in resctrl.
 
-- 
2.53.0