[PATCH v2] x86/resctrl: Allow AET to use PMT/TPMI as loadable modules

Tony Luck posted 1 patch 1 week, 1 day ago
arch/x86/kernel/cpu/resctrl/intel_aet.c    | 22 +++++++++++++++++++---
drivers/platform/x86/intel/pmt/telemetry.c |  4 ++--
arch/x86/Kconfig                           |  2 +-
3 files changed, 22 insertions(+), 6 deletions(-)
[PATCH v2] x86/resctrl: Allow AET to use PMT/TPMI as loadable modules
Posted by Tony Luck 1 week, 1 day ago
The resctrl subsystem is always built into the base kernel. Currently,
enumerating Application Energy Telemetry (AET) features requires functions
from INTEL_PMT_TELEMETRY and INTEL_TPMI. Because resctrl makes direct calls
to these functions, it enforces a strict dependency requiring both PMT and
TPMI to be built-in.

This is overly restrictive. Use the symbol_get() mechanism to allow resctrl
to resolve these symbols at runtime, whether they reside in the base kernel
or in loadable modules.

Change the exports of intel_pmt_get_regions_by_feature() and
intel_pmt_put_feature_group() from EXPORT_SYMBOL() to EXPORT_SYMBOL_GPL() so
that they are accessible via symbol_get(). Replace the direct calls with
indirect calls using function pointers.

Finally, adjust the Kconfig dependencies to allow X86_CPU_RESCTRL_INTEL_AET
to be enabled even when INTEL_PMT_TELEMETRY and INTEL_TPMI are configured
as modules.

Signed-off-by: Tony Luck <tony.luck@intel.com>
AI-Review-of-v1: https://sashiko.dev/#/patchset/20260323163452.25044-1-tony.luck%40intel.com
---

Sashiko reported three issues. I fixed the one real issue.

1) If the INTEL_PMT_TELEMETRY module is unloaded it will unmap the MMIO register
space and resctrl will page fault reading AET counters.

This patch fixes this issue by delaying the symbol_put() calls until
intel_aet_exit(), so that resctrl keeps holding module reference counts on
INTEL_PMT_TELEMETRY until then.

2) Sashiko said that my change to Kconfig wouldn't work if INTEL_PMT_TELEMETRY
was configured as a module. It thought that Kconfig would try to force
X86_CPU_RESCTRL_INTEL_AET to be a module, which violates its "bool" definition.

Experiment shows that Sashiko is wrong about this, so no changes were made.

3) In some deeper call analysis Sashiko pointed out that intel_aet_get_events()
is only called once on first mount. If the INTEL_PMT_TELEMETRY module isn't
loaded when that happens, AET won't be enabled and the user can't fix this by
loading the module afterwards.

By default (on Fedora) the INTEL_PMT_TELEMETRY module is loaded. So a user
could shoot themselves in the foot by unloading the module and then mounting
resctrl. But this doesn't seem worth defending against.

 arch/x86/kernel/cpu/resctrl/intel_aet.c    | 22 +++++++++++++++++++---
 drivers/platform/x86/intel/pmt/telemetry.c |  4 ++--
 arch/x86/Kconfig                           |  2 +-
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 6fdb2cfa445a..881e3914578c 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -23,6 +23,7 @@
 #include <linux/intel_vsec.h>
 #include <linux/io.h>
 #include <linux/minmax.h>
+#include <linux/module.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -380,6 +381,9 @@ static enum pmt_feature_id lookup_pfid(const char *pfname)
 	return FEATURE_INVALID;
 }
 
+static struct pmt_feature_group *(*get_feature)(enum pmt_feature_id id);
+static void (*put_feature)(struct pmt_feature_group *p);
+
 /*
  * Request a copy of struct pmt_feature_group for each event group. If there is
  * one, the returned structure has an array of telemetry_region structures,
@@ -398,16 +402,25 @@ bool intel_aet_get_events(void)
 	struct event_group **peg;
 	bool ret = false;
 
+	get_feature = symbol_get(intel_pmt_get_regions_by_feature);
+	if (!get_feature)
+		return ret;
+	put_feature = symbol_get(intel_pmt_put_feature_group);
+	if (!put_feature) {
+		symbol_put(intel_pmt_get_regions_by_feature);
+		return ret;
+	}
+
 	for_each_event_group(peg) {
 		pfid = lookup_pfid((*peg)->pfname);
-		p = intel_pmt_get_regions_by_feature(pfid);
+		p = get_feature(pfid);
 		if (IS_ERR_OR_NULL(p))
 			continue;
 		if (enable_events(*peg, p)) {
 			(*peg)->pfg = p;
 			ret = true;
 		} else {
-			intel_pmt_put_feature_group(p);
+			put_feature(p);
 		}
 	}
 
@@ -420,10 +433,13 @@ void __exit intel_aet_exit(void)
 
 	for_each_event_group(peg) {
 		if ((*peg)->pfg) {
-			intel_pmt_put_feature_group((*peg)->pfg);
+			put_feature((*peg)->pfg);
 			(*peg)->pfg = NULL;
 		}
 	}
+	symbol_put(intel_pmt_get_regions_by_feature);
+	symbol_put(intel_pmt_put_feature_group);
+
 }
 
 #define DATA_VALID	BIT_ULL(63)
diff --git a/drivers/platform/x86/intel/pmt/telemetry.c b/drivers/platform/x86/intel/pmt/telemetry.c
index a52803bfe124..4504fb9fd83c 100644
--- a/drivers/platform/x86/intel/pmt/telemetry.c
+++ b/drivers/platform/x86/intel/pmt/telemetry.c
@@ -287,13 +287,13 @@ struct pmt_feature_group *intel_pmt_get_regions_by_feature(enum pmt_feature_id i
 
 	return no_free_ptr(feature_group);
 }
-EXPORT_SYMBOL(intel_pmt_get_regions_by_feature);
+EXPORT_SYMBOL_GPL(intel_pmt_get_regions_by_feature);
 
 void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group)
 {
 	kref_put(&feature_group->kref, pmt_feature_group_release);
 }
-EXPORT_SYMBOL(intel_pmt_put_feature_group);
+EXPORT_SYMBOL_GPL(intel_pmt_put_feature_group);
 
 int pmt_telem_read(struct telem_endpoint *ep, u32 id, u64 *data, u32 count)
 {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e2df1b147184..fb3e40fc1e03 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -542,7 +542,7 @@ config X86_CPU_RESCTRL
 
 config X86_CPU_RESCTRL_INTEL_AET
 	bool "Intel Application Energy Telemetry"
-	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL && INTEL_PMT_TELEMETRY=y && INTEL_TPMI=y
+	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL && INTEL_PMT_TELEMETRY && INTEL_TPMI
 	help
 	  Enable per-RMID telemetry events in resctrl.
 
-- 
2.53.0