[PATCH 2/4] thermal: intel: Enable Directed Package-level Thermal Interrupts

Ricardo Neri posted 4 patches 1 month ago
[PATCH 2/4] thermal: intel: Enable Directed Package-level Thermal Interrupts
Posted by Ricardo Neri 1 month ago
Package-level thermal interrupts are broadcast to all online CPUs within a
package, even though only one CPU needs to service them. This results in
unnecessary wakeups, lock contention, and corresponding performance and
power-efficiency penalties.

When supported by hardware, a CPU requests to receive directed package-
level thermal interrupts by setting bit 25 in IA32_THERM_INTERRUPT. The
operating system must then verify that hardware has acknowledged this
request by checking bit 25 in IA32_PACKAGE_THERM_STATUS.

Enable directed package-level thermal interrupts on one CPU per package.
Use the CPU hotplug infrastructure. Keep track of the CPUs handling
package-level interrupts with an array.

If the handling CPU goes offline, select a new CPU. Temporarily enable
directed interrupts on both the current and new CPU until hardware
acknowledges the new selection, then disable them on the outgoing CPU.

Systems without directed-interrupt support retain the current behavior:
all online CPUs in a package receive the interrupt and existing handlers
manage any resulting contention. Also fall back to this behavior if the
directed-interrupt acknowledgment fails during boot.

Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
 drivers/thermal/intel/therm_throt.c | 178 +++++++++++++++++++++++++++++++++++-
 1 file changed, 177 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index 44fa4dd15dd1..456f2ac10e0c 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -20,6 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
+#include <linux/delay.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/smp.h>
@@ -244,7 +245,7 @@ static void thermal_intr_init_pkg_clear_mask(void)
 	 * IA32_PACKAGE_THERM_STATUS.
 	 */
 
-	/* All bits except BIT 26 depend on CPUID.06H: EAX[6] = 1 */
+	/* All bits except BITs 25 and 26 depend on CPUID.06H: EAX[6] = 1 */
 	if (boot_cpu_has(X86_FEATURE_PTS))
 		therm_intr_pkg_clear_mask = (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11));
 
@@ -254,6 +255,13 @@ static void thermal_intr_init_pkg_clear_mask(void)
 	 */
 	if (boot_cpu_has(X86_FEATURE_HFI))
 		therm_intr_pkg_clear_mask |= BIT(26);
+
+	/*
+	 * Intel SDM Volume 2A: Thermal and Power Management Leaf
+	 * Bit 25: CPUID.06H: EAX[24] = 1
+	 */
+	if (boot_cpu_has(X86_FEATURE_DIRECTED_PKG_THRM_INTR))
+		therm_intr_pkg_clear_mask |= BIT(25);
 }
 
 /*
@@ -524,6 +532,151 @@ static void thermal_throttle_remove_dev(struct device *dev)
 	sysfs_remove_group(&dev->kobj, &thermal_attr_group);
 }
 
+/* Wait for hardware to ack a directed-interrupt request: 0 or -ETIMEDOUT. */
+static int check_directed_thermal_pkg_intr_ack(void)
+{
+	unsigned int retries;
+	u64 status;
+
+	/* Hardware acks within 10ms; poll in ~1us steps for up to 15ms. */
+	for (retries = 15000; retries; retries--) {
+		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, status);
+		udelay(1);
+		if (status & PACKAGE_THERM_STATUS_DIRECTED_INTR_ACK)
+			break;
+	}
+
+	if (!retries)
+		return -ETIMEDOUT;
+
+	/* Clear the ack bit now that it has been observed. */
+	thermal_clear_package_intr_status(PACKAGE_LEVEL,
+					  PACKAGE_THERM_STATUS_DIRECTED_INTR_ACK);
+	return 0;
+}
+
+/* Set or clear the directed-interrupt request bit on the calling CPU. */
+static void config_directed_thermal_pkg_intr(void *info)
+{
+	const bool *enable = info;
+	u64 val;
+
+	rdmsrl(MSR_IA32_THERM_INTERRUPT, val);
+
+	/* Drop the request bit, then set it again only when enabling. */
+	val &= ~THERM_DIRECTED_INTR_ENABLE;
+	if (*enable)
+		val |= THERM_DIRECTED_INTR_ENABLE;
+	wrmsrl(MSR_IA32_THERM_INTERRUPT, val);
+}
+
+/*
+ * CPU handling the directed interrupt of each package, indexed by logical
+ * package ID; nr_cpu_ids when there is none. Only accessed from CPU hotplug
+ * operations. No extra locking needed.
+ */
+static unsigned int *directed_intr_handler_cpus;
+
+/* True when the feature is present and the tracking array is allocated. */
+static bool directed_thermal_pkg_intr_supported(unsigned int cpu)
+{
+	return boot_cpu_has(X86_FEATURE_DIRECTED_PKG_THRM_INTR) &&
+	       directed_intr_handler_cpus;
+}
+
+/* Try to make @cpu the handler of its package's directed thermal interrupt. */
+static void enable_directed_thermal_pkg_intr(unsigned int cpu)
+{
+	bool request = true;
+	u16 pkg;
+
+	if (!directed_thermal_pkg_intr_supported(cpu))
+		return;
+
+	pkg = topology_logical_package_id(cpu);
+	/* Bail out on a bad package ID or if the package already has a handler. */
+	if (pkg >= topology_max_packages() ||
+	    directed_intr_handler_cpus[pkg] != nr_cpu_ids)
+		return;
+
+	/* Clear any stale ack before issuing the request. */
+	thermal_clear_package_intr_status(PACKAGE_LEVEL,
+					  PACKAGE_THERM_STATUS_DIRECTED_INTR_ACK);
+	config_directed_thermal_pkg_intr(&request);
+
+	if (check_directed_thermal_pkg_intr_ack()) {
+		/* Failed to enable the directed package interrupt. Roll back. */
+		request = false;
+		config_directed_thermal_pkg_intr(&request);
+
+		/*
+		 * This function is first called from the CPU0 hotplug callback
+		 * during boot. If that attempt fails, stop using directed
+		 * interrupts: all CPUs in a package will receive the interrupt.
+		 */
+		if (!cpu) {
+			pr_info_once("CPU0: Failed to enable directed package-level thermal interrupt\n");
+			kfree(directed_intr_handler_cpus);
+			directed_intr_handler_cpus = NULL;
+		}
+		return;
+	}
+
+	/* Acknowledged: record @cpu as the handler for this package. */
+	directed_intr_handler_cpus[pkg] = cpu;
+}
+
+/* Move the directed package interrupt away from @cpu, which is going offline. */
+static void disable_directed_thermal_pkg_intr(unsigned int cpu)
+{
+	bool on = true, off = false;
+	unsigned int target;
+	u16 pkg;
+
+	if (!directed_thermal_pkg_intr_supported(cpu))
+		return;
+
+	pkg = topology_logical_package_id(cpu);
+	if (pkg >= topology_max_packages())
+		return;
+
+	/* Nothing to hand over unless @cpu handles the directed interrupt. */
+	if (directed_intr_handler_cpus[pkg] != cpu)
+		return;
+
+	/* Redirect the interrupt to another online CPU in the package. */
+	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+	if (target < nr_cpu_ids) {
+		/* Clear the ack bit so that it reflects only the new request. */
+		thermal_clear_package_intr_status(PACKAGE_LEVEL,
+						  PACKAGE_THERM_STATUS_DIRECTED_INTR_ACK);
+		smp_call_function_single(target, config_directed_thermal_pkg_intr,
+					 &on, true);
+
+		/*
+		 * If hardware did not acknowledge the request, disable the
+		 * redirection of the interrupt on the new CPU. Since no other
+		 * CPU is configured to receive the package-level interrupt,
+		 * all CPUs in the package will receive it.
+		 */
+		if (check_directed_thermal_pkg_intr_ack()) {
+			smp_call_function_single(target,
+						 config_directed_thermal_pkg_intr,
+						 &off, true);
+			target = nr_cpu_ids;
+		}
+	}
+
+	/*
+	 * Disable the interrupt on this CPU only now, after the new CPU has
+	 * been tried. Hardware may acknowledge this request too, but the ack
+	 * is not needed here: it is cleared before the next enable request.
+	 */
+	config_directed_thermal_pkg_intr(&off);
+
+	directed_intr_handler_cpus[pkg] = target;
+}
+
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
 static int thermal_throttle_online(unsigned int cpu)
 {
@@ -548,6 +701,8 @@ static int thermal_throttle_online(unsigned int cpu)
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 
+	enable_directed_thermal_pkg_intr(cpu);
+
 	return thermal_throttle_add_dev(dev, cpu);
 }
 
@@ -557,6 +712,8 @@ static int thermal_throttle_offline(unsigned int cpu)
 	struct device *dev = get_cpu_device(cpu);
 	u32 l;
 
+	disable_directed_thermal_pkg_intr(cpu);
+
 	/* Mask the thermal vector before draining evtl. pending work */
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
@@ -573,6 +730,23 @@ static int thermal_throttle_offline(unsigned int cpu)
 	return 0;
 }
 
+/* Allocate the per-package handler array; nr_cpu_ids marks "no handler". */
+static __init void init_directed_pkg_intr(void)
+{
+	unsigned int i, nr_pkgs = topology_max_packages();
+
+	if (!boot_cpu_has(X86_FEATURE_DIRECTED_PKG_THRM_INTR))
+		return;
+
+	directed_intr_handler_cpus = kmalloc_array(nr_pkgs, sizeof(*directed_intr_handler_cpus),
+						   GFP_KERNEL);
+	if (!directed_intr_handler_cpus)
+		return;
+
+	for (i = 0; i < nr_pkgs; i++)
+		directed_intr_handler_cpus[i] = nr_cpu_ids;
+}
+
 static __init int thermal_throttle_init_device(void)
 {
 	int ret;
@@ -580,6 +754,8 @@ static __init int thermal_throttle_init_device(void)
 	if (!atomic_read(&therm_throt_en))
 		return 0;
 
+	init_directed_pkg_intr();
+
 	intel_hfi_init();
 
 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",

-- 
2.43.0