[RFC PATCH v3 09/10] powerpc: Add debug file for set/unset paravirt CPUs

Shrikanth Hegde posted 10 patches 5 hours ago
[RFC PATCH v3 09/10] powerpc: Add debug file for set/unset paravirt CPUs
Posted by Shrikanth Hegde 5 hours ago
PowerPC systems can be deployed as shared processor Logical Partitions
(SPLPAR), aka Shared VMs. These configurations allow overcommit of CPU
resources, i.e. more virtual CPUs than physical CPUs.

When there is contention for physical CPUs in such cases, the arch needs
to have a mechanism to set the CPUs as paravirt. It also needs to clear
them when the contention goes away.

Ideally, the hint would come from the hypervisor. That would be more
accurate since it has knowledge of all SPLPARs deployed in the system.

Until the hint from the underlying hypervisor arrives, another idea is to
approximate the hint from steal time. There is some work ongoing, but it
is not there yet due to challenges revolving around limits and
convergence.

Until that happens, there is a need for a debugfs file which can be used
to set/unset the hint. The interface currently takes the CPU number
starting from which CPUs will be marked as paravirt. It could be changed
to one that takes a cpumask (list of CPUs) in the future.

============== Usage Example ============

Let's say a 720 CPU system is observing 20% steal time. It is evident
that one should probably use only 576 CPUs. Do,

echo 576 > /sys/kernel/debug/powerpc/vp_manual_hint
cat /sys/devices/system/cpu/paravirt
576-719

This marks CPUs 576-719 as paravirt and moves the tasks out of these
CPUs. To unset, echo the total number of CPUs (720) or a higher value.

echo 720 > /sys/kernel/debug/powerpc/vp_manual_hint
cat /sys/devices/system/cpu/paravirt

Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/include/asm/paravirt.h |  1 +
 arch/powerpc/kernel/smp.c           | 58 +++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index b78b82d66057..8854da8e532c 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -16,6 +16,7 @@
 #include <asm/cputhreads.h>
 
 DECLARE_STATIC_KEY_FALSE(shared_processor);
+DECLARE_STATIC_KEY_FALSE(cpu_paravirt_push_tasks);
 
 static inline bool is_shared_processor(void)
 {
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 68edb66c2964..1c0d59d353bd 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -64,6 +64,7 @@
 #include <asm/systemcfg.h>
 
 #include <trace/events/ipi.h>
+#include <linux/debugfs.h>
 
 #ifdef DEBUG
 #include <asm/udbg.h>
@@ -82,6 +83,7 @@ bool has_big_cores __ro_after_init;
 bool coregroup_enabled __ro_after_init;
 bool thread_group_shares_l2 __ro_after_init;
 bool thread_group_shares_l3 __ro_after_init;
+static int vp_manual_hint = NR_CPUS;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -1717,6 +1719,7 @@ static void __init build_sched_topology(void)
 	BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
 
 	set_sched_topology(powerpc_topology);
+	vp_manual_hint = num_present_cpus();
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
@@ -1797,4 +1800,59 @@ void __noreturn arch_cpu_idle_dead(void)
 	start_secondary_resume();
 }
 
+#ifdef CONFIG_PARAVIRT
+/*
+ * debugfs knob to mark CPUs as paravirt. Restricts the workload to the
+ * CPUs numbered below the hint. For example, on a 720 CPU system,
+ * "echo 576 > vp_manual_hint" keeps the workload on CPUs 0-575 and moves
+ * tasks out of CPUs 576-719. Writing 0 or a value >= nr_cpu_ids unsets it.
+ *
+ * The hint is a CPU number, so bound it by nr_cpu_ids, not by
+ * num_online_cpus(): online CPU numbers are sparse when some CPUs are
+ * offline, and bounding by the online count would leave high-numbered
+ * CPUs marked paravirt with no way to clear them.
+ */
+static int pv_vp_manual_hint_set(void *data, u64 val)
+{
+	int cpu;
+
+	if (val == 0 || val > nr_cpu_ids)
+		val = nr_cpu_ids;
+
+	if (val == vp_manual_hint)
+		return 0;
+
+	vp_manual_hint = val;
+
+	/* Enable the key only if some online CPU will be marked paravirt. */
+	if (cpumask_last(cpu_online_mask) >= vp_manual_hint)
+		static_branch_enable(&cpu_paravirt_push_tasks);
+	else
+		static_branch_disable(&cpu_paravirt_push_tasks);
+
+	for_each_online_cpu(cpu)
+		set_cpu_paravirt(cpu, cpu >= vp_manual_hint);
+	return 0;
+}
+
+static int pv_vp_manual_hint_get(void *data, u64 *val)
+{
+	*val = vp_manual_hint;	/* online CPUs numbered >= this are paravirt */
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_vp_manual_hint, pv_vp_manual_hint_get,
+			pv_vp_manual_hint_set, "%llu\n");
+
+/* Create the vp_manual_hint debugfs file when is_shared_processor(). */
+static __init int paravirt_debugfs_init(void)
+{
+	if (is_shared_processor())
+		debugfs_create_file("vp_manual_hint", 0600, arch_debugfs_dir,
+				    NULL, &fops_pv_vp_manual_hint);
+	return 0;
+}
+device_initcall(paravirt_debugfs_init);
+#endif
+
 #endif
-- 
2.47.3