[RFC PATCH v3 07/10] sched/core: Push current task from paravirt CPU

Shrikanth Hegde posted 10 patches 5 months ago
[RFC PATCH v3 07/10] sched/core: Push current task from paravirt CPU
Posted by Shrikanth Hegde 5 months ago
Actively push out any task running on a paravirt CPU. Since the task is
running on the CPU, a stopper thread needs to be spawned to push the task out.

If a task is sleeping, it is expected to move out when it wakes up. In
case it still chooses a paravirt CPU, the next tick will move it out.
However, if the task is pinned only to paravirt CPUs, it will continue
running there.

Though the code is almost the same as __balance_push_cpu_stop and quite close
to push_cpu_stop, it provides a cleaner implementation w.r.t. the PARAVIRT
config.

Add a push_task_work_done flag to protect the pv_push_task_work buffer. It has
been placed in the empty slot available considering 64/128 byte
cachelines.

This currently works only for FAIR and RT.

Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 kernel/sched/core.c  | 84 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |  9 ++++-
 2 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 279b0dd72b5e..1f9df5b8a3a2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5629,6 +5629,10 @@ void sched_tick(void)
 
 	sched_clock_tick();
 
+	/* push the current task out if this is a paravirt CPU */
+	if (is_cpu_paravirt(cpu))
+		push_current_from_paravirt_cpu(rq);
+
 	rq_lock(rq, &rf);
 	donor = rq->donor;
 
@@ -10977,4 +10981,84 @@ void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx)
 struct cpumask __cpu_paravirt_mask __read_mostly;
 EXPORT_SYMBOL(__cpu_paravirt_mask);
 DEFINE_STATIC_KEY_FALSE(cpu_paravirt_push_tasks);
+
+static DEFINE_PER_CPU(struct cpu_stop_work, pv_push_task_work);
+
+static int paravirt_push_cpu_stop(void *arg)
+{
+	struct task_struct *p = arg;
+	struct rq *rq = this_rq();
+	struct rq_flags rf;
+	int cpu;
+
+	raw_spin_lock_irq(&p->pi_lock);
+	rq_lock(rq, &rf);
+	rq->push_task_work_done = 0;
+
+	update_rq_clock(rq);
+
+	if (task_rq(p) == rq && task_on_rq_queued(p)) {
+		cpu = select_fallback_rq(rq->cpu, p);
+		rq = __migrate_task(rq, &rf, p, cpu);
+	}
+
+	rq_unlock(rq, &rf);
+	raw_spin_unlock_irq(&p->pi_lock);
+	put_task_struct(p);
+
+	return 0;
+}
+
+/*
+ * A CPU is marked as paravirt when there is contention for the underlying
+ * physical CPU and using it will lead to hypervisor preemptions. It is
+ * better not to use this CPU.
+ *
+ * If any task is scheduled on such a CPU, move it out. select_fallback_rq()
+ * will choose a non-paravirt CPU, so the task shouldn't come back here.
+ */
+void push_current_from_paravirt_cpu(struct rq *rq)
+{
+	struct task_struct *push_task = rq->curr;
+	unsigned long flags;
+	struct rq_flags rf;
+
+	if (!is_cpu_paravirt(rq->cpu))
+		return;
+
+	/* Idle task can't be pushed out */
+	if (rq->curr == rq->idle)
+		return;
+
+	/* Only do this for SCHED_NORMAL and RT for now */
+	if (push_task->sched_class != &fair_sched_class &&
+	    push_task->sched_class != &rt_sched_class)
+		return;
+
+	if (kthread_is_per_cpu(push_task) ||
+	    is_migration_disabled(push_task))
+		return;
+
+	/* Is the task affined only to paravirt CPUs? */
+	if (cpumask_subset(push_task->cpus_ptr, cpu_paravirt_mask))
+		return;
+
+	/* There is already a stopper thread for this. Don't race with it */
+	if (rq->push_task_work_done == 1)
+		return;
+
+	local_irq_save(flags);
+	preempt_disable();
+
+	get_task_struct(push_task);
+
+	rq_lock(rq, &rf);
+	rq->push_task_work_done = 1;
+	rq_unlock(rq, &rf);
+
+	stop_one_cpu_nowait(rq->cpu, paravirt_push_cpu_stop, push_task,
+			    this_cpu_ptr(&pv_push_task_work));
+	preempt_enable();
+	local_irq_restore(flags);
+}
 #endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8f9991453d36..5077a32593da 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1187,7 +1187,9 @@ struct rq {
 
 	unsigned char		nohz_idle_balance;
 	unsigned char		idle_balance;
-
+#ifdef CONFIG_PARAVIRT
+	bool			push_task_work_done;
+#endif
 	unsigned long		misfit_task_load;
 
 	/* For active balancing */
@@ -3890,11 +3892,16 @@ static inline bool is_cpu_paravirt(int cpu)
 
 	return false;
 }
+
+void push_current_from_paravirt_cpu(struct rq *rq);
+
 #else	/* !CONFIG_PARAVIRT */
 static inline bool is_cpu_paravirt(int cpu)
 {
 	return false;
 }
+
+static inline void push_current_from_paravirt_cpu(struct rq *rq) { }
 #endif	/* !CONFIG_PARAVIRT */
 
 #endif /* _KERNEL_SCHED_SCHED_H */
-- 
2.47.3
Re: [RFC PATCH v3 07/10] sched/core: Push current task from paravirt CPU
Posted by K Prateek Nayak 5 months ago
Hello Shrikanth,

On 9/10/2025 11:12 PM, Shrikanth Hegde wrote:
> Actively push out any task running on a paravirt CPU. Since the task is
> running on the CPU need to spawn a stopper thread and push the task out.
> 
> If task is sleeping, when it wakes up it is expected to move out. In
> case it still chooses a paravirt CPU, next tick will move it out.
> However, if the task in pinned only to paravirt CPUs, it will continue
> running there.
> 
> Though code is almost same as __balance_push_cpu_stop and quite close to
> push_cpu_stop, it provides a cleaner implementation w.r.t to PARAVIRT
> config.
> 
> Add push_task_work_done flag to protect pv_push_task_work buffer. This has
> been placed at the empty slot available considering 64/128 byte
> cacheline.
> 
> This currently works only FAIR and RT.

EXT can perhaps use the ops->cpu_{release,acquire}() if they are
interested in this.
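
For illustration, a rough sketch of how a sched_ext BPF scheduler might react
if the core were ever wired up to call cpu_release() when a CPU turns paravirt.
The cpu_release op and the scx_bpf_reenqueue_local() kfunc exist today; the
paravirt wiring itself is purely hypothetical here:

	#include <scx/common.bpf.h>

	/* Sketch only: hand back whatever was queued locally on this CPU so
	 * it can be dispatched to some other, non-paravirt CPU. */
	void BPF_STRUCT_OPS(pv_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
	{
		scx_bpf_reenqueue_local();
	}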

> 
> Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
> ---
>  kernel/sched/core.c  | 84 ++++++++++++++++++++++++++++++++++++++++++++
>  kernel/sched/sched.h |  9 ++++-
>  2 files changed, 92 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 279b0dd72b5e..1f9df5b8a3a2 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5629,6 +5629,10 @@ void sched_tick(void)
>  
>  	sched_clock_tick();
>  
> +	/* push the current task out if a paravirt CPU */
> +	if (is_cpu_paravirt(cpu))
> +		push_current_from_paravirt_cpu(rq);

Does this mean paravirt CPU is capable of handling an interrupt but may
not be continuously available to run a task? Or is the VMM expected to set
the CPU on the paravirt mask and give the vCPU sufficient time to move the
task before yanking it away from the pCPU?

> +
>  	rq_lock(rq, &rf);
>  	donor = rq->donor;
>  
> @@ -10977,4 +10981,84 @@ void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx)
>  struct cpumask __cpu_paravirt_mask __read_mostly;
>  EXPORT_SYMBOL(__cpu_paravirt_mask);
>  DEFINE_STATIC_KEY_FALSE(cpu_paravirt_push_tasks);
> +
> +static DEFINE_PER_CPU(struct cpu_stop_work, pv_push_task_work);
> +
> +static int paravirt_push_cpu_stop(void *arg)
> +{
> +	struct task_struct *p = arg;

Can we move all pushable tasks at once instead of just the rq->curr at
the time of the tick? It can also avoid keeping the reference to "p"
and only selectively pushing it. Thoughts?

> +	struct rq *rq = this_rq();
> +	struct rq_flags rf;
> +	int cpu;
> +
> +	raw_spin_lock_irq(&p->pi_lock);
> +	rq_lock(rq, &rf);
> +	rq->push_task_work_done = 0;
> +
> +	update_rq_clock(rq);
> +
> +	if (task_rq(p) == rq && task_on_rq_queued(p)) {
> +		cpu = select_fallback_rq(rq->cpu, p);
> +		rq = __migrate_task(rq, &rf, p, cpu);
> +	}
> +
> +	rq_unlock(rq, &rf);
> +	raw_spin_unlock_irq(&p->pi_lock);
> +	put_task_struct(p);
> +
> +	return 0;
> +}
> +
> +/* A CPU is marked as Paravirt when there is contention for underlying
> + * physical CPU and using this CPU will lead to hypervisor preemptions.
> + * It is better not to use this CPU.
> + *
> + * In case any task is scheduled on such CPU, move it out. In
> + * select_fallback_rq a non paravirt CPU will be chosen and henceforth
> + * task shouldn't come back to this CPU
> + */
> +void push_current_from_paravirt_cpu(struct rq *rq)
> +{
> +	struct task_struct *push_task = rq->curr;
> +	unsigned long flags;
> +	struct rq_flags rf;
> +
> +	if (!is_cpu_paravirt(rq->cpu))
> +		return;
> +
> +	/* Idle task can't be pused out */
> +	if (rq->curr == rq->idle)
> +		return;
> +
> +	/* Do for only SCHED_NORMAL AND RT for now */
> +	if (push_task->sched_class != &fair_sched_class &&
> +	    push_task->sched_class != &rt_sched_class)
> +		return;
> +
> +	if (kthread_is_per_cpu(push_task) ||
> +	    is_migration_disabled(push_task))
> +		return;
> +
> +	/* Is it affine to only paravirt cpus? */
> +	if (cpumask_subset(push_task->cpus_ptr, cpu_paravirt_mask))
> +		return;
> +
> +	/* There is already a stopper thread for this. Dont race with it */
> +	if (rq->push_task_work_done == 1)
> +		return;
> +
> +	local_irq_save(flags);
> +	preempt_disable();

Disabling IRQs implies preemption is disabled.

> +
> +	get_task_struct(push_task);
> +
> +	rq_lock(rq, &rf);
> +	rq->push_task_work_done = 1;
> +	rq_unlock(rq, &rf);
> +
> +	stop_one_cpu_nowait(rq->cpu, paravirt_push_cpu_stop, push_task,
> +			    this_cpu_ptr(&pv_push_task_work));
> +	preempt_enable();
> +	local_irq_restore(flags);
> +}
>  #endif
-- 
Thanks and Regards,
Prateek
Re: [RFC PATCH v3 07/10] sched/core: Push current task from paravirt CPU
Posted by Shrikanth Hegde 3 months ago
>> +
>> +static DEFINE_PER_CPU(struct cpu_stop_work, pv_push_task_work);
>> +
>> +static int paravirt_push_cpu_stop(void *arg)
>> +{
>> +	struct task_struct *p = arg;
> 
> Can we move all pushable tasks at once instead of just the rq->curr at
> the time of the tick? It can also avoid keeping the reference to "p"
> and only selectively pushing it. Thoughts?
> 
>> +	struct rq *rq = this_rq();
>> +	struct rq_flags rf;
>> +	int cpu;
>> +
>> +	raw_spin_lock_irq(&p->pi_lock);
>> +	rq_lock(rq, &rf);
>> +	rq->push_task_work_done = 0;
>> +
>> +	update_rq_clock(rq);
>> +
>> +	if (task_rq(p) == rq && task_on_rq_queued(p)) {
>> +		cpu = select_fallback_rq(rq->cpu, p);
>> +		rq = __migrate_task(rq, &rf, p, cpu);
>> +	}
>> +
>> +	rq_unlock(rq, &rf);
>> +	raw_spin_unlock_irq(&p->pi_lock);
>> +	put_task_struct(p);
>> +
>> +	return 0;
>> +}
>> +

Got it to work by using rt.pushable_tasks (RT) and rq->cfs_tasks (CFS).

I don't see any significant benefit from doing this. There is a slight improvement in the time
it takes to move the tasks out. This could help when there are way too many tasks on the rq.
But these days most systems run with HZ=1000, i.e. a 1 ms tick, so it shouldn't take
very long to push the current task out. Also, the rq lock likely needs to be held across
the loop to ensure the list doesn't get altered by IRQs etc.

Given the complexity, I prefer the method of pushing the current task out.
---

        /*
         * Sketch from the experiment: orig_rq is the paravirt CPU's runqueue
         * (e.g. this_rq() when run from the stopper thread).
         */
        struct task_struct *p, *tmp_p;
        struct rq *rq, *orig_rq = this_rq();
        struct rq_flags rf;
        unsigned long flags;
        int cpu;

        /* push the RT tasks out first */
        plist_for_each_entry_safe(p, tmp_p, &orig_rq->rt.pushable_tasks, pushable_tasks) {
                rq = orig_rq;

                if (kthread_is_per_cpu(p) || is_migration_disabled(p))
                        continue;

                raw_spin_lock_irqsave(&p->pi_lock, flags);
                rq_lock(rq, &rf);

                update_rq_clock(rq);

                if (task_rq(p) == rq && task_on_rq_queued(p)) {
                        cpu = select_fallback_rq(rq->cpu, p);
                        rq = __migrate_task(rq, &rf, p, cpu);
                }

                rq_unlock(rq, &rf);
                raw_spin_unlock_irqrestore(&p->pi_lock, flags);
        }
Re: [RFC PATCH v3 07/10] sched/core: Push current task from paravirt CPU
Posted by Shrikanth Hegde 5 months ago

On 9/11/25 11:10 AM, K Prateek Nayak wrote:
> Hello Shrikanth,
> 
> On 9/10/2025 11:12 PM, Shrikanth Hegde wrote:
>> Actively push out any task running on a paravirt CPU. Since the task is
>> running on the CPU need to spawn a stopper thread and push the task out.
>>
>> If task is sleeping, when it wakes up it is expected to move out. In
>> case it still chooses a paravirt CPU, next tick will move it out.
>> However, if the task in pinned only to paravirt CPUs, it will continue
>> running there.
>>
>> Though code is almost same as __balance_push_cpu_stop and quite close to
>> push_cpu_stop, it provides a cleaner implementation w.r.t to PARAVIRT
>> config.
>>
>> Add push_task_work_done flag to protect pv_push_task_work buffer. This has
>> been placed at the empty slot available considering 64/128 byte
>> cacheline.
>>
>> This currently works only FAIR and RT.
> 
> EXT can perhaps use the ops->cpu_{release,acquire}() if they are
> interested in this.
>

>>
>> Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
>> ---
>>   kernel/sched/core.c  | 84 ++++++++++++++++++++++++++++++++++++++++++++
>>   kernel/sched/sched.h |  9 ++++-
>>   2 files changed, 92 insertions(+), 1 deletion(-)
>>
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 279b0dd72b5e..1f9df5b8a3a2 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -5629,6 +5629,10 @@ void sched_tick(void)
>>   
>>   	sched_clock_tick();
>>   
>> +	/* push the current task out if a paravirt CPU */
>> +	if (is_cpu_paravirt(cpu))
>> +		push_current_from_paravirt_cpu(rq);
> 
> Does this mean paravirt CPU is capable of handling an interrupt but may
> not be continuously available to run a task?

When I run hackbench, which involves a fair bit of IRQ activity, it moves out.

For example,

echo 600-710 > /sys/devices/system/cpu/paravirt

11:31:54 AM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
11:31:57 AM  598    2.04    0.00   77.55    0.00   18.37    0.00    1.02    0.00    0.00    1.02
11:31:57 AM  599    1.01    0.00   79.80    0.00   17.17    0.00    1.01    0.00    0.00    1.01
11:31:57 AM  600    0.00    0.00    0.00    0.00    0.00    0.00    0.99    0.00    0.00   99.01
11:31:57 AM  601    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
11:31:57 AM  602    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00


There could be some workloads where IRQs don't move out, which would need an irqbalance change.
Looking into it.

> Or is the VMM expected to set
> the CPU on the paravirt mask and give the vCPU sufficient time to move the
> task before yanking it away from the pCPU?
>

If the vCPU is running something, it is going to run on a pCPU at some point;
the hypervisor will give cycles to this vCPU by preempting some other vCPU.

The point of using this infra is that there should be nothing running on a paravirt vCPU.
That way, collectively, the VMM gets only a limited number of requests for pCPUs, which it
can satisfy without vCPU preemption.

  
>> +
>>   	rq_lock(rq, &rf);
>>   	donor = rq->donor;
>>   
>> @@ -10977,4 +10981,84 @@ void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx)
>>   struct cpumask __cpu_paravirt_mask __read_mostly;
>>   EXPORT_SYMBOL(__cpu_paravirt_mask);
>>   DEFINE_STATIC_KEY_FALSE(cpu_paravirt_push_tasks);
>> +
>> +static DEFINE_PER_CPU(struct cpu_stop_work, pv_push_task_work);
>> +
>> +static int paravirt_push_cpu_stop(void *arg)
>> +{
>> +	struct task_struct *p = arg;
> 
> Can we move all pushable tasks at once instead of just the rq->curr at
> the time of the tick? It can also avoid keeping the reference to "p"
> and only selectively pushing it. Thoughts?
> 

I think that is doable.
We would need to pass the rq as the arg and go through all tasks on the rq in the stopper thread.

>> +	struct rq *rq = this_rq();
>> +	struct rq_flags rf;
>> +	int cpu;
>> +
>> +	raw_spin_lock_irq(&p->pi_lock);
>> +	rq_lock(rq, &rf);
>> +	rq->push_task_work_done = 0;
>> +
>> +	update_rq_clock(rq);
>> +
>> +	if (task_rq(p) == rq && task_on_rq_queued(p)) {
>> +		cpu = select_fallback_rq(rq->cpu, p);
>> +		rq = __migrate_task(rq, &rf, p, cpu);
>> +	}
>> +
>> +	rq_unlock(rq, &rf);
>> +	raw_spin_unlock_irq(&p->pi_lock);
>> +	put_task_struct(p);
>> +
>> +	return 0;
>> +}
>> +
>> +/* A CPU is marked as Paravirt when there is contention for underlying
>> + * physical CPU and using this CPU will lead to hypervisor preemptions.
>> + * It is better not to use this CPU.
>> + *
>> + * In case any task is scheduled on such CPU, move it out. In
>> + * select_fallback_rq a non paravirt CPU will be chosen and henceforth
>> + * task shouldn't come back to this CPU
>> + */
>> +void push_current_from_paravirt_cpu(struct rq *rq)
>> +{
>> +	struct task_struct *push_task = rq->curr;
>> +	unsigned long flags;
>> +	struct rq_flags rf;
>> +
>> +	if (!is_cpu_paravirt(rq->cpu))
>> +		return;
>> +
>> +	/* Idle task can't be pused out */
>> +	if (rq->curr == rq->idle)
>> +		return;
>> +
>> +	/* Do for only SCHED_NORMAL AND RT for now */
>> +	if (push_task->sched_class != &fair_sched_class &&
>> +	    push_task->sched_class != &rt_sched_class)
>> +		return;
>> +
>> +	if (kthread_is_per_cpu(push_task) ||
>> +	    is_migration_disabled(push_task))
>> +		return;
>> +
>> +	/* Is it affine to only paravirt cpus? */
>> +	if (cpumask_subset(push_task->cpus_ptr, cpu_paravirt_mask))
>> +		return;
>> +
>> +	/* There is already a stopper thread for this. Dont race with it */
>> +	if (rq->push_task_work_done == 1)
>> +		return;
>> +
>> +	local_irq_save(flags);
>> +	preempt_disable();
> 
> Disabling IRQs implies preemption is disabled.
>

In most places, stop_one_cpu_nowait() is called with preemption & IRQs disabled.
The stopper runs at the next possible opportunity.

stop_one_cpu_nowait
  ->queues the task into stopper list
     -> wake_up_process(stopper)
        -> set need_resched
          -> stopper runs as early as possible.
          
>> +
>> +	get_task_struct(push_task);
>> +
>> +	rq_lock(rq, &rf);
>> +	rq->push_task_work_done = 1;
>> +	rq_unlock(rq, &rf);
>> +
>> +	stop_one_cpu_nowait(rq->cpu, paravirt_push_cpu_stop, push_task,
>> +			    this_cpu_ptr(&pv_push_task_work));
>> +	preempt_enable();
>> +	local_irq_restore(flags);
>> +}
>>   #endif
Re: [RFC PATCH v3 07/10] sched/core: Push current task from paravirt CPU
Posted by K Prateek Nayak 5 months ago
Hello Shrikanth,

On 9/11/2025 10:22 PM, Shrikanth Hegde wrote:
>>> +    if (is_cpu_paravirt(cpu))
>>> +        push_current_from_paravirt_cpu(rq);
>>
>> Does this mean paravirt CPU is capable of handling an interrupt but may
>> not be continuously available to run a task?
> 
> When i run hackbench which involves fair bit of IRQ stuff, it moves out.
> 
> For example,
> 
> echo 600-710 > /sys/devices/system/cpu/paravirt
> 
> 11:31:54 AM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
> 11:31:57 AM  598    2.04    0.00   77.55    0.00   18.37    0.00    1.02    0.00    0.00    1.02
> 11:31:57 AM  599    1.01    0.00   79.80    0.00   17.17    0.00    1.01    0.00    0.00    1.01
> 11:31:57 AM  600    0.00    0.00    0.00    0.00    0.00    0.00    0.99    0.00    0.00   99.01
> 11:31:57 AM  601    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
> 11:31:57 AM  602    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
> 
> 
> There could some workloads which doesn't move irq's out, for which needs irqbalance change.
> Looking into it.
> 
>  Or is the VMM expected to set
>> the CPU on the paravirt mask and give the vCPU sufficient time to move the
>> task before yanking it away from the pCPU?
>>
> 
> If the vCPU is running something, it is going to run at some point on pCPU.
> hypervisor will give the cycles to this vCPU by preempting some other vCPU.
> 
> It is that using this infra, there is should be nothing on that paravirt vCPU.
> That way collectively VMM gets only limited request for pCPU which it can satify
> without vCPU preemption.

Ack! Just wanted to understand the usage.

P.S. I remember discussions during last LPC where we could communicate
this unavailability via CPU capacity. Was that problematic for some
reason? Sorry if I didn't follow this discussion earlier.

[..snip..]
>>> +    local_irq_save(flags);
>>> +    preempt_disable();
>>
>> Disabling IRQs implies preemption is disabled.
>>
> 
> Most of the places stop_one_cpu_nowait called with preemption & irq disabled.
> stopper runs at the next possible opportunity.

But is there any reason to do both local_irq_save() and
preempt_disable()? include/linux/preempt.h defines preemptible() as:

    #define preemptible()   (preempt_count() == 0 && !irqs_disabled())

so disabling IRQs should be sufficient, right? Or am I missing something?

> 
> stop_one_cpu_nowait
>  ->queues the task into stopper list
>     -> wake_up_process(stopper)
>        -> set need_resched
>          -> stopper runs as early as possible.
>         
-- 
Thanks and Regards,
Prateek

Re: [RFC PATCH v3 07/10] sched/core: Push current task from paravirt CPU
Posted by Shrikanth Hegde 4 months, 4 weeks ago

On 9/11/25 10:36 PM, K Prateek Nayak wrote:
> Hello Shrikanth,
> 
> On 9/11/2025 10:22 PM, Shrikanth Hegde wrote:
>>>> +    if (is_cpu_paravirt(cpu))
>>>> +        push_current_from_paravirt_cpu(rq);
>>>
>>> Does this mean paravirt CPU is capable of handling an interrupt but may
>>> not be continuously available to run a task?
>>
>> When i run hackbench which involves fair bit of IRQ stuff, it moves out.
>>
>> For example,
>>
>> echo 600-710 > /sys/devices/system/cpu/paravirt
>>
>> 11:31:54 AM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
>> 11:31:57 AM  598    2.04    0.00   77.55    0.00   18.37    0.00    1.02    0.00    0.00    1.02
>> 11:31:57 AM  599    1.01    0.00   79.80    0.00   17.17    0.00    1.01    0.00    0.00    1.01
>> 11:31:57 AM  600    0.00    0.00    0.00    0.00    0.00    0.00    0.99    0.00    0.00   99.01
>> 11:31:57 AM  601    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
>> 11:31:57 AM  602    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
>>
>>
>> There could some workloads which doesn't move irq's out, for which needs irqbalance change.
>> Looking into it.
>>
>>   Or is the VMM expected to set
>>> the CPU on the paravirt mask and give the vCPU sufficient time to move the
>>> task before yanking it away from the pCPU?
>>>
>>
>> If the vCPU is running something, it is going to run at some point on pCPU.
>> hypervisor will give the cycles to this vCPU by preempting some other vCPU.
>>
>> It is that using this infra, there is should be nothing on that paravirt vCPU.
>> That way collectively VMM gets only limited request for pCPU which it can satify
>> without vCPU preemption.
> 
> Ack! Just wanted to understand the usage.
> 
> P.S. I remember discussions during last LPC where we could communicate
> this unavailability via CPU capacity. Was that problematic for some
> reason? Sorry if I didn't follow this discussion earlier.
> 

Thanks for that question. It gives an opportunity to retrospect.

Yes, that's where we started, but it has a lot of implementation challenges.
Still an option though.

History up to the current state:

1. At LPC24 we presented the problem statement, and why existing approaches such as hotplug,
    cpuset cgroups or taskset are not viable solutions. Hotplug would have come in handy if the
    cost were low, but the overhead of the sched domain rebuild and the serial nature of hotplug
    make it not a viable option. One of the possible approaches was CPU capacity.

2. Issues with the CPU capacity approach:
    a. group_misfit_task needs to become the highest-priority group type. That alone would break
       big.LITTLE, since it relies on group_misfit_task while group_overloaded should have higher priority there.
    b. At high concurrency, tasks still moved to those CPUs with capacity=1.
    c. A lot of scheduler stats would need to be aware of the change in capacity, especially load balance/wakeup.
    d. In update_group_misfit(), the misfit load would need to be set based on capacity; the current
       code sets it to 0 because of the task_fits_cpu() handling.
    e. More challenges in RT.

That's when Tobias introduced a new group type called group_parked.
https://lore.kernel.org/all/20241204112149.25872-2-huschle@linux.ibm.com/

It has a relatively cleaner implementation compared to the CPU capacity approach.

It had a few disadvantages too:
1. It used to take around 8-10 seconds for tasks to move out of those CPUs. That was the main
    concern.
2. It needs a few stats-based changes in update_sg_lb_stats, which might be tricky in all scenarios.

That's when we looked at how tasks move out when a CPU goes offline. It happens quite fast too.
So we tried a similar mechanism, and this is where we are right now.

> [..snip..]
>>>> +    local_irq_save(flags);
>>>> +    preempt_disable();
>>>
>>> Disabling IRQs implies preemption is disabled.
>>>
>>
>> Most of the places stop_one_cpu_nowait called with preemption & irq disabled.
>> stopper runs at the next possible opportunity.
> 
> But is there any reason to do both local_irq_save() and
> preempt_disable()? include/linux/preempt.h defines preemptible() as:
>
>      #define preemptible()   (preempt_count() == 0 && !irqs_disabled())
> 
> so disabling IRQs should be sufficient right or am I missing something?
> 

f0498d2a54e79 (Peter Zijlstra) "sched: Fix stop_one_cpu_nowait() vs hotplug"
could be the answer you are looking for.

>>
>> stop_one_cpu_nowait
>>   ->queues the task into stopper list
>>      -> wake_up_process(stopper)
>>         -> set need_resched
>>           -> stopper runs as early as possible.
>>          

Re: [RFC PATCH v3 07/10] sched/core: Push current task from paravirt CPU
Posted by K Prateek Nayak 4 months, 4 weeks ago
Hello Shrikanth,

On 9/12/2025 10:52 AM, Shrikanth Hegde wrote:
> 
> 
> On 9/11/25 10:36 PM, K Prateek Nayak wrote:
>> Hello Shrikanth,
>>
>> On 9/11/2025 10:22 PM, Shrikanth Hegde wrote:
>>>>> +    if (is_cpu_paravirt(cpu))
>>>>> +        push_current_from_paravirt_cpu(rq);
>>>>
>>>> Does this mean paravirt CPU is capable of handling an interrupt but may
>>>> not be continuously available to run a task?
>>>
>>> When i run hackbench which involves fair bit of IRQ stuff, it moves out.
>>>
>>> For example,
>>>
>>> echo 600-710 > /sys/devices/system/cpu/paravirt
>>>
>>> 11:31:54 AM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
>>> 11:31:57 AM  598    2.04    0.00   77.55    0.00   18.37    0.00    1.02    0.00    0.00    1.02
>>> 11:31:57 AM  599    1.01    0.00   79.80    0.00   17.17    0.00    1.01    0.00    0.00    1.01
>>> 11:31:57 AM  600    0.00    0.00    0.00    0.00    0.00    0.00    0.99    0.00    0.00   99.01
>>> 11:31:57 AM  601    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
>>> 11:31:57 AM  602    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
>>>
>>>
>>> There could some workloads which doesn't move irq's out, for which needs irqbalance change.
>>> Looking into it.
>>>
>>>   Or is the VMM expected to set
>>>> the CPU on the paravirt mask and give the vCPU sufficient time to move the
>>>> task before yanking it away from the pCPU?
>>>>
>>>
>>> If the vCPU is running something, it is going to run at some point on pCPU.
>>> hypervisor will give the cycles to this vCPU by preempting some other vCPU.
>>>
>>> It is that using this infra, there is should be nothing on that paravirt vCPU.
>>> That way collectively VMM gets only limited request for pCPU which it can satify
>>> without vCPU preemption.
>>
>> Ack! Just wanted to understand the usage.
>>
>> P.S. I remember discussions during last LPC where we could communicate
>> this unavailability via CPU capacity. Was that problematic for some
>> reason? Sorry if I didn't follow this discussion earlier.
>>
> 
> Thanks for that questions. Gives a opportunity to retrospect.
> 
> Yes. That's where we started. but that has a lot of implementation challenges.
> Still an option though.
> 
> History upto current state:
> 
> 1. At LPC24 presented the problem statement, and why existing approaches such as hotplug,
>    cpuset cgroup or taskset are not viable solution. Hotplug would have come handy if the cost was low.
>    The overhead of sched domain rebuild and serial nature of hotplug makes it not viable option.
>    One of the possible approach was CPU Capacity.

Ack. Is creating an isolated partition on the fly too expensive as well?
I don't think creation of that partition is serialized, and it should
achieve a similar result with a single sched-domain rebuild. I'm
hoping the VMM doesn't change the paravirt mask at an alarming rate.

P.S. Some stupid benchmarking on a 256CPU machine:

    mkdir /sys/fs/cgroup/isol/
    echo isolated >  /sys/fs/cgroup/isol/cpuset.cpus.partition

    time for i in {1..1000}; do \
    echo "8-15" > /sys/fs/cgroup/isol/cpuset.cpus.exclusive; \
    echo "16-23" > /sys/fs/cgroup/isol/cpuset.cpus.exclusive; \
    done

    real    2m50.016s
    user    0m0.198s
    sys     1m47.708s

So that is about (170sec / 2000) ~ 85ms per cpuset operation.
Definitely more expensive than setting the paravirt but compare that to:

    for i in {8..15}; do echo 0 > /sys/devices/system/cpu/cpu$i/online; done; \
    for i in {8..15}; do echo 1 > /sys/devices/system/cpu/cpu$i/online; done; \
    for i in {16..23}; do echo 0 > /sys/devices/system/cpu/cpu$i/online; done; \
    for i in {16..23}; do echo 1 > /sys/devices/system/cpu/cpu$i/online; done;

    real    0m5.046s
    user    0m0.014s
    sys     0m0.110s

Definitely less expensive than a full hotplug.

> 
> 1. Issues with CPU Capacity approach:
>    a. Need to make group_misfit_task as the highest priority. That alone will break big.LITTLE
>       since it relies on group misfit and group_overload should have higher priority there.
>    b. At high concurrency tasks still moved those CPUs with CAPACITY=1.
>    c. A lot of scheduler stats would need to be aware of change in CAPACITY specially load balance/wakeup.

Ack. Thinking out loud: Can capacity go to 0 via the H/W pressure interface?
Maybe we can toggle the "sched_asym_cpucapacity" static branch without
actually having SD_ASYM_CPUCAPACITY in these special cases to let
asym_fits_cpu() steer away from these 0-capacity CPUs.

>    d. in update_group_misfit - need to set the misfit load based on capacity. the current code sets to 0,
>       because of task_fits_cpu stuff
>    e. More challenges in RT.
> 
> That's when Tobias had introduced a new group type called group_parked.
> https://lore.kernel.org/all/20241204112149.25872-2-huschle@linux.ibm.com/
>   It has relatively cleaner implementation compared to CPU CAPACITY.
> 
> It had a few disadvantages too:
> 1. It use to take around 8-10 seconds for tasks to move out of those CPUs. That was the main
>    concern.
> 2. Needs a few stats based changes in update_sg_lb_stats. might be tricky in all scenarios.
> 
> That's when we were exploring how the tasks move out when the cpu goes offline. It happens quite fast too.
> So tried a similar mechanism and this is where we are right now.

I agree push is great from that perspective.

> 
>> [..snip..]
>>>>> +    local_irq_save(flags);
>>>>> +    preempt_disable();
>>>>
>>>> Disabling IRQs implies preemption is disabled.
>>>>
>>>
>>> Most of the places stop_one_cpu_nowait called with preemption & irq disabled.
>>> stopper runs at the next possible opportunity.
>>
>> But is there any reason to do both local_irq_save() and
>> preempt_disable()? include/linux/preempt.h defines preemptible() as:
>>
>>      #define preemptible()   (preempt_count() == 0 && !irqs_disabled())
>>
>> so disabling IRQs should be sufficient right or am I missing something?
>>
> 
> f0498d2a54e79 (Peter Zijlstra) "sched: Fix stop_one_cpu_nowait() vs hotplug"
> could be the answer you are looking for.

I think in all the cases covered by that commit, "task_rq_unlock(...)" would
have enabled interrupts, which required that specific pattern, but here we
have preempt_disable() within a local_irq_save() section, which might not be
necessary.
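
For illustration, a minimal sketch of what the queueing sequence could look
like if the extra preempt_disable()/preempt_enable() pair is indeed redundant
here (untested; only valid if nothing else ends up relying on the preempt
count in this section):

	local_irq_save(flags);
	get_task_struct(push_task);

	rq_lock(rq, &rf);
	rq->push_task_work_done = 1;
	rq_unlock(rq, &rf);

	stop_one_cpu_nowait(rq->cpu, paravirt_push_cpu_stop, push_task,
			    this_cpu_ptr(&pv_push_task_work));
	local_irq_restore(flags);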

> 
>>>
>>> stop_one_cpu_nowait
>>>   ->queues the task into stopper list
>>>      -> wake_up_process(stopper)
>>>         -> set need_resched
>>>           -> stopper runs as early as possible.
>>>          
> 

-- 
Thanks and Regards,
Prateek

Re: [RFC PATCH v3 07/10] sched/core: Push current task from paravirt CPU
Posted by Shrikanth Hegde 4 months, 4 weeks ago

On 9/12/25 2:18 PM, K Prateek Nayak wrote:
> Hello Shrikanth,
> 
> On 9/12/2025 10:52 AM, Shrikanth Hegde wrote:
>>
>>
>> On 9/11/25 10:36 PM, K Prateek Nayak wrote:
>>> Hello Shrikanth,
>>>
>>> On 9/11/2025 10:22 PM, Shrikanth Hegde wrote:
>>>>>> +    if (is_cpu_paravirt(cpu))
>>>>>> +        push_current_from_paravirt_cpu(rq);
>>>>>
>>>>> Does this mean paravirt CPU is capable of handling an interrupt but may
>>>>> not be continuously available to run a task?
>>>>
>>>> When i run hackbench which involves fair bit of IRQ stuff, it moves out.
>>>>
>>>> For example,
>>>>
>>>> echo 600-710 > /sys/devices/system/cpu/paravirt
>>>>
>>>> 11:31:54 AM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
>>>> 11:31:57 AM  598    2.04    0.00   77.55    0.00   18.37    0.00    1.02    0.00    0.00    1.02
>>>> 11:31:57 AM  599    1.01    0.00   79.80    0.00   17.17    0.00    1.01    0.00    0.00    1.01
>>>> 11:31:57 AM  600    0.00    0.00    0.00    0.00    0.00    0.00    0.99    0.00    0.00   99.01
>>>> 11:31:57 AM  601    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
>>>> 11:31:57 AM  602    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
>>>>
>>>>
>>>> There could some workloads which doesn't move irq's out, for which needs irqbalance change.
>>>> Looking into it.
>>>>
>>>>    Or is the VMM expected to set
>>>>> the CPU on the paravirt mask and give the vCPU sufficient time to move the
>>>>> task before yanking it away from the pCPU?
>>>>>
>>>>
>>>> If the vCPU is running something, it is going to run at some point on pCPU.
>>>> hypervisor will give the cycles to this vCPU by preempting some other vCPU.
>>>>
>>>> It is that using this infra, there is should be nothing on that paravirt vCPU.
>>>> That way collectively VMM gets only limited request for pCPU which it can satify
>>>> without vCPU preemption.
>>>
>>> Ack! Just wanted to understand the usage.
>>>
>>> P.S. I remember discussions during last LPC where we could communicate
>>> this unavailability via CPU capacity. Was that problematic for some
>>> reason? Sorry if I didn't follow this discussion earlier.
>>>
>>
>> Thanks for that questions. Gives a opportunity to retrospect.
>>
>> Yes. That's where we started. but that has a lot of implementation challenges.
>> Still an option though.
>>
>> History upto current state:
>>
>> 1. At LPC24 presented the problem statement, and why existing approaches such as hotplug,
>>     cpuset cgroup or taskset are not viable solution. Hotplug would have come handy if the cost was low.
>>     The overhead of sched domain rebuild and serial nature of hotplug makes it not viable option.
>>     One of the possible approach was CPU Capacity.
> 
> Ack. Is creating an isolated partition on the fly too expensive too?
> I don't think creation of that partition is serialized and it should
> achieve a similar result with a single sched-domain rebuild and I'm
> hoping VMM doesn't change the paravirt mask at an alarming rate.
> 

That is a good idea too.

The main issue is when the workload uses taskset.

For example,
taskset -c 650-700 stress-ng --cpu=100 -t 10
echo isolated > cpuset.cpus.partition
echo 600-710 > cpuset.cpus.exclusive

Tasks move out and their CPU affinity is reset to all CPUs, similar to hotplug.
But both hotplug and the write to cpus.exclusive are triggered by the user, and hence the user
is aware of it.

I don't think it is a good idea to reset a user's CPU affinity without an action from them.

Looking at the code path of:
      cpuset_write_resmask
      update_exclusive_cpumask
    - update_parent_effective_cpumask
       + 6.16% cpuset_update_tasks_cpumask
            set_cpus_allowed_ptr
            __set_cpus_allowed_ptr
            affine_move_task

affine_move_task -> would call migration_cpu_stop -> Moves one task at a time

As you can see, we do the same/similar thing in the paravirt infra, but we don't touch/reset the task's CPU affinity.
Affined tasks continue to run if they are affined only to paravirt CPUs. If there is even one
non-paravirt CPU in their cpus_ptr, they will move there.
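
As a concrete illustration, the affinity handling boils down to this check from
push_current_from_paravirt_cpu() in the patch (sketch, surrounding locking omitted;
the taskset numbers are just the example from above):

	/* paravirt = 600-710, task bound via "taskset -c 650-700":
	 * cpus_ptr is a subset of cpu_paravirt_mask, so leave it alone.
	 * If even one allowed CPU is outside the mask, select_fallback_rq()
	 * moves the task there without touching its affinity. */
	if (cpumask_subset(p->cpus_ptr, cpu_paravirt_mask))
		return;
	cpu = select_fallback_rq(rq->cpu, p);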

> P.S. Some stupid benchmarking on a 256CPU machine:
> 
>      mkdir /sys/fs/cgroup/isol/
>      echo isolated >  /sys/fs/cgroup/isol/cpuset.cpus.partition
> 
>      time for i in {1..1000}; do \
>      echo "8-15" > /sys/fs/cgroup/isol/cpuset.cpus.exclusive; \
>      echo "16-23" > /sys/fs/cgroup/isol/cpuset.cpus.exclusive; \
>      done
> 
>      real    2m50.016s
>      user    0m0.198s
>      sys     1m47.708s
> 
> So that is about (170sec / 2000) ~ 85ms per cpuset operation.

That cost would be okay. The VMM isn't expected to change the mask at a very high rate.

> Definitely more expensive than setting the paravirt but compare that to:
> 
>      for i in {8..15}; do echo 0 > /sys/devices/system/cpu/cpu$i/online; done; \
>      for i in {8..15}; do echo 1 > /sys/devices/system/cpu/cpu$i/online; done; \
>      for i in {16..23}; do echo 0 > /sys/devices/system/cpu/cpu$i/online; done; \
>      for i in {16..23}; do echo 1 > /sys/devices/system/cpu/cpu$i/online; done;'
> 
>      real    0m5.046s
>      user    0m0.014s
>      sys     0m0.110s
> 
> Definitely less expensive than a full hotplug.

This happens mainly due to the synchronize_rcu there.

> 
>>
>> 1. Issues with CPU Capacity approach:
>>     a. Need to make group_misfit_task as the highest priority. That alone will break big.LITTLE
>>        since it relies on group misfit and group_overload should have higher priority there.
>>     b. At high concurrency tasks still moved those CPUs with CAPACITY=1.
>>     c. A lot of scheduler stats would need to be aware of change in CAPACITY specially load balance/wakeup.
> 
> Ack. Thinking out loud: Can capacity go to 0 via H/W pressure interface?
> Maybe we can toggle the "sched_asym_cpucapacity" static branch without
> actually having SD_ASYM_CAPACITY in these special case to enable
> asym_fits_cpu() steer away from these 0 capacity CPUs.

The bigger concern is around group_misfit_task, IMO.

> 
>>     d. in update_group_misfit - need to set the misfit load based on capacity. the current code sets to 0,
>>        because of task_fits_cpu stuff
>>     e. More challenges in RT.
>>
>> That's when Tobias had introduced a new group type called group_parked.
>> https://lore.kernel.org/all/20241204112149.25872-2-huschle@linux.ibm.com/
>>    It has relatively cleaner implementation compared to CPU CAPACITY.
>>
>> It had a few disadvantages too:
>> 1. It use to take around 8-10 seconds for tasks to move out of those CPUs. That was the main
>>     concern.
>> 2. Needs a few stats based changes in update_sg_lb_stats. might be tricky in all scenarios.
>>
>> That's when we were exploring how the tasks move out when the cpu goes offline. It happens quite fast too.
>> So tried a similar mechanism and this is where we are right now.
> 
> I agree push is great from that perspective.
> 

Yes. It is the same at the moment.

>>
>>> [..snip..]
>>>>>> +    local_irq_save(flags);
>>>>>> +    preempt_disable();
>>>>>
>>>>> Disabling IRQs implies preemption is disabled.
>>>>>
>>>>
>>>> Most of the places stop_one_cpu_nowait called with preemption & irq disabled.
>>>> stopper runs at the next possible opportunity.
>>>
>>> But is there any reason to do both local_irq_save() and
>>> preempt_disable()? include/linux/preempt.h defines preemptible() as:
>>>
>>>       #define preemptible()   (preempt_count() == 0 && !irqs_disabled())
>>>
>>> so disabling IRQs should be sufficient right or am I missing something?
>>>
>>
>> f0498d2a54e79 (Peter Zijlstra) "sched: Fix stop_one_cpu_nowait() vs hotplug"
>> could be the answer you are looking for.
> 
> I think in all the cases covered by that commit, "task_rq_unlock(...)" would
> have enabled interrupts which required that specified pattern but here we
> have preempt_disable() within a local_irq_save() section which might not be
> necessary.
> 
>>
>>>>
>>>> stop_one_cpu_nowait
>>>>    ->queues the task into stopper list
>>>>       -> wake_up_process(stopper)
>>>>          -> set need_resched
>>>>            -> stopper runs as early as possible.
>>>>           
>>
>