Actively push out any task running on a paravirt CPU. Since the task is
running on the CPU, a stopper thread needs to be spawned to push it out.
If the task is sleeping, it is expected to move out when it wakes up. In
case it still chooses a paravirt CPU, the next tick will move it out.
However, if the task is pinned only to paravirt CPUs, it will continue
running there.
Though the code is almost the same as __balance_push_cpu_stop() and
quite close to push_cpu_stop(), it provides a cleaner implementation
w.r.t. the PARAVIRT config.
Add a push_task_work_done flag to protect the pv_push_task_work buffer.
It has been placed in an empty slot of struct rq, keeping 64/128 byte
cachelines in mind.
This currently works only for FAIR and RT.
Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
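Note for reviewers: below is a minimal, hypothetical sketch (not part
of this patch) of how a platform could mark a CPU as paravirt so that
sched_tick() starts pushing tasks off it. It only assumes the
__cpu_paravirt_mask and cpu_paravirt_push_tasks symbols from this
series; the actual arch/hypervisor hook is out of scope here.

	/* Hypothetical helper: mark/unmark @cpu as paravirt */
	static void set_cpu_paravirt(int cpu, bool enable)
	{
		if (enable) {
			cpumask_set_cpu(cpu, &__cpu_paravirt_mask);
			/* ticks on @cpu will now push its current task away */
			static_branch_enable(&cpu_paravirt_push_tasks);
		} else {
			cpumask_clear_cpu(cpu, &__cpu_paravirt_mask);
			/* keep the key on while any CPU is still paravirt */
			if (cpumask_empty(&__cpu_paravirt_mask))
				static_branch_disable(&cpu_paravirt_push_tasks);
		}
	}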
kernel/sched/core.c | 84 ++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 9 ++++-
2 files changed, 92 insertions(+), 1 deletion(-)
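For review convenience, the push_task_work_done handshake condensed
(this mirrors the logic in the diff below; all names are from this
patch):

	/* tick path, on the paravirt CPU */
	if (rq->push_task_work_done)
		return;				/* stopper already queued */
	rq_lock(rq, &rf);
	rq->push_task_work_done = 1;		/* claim pv_push_task_work */
	rq_unlock(rq, &rf);
	stop_one_cpu_nowait(rq->cpu, paravirt_push_cpu_stop, push_task,
			    this_cpu_ptr(&pv_push_task_work));

	/* stopper callback, also on the paravirt CPU */
	rq_lock(rq, &rf);
	rq->push_task_work_done = 0;		/* buffer reusable again */
	/* ... __migrate_task() to a CPU from select_fallback_rq() ... */
	rq_unlock(rq, &rf);

Both paths run on the paravirt CPU itself and flip the flag under the
rq lock, so a second tick cannot queue pv_push_task_work again while a
previous request is still pending.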
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 279b0dd72b5e..1f9df5b8a3a2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5629,6 +5629,10 @@ void sched_tick(void)
sched_clock_tick();
+ /* Push the current task out if this is a paravirt CPU */
+ if (is_cpu_paravirt(cpu))
+ push_current_from_paravirt_cpu(rq);
+
rq_lock(rq, &rf);
donor = rq->donor;
@@ -10977,4 +10981,84 @@ void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx)
struct cpumask __cpu_paravirt_mask __read_mostly;
EXPORT_SYMBOL(__cpu_paravirt_mask);
DEFINE_STATIC_KEY_FALSE(cpu_paravirt_push_tasks);
+
+static DEFINE_PER_CPU(struct cpu_stop_work, pv_push_task_work);
+
+static int paravirt_push_cpu_stop(void *arg)
+{
+ struct task_struct *p = arg;
+ struct rq *rq = this_rq();
+ struct rq_flags rf;
+ int cpu;
+
+ raw_spin_lock_irq(&p->pi_lock);
+ rq_lock(rq, &rf);
+ rq->push_task_work_done = 0;
+
+ update_rq_clock(rq);
+
+ if (task_rq(p) == rq && task_on_rq_queued(p)) {
+ cpu = select_fallback_rq(rq->cpu, p);
+ rq = __migrate_task(rq, &rf, p, cpu);
+ }
+
+ rq_unlock(rq, &rf);
+ raw_spin_unlock_irq(&p->pi_lock);
+ put_task_struct(p);
+
+ return 0;
+}
+
+/* A CPU is marked as paravirt when there is contention for the
+ * underlying physical CPU and using it will lead to hypervisor
+ * preemptions. It is better not to use such a CPU.
+ *
+ * In case any task is scheduled on such a CPU, move it out. In
+ * select_fallback_rq() a non-paravirt CPU will be chosen and
+ * henceforth the task shouldn't come back to this CPU.
+ */
+void push_current_from_paravirt_cpu(struct rq *rq)
+{
+ struct task_struct *push_task = rq->curr;
+ unsigned long flags;
+ struct rq_flags rf;
+
+ if (!is_cpu_paravirt(rq->cpu))
+ return;
+
+ /* The idle task can't be pushed out */
+ if (rq->curr == rq->idle)
+ return;
+
+ /* Only do this for SCHED_NORMAL and RT for now */
+ if (push_task->sched_class != &fair_sched_class &&
+ push_task->sched_class != &rt_sched_class)
+ return;
+
+ if (kthread_is_per_cpu(push_task) ||
+ is_migration_disabled(push_task))
+ return;
+
+ /* Is the task affine only to paravirt CPUs? */
+ if (cpumask_subset(push_task->cpus_ptr, cpu_paravirt_mask))
+ return;
+
+ /* A stopper thread is already queued for this; don't race with it */
+ if (rq->push_task_work_done == 1)
+ return;
+
+ local_irq_save(flags);
+ preempt_disable();
+
+ get_task_struct(push_task);
+
+ rq_lock(rq, &rf);
+ rq->push_task_work_done = 1;
+ rq_unlock(rq, &rf);
+
+ stop_one_cpu_nowait(rq->cpu, paravirt_push_cpu_stop, push_task,
+ this_cpu_ptr(&pv_push_task_work));
+ preempt_enable();
+ local_irq_restore(flags);
+}
#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8f9991453d36..5077a32593da 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1187,7 +1187,9 @@ struct rq {
unsigned char nohz_idle_balance;
unsigned char idle_balance;
-
+#ifdef CONFIG_PARAVIRT
+ bool push_task_work_done;
+#endif
unsigned long misfit_task_load;
/* For active balancing */
@@ -3890,11 +3892,16 @@ static inline bool is_cpu_paravirt(int cpu)
return false;
}
+
+void push_current_from_paravirt_cpu(struct rq *rq);
+
#else /* !CONFIG_PARAVIRT */
static inline bool is_cpu_paravirt(int cpu)
{
return false;
}
+
+static inline void push_current_from_paravirt_cpu(struct rq *rq) { }
#endif /* !CONFIG_PARAVIRT */
#endif /* _KERNEL_SCHED_SCHED_H */
--
2.47.3