The generic entry code attempts to reschedule on every non-NMI exception
return to kernel mode. At the moment, arm64 only attempts to reschedule
on EL1 IRQ exception return.
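For reference, the generic irqentry_exit() in kernel/entry/common.c does
roughly the following on a kernel-mode return (paraphrased sketch; details
differ between kernel versions):

	} else if (!regs_irqs_disabled(regs)) {
		if (state.exit_rcu) {
			/* RCU was not watching on entry: no reschedule here */
			...
			return;
		}

		instrumentation_begin();
		if (IS_ENABLED(CONFIG_PREEMPTION))
			irqentry_exit_cond_resched();
		...
	}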
In preparation for moving arm64 over to the generic entry code, move
arm64_preempt_schedule_irq() into exit_to_kernel_mode(), so that not only
EL1 IRQ returns but all EL1 non-NMI exception returns get a chance to
reschedule. Rescheduling is only possible if IRQs were enabled when the
exception was taken, so place the arm64_preempt_schedule_irq() call in the
branch where regs_irqs_disabled() is false. Note that it will only attempt
to reschedule when TINY_RCU is enabled or current is not an idle task.
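After this change the EL1 exit path looks roughly as follows (sketch based
on the diff below, not a literal excerpt):

	static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs,
							  irqentry_state_t state)
	{
		...
		if (!regs_irqs_disabled(regs)) {
			if (state.exit_rcu) {
				/* RCU was not watching on entry: skip preemption */
				...
				return;
			}

			arm64_preempt_schedule_irq();

			trace_hardirqs_on();
		} else {
			...
		}
	}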
As Mark pointed out, this change will have the following two key impacts:
- " We'll preempt even without taking a "real" interrupt. That
shouldn't result in preemption that wasn't possible before,
but it does change the probability of preempting at certain points,
and might have a performance impact, so probably warrants a
benchmark."
- " We will not preempt when taking interrupts from a region of kernel
code where IRQs are enabled but RCU is not watching, matching the
behaviour of the generic entry code.
This has the potential to introduce livelock if we can ever have a
screaming interrupt in such a region, so we'll need to go figure out
whether that's actually a problem.
Having this as a separate patch will make it easier to test/bisect
for that specifically."
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
arch/arm64/kernel/entry-common.c | 88 ++++++++++++++++----------------
1 file changed, 44 insertions(+), 44 deletions(-)
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 1687627b2ecf..7a588515ee07 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -75,6 +75,48 @@ static noinstr irqentry_state_t enter_from_kernel_mode(struct pt_regs *regs)
return state;
}
+#ifdef CONFIG_PREEMPT_DYNAMIC
+DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#define need_irq_preemption() \
+ (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
+#else
+#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
+#endif
+
+static void __sched arm64_preempt_schedule_irq(void)
+{
+ if (!need_irq_preemption())
+ return;
+
+ /*
+ * Note: thread_info::preempt_count includes both thread_info::count
+ * and thread_info::need_resched, and is not equivalent to
+ * preempt_count().
+ */
+ if (READ_ONCE(current_thread_info()->preempt_count) != 0)
+ return;
+
+ /*
+ * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
+ * priority masking is used the GIC irqchip driver will clear DAIF.IF
+ * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
+ * DAIF we must have handled an NMI, so skip preemption.
+ */
+ if (system_uses_irq_prio_masking() && read_sysreg(daif))
+ return;
+
+ /*
+ * Preempting a task from an IRQ means we leave copies of PSTATE
+ * on the stack. cpufeature's enable calls may modify PSTATE, but
+ * resuming one of these preempted tasks would undo those changes.
+ *
+ * Only allow a task to be preempted once cpufeatures have been
+ * enabled.
+ */
+ if (system_capabilities_finalized())
+ preempt_schedule_irq();
+}
+
/*
* Handle IRQ/context state management when exiting to kernel mode.
* After this function returns it is not safe to call regular kernel code,
@@ -97,6 +139,8 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs,
return;
}
+ arm64_preempt_schedule_irq();
+
trace_hardirqs_on();
} else {
if (state.exit_rcu)
@@ -281,48 +325,6 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs,
lockdep_hardirqs_on(CALLER_ADDR0);
}
-#ifdef CONFIG_PREEMPT_DYNAMIC
-DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
-#define need_irq_preemption() \
- (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
-#else
-#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
-#endif
-
-static void __sched arm64_preempt_schedule_irq(void)
-{
- if (!need_irq_preemption())
- return;
-
- /*
- * Note: thread_info::preempt_count includes both thread_info::count
- * and thread_info::need_resched, and is not equivalent to
- * preempt_count().
- */
- if (READ_ONCE(current_thread_info()->preempt_count) != 0)
- return;
-
- /*
- * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
- * priority masking is used the GIC irqchip driver will clear DAIF.IF
- * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
- * DAIF we must have handled an NMI, so skip preemption.
- */
- if (system_uses_irq_prio_masking() && read_sysreg(daif))
- return;
-
- /*
- * Preempting a task from an IRQ means we leave copies of PSTATE
- * on the stack. cpufeature's enable calls may modify PSTATE, but
- * resuming one of these preempted tasks would undo those changes.
- *
- * Only allow a task to be preempted once cpufeatures have been
- * enabled.
- */
- if (system_capabilities_finalized())
- preempt_schedule_irq();
-}
-
static void do_interrupt_handler(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
@@ -591,8 +593,6 @@ static __always_inline void __el1_irq(struct pt_regs *regs,
do_interrupt_handler(regs, handler);
irq_exit_rcu();
- arm64_preempt_schedule_irq();
-
exit_to_kernel_mode(regs, state);
}
static void noinstr el1_interrupt(struct pt_regs *regs,
--
2.34.1