native_flush_tlb_multi() may be frequently called by flush_tlb_mm_range()
and arch_tlbbatch_flush() in production environments. When pages are
reclaimed or a process exits, native_flush_tlb_multi() sends IPIs to remote
CPUs and waits for all remote CPUs to complete their local TLB flushes.
The overall latency may reach tens of milliseconds due to a large number of
remote CPUs and other factors (such as interrupts being disabled). Since
flush_tlb_mm_range() and arch_tlbbatch_flush() always disable preemption,
this may cause increased scheduling latency for other threads on the
current CPU.
The previous patch converted flush_tlb_info from a per-cpu variable to an
on-stack variable. Additionally, it's no longer necessary to explicitly disable
preemption before calling smp_call*() since they internally handle the
preemption logic. Now it's safe to enable preemption during
native_flush_tlb_multi().
Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
---
arch/x86/kernel/kvm.c | 4 +++-
arch/x86/mm/tlb.c | 9 +++++++--
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 3bc062363814..4f7f4c1149b9 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -668,8 +668,10 @@ static void kvm_flush_tlb_multi(const struct cpumask *cpumask,
u8 state;
int cpu;
struct kvm_steal_time *src;
- struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
+ struct cpumask *flushmask;
+ guard(preempt)();
+ flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
cpumask_copy(flushmask, cpumask);
/*
* We have to call flush only on online vCPUs. And
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index cfc3a72477f5..58c6f3d2f993 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1421,9 +1421,11 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
if (mm_global_asid(mm)) {
broadcast_tlb_flush(info);
} else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
+ put_cpu();
info->trim_cpumask = should_trim_cpumask(mm);
flush_tlb_multi(mm_cpumask(mm), info);
consider_global_asid(mm);
+ goto invalidate;
} else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
lockdep_assert_irqs_enabled();
local_irq_disable();
@@ -1432,6 +1434,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
}
put_cpu();
+invalidate:
mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
@@ -1691,7 +1694,9 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
invlpgb_flush_all_nonglobals();
batch->unmapped_pages = false;
} else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+ put_cpu();
flush_tlb_multi(&batch->cpumask, &info);
+ goto clear;
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
local_irq_disable();
@@ -1699,9 +1704,9 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
local_irq_enable();
}
- cpumask_clear(&batch->cpumask);
-
put_cpu();
+clear:
+ cpumask_clear(&batch->cpumask);
}
/*
--
2.20.1