From: Chuyi Zhou <zhouchuyi@bytedance.com>
Date: Tue, 3 Feb 2026 19:23:59 +0800
Subject: [PATCH 09/11] x86/mm: Move flush_tlb_info back to the stack
Message-Id: <20260203112401.3889029-10-zhouchuyi@bytedance.com>
In-Reply-To: <20260203112401.3889029-1-zhouchuyi@bytedance.com>
References: <20260203112401.3889029-1-zhouchuyi@bytedance.com>

Commit 3db6d5a5ecaf ("x86/mm/tlb: Remove 'struct flush_tlb_info' from the
stack") moved flush_tlb_info from the stack to a per-CPU variable. That
brought a performance improvement of around 3% in extreme tests, but it
also requires every flush_tlb* operation to keep preemption disabled for
its whole duration so that the per-CPU flush_tlb_info is not modified
concurrently. The flush_tlb* paths send IPIs to remote CPUs and wait
synchronously for all of them to finish their local TLB flushes, which
can take tens of milliseconds when remote CPUs are running with
interrupts disabled or when the number of remote CPUs is large.

To improve kernel real-time behavior, move flush_tlb_info back onto the
stack. This is a preparation for enabling preemption during TLB flushes
in the next patch.

Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
---
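As a rough illustration of the pattern this patch switches to (a minimal
user-space sketch, not the kernel code itself; struct flush_info and
do_flush() are made-up names for the example), each caller builds the
flush descriptor with a designated initializer on its own stack and
passes it down by pointer, so there is no shared per-CPU slot that has to
be protected against concurrent writers by disabling preemption:

#include <stdio.h>

/* Simplified stand-in for struct flush_tlb_info. */
struct flush_info {
	unsigned long start;
	unsigned long end;
	int initiating_cpu;
};

/*
 * The callee only reads the descriptor, so a pointer to the caller's
 * stack object is sufficient.
 */
static void do_flush(const struct flush_info *info)
{
	printf("flush [%#lx, %#lx) requested by CPU %d\n",
	       info->start, info->end, info->initiating_cpu);
}

int main(void)
{
	/*
	 * On-stack descriptor with a designated initializer: every caller
	 * owns its own copy, so nothing has to stay non-preemptible just
	 * to keep a shared structure from being overwritten.
	 */
	struct flush_info info = {
		.start          = 0x1000,
		.end            = 0x5000,
		.initiating_cpu = 0,
	};

	do_flush(&info);
	return 0;
}
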
 arch/x86/mm/tlb.c | 124 ++++++++++++++++++----------------------------
 1 file changed, 49 insertions(+), 75 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index f5b93e01e347..2d68297ed35b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1394,71 +1394,30 @@ void flush_tlb_multi(const struct cpumask *cpumask,
  */
 unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);
-
-#ifdef CONFIG_DEBUG_VM
-static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
-#endif
-
-static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
-			unsigned long start, unsigned long end,
-			unsigned int stride_shift, bool freed_tables,
-			u64 new_tlb_gen)
+void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+				unsigned long end, unsigned int stride_shift,
+				bool freed_tables)
 {
-	struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);
+	int cpu = get_cpu();
 
-#ifdef CONFIG_DEBUG_VM
-	/*
-	 * Ensure that the following code is non-reentrant and flush_tlb_info
-	 * is not overwritten. This means no TLB flushing is initiated by
-	 * interrupt handlers and machine-check exception handlers.
-	 */
-	BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
-#endif
+	struct flush_tlb_info info = {
+		.mm		= mm,
+		.stride_shift	= stride_shift,
+		.freed_tables	= freed_tables,
+		.trim_cpumask	= 0,
+		.initiating_cpu	= cpu
+	};
 
-	/*
-	 * If the number of flushes is so large that a full flush
-	 * would be faster, do a full flush.
-	 */
 	if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) {
 		start = 0;
 		end = TLB_FLUSH_ALL;
 	}
 
-	info->start		= start;
-	info->end		= end;
-	info->mm		= mm;
-	info->stride_shift	= stride_shift;
-	info->freed_tables	= freed_tables;
-	info->new_tlb_gen	= new_tlb_gen;
-	info->initiating_cpu	= smp_processor_id();
-	info->trim_cpumask	= 0;
-
-	return info;
-}
-
-static void put_flush_tlb_info(void)
-{
-#ifdef CONFIG_DEBUG_VM
-	/* Complete reentrancy prevention checks */
-	barrier();
-	this_cpu_dec(flush_tlb_info_idx);
-#endif
-}
-
-void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-				unsigned long end, unsigned int stride_shift,
-				bool freed_tables)
-{
-	struct flush_tlb_info *info;
-	int cpu = get_cpu();
-	u64 new_tlb_gen;
-
 	/* This is also a barrier that synchronizes with switch_mm(). */
-	new_tlb_gen = inc_mm_tlb_gen(mm);
+	info.new_tlb_gen = inc_mm_tlb_gen(mm);
 
-	info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
-				  new_tlb_gen);
+	info.start = start;
+	info.end = end;
 
 	/*
 	 * flush_tlb_multi() is not optimized for the common case in which only
@@ -1466,19 +1425,18 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	 * flush_tlb_func_local() directly in this case.
 	 */
 	if (mm_global_asid(mm)) {
-		broadcast_tlb_flush(info);
+		broadcast_tlb_flush(&info);
 	} else if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
-		info->trim_cpumask = should_trim_cpumask(mm);
-		flush_tlb_multi(mm_cpumask(mm), info);
+		info.trim_cpumask = should_trim_cpumask(mm);
+		flush_tlb_multi(mm_cpumask(mm), &info);
 		consider_global_asid(mm);
 	} else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
 		lockdep_assert_irqs_enabled();
 		local_irq_disable();
-		flush_tlb_func(info);
+		flush_tlb_func(&info);
 		local_irq_enable();
 	}
 
-	put_flush_tlb_info();
 	put_cpu();
 	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
 }
@@ -1548,19 +1506,29 @@ static void kernel_tlb_flush_range(struct flush_tlb_info *info)
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	struct flush_tlb_info *info;
+	struct flush_tlb_info info = {
+		.mm		= NULL,
+		.stride_shift	= PAGE_SHIFT,
+		.freed_tables	= false,
+		.trim_cpumask	= 0,
+		.new_tlb_gen	= TLB_GENERATION_INVALID
+	};
 
 	guard(preempt)();
 
-	info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
-				  TLB_GENERATION_INVALID);
+	if ((end - start) >> PAGE_SHIFT > tlb_single_page_flush_ceiling) {
+		start = 0;
+		end = TLB_FLUSH_ALL;
+	}
 
-	if (info->end == TLB_FLUSH_ALL)
-		kernel_tlb_flush_all(info);
-	else
-		kernel_tlb_flush_range(info);
+	info.initiating_cpu = smp_processor_id();
+	info.start = start;
+	info.end = end;
 
-	put_flush_tlb_info();
+	if (info.end == TLB_FLUSH_ALL)
+		kernel_tlb_flush_all(&info);
+	else
+		kernel_tlb_flush_range(&info);
 }
 
 /*
@@ -1728,12 +1696,19 @@ EXPORT_SYMBOL_FOR_KVM(__flush_tlb_all);
 
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
-	struct flush_tlb_info *info;
 	int cpu = get_cpu();
 
-	info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
-				  TLB_GENERATION_INVALID);
-
+	struct flush_tlb_info info = {
+		.start		= 0,
+		.end		= TLB_FLUSH_ALL,
+		.mm		= NULL,
+		.stride_shift	= 0,
+		.freed_tables	= false,
+		.new_tlb_gen	= TLB_GENERATION_INVALID,
+		.initiating_cpu	= cpu,
+		.trim_cpumask	= 0,
+	};
+
 	/*
 	 * flush_tlb_multi() is not optimized for the common case in which only
 	 * a local TLB flush is needed. Optimize this use-case by calling
@@ -1743,17 +1718,16 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 		invlpgb_flush_all_nonglobals();
 		batch->unmapped_pages = false;
 	} else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
-		flush_tlb_multi(&batch->cpumask, info);
+		flush_tlb_multi(&batch->cpumask, &info);
 	} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
 		lockdep_assert_irqs_enabled();
 		local_irq_disable();
-		flush_tlb_func(info);
+		flush_tlb_func(&info);
 		local_irq_enable();
 	}
 
 	cpumask_clear(&batch->cpumask);
 
-	put_flush_tlb_info();
 	put_cpu();
 }
 
-- 
2.20.1