From: Khalid Ali <khaliidcaliy@gmail.com>
When initializing the CR4 bits PSE and PGE, CR4 is written once for
each bit, i.e. twice in total. This is redundant.

Instead, set both bits first and write CR4 once, avoiding the redundant
write. This makes it consistent with the CR0 writes, which set the bits
and write once.
Signed-off-by: Khalid Ali <khaliidcaliy@gmail.com>
---
arch/x86/kernel/head_64.S | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 4390a28f7dad..dfb5390e5c9a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -222,12 +222,9 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
 
 	/* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
 	btsl	$X86_CR4_PSE_BIT, %ecx
-	movq	%rcx, %cr4
-
-	/*
-	 * Set CR4.PGE to re-enable global translations.
-	 */
+	/* Set CR4.PGE to re-enable global translations. */
 	btsl	$X86_CR4_PGE_BIT, %ecx
+
 	movq	%rcx, %cr4
 
 #ifdef CONFIG_SMP
--
2.49.0
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 4390a28f7dad..dfb5390e5c9a 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -222,12 +222,9 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
>
>  	/* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
>  	btsl	$X86_CR4_PSE_BIT, %ecx
> -	movq	%rcx, %cr4
> -
> -	/*
> -	 * Set CR4.PGE to re-enable global translations.
> -	 */
> +	/* Set CR4.PGE to re-enable global translations. */
>  	btsl	$X86_CR4_PGE_BIT, %ecx
> +
>  	movq	%rcx, %cr4

The comments are at best misleading, but you've broken the TLB flush
being performed which depends on the double write.

This logic is intentionally performing a write with CR4.PGE=0 followed
by one with CR4.PGE=1 to flush all global mappings.

~Andrew
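For reference, a minimal sketch of the sequence Andrew describes, as the
pre-patch code performs it (reconstructed from the hunk quoted above; the
surrounding context is an assumption):

	btsl	$X86_CR4_PSE_BIT, %ecx
	movq	%rcx, %cr4	/* write #1, CR4.PGE=0: all global TLB entries flushed */

	btsl	$X86_CR4_PGE_BIT, %ecx
	movq	%rcx, %cr4	/* write #2, CR4.PGE=1: global translations re-enabled */

Folding the two writes into one skips the intermediate PGE=0 state, so the
global-TLB flush never happens.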
On Tue, Jul 15, 2025 at 10:21:20PM +0100, Andrew Cooper wrote:
> > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> > index 4390a28f7dad..dfb5390e5c9a 100644
> > --- a/arch/x86/kernel/head_64.S
> > +++ b/arch/x86/kernel/head_64.S
> > @@ -222,12 +222,9 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
> >
> >  	/* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
> >  	btsl	$X86_CR4_PSE_BIT, %ecx
> > -	movq	%rcx, %cr4
> > -
> > -	/*
> > -	 * Set CR4.PGE to re-enable global translations.
> > -	 */
> > +	/* Set CR4.PGE to re-enable global translations. */
> >  	btsl	$X86_CR4_PGE_BIT, %ecx
> > +
> >  	movq	%rcx, %cr4
>
> The comments are at best misleading, but you've broken the TLB flush
> being performed which depends on the double write.
>
> This logic is intentionally performing a write with CR4.PGE=0 followed
> by one with CR4.PGE=1 to flush all global mappings.

Thanks both of you for the catch - I didn't realize that fact. Zapped now.

So yeah, maybe there should be a comment explaining this subtlety.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
> > > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> > > index 4390a28f7dad..dfb5390e5c9a 100644
> > > --- a/arch/x86/kernel/head_64.S
> > > +++ b/arch/x86/kernel/head_64.S
> > > @@ -222,12 +222,9 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
> > >
> > >  	/* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
> > >  	btsl	$X86_CR4_PSE_BIT, %ecx
> > > -	movq	%rcx, %cr4
> > > -
> > > -	/*
> > > -	 * Set CR4.PGE to re-enable global translations.
> > > -	 */
> > > +	/* Set CR4.PGE to re-enable global translations. */
> > >  	btsl	$X86_CR4_PGE_BIT, %ecx
> > > +
> > >  	movq	%rcx, %cr4
> >
> > The comments are at best misleading, but you've broken the TLB flush
> > being performed which depends on the double write.
> >
> > This logic is intentionally performing a write with CR4.PGE=0 followed
> > by one with CR4.PGE=1 to flush all global mappings.
>
> Thanks both of you for the catch - I didn't realize that fact. Zapped now.
>
> So yeah, maybe there should be a comment explaining this subtlety.
>
> Thx.

I think the comment is misplaced. It would be better to move the part
starting with "From the SDM" to below the #endif. As for the second move,
the comment above it isn't useful - everyone knows what the PGE bit does -
so it would be better to replace it with the misplaced comment above.

Thanks
Khalid Ali
On Wed, Jul 16, 2025 at 09:07:20AM +0000, Khalid Ali wrote:
> I think the comment is misplaced. It would be better to move the part
> starting with "From the SDM" to below the #endif. As for the second move,
> the comment above it isn't useful - everyone knows what the PGE bit does -
> so it would be better to replace it with the misplaced comment above.

The comment's fine.

The %cr4 writes need a comment explaining what they do because their
sequence is special.

I don't mind documenting non-obvious asm in a more detailed fashion.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
On 2025-07-15 11:16, Khalid Ali wrote:
> From: Khalid Ali <khaliidcaliy@gmail.com>
>
> When initializing the CR4 bits PSE and PGE, CR4 is written once for
> each bit, i.e. twice in total. This is redundant.
>
> Instead, set both bits first and write CR4 once, avoiding the redundant
> write. This makes it consistent with the CR0 writes, which set the bits
> and write once.
>
> Signed-off-by: Khalid Ali <khaliidcaliy@gmail.com>

In case this wasn't obvious, this is:

Nacked-by: H. Peter Anvin (Intel) <hpa@zytor.com>

... with extreme prejudice.

	-hpa
On 2025-07-15 11:16, Khalid Ali wrote:
> From: Khalid Ali <khaliidcaliy@gmail.com>
>
> When initializing the CR4 bits PSE and PGE, CR4 is written once for
> each bit, i.e. twice in total. This is redundant.
>
> Instead, set both bits first and write CR4 once, avoiding the redundant
> write. This makes it consistent with the CR0 writes, which set the bits
> and write once.
>
> Signed-off-by: Khalid Ali <khaliidcaliy@gmail.com>
> ---
>  arch/x86/kernel/head_64.S | 7 ++-----
>  1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 4390a28f7dad..dfb5390e5c9a 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -222,12 +222,9 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
>
>  	/* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
>  	btsl	$X86_CR4_PSE_BIT, %ecx
> -	movq	%rcx, %cr4
> -
> -	/*
> -	 * Set CR4.PGE to re-enable global translations.
> -	 */
> +	/* Set CR4.PGE to re-enable global translations. */
>  	btsl	$X86_CR4_PGE_BIT, %ecx
> +
>  	movq	%rcx, %cr4
>
>  #ifdef CONFIG_SMP

The double write is intentional:

	/*
	 * Create a mask of CR4 bits to preserve. Omit PGE in order to flush
	 * global 1:1 translations from the TLBs.
	 *
	 * From the SDM:
	 * "If CR4.PGE is changing from 0 to 1, there were no global TLB
	 * entries before the execution; if CR4.PGE is changing from 1 to 0,
	 * there will be no global TLB entries after the execution."
	 */
	movl	$(X86_CR4_PAE | X86_CR4_LA57), %edx
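The same idiom exists in C form in the kernel's TLB-flush code (it is what
native_flush_tlb_global() ultimately boils down to). A minimal sketch,
assuming cr4 holds the final value with X86_CR4_PGE set and that
native_write_cr4() from <asm/special_insns.h> is available; the helper name
pge_toggle_flush() is illustrative, not a kernel symbol:

	/* Toggle CR4.PGE off and back on to invalidate all global TLB entries. */
	static inline void pge_toggle_flush(unsigned long cr4)
	{
		native_write_cr4(cr4 ^ X86_CR4_PGE);	/* PGE 1 -> 0: globals dropped */
		native_write_cr4(cr4);			/* PGE 0 -> 1: globals restored */
	}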
On 15.07.25 at 21:16, Khalid Ali wrote:
> From: Khalid Ali <khaliidcaliy@gmail.com>
>
> When initializing the CR4 bits PSE and PGE, CR4 is written once for
> each bit, i.e. twice in total. This is redundant.
>
> Instead, set both bits first and write CR4 once, avoiding the redundant
> write. This makes it consistent with the CR0 writes, which set the bits
> and write once.
>
> Signed-off-by: Khalid Ali <khaliidcaliy@gmail.com>
> ---
>  arch/x86/kernel/head_64.S | 7 ++-----
>  1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 4390a28f7dad..dfb5390e5c9a 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -222,12 +222,9 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
>
>  	/* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
>  	btsl	$X86_CR4_PSE_BIT, %ecx
> -	movq	%rcx, %cr4
> -
> -	/*
> -	 * Set CR4.PGE to re-enable global translations.
> -	 */
> +	/* Set CR4.PGE to re-enable global translations. */
>  	btsl	$X86_CR4_PGE_BIT, %ecx
> +
>  	movq	%rcx, %cr4
>
>  #ifdef CONFIG_SMP

Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
The following commit has been merged into the x86/cleanups branch of tip:
Commit-ID: 0149fff886a62465b21d80fa53615ee7de3d72f1
Gitweb: https://git.kernel.org/tip/0149fff886a62465b21d80fa53615ee7de3d72f1
Author: Khalid Ali <khaliidcaliy@gmail.com>
AuthorDate: Tue, 15 Jul 2025 18:16:10
Committer: Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Tue, 15 Jul 2025 20:52:56 +02:00
x86/boot: Avoid redundant %cr4 write in startup_64()
When initializing %cr4 bits PSE and PGE, %cr4 is written after each bit is
set. Remove the redundant write.
No functional changes.
[ bp: Massage. ]
Signed-off-by: Khalid Ali <khaliidcaliy@gmail.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Link: https://lore.kernel.org/20250715181709.1040-1-khaliidcaliy@gmail.com
---
arch/x86/kernel/head_64.S | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 3e9b3a3..5c4be47 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -224,11 +224,7 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
 
 	/* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
 	btsl	$X86_CR4_PSE_BIT, %ecx
-	movq	%rcx, %cr4
-
-	/*
-	 * Set CR4.PGE to re-enable global translations.
-	 */
+	/* Set CR4.PGE to re-enable global translations. */
 	btsl	$X86_CR4_PGE_BIT, %ecx
 	movq	%rcx, %cr4
 