The value of the SCTLR2_ELx register is UNKNOWN after reset.
If the firmware initializes these registers properly, no additional
initialization is required.
However, in cases where they are not initialized correctly,
initialize the SCTLR2_ELx registers during CPU/vCPU boot
to prevent unexpected system behavior caused by invalid values.
Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
---
arch/arm64/include/asm/assembler.h | 15 +++++++++++++++
arch/arm64/include/asm/el2_setup.h | 7 +++++++
arch/arm64/include/asm/sysreg.h | 5 +++++
arch/arm64/kernel/head.S | 5 +++++
arch/arm64/kernel/hyp-stub.S | 10 ++++++++++
arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 +++
6 files changed, 45 insertions(+)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 23be85d93348..c25c2aed5125 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -738,6 +738,21 @@ alternative_endif
set_sctlr sctlr_el2, \reg
.endm
+/* Set SCTLR2_ELx to the @reg value. */
+.macro set_sctlr2_elx, el, reg, tmp
+ mrs_s \tmp, SYS_ID_AA64MMFR3_EL1
+ ubfx \tmp, \tmp, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4
+ cbz \tmp, .Lskip_sctlr2_\@
+ .if \el == 2
+ msr_s SYS_SCTLR2_EL2, \reg
+ .elseif \el == 12
+ msr_s SYS_SCTLR2_EL12, \reg
+ .else
+ msr_s SYS_SCTLR2_EL1, \reg
+ .endif
+.Lskip_sctlr2_\@:
+.endm
+
/*
* Check whether asm code should yield as soon as it is able. This is
* the case if we are currently running in task context, and the
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index d9529dfc4783..2addf7c096fc 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -48,6 +48,12 @@
isb
.endm
+.macro __init_sctlr2_el2
+ mov_q x0, INIT_SCTLR2_EL2
+ set_sctlr2_elx 2, x0, x1
+ isb
+.endm
+
.macro __init_el2_hcrx
mrs x0, id_aa64mmfr1_el1
ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
@@ -411,6 +417,7 @@
*/
.macro init_el2_state
__init_el2_sctlr
+ __init_sctlr2_el2
__init_el2_hcrx
__init_el2_timers
__init_el2_debug
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d5b5f2ae1afa..0431b357b87b 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -868,6 +868,8 @@
#define INIT_SCTLR_EL2_MMU_OFF \
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
+#define INIT_SCTLR2_EL2 UL(0)
+
/* SCTLR_EL1 specific flags. */
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE)
@@ -888,6 +890,8 @@
SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | SCTLR_EL1_EIS | \
SCTLR_EL1_TSCXT | SCTLR_EL1_EOS)
+#define INIT_SCTLR2_EL1 UL(0)
+
/* MAIR_ELx memory attributes (used by Linux) */
#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00)
#define MAIR_ATTR_DEVICE_nGnRE UL(0x04)
@@ -1164,6 +1168,7 @@
msr hcr_el2, \reg
#endif
.endm
+
#else
#include <linux/bitfield.h>
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ca04b338cb0d..e42664246e15 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -276,6 +276,8 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
pre_disable_mmu_workaround
msr sctlr_el1, x0
+ mov_q x0, INIT_SCTLR2_EL1
+ set_sctlr2_elx 1, x0, x1
isb
mov_q x0, INIT_PSTATE_EL1
msr spsr_el1, x0
@@ -308,6 +310,7 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
isb
mov_q x1, INIT_SCTLR_EL1_MMU_OFF
+ mov_q x2, INIT_SCTLR2_EL1
mrs x0, hcr_el2
and x0, x0, #HCR_E2H
@@ -315,11 +318,13 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
/* Set a sane SCTLR_EL1, the VHE way */
msr_s SYS_SCTLR_EL12, x1
+ set_sctlr2_elx 12, x2, x0
mov x2, #BOOT_CPU_FLAG_E2H
b 3f
2:
msr sctlr_el1, x1
+ set_sctlr2_elx 1, x2, x0
mov x2, xzr
3:
mov x0, #INIT_PSTATE_EL1
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 36e2d26b54f5..ac12f1b4f8e2 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -144,7 +144,17 @@ SYM_CODE_START_LOCAL(__finalise_el2)
.Lskip_indirection:
.Lskip_tcr2:
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4
+ cbz x1, .Lskip_sctlr2
+ mrs_s x1, SYS_SCTLR2_EL12
+ msr_s SYS_SCTLR2_EL1, x1
+ // clean SCTLR2_EL1
+ mov_q x1, INIT_SCTLR2_EL1
+ msr_s SYS_SCTLR2_EL12, x1
+
+.Lskip_sctlr2:
isb
// Hack the exception return to stay at EL2
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index c3e196fb8b18..df1180cad7f8 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -4,6 +4,7 @@
* Author: David Brazdil <dbrazdil@google.com>
*/
+#include <asm/cpufeature.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
@@ -219,6 +220,8 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
release_boot_args(boot_args);
write_sysreg_el1(INIT_SCTLR_EL1_MMU_OFF, SYS_SCTLR);
+ if (cpus_have_final_cap(ARM64_HAS_SCTLR2))
+ write_sysreg_el1(INIT_SCTLR2_EL1, SYS_SCTLR2);
write_sysreg(INIT_PSTATE_EL1, SPSR_EL2);
__host_enter(host_ctxt);
--
LEVI:{C3F47F37-75D8-414A-A8BA-3980EC8A46D7}
Hi, On Thu, Aug 21, 2025 at 06:24:05PM +0100, Yeoreum Yun wrote: > The value of the SCTLR2_ELx register is UNKNOWN after reset. > If the firmware initializes these registers properly, no additional > initialization is required. > However, in cases where they are not initialized correctly, > initialize the SCTLR2_ELx registers during CPU/vCPU boot > to prevent unexpected system behavior caused by invalid values. > > Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com> > --- [...] > diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h > index 23be85d93348..c25c2aed5125 100644 > --- a/arch/arm64/include/asm/assembler.h > +++ b/arch/arm64/include/asm/assembler.h > @@ -738,6 +738,21 @@ alternative_endif > set_sctlr sctlr_el2, \reg > .endm > > +/* Set SCTLR2_ELx to the @reg value. */ > +.macro set_sctlr2_elx, el, reg, tmp > + mrs_s \tmp, SYS_ID_AA64MMFR3_EL1 > + ubfx \tmp, \tmp, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4 > + cbz \tmp, .Lskip_sctlr2_\@ > + .if \el == 2 > + msr_s SYS_SCTLR2_EL2, \reg > + .elseif \el == 12 > + msr_s SYS_SCTLR2_EL12, \reg > + .else > + msr_s SYS_SCTLR2_EL1, \reg > + .endif > +.Lskip_sctlr2_\@: > +.endm > + You were right that just doing msr_s SYS_SCTLR_\el, \reg here doesn't work. It looks like we rely on the C preprocessor to expand the SYS_FOO_REG names to numeric sysreg encodings. By the time the assembler macros are expanded, it is too late to construct sysreg names -- the C preprocessor only runs once, before the assembler. So, your code here looks reasonable. But, it will still silently do the wrong thing if \el is empty or garbage, so it is probably worth adding an error check: .else .error "Bad EL \"\el\" in set_sctlr2_elx" .endif Also, looking at all this again, the "1", "2" and "12" suffixes are not really numbers: SYS_REG_EL02 would definitely not be the same thing as SYS_REG_EL2 (although there is no _EL02 version of this particular register). So, can you use .ifc to do a string comparison instead? If might be a good idea to migrate other macros that use an "el" argument to do something similar -- although that probably doesn't belong in this series. The assembler seems to have no ".elseifc" directive, so you'll need separate nested .ifc blocks: .ifc \el,2 msr_s SYS_SCTLR2_EL2, \reg .else .ifc \el,12 msr_s SYS_SCTLR2_EL12, \reg .else .ifc \el,1 msr_s SYS_SCTLR2_EL1, \reg .else .error "Bad EL \"\el\" in set_sctlr2_elx" .endif .endif .endif (Note, I've not tested this approach. If you can think of a better way, please feel free to suggest.) [...] > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S > index 36e2d26b54f5..ac12f1b4f8e2 100644 > --- a/arch/arm64/kernel/hyp-stub.S > +++ b/arch/arm64/kernel/hyp-stub.S > @@ -144,7 +144,17 @@ SYM_CODE_START_LOCAL(__finalise_el2) > > .Lskip_indirection: > .Lskip_tcr2: > + mrs_s x1, SYS_ID_AA64MMFR3_EL1 > + ubfx x1, x1, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4 > + cbz x1, .Lskip_sctlr2 > + mrs_s x1, SYS_SCTLR2_EL12 > + msr_s SYS_SCTLR2_EL1, x1 > > + // clean SCTLR2_EL1 > + mov_q x1, INIT_SCTLR2_EL1 > + msr_s SYS_SCTLR2_EL12, x1 I'm still not sure why we need to do this. The code doesn't seem to clean up by the EL1 value of any other register -- or have I missed something? We have already switched to EL2, via the HVC call that jumped to __finalise_el2. We won't run at EL1 again unless KVM starts a guest -- but in that case, it's KVM's responsibility to set up the EL1 registers before handing control to the guest. In any case, is SCTLR2_EL1 ever set to anything except INIT_SCTLR2_EL1 before we get here? [...] Cheers ---Dave
Hi Dave, [...] > > diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h > > index 23be85d93348..c25c2aed5125 100644 > > --- a/arch/arm64/include/asm/assembler.h > > +++ b/arch/arm64/include/asm/assembler.h > > @@ -738,6 +738,21 @@ alternative_endif > > set_sctlr sctlr_el2, \reg > > .endm > > > > +/* Set SCTLR2_ELx to the @reg value. */ > > +.macro set_sctlr2_elx, el, reg, tmp > > + mrs_s \tmp, SYS_ID_AA64MMFR3_EL1 > > + ubfx \tmp, \tmp, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4 > > + cbz \tmp, .Lskip_sctlr2_\@ > > + .if \el == 2 > > + msr_s SYS_SCTLR2_EL2, \reg > > + .elseif \el == 12 > > + msr_s SYS_SCTLR2_EL12, \reg > > + .else > > + msr_s SYS_SCTLR2_EL1, \reg > > + .endif > > +.Lskip_sctlr2_\@: > > +.endm > > + > > You were right that just doing > > msr_s SYS_SCTLR_\el, \reg > > here doesn't work. It looks like we rely on the C preprocessor to > expand the SYS_FOO_REG names to numeric sysreg encodings. By the time > the assembler macros are expanded, it is too late to construct sysreg > names -- the C preprocessor only runs once, before the assembler. > > So, your code here looks reasonable. > > But, it will still silently do the wrong thing if \el is empty or > garbage, so it is probably worth adding an error check: > > .else > .error "Bad EL \"\el\" in set_sctlr2_elx" > .endif > > > Also, looking at all this again, the "1", "2" and "12" suffixes are not > really numbers: SYS_REG_EL02 would definitely not be the same thing as > SYS_REG_EL2 (although there is no _EL02 version of this particular > register). > > So, can you use .ifc to do a string comparison instead? > > If might be a good idea to migrate other macros that use an "el" > argument to do something similar -- although that probably doesn't > belong in this series. > > The assembler seems to have no ".elseifc" directive, so you'll need > separate nested .ifc blocks: > > .ifc \el,2 > msr_s SYS_SCTLR2_EL2, \reg > .else > .ifc \el,12 > msr_s SYS_SCTLR2_EL12, \reg > .else > .ifc \el,1 > msr_s SYS_SCTLR2_EL1, \reg > .else > .error "Bad EL \"\el\" in set_sctlr2_elx" > .endif > .endif > .endif > > (Note, I've not tested this approach. If you can think of a better > way, please feel free to suggest.) > Sorry for late reply. but when I find some usage like above. I couldn't find any usage for this except this macro. In case of entry, since it just only chekc for "el0" case I think it doesn't need to apply this for them. So, let me apply this for set_sctlr2_elx only right now. when some new register requires this kind of pattern, let's apply at that time more generally. Thanks. -- Sincerely, Yeoreum Yun
Hi Dave, > On Thu, Aug 21, 2025 at 06:24:05PM +0100, Yeoreum Yun wrote: > > The value of the SCTLR2_ELx register is UNKNOWN after reset. > > If the firmware initializes these registers properly, no additional > > initialization is required. > > However, in cases where they are not initialized correctly, > > initialize the SCTLR2_ELx registers during CPU/vCPU boot > > to prevent unexpected system behavior caused by invalid values. > > > > Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com> > > --- > > [...] > > > diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h > > index 23be85d93348..c25c2aed5125 100644 > > --- a/arch/arm64/include/asm/assembler.h > > +++ b/arch/arm64/include/asm/assembler.h > > @@ -738,6 +738,21 @@ alternative_endif > > set_sctlr sctlr_el2, \reg > > .endm > > > > +/* Set SCTLR2_ELx to the @reg value. */ > > +.macro set_sctlr2_elx, el, reg, tmp > > + mrs_s \tmp, SYS_ID_AA64MMFR3_EL1 > > + ubfx \tmp, \tmp, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4 > > + cbz \tmp, .Lskip_sctlr2_\@ > > + .if \el == 2 > > + msr_s SYS_SCTLR2_EL2, \reg > > + .elseif \el == 12 > > + msr_s SYS_SCTLR2_EL12, \reg > > + .else > > + msr_s SYS_SCTLR2_EL1, \reg > > + .endif > > +.Lskip_sctlr2_\@: > > +.endm > > + > > You were right that just doing > > msr_s SYS_SCTLR_\el, \reg > > here doesn't work. It looks like we rely on the C preprocessor to > expand the SYS_FOO_REG names to numeric sysreg encodings. By the time > the assembler macros are expanded, it is too late to construct sysreg > names -- the C preprocessor only runs once, before the assembler. > > So, your code here looks reasonable. > > But, it will still silently do the wrong thing if \el is empty or > garbage, so it is probably worth adding an error check: > > .else > .error "Bad EL \"\el\" in set_sctlr2_elx" > .endif > > > Also, looking at all this again, the "1", "2" and "12" suffixes are not > really numbers: SYS_REG_EL02 would definitely not be the same thing as > SYS_REG_EL2 (although there is no _EL02 version of this particular > register). > > So, can you use .ifc to do a string comparison instead? > > If might be a good idea to migrate other macros that use an "el" > argument to do something similar -- although that probably doesn't > belong in this series. > > The assembler seems to have no ".elseifc" directive, so you'll need > separate nested .ifc blocks: > > .ifc \el,2 > msr_s SYS_SCTLR2_EL2, \reg > .else > .ifc \el,12 > msr_s SYS_SCTLR2_EL12, \reg > .else > .ifc \el,1 > msr_s SYS_SCTLR2_EL1, \reg > .else > .error "Bad EL \"\el\" in set_sctlr2_elx" > .endif > .endif > .endif > > (Note, I've not tested this approach. If you can think of a better > way, please feel free to suggest.) Thanks for your suggestion. Let me test and check about this idea could be applied in other macro too :D (But as you mention, I'll apply this for SCTLR2 in other patchset). > > [...] > > > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S > > index 36e2d26b54f5..ac12f1b4f8e2 100644 > > --- a/arch/arm64/kernel/hyp-stub.S > > +++ b/arch/arm64/kernel/hyp-stub.S > > @@ -144,7 +144,17 @@ SYM_CODE_START_LOCAL(__finalise_el2) > > > > .Lskip_indirection: > > .Lskip_tcr2: > > + mrs_s x1, SYS_ID_AA64MMFR3_EL1 > > + ubfx x1, x1, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4 > > + cbz x1, .Lskip_sctlr2 > > + mrs_s x1, SYS_SCTLR2_EL12 > > + msr_s SYS_SCTLR2_EL1, x1 > > > > + // clean SCTLR2_EL1 > > + mov_q x1, INIT_SCTLR2_EL1 > > + msr_s SYS_SCTLR2_EL12, x1 > > I'm still not sure why we need to do this. The code doesn't seem to > clean up by the EL1 value of any other register -- or have I missed > something? > > We have already switched to EL2, via the HVC call that jumped to > __finalise_el2. We won't run at EL1 again unless KVM starts a guest -- > but in that case, it's KVM's responsibility to set up the EL1 registers > before handing control to the guest. > > In any case, is SCTLR2_EL1 ever set to anything except INIT_SCTLR2_EL1 > before we get here? Regardless of VHE and nVHE, between init_kernel_el() and finalise_el2() the kernel modifies SCTLR_EL1/SCTLR2_EL1 (since el level in this period is EL1). and at the end of finalise_el2(), kernel switches to el2 and if VHE, it writes the SCTLR_EL1/SCTLR2_EL1 to SCTLR_EL2/SCTLR2_EL2 and initialize the SCTLR_EL1/SCTLR2_EL1. Although there is no code to modify SCTLR2_EL1 between this period, as SCTLR1_ELx, I initialize the SCTLR2_EL1 as init value for the future usage. Thanks! -- Sincerely, Yeoreum Yun
Hi, On Mon, Sep 01, 2025 at 07:29:28PM +0100, Yeoreum Yun wrote: > Hi Dave, > > > On Thu, Aug 21, 2025 at 06:24:05PM +0100, Yeoreum Yun wrote: > > > The value of the SCTLR2_ELx register is UNKNOWN after reset. > > > If the firmware initializes these registers properly, no additional > > > initialization is required. > > > However, in cases where they are not initialized correctly, > > > initialize the SCTLR2_ELx registers during CPU/vCPU boot > > > to prevent unexpected system behavior caused by invalid values. > > > > > > Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com> > > > --- > > > > [...] > > > > > diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h > > > index 23be85d93348..c25c2aed5125 100644 > > > --- a/arch/arm64/include/asm/assembler.h > > > +++ b/arch/arm64/include/asm/assembler.h > > > @@ -738,6 +738,21 @@ alternative_endif [...] > > > +/* Set SCTLR2_ELx to the @reg value. */ > > > +.macro set_sctlr2_elx, el, reg, tmp > > > + mrs_s \tmp, SYS_ID_AA64MMFR3_EL1 > > > + ubfx \tmp, \tmp, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4 > > > + cbz \tmp, .Lskip_sctlr2_\@ > > > + .if \el == 2 > > > + msr_s SYS_SCTLR2_EL2, \reg > > > + .elseif \el == 12 > > > + msr_s SYS_SCTLR2_EL12, \reg > > > + .else > > > + msr_s SYS_SCTLR2_EL1, \reg > > > + .endif > > > +.Lskip_sctlr2_\@: > > > +.endm [...] > > If might be a good idea to migrate other macros that use an "el" > > argument to do something similar -- although that probably doesn't > > belong in this series. > > > > The assembler seems to have no ".elseifc" directive, so you'll need > > separate nested .ifc blocks: > > > > .ifc \el,2 > > msr_s SYS_SCTLR2_EL2, \reg > > .else > > .ifc \el,12 > > msr_s SYS_SCTLR2_EL12, \reg > > .else > > .ifc \el,1 > > msr_s SYS_SCTLR2_EL1, \reg > > .else > > .error "Bad EL \"\el\" in set_sctlr2_elx" > > .endif > > .endif > > .endif > > > > (Note, I've not tested this approach. If you can think of a better > > way, please feel free to suggest.) > > Thanks for your suggestion. Let me test and check about this idea could > be applied in other macro too :D > (But as you mention, I'll apply this for SCTLR2 in other patchset). Ack, let me know how it goes. It is probably not worth doing this if the changes become complicated, though. [...] > > > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S > > > index 36e2d26b54f5..ac12f1b4f8e2 100644 > > > --- a/arch/arm64/kernel/hyp-stub.S > > > +++ b/arch/arm64/kernel/hyp-stub.S > > > @@ -144,7 +144,17 @@ SYM_CODE_START_LOCAL(__finalise_el2) > > > > > > .Lskip_indirection: > > > .Lskip_tcr2: > > > + mrs_s x1, SYS_ID_AA64MMFR3_EL1 > > > + ubfx x1, x1, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4 > > > + cbz x1, .Lskip_sctlr2 > > > + mrs_s x1, SYS_SCTLR2_EL12 > > > + msr_s SYS_SCTLR2_EL1, x1 > > > > > > + // clean SCTLR2_EL1 > > > + mov_q x1, INIT_SCTLR2_EL1 > > > + msr_s SYS_SCTLR2_EL12, x1 > > > > I'm still not sure why we need to do this. The code doesn't seem to > > clean up by the EL1 value of any other register -- or have I missed > > something? > > > > We have already switched to EL2, via the HVC call that jumped to > > __finalise_el2. We won't run at EL1 again unless KVM starts a guest -- > > but in that case, it's KVM's responsibility to set up the EL1 registers > > before handing control to the guest. > > > > In any case, is SCTLR2_EL1 ever set to anything except INIT_SCTLR2_EL1 > > before we get here? > > Regardless of VHE and nVHE, between init_kernel_el() and finalise_el2() > the kernel modifies SCTLR_EL1/SCTLR2_EL1 (since el level in this period > is EL1). > and at the end of finalise_el2(), kernel switches to el2 and > if VHE, it writes the SCTLR_EL1/SCTLR2_EL1 to SCTLR_EL2/SCTLR2_EL2 and > initialize the SCTLR_EL1/SCTLR2_EL1. > > Although there is no code to modify SCTLR2_EL1 between this period, > as SCTLR1_ELx, I initialize the SCTLR2_EL1 as init value for the future > usage. I think that we don't need to ensure that all sysregs are cleanly initialised; only those that can affect execution in some way need to be initialised. Once we are running at EL2 with VHE, we don't switch back to EL1, so the _EL1 control registers don't affect execution any more. If we did have to clean up the _EL1 registers, then this code would need to clean up all the other regs too, but I can't see clean-up for anything other than SCTLR2_EL1 here. Is there some cleanup code elsewhere that I have missed? Cheers ---Dave
Hi Dave, [...] > > > > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S > > > > index 36e2d26b54f5..ac12f1b4f8e2 100644 > > > > --- a/arch/arm64/kernel/hyp-stub.S > > > > +++ b/arch/arm64/kernel/hyp-stub.S > > > > @@ -144,7 +144,17 @@ SYM_CODE_START_LOCAL(__finalise_el2) > > > > > > > > .Lskip_indirection: > > > > .Lskip_tcr2: > > > > + mrs_s x1, SYS_ID_AA64MMFR3_EL1 > > > > + ubfx x1, x1, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4 > > > > + cbz x1, .Lskip_sctlr2 > > > > + mrs_s x1, SYS_SCTLR2_EL12 > > > > + msr_s SYS_SCTLR2_EL1, x1 > > > > > > > > + // clean SCTLR2_EL1 > > > > + mov_q x1, INIT_SCTLR2_EL1 > > > > + msr_s SYS_SCTLR2_EL12, x1 > > > > > > I'm still not sure why we need to do this. The code doesn't seem to > > > clean up by the EL1 value of any other register -- or have I missed > > > something? > > > > > > We have already switched to EL2, via the HVC call that jumped to > > > __finalise_el2. We won't run at EL1 again unless KVM starts a guest -- > > > but in that case, it's KVM's responsibility to set up the EL1 registers > > > before handing control to the guest. > > > > > > In any case, is SCTLR2_EL1 ever set to anything except INIT_SCTLR2_EL1 > > > before we get here? > > > > Regardless of VHE and nVHE, between init_kernel_el() and finalise_el2() > > the kernel modifies SCTLR_EL1/SCTLR2_EL1 (since el level in this period > > is EL1). > > and at the end of finalise_el2(), kernel switches to el2 and > > if VHE, it writes the SCTLR_EL1/SCTLR2_EL1 to SCTLR_EL2/SCTLR2_EL2 and > > initialize the SCTLR_EL1/SCTLR2_EL1. > > > > Although there is no code to modify SCTLR2_EL1 between this period, > > as SCTLR1_ELx, I initialize the SCTLR2_EL1 as init value for the future > > usage. > > I think that we don't need to ensure that all sysregs are cleanly > initialised; only those that can affect execution in some way need to > be initialised. > > Once we are running at EL2 with VHE, we don't switch back to EL1, so > the _EL1 control registers don't affect execution any more. > > If we did have to clean up the _EL1 registers, then this code would > need to clean up all the other regs too, but I can't see clean-up for > anything other than SCTLR2_EL1 here. Is there some cleanup code > elsewhere that I have missed? > > Cheers > ---Dave When I look at init_el2(), it returns to EL1 via: mov x0, #INIT_PSTATE_EL1 msr spsr_el2, x0 ... eret In other words, from init_kernel_el() through finalise_el2(), all system-register accesses are made at EL1 (i.e., SYS_REG_EL1). During this period, it appears that only SCTLR_EL1 is modified, so the code only needs to care about the accessed register — SCTLR_EL1. That’s why SCTLR_EL1 is reinitialised at the end of finalise_el2(). Otherwise, the MMU bit might remain enabled, which could cause errors later when launching a VM under VHE. However, the idea behind this patch is to initialise SCTLR2_ELx the same way as SCTLR_ELx. I’m not sure whether SCTLR2_ELx is modified during this period. If it is (now or in the future), it should be cleared/reinitialised just like SCTLR_EL1. This patch is based on the assumption that there may be modifications to SCTLR2_ELx during this period. So it isn’t about other system registers; it’s about the register actually used during this period. Am I missing anything? Thanks! -- Sincerely, Yeoreum Yun
Hi, On Tue, Sep 02, 2025 at 12:05:50PM +0100, Yeoreum Yun wrote: > Hi Dave, > > [...] > > > > > > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S > > > > > index 36e2d26b54f5..ac12f1b4f8e2 100644 > > > > > --- a/arch/arm64/kernel/hyp-stub.S > > > > > +++ b/arch/arm64/kernel/hyp-stub.S > > > > > @@ -144,7 +144,17 @@ SYM_CODE_START_LOCAL(__finalise_el2) > > > > > > > > > > .Lskip_indirection: > > > > > .Lskip_tcr2: > > > > > + mrs_s x1, SYS_ID_AA64MMFR3_EL1 > > > > > + ubfx x1, x1, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4 > > > > > + cbz x1, .Lskip_sctlr2 > > > > > + mrs_s x1, SYS_SCTLR2_EL12 > > > > > + msr_s SYS_SCTLR2_EL1, x1 > > > > > > > > > > + // clean SCTLR2_EL1 > > > > > + mov_q x1, INIT_SCTLR2_EL1 > > > > > + msr_s SYS_SCTLR2_EL12, x1 > > > > > > > > I'm still not sure why we need to do this. The code doesn't seem to > > > > clean up by the EL1 value of any other register -- or have I missed > > > > something? > > > > > > > > We have already switched to EL2, via the HVC call that jumped to > > > > __finalise_el2. We won't run at EL1 again unless KVM starts a guest -- > > > > but in that case, it's KVM's responsibility to set up the EL1 registers > > > > before handing control to the guest. > > > > > > > > In any case, is SCTLR2_EL1 ever set to anything except INIT_SCTLR2_EL1 > > > > before we get here? [...] > When I look at init_el2(), it returns to EL1 via: > > mov x0, #INIT_PSTATE_EL1 > msr spsr_el2, x0 > ... > eret > > In other words, from init_kernel_el() through finalise_el2(), > all system-register accesses are made at EL1 (i.e., SYS_REG_EL1). > During this period, it appears that only SCTLR_EL1 is modified, > so the code only needs to care about the accessed register — SCTLR_EL1. > > That’s why SCTLR_EL1 is reinitialised at the end of finalise_el2(). > Otherwise, the MMU bit might remain enabled, which could cause errors later > when launching a VM under VHE. > > However, the idea behind this patch is to initialise SCTLR2_ELx > the same way as SCTLR_ELx. > I’m not sure whether SCTLR2_ELx is modified during this period. > If it is (now or in the future), > it should be cleared/reinitialised just like SCTLR_EL1. > > This patch is based on the assumption that there may be modifications to > SCTLR2_ELx during this period. So it isn’t about other system registers; > it’s about the register actually used during this period. > > Am I missing anything? > > Thanks! > > -- > Sincerely, > Yeoreum Yun I think I missed the SCTLR_EL1 reset in the idmap code after the enter_vhe label. Actually, I'm not sure whether there is any architectural reason for setting SCTLR_EL1 to INIT_SCTLR_EL1_MMU_OFF here. "for good measure" suggests that it felt like a good idea but there was no known reason for it. The commit message for the original patch doesn't offer an explanation -- maybe Marc can remember. This might be a defence against speculative translation table walks using the EL1&0 regime (but the architecture says [RNRJPP]: "If an implementation is executing at EL3 or EL2, the PE is not permitted to use the registers associated with the EL1&0 translation regime to speculatively access memory or translation tables.") So it shouldn't really matter, but in case buggy CPUs don't implement this rule properly it may be a good idea to turn the stage1 MMU off just in case. Since it's there, though, it probably does make sense to reinitialise SCTLR2_EL1 at the same time -- but can you move this so that it is next to the SCTLR_EL1 reinitialisation? Otherwise, the purpose of reinitialising SCTLR2_EL1 is unclear. It really should come under the same "for good measure" justification as the SCTLR_EL1 reset. However, I don't think this has anything to do with putting things into a clean state for VMs. KVM defines the reset state for all the _EL1 regs explicitly -- failing to do that would be a bug in KVM. (See arch/arm64/kvm/sys_regs.c : sys_reg_descs[], kvm_reset_sys_regs().) Cheers ---Dave
Hi Dave, [...] > > > > > > .Lskip_indirection: > > > > > > .Lskip_tcr2: > > > > > > + mrs_s x1, SYS_ID_AA64MMFR3_EL1 > > > > > > + ubfx x1, x1, #ID_AA64MMFR3_EL1_SCTLRX_SHIFT, #4 > > > > > > + cbz x1, .Lskip_sctlr2 > > > > > > + mrs_s x1, SYS_SCTLR2_EL12 > > > > > > + msr_s SYS_SCTLR2_EL1, x1 > > > > > > > > > > > > + // clean SCTLR2_EL1 > > > > > > + mov_q x1, INIT_SCTLR2_EL1 > > > > > > + msr_s SYS_SCTLR2_EL12, x1 > > > > > > > > > > I'm still not sure why we need to do this. The code doesn't seem to > > > > > clean up by the EL1 value of any other register -- or have I missed > > > > > something? > > > > > > > > > > We have already switched to EL2, via the HVC call that jumped to > > > > > __finalise_el2. We won't run at EL1 again unless KVM starts a guest -- > > > > > but in that case, it's KVM's responsibility to set up the EL1 registers > > > > > before handing control to the guest. > > > > > > > > > > In any case, is SCTLR2_EL1 ever set to anything except INIT_SCTLR2_EL1 > > > > > before we get here? > > [...] > > > When I look at init_el2(), it returns to EL1 via: > > > > mov x0, #INIT_PSTATE_EL1 > > msr spsr_el2, x0 > > ... > > eret > > > > In other words, from init_kernel_el() through finalise_el2(), > > all system-register accesses are made at EL1 (i.e., SYS_REG_EL1). > > During this period, it appears that only SCTLR_EL1 is modified, > > so the code only needs to care about the accessed register — SCTLR_EL1. > > > > That’s why SCTLR_EL1 is reinitialised at the end of finalise_el2(). > > Otherwise, the MMU bit might remain enabled, which could cause errors later > > when launching a VM under VHE. > > > > However, the idea behind this patch is to initialise SCTLR2_ELx > > the same way as SCTLR_ELx. > > I’m not sure whether SCTLR2_ELx is modified during this period. > > If it is (now or in the future), > > it should be cleared/reinitialised just like SCTLR_EL1. > > > > This patch is based on the assumption that there may be modifications to > > SCTLR2_ELx during this period. So it isn’t about other system registers; > > it’s about the register actually used during this period. > > > > Am I missing anything? > > > > Thanks! > > > > -- > > Sincerely, > > Yeoreum Yun > > I think I missed the SCTLR_EL1 reset in the idmap code after the > enter_vhe label. > > Actually, I'm not sure whether there is any architectural reason for > setting SCTLR_EL1 to INIT_SCTLR_EL1_MMU_OFF here. "for good measure" > suggests that it felt like a good idea but there was no known reason > for it. The commit message for the original patch doesn't offer an > explanation -- maybe Marc can remember. > > This might be a defence against speculative translation table walks > using the EL1&0 regime (but the architecture says [RNRJPP]: "If an > implementation is executing at EL3 or EL2, the PE is not permitted to > use the registers associated with the EL1&0 translation regime to > speculatively access memory or translation tables.") So it shouldn't > really matter, but in case buggy CPUs don't implement this rule > properly it may be a good idea to turn the stage1 MMU off just in case. > Thanks for great deep insight :D. > Since it's there, though, it probably does make sense to reinitialise > SCTLR2_EL1 at the same time -- but can you move this so that it is next > to the SCTLR_EL1 reinitialisation? Otherwise, the purpose of > reinitialising SCTLR2_EL1 is unclear. It really should come under the > same "for good measure" justification as the SCTLR_EL1 reset. Okay. > > However, I don't think this has anything to do with putting things into > a clean state for VMs. KVM defines the reset state for all the _EL1 > regs explicitly -- failing to do that would be a bug in KVM. > > (See arch/arm64/kvm/sys_regs.c : sys_reg_descs[], kvm_reset_sys_regs().) Right. I've missed the reset sysregs when kvm is launching. Thanks! -- Sincerely, Yeoreum Yun
© 2016 - 2026 Red Hat, Inc.