arch/arm64/kernel/head.S | 3 +++ arch/arm64/kernel/pi/map_range.c | 6 ++++++ arch/arm64/kernel/pi/pi.h | 1 + arch/arm64/mm/mmu.c | 1 + arch/arm64/mm/proc.S | 5 +++-- 5 files changed, 14 insertions(+), 2 deletions(-)
mov_q cannot move the PIE_E[0|1] macro constants into a general purpose
register as expected, because with D128 page tables enabled those macros
are defined in terms of 128 bit types with shifting and masking, which
the assembler cannot evaluate. Fix this problem by first computing these
macro constants in C code and storing them at a given memory location,
and then subsequently setting up the registers PIRE0_EL1 and PIR_EL1 by
loading the stored values from memory.
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
This patch applies on v6.15-rc1
arch/arm64/kernel/head.S | 3 +++
arch/arm64/kernel/pi/map_range.c | 6 ++++++
arch/arm64/kernel/pi/pi.h | 1 +
arch/arm64/mm/mmu.c | 1 +
arch/arm64/mm/proc.S | 5 +++--
5 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2ce73525de2c..4950d9cc638a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
+ adr_l x0, pir_data
+ bl __pi_load_pir_data
+
bl __cpu_setup // initialise processor
b __primary_switch
SYM_CODE_END(primary_entry)
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
index 81345f68f9fc..cd9d24e73046 100644
--- a/arch/arm64/kernel/pi/map_range.c
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -103,3 +103,9 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
return ptep;
}
+
+asmlinkage void __init load_pir_data(u64 pir_data[])
+{
+ pir_data[0] = PIE_E0;
+ pir_data[1] = PIE_E1;
+}
diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
index c91e5e965cd3..ae61df4fdb77 100644
--- a/arch/arm64/kernel/pi/pi.h
+++ b/arch/arm64/kernel/pi/pi.h
@@ -34,3 +34,4 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
+asmlinkage void load_pir_data(u64 pir_data[]);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ea6695d53fb9..762e81ff4e85 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -58,6 +58,7 @@ static bool rodata_is_rw __ro_after_init = true;
* with MMU turned off.
*/
long __section(".mmuoff.data.write") __early_cpu_boot_status;
+unsigned long __section(".mmuoff.data.write") pir_data[2];
/*
* Empty_zero_page is a special page that is used for zero-initialized data
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index fb30c8804f87..7dd28cf101c2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -524,9 +524,10 @@ alternative_else_nop_endif
#define PTE_MAYBE_NG 0
#define PTE_MAYBE_SHARED 0
- mov_q x0, PIE_E0
+ adr_l x1, pir_data
+ ldr x0, [x1, #0]
msr REG_PIRE0_EL1, x0
- mov_q x0, PIE_E1
+ ldr x0, [x1, #8]
msr REG_PIR_EL1, x0
#undef PTE_MAYBE_NG
--
2.25.1
On 10/04/2025 08:40, Anshuman Khandual wrote:
> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
> as expected if those macro constants contain some 128 bit layout elements,
> required for D128 page tables. Fix this problem via first loading up these
> macro constants into a given memory location and then subsequently setting
> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
of 128-bit types with shifting and masking, which the assembler can't do? It
would be good to spell this out.
>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Ard Biesheuvel <ardb@kernel.org>
> Cc: Ryan Roberts <ryan.roberts@arm.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
> This patch applies on v6.15-rc1
>
> arch/arm64/kernel/head.S | 3 +++
> arch/arm64/kernel/pi/map_range.c | 6 ++++++
> arch/arm64/kernel/pi/pi.h | 1 +
> arch/arm64/mm/mmu.c | 1 +
> arch/arm64/mm/proc.S | 5 +++--
> 5 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2ce73525de2c..4950d9cc638a 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
> * On return, the CPU will be ready for the MMU to be turned on and
> * the TCR will have been set.
> */
> + adr_l x0, pir_data
> + bl __pi_load_pir_data
Using C code to pre-calculate the values into global variables that the assembly
code then loads and stuffs into the PIR registers feels hacky. I wonder if we
can instead pre-calculate into asm-offsets.h? e.g. add the following to
asm-offsets.c:
DEFINE(PIE_E0_ASM, PIE_E0);
DEFINE(PIE_E1_ASM, PIE_E1);
Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
pre-calculated values that you can then use in proc.S?
Or if that doesn't work for some reason, is it possible to store to the PIR
registers directly from the C code?
Thanks,
Ryan
> +
> bl __cpu_setup // initialise processor
> b __primary_switch
> SYM_CODE_END(primary_entry)
> diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
> index 81345f68f9fc..cd9d24e73046 100644
> --- a/arch/arm64/kernel/pi/map_range.c
> +++ b/arch/arm64/kernel/pi/map_range.c
> @@ -103,3 +103,9 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
>
> return ptep;
> }
> +
> +asmlinkage void __init load_pir_data(u64 pir_data[])
> +{
> + pir_data[0] = PIE_E0;
> + pir_data[1] = PIE_E1;
> +}
> diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
> index c91e5e965cd3..ae61df4fdb77 100644
> --- a/arch/arm64/kernel/pi/pi.h
> +++ b/arch/arm64/kernel/pi/pi.h
> @@ -34,3 +34,4 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
> asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
>
> asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
> +asmlinkage void load_pir_data(u64 pir_data[]);
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index ea6695d53fb9..762e81ff4e85 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -58,6 +58,7 @@ static bool rodata_is_rw __ro_after_init = true;
> * with MMU turned off.
> */
> long __section(".mmuoff.data.write") __early_cpu_boot_status;
> +unsigned long __section(".mmuoff.data.write") pir_data[2];
>
> /*
> * Empty_zero_page is a special page that is used for zero-initialized data
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index fb30c8804f87..7dd28cf101c2 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -524,9 +524,10 @@ alternative_else_nop_endif
> #define PTE_MAYBE_NG 0
> #define PTE_MAYBE_SHARED 0
>
> - mov_q x0, PIE_E0
> + adr_l x1, pir_data
> + ldr x0, [x1, #0]
> msr REG_PIRE0_EL1, x0
> - mov_q x0, PIE_E1
> + ldr x0, [x1, #8]
> msr REG_PIR_EL1, x0
>
> #undef PTE_MAYBE_NG
On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
>
> On 10/04/2025 08:40, Anshuman Khandual wrote:
> > mov_q cannot really move PIE_E[0|1] macros into a general purpose register
> > as expected if those macro constants contain some 128 bit layout elements,
> > required for D128 page tables. Fix this problem via first loading up these
> > macro constants into a given memory location and then subsequently setting
> > up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>
> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
> of 128-bit types with shifting and masking, which the assembler can't do? It
> would be good to spell this out.
>
> >
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Will Deacon <will@kernel.org>
> > Cc: Mark Rutland <mark.rutland@arm.com>
> > Cc: Ard Biesheuvel <ardb@kernel.org>
> > Cc: Ryan Roberts <ryan.roberts@arm.com>
> > Cc: linux-arm-kernel@lists.infradead.org
> > Cc: linux-kernel@vger.kernel.org
> > Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> > ---
> > This patch applies on v6.15-rc1
> >
> > arch/arm64/kernel/head.S | 3 +++
> > arch/arm64/kernel/pi/map_range.c | 6 ++++++
> > arch/arm64/kernel/pi/pi.h | 1 +
> > arch/arm64/mm/mmu.c | 1 +
> > arch/arm64/mm/proc.S | 5 +++--
> > 5 files changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > index 2ce73525de2c..4950d9cc638a 100644
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
> > * On return, the CPU will be ready for the MMU to be turned on and
> > * the TCR will have been set.
> > */
> > + adr_l x0, pir_data
> > + bl __pi_load_pir_data
>
> Using C code to pre-calculate the values into global variables that the assembly
> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
> can instead pre-calculate into asm-offsets.h? e.g. add the following to
> asm-offsets.c:
>
> DEFINE(PIE_E0_ASM, PIE_E0);
> DEFINE(PIE_E1_ASM, PIE_E1);
>
> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
> pre-calculated values that you can then use in proc.S?
>
There is another issue, which is that mov_q tries to be smart, and
emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
directive evaluates the argument, which does not work with symbolic
constants.
I wouldn't mind just dropping that, i.e.,
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -545,17 +545,9 @@ alternative_endif
* magnitude and sign of the operand)
*/
.macro mov_q, reg, val
- .if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
- movz \reg, :abs_g1_s:\val
- .else
- .if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
- movz \reg, :abs_g2_s:\val
- .else
movz \reg, :abs_g3:\val
movk \reg, :abs_g2_nc:\val
- .endif
movk \reg, :abs_g1_nc:\val
- .endif
movk \reg, :abs_g0_nc:\val
.endm
Then, we can apply Ryan's trick to move these constants into
asm-offsets.c, but you'll need to tweak the PTE_MAYBE macros as well:
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -182,5 +182,22 @@ int main(void)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops,
direct_call));
#endif
+#undef PTE_MAYBE_NG
+#define PTE_MAYBE_NG 0
+
+#undef PTE_MAYBE_SHARED
+#define PTE_MAYBE_SHARED 0
+
+ DEFINE(PIE_E0_ASM, PIE_E0);
+ DEFINE(PIE_E1_ASM, PIE_E1);
return 0;
}
(We should also move the comment from proc.S to asm-offsets.c but I
omitted that here for brevity.)
Then you should be able to use PIE_En_ASM in mov_q instructions without issue.
Alternatively, if changing the implementation of mov_q creates any
problems, we can just add a variant of that macro that lacks the
conditional directives and always emits MOVZ/MOVK/MOVK/MOVK
On 14/04/2025 10:41, Ard Biesheuvel wrote: > On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote: >> >> On 10/04/2025 08:40, Anshuman Khandual wrote: >>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register >>> as expected if those macro constants contain some 128 bit layout elements, >>> required for D128 page tables. Fix this problem via first loading up these >>> macro constants into a given memory location and then subsequently setting >>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values. >> >> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms >> of 128-bit types with shifting and masking, which the assembler can't do? It >> would be good to spell this out. >> >>> >>> Cc: Catalin Marinas <catalin.marinas@arm.com> >>> Cc: Will Deacon <will@kernel.org> >>> Cc: Mark Rutland <mark.rutland@arm.com> >>> Cc: Ard Biesheuvel <ardb@kernel.org> >>> Cc: Ryan Roberts <ryan.roberts@arm.com> >>> Cc: linux-arm-kernel@lists.infradead.org >>> Cc: linux-kernel@vger.kernel.org >>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> >>> --- >>> This patch applies on v6.15-rc1 >>> >>> arch/arm64/kernel/head.S | 3 +++ >>> arch/arm64/kernel/pi/map_range.c | 6 ++++++ >>> arch/arm64/kernel/pi/pi.h | 1 + >>> arch/arm64/mm/mmu.c | 1 + >>> arch/arm64/mm/proc.S | 5 +++-- >>> 5 files changed, 14 insertions(+), 2 deletions(-) >>> >>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S >>> index 2ce73525de2c..4950d9cc638a 100644 >>> --- a/arch/arm64/kernel/head.S >>> +++ b/arch/arm64/kernel/head.S >>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry) >>> * On return, the CPU will be ready for the MMU to be turned on and >>> * the TCR will have been set. >>> */ >>> + adr_l x0, pir_data >>> + bl __pi_load_pir_data >> >> Using C code to pre-calculate the values into global variables that the assembly >> code then loads and stuffs into the PIR registers feels hacky. 
I wonder if we >> can instead pre-calculate into asm-offsets.h? e.g. add the following to >> asm-offsets.c: >> >> DEFINE(PIE_E0_ASM, PIE_E0); >> DEFINE(PIE_E1_ASM, PIE_E1); >> >> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the >> pre-calculated values that you can then use in proc.S? >> > > There is another issue, which is that mov_q tries to be smart, and > emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if > directive evaluates the argument, which does not work with symbolic > constants. I'm not quite understanding the detail here; what do you mean by "symbolic constants"? asm-offsets.h will provide something like: #define PIE_E0_ASM 1234567890 The current code is using a hash-define and that's working fine: mov_q x0, PIE_E0 Won't the C preprocessor just substitute and everything will work out? Thanks, Ryan > > I wouldn't mind just dropping that, i.e., > > --- a/arch/arm64/include/asm/assembler.h > +++ b/arch/arm64/include/asm/assembler.h > @@ -545,17 +545,9 @@ alternative_endif > * magnitude and sign of the operand) > */ > .macro mov_q, reg, val > - .if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff) > - movz \reg, :abs_g1_s:\val > - .else > - .if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff) > - movz \reg, :abs_g2_s:\val > - .else > movz \reg, :abs_g3:\val > movk \reg, :abs_g2_nc:\val > - .endif > movk \reg, :abs_g1_nc:\val > - .endif > movk \reg, :abs_g0_nc:\val > .endm > > Then, we can apply Ryan's trick to move these constants into > asm-offsets.c, but you'll need to tweak the PTE_MAYBE macros as well: > > --- a/arch/arm64/kernel/asm-offsets.c > +++ b/arch/arm64/kernel/asm-offsets.c > @@ -182,5 +182,22 @@ int main(void) > #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS > DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, > direct_call)); > #endif > +#undef PTE_MAYBE_NG > +#define PTE_MAYBE_NG 0 > + > +#undef PTE_MAYBE_SHARED > +#define PTE_MAYBE_SHARED 0 > + > + DEFINE(PIE_E0_ASM, PIE_E0); > + 
DEFINE(PIE_E1_ASM, PIE_E1); > return 0; > } > > (We should also move the comment from proc.S to asm-offsets.c but I > omitted that here for brevity.) > > Then you should be able to use PIE_En_ASM in mov_q instructions without issue. > > Alternatively, if changing the implementation of mov_q creates any > problems, we can just add a variant of that macro that lacks the > conditional directives and always emits MOVZ/MOVK/MOVK/MOVK
On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote: > > On 14/04/2025 10:41, Ard Biesheuvel wrote: > > On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote: > >> > >> On 10/04/2025 08:40, Anshuman Khandual wrote: > >>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register > >>> as expected if those macro constants contain some 128 bit layout elements, > >>> required for D128 page tables. Fix this problem via first loading up these > >>> macro constants into a given memory location and then subsequently setting > >>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values. > >> > >> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms > >> of 128-bit types with shifting and masking, which the assembler can't do? It > >> would be good to spell this out. > >> > >>> > >>> Cc: Catalin Marinas <catalin.marinas@arm.com> > >>> Cc: Will Deacon <will@kernel.org> > >>> Cc: Mark Rutland <mark.rutland@arm.com> > >>> Cc: Ard Biesheuvel <ardb@kernel.org> > >>> Cc: Ryan Roberts <ryan.roberts@arm.com> > >>> Cc: linux-arm-kernel@lists.infradead.org > >>> Cc: linux-kernel@vger.kernel.org > >>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> > >>> --- > >>> This patch applies on v6.15-rc1 > >>> > >>> arch/arm64/kernel/head.S | 3 +++ > >>> arch/arm64/kernel/pi/map_range.c | 6 ++++++ > >>> arch/arm64/kernel/pi/pi.h | 1 + > >>> arch/arm64/mm/mmu.c | 1 + > >>> arch/arm64/mm/proc.S | 5 +++-- > >>> 5 files changed, 14 insertions(+), 2 deletions(-) > >>> > >>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S > >>> index 2ce73525de2c..4950d9cc638a 100644 > >>> --- a/arch/arm64/kernel/head.S > >>> +++ b/arch/arm64/kernel/head.S > >>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry) > >>> * On return, the CPU will be ready for the MMU to be turned on and > >>> * the TCR will have been set. 
> >>> */ > >>> + adr_l x0, pir_data > >>> + bl __pi_load_pir_data > >> > >> Using C code to pre-calculate the values into global variables that the assembly > >> code then loads and stuffs into the PIR registers feels hacky. I wonder if we > >> can instead pre-calculate into asm-offsets.h? e.g. add the following to > >> asm-offsets.c: > >> > >> DEFINE(PIE_E0_ASM, PIE_E0); > >> DEFINE(PIE_E1_ASM, PIE_E1); > >> > >> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the > >> pre-calculated values that you can then use in proc.S? > >> > > > > There is another issue, which is that mov_q tries to be smart, and > > emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if > > directive evaluates the argument, which does not work with symbolic > > constants. > > I'm not quite understanding the detail here; what do you mean by "symbolic > constants"? asm-offsets.h will provide something like: > > #define PIE_E0_ASM 1234567890 > > The current code is using a hash-define and that's working fine: > > mov_q x0, PIE_E0 > > > Won't the C preprocessor just substitute and everything will work out? > Yeah, you're right. I was experimenting with something like .set .Lpie_e0, PIE_E0_ASM mov_q xN, .Lpie_e0 where this problem does exist, but we can just use PIE_E0_ASM directly and things should work as expected.
On 14/04/2025 13:28, Ard Biesheuvel wrote: > On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote: >> >> On 14/04/2025 10:41, Ard Biesheuvel wrote: >>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote: >>>> >>>> On 10/04/2025 08:40, Anshuman Khandual wrote: >>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register >>>>> as expected if those macro constants contain some 128 bit layout elements, >>>>> required for D128 page tables. Fix this problem via first loading up these >>>>> macro constants into a given memory location and then subsequently setting >>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values. >>>> >>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms >>>> of 128-bit types with shifting and masking, which the assembler can't do? It >>>> would be good to spell this out. >>>> >>>>> >>>>> Cc: Catalin Marinas <catalin.marinas@arm.com> >>>>> Cc: Will Deacon <will@kernel.org> >>>>> Cc: Mark Rutland <mark.rutland@arm.com> >>>>> Cc: Ard Biesheuvel <ardb@kernel.org> >>>>> Cc: Ryan Roberts <ryan.roberts@arm.com> >>>>> Cc: linux-arm-kernel@lists.infradead.org >>>>> Cc: linux-kernel@vger.kernel.org >>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> >>>>> --- >>>>> This patch applies on v6.15-rc1 >>>>> >>>>> arch/arm64/kernel/head.S | 3 +++ >>>>> arch/arm64/kernel/pi/map_range.c | 6 ++++++ >>>>> arch/arm64/kernel/pi/pi.h | 1 + >>>>> arch/arm64/mm/mmu.c | 1 + >>>>> arch/arm64/mm/proc.S | 5 +++-- >>>>> 5 files changed, 14 insertions(+), 2 deletions(-) >>>>> >>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S >>>>> index 2ce73525de2c..4950d9cc638a 100644 >>>>> --- a/arch/arm64/kernel/head.S >>>>> +++ b/arch/arm64/kernel/head.S >>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry) >>>>> * On return, the CPU will be ready for the MMU to be turned on and >>>>> * the TCR will have been set. 
>>>>> */ >>>>> + adr_l x0, pir_data >>>>> + bl __pi_load_pir_data >>>> >>>> Using C code to pre-calculate the values into global variables that the assembly >>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we >>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to >>>> asm-offsets.c: >>>> >>>> DEFINE(PIE_E0_ASM, PIE_E0); >>>> DEFINE(PIE_E1_ASM, PIE_E1); >>>> >>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the >>>> pre-calculated values that you can then use in proc.S? >>>> >>> >>> There is another issue, which is that mov_q tries to be smart, and >>> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if >>> directive evaluates the argument, which does not work with symbolic >>> constants. >> >> I'm not quite understanding the detail here; what do you mean by "symbolic >> constants"? asm-offsets.h will provide something like: >> >> #define PIE_E0_ASM 1234567890 >> >> The current code is using a hash-define and that's working fine: >> >> mov_q x0, PIE_E0 >> >> >> Won't the C preprocessor just substitute and everything will work out? >> > > Yeah, you're right. I was experimenting with something like > > .set .Lpie_e0, PIE_E0_ASM > mov_q xN, .Lpie_e0 > > where this problem does exist, but we can just use PIE_E0_ASM directly > and things should work as expected. Ahh great, sounds like this should be pretty simple then!
On 4/14/25 18:01, Ryan Roberts wrote:
> On 14/04/2025 13:28, Ard Biesheuvel wrote:
>> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>
>>> On 14/04/2025 10:41, Ard Biesheuvel wrote:
>>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>>
>>>>> On 10/04/2025 08:40, Anshuman Khandual wrote:
>>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
>>>>>> as expected if those macro constants contain some 128 bit layout elements,
>>>>>> required for D128 page tables. Fix this problem via first loading up these
>>>>>> macro constants into a given memory location and then subsequently setting
>>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>>>>>
>>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
>>>>> of 128-bit types with shifting and masking, which the assembler can't do? It
>>>>> would be good to spell this out.
>>>>>
>>>>>>
>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>> Cc: Will Deacon <will@kernel.org>
>>>>>> Cc: Mark Rutland <mark.rutland@arm.com>
>>>>>> Cc: Ard Biesheuvel <ardb@kernel.org>
>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>>>>> Cc: linux-arm-kernel@lists.infradead.org
>>>>>> Cc: linux-kernel@vger.kernel.org
>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>>>>>> ---
>>>>>> This patch applies on v6.15-rc1
>>>>>>
>>>>>> arch/arm64/kernel/head.S | 3 +++
>>>>>> arch/arm64/kernel/pi/map_range.c | 6 ++++++
>>>>>> arch/arm64/kernel/pi/pi.h | 1 +
>>>>>> arch/arm64/mm/mmu.c | 1 +
>>>>>> arch/arm64/mm/proc.S | 5 +++--
>>>>>> 5 files changed, 14 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>>>>>> index 2ce73525de2c..4950d9cc638a 100644
>>>>>> --- a/arch/arm64/kernel/head.S
>>>>>> +++ b/arch/arm64/kernel/head.S
>>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>>>>>> * On return, the CPU will be ready for the MMU to be turned on and
>>>>>> * the TCR will have been set.
>>>>>> */
>>>>>> + adr_l x0, pir_data
>>>>>> + bl __pi_load_pir_data
>>>>>
>>>>> Using C code to pre-calculate the values into global variables that the assembly
>>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
>>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to
>>>>> asm-offsets.c:
>>>>>
>>>>> DEFINE(PIE_E0_ASM, PIE_E0);
>>>>> DEFINE(PIE_E1_ASM, PIE_E1);
>>>>>
>>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
>>>>> pre-calculated values that you can then use in proc.S?
>>>>>
>>>>
>>>> There is another issue, which is that mov_q tries to be smart, and
>>>> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
>>>> directive evaluates the argument, which does not work with symbolic
>>>> constants.
>>>
>>> I'm not quite understanding the detail here; what do you mean by "symbolic
>>> constants"? asm-offsets.h will provide something like:
>>>
>>> #define PIE_E0_ASM 1234567890
>>>
>>> The current code is using a hash-define and that's working fine:
>>>
>>> mov_q x0, PIE_E0
>>>
>>>
>>> Won't the C preprocessor just substitute and everything will work out?
>>>
>>
>> Yeah, you're right. I was experimenting with something like
>>
>> .set .Lpie_e0, PIE_E0_ASM
>> mov_q xN, .Lpie_e0
>>
>> where this problem does exist, but we can just use PIE_E0_ASM directly
>> and things should work as expected.
>
> Ahh great, sounds like this should be pretty simple then!
The following change works both with the current page tables and with D128 page tables.
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -182,5 +182,7 @@ int main(void)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call));
#endif
+ DEFINE(PIE_E0_ASM, PIE_E0);
+ DEFINE(PIE_E1_ASM, PIE_E1);
return 0;
}
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 737c99d79833..f45494425d09 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -536,9 +536,9 @@ alternative_else_nop_endif
#define PTE_MAYBE_NG 0
#define PTE_MAYBE_SHARED 0
- mov_q x0, PIE_E0
+ mov_q x0, PIE_E0_ASM
msr REG_PIRE0_EL1, x0
- mov_q x0, PIE_E1
+ mov_q x0, PIE_E1_ASM
msr REG_PIR_EL1, x0
#undef PTE_MAYBE_NG
On 15/04/2025 07:27, Anshuman Khandual wrote: > > > On 4/14/25 18:01, Ryan Roberts wrote: >> On 14/04/2025 13:28, Ard Biesheuvel wrote: >>> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote: >>>> >>>> On 14/04/2025 10:41, Ard Biesheuvel wrote: >>>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote: >>>>>> >>>>>> On 10/04/2025 08:40, Anshuman Khandual wrote: >>>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register >>>>>>> as expected if those macro constants contain some 128 bit layout elements, >>>>>>> required for D128 page tables. Fix this problem via first loading up these >>>>>>> macro constants into a given memory location and then subsequently setting >>>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values. >>>>>> >>>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms >>>>>> of 128-bit types with shifting and masking, which the assembler can't do? It >>>>>> would be good to spell this out. 
>>>>>> >>>>>>> >>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com> >>>>>>> Cc: Will Deacon <will@kernel.org> >>>>>>> Cc: Mark Rutland <mark.rutland@arm.com> >>>>>>> Cc: Ard Biesheuvel <ardb@kernel.org> >>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com> >>>>>>> Cc: linux-arm-kernel@lists.infradead.org >>>>>>> Cc: linux-kernel@vger.kernel.org >>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> >>>>>>> --- >>>>>>> This patch applies on v6.15-rc1 >>>>>>> >>>>>>> arch/arm64/kernel/head.S | 3 +++ >>>>>>> arch/arm64/kernel/pi/map_range.c | 6 ++++++ >>>>>>> arch/arm64/kernel/pi/pi.h | 1 + >>>>>>> arch/arm64/mm/mmu.c | 1 + >>>>>>> arch/arm64/mm/proc.S | 5 +++-- >>>>>>> 5 files changed, 14 insertions(+), 2 deletions(-) >>>>>>> >>>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S >>>>>>> index 2ce73525de2c..4950d9cc638a 100644 >>>>>>> --- a/arch/arm64/kernel/head.S >>>>>>> +++ b/arch/arm64/kernel/head.S >>>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry) >>>>>>> * On return, the CPU will be ready for the MMU to be turned on and >>>>>>> * the TCR will have been set. >>>>>>> */ >>>>>>> + adr_l x0, pir_data >>>>>>> + bl __pi_load_pir_data >>>>>> >>>>>> Using C code to pre-calculate the values into global variables that the assembly >>>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we >>>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to >>>>>> asm-offsets.c: >>>>>> >>>>>> DEFINE(PIE_E0_ASM, PIE_E0); >>>>>> DEFINE(PIE_E1_ASM, PIE_E1); >>>>>> >>>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the >>>>>> pre-calculated values that you can then use in proc.S? >>>>>> >>>>> >>>>> There is another issue, which is that mov_q tries to be smart, and >>>>> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if >>>>> directive evaluates the argument, which does not work with symbolic >>>>> constants. 
>>>> >>>> I'm not quite understanding the detail here; what do you mean by "symbolic >>>> constants"? asm-offsets.h will provide something like: >>>> >>>> #define PIE_E0_ASM 1234567890 >>>> >>>> The current code is using a hash-define and that's working fine: >>>> >>>> mov_q x0, PIE_E0 >>>> >>>> >>>> Won't the C preprocessor just substitute and everything will work out? >>>> >>> >>> Yeah, you're right. I was experimenting with something like >>> >>> .set .Lpie_e0, PIE_E0_ASM >>> mov_q xN, .Lpie_e0 >>> >>> where this problem does exist, but we can just use PIE_E0_ASM directly >>> and things should work as expected. >> >> Ahh great, sounds like this should be pretty simple then! > > Following change works both on current and with D128 page tables. > > --- a/arch/arm64/kernel/asm-offsets.c > +++ b/arch/arm64/kernel/asm-offsets.c > @@ -182,5 +182,7 @@ int main(void) > #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS > DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call)); > #endif > + DEFINE(PIE_E0_ASM, PIE_E0); > + DEFINE(PIE_E1_ASM, PIE_E1); > return 0; > } > diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S > index 737c99d79833..f45494425d09 100644 > --- a/arch/arm64/mm/proc.S > +++ b/arch/arm64/mm/proc.S > @@ -536,9 +536,9 @@ alternative_else_nop_endif > #define PTE_MAYBE_NG 0 > #define PTE_MAYBE_SHARED 0 I think at minimum, you can remove this PTE_MAYBE_* hack from proc.S. But as Ard says, you may need to add it to asm-offsets.c? I'm surprised asm-offsets.c even compiles without this hack since surely it doesn't have arm64_use_ng_mappings or is_realm_world() available? Thanks, Ryan > > - mov_q x0, PIE_E0 > + mov_q x0, PIE_E0_ASM > msr REG_PIRE0_EL1, x0 > - mov_q x0, PIE_E1 > + mov_q x0, PIE_E1_ASM > msr REG_PIR_EL1, x0 > > #undef PTE_MAYBE_NG >
On 4/15/25 13:23, Ryan Roberts wrote: > On 15/04/2025 07:27, Anshuman Khandual wrote: >> >> >> On 4/14/25 18:01, Ryan Roberts wrote: >>> On 14/04/2025 13:28, Ard Biesheuvel wrote: >>>> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote: >>>>> >>>>> On 14/04/2025 10:41, Ard Biesheuvel wrote: >>>>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote: >>>>>>> >>>>>>> On 10/04/2025 08:40, Anshuman Khandual wrote: >>>>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register >>>>>>>> as expected if those macro constants contain some 128 bit layout elements, >>>>>>>> required for D128 page tables. Fix this problem via first loading up these >>>>>>>> macro constants into a given memory location and then subsequently setting >>>>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values. >>>>>>> >>>>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms >>>>>>> of 128-bit types with shifting and masking, which the assembler can't do? It >>>>>>> would be good to spell this out. 
>>>>>>> >>>>>>>> >>>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com> >>>>>>>> Cc: Will Deacon <will@kernel.org> >>>>>>>> Cc: Mark Rutland <mark.rutland@arm.com> >>>>>>>> Cc: Ard Biesheuvel <ardb@kernel.org> >>>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com> >>>>>>>> Cc: linux-arm-kernel@lists.infradead.org >>>>>>>> Cc: linux-kernel@vger.kernel.org >>>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> >>>>>>>> --- >>>>>>>> This patch applies on v6.15-rc1 >>>>>>>> >>>>>>>> arch/arm64/kernel/head.S | 3 +++ >>>>>>>> arch/arm64/kernel/pi/map_range.c | 6 ++++++ >>>>>>>> arch/arm64/kernel/pi/pi.h | 1 + >>>>>>>> arch/arm64/mm/mmu.c | 1 + >>>>>>>> arch/arm64/mm/proc.S | 5 +++-- >>>>>>>> 5 files changed, 14 insertions(+), 2 deletions(-) >>>>>>>> >>>>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S >>>>>>>> index 2ce73525de2c..4950d9cc638a 100644 >>>>>>>> --- a/arch/arm64/kernel/head.S >>>>>>>> +++ b/arch/arm64/kernel/head.S >>>>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry) >>>>>>>> * On return, the CPU will be ready for the MMU to be turned on and >>>>>>>> * the TCR will have been set. >>>>>>>> */ >>>>>>>> + adr_l x0, pir_data >>>>>>>> + bl __pi_load_pir_data >>>>>>> >>>>>>> Using C code to pre-calculate the values into global variables that the assembly >>>>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we >>>>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to >>>>>>> asm-offsets.c: >>>>>>> >>>>>>> DEFINE(PIE_E0_ASM, PIE_E0); >>>>>>> DEFINE(PIE_E1_ASM, PIE_E1); >>>>>>> >>>>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the >>>>>>> pre-calculated values that you can then use in proc.S? >>>>>>> >>>>>> >>>>>> There is another issue, which is that mov_q tries to be smart, and >>>>>> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if >>>>>> directive evaluates the argument, which does not work with symbolic >>>>>> constants. 
>>>>> >>>>> I'm not quite understanding the detail here; what do you mean by "symbolic >>>>> constants"? asm-offsets.h will provide something like: >>>>> >>>>> #define PIE_E0_ASM 1234567890 >>>>> >>>>> The current code is using a hash-define and that's working fine: >>>>> >>>>> mov_q x0, PIE_E0 >>>>> >>>>> >>>>> Won't the C preprocessor just substitute and everything will work out? >>>>> >>>> >>>> Yeah, you're right. I was experimenting with something like >>>> >>>> .set .Lpie_e0, PIE_E0_ASM >>>> mov_q xN, .Lpie_e0 >>>> >>>> where this problem does exist, but we can just use PIE_E0_ASM directly >>>> and things should work as expected. >>> >>> Ahh great, sounds like this should be pretty simple then! >> >> Following change works both on current and with D128 page tables. >> >> --- a/arch/arm64/kernel/asm-offsets.c >> +++ b/arch/arm64/kernel/asm-offsets.c >> @@ -182,5 +182,7 @@ int main(void) >> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS >> DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call)); >> #endif >> + DEFINE(PIE_E0_ASM, PIE_E0); >> + DEFINE(PIE_E1_ASM, PIE_E1); >> return 0; >> } >> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S >> index 737c99d79833..f45494425d09 100644 >> --- a/arch/arm64/mm/proc.S >> +++ b/arch/arm64/mm/proc.S >> @@ -536,9 +536,9 @@ alternative_else_nop_endif >> #define PTE_MAYBE_NG 0 >> #define PTE_MAYBE_SHARED 0 > > I think at minimum, you can remove this PTE_MAYBE_* hack from proc.S. But as Ard > says, you may need to add it to asm-offsets.c? I'm surprised asm-offsets.c even Moving PTE_MAYBE_* inside asm-offsets.c works as well in both cases but still wondering why this is even required ? What am I missing ? > compiles without this hack since surely it doesn't have arm64_use_ng_mappings or > is_realm_world() available? Did not face any problem with defconfig for the mainline kernel and both these symbols were visible in the built code. 
> > Thanks, > Ryan > >> >> - mov_q x0, PIE_E0 >> + mov_q x0, PIE_E0_ASM >> msr REG_PIRE0_EL1, x0 >> - mov_q x0, PIE_E1 >> + mov_q x0, PIE_E1_ASM >> msr REG_PIR_EL1, x0 >> >> #undef PTE_MAYBE_NG >> >
On 15/04/2025 10:46, Anshuman Khandual wrote: > > > On 4/15/25 13:23, Ryan Roberts wrote: >> On 15/04/2025 07:27, Anshuman Khandual wrote: >>> >>> >>> On 4/14/25 18:01, Ryan Roberts wrote: >>>> On 14/04/2025 13:28, Ard Biesheuvel wrote: >>>>> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote: >>>>>> >>>>>> On 14/04/2025 10:41, Ard Biesheuvel wrote: >>>>>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote: >>>>>>>> >>>>>>>> On 10/04/2025 08:40, Anshuman Khandual wrote: >>>>>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register >>>>>>>>> as expected if those macro constants contain some 128 bit layout elements, >>>>>>>>> required for D128 page tables. Fix this problem via first loading up these >>>>>>>>> macro constants into a given memory location and then subsequently setting >>>>>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values. >>>>>>>> >>>>>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms >>>>>>>> of 128-bit types with shifting and masking, which the assembler can't do? It >>>>>>>> would be good to spell this out. 
>>>>>>>> >>>>>>>>> >>>>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com> >>>>>>>>> Cc: Will Deacon <will@kernel.org> >>>>>>>>> Cc: Mark Rutland <mark.rutland@arm.com> >>>>>>>>> Cc: Ard Biesheuvel <ardb@kernel.org> >>>>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com> >>>>>>>>> Cc: linux-arm-kernel@lists.infradead.org >>>>>>>>> Cc: linux-kernel@vger.kernel.org >>>>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> >>>>>>>>> --- >>>>>>>>> This patch applies on v6.15-rc1 >>>>>>>>> >>>>>>>>> arch/arm64/kernel/head.S | 3 +++ >>>>>>>>> arch/arm64/kernel/pi/map_range.c | 6 ++++++ >>>>>>>>> arch/arm64/kernel/pi/pi.h | 1 + >>>>>>>>> arch/arm64/mm/mmu.c | 1 + >>>>>>>>> arch/arm64/mm/proc.S | 5 +++-- >>>>>>>>> 5 files changed, 14 insertions(+), 2 deletions(-) >>>>>>>>> >>>>>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S >>>>>>>>> index 2ce73525de2c..4950d9cc638a 100644 >>>>>>>>> --- a/arch/arm64/kernel/head.S >>>>>>>>> +++ b/arch/arm64/kernel/head.S >>>>>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry) >>>>>>>>> * On return, the CPU will be ready for the MMU to be turned on and >>>>>>>>> * the TCR will have been set. >>>>>>>>> */ >>>>>>>>> + adr_l x0, pir_data >>>>>>>>> + bl __pi_load_pir_data >>>>>>>> >>>>>>>> Using C code to pre-calculate the values into global variables that the assembly >>>>>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we >>>>>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to >>>>>>>> asm-offsets.c: >>>>>>>> >>>>>>>> DEFINE(PIE_E0_ASM, PIE_E0); >>>>>>>> DEFINE(PIE_E1_ASM, PIE_E1); >>>>>>>> >>>>>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the >>>>>>>> pre-calculated values that you can then use in proc.S? >>>>>>>> >>>>>>> >>>>>>> There is another issue, which is that mov_q tries to be smart, and >>>>>>> emit fewer than 4 MOVZ/MOVK instructions if possible. 
So the .if >>>>>>> directive evaluates the argument, which does not work with symbolic >>>>>>> constants. >>>>>> >>>>>> I'm not quite understanding the detail here; what do you mean by "symbolic >>>>>> constants"? asm-offsets.h will provide something like: >>>>>> >>>>>> #define PIE_E0_ASM 1234567890 >>>>>> >>>>>> The current code is using a hash-define and that's working fine: >>>>>> >>>>>> mov_q x0, PIE_E0 >>>>>> >>>>>> >>>>>> Won't the C preprocessor just substitute and everything will work out? >>>>>> >>>>> >>>>> Yeah, you're right. I was experimenting with something like >>>>> >>>>> .set .Lpie_e0, PIE_E0_ASM >>>>> mov_q xN, .Lpie_e0 >>>>> >>>>> where this problem does exist, but we can just use PIE_E0_ASM directly >>>>> and things should work as expected. >>>> >>>> Ahh great, sounds like this should be pretty simple then! >>> >>> Following change works both on current and with D128 page tables. >>> >>> --- a/arch/arm64/kernel/asm-offsets.c >>> +++ b/arch/arm64/kernel/asm-offsets.c >>> @@ -182,5 +182,7 @@ int main(void) >>> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS >>> DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call)); >>> #endif >>> + DEFINE(PIE_E0_ASM, PIE_E0); >>> + DEFINE(PIE_E1_ASM, PIE_E1); >>> return 0; >>> } >>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S >>> index 737c99d79833..f45494425d09 100644 >>> --- a/arch/arm64/mm/proc.S >>> +++ b/arch/arm64/mm/proc.S >>> @@ -536,9 +536,9 @@ alternative_else_nop_endif >>> #define PTE_MAYBE_NG 0 >>> #define PTE_MAYBE_SHARED 0 >> >> I think at minimum, you can remove this PTE_MAYBE_* hack from proc.S. But as Ard >> says, you may need to add it to asm-offsets.c? I'm surprised asm-offsets.c even > > Moving PTE_MAYBE_* inside asm-offsets.c works as well in both cases > but still wondering why this is even required ? What am I missing ? Without the overrides: #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 
0 : PTE_SHARED) And these are used in the definition of PROT_DEFAULT, which is used in the definition of _PAGE_KERNEL*, which are used in the definition of PIE_E1. For the assembly code arm64_use_ng_mappings and lpa2_is_enabled() are not accessible. But we don't actually care about those bits so its just hacked to override the fields to 0. I would have expected the asm-offsets.c case to have a similer problem because we have not defined arm64_use_ng_mappings and lpa2_is_enabled(). asm-offsets.c is compiled as a standalone object then the macros are exfiltrated and asm-offsets.h is created. Perhaps the compiler is smart enough to see that we don't care about the bits in those fields (pte_pi_index() is only keeping selected bits) and asm-offsets.c can be safely compiled without the hack? > >> compiles without this hack since surely it doesn't have arm64_use_ng_mappings or >> is_realm_world() available? > Did not face any problem with defconfig for the mainline kernel and > both these symbols were visible in the built code. > >> >> Thanks, >> Ryan >> >>> >>> - mov_q x0, PIE_E0 >>> + mov_q x0, PIE_E0_ASM >>> msr REG_PIRE0_EL1, x0 >>> - mov_q x0, PIE_E1 >>> + mov_q x0, PIE_E1_ASM >>> msr REG_PIR_EL1, x0 >>> >>> #undef PTE_MAYBE_NG >>> >>
On 4/15/25 23:13, Ryan Roberts wrote: > On 15/04/2025 10:46, Anshuman Khandual wrote: >> >> >> On 4/15/25 13:23, Ryan Roberts wrote: >>> On 15/04/2025 07:27, Anshuman Khandual wrote: >>>> >>>> >>>> On 4/14/25 18:01, Ryan Roberts wrote: >>>>> On 14/04/2025 13:28, Ard Biesheuvel wrote: >>>>>> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote: >>>>>>> >>>>>>> On 14/04/2025 10:41, Ard Biesheuvel wrote: >>>>>>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote: >>>>>>>>> >>>>>>>>> On 10/04/2025 08:40, Anshuman Khandual wrote: >>>>>>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register >>>>>>>>>> as expected if those macro constants contain some 128 bit layout elements, >>>>>>>>>> required for D128 page tables. Fix this problem via first loading up these >>>>>>>>>> macro constants into a given memory location and then subsequently setting >>>>>>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values. >>>>>>>>> >>>>>>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms >>>>>>>>> of 128-bit types with shifting and masking, which the assembler can't do? It >>>>>>>>> would be good to spell this out. 
>>>>>>>>> >>>>>>>>>> >>>>>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com> >>>>>>>>>> Cc: Will Deacon <will@kernel.org> >>>>>>>>>> Cc: Mark Rutland <mark.rutland@arm.com> >>>>>>>>>> Cc: Ard Biesheuvel <ardb@kernel.org> >>>>>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com> >>>>>>>>>> Cc: linux-arm-kernel@lists.infradead.org >>>>>>>>>> Cc: linux-kernel@vger.kernel.org >>>>>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> >>>>>>>>>> --- >>>>>>>>>> This patch applies on v6.15-rc1 >>>>>>>>>> >>>>>>>>>> arch/arm64/kernel/head.S | 3 +++ >>>>>>>>>> arch/arm64/kernel/pi/map_range.c | 6 ++++++ >>>>>>>>>> arch/arm64/kernel/pi/pi.h | 1 + >>>>>>>>>> arch/arm64/mm/mmu.c | 1 + >>>>>>>>>> arch/arm64/mm/proc.S | 5 +++-- >>>>>>>>>> 5 files changed, 14 insertions(+), 2 deletions(-) >>>>>>>>>> >>>>>>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S >>>>>>>>>> index 2ce73525de2c..4950d9cc638a 100644 >>>>>>>>>> --- a/arch/arm64/kernel/head.S >>>>>>>>>> +++ b/arch/arm64/kernel/head.S >>>>>>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry) >>>>>>>>>> * On return, the CPU will be ready for the MMU to be turned on and >>>>>>>>>> * the TCR will have been set. >>>>>>>>>> */ >>>>>>>>>> + adr_l x0, pir_data >>>>>>>>>> + bl __pi_load_pir_data >>>>>>>>> >>>>>>>>> Using C code to pre-calculate the values into global variables that the assembly >>>>>>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we >>>>>>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to >>>>>>>>> asm-offsets.c: >>>>>>>>> >>>>>>>>> DEFINE(PIE_E0_ASM, PIE_E0); >>>>>>>>> DEFINE(PIE_E1_ASM, PIE_E1); >>>>>>>>> >>>>>>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the >>>>>>>>> pre-calculated values that you can then use in proc.S? >>>>>>>>> >>>>>>>> >>>>>>>> There is another issue, which is that mov_q tries to be smart, and >>>>>>>> emit fewer than 4 MOVZ/MOVK instructions if possible. 
So the .if >>>>>>>> directive evaluates the argument, which does not work with symbolic >>>>>>>> constants. >>>>>>> >>>>>>> I'm not quite understanding the detail here; what do you mean by "symbolic >>>>>>> constants"? asm-offsets.h will provide something like: >>>>>>> >>>>>>> #define PIE_E0_ASM 1234567890 >>>>>>> >>>>>>> The current code is using a hash-define and that's working fine: >>>>>>> >>>>>>> mov_q x0, PIE_E0 >>>>>>> >>>>>>> >>>>>>> Won't the C preprocessor just substitute and everything will work out? >>>>>>> >>>>>> >>>>>> Yeah, you're right. I was experimenting with something like >>>>>> >>>>>> .set .Lpie_e0, PIE_E0_ASM >>>>>> mov_q xN, .Lpie_e0 >>>>>> >>>>>> where this problem does exist, but we can just use PIE_E0_ASM directly >>>>>> and things should work as expected. >>>>> >>>>> Ahh great, sounds like this should be pretty simple then! >>>> >>>> Following change works both on current and with D128 page tables. >>>> >>>> --- a/arch/arm64/kernel/asm-offsets.c >>>> +++ b/arch/arm64/kernel/asm-offsets.c >>>> @@ -182,5 +182,7 @@ int main(void) >>>> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS >>>> DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call)); >>>> #endif >>>> + DEFINE(PIE_E0_ASM, PIE_E0); >>>> + DEFINE(PIE_E1_ASM, PIE_E1); >>>> return 0; >>>> } >>>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S >>>> index 737c99d79833..f45494425d09 100644 >>>> --- a/arch/arm64/mm/proc.S >>>> +++ b/arch/arm64/mm/proc.S >>>> @@ -536,9 +536,9 @@ alternative_else_nop_endif >>>> #define PTE_MAYBE_NG 0 >>>> #define PTE_MAYBE_SHARED 0 >>> >>> I think at minimum, you can remove this PTE_MAYBE_* hack from proc.S. But as Ard >>> says, you may need to add it to asm-offsets.c? I'm surprised asm-offsets.c even >> >> Moving PTE_MAYBE_* inside asm-offsets.c works as well in both cases >> but still wondering why this is even required ? What am I missing ? > > Without the overrides: > > #define PTE_MAYBE_NG (arm64_use_ng_mappings ? 
PTE_NG : 0) > #define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED) > > And these are used in the definition of PROT_DEFAULT, which is used in the > definition of _PAGE_KERNEL*, which are used in the definition of PIE_E1. > > For the assembly code arm64_use_ng_mappings and lpa2_is_enabled() are not > accessible. But we don't actually care about those bits so its just hacked to > override the fields to 0. Alright. > > I would have expected the asm-offsets.c case to have a similer problem because > we have not defined arm64_use_ng_mappings and lpa2_is_enabled(). asm-offsets.c > is compiled as a standalone object then the macros are exfiltrated and > asm-offsets.h is created. Understood. > > Perhaps the compiler is smart enough to see that we don't care about the bits in > those fields (pte_pi_index() is only keeping selected bits) and asm-offsets.c > can be safely compiled without the hack? I guess the best thing here would be to move PTE_MAYBE_* inside asm-offsets.c to be on the safer side, rather than depending on what compiler might do otherwise. I will respin the patch with both the changes included. > >> >>> compiles without this hack since surely it doesn't have arm64_use_ng_mappings or >>> is_realm_world() available? >> Did not face any problem with defconfig for the mainline kernel and >> both these symbols were visible in the built code. >> >>> >>> Thanks, >>> Ryan >>> >>>> >>>> - mov_q x0, PIE_E0 >>>> + mov_q x0, PIE_E0_ASM >>>> msr REG_PIRE0_EL1, x0 >>>> - mov_q x0, PIE_E1 >>>> + mov_q x0, PIE_E1_ASM >>>> msr REG_PIR_EL1, x0 >>>> >>>> #undef PTE_MAYBE_NG >>>> >>> >
Hi Anshuman,
On Thu, 10 Apr 2025 at 09:40, Anshuman Khandual
<anshuman.khandual@arm.com> wrote:
>
> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
> as expected if those macro constants contain some 128 bit layout elements,
> required for D128 page tables.
Could you elaborate?
> Fix this problem via first loading up these
> macro constants into a given memory location and then subsequently setting
> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>
If this is necessary, we could also remove the PTE_MAYBE_xx override hack no?
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Ard Biesheuvel <ardb@kernel.org>
> Cc: Ryan Roberts <ryan.roberts@arm.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
> This patch applies on v6.15-rc1
>
> arch/arm64/kernel/head.S | 3 +++
> arch/arm64/kernel/pi/map_range.c | 6 ++++++
> arch/arm64/kernel/pi/pi.h | 1 +
> arch/arm64/mm/mmu.c | 1 +
> arch/arm64/mm/proc.S | 5 +++--
> 5 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2ce73525de2c..4950d9cc638a 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
> * On return, the CPU will be ready for the MMU to be turned on and
> * the TCR will have been set.
> */
> + adr_l x0, pir_data
> + bl __pi_load_pir_data
> +
> bl __cpu_setup // initialise processor
> b __primary_switch
> SYM_CODE_END(primary_entry)
> diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
> index 81345f68f9fc..cd9d24e73046 100644
> --- a/arch/arm64/kernel/pi/map_range.c
> +++ b/arch/arm64/kernel/pi/map_range.c
> @@ -103,3 +103,9 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
>
> return ptep;
> }
> +
> +asmlinkage void __init load_pir_data(u64 pir_data[])
> +{
> + pir_data[0] = PIE_E0;
> + pir_data[1] = PIE_E1;
> +}
> diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
> index c91e5e965cd3..ae61df4fdb77 100644
> --- a/arch/arm64/kernel/pi/pi.h
> +++ b/arch/arm64/kernel/pi/pi.h
> @@ -34,3 +34,4 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
> asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
>
> asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
> +asmlinkage void load_pir_data(u64 pir_data[]);
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index ea6695d53fb9..762e81ff4e85 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -58,6 +58,7 @@ static bool rodata_is_rw __ro_after_init = true;
> * with MMU turned off.
> */
> long __section(".mmuoff.data.write") __early_cpu_boot_status;
> +unsigned long __section(".mmuoff.data.write") pir_data[2];
>
> /*
> * Empty_zero_page is a special page that is used for zero-initialized data
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index fb30c8804f87..7dd28cf101c2 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -524,9 +524,10 @@ alternative_else_nop_endif
> #define PTE_MAYBE_NG 0
> #define PTE_MAYBE_SHARED 0
>
> - mov_q x0, PIE_E0
> + adr_l x1, pir_data
> + ldr x0, [x1, #0]
> msr REG_PIRE0_EL1, x0
> - mov_q x0, PIE_E1
> + ldr x0, [x1, #8]
> msr REG_PIR_EL1, x0
>
> #undef PTE_MAYBE_NG
> --
> 2.25.1
>
On 4/10/25 17:58, Ard Biesheuvel wrote:
> Hi Anshuman,
>
> On Thu, 10 Apr 2025 at 09:40, Anshuman Khandual
> <anshuman.khandual@arm.com> wrote:
>>
>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
>> as expected if those macro constants contain some 128 bit layout elements,
>> required for D128 page tables.
>
> Could you elaborate?
Without this change in place, the following build error comes up.
arch/arm64/mm/proc.S: Assembler messages:
arch/arm64/mm/proc.S:539: Error: too many positional arguments
arch/arm64/mm/proc.S:541: Error: too many positional arguments
make[4]: *** [scripts/Makefile.build:335: arch/arm64/mm/proc.o] Error 1
make[4]: *** Waiting for unfinished jobs....
make[3]: *** [scripts/Makefile.build:461: arch/arm64/mm] Error 2
make[3]: *** Waiting for unfinished jobs...
SYM_FUNC_START(__cpu_setup)
..........................
..........................
#define PTE_MAYBE_NG 0
#define PTE_MAYBE_SHARED 0
mov_q x0, PIE_E0 (arch/arm64/mm/proc.S:539)
msr REG_PIRE0_EL1, x0
mov_q x0, PIE_E1 (arch/arm64/mm/proc.S:541)
msr REG_PIR_EL1, x0
#undef PTE_MAYBE_NG
#undef PTE_MAYBE_SHARED
..........................
..........................
Please note the following PIE_E0 definition in the D128 context.
#define PTE_PI_MASK GENMASK_U128(118, 115)
#define PTE_PI_SHIFT 115
#define pte_pi_index(pte) (((pte) & PTE_PI_MASK) >> PTE_PI_SHIFT)
#define PIE_E0 ( \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW_O))
Here the _PAGE_XXX definitions might contain page flags beyond just
64 bits as well.
>
>> Fix this problem via first loading up these
>> macro constants into a given memory location and then subsequently setting
>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>>
>
> If this is necessary, we could also remove the PTE_MAYBE_xx override hack no?
Could you please elaborate? Not sure if PTE_MAYBE_xx is the problem here.
>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will@kernel.org>
>> Cc: Mark Rutland <mark.rutland@arm.com>
>> Cc: Ard Biesheuvel <ardb@kernel.org>
>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>> Cc: linux-arm-kernel@lists.infradead.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>> ---
>> This patch applies on v6.15-rc1
>>
>> arch/arm64/kernel/head.S | 3 +++
>> arch/arm64/kernel/pi/map_range.c | 6 ++++++
>> arch/arm64/kernel/pi/pi.h | 1 +
>> arch/arm64/mm/mmu.c | 1 +
>> arch/arm64/mm/proc.S | 5 +++--
>> 5 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 2ce73525de2c..4950d9cc638a 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>> * On return, the CPU will be ready for the MMU to be turned on and
>> * the TCR will have been set.
>> */
>> + adr_l x0, pir_data
>> + bl __pi_load_pir_data
>> +
>> bl __cpu_setup // initialise processor
>> b __primary_switch
>> SYM_CODE_END(primary_entry)
>> diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
>> index 81345f68f9fc..cd9d24e73046 100644
>> --- a/arch/arm64/kernel/pi/map_range.c
>> +++ b/arch/arm64/kernel/pi/map_range.c
>> @@ -103,3 +103,9 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
>>
>> return ptep;
>> }
>> +
>> +asmlinkage void __init load_pir_data(u64 pir_data[])
>> +{
>> + pir_data[0] = PIE_E0;
>> + pir_data[1] = PIE_E1;
>> +}
>> diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
>> index c91e5e965cd3..ae61df4fdb77 100644
>> --- a/arch/arm64/kernel/pi/pi.h
>> +++ b/arch/arm64/kernel/pi/pi.h
>> @@ -34,3 +34,4 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
>> asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
>>
>> asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
>> +asmlinkage void load_pir_data(u64 pir_data[]);
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index ea6695d53fb9..762e81ff4e85 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -58,6 +58,7 @@ static bool rodata_is_rw __ro_after_init = true;
>> * with MMU turned off.
>> */
>> long __section(".mmuoff.data.write") __early_cpu_boot_status;
>> +unsigned long __section(".mmuoff.data.write") pir_data[2];
>>
>> /*
>> * Empty_zero_page is a special page that is used for zero-initialized data
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index fb30c8804f87..7dd28cf101c2 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -524,9 +524,10 @@ alternative_else_nop_endif
>> #define PTE_MAYBE_NG 0
>> #define PTE_MAYBE_SHARED 0
>>
>> - mov_q x0, PIE_E0
>> + adr_l x1, pir_data
>> + ldr x0, [x1, #0]
>> msr REG_PIRE0_EL1, x0
>> - mov_q x0, PIE_E1
>> + ldr x0, [x1, #8]
>> msr REG_PIR_EL1, x0
>>
>> #undef PTE_MAYBE_NG
>> --
>> 2.25.1
>>
© 2016 - 2025 Red Hat, Inc.