[PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1

Anshuman Khandual posted 1 patch 8 months, 1 week ago
There is a newer version of this series
arch/arm64/kernel/head.S         | 3 +++
arch/arm64/kernel/pi/map_range.c | 6 ++++++
arch/arm64/kernel/pi/pi.h        | 1 +
arch/arm64/mm/mmu.c              | 1 +
arch/arm64/mm/proc.S             | 5 +++--
5 files changed, 14 insertions(+), 2 deletions(-)
[PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Anshuman Khandual 8 months, 1 week ago
mov_q cannot really move PIE_E[0|1] macros into a general purpose register
as expected if those macro constants contain some 128 bit layout elements,
required for D128 page tables. Fix this problem via first loading up these
macro constants into a given memory location and then subsequently setting
up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
This patch applies on v6.15-rc1

 arch/arm64/kernel/head.S         | 3 +++
 arch/arm64/kernel/pi/map_range.c | 6 ++++++
 arch/arm64/kernel/pi/pi.h        | 1 +
 arch/arm64/mm/mmu.c              | 1 +
 arch/arm64/mm/proc.S             | 5 +++--
 5 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2ce73525de2c..4950d9cc638a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
 	 * On return, the CPU will be ready for the MMU to be turned on and
 	 * the TCR will have been set.
 	 */
+	adr_l	x0, pir_data
+	bl      __pi_load_pir_data
+
 	bl	__cpu_setup			// initialise processor
 	b	__primary_switch
 SYM_CODE_END(primary_entry)
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
index 81345f68f9fc..cd9d24e73046 100644
--- a/arch/arm64/kernel/pi/map_range.c
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -103,3 +103,9 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
 
 	return ptep;
 }
+
+asmlinkage void __init load_pir_data(u64 pir_data[])
+{
+	pir_data[0] = PIE_E0;
+	pir_data[1] = PIE_E1;
+}
diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
index c91e5e965cd3..ae61df4fdb77 100644
--- a/arch/arm64/kernel/pi/pi.h
+++ b/arch/arm64/kernel/pi/pi.h
@@ -34,3 +34,4 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
 asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
 
 asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
+asmlinkage void load_pir_data(u64 pir_data[]);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ea6695d53fb9..762e81ff4e85 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -58,6 +58,7 @@ static bool rodata_is_rw __ro_after_init = true;
  * with MMU turned off.
  */
 long __section(".mmuoff.data.write") __early_cpu_boot_status;
+unsigned long __section(".mmuoff.data.write") pir_data[2];
 
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index fb30c8804f87..7dd28cf101c2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -524,9 +524,10 @@ alternative_else_nop_endif
 #define PTE_MAYBE_NG		0
 #define PTE_MAYBE_SHARED	0
 
-	mov_q	x0, PIE_E0
+	adr_l	x1, pir_data
+	ldr	x0, [x1, #0]
 	msr	REG_PIRE0_EL1, x0
-	mov_q	x0, PIE_E1
+	ldr	x0, [x1, #8]
 	msr	REG_PIR_EL1, x0
 
 #undef PTE_MAYBE_NG
-- 
2.25.1
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Ryan Roberts 8 months, 1 week ago
On 10/04/2025 08:40, Anshuman Khandual wrote:
> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
> as expected if those macro constants contain some 128 bit layout elements,
> required for D128 page tables. Fix this problem via first loading up these
> macro constants into a given memory location and then subsequently setting
> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.

From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
of 128-bit types with shifting and masking, which the assembler can't do? It
would be good to spell this out.

> 
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Ard Biesheuvel <ardb@kernel.org>
> Cc: Ryan Roberts <ryan.roberts@arm.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
> This patch applies on v6.15-rc1
> 
>  arch/arm64/kernel/head.S         | 3 +++
>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
>  arch/arm64/kernel/pi/pi.h        | 1 +
>  arch/arm64/mm/mmu.c              | 1 +
>  arch/arm64/mm/proc.S             | 5 +++--
>  5 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2ce73525de2c..4950d9cc638a 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>  	 * On return, the CPU will be ready for the MMU to be turned on and
>  	 * the TCR will have been set.
>  	 */
> +	adr_l	x0, pir_data
> +	bl      __pi_load_pir_data

Using C code to pre-calculate the values into global variables that the assembly
code then loads and stuffs into the PIR registers feels hacky. I wonder if we
can instead pre-calculate into asm-offsets.h? e.g. add the following to
asm-offsets.c:

DEFINE(PIE_E0_ASM, PIE_E0);
DEFINE(PIE_E1_ASM, PIE_E1);

Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
pre-calculated values that you can then use in proc.S?

Or if that doesn't work for some reason, is it possible to store to the PIR
registers directly from the C code?

Thanks,
Ryan

> +
>  	bl	__cpu_setup			// initialise processor
>  	b	__primary_switch
>  SYM_CODE_END(primary_entry)
> diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
> index 81345f68f9fc..cd9d24e73046 100644
> --- a/arch/arm64/kernel/pi/map_range.c
> +++ b/arch/arm64/kernel/pi/map_range.c
> @@ -103,3 +103,9 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
>  
>  	return ptep;
>  }
> +
> +asmlinkage void __init load_pir_data(u64 pir_data[])
> +{
> +	pir_data[0] = PIE_E0;
> +	pir_data[1] = PIE_E1;
> +}
> diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
> index c91e5e965cd3..ae61df4fdb77 100644
> --- a/arch/arm64/kernel/pi/pi.h
> +++ b/arch/arm64/kernel/pi/pi.h
> @@ -34,3 +34,4 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
>  asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
>  
>  asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
> +asmlinkage void load_pir_data(u64 pir_data[]);
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index ea6695d53fb9..762e81ff4e85 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -58,6 +58,7 @@ static bool rodata_is_rw __ro_after_init = true;
>   * with MMU turned off.
>   */
>  long __section(".mmuoff.data.write") __early_cpu_boot_status;
> +unsigned long __section(".mmuoff.data.write") pir_data[2];
>  
>  /*
>   * Empty_zero_page is a special page that is used for zero-initialized data
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index fb30c8804f87..7dd28cf101c2 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -524,9 +524,10 @@ alternative_else_nop_endif
>  #define PTE_MAYBE_NG		0
>  #define PTE_MAYBE_SHARED	0
>  
> -	mov_q	x0, PIE_E0
> +	adr_l	x1, pir_data
> +	ldr	x0, [x1, #0]
>  	msr	REG_PIRE0_EL1, x0
> -	mov_q	x0, PIE_E1
> +	ldr	x0, [x1, #8]
>  	msr	REG_PIR_EL1, x0
>  
>  #undef PTE_MAYBE_NG
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Ard Biesheuvel 8 months, 1 week ago
On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
>
> On 10/04/2025 08:40, Anshuman Khandual wrote:
> > mov_q cannot really move PIE_E[0|1] macros into a general purpose register
> > as expected if those macro constants contain some 128 bit layout elements,
> > required for D128 page tables. Fix this problem via first loading up these
> > macro constants into a given memory location and then subsequently setting
> > up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>
> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
> of 128-bit types with shifting and masking, which the assembler can't do? It
> would be good to spell this out.
>
> >
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Will Deacon <will@kernel.org>
> > Cc: Mark Rutland <mark.rutland@arm.com>
> > Cc: Ard Biesheuvel <ardb@kernel.org>
> > Cc: Ryan Roberts <ryan.roberts@arm.com>
> > Cc: linux-arm-kernel@lists.infradead.org
> > Cc: linux-kernel@vger.kernel.org
> > Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> > ---
> > This patch applies on v6.15-rc1
> >
> >  arch/arm64/kernel/head.S         | 3 +++
> >  arch/arm64/kernel/pi/map_range.c | 6 ++++++
> >  arch/arm64/kernel/pi/pi.h        | 1 +
> >  arch/arm64/mm/mmu.c              | 1 +
> >  arch/arm64/mm/proc.S             | 5 +++--
> >  5 files changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > index 2ce73525de2c..4950d9cc638a 100644
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
> >        * On return, the CPU will be ready for the MMU to be turned on and
> >        * the TCR will have been set.
> >        */
> > +     adr_l   x0, pir_data
> > +     bl      __pi_load_pir_data
>
> Using C code to pre-calculate the values into global variables that the assembly
> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
> can instead pre-calculate into asm-offsets.h? e.g. add the following to
> asm-offsets.c:
>
> DEFINE(PIE_E0_ASM, PIE_E0);
> DEFINE(PIE_E1_ASM, PIE_E1);
>
> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
> pre-calculated values that you can then use in proc.S?
>

There is another issue, which is that mov_q tries to be smart, and
emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
directive evaluates the argument, which does not work with symbolic
constants.

I wouldn't mind just dropping that, i.e.,

--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -545,17 +545,9 @@ alternative_endif
         *         magnitude and sign of the operand)
         */
        .macro  mov_q, reg, val
-       .if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
-       movz    \reg, :abs_g1_s:\val
-       .else
-       .if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
-       movz    \reg, :abs_g2_s:\val
-       .else
        movz    \reg, :abs_g3:\val
        movk    \reg, :abs_g2_nc:\val
-       .endif
        movk    \reg, :abs_g1_nc:\val
-       .endif
        movk    \reg, :abs_g0_nc:\val
        .endm

Then, we can apply Ryan's trick to move these constants into
asm-offsets.c, but you'll need to tweak the PTE_MAYBE macros as well:

--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -182,5 +182,22 @@ int main(void)
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
   DEFINE(FTRACE_OPS_DIRECT_CALL,       offsetof(struct ftrace_ops,
direct_call));
 #endif
+#undef PTE_MAYBE_NG
+#define PTE_MAYBE_NG           0
+
+#undef PTE_MAYBE_SHARED
+#define PTE_MAYBE_SHARED       0
+
+  DEFINE(PIE_E0_ASM, PIE_E0);
+  DEFINE(PIE_E1_ASM, PIE_E1);
   return 0;
 }

(We should also move the comment from proc.S to asm-offsets.c but I
omitted that here for brevity.)

Then you should be able to use PIE_En_ASM in mov_q instructions without issue.

Alternatively, if changing the implementation of mov_q creates any
problems, we can just add a variant of that macro that lacks the
conditional directives and always emits MOVZ/MOVK/MOVK/MOVK
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Ryan Roberts 8 months, 1 week ago
On 14/04/2025 10:41, Ard Biesheuvel wrote:
> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>
>> On 10/04/2025 08:40, Anshuman Khandual wrote:
>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
>>> as expected if those macro constants contain some 128 bit layout elements,
>>> required for D128 page tables. Fix this problem via first loading up these
>>> macro constants into a given memory location and then subsequently setting
>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>>
>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
>> of 128-bit types with shifting and masking, which the assembler can't do? It
>> would be good to spell this out.
>>
>>>
>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>> Cc: Will Deacon <will@kernel.org>
>>> Cc: Mark Rutland <mark.rutland@arm.com>
>>> Cc: Ard Biesheuvel <ardb@kernel.org>
>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>> Cc: linux-arm-kernel@lists.infradead.org
>>> Cc: linux-kernel@vger.kernel.org
>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>>> ---
>>> This patch applies on v6.15-rc1
>>>
>>>  arch/arm64/kernel/head.S         | 3 +++
>>>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
>>>  arch/arm64/kernel/pi/pi.h        | 1 +
>>>  arch/arm64/mm/mmu.c              | 1 +
>>>  arch/arm64/mm/proc.S             | 5 +++--
>>>  5 files changed, 14 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>>> index 2ce73525de2c..4950d9cc638a 100644
>>> --- a/arch/arm64/kernel/head.S
>>> +++ b/arch/arm64/kernel/head.S
>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>>>        * On return, the CPU will be ready for the MMU to be turned on and
>>>        * the TCR will have been set.
>>>        */
>>> +     adr_l   x0, pir_data
>>> +     bl      __pi_load_pir_data
>>
>> Using C code to pre-calculate the values into global variables that the assembly
>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
>> can instead pre-calculate into asm-offsets.h? e.g. add the following to
>> asm-offsets.c:
>>
>> DEFINE(PIE_E0_ASM, PIE_E0);
>> DEFINE(PIE_E1_ASM, PIE_E1);
>>
>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
>> pre-calculated values that you can then use in proc.S?
>>
> 
> There is another issue, which is that mov_q tries to be smart, and
> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
> directive evaluates the argument, which does not work with symbolic
> constants.

I'm not quite understanding the detail here; what do you mean by "symbolic
constants"? asm-offsets.h will provide something like:

#define PIE_E0_ASM 1234567890

The current code is using a hash-define and that's working fine:

mov_q	x0, PIE_E0


Won't the C preprocessor just substitute and everything will work out?

Thanks,
Ryan

> 
> I wouldn't mind just dropping that, i.e.,
> 
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -545,17 +545,9 @@ alternative_endif
>          *         magnitude and sign of the operand)
>          */
>         .macro  mov_q, reg, val
> -       .if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
> -       movz    \reg, :abs_g1_s:\val
> -       .else
> -       .if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
> -       movz    \reg, :abs_g2_s:\val
> -       .else
>         movz    \reg, :abs_g3:\val
>         movk    \reg, :abs_g2_nc:\val
> -       .endif
>         movk    \reg, :abs_g1_nc:\val
> -       .endif
>         movk    \reg, :abs_g0_nc:\val
>         .endm
> 
> Then, we can apply Ryan's trick to move these constants into
> asm-offsets.c, but you'll need to tweak the PTE_MAYBE macros as well:
> 
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -182,5 +182,22 @@ int main(void)
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
>    DEFINE(FTRACE_OPS_DIRECT_CALL,       offsetof(struct ftrace_ops,
> direct_call));
>  #endif
> +#undef PTE_MAYBE_NG
> +#define PTE_MAYBE_NG           0
> +
> +#undef PTE_MAYBE_SHARED
> +#define PTE_MAYBE_SHARED       0
> +
> +  DEFINE(PIE_E0_ASM, PIE_E0);
> +  DEFINE(PIE_E1_ASM, PIE_E1);
>    return 0;
>  }
> 
> (We should also move the comment from proc.S to asm-offsets.c but I
> omitted that here for brevity.)
> 
> Then you should be able to use PIE_En_ASM in mov_q instructions without issue.
> 
> Alternatively, if changing the implementation of mov_q creates any
> problems, we can just add a variant of that macro that lacks the
> conditional directives and always emits MOVZ/MOVK/MOVK/MOVK
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Ard Biesheuvel 8 months, 1 week ago
On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote:
>
> On 14/04/2025 10:41, Ard Biesheuvel wrote:
> > On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
> >>
> >> On 10/04/2025 08:40, Anshuman Khandual wrote:
> >>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
> >>> as expected if those macro constants contain some 128 bit layout elements,
> >>> required for D128 page tables. Fix this problem via first loading up these
> >>> macro constants into a given memory location and then subsequently setting
> >>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
> >>
> >> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
> >> of 128-bit types with shifting and masking, which the assembler can't do? It
> >> would be good to spell this out.
> >>
> >>>
> >>> Cc: Catalin Marinas <catalin.marinas@arm.com>
> >>> Cc: Will Deacon <will@kernel.org>
> >>> Cc: Mark Rutland <mark.rutland@arm.com>
> >>> Cc: Ard Biesheuvel <ardb@kernel.org>
> >>> Cc: Ryan Roberts <ryan.roberts@arm.com>
> >>> Cc: linux-arm-kernel@lists.infradead.org
> >>> Cc: linux-kernel@vger.kernel.org
> >>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> >>> ---
> >>> This patch applies on v6.15-rc1
> >>>
> >>>  arch/arm64/kernel/head.S         | 3 +++
> >>>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
> >>>  arch/arm64/kernel/pi/pi.h        | 1 +
> >>>  arch/arm64/mm/mmu.c              | 1 +
> >>>  arch/arm64/mm/proc.S             | 5 +++--
> >>>  5 files changed, 14 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> >>> index 2ce73525de2c..4950d9cc638a 100644
> >>> --- a/arch/arm64/kernel/head.S
> >>> +++ b/arch/arm64/kernel/head.S
> >>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
> >>>        * On return, the CPU will be ready for the MMU to be turned on and
> >>>        * the TCR will have been set.
> >>>        */
> >>> +     adr_l   x0, pir_data
> >>> +     bl      __pi_load_pir_data
> >>
> >> Using C code to pre-calculate the values into global variables that the assembly
> >> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
> >> can instead pre-calculate into asm-offsets.h? e.g. add the following to
> >> asm-offsets.c:
> >>
> >> DEFINE(PIE_E0_ASM, PIE_E0);
> >> DEFINE(PIE_E1_ASM, PIE_E1);
> >>
> >> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
> >> pre-calculated values that you can then use in proc.S?
> >>
> >
> > There is another issue, which is that mov_q tries to be smart, and
> > emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
> > directive evaluates the argument, which does not work with symbolic
> > constants.
>
> I'm not quite understanding the detail here; what do you mean by "symbolic
> constants"? asm-offsets.h will provide something like:
>
> #define PIE_E0_ASM 1234567890
>
> The current code is using a hash-define and that's working fine:
>
> mov_q   x0, PIE_E0
>
>
> Won't the C preprocessor just substitute and everything will work out?
>

Yeah, you're right. I was experimenting with something like

.set .Lpie_e0, PIE_E0_ASM
mov_q xN, .Lpie_e0

where this problem does exist, but we can just use PIE_E0_ASM directly
and things should work as expected.
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Ryan Roberts 8 months, 1 week ago
On 14/04/2025 13:28, Ard Biesheuvel wrote:
> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>
>> On 14/04/2025 10:41, Ard Biesheuvel wrote:
>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>
>>>> On 10/04/2025 08:40, Anshuman Khandual wrote:
>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
>>>>> as expected if those macro constants contain some 128 bit layout elements,
>>>>> required for D128 page tables. Fix this problem via first loading up these
>>>>> macro constants into a given memory location and then subsequently setting
>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>>>>
>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
>>>> of 128-bit types with shifting and masking, which the assembler can't do? It
>>>> would be good to spell this out.
>>>>
>>>>>
>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>> Cc: Will Deacon <will@kernel.org>
>>>>> Cc: Mark Rutland <mark.rutland@arm.com>
>>>>> Cc: Ard Biesheuvel <ardb@kernel.org>
>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>>>> Cc: linux-arm-kernel@lists.infradead.org
>>>>> Cc: linux-kernel@vger.kernel.org
>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>>>>> ---
>>>>> This patch applies on v6.15-rc1
>>>>>
>>>>>  arch/arm64/kernel/head.S         | 3 +++
>>>>>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
>>>>>  arch/arm64/kernel/pi/pi.h        | 1 +
>>>>>  arch/arm64/mm/mmu.c              | 1 +
>>>>>  arch/arm64/mm/proc.S             | 5 +++--
>>>>>  5 files changed, 14 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>>>>> index 2ce73525de2c..4950d9cc638a 100644
>>>>> --- a/arch/arm64/kernel/head.S
>>>>> +++ b/arch/arm64/kernel/head.S
>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>>>>>        * On return, the CPU will be ready for the MMU to be turned on and
>>>>>        * the TCR will have been set.
>>>>>        */
>>>>> +     adr_l   x0, pir_data
>>>>> +     bl      __pi_load_pir_data
>>>>
>>>> Using C code to pre-calculate the values into global variables that the assembly
>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to
>>>> asm-offsets.c:
>>>>
>>>> DEFINE(PIE_E0_ASM, PIE_E0);
>>>> DEFINE(PIE_E1_ASM, PIE_E1);
>>>>
>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
>>>> pre-calculated values that you can then use in proc.S?
>>>>
>>>
>>> There is another issue, which is that mov_q tries to be smart, and
>>> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
>>> directive evaluates the argument, which does not work with symbolic
>>> constants.
>>
>> I'm not quite understanding the detail here; what do you mean by "symbolic
>> constants"? asm-offsets.h will provide something like:
>>
>> #define PIE_E0_ASM 1234567890
>>
>> The current code is using a hash-define and that's working fine:
>>
>> mov_q   x0, PIE_E0
>>
>>
>> Won't the C preprocessor just substitute and everything will work out?
>>
> 
> Yeah, you're right. I was experimenting with something like
> 
> .set .Lpie_e0, PIE_E0_ASM
> mov_q xN, .Lpie_e0
> 
> where this problem does exist, but we can just use PIE_E0_ASM directly
> and things should work as expected.

Ahh great, sounds like this should be pretty simple then!
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Anshuman Khandual 8 months, 1 week ago

On 4/14/25 18:01, Ryan Roberts wrote:
> On 14/04/2025 13:28, Ard Biesheuvel wrote:
>> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>
>>> On 14/04/2025 10:41, Ard Biesheuvel wrote:
>>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>>
>>>>> On 10/04/2025 08:40, Anshuman Khandual wrote:
>>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
>>>>>> as expected if those macro constants contain some 128 bit layout elements,
>>>>>> required for D128 page tables. Fix this problem via first loading up these
>>>>>> macro constants into a given memory location and then subsequently setting
>>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>>>>>
>>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
>>>>> of 128-bit types with shifting and masking, which the assembler can't do? It
>>>>> would be good to spell this out.
>>>>>
>>>>>>
>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>> Cc: Will Deacon <will@kernel.org>
>>>>>> Cc: Mark Rutland <mark.rutland@arm.com>
>>>>>> Cc: Ard Biesheuvel <ardb@kernel.org>
>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>>>>> Cc: linux-arm-kernel@lists.infradead.org
>>>>>> Cc: linux-kernel@vger.kernel.org
>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>>>>>> ---
>>>>>> This patch applies on v6.15-rc1
>>>>>>
>>>>>>  arch/arm64/kernel/head.S         | 3 +++
>>>>>>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
>>>>>>  arch/arm64/kernel/pi/pi.h        | 1 +
>>>>>>  arch/arm64/mm/mmu.c              | 1 +
>>>>>>  arch/arm64/mm/proc.S             | 5 +++--
>>>>>>  5 files changed, 14 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>>>>>> index 2ce73525de2c..4950d9cc638a 100644
>>>>>> --- a/arch/arm64/kernel/head.S
>>>>>> +++ b/arch/arm64/kernel/head.S
>>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>>>>>>        * On return, the CPU will be ready for the MMU to be turned on and
>>>>>>        * the TCR will have been set.
>>>>>>        */
>>>>>> +     adr_l   x0, pir_data
>>>>>> +     bl      __pi_load_pir_data
>>>>>
>>>>> Using C code to pre-calculate the values into global variables that the assembly
>>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
>>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to
>>>>> asm-offsets.c:
>>>>>
>>>>> DEFINE(PIE_E0_ASM, PIE_E0);
>>>>> DEFINE(PIE_E1_ASM, PIE_E1);
>>>>>
>>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
>>>>> pre-calculated values that you can then use in proc.S?
>>>>>
>>>>
>>>> There is another issue, which is that mov_q tries to be smart, and
>>>> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
>>>> directive evaluates the argument, which does not work with symbolic
>>>> constants.
>>>
>>> I'm not quite understanding the detail here; what do you mean by "symbolic
>>> constants"? asm-offsets.h will provide something like:
>>>
>>> #define PIE_E0_ASM 1234567890
>>>
>>> The current code is using a hash-define and that's working fine:
>>>
>>> mov_q   x0, PIE_E0
>>>
>>>
>>> Won't the C preprocessor just substitute and everything will work out?
>>>
>>
>> Yeah, you're right. I was experimenting with something like
>>
>> .set .Lpie_e0, PIE_E0_ASM
>> mov_q xN, .Lpie_e0
>>
>> where this problem does exist, but we can just use PIE_E0_ASM directly
>> and things should work as expected.
> 
> Ahh great, sounds like this should be pretty simple then!

Following change works both on current and with D128 page tables.

--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -182,5 +182,7 @@ int main(void)
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
   DEFINE(FTRACE_OPS_DIRECT_CALL,       offsetof(struct ftrace_ops, direct_call));
 #endif
+  DEFINE(PIE_E0_ASM, PIE_E0);
+  DEFINE(PIE_E1_ASM, PIE_E1);
   return 0;
 }
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 737c99d79833..f45494425d09 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -536,9 +536,9 @@ alternative_else_nop_endif
 #define PTE_MAYBE_NG           0
 #define PTE_MAYBE_SHARED       0
 
-       mov_q   x0, PIE_E0
+       mov_q   x0, PIE_E0_ASM
        msr     REG_PIRE0_EL1, x0
-       mov_q   x0, PIE_E1
+       mov_q   x0, PIE_E1_ASM
        msr     REG_PIR_EL1, x0
 
 #undef PTE_MAYBE_NG
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Ryan Roberts 8 months, 1 week ago
On 15/04/2025 07:27, Anshuman Khandual wrote:
> 
> 
> On 4/14/25 18:01, Ryan Roberts wrote:
>> On 14/04/2025 13:28, Ard Biesheuvel wrote:
>>> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>
>>>> On 14/04/2025 10:41, Ard Biesheuvel wrote:
>>>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>>>
>>>>>> On 10/04/2025 08:40, Anshuman Khandual wrote:
>>>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
>>>>>>> as expected if those macro constants contain some 128 bit layout elements,
>>>>>>> required for D128 page tables. Fix this problem via first loading up these
>>>>>>> macro constants into a given memory location and then subsequently setting
>>>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>>>>>>
>>>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
>>>>>> of 128-bit types with shifting and masking, which the assembler can't do? It
>>>>>> would be good to spell this out.
>>>>>>
>>>>>>>
>>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>>> Cc: Will Deacon <will@kernel.org>
>>>>>>> Cc: Mark Rutland <mark.rutland@arm.com>
>>>>>>> Cc: Ard Biesheuvel <ardb@kernel.org>
>>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>>>>>> Cc: linux-arm-kernel@lists.infradead.org
>>>>>>> Cc: linux-kernel@vger.kernel.org
>>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>>>>>>> ---
>>>>>>> This patch applies on v6.15-rc1
>>>>>>>
>>>>>>>  arch/arm64/kernel/head.S         | 3 +++
>>>>>>>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
>>>>>>>  arch/arm64/kernel/pi/pi.h        | 1 +
>>>>>>>  arch/arm64/mm/mmu.c              | 1 +
>>>>>>>  arch/arm64/mm/proc.S             | 5 +++--
>>>>>>>  5 files changed, 14 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>>>>>>> index 2ce73525de2c..4950d9cc638a 100644
>>>>>>> --- a/arch/arm64/kernel/head.S
>>>>>>> +++ b/arch/arm64/kernel/head.S
>>>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>>>>>>>        * On return, the CPU will be ready for the MMU to be turned on and
>>>>>>>        * the TCR will have been set.
>>>>>>>        */
>>>>>>> +     adr_l   x0, pir_data
>>>>>>> +     bl      __pi_load_pir_data
>>>>>>
>>>>>> Using C code to pre-calculate the values into global variables that the assembly
>>>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
>>>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to
>>>>>> asm-offsets.c:
>>>>>>
>>>>>> DEFINE(PIE_E0_ASM, PIE_E0);
>>>>>> DEFINE(PIE_E1_ASM, PIE_E1);
>>>>>>
>>>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
>>>>>> pre-calculated values that you can then use in proc.S?
>>>>>>
>>>>>
>>>>> There is another issue, which is that mov_q tries to be smart, and
>>>>> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
>>>>> directive evaluates the argument, which does not work with symbolic
>>>>> constants.
>>>>
>>>> I'm not quite understanding the detail here; what do you mean by "symbolic
>>>> constants"? asm-offsets.h will provide something like:
>>>>
>>>> #define PIE_E0_ASM 1234567890
>>>>
>>>> The current code is using a hash-define and that's working fine:
>>>>
>>>> mov_q   x0, PIE_E0
>>>>
>>>>
>>>> Won't the C preprocessor just substitute and everything will work out?
>>>>
>>>
>>> Yeah, you're right. I was experimenting with something like
>>>
>>> .set .Lpie_e0, PIE_E0_ASM
>>> mov_q xN, .Lpie_e0
>>>
>>> where this problem does exist, but we can just use PIE_E0_ASM directly
>>> and things should work as expected.
>>
>> Ahh great, sounds like this should be pretty simple then!
> 
> Following change works both on current and with D128 page tables.
> 
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -182,5 +182,7 @@ int main(void)
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
>    DEFINE(FTRACE_OPS_DIRECT_CALL,       offsetof(struct ftrace_ops, direct_call));
>  #endif
> +  DEFINE(PIE_E0_ASM, PIE_E0);
> +  DEFINE(PIE_E1_ASM, PIE_E1);
>    return 0;
>  }
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 737c99d79833..f45494425d09 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -536,9 +536,9 @@ alternative_else_nop_endif
>  #define PTE_MAYBE_NG           0
>  #define PTE_MAYBE_SHARED       0

I think at minimum, you can remove this PTE_MAYBE_* hack from proc.S. But as Ard
says, you may need to add it to asm-offsets.c? I'm surprised asm-offsets.c even
compiles without this hack since surely it doesn't have arm64_use_ng_mappings or
is_realm_world() available?

Thanks,
Ryan

>  
> -       mov_q   x0, PIE_E0
> +       mov_q   x0, PIE_E0_ASM
>         msr     REG_PIRE0_EL1, x0
> -       mov_q   x0, PIE_E1
> +       mov_q   x0, PIE_E1_ASM
>         msr     REG_PIR_EL1, x0
>  
>  #undef PTE_MAYBE_NG
>
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Anshuman Khandual 8 months, 1 week ago

On 4/15/25 13:23, Ryan Roberts wrote:
> On 15/04/2025 07:27, Anshuman Khandual wrote:
>>
>>
>> On 4/14/25 18:01, Ryan Roberts wrote:
>>> On 14/04/2025 13:28, Ard Biesheuvel wrote:
>>>> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>>
>>>>> On 14/04/2025 10:41, Ard Biesheuvel wrote:
>>>>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>>>>
>>>>>>> On 10/04/2025 08:40, Anshuman Khandual wrote:
>>>>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
>>>>>>>> as expected if those macro constants contain some 128 bit layout elements,
>>>>>>>> required for D128 page tables. Fix this problem via first loading up these
>>>>>>>> macro constants into a given memory location and then subsequently setting
>>>>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>>>>>>>
>>>>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
>>>>>>> of 128-bit types with shifting and masking, which the assembler can't do? It
>>>>>>> would be good to spell this out.
>>>>>>>
>>>>>>>>
>>>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>>>> Cc: Will Deacon <will@kernel.org>
>>>>>>>> Cc: Mark Rutland <mark.rutland@arm.com>
>>>>>>>> Cc: Ard Biesheuvel <ardb@kernel.org>
>>>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>>>>>>> Cc: linux-arm-kernel@lists.infradead.org
>>>>>>>> Cc: linux-kernel@vger.kernel.org
>>>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>>>>>>>> ---
>>>>>>>> This patch applies on v6.15-rc1
>>>>>>>>
>>>>>>>>  arch/arm64/kernel/head.S         | 3 +++
>>>>>>>>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
>>>>>>>>  arch/arm64/kernel/pi/pi.h        | 1 +
>>>>>>>>  arch/arm64/mm/mmu.c              | 1 +
>>>>>>>>  arch/arm64/mm/proc.S             | 5 +++--
>>>>>>>>  5 files changed, 14 insertions(+), 2 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>>>>>>>> index 2ce73525de2c..4950d9cc638a 100644
>>>>>>>> --- a/arch/arm64/kernel/head.S
>>>>>>>> +++ b/arch/arm64/kernel/head.S
>>>>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>>>>>>>>        * On return, the CPU will be ready for the MMU to be turned on and
>>>>>>>>        * the TCR will have been set.
>>>>>>>>        */
>>>>>>>> +     adr_l   x0, pir_data
>>>>>>>> +     bl      __pi_load_pir_data
>>>>>>>
>>>>>>> Using C code to pre-calculate the values into global variables that the assembly
>>>>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
>>>>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to
>>>>>>> asm-offsets.c:
>>>>>>>
>>>>>>> DEFINE(PIE_E0_ASM, PIE_E0);
>>>>>>> DEFINE(PIE_E1_ASM, PIE_E1);
>>>>>>>
>>>>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
>>>>>>> pre-calculated values that you can then use in proc.S?
>>>>>>>
>>>>>>
>>>>>> There is another issue, which is that mov_q tries to be smart, and
>>>>>> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
>>>>>> directive evaluates the argument, which does not work with symbolic
>>>>>> constants.
>>>>>
>>>>> I'm not quite understanding the detail here; what do you mean by "symbolic
>>>>> constants"? asm-offsets.h will provide something like:
>>>>>
>>>>> #define PIE_E0_ASM 1234567890
>>>>>
>>>>> The current code is using a hash-define and that's working fine:
>>>>>
>>>>> mov_q   x0, PIE_E0
>>>>>
>>>>>
>>>>> Won't the C preprocessor just substitute and everything will work out?
>>>>>
>>>>
>>>> Yeah, you're right. I was experimenting with something like
>>>>
>>>> .set .Lpie_e0, PIE_E0_ASM
>>>> mov_q xN, .Lpie_e0
>>>>
>>>> where this problem does exist, but we can just use PIE_E0_ASM directly
>>>> and things should work as expected.
>>>
>>> Ahh great, sounds like this should be pretty simple then!
>>
>> Following change works both on current and with D128 page tables.
>>
>> --- a/arch/arm64/kernel/asm-offsets.c
>> +++ b/arch/arm64/kernel/asm-offsets.c
>> @@ -182,5 +182,7 @@ int main(void)
>>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
>>    DEFINE(FTRACE_OPS_DIRECT_CALL,       offsetof(struct ftrace_ops, direct_call));
>>  #endif
>> +  DEFINE(PIE_E0_ASM, PIE_E0);
>> +  DEFINE(PIE_E1_ASM, PIE_E1);
>>    return 0;
>>  }
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index 737c99d79833..f45494425d09 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -536,9 +536,9 @@ alternative_else_nop_endif
>>  #define PTE_MAYBE_NG           0
>>  #define PTE_MAYBE_SHARED       0
> 
> I think at minimum, you can remove this PTE_MAYBE_* hack from proc.S. But as Ard
> says, you may need to add it to asm-offsets.c? I'm surprised asm-offsets.c even

Moving PTE_MAYBE_* inside asm-offsets.c works as well in both cases
but still wondering why this is even required ? What am I missing ?

> compiles without this hack since surely it doesn't have arm64_use_ng_mappings or
> is_realm_world() available?
Did not face any problem with defconfig for the mainline kernel and
both these symbols were visible in the built code.

> 
> Thanks,
> Ryan
> 
>>  
>> -       mov_q   x0, PIE_E0
>> +       mov_q   x0, PIE_E0_ASM
>>         msr     REG_PIRE0_EL1, x0
>> -       mov_q   x0, PIE_E1
>> +       mov_q   x0, PIE_E1_ASM
>>         msr     REG_PIR_EL1, x0
>>  
>>  #undef PTE_MAYBE_NG
>>
>
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Ryan Roberts 8 months ago
On 15/04/2025 10:46, Anshuman Khandual wrote:
> 
> 
> On 4/15/25 13:23, Ryan Roberts wrote:
>> On 15/04/2025 07:27, Anshuman Khandual wrote:
>>>
>>>
>>> On 4/14/25 18:01, Ryan Roberts wrote:
>>>> On 14/04/2025 13:28, Ard Biesheuvel wrote:
>>>>> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>>>
>>>>>> On 14/04/2025 10:41, Ard Biesheuvel wrote:
>>>>>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>>>>>
>>>>>>>> On 10/04/2025 08:40, Anshuman Khandual wrote:
>>>>>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
>>>>>>>>> as expected if those macro constants contain some 128 bit layout elements,
>>>>>>>>> required for D128 page tables. Fix this problem via first loading up these
>>>>>>>>> macro constants into a given memory location and then subsequently setting
>>>>>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>>>>>>>>
>>>>>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
>>>>>>>> of 128-bit types with shifting and masking, which the assembler can't do? It
>>>>>>>> would be good to spell this out.
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>>>>> Cc: Will Deacon <will@kernel.org>
>>>>>>>>> Cc: Mark Rutland <mark.rutland@arm.com>
>>>>>>>>> Cc: Ard Biesheuvel <ardb@kernel.org>
>>>>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>>>>>>>> Cc: linux-arm-kernel@lists.infradead.org
>>>>>>>>> Cc: linux-kernel@vger.kernel.org
>>>>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>>>>>>>>> ---
>>>>>>>>> This patch applies on v6.15-rc1
>>>>>>>>>
>>>>>>>>>  arch/arm64/kernel/head.S         | 3 +++
>>>>>>>>>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
>>>>>>>>>  arch/arm64/kernel/pi/pi.h        | 1 +
>>>>>>>>>  arch/arm64/mm/mmu.c              | 1 +
>>>>>>>>>  arch/arm64/mm/proc.S             | 5 +++--
>>>>>>>>>  5 files changed, 14 insertions(+), 2 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>>>>>>>>> index 2ce73525de2c..4950d9cc638a 100644
>>>>>>>>> --- a/arch/arm64/kernel/head.S
>>>>>>>>> +++ b/arch/arm64/kernel/head.S
>>>>>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>>>>>>>>>        * On return, the CPU will be ready for the MMU to be turned on and
>>>>>>>>>        * the TCR will have been set.
>>>>>>>>>        */
>>>>>>>>> +     adr_l   x0, pir_data
>>>>>>>>> +     bl      __pi_load_pir_data
>>>>>>>>
>>>>>>>> Using C code to pre-calculate the values into global variables that the assembly
>>>>>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
>>>>>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to
>>>>>>>> asm-offsets.c:
>>>>>>>>
>>>>>>>> DEFINE(PIE_E0_ASM, PIE_E0);
>>>>>>>> DEFINE(PIE_E1_ASM, PIE_E1);
>>>>>>>>
>>>>>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
>>>>>>>> pre-calculated values that you can then use in proc.S?
>>>>>>>>
>>>>>>>
>>>>>>> There is another issue, which is that mov_q tries to be smart, and
>>>>>>> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
>>>>>>> directive evaluates the argument, which does not work with symbolic
>>>>>>> constants.
>>>>>>
>>>>>> I'm not quite understanding the detail here; what do you mean by "symbolic
>>>>>> constants"? asm-offsets.h will provide something like:
>>>>>>
>>>>>> #define PIE_E0_ASM 1234567890
>>>>>>
>>>>>> The current code is using a hash-define and that's working fine:
>>>>>>
>>>>>> mov_q   x0, PIE_E0
>>>>>>
>>>>>>
>>>>>> Won't the C preprocessor just substitute and everything will work out?
>>>>>>
>>>>>
>>>>> Yeah, you're right. I was experimenting with something like
>>>>>
>>>>> .set .Lpie_e0, PIE_E0_ASM
>>>>> mov_q xN, .Lpie_e0
>>>>>
>>>>> where this problem does exist, but we can just use PIE_E0_ASM directly
>>>>> and things should work as expected.
>>>>
>>>> Ahh great, sounds like this should be pretty simple then!
>>>
>>> Following change works both on current and with D128 page tables.
>>>
>>> --- a/arch/arm64/kernel/asm-offsets.c
>>> +++ b/arch/arm64/kernel/asm-offsets.c
>>> @@ -182,5 +182,7 @@ int main(void)
>>>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
>>>    DEFINE(FTRACE_OPS_DIRECT_CALL,       offsetof(struct ftrace_ops, direct_call));
>>>  #endif
>>> +  DEFINE(PIE_E0_ASM, PIE_E0);
>>> +  DEFINE(PIE_E1_ASM, PIE_E1);
>>>    return 0;
>>>  }
>>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>>> index 737c99d79833..f45494425d09 100644
>>> --- a/arch/arm64/mm/proc.S
>>> +++ b/arch/arm64/mm/proc.S
>>> @@ -536,9 +536,9 @@ alternative_else_nop_endif
>>>  #define PTE_MAYBE_NG           0
>>>  #define PTE_MAYBE_SHARED       0
>>
>> I think at minimum, you can remove this PTE_MAYBE_* hack from proc.S. But as Ard
>> says, you may need to add it to asm-offsets.c? I'm surprised asm-offsets.c even
> 
> Moving PTE_MAYBE_* inside asm-offsets.c works as well in both cases
> but still wondering why this is even required ? What am I missing ?

Without the overrides:

#define PTE_MAYBE_NG		(arm64_use_ng_mappings ? PTE_NG : 0)
#define PTE_MAYBE_SHARED	(lpa2_is_enabled() ? 0 : PTE_SHARED)

And these are used in the definition of PROT_DEFAULT, which is used in the
definition of _PAGE_KERNEL*, which are used in the definition of PIE_E1.

For the assembly code arm64_use_ng_mappings and lpa2_is_enabled() are not
accessible. But we don't actually care about those bits so its just hacked to
override the fields to 0.

I would have expected the asm-offsets.c case to have a similar problem because
we have not defined arm64_use_ng_mappings and lpa2_is_enabled(). asm-offsets.c
is compiled as a standalone object then the macros are exfiltrated and
asm-offsets.h is created.

Perhaps the compiler is smart enough to see that we don't care about the bits in
those fields (pte_pi_index() is only keeping selected bits) and asm-offsets.c
can be safely compiled without the hack?

> 
>> compiles without this hack since surely it doesn't have arm64_use_ng_mappings or
>> is_realm_world() available?
> Did not face any problem with defconfig for the mainline kernel and
> both these symbols were visible in the built code.
> 
>>
>> Thanks,
>> Ryan
>>
>>>  
>>> -       mov_q   x0, PIE_E0
>>> +       mov_q   x0, PIE_E0_ASM
>>>         msr     REG_PIRE0_EL1, x0
>>> -       mov_q   x0, PIE_E1
>>> +       mov_q   x0, PIE_E1_ASM
>>>         msr     REG_PIR_EL1, x0
>>>  
>>>  #undef PTE_MAYBE_NG
>>>
>>
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Anshuman Khandual 8 months ago

On 4/15/25 23:13, Ryan Roberts wrote:
> On 15/04/2025 10:46, Anshuman Khandual wrote:
>>
>>
>> On 4/15/25 13:23, Ryan Roberts wrote:
>>> On 15/04/2025 07:27, Anshuman Khandual wrote:
>>>>
>>>>
>>>> On 4/14/25 18:01, Ryan Roberts wrote:
>>>>> On 14/04/2025 13:28, Ard Biesheuvel wrote:
>>>>>> On Mon, 14 Apr 2025 at 14:04, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>>>>
>>>>>>> On 14/04/2025 10:41, Ard Biesheuvel wrote:
>>>>>>>> On Mon, 14 Apr 2025 at 09:52, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>>>>>>>>
>>>>>>>>> On 10/04/2025 08:40, Anshuman Khandual wrote:
>>>>>>>>>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
>>>>>>>>>> as expected if those macro constants contain some 128 bit layout elements,
>>>>>>>>>> required for D128 page tables. Fix this problem via first loading up these
>>>>>>>>>> macro constants into a given memory location and then subsequently setting
>>>>>>>>>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>>>>>>>>>
>>>>>>>>> From memory, the primary issue is that for D128, PIE_E[0|1] are defined in terms
>>>>>>>>> of 128-bit types with shifting and masking, which the assembler can't do? It
>>>>>>>>> would be good to spell this out.
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>>>>>> Cc: Will Deacon <will@kernel.org>
>>>>>>>>>> Cc: Mark Rutland <mark.rutland@arm.com>
>>>>>>>>>> Cc: Ard Biesheuvel <ardb@kernel.org>
>>>>>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>>>>>>>>> Cc: linux-arm-kernel@lists.infradead.org
>>>>>>>>>> Cc: linux-kernel@vger.kernel.org
>>>>>>>>>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>>>>>>>>>> ---
>>>>>>>>>> This patch applies on v6.15-rc1
>>>>>>>>>>
>>>>>>>>>>  arch/arm64/kernel/head.S         | 3 +++
>>>>>>>>>>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
>>>>>>>>>>  arch/arm64/kernel/pi/pi.h        | 1 +
>>>>>>>>>>  arch/arm64/mm/mmu.c              | 1 +
>>>>>>>>>>  arch/arm64/mm/proc.S             | 5 +++--
>>>>>>>>>>  5 files changed, 14 insertions(+), 2 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>>>>>>>>>> index 2ce73525de2c..4950d9cc638a 100644
>>>>>>>>>> --- a/arch/arm64/kernel/head.S
>>>>>>>>>> +++ b/arch/arm64/kernel/head.S
>>>>>>>>>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>>>>>>>>>>        * On return, the CPU will be ready for the MMU to be turned on and
>>>>>>>>>>        * the TCR will have been set.
>>>>>>>>>>        */
>>>>>>>>>> +     adr_l   x0, pir_data
>>>>>>>>>> +     bl      __pi_load_pir_data
>>>>>>>>>
>>>>>>>>> Using C code to pre-calculate the values into global variables that the assembly
>>>>>>>>> code then loads and stuffs into the PIR registers feels hacky. I wonder if we
>>>>>>>>> can instead pre-calculate into asm-offsets.h? e.g. add the following to
>>>>>>>>> asm-offsets.c:
>>>>>>>>>
>>>>>>>>> DEFINE(PIE_E0_ASM, PIE_E0);
>>>>>>>>> DEFINE(PIE_E1_ASM, PIE_E1);
>>>>>>>>>
>>>>>>>>> Which will generate the asm-offsets.h header with PIE_E[0|1]_ASM with the
>>>>>>>>> pre-calculated values that you can then use in proc.S?
>>>>>>>>>
>>>>>>>>
>>>>>>>> There is another issue, which is that mov_q tries to be smart, and
>>>>>>>> emit fewer than 4 MOVZ/MOVK instructions if possible. So the .if
>>>>>>>> directive evaluates the argument, which does not work with symbolic
>>>>>>>> constants.
>>>>>>>
>>>>>>> I'm not quite understanding the detail here; what do you mean by "symbolic
>>>>>>> constants"? asm-offsets.h will provide something like:
>>>>>>>
>>>>>>> #define PIE_E0_ASM 1234567890
>>>>>>>
>>>>>>> The current code is using a hash-define and that's working fine:
>>>>>>>
>>>>>>> mov_q   x0, PIE_E0
>>>>>>>
>>>>>>>
>>>>>>> Won't the C preprocessor just substitute and everything will work out?
>>>>>>>
>>>>>>
>>>>>> Yeah, you're right. I was experimenting with something like
>>>>>>
>>>>>> .set .Lpie_e0, PIE_E0_ASM
>>>>>> mov_q xN, .Lpie_e0
>>>>>>
>>>>>> where this problem does exist, but we can just use PIE_E0_ASM directly
>>>>>> and things should work as expected.
>>>>>
>>>>> Ahh great, sounds like this should be pretty simple then!
>>>>
>>>> Following change works both on current and with D128 page tables.
>>>>
>>>> --- a/arch/arm64/kernel/asm-offsets.c
>>>> +++ b/arch/arm64/kernel/asm-offsets.c
>>>> @@ -182,5 +182,7 @@ int main(void)
>>>>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
>>>>    DEFINE(FTRACE_OPS_DIRECT_CALL,       offsetof(struct ftrace_ops, direct_call));
>>>>  #endif
>>>> +  DEFINE(PIE_E0_ASM, PIE_E0);
>>>> +  DEFINE(PIE_E1_ASM, PIE_E1);
>>>>    return 0;
>>>>  }
>>>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>>>> index 737c99d79833..f45494425d09 100644
>>>> --- a/arch/arm64/mm/proc.S
>>>> +++ b/arch/arm64/mm/proc.S
>>>> @@ -536,9 +536,9 @@ alternative_else_nop_endif
>>>>  #define PTE_MAYBE_NG           0
>>>>  #define PTE_MAYBE_SHARED       0
>>>
>>> I think at minimum, you can remove this PTE_MAYBE_* hack from proc.S. But as Ard
>>> says, you may need to add it to asm-offsets.c? I'm surprised asm-offsets.c even
>>
>> Moving PTE_MAYBE_* inside asm-offsets.c works as well in both cases
>> but still wondering why this is even required ? What am I missing ?
> 
> Without the overrides:
> 
> #define PTE_MAYBE_NG		(arm64_use_ng_mappings ? PTE_NG : 0)
> #define PTE_MAYBE_SHARED	(lpa2_is_enabled() ? 0 : PTE_SHARED)
> 
> And these are used in the definition of PROT_DEFAULT, which is used in the
> definition of _PAGE_KERNEL*, which are used in the definition of PIE_E1.
> 
> For the assembly code arm64_use_ng_mappings and lpa2_is_enabled() are not
> accessible. But we don't actually care about those bits so its just hacked to
> override the fields to 0.

Alright.

> 
> I would have expected the asm-offsets.c case to have a similar problem because
> we have not defined arm64_use_ng_mappings and lpa2_is_enabled(). asm-offsets.c
> is compiled as a standalone object then the macros are exfiltrated and
> asm-offsets.h is created.

Understood.

> 
> Perhaps the compiler is smart enough to see that we don't care about the bits in
> those fields (pte_pi_index() is only keeping selected bits) and asm-offsets.c
> can be safely compiled without the hack?

I guess the best thing here would be to move PTE_MAYBE_* inside asm-offsets.c to
be on the safer side, rather than depending on what compiler might do otherwise.
I will respin the patch with both the changes included.

> 
>>
>>> compiles without this hack since surely it doesn't have arm64_use_ng_mappings or
>>> is_realm_world() available?
>> Did not face any problem with defconfig for the mainline kernel and
>> both these symbols were visible in the built code.
>>
>>>
>>> Thanks,
>>> Ryan
>>>
>>>>  
>>>> -       mov_q   x0, PIE_E0
>>>> +       mov_q   x0, PIE_E0_ASM
>>>>         msr     REG_PIRE0_EL1, x0
>>>> -       mov_q   x0, PIE_E1
>>>> +       mov_q   x0, PIE_E1_ASM
>>>>         msr     REG_PIR_EL1, x0
>>>>  
>>>>  #undef PTE_MAYBE_NG
>>>>
>>>
>
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Ard Biesheuvel 8 months, 1 week ago
Hi Anshuman,

On Thu, 10 Apr 2025 at 09:40, Anshuman Khandual
<anshuman.khandual@arm.com> wrote:
>
> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
> as expected if those macro constants contain some 128 bit layout elements,
> required for D128 page tables.

Could you elaborate?

> Fix this problem via first loading up these
> macro constants into a given memory location and then subsequently setting
> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>

If this is necessary, we could also remove the PTE_MAYBE_xx override hack no?

> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Ard Biesheuvel <ardb@kernel.org>
> Cc: Ryan Roberts <ryan.roberts@arm.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
> This patch applies on v6.15-rc1
>
>  arch/arm64/kernel/head.S         | 3 +++
>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
>  arch/arm64/kernel/pi/pi.h        | 1 +
>  arch/arm64/mm/mmu.c              | 1 +
>  arch/arm64/mm/proc.S             | 5 +++--
>  5 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2ce73525de2c..4950d9cc638a 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>          * On return, the CPU will be ready for the MMU to be turned on and
>          * the TCR will have been set.
>          */
> +       adr_l   x0, pir_data
> +       bl      __pi_load_pir_data
> +
>         bl      __cpu_setup                     // initialise processor
>         b       __primary_switch
>  SYM_CODE_END(primary_entry)
> diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
> index 81345f68f9fc..cd9d24e73046 100644
> --- a/arch/arm64/kernel/pi/map_range.c
> +++ b/arch/arm64/kernel/pi/map_range.c
> @@ -103,3 +103,9 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
>
>         return ptep;
>  }
> +
> +asmlinkage void __init load_pir_data(u64 pir_data[])
> +{
> +       pir_data[0] = PIE_E0;
> +       pir_data[1] = PIE_E1;
> +}
> diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
> index c91e5e965cd3..ae61df4fdb77 100644
> --- a/arch/arm64/kernel/pi/pi.h
> +++ b/arch/arm64/kernel/pi/pi.h
> @@ -34,3 +34,4 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
>  asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
>
>  asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
> +asmlinkage void load_pir_data(u64 pir_data[]);
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index ea6695d53fb9..762e81ff4e85 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -58,6 +58,7 @@ static bool rodata_is_rw __ro_after_init = true;
>   * with MMU turned off.
>   */
>  long __section(".mmuoff.data.write") __early_cpu_boot_status;
> +unsigned long __section(".mmuoff.data.write") pir_data[2];
>
>  /*
>   * Empty_zero_page is a special page that is used for zero-initialized data
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index fb30c8804f87..7dd28cf101c2 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -524,9 +524,10 @@ alternative_else_nop_endif
>  #define PTE_MAYBE_NG           0
>  #define PTE_MAYBE_SHARED       0
>
> -       mov_q   x0, PIE_E0
> +       adr_l   x1, pir_data
> +       ldr     x0, [x1, #0]
>         msr     REG_PIRE0_EL1, x0
> -       mov_q   x0, PIE_E1
> +       ldr     x0, [x1, #8]
>         msr     REG_PIR_EL1, x0
>
>  #undef PTE_MAYBE_NG
> --
> 2.25.1
>
Re: [PATCH] arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
Posted by Anshuman Khandual 8 months, 1 week ago

On 4/10/25 17:58, Ard Biesheuvel wrote:
> Hi Anshuman,
> 
> On Thu, 10 Apr 2025 at 09:40, Anshuman Khandual
> <anshuman.khandual@arm.com> wrote:
>>
>> mov_q cannot really move PIE_E[0|1] macros into a general purpose register
>> as expected if those macro constants contain some 128 bit layout elements,
>> required for D128 page tables.
> 
> Could you elaborate?

Without this change in place, the following build error comes up.

arch/arm64/mm/proc.S: Assembler messages:
arch/arm64/mm/proc.S:539: Error: too many positional arguments
arch/arm64/mm/proc.S:541: Error: too many positional arguments
make[4]: *** [scripts/Makefile.build:335: arch/arm64/mm/proc.o] Error 1
make[4]: *** Waiting for unfinished jobs....
make[3]: *** [scripts/Makefile.build:461: arch/arm64/mm] Error 2
make[3]: *** Waiting for unfinished jobs...

SYM_FUNC_START(__cpu_setup)
	..........................
	..........................
#define PTE_MAYBE_NG            0
#define PTE_MAYBE_SHARED        0

        mov_q   x0, PIE_E0		 (arch/arm64/mm/proc.S:539)
        msr     REG_PIRE0_EL1, x0
        mov_q   x0, PIE_E1		 (arch/arm64/mm/proc.S:541)
        msr     REG_PIR_EL1, x0

#undef PTE_MAYBE_NG
#undef PTE_MAYBE_SHARED
	..........................
	..........................

Please note the following PIE_E0 definition in D128 context.

#define PTE_PI_MASK     GENMASK_U128(118, 115)
#define PTE_PI_SHIFT    115

#define pte_pi_index(pte)       (((pte) & PTE_PI_MASK) >> PTE_PI_SHIFT)

#define PIE_E0  ( \
        PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS),           PIE_GCS)  | \
        PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO),        PIE_R)   | \
        PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY),      PIE_X_O) | \
        PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O)  | \
        PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC),   PIE_RWX_O) | \
        PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY),      PIE_R_O)   | \
        PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED),        PIE_RW_O))

Note that the _PAGE_XXX definitions here might contain page flags beyond just
64 bits as well.

> 
>> Fix this problem via first loading up these
>> macro constants into a given memory location and then subsequently setting
>> up registers PIRE0_EL1 and PIR_EL1 by retrieving the memory stored values.
>>
> 
> If this is necessary, we could also remove the PTE_MAYBE_xx override hack no?

Could you please elaborate? Not sure if PTE_MAYBE_xx is the problem here.

> 
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will@kernel.org>
>> Cc: Mark Rutland <mark.rutland@arm.com>
>> Cc: Ard Biesheuvel <ardb@kernel.org>
>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>> Cc: linux-arm-kernel@lists.infradead.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>> ---
>> This patch applies on v6.15-rc1
>>
>>  arch/arm64/kernel/head.S         | 3 +++
>>  arch/arm64/kernel/pi/map_range.c | 6 ++++++
>>  arch/arm64/kernel/pi/pi.h        | 1 +
>>  arch/arm64/mm/mmu.c              | 1 +
>>  arch/arm64/mm/proc.S             | 5 +++--
>>  5 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index 2ce73525de2c..4950d9cc638a 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -126,6 +126,9 @@ SYM_CODE_START(primary_entry)
>>          * On return, the CPU will be ready for the MMU to be turned on and
>>          * the TCR will have been set.
>>          */
>> +       adr_l   x0, pir_data
>> +       bl      __pi_load_pir_data
>> +
>>         bl      __cpu_setup                     // initialise processor
>>         b       __primary_switch
>>  SYM_CODE_END(primary_entry)
>> diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
>> index 81345f68f9fc..cd9d24e73046 100644
>> --- a/arch/arm64/kernel/pi/map_range.c
>> +++ b/arch/arm64/kernel/pi/map_range.c
>> @@ -103,3 +103,9 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
>>
>>         return ptep;
>>  }
>> +
>> +asmlinkage void __init load_pir_data(u64 pir_data[])
>> +{
>> +       pir_data[0] = PIE_E0;
>> +       pir_data[1] = PIE_E1;
>> +}
>> diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
>> index c91e5e965cd3..ae61df4fdb77 100644
>> --- a/arch/arm64/kernel/pi/pi.h
>> +++ b/arch/arm64/kernel/pi/pi.h
>> @@ -34,3 +34,4 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
>>  asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
>>
>>  asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
>> +asmlinkage void load_pir_data(u64 pir_data[]);
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index ea6695d53fb9..762e81ff4e85 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -58,6 +58,7 @@ static bool rodata_is_rw __ro_after_init = true;
>>   * with MMU turned off.
>>   */
>>  long __section(".mmuoff.data.write") __early_cpu_boot_status;
>> +unsigned long __section(".mmuoff.data.write") pir_data[2];
>>
>>  /*
>>   * Empty_zero_page is a special page that is used for zero-initialized data
>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
>> index fb30c8804f87..7dd28cf101c2 100644
>> --- a/arch/arm64/mm/proc.S
>> +++ b/arch/arm64/mm/proc.S
>> @@ -524,9 +524,10 @@ alternative_else_nop_endif
>>  #define PTE_MAYBE_NG           0
>>  #define PTE_MAYBE_SHARED       0
>>
>> -       mov_q   x0, PIE_E0
>> +       adr_l   x1, pir_data
>> +       ldr     x0, [x1, #0]
>>         msr     REG_PIRE0_EL1, x0
>> -       mov_q   x0, PIE_E1
>> +       ldr     x0, [x1, #8]
>>         msr     REG_PIR_EL1, x0
>>
>>  #undef PTE_MAYBE_NG
>> --
>> 2.25.1
>>