[PATCH v2 09/10] arm64: mm: Map the kernel data/bss read-only in the linear map

Posted by Ard Biesheuvel 1 week, 6 days ago
From: Ard Biesheuvel <ardb@kernel.org>

On systems where the bootloader adheres to the original arm64 boot
protocol, the placement of the kernel in the physical address space is
highly predictable, and this makes the placement of its linear alias in
the kernel virtual address space equally predictable, given the lack of
randomization of the linear map.

The linear aliases of the kernel text and rodata regions are already
mapped read-only, but the kernel data and bss are mapped read-write in
this region. This is not needed, so map them read-only as well.

Note that the statically allocated kernel page tables do need to be
modifiable via the linear map, so leave these mapped read-write.
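
For reference, the linear alias of the kernel image ends up laid out
roughly as follows (the text/rodata alias is made read-only later via
mark_linear_text_alias_ro(), as before):

  _text         .. __init_begin   text/rodata   RO (existing behaviour)
  __init_begin  .. __init_end     init          RW (freed after boot)
  __init_end    .. __pgdir_start  data/bss      RO (this patch)
  __pgdir_start ..                page tables   RW (left writable)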

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/include/asm/sections.h |  1 +
 arch/arm64/mm/mmu.c               | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 51b0d594239e..f7fe2bcbfd03 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -23,6 +23,7 @@ extern char __irqentry_text_start[], __irqentry_text_end[];
 extern char __mmuoff_data_start[], __mmuoff_data_end[];
 extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
 extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
+extern char __pgdir_start[];
 
 static inline size_t entry_tramp_text_size(void)
 {
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 18415d4743bf..fdbbb018adc5 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1122,7 +1122,9 @@ static void __init map_mem(void)
 {
 	static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
 	phys_addr_t kernel_start = __pa_symbol(_text);
-	phys_addr_t kernel_end = __pa_symbol(__init_begin);
+	phys_addr_t init_begin = __pa_symbol(__init_begin);
+	phys_addr_t init_end = __pa_symbol(__init_end);
+	phys_addr_t kernel_end = __pa_symbol(__pgdir_start);
 	phys_addr_t start, end;
 	phys_addr_t early_kfence_pool;
 	int flags = NO_EXEC_MAPPINGS;
@@ -1158,7 +1160,9 @@ static void __init map_mem(void)
 	 * Note that contiguous mappings cannot be remapped in this way,
 	 * so we should avoid them here.
 	 */
-	__map_memblock(kernel_start, kernel_end, PAGE_KERNEL,
+	__map_memblock(kernel_start, init_begin, PAGE_KERNEL,
+		       flags | NO_CONT_MAPPINGS);
+	__map_memblock(init_end, kernel_end, PAGE_KERNEL,
 		       flags | NO_CONT_MAPPINGS);
 
 	/* map all the memory banks */
@@ -1172,6 +1176,8 @@ static void __init map_mem(void)
 			       flags);
 	}
 
+	__map_memblock(init_end, kernel_end, PAGE_KERNEL_RO,
+		       flags | NO_CONT_MAPPINGS);
 	arm64_kfence_map_pool(early_kfence_pool);
 }
 
-- 
2.52.0.457.g6b5491de43-goog
Re: [PATCH v2 09/10] arm64: mm: Map the kernel data/bss read-only in the linear map
Posted by Ryan Roberts 1 week, 5 days ago
On 26/01/2026 09:26, Ard Biesheuvel wrote:
> From: Ard Biesheuvel <ardb@kernel.org>
> 
> On systems where the bootloader adheres to the original arm64 boot
> protocol, the placement of the kernel in the physical address space is
> highly predictable, and this makes the placement of its linear alias in
> the kernel virtual address space equally predictable, given the lack of
> randomization of the linear map.
> 
> The linear aliases of the kernel text and rodata regions are already
> mapped read-only, but the kernel data and bss are mapped read-write in
> this region. This is not needed, so map them read-only as well.
> 
> Note that the statically allocated kernel page tables do need to be
> modifiable via the linear map, so leave these mapped read-write.
> 
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>  arch/arm64/include/asm/sections.h |  1 +
>  arch/arm64/mm/mmu.c               | 10 ++++++++--
>  2 files changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
> index 51b0d594239e..f7fe2bcbfd03 100644
> --- a/arch/arm64/include/asm/sections.h
> +++ b/arch/arm64/include/asm/sections.h
> @@ -23,6 +23,7 @@ extern char __irqentry_text_start[], __irqentry_text_end[];
>  extern char __mmuoff_data_start[], __mmuoff_data_end[];
>  extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
>  extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
> +extern char __pgdir_start[];
>  
>  static inline size_t entry_tramp_text_size(void)
>  {
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 18415d4743bf..fdbbb018adc5 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -1122,7 +1122,9 @@ static void __init map_mem(void)
>  {
>  	static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
>  	phys_addr_t kernel_start = __pa_symbol(_text);
> -	phys_addr_t kernel_end = __pa_symbol(__init_begin);
> +	phys_addr_t init_begin = __pa_symbol(__init_begin);
> +	phys_addr_t init_end = __pa_symbol(__init_end);
> +	phys_addr_t kernel_end = __pa_symbol(__pgdir_start);
>  	phys_addr_t start, end;
>  	phys_addr_t early_kfence_pool;
>  	int flags = NO_EXEC_MAPPINGS;
> @@ -1158,7 +1160,9 @@ static void __init map_mem(void)
>  	 * Note that contiguous mappings cannot be remapped in this way,
>  	 * so we should avoid them here.
>  	 */
> -	__map_memblock(kernel_start, kernel_end, PAGE_KERNEL,
> +	__map_memblock(kernel_start, init_begin, PAGE_KERNEL,
> +		       flags | NO_CONT_MAPPINGS);
> +	__map_memblock(init_end, kernel_end, PAGE_KERNEL,
>  		       flags | NO_CONT_MAPPINGS);

I'm probably being dumb again... why map [init_end, kernel_end) RW here, only to
remap RO below? Why not just map RO here?

>  
>  	/* map all the memory banks */
> @@ -1172,6 +1176,8 @@ static void __init map_mem(void)
>  			       flags);
>  	}
>  
> +	__map_memblock(init_end, kernel_end, PAGE_KERNEL_RO,
> +		       flags | NO_CONT_MAPPINGS);

This seems iffy, since __map_memblock() doesn't flush the TLB. If you want to
update an existing mapping, you want to be calling update_mapping_prot(), right?

Thanks,
Ryan

>  	arm64_kfence_map_pool(early_kfence_pool);
>  }
>
Re: [PATCH v2 09/10] arm64: mm: Map the kernel data/bss read-only in the linear map
Posted by Ard Biesheuvel 1 week, 5 days ago
On Tue, 27 Jan 2026 at 11:33, Ryan Roberts <ryan.roberts@arm.com> wrote:
>
> On 26/01/2026 09:26, Ard Biesheuvel wrote:
> > From: Ard Biesheuvel <ardb@kernel.org>
> >
> > On systems where the bootloader adheres to the original arm64 boot
> > protocol, the placement of the kernel in the physical address space is
> > highly predictable, and this makes the placement of its linear alias in
> > the kernel virtual address space equally predictable, given the lack of
> > randomization of the linear map.
> >
> > The linear aliases of the kernel text and rodata regions are already
> > mapped read-only, but the kernel data and bss are mapped read-write in
> > this region. This is not needed, so map them read-only as well.
> >
> > Note that the statically allocated kernel page tables do need to be
> > modifiable via the linear map, so leave these mapped read-write.
> >
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > ---
> >  arch/arm64/include/asm/sections.h |  1 +
> >  arch/arm64/mm/mmu.c               | 10 ++++++++--
> >  2 files changed, 9 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
> > index 51b0d594239e..f7fe2bcbfd03 100644
> > --- a/arch/arm64/include/asm/sections.h
> > +++ b/arch/arm64/include/asm/sections.h
> > @@ -23,6 +23,7 @@ extern char __irqentry_text_start[], __irqentry_text_end[];
> >  extern char __mmuoff_data_start[], __mmuoff_data_end[];
> >  extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
> >  extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
> > +extern char __pgdir_start[];
> >
> >  static inline size_t entry_tramp_text_size(void)
> >  {
> > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > index 18415d4743bf..fdbbb018adc5 100644
> > --- a/arch/arm64/mm/mmu.c
> > +++ b/arch/arm64/mm/mmu.c
> > @@ -1122,7 +1122,9 @@ static void __init map_mem(void)
> >  {
> >       static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
> >       phys_addr_t kernel_start = __pa_symbol(_text);
> > -     phys_addr_t kernel_end = __pa_symbol(__init_begin);
> > +     phys_addr_t init_begin = __pa_symbol(__init_begin);
> > +     phys_addr_t init_end = __pa_symbol(__init_end);
> > +     phys_addr_t kernel_end = __pa_symbol(__pgdir_start);
> >       phys_addr_t start, end;
> >       phys_addr_t early_kfence_pool;
> >       int flags = NO_EXEC_MAPPINGS;
> > @@ -1158,7 +1160,9 @@ static void __init map_mem(void)
> >        * Note that contiguous mappings cannot be remapped in this way,
> >        * so we should avoid them here.
> >        */
> > -     __map_memblock(kernel_start, kernel_end, PAGE_KERNEL,
> > +     __map_memblock(kernel_start, init_begin, PAGE_KERNEL,
> > +                    flags | NO_CONT_MAPPINGS);
> > +     __map_memblock(init_end, kernel_end, PAGE_KERNEL,
> >                      flags | NO_CONT_MAPPINGS);
>
> I'm probably being dumb again... why map [init_end, kernel_end) RW here, only to
> remap RO below? Why not just map RO here?
>

Because the loop that iterates over the memblocks will remap it RW
again anyway, whether we map it RW or RO at this point is irrelevant.
This call just needs to occur here to ensure that no block mappings
need to be broken up later.
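
I.e., schematically, the resulting order of operations in map_mem()
is (simplified):

	/* 1) pre-map the kernel's linear alias without contiguous
	 *    mappings, so that changing permissions later never
	 *    requires splitting a mapping */
	__map_memblock(kernel_start, init_begin, PAGE_KERNEL,
		       flags | NO_CONT_MAPPINGS);
	__map_memblock(init_end, kernel_end, PAGE_KERNEL,
		       flags | NO_CONT_MAPPINGS);

	/* 2) the loop over the memblocks remaps all of RAM -
	 *    including the ranges above - read-write */
	for_each_mem_range(i, &start, &end)
		__map_memblock(start, end, PAGE_KERNEL, flags);

	/* 3) finally, flip the data/bss alias to read-only */
	__map_memblock(init_end, kernel_end, PAGE_KERNEL_RO,
		       flags | NO_CONT_MAPPINGS);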


> >
> >       /* map all the memory banks */
> > @@ -1172,6 +1176,8 @@ static void __init map_mem(void)
> >                              flags);
> >       }
> >
> > +     __map_memblock(init_end, kernel_end, PAGE_KERNEL_RO,
> > +                    flags | NO_CONT_MAPPINGS);
>
> This seems iffy, since __map_memblock() doesn't flush the TLB. If you want to
> update an existing mapping, you want to be calling update_mapping_prot(), right?
>

Fair enough - I'll use that here.
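
Presumably something along these lines (update_mapping_prot() also
performs the TLB maintenance for the range):

	update_mapping_prot(init_end, (unsigned long)__va(init_end),
			    kernel_end - init_end, PAGE_KERNEL_RO);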
Re: [PATCH v2 09/10] arm64: mm: Map the kernel data/bss read-only in the linear map
Posted by Ryan Roberts 1 week, 5 days ago
On 27/01/2026 10:36, Ard Biesheuvel wrote:
> On Tue, 27 Jan 2026 at 11:33, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>
>> On 26/01/2026 09:26, Ard Biesheuvel wrote:
>>> From: Ard Biesheuvel <ardb@kernel.org>
>>>
>>> On systems where the bootloader adheres to the original arm64 boot
>>> protocol, the placement of the kernel in the physical address space is
>>> highly predictable, and this makes the placement of its linear alias in
>>> the kernel virtual address space equally predictable, given the lack of
>>> randomization of the linear map.
>>>
>>> The linear aliases of the kernel text and rodata regions are already
>>> mapped read-only, but the kernel data and bss are mapped read-write in
>>> this region. This is not needed, so map them read-only as well.
>>>
>>> Note that the statically allocated kernel page tables do need to be
>>> modifiable via the linear map, so leave these mapped read-write.
>>>
>>> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
>>> ---
>>>  arch/arm64/include/asm/sections.h |  1 +
>>>  arch/arm64/mm/mmu.c               | 10 ++++++++--
>>>  2 files changed, 9 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
>>> index 51b0d594239e..f7fe2bcbfd03 100644
>>> --- a/arch/arm64/include/asm/sections.h
>>> +++ b/arch/arm64/include/asm/sections.h
>>> @@ -23,6 +23,7 @@ extern char __irqentry_text_start[], __irqentry_text_end[];
>>>  extern char __mmuoff_data_start[], __mmuoff_data_end[];
>>>  extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
>>>  extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
>>> +extern char __pgdir_start[];
>>>
>>>  static inline size_t entry_tramp_text_size(void)
>>>  {
>>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>>> index 18415d4743bf..fdbbb018adc5 100644
>>> --- a/arch/arm64/mm/mmu.c
>>> +++ b/arch/arm64/mm/mmu.c
>>> @@ -1122,7 +1122,9 @@ static void __init map_mem(void)
>>>  {
>>>       static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
>>>       phys_addr_t kernel_start = __pa_symbol(_text);
>>> -     phys_addr_t kernel_end = __pa_symbol(__init_begin);
>>> +     phys_addr_t init_begin = __pa_symbol(__init_begin);
>>> +     phys_addr_t init_end = __pa_symbol(__init_end);
>>> +     phys_addr_t kernel_end = __pa_symbol(__pgdir_start);
>>>       phys_addr_t start, end;
>>>       phys_addr_t early_kfence_pool;
>>>       int flags = NO_EXEC_MAPPINGS;
>>> @@ -1158,7 +1160,9 @@ static void __init map_mem(void)
>>>        * Note that contiguous mappings cannot be remapped in this way,
>>>        * so we should avoid them here.
>>>        */
>>> -     __map_memblock(kernel_start, kernel_end, PAGE_KERNEL,
>>> +     __map_memblock(kernel_start, init_begin, PAGE_KERNEL,
>>> +                    flags | NO_CONT_MAPPINGS);
>>> +     __map_memblock(init_end, kernel_end, PAGE_KERNEL,
>>>                      flags | NO_CONT_MAPPINGS);
>>
>> I'm probably being dumb again... why map [init_end, kernel_end) RW here, only to
>> remap RO below? Why not just map RO here?
>>
> 
> Because the loop that iterates over the memblocks will remap it RW
> again anyway, whether we map it RW or RO at this point is irrelevant.
> This call just needs to occur here to ensure that no block mappings
> need to be broken up later.

Ahh yes, got it.

> 
> 
>>>
>>>       /* map all the memory banks */
>>> @@ -1172,6 +1176,8 @@ static void __init map_mem(void)
>>>                              flags);
>>>       }
>>>
>>> +     __map_memblock(init_end, kernel_end, PAGE_KERNEL_RO,
>>> +                    flags | NO_CONT_MAPPINGS);
>>
>> This seems iffy, since __map_memblock() doesn't flush the TLB. If you want to
>> update an existing mapping, you want to be calling update_mapping_prot(), right?
>>
> 
> Fair enough - I'll use that here.