[PATCH 04/21] x86/mm/asi: set up asi_nonsensitive_pgd

Brendan Jackman posted 21 patches 1 week ago
[PATCH 04/21] x86/mm/asi: set up asi_nonsensitive_pgd
Posted by Brendan Jackman 1 week ago
Create the initial shared pagetable to hold all the mappings that will
be shared among ASI domains.

Mirror the physmap into the ASI pagetables, but with a maximum
granularity that's guaranteed to allow changing pageblock sensitivity
without having to allocate pagetables, and with everything as
non-present.

Signed-off-by: Brendan Jackman <jackmanb@google.com>
---
 arch/x86/include/asm/asi.h |  4 ++++
 arch/x86/mm/asi.c          | 19 +++++++++++++++++++
 arch/x86/mm/init.c         |  2 ++
 arch/x86/mm/init_64.c      | 25 +++++++++++++++++++++++--
 include/linux/asi.h        |  4 ++++
 init/main.c                |  1 +
 6 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index 32a4c04c4be0f6f425c7cbcff4c58f1827a4b4c4..85062f2a23e127c736a92bb0d49e54f6fdcc2a5b 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -12,4 +12,8 @@ static inline bool asi_enabled_static(void)
 	return cpu_feature_enabled(X86_FEATURE_ASI);
 }
 
+void asi_init(void);
+
+extern pgd_t *asi_nonsensitive_pgd;
+
 #endif /* _ASM_X86_ASI_H */
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 8c907f3c84f43f66e412ecbfa99e67390d31a66f..7225f6aec936eedf98cd263d791dd62263d62575 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -1,11 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/asi.h>
 #include <linux/init.h>
+#include <linux/memblock.h>
 #include <linux/string.h>
 
 #include <asm/cmdline.h>
 #include <asm/cpufeature.h>
 
+#include "mm_internal.h"
+
+/*
+ * This is a bit like init_mm.pgd, it holds mappings shared among all ASI
+ * domains.
+ */
+pgd_t *asi_nonsensitive_pgd;
+
 void __init asi_check_boottime_disable(void)
 {
 	bool enabled = false;
@@ -26,3 +35,13 @@ void __init asi_check_boottime_disable(void)
 	if (enabled)
 		setup_force_cpu_cap(X86_FEATURE_ASI);
 }
+
+void __init asi_init(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_ASI))
+		return;
+
+	asi_nonsensitive_pgd = alloc_low_page();
+	if (WARN_ON(!asi_nonsensitive_pgd))
+		setup_clear_cpu_cap(X86_FEATURE_ASI);
+}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index b877a41fc291284eb271ebe764a52730d51da3fc..8fd34475af7ccd49d0124e13a87342d3bfef3e05 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -773,6 +773,8 @@ void __init init_mem_mapping(void)
 	end = max_low_pfn << PAGE_SHIFT;
 #endif
 
+	asi_init();
+
 	/* the ISA range is always mapped regardless of memory holes */
 	init_memory_mapping(0, ISA_END_ADDRESS, PAGE_KERNEL);
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e98e85cf15f42db669696ba8195d8fc633351b26..7e0471d46767c63ceade479ae0d1bf738f14904a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -7,6 +7,7 @@
  *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
  */
 
+#include <linux/asi.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -746,7 +747,8 @@ phys_pgd_init(pgd_t *pgd_page, unsigned long paddr_start, unsigned long paddr_en
 {
 	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
 
-	*pgd_changed = false;
+	if (pgd_changed)
+		*pgd_changed = false;
 
 	paddr_last = paddr_end;
 	vaddr = (unsigned long)__va(paddr_start);
@@ -780,7 +782,8 @@ phys_pgd_init(pgd_t *pgd_page, unsigned long paddr_start, unsigned long paddr_en
 					  (pud_t *) p4d, init);
 
 		spin_unlock(&init_mm.page_table_lock);
-		*pgd_changed = true;
+		if (pgd_changed)
+			*pgd_changed = true;
 	}
 
 	return paddr_last;
@@ -797,6 +800,24 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
 
 	paddr_last = phys_pgd_init(init_mm.pgd, paddr_start, paddr_end, page_size_mask,
 				   prot, init, &pgd_changed);
+
+	/*
+	 * Set up ASI's unrestricted physmap. This needs to be mapped at minimum
+	 * 2M size so that regions can be mapped and unmapped at pageblock
+	 * granularity without requiring allocations.
+	 */
+	if (asi_nonsensitive_pgd) {
+		/*
+		 * Since most memory is expected to end up sensitive, start with
+		 * everything unmapped in this pagetable.
+		 */
+		pgprot_t prot_np = __pgprot(pgprot_val(prot) & ~_PAGE_PRESENT);
+
+		VM_BUG_ON((PAGE_SHIFT + pageblock_order) < page_level_shift(PG_LEVEL_2M));
+		phys_pgd_init(asi_nonsensitive_pgd, paddr_start, paddr_end, 1 << PG_LEVEL_2M,
+			      prot_np, init, NULL);
+	}
+
 	if (pgd_changed)
 		sync_global_pgds((unsigned long)__va(paddr_start),
 				 (unsigned long)__va(paddr_end) - 1);
diff --git a/include/linux/asi.h b/include/linux/asi.h
index 1832feb1b14d63f05bbfa3f87dd07753338ed70b..cc4bc957274dbf92ce5bf6185a418d0a8d1b7748 100644
--- a/include/linux/asi.h
+++ b/include/linux/asi.h
@@ -11,5 +11,9 @@
 static inline void asi_check_boottime_disable(void) { }
 static inline bool asi_enabled_static(void) { return false; }
 
+#define asi_nonsensitive_pgd NULL
+
+static inline void asi_init(void) { };
+
 #endif /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
 #endif /* _INCLUDE_ASI_H */
diff --git a/init/main.c b/init/main.c
index 07a3116811c5d72cbab48410493b3d0f89d1f1b2..0ec230ba123613c89c4dfbede27e0441207b2f88 100644
--- a/init/main.c
+++ b/init/main.c
@@ -12,6 +12,7 @@
 
 #define DEBUG		/* Enable initcall_debug */
 
+#include <linux/asi.h>
 #include <linux/types.h>
 #include <linux/export.h>
 #include <linux/extable.h>

-- 
2.50.1
Re: [PATCH 04/21] x86/mm/asi: set up asi_nonsensitive_pgd
Posted by Dave Hansen 4 hours ago
On 9/24/25 07:59, Brendan Jackman wrote:
> Create the initial shared pagetable to hold all the mappings that will
> be shared among ASI domains.
> 
> Mirror the physmap into the ASI pagetables, but with a maximum
> granularity that's guaranteed to allow changing pageblock sensitivity
> without having to allocate pagetables, and with everything as
> non-present.

Could you also talk about what this granularity _actually_ is and why it
has the property of never requiring page table allocations?

...
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index e98e85cf15f42db669696ba8195d8fc633351b26..7e0471d46767c63ceade479ae0d1bf738f14904a 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -7,6 +7,7 @@
>   *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
>   */
>  
> +#include <linux/asi.h>
>  #include <linux/signal.h>
>  #include <linux/sched.h>
>  #include <linux/kernel.h>
> @@ -746,7 +747,8 @@ phys_pgd_init(pgd_t *pgd_page, unsigned long paddr_start, unsigned long paddr_en
>  {
>  	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
>  
> -	*pgd_changed = false;
> +	if (pgd_changed)
> +		*pgd_changed = false;

This 'pgd_changed' hunk isn't mentioned in the changelog.

...
> @@ -797,6 +800,24 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
>  
>  	paddr_last = phys_pgd_init(init_mm.pgd, paddr_start, paddr_end, page_size_mask,
>  				   prot, init, &pgd_changed);
> +
> +	/*
> +	 * Set up ASI's unrestricted physmap. This needs to mapped at minimum 2M
> +	 * size so that regions can be mapped and unmapped at pageblock
> +	 * granularity without requiring allocations.
> +	 */

This took me a minute to wrap my head around.

Here, I think you're trying to convey that:

  1. There's a higher-level design decision that all sensitivity will be
     done at a 2M granularity. A 2MB physical region is either sensitive
     or not.
  2. Because of #1, 1GB mappings are not cool because splitting a 1GB
     mapping into 2MB needs to allocate a page table page.
  3. 4k mappings are OK because they can also have their permissions
     changed at a 2MB granularity. It's just more laborious.

The "minimum 2M size" comment really threw me off because that, to me,
also includes 1G which is a no-no here.

I also can't help but wonder if it would have been easier and more
straightforward to just start this whole exercise at 4k: force all the
ASI tables to be 4k. Then, later, add the 2MB support and tie it to
pageblocks afterwards.


> +	if (asi_nonsensitive_pgd) {
> +		/*
> +		 * Since most memory is expected to end up sensitive, start with
> +		 * everything unmapped in this pagetable.
> +		 */
> +		pgprot_t prot_np = __pgprot(pgprot_val(prot) & ~_PAGE_PRESENT);
> +
> +		VM_BUG_ON((PAGE_SHIFT + pageblock_order) < page_level_shift(PG_LEVEL_2M));
> +		phys_pgd_init(asi_nonsensitive_pgd, paddr_start, paddr_end, 1 << PG_LEVEL_2M,
> +			      prot_np, init, NULL);
> +	}

I'm also kinda wondering what the purpose is of having a whole page
table full of !_PAGE_PRESENT entries. It would be nice to know how this
eventually gets turned into something useful.