[PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model

Kirill A. Shutemov posted 4 patches 7 months, 1 week ago
[PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Kirill A. Shutemov 7 months, 1 week ago
5-level paging only supports SPARSEMEM_VMEMMAP. CONFIG_X86_5LEVEL is
being phased out, making 5-level paging support mandatory.

Make CONFIG_SPARSEMEM_VMEMMAP mandatory for x86-64 and eliminate
any associated conditional statements.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/Kconfig      | 2 +-
 arch/x86/mm/init_64.c | 9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d3c2da3b2f0b..45b36a019b5e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1467,7 +1467,6 @@ config X86_PAE
 config X86_5LEVEL
 	bool "Enable 5-level page tables support"
 	default y
-	select SPARSEMEM_VMEMMAP
 	depends on X86_64
 	help
 	  5-level paging enables access to larger address space:
@@ -1579,6 +1578,7 @@ config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
+	select SPARSEMEM_VMEMMAP if X86_64
 
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool X86_64 || (NUMA && X86_32)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bf45c7aed336..66330fe4e18c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -833,7 +833,6 @@ void __init paging_init(void)
 	zone_sizes_init();
 }
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
 #define PAGE_UNUSED 0xFD
 
 /*
@@ -932,7 +931,6 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long
 	if (!IS_ALIGNED(end, PMD_SIZE))
 		unused_pmd_start = end;
 }
-#endif
 
 /*
  * Memory hotplug specific functions
@@ -1152,16 +1150,13 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
 				pmd_clear(pmd);
 				spin_unlock(&init_mm.page_table_lock);
 				pages++;
-			}
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-			else if (vmemmap_pmd_is_unused(addr, next)) {
+			} else if (vmemmap_pmd_is_unused(addr, next)) {
 					free_hugepage_table(pmd_page(*pmd),
 							    altmap);
 					spin_lock(&init_mm.page_table_lock);
 					pmd_clear(pmd);
 					spin_unlock(&init_mm.page_table_lock);
 			}
-#endif
 			continue;
 		}
 
@@ -1500,7 +1495,6 @@ unsigned long memory_block_size_bytes(void)
 	return memory_block_size_probed;
 }
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
  */
@@ -1647,4 +1641,3 @@ void __meminit vmemmap_populate_print_last(void)
 		node_start = 0;
 	}
 }
-#endif
-- 
2.47.2
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Dave Hansen 7 months, 1 week ago
On 5/16/25 05:33, Kirill A. Shutemov wrote:
> 5-level paging only supports SPARSEMEM_VMEMMAP. CONFIG_X86_5LEVEL is
> being phased out, making 5-level paging support mandatory.
> 
> Make CONFIG_SPARSEMEM_VMEMMAP mandatory for x86-64 and eliminate
> any associated conditional statements.
I think we have ourselves a catch-22 here.

SPARSEMEM_VMEMMAP was selected because the other sparsemem modes
couldn't handle a dynamic MAX_PHYS{MEM,ADDR}_BITS introduced by 5-level
paging. Now you're proposing making it static again, but keeping the
SPARSEMEM_VMEMMAP dependency.

If you remove the dynamic MAX_PHYS{MEM,ADDR}_BITS, you should also
remove the dependency on SPARSEMEM_VMEMMAP. No?
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Kirill A. Shutemov 7 months, 1 week ago
On Fri, May 16, 2025 at 06:42:03AM -0700, Dave Hansen wrote:
> On 5/16/25 05:33, Kirill A. Shutemov wrote:
> > 5-level paging only supports SPARSEMEM_VMEMMAP. CONFIG_X86_5LEVEL is
> > being phased out, making 5-level paging support mandatory.
> > 
> > Make CONFIG_SPARSEMEM_VMEMMAP mandatory for x86-64 and eliminate
> > any associated conditional statements.
> I think we have ourselves a catch-22 here.
> 
> SPARSEMEM_VMEMMAP was selected because the other sparsemem modes
> couldn't handle a dynamic MAX_PHYS{MEM,ADDR}_BITS introduced by 5-level
> paging. Now you're proposing making it static again, but keeping the
> SPARSEMEM_VMEMMAP dependency.
> 
> If you remove the dynamic MAX_PHYS{MEM,ADDR}_BITS, you should also
> remove the dependency on SPARSEMEM_VMEMMAP. No?

I guess. But how?

And is there any value to support !SPARSEMEM_VMEMMAP?

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Dave Hansen 7 months, 1 week ago
On 5/16/25 07:01, Kirill A. Shutemov wrote:
>> If you remove the dynamic MAX_PHYS{MEM,ADDR}_BITS, you should also
>> remove the dependency on SPARSEMEM_VMEMMAP. No?
> I guess. But how?
> 
> And is there any value to support !SPARSEMEM_VMEMMAP?

Not really, other than making sure x86 supports a really wide variety of
arch-independent options. I've used it a time or two to go looking for
bugs but I'm not sure if those originated on x86 or other architectures.
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Ingo Molnar 7 months, 1 week ago
* Kirill A. Shutemov <kirill.shutemov@linux.intel.com> wrote:

> On Fri, May 16, 2025 at 06:42:03AM -0700, Dave Hansen wrote:
> > On 5/16/25 05:33, Kirill A. Shutemov wrote:
> > > 5-level paging only supports SPARSEMEM_VMEMMAP. CONFIG_X86_5LEVEL is
> > > being phased out, making 5-level paging support mandatory.
> > > 
> > > Make CONFIG_SPARSEMEM_VMEMMAP mandatory for x86-64 and eliminate
> > > any associated conditional statements.
> > I think we have ourselves a catch-22 here.
> > 
> > SPARSEMEM_VMEMMAP was selected because the other sparsemem modes
> > couldn't handle a dynamic MAX_PHYS{MEM,ADDR}_BITS introduced by 5-level
> > paging. Now you're proposing making it static again, but keeping the
> > SPARSEMEM_VMEMMAP dependency.
> > 
> > If you remove the dynamic MAX_PHYS{MEM,ADDR}_BITS, you should also
> > remove the dependency on SPARSEMEM_VMEMMAP. No?
> 
> I guess. But how?
> 
> And is there any value to support !SPARSEMEM_VMEMMAP?

Not really IMHO:

  .config.opensuse.default:    CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
  .config.ubuntu.localinstall: CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
  .config.fedora.generic:      CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
  .config.rhel.generic:        CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y

Thanks,

	Ingo
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Dave Hansen 7 months, 1 week ago
On 5/16/25 07:08, Ingo Molnar wrote:
>> And is there any value to support !SPARSEMEM_VMEMMAP?
> Not really IMHO:
> 
>   .config.opensuse.default:    CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
>   .config.ubuntu.localinstall: CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
>   .config.fedora.generic:      CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
>   .config.rhel.generic:        CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y

I look at the distro configs all the time as well.

But let's also not forget that none of these have lockdep turned on, and
we don't want to toss out lockdep support on x86, for example. ;)
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Ingo Molnar 7 months, 1 week ago
* Dave Hansen <dave.hansen@intel.com> wrote:

> On 5/16/25 07:08, Ingo Molnar wrote:
> >> And is there any value to support !SPARSEMEM_VMEMMAP?
> > Not really IMHO:
> > 
> >   .config.opensuse.default:    CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
> >   .config.ubuntu.localinstall: CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
> >   .config.fedora.generic:      CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
> >   .config.rhel.generic:        CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
> 
> I look at the distro configs all the time as well.
> 
> But let's also not forget that none of these have lockdep turned on,

Not actually true, quite a few distro debug kernel packages have 
lockdep on, and it's often enabled in internal QA rigs as well.

Not so SPARSEMEM_VMEMMAP...

> [...] and we don't want to toss out lockdep support on x86, for 
> example. ;)

That's an apples to oranges comparison:

 - SPARSEMEM_VMEMMAP is always enabled, in standard and debug kernels 
   as well.

 - lockdep is disabled in standard kernels, enabled in debug kernels 
   and very much relied on.

Thanks,

	Ingo
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Dave Hansen 7 months, 1 week ago
On 5/16/25 08:35, Ingo Molnar wrote:
>   .config.opensuse.default:    CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
>   .config.ubuntu.localinstall: CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
>   .config.fedora.generic:      CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
>   .config.rhel.generic:        CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y

That reminds me...

Does everybody keep their own local copies of these configs in their
environment? I do, and I refresh them periodically from the distros. I
assume everybody else is doing something similar.

Is there a better way?
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by H. Peter Anvin 7 months ago
On May 16, 2025 8:46:07 AM PDT, Dave Hansen <dave.hansen@intel.com> wrote:
>On 5/16/25 08:35, Ingo Molnar wrote:
>>   .config.opensuse.default:    CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
>>   .config.ubuntu.localinstall: CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
>>   .config.fedora.generic:      CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
>>   .config.rhel.generic:        CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
>
>That reminds me...
>
>Does everybody keep their own local copies of these configs in their
>environment? I do, and I refresh them periodically from the distros. I
>assume everybody else is doing something similar.
>
>Is there a better way?

What I do is keep a set of minimal configs which are the deltas from the default. 
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Ingo Molnar 7 months, 1 week ago
* Dave Hansen <dave.hansen@intel.com> wrote:

> On 5/16/25 05:33, Kirill A. Shutemov wrote:
> > 5-level paging only supports SPARSEMEM_VMEMMAP. CONFIG_X86_5LEVEL is
> > being phased out, making 5-level paging support mandatory.
> > 
> > Make CONFIG_SPARSEMEM_VMEMMAP mandatory for x86-64 and eliminate
> > any associated conditional statements.
> I think we have ourselves a catch-22 here.
> 
> SPARSEMEM_VMEMMAP was selected because the other sparsemem modes
> couldn't handle a dynamic MAX_PHYS{MEM,ADDR}_BITS introduced by 5-level
> paging. Now you're proposing making it static again, but keeping the
> SPARSEMEM_VMEMMAP dependency.
> 
> If you remove the dynamic MAX_PHYS{MEM,ADDR}_BITS, you should also
> remove the dependency on SPARSEMEM_VMEMMAP. No?

Isn't it the other way around? MAX_PHYS{MEM,ADDR}_BITS are now *always* 
dynamic, their value depending on whether LA57 is available and used.

Thanks,

	Ingo
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Dave Hansen 7 months, 1 week ago
On 5/16/25 06:45, Ingo Molnar wrote:
> Isn't it the other way around? MAX_PHYS{MEM,ADDR}_BITS are now *always* 
> dynamic, their value depending on whether LA57 is available and used.

MAX_PHYS{MEM,ADDR}_BITS were always intended to be the compile-time
maximums on the architecture. They're static on every architecture
except x86 and some arm64 configs (who probably copied x86).

That's why having them be dynamic broke non-vmemmap sparsemem.

But we also seems to have defined MAXMEM to derive from MAX_PHYSMEM_BITS
and MAXMEM cascades into a bunch of other stuff, including KASAN. So we
can't just make them static again, I guess.

The only option would be to make them static when using non-vmemmap
sparsemem. But that's new-ish, and probably won't get any testing.

I guess there's not much we can do about it.
Re: [PATCHv3 2/4] x86/64/mm: Make SPARSEMEM_VMEMMAP the only memory model
Posted by Dave Hansen 7 months, 1 week ago
On 5/16/25 07:59, Dave Hansen wrote:
> The only option would be to make them static when using non-vmemmap
> sparsemem. But that's new-ish, and probably won't get any testing.

Something like this. But I don't particularly like it.
[tip: x86/merge] x86/mm/64: Make SPARSEMEM_VMEMMAP the only memory model
Posted by tip-bot2 for Kirill A. Shutemov 7 months ago
The following commit has been merged into the x86/merge branch of tip:

Commit-ID:     cba5d9b3e99d6268d7909a65c2bd78f4d195aead
Gitweb:        https://git.kernel.org/tip/cba5d9b3e99d6268d7909a65c2bd78f4d195aead
Author:        Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
AuthorDate:    Fri, 16 May 2025 15:33:04 +03:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Sat, 17 May 2025 10:34:11 +02:00

x86/mm/64: Make SPARSEMEM_VMEMMAP the only memory model

5-level paging only supports SPARSEMEM_VMEMMAP. CONFIG_X86_5LEVEL is
being phased out, making 5-level paging support mandatory.

Make CONFIG_SPARSEMEM_VMEMMAP mandatory for x86-64 and eliminate
any associated conditional statements.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20250516123306.3812286-3-kirill.shutemov@linux.intel.com
---
 arch/x86/Kconfig      |  2 +-
 arch/x86/mm/init_64.c |  9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 22c60be..27f5d87 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1467,7 +1467,6 @@ config X86_PAE
 config X86_5LEVEL
 	bool "Enable 5-level page tables support"
 	default y
-	select SPARSEMEM_VMEMMAP
 	depends on X86_64
 	help
 	  5-level paging enables access to larger address space:
@@ -1579,6 +1578,7 @@ config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
+	select SPARSEMEM_VMEMMAP if X86_64
 
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool X86_64 || (NUMA && X86_32)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bf45c7a..66330fe 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -833,7 +833,6 @@ void __init paging_init(void)
 	zone_sizes_init();
 }
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
 #define PAGE_UNUSED 0xFD
 
 /*
@@ -932,7 +931,6 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long
 	if (!IS_ALIGNED(end, PMD_SIZE))
 		unused_pmd_start = end;
 }
-#endif
 
 /*
  * Memory hotplug specific functions
@@ -1152,16 +1150,13 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
 				pmd_clear(pmd);
 				spin_unlock(&init_mm.page_table_lock);
 				pages++;
-			}
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-			else if (vmemmap_pmd_is_unused(addr, next)) {
+			} else if (vmemmap_pmd_is_unused(addr, next)) {
 					free_hugepage_table(pmd_page(*pmd),
 							    altmap);
 					spin_lock(&init_mm.page_table_lock);
 					pmd_clear(pmd);
 					spin_unlock(&init_mm.page_table_lock);
 			}
-#endif
 			continue;
 		}
 
@@ -1500,7 +1495,6 @@ unsigned long memory_block_size_bytes(void)
 	return memory_block_size_probed;
 }
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
  */
@@ -1647,4 +1641,3 @@ void __meminit vmemmap_populate_print_last(void)
 		node_start = 0;
 	}
 }
-#endif
[tip: x86/core] x86/mm/64: Make SPARSEMEM_VMEMMAP the only memory model
Posted by tip-bot2 for Kirill A. Shutemov 7 months, 1 week ago
The following commit has been merged into the x86/core branch of tip:

Commit-ID:     0ebb63735cd6692c0c65307e9b0d683364dfff67
Gitweb:        https://git.kernel.org/tip/0ebb63735cd6692c0c65307e9b0d683364dfff67
Author:        Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
AuthorDate:    Fri, 16 May 2025 15:33:04 +03:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Fri, 16 May 2025 15:13:03 +02:00

x86/mm/64: Make SPARSEMEM_VMEMMAP the only memory model

5-level paging only supports SPARSEMEM_VMEMMAP. CONFIG_X86_5LEVEL is
being phased out, making 5-level paging support mandatory.

Make CONFIG_SPARSEMEM_VMEMMAP mandatory for x86-64 and eliminate
any associated conditional statements.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20250516123306.3812286-3-kirill.shutemov@linux.intel.com
---
 arch/x86/Kconfig      |  2 +-
 arch/x86/mm/init_64.c |  9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 22c60be..27f5d87 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1467,7 +1467,6 @@ config X86_PAE
 config X86_5LEVEL
 	bool "Enable 5-level page tables support"
 	default y
-	select SPARSEMEM_VMEMMAP
 	depends on X86_64
 	help
 	  5-level paging enables access to larger address space:
@@ -1579,6 +1578,7 @@ config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
+	select SPARSEMEM_VMEMMAP if X86_64
 
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool X86_64 || (NUMA && X86_32)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bf45c7a..66330fe 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -833,7 +833,6 @@ void __init paging_init(void)
 	zone_sizes_init();
 }
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
 #define PAGE_UNUSED 0xFD
 
 /*
@@ -932,7 +931,6 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long
 	if (!IS_ALIGNED(end, PMD_SIZE))
 		unused_pmd_start = end;
 }
-#endif
 
 /*
  * Memory hotplug specific functions
@@ -1152,16 +1150,13 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
 				pmd_clear(pmd);
 				spin_unlock(&init_mm.page_table_lock);
 				pages++;
-			}
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-			else if (vmemmap_pmd_is_unused(addr, next)) {
+			} else if (vmemmap_pmd_is_unused(addr, next)) {
 					free_hugepage_table(pmd_page(*pmd),
 							    altmap);
 					spin_lock(&init_mm.page_table_lock);
 					pmd_clear(pmd);
 					spin_unlock(&init_mm.page_table_lock);
 			}
-#endif
 			continue;
 		}
 
@@ -1500,7 +1495,6 @@ unsigned long memory_block_size_bytes(void)
 	return memory_block_size_probed;
 }
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
  */
@@ -1647,4 +1641,3 @@ void __meminit vmemmap_populate_print_last(void)
 		node_start = 0;
 	}
 }
-#endif