[PATCH 28/32] x86/boot/e820: Make sure e820_search_gap() finds all gaps

Ingo Molnar posted 32 patches 7 months ago
[PATCH 28/32] x86/boot/e820: Make sure e820_search_gap() finds all gaps
Posted by Ingo Molnar 7 months ago
The current implementation of e820_search_gap() searches gaps
in a reverse search from MAX_GAP_END back to 0, contrary to
what its main comment claims:

    * Search for a gap in the E820 memory space from 0 to MAX_GAP_END (4GB).

But gaps can not only be beyond E820 RAM ranges, they can be below
them as well. For example this function will not find the proper
PCI gap for simplified memory map layouts that have a single RAM
range that crosses the 4GB boundary.

Rework the function to have a proper forward search of
E820 table entries.

This makes the code somewhat bigger:

   text       data        bss        dec        hex    filename
   7613      44072          0      51685       c9e5    e820.o.before
   7645      44072          0      51717       ca05    e820.o.after

but it now both implements what it claims to do, and is more
straightforward to read.

( This also allows 'idx' to be the regular u32 again, not an 'int'
  underflowing to -1. )

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Shevchenko <andy@kernel.org>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
 arch/x86/kernel/e820.c | 59 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 0d7e9794cd52..5260ce6ad466 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -666,30 +666,52 @@ __init static void e820__update_table_kexec(void)
  */
 __init static int e820_search_gap(unsigned long *max_gap_start, unsigned long *max_gap_size)
 {
-	u64 last = MAX_GAP_END;
-	int idx = e820_table->nr_entries;
+	struct e820_entry *entry;
+	u64 range_end_prev = 0;
 	int found = 0;
+	u32 idx;
 
-	while (--idx >= 0) {
-		u64 start = e820_table->entries[idx].addr;
-		u64 end = start + e820_table->entries[idx].size;
+	for (idx = 0; idx < e820_table->nr_entries; idx++) {
+		u64 range_start, range_end;
 
-		/*
-		 * Since "last" is at most 4GB, we know we'll
-		 * fit in 32 bits if this condition is true:
-		 */
-		if (last > end) {
-			unsigned long gap = last - end;
+		entry = e820_table->entries + idx;
+		range_start = entry->addr;
+		range_end   = entry->addr + entry->size;
 
-			if (gap > *max_gap_size) {
-				*max_gap_size = gap;
-				*max_gap_start = end;
-				found = 1;
+		/* Process any gap before this entry: */
+		if (range_start > range_end_prev) {
+			u64 gap_start = range_end_prev;
+			u64 gap_end = range_start;
+			u64 gap_size;
+
+			if (gap_start < MAX_GAP_END) {
+				/* Make sure the entirety of the gap is below MAX_GAP_END: */
+				gap_end = min(gap_end, MAX_GAP_END);
+				gap_size = gap_end-gap_start;
+
+				if (gap_size >= *max_gap_size) {
+					*max_gap_start = gap_start;
+					*max_gap_size = gap_size;
+					found = 1;
+				}
 			}
 		}
-		if (start < last)
-			last = start;
+
+		range_end_prev = range_end;
+	}
+
+	/* Is there a usable gap beyond the last entry: */
+	if (entry->addr + entry->size < MAX_GAP_END) {
+		u64 gap_start = entry->addr + entry->size;
+		u64 gap_size = MAX_GAP_END-gap_start;
+
+		if (gap_size >= *max_gap_size) {
+			*max_gap_start = gap_start;
+			*max_gap_size = gap_size;
+			found = 1;
+		}
 	}
+
 	return found;
 }
 
@@ -706,6 +728,7 @@ __init void e820__setup_pci_gap(void)
 	unsigned long max_gap_start, max_gap_size;
 	int found;
 
+	/* The minimum eligible gap size is 4MB: */
 	max_gap_size = SZ_4M;
 	found  = e820_search_gap(&max_gap_start, &max_gap_size);
 
@@ -725,7 +748,7 @@ __init void e820__setup_pci_gap(void)
 	pci_mem_start = max_gap_start;
 
 	pr_info("[gap %#010lx-%#010lx] available for PCI devices\n",
-		max_gap_start, max_gap_start + max_gap_size - 1);
+		max_gap_start, max_gap_start + max_gap_size-1);
 }
 
 /*
-- 
2.45.2
Re: [PATCH 28/32] x86/boot/e820: Make sure e820_search_gap() finds all gaps
Posted by Nikolay Borisov 6 months, 2 weeks ago

On 5/15/25 15:05, Ingo Molnar wrote:
> The current implementation of e820_search_gap() searches gaps
> in a reverse search from MAX_GAP_END back to 0, contrary to
> what its main comment claims:
> 
>      * Search for a gap in the E820 memory space from 0 to MAX_GAP_END (4GB).
> 
> But gaps can not only be beyond E820 RAM ranges, they can be below
> them as well. For example this function will not find the proper
> PCI gap for simplified memory map layouts that have a single RAM
> range that crosses the 4GB boundary.
> 
> Rework the function to have a proper forward search of
> E820 table entries.
> 
> This makes the code somewhat bigger:
> 
>     text       data        bss        dec        hex    filename
>     7613      44072          0      51685       c9e5    e820.o.before
>     7645      44072          0      51717       ca05    e820.o.after
> 
> but it now both implements what it claims to do, and is more
> straightforward to read.
> 
> ( This also allows 'idx' to be the regular u32 again, not an 'int'
>    underflowing to -1. )
> 
> Signed-off-by: Ingo Molnar <mingo@kernel.org>
> Cc: Andy Shevchenko <andy@kernel.org>
> Cc: Arnd Bergmann <arnd@kernel.org>
> Cc: David Woodhouse <dwmw@amazon.co.uk>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
> ---
>   arch/x86/kernel/e820.c | 59 +++++++++++++++++++++++++++++++++++---------------
>   1 file changed, 41 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index 0d7e9794cd52..5260ce6ad466 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -666,30 +666,52 @@ __init static void e820__update_table_kexec(void)
>    */
>   __init static int e820_search_gap(unsigned long *max_gap_start, unsigned long *max_gap_size)
>   {


Also it's not really searching for 'a gap' but rather it searched for a 
specific gap - one larger than the passed max_gap_size. So I wonder if 
find_gap would be a more apt name, given that you have a specific 
criterion you are searching against, the gap size. If you think I'm 
reading too much into it then it's fine to disregard this.


nit: Also this function ought to return boolean.

<snip>
[tip: x86/boot] x86/boot/e820: Make sure e820_search_gap() finds all gaps
Posted by tip-bot2 for Ingo Molnar 2 days, 18 hours ago
The following commit has been merged into the x86/boot branch of tip:

Commit-ID:     0d9daff41418cbc762e4b6ec683e0a5ec4cdb5f3
Gitweb:        https://git.kernel.org/tip/0d9daff41418cbc762e4b6ec683e0a5ec4cdb5f3
Author:        Ingo Molnar <mingo@kernel.org>
AuthorDate:    Thu, 15 May 2025 14:05:44 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Sun, 14 Dec 2025 09:19:42 +01:00

x86/boot/e820: Make sure e820_search_gap() finds all gaps

The current implementation of e820_search_gap() searches gaps
in a reverse search from MAX_GAP_END back to 0, contrary to
what its main comment claims:

    * Search for a gap in the E820 memory space from 0 to MAX_GAP_END (4GB).

But gaps can not only be beyond E820 RAM ranges, they can be below
them as well. For example this function will not find the proper
PCI gap for simplified memory map layouts that have a single RAM
range that crosses the 4GB boundary.

Rework the function to have a proper forward search of
E820 table entries.

This makes the code somewhat bigger:

   text       data        bss        dec        hex    filename
   7613      44072          0      51685       c9e5    e820.o.before
   7645      44072          0      51717       ca05    e820.o.after

but it now both implements what it claims to do, and is more
straightforward to read.

( This also allows 'idx' to be the regular u32 again, not an 'int'
  underflowing to -1. )

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: H . Peter Anvin <hpa@zytor.com>
Cc: Andy Shevchenko <andy@kernel.org>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Juergen Gross <jgross@suse.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Paul Menzel <pmenzel@molgen.mpg.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://patch.msgid.link/20250515120549.2820541-29-mingo@kernel.org
---
 arch/x86/kernel/e820.c | 59 ++++++++++++++++++++++++++++-------------
 1 file changed, 41 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index c4b9a24..d1b1786 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -624,30 +624,52 @@ __init static void e820__update_table_kexec(void)
  */
 __init static int e820_search_gap(unsigned long *max_gap_start, unsigned long *max_gap_size)
 {
-	u64 last = MAX_GAP_END;
-	int idx = e820_table->nr_entries;
+	struct e820_entry *entry;
+	u64 range_end_prev = 0;
 	int found = 0;
+	u32 idx;
 
-	while (--idx >= 0) {
-		u64 start = e820_table->entries[idx].addr;
-		u64 end = start + e820_table->entries[idx].size;
+	for (idx = 0; idx < e820_table->nr_entries; idx++) {
+		u64 range_start, range_end;
 
-		/*
-		 * Since "last" is at most 4GB, we know we'll
-		 * fit in 32 bits if this condition is true:
-		 */
-		if (last > end) {
-			unsigned long gap = last - end;
+		entry = e820_table->entries + idx;
+		range_start = entry->addr;
+		range_end   = entry->addr + entry->size;
 
-			if (gap > *max_gap_size) {
-				*max_gap_size = gap;
-				*max_gap_start = end;
-				found = 1;
+		/* Process any gap before this entry: */
+		if (range_start > range_end_prev) {
+			u64 gap_start = range_end_prev;
+			u64 gap_end = range_start;
+			u64 gap_size;
+
+			if (gap_start < MAX_GAP_END) {
+				/* Make sure the entirety of the gap is below MAX_GAP_END: */
+				gap_end = min(gap_end, MAX_GAP_END);
+				gap_size = gap_end-gap_start;
+
+				if (gap_size >= *max_gap_size) {
+					*max_gap_start = gap_start;
+					*max_gap_size = gap_size;
+					found = 1;
+				}
 			}
 		}
-		if (start < last)
-			last = start;
+
+		range_end_prev = range_end;
+	}
+
+	/* Is there a usable gap beyond the last entry: */
+	if (entry->addr + entry->size < MAX_GAP_END) {
+		u64 gap_start = entry->addr + entry->size;
+		u64 gap_size = MAX_GAP_END-gap_start;
+
+		if (gap_size >= *max_gap_size) {
+			*max_gap_start = gap_start;
+			*max_gap_size = gap_size;
+			found = 1;
+		}
 	}
+
 	return found;
 }
 
@@ -664,6 +686,7 @@ __init void e820__setup_pci_gap(void)
 	unsigned long max_gap_start, max_gap_size;
 	int found;
 
+	/* The minimum eligible gap size is 4MB: */
 	max_gap_size = SZ_4M;
 	found  = e820_search_gap(&max_gap_start, &max_gap_size);
 
@@ -683,7 +706,7 @@ __init void e820__setup_pci_gap(void)
 	pci_mem_start = max_gap_start;
 
 	pr_info("[gap %#010lx-%#010lx] available for PCI devices\n",
-		max_gap_start, max_gap_start + max_gap_size - 1);
+		max_gap_start, max_gap_start + max_gap_size-1);
 }
 
 /*