[PATCH v1 11/14] x86/boot: use __seg_fs and __seg_gs in the real-mode boot code

H. Peter Anvin posted 14 patches 2 weeks, 5 days ago
[PATCH v1 11/14] x86/boot: use __seg_fs and __seg_gs in the real-mode boot code
Posted by H. Peter Anvin 2 weeks, 5 days ago
All supported versions of gcc support __seg_fs and __seg_gs now.
All supported versions of clang support __seg_fs and __seg_gs too,
except for two bugs (as of clang 21, at least):

1. The %fs: and %gs: prefix does not get emitted in inline assembly.
2. An internal compiler error when addressing symbols directly.

However, none of these are required in the boot code. Furthermore,
this makes it possible to remove the absolute_pointer() hack in the
fs/gs access functions.

This requires adding a barrier() to a20.c, to prevent the compiler
from eliding the load from the aliased memory address.

Remove the unused memcmp_[fg]s() functions.

Finally, ds() is by necessity constant, so mark the function as such.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 arch/x86/boot/a20.c  |  1 +
 arch/x86/boot/boot.h | 81 ++++++++++++++------------------------------
 2 files changed, 27 insertions(+), 55 deletions(-)

diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 3ab6cd8eaa31..52c3fccdcb70 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -63,6 +63,7 @@ static int a20_test(int loops)
 	while (loops--) {
 		wrgs32(++ctr, A20_TEST_ADDR);
 		io_delay();	/* Serialize and make delay constant */
+		barrier();	/* Compiler won't know about fs/gs overlap */
 		ok = rdfs32(A20_TEST_ADDR+0x10) ^ ctr;
 		if (ok)
 			break;
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index b4eb8405ba55..4d3549ed7987 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -45,7 +45,7 @@ static inline void io_delay(void)
 
 /* These functions are used to reference data in other segments. */
 
-static inline u16 ds(void)
+static inline __attribute_const__ u16 ds(void)
 {
 	u16 seg;
 	asm("movw %%ds,%0" : "=rm" (seg));
@@ -54,7 +54,7 @@ static inline u16 ds(void)
 
 static inline void set_fs(u16 seg)
 {
-	asm volatile("movw %0,%%fs" : : "rm" (seg));
+	asm volatile("movw %0,%%fs" : : "rm" (seg) : "memory");
 }
 static inline u16 fs(void)
 {
@@ -65,7 +65,7 @@ static inline u16 fs(void)
 
 static inline void set_gs(u16 seg)
 {
-	asm volatile("movw %0,%%gs" : : "rm" (seg));
+	asm volatile("movw %0,%%gs" : : "rm" (seg) : "memory");
 }
 static inline u16 gs(void)
 {
@@ -76,96 +76,67 @@ static inline u16 gs(void)
 
 typedef unsigned int addr_t;
 
+/*
+ * WARNING: as of clang 21, clang has the following two bugs related
+ * to __seg_fs and __seg_gs:
+ *
+ * 1. The %fs: and %gs: prefix does not get emitted in inline assembly.
+ * 2. An internal compiler error when addressing symbols directly.
+ *
+ * Neither of those constructs are currently used in the boot code.
+ * If they ever are, and those bugs still remain, then those bugs will
+ * need to be worked around.
+ */
 static inline u8 rdfs8(addr_t addr)
 {
-	u8 *ptr = (u8 *)absolute_pointer(addr);
-	u8 v;
-	asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*ptr));
-	return v;
+	return *(__seg_fs const u8 *)addr;
 }
 static inline u16 rdfs16(addr_t addr)
 {
-	u16 *ptr = (u16 *)absolute_pointer(addr);
-	u16 v;
-	asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
-	return v;
+	return *(__seg_fs const u16 *)addr;
 }
 static inline u32 rdfs32(addr_t addr)
 {
-	u32 *ptr = (u32 *)absolute_pointer(addr);
-	u32 v;
-	asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
-	return v;
+	return *(__seg_fs const u32 *)addr;
 }
 
 static inline void wrfs8(u8 v, addr_t addr)
 {
-	u8 *ptr = (u8 *)absolute_pointer(addr);
-	asm volatile("movb %1,%%fs:%0" : "+m" (*ptr) : "qi" (v));
+	*(__seg_fs u8 *)addr = v;
 }
 static inline void wrfs16(u16 v, addr_t addr)
 {
-	u16 *ptr = (u16 *)absolute_pointer(addr);
-	asm volatile("movw %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
+	*(__seg_fs u16 *)addr = v;
 }
 static inline void wrfs32(u32 v, addr_t addr)
 {
-	u32 *ptr = (u32 *)absolute_pointer(addr);
-	asm volatile("movl %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
+	*(__seg_fs u32 *)addr = v;
 }
 
 static inline u8 rdgs8(addr_t addr)
 {
-	u8 *ptr = (u8 *)absolute_pointer(addr);
-	u8 v;
-	asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*ptr));
-	return v;
+	return *(__seg_gs const u8 *)addr;
 }
 static inline u16 rdgs16(addr_t addr)
 {
-	u16 *ptr = (u16 *)absolute_pointer(addr);
-	u16 v;
-	asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
-	return v;
+	return *(__seg_gs const u16 *)addr;
 }
 static inline u32 rdgs32(addr_t addr)
 {
-	u32 *ptr = (u32 *)absolute_pointer(addr);
-	u32 v;
-	asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
-	return v;
+	return *(__seg_gs const u32 *)addr;
 }
 
 static inline void wrgs8(u8 v, addr_t addr)
 {
-	u8 *ptr = (u8 *)absolute_pointer(addr);
-	asm volatile("movb %1,%%gs:%0" : "+m" (*ptr) : "qi" (v));
+	*(__seg_gs u8 *)addr = v;
 }
 static inline void wrgs16(u16 v, addr_t addr)
 {
-	u16 *ptr = (u16 *)absolute_pointer(addr);
-	asm volatile("movw %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
+	*(__seg_gs u16 *)addr = v;
 }
 static inline void wrgs32(u32 v, addr_t addr)
 {
-	u32 *ptr = (u32 *)absolute_pointer(addr);
-	asm volatile("movl %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
-}
-
-/* Note: these only return true/false, not a signed return value! */
-static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len)
-{
-	bool diff;
-	asm volatile("fs repe cmpsb"
-		     : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len));
-	return diff;
-}
-static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
-{
-	bool diff;
-	asm volatile("gs repe cmpsb"
-		     : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len));
-	return diff;
+	*(__seg_gs u32 *)addr = v;
 }
 
 /* Heap -- available for dynamic lists. */
-- 
2.52.0
Re: [PATCH v1 11/14] x86/boot: use __seg_fs and __seg_gs in the real-mode boot code
Posted by Uros Bizjak 2 weeks, 4 days ago
On Tue, Jan 20, 2026 at 8:54 PM H. Peter Anvin <hpa@zytor.com> wrote:
>
> All supported versions of gcc support __seg_fs and __seg_gs now.
> All supported versions of clang support __seg_fs and __seg_gs too,
> except for two bugs (as of clang 21, at least):
>
> 1. The %fs: and %gs: prefix does not get emitted in inline assembly.
> 2. An internal compiler error when addressing symbols directly.
>
> However, none of these are required in the boot code. Furthermore,
> this makes it possible to remove the absolute_pointer() hack in the
> fs/gs access functions.
>
> This requires adding a barrier() to a20.c, to prevent the compiler
> from eliding the load from the aliased memory address.
>
> Remove the unused memcmp_[fg]s() functions.
>
> Finally, ds() is by necessity constant, so mark the function as such.
>
> Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>

Reviewed-by: Uros Bizjak <ubizjak@gmail.com>

> ---
>  arch/x86/boot/a20.c  |  1 +
>  arch/x86/boot/boot.h | 81 ++++++++++++++------------------------------
>  2 files changed, 27 insertions(+), 55 deletions(-)
>
> diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
> index 3ab6cd8eaa31..52c3fccdcb70 100644
> --- a/arch/x86/boot/a20.c
> +++ b/arch/x86/boot/a20.c
> @@ -63,6 +63,7 @@ static int a20_test(int loops)
>         while (loops--) {
>                 wrgs32(++ctr, A20_TEST_ADDR);
>                 io_delay();     /* Serialize and make delay constant */
> +               barrier();      /* Compiler won't know about fs/gs overlap */
>                 ok = rdfs32(A20_TEST_ADDR+0x10) ^ ctr;
>                 if (ok)
>                         break;
> diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
> index b4eb8405ba55..4d3549ed7987 100644
> --- a/arch/x86/boot/boot.h
> +++ b/arch/x86/boot/boot.h
> @@ -45,7 +45,7 @@ static inline void io_delay(void)
>
>  /* These functions are used to reference data in other segments. */
>
> -static inline u16 ds(void)
> +static inline __attribute_const__ u16 ds(void)
>  {
>         u16 seg;
>         asm("movw %%ds,%0" : "=rm" (seg));
> @@ -54,7 +54,7 @@ static inline u16 ds(void)
>
>  static inline void set_fs(u16 seg)
>  {
> -       asm volatile("movw %0,%%fs" : : "rm" (seg));
> +       asm volatile("movw %0,%%fs" : : "rm" (seg) : "memory");
>  }
>  static inline u16 fs(void)
>  {
> @@ -65,7 +65,7 @@ static inline u16 fs(void)
>
>  static inline void set_gs(u16 seg)
>  {
> -       asm volatile("movw %0,%%gs" : : "rm" (seg));
> +       asm volatile("movw %0,%%gs" : : "rm" (seg) : "memory");
>  }
>  static inline u16 gs(void)
>  {
> @@ -76,96 +76,67 @@ static inline u16 gs(void)
>
>  typedef unsigned int addr_t;
>
> +/*
> + * WARNING: as of clang 21, clang has the following two bugs related
> + * to __seg_fs and __seg_gs:
> + *
> + * 1. The %fs: and %gs: prefix does not get emitted in inline assembly.
> + * 2. An internal compiler error when addressing symbols directly.
> + *
> + * Neither of those constructs are currently used in the boot code.
> + * If they ever are, and those bugs still remain, then those bugs will
> + * need to be worked around.
> + */
>  static inline u8 rdfs8(addr_t addr)
>  {
> -       u8 *ptr = (u8 *)absolute_pointer(addr);
> -       u8 v;
> -       asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*ptr));
> -       return v;
> +       return *(__seg_fs const u8 *)addr;
>  }
>  static inline u16 rdfs16(addr_t addr)
>  {
> -       u16 *ptr = (u16 *)absolute_pointer(addr);
> -       u16 v;
> -       asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
> -       return v;
> +       return *(__seg_fs const u16 *)addr;
>  }
>  static inline u32 rdfs32(addr_t addr)
>  {
> -       u32 *ptr = (u32 *)absolute_pointer(addr);
> -       u32 v;
> -       asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*ptr));
> -       return v;
> +       return *(__seg_fs const u32 *)addr;
>  }
>
>  static inline void wrfs8(u8 v, addr_t addr)
>  {
> -       u8 *ptr = (u8 *)absolute_pointer(addr);
> -       asm volatile("movb %1,%%fs:%0" : "+m" (*ptr) : "qi" (v));
> +       *(__seg_fs u8 *)addr = v;
>  }
>  static inline void wrfs16(u16 v, addr_t addr)
>  {
> -       u16 *ptr = (u16 *)absolute_pointer(addr);
> -       asm volatile("movw %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
> +       *(__seg_fs u16 *)addr = v;
>  }
>  static inline void wrfs32(u32 v, addr_t addr)
>  {
> -       u32 *ptr = (u32 *)absolute_pointer(addr);
> -       asm volatile("movl %1,%%fs:%0" : "+m" (*ptr) : "ri" (v));
> +       *(__seg_fs u32 *)addr = v;
>  }
>
>  static inline u8 rdgs8(addr_t addr)
>  {
> -       u8 *ptr = (u8 *)absolute_pointer(addr);
> -       u8 v;
> -       asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*ptr));
> -       return v;
> +       return *(__seg_gs const u8 *)addr;
>  }
>  static inline u16 rdgs16(addr_t addr)
>  {
> -       u16 *ptr = (u16 *)absolute_pointer(addr);
> -       u16 v;
> -       asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
> -       return v;
> +       return *(__seg_gs const u16 *)addr;
>  }
>  static inline u32 rdgs32(addr_t addr)
>  {
> -       u32 *ptr = (u32 *)absolute_pointer(addr);
> -       u32 v;
> -       asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*ptr));
> -       return v;
> +       return *(__seg_gs const u32 *)addr;
>  }
>
>  static inline void wrgs8(u8 v, addr_t addr)
>  {
> -       u8 *ptr = (u8 *)absolute_pointer(addr);
> -       asm volatile("movb %1,%%gs:%0" : "+m" (*ptr) : "qi" (v));
> +       *(__seg_gs u8 *)addr = v;
>  }
>  static inline void wrgs16(u16 v, addr_t addr)
>  {
> -       u16 *ptr = (u16 *)absolute_pointer(addr);
> -       asm volatile("movw %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
> +       *(__seg_gs u16 *)addr = v;
>  }
>  static inline void wrgs32(u32 v, addr_t addr)
>  {
> -       u32 *ptr = (u32 *)absolute_pointer(addr);
> -       asm volatile("movl %1,%%gs:%0" : "+m" (*ptr) : "ri" (v));
> -}
> -
> -/* Note: these only return true/false, not a signed return value! */
> -static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len)
> -{
> -       bool diff;
> -       asm volatile("fs repe cmpsb"
> -                    : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len));
> -       return diff;
> -}
> -static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
> -{
> -       bool diff;
> -       asm volatile("gs repe cmpsb"
> -                    : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len));
> -       return diff;
> +       *(__seg_gs u32 *)addr = v;
>  }
>
>  /* Heap -- available for dynamic lists. */
> --
> 2.52.0
>
Re: [PATCH v1 11/14] x86/boot: use __seg_fs and __seg_gs in the real-mode boot code
Posted by Uros Bizjak 2 weeks, 5 days ago
On Tue, Jan 20, 2026 at 8:54 PM H. Peter Anvin <hpa@zytor.com> wrote:
>
> All supported versions of gcc support __seg_fs and __seg_gs now.
> All supported versions of clang support __seg_fs and __seg_gs too,
> except for two bugs (as of clang 21, at least):
>
> 1. The %fs: and %gs: prefix does not get emitted in inline assembly.
> 2. An internal compiler error when addressing symbols directly.
>
> However, none of these are required in the boot code. Furthermore,
> this makes it possible to remove the absolute_pointer() hack in the
> fs/gs access functions.
>
> This requires adding a barrier() to a20.c, to prevent the compiler
> from eliding the load from the aliased memory address.
>
> Remove the unused memcmp_[fg]s() functions.
>
> Finally, ds() is by necessity constant, so mark the function as such.
>
> Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
> ---
>  arch/x86/boot/a20.c  |  1 +
>  arch/x86/boot/boot.h | 81 ++++++++++++++------------------------------
>  2 files changed, 27 insertions(+), 55 deletions(-)
>
> diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
> index 3ab6cd8eaa31..52c3fccdcb70 100644
> --- a/arch/x86/boot/a20.c
> +++ b/arch/x86/boot/a20.c
> @@ -63,6 +63,7 @@ static int a20_test(int loops)
>         while (loops--) {
>                 wrgs32(++ctr, A20_TEST_ADDR);
>                 io_delay();     /* Serialize and make delay constant */
> +               barrier();      /* Compiler won't know about fs/gs overlap */
>                 ok = rdfs32(A20_TEST_ADDR+0x10) ^ ctr;
>                 if (ok)
>                         break;

This particular issue is not due to the compiler not knowing about the
fs/gs overlap (if the compiler determined that the write and the read
are to the same non-volatile address, it would simply remove both the
load and the store), but due to the compiler performing load hoisting
and store sinking out of the loop. The compiler considers these two
addresses to be different addresses (they are also defined in two
different named address spaces), and optimizes accesses to them. So,
without barrier(), it simply loads the values from A20_TEST_ADDR and
A20_TEST_ADDR+0x10 before the loop, resulting in:

  9:   65 8b 0d 00 02 00 00    mov    %gs:0x200,%ecx
 ...
 16:   64 8b 35 10 02 00 00    mov    %fs:0x210,%esi
 ...
 28:   83 ea 01                sub    $0x1,%edx
 2b:   74 0a                   je     37 <a20_test_ref+0x37>
 2d:   e6 80                   out    %al,$0x80
 2f:   89 d9                   mov    %ebx,%ecx
 31:   29 d1                   sub    %edx,%ecx
 33:   39 ce                   cmp    %ecx,%esi
 35:   74 f1                   je     28 <a20_test_ref+0x28>

The solution with barrier() introduces a memory clobber between the
store and the load, so the compiler is now forced to load and store the
values due to the side effects of the barrier() in between. This kind
of works, but is just a workaround for what really happens. In reality,
the value at the test address changes "behind the compiler's back";
in other words, the variable's value can change in ways the compiler
cannot predict.

My proposal is to use a volatile pointer to an absolute address, so
the unwanted optimizations are suppressed. The generated code is the
same as with barrier(), but now the code tells the compiler that every
read and write to this address must happen exactly as written in the
source code. Before your patch, the accessors were defined with
volatile asm, and this is the place where the volatile qualifier
matters. So, my proposed code would read:

#define A20_TEST_ADDR    (4*0x80)

#define A20_TEST_GS (*(volatile __seg_gs u32 *)A20_TEST_ADDR)
#define A20_TEST_FS (*(volatile __seg_fs u32 *)(A20_TEST_ADDR+0x10))

static int a20_test(int loops)
{
    int saved, ctr;

    set_fs(0xffff);

    saved = ctr = A20_TEST_GS;

    do {
        A20_TEST_GS = ++ctr;
        io_delay();    /* Make constant delay */
        if (A20_TEST_FS != ctr)
            break;
    } while (--loops);

    A20_TEST_GS = saved;
    return loops;
}

BR,
Uros.
Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by Simon Glass 2 weeks, 3 days ago
Hi Peter,

On Tue, Jan 20, 2026 at 8:54 PM H. Peter Anvin <hpa@zytor.com> wrote:
>
> The EFI stub code is mature, most current x86 systems require EFI to
> boot, and as it is exclusively preboot code, it doesn't affect the
> runtime memory footprint at all.
> 
> It makes absolutely no sense to omit it anymore, so make it
> unconditional.
> 
> Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
> ---
>  arch/x86/Kconfig                  | 14 ++------------
>  arch/x86/boot/compressed/Makefile |  2 --
>  arch/x86/boot/compressed/error.c  |  2 --
>  arch/x86/boot/header.S            |  3 ---
>  4 files changed, 2 insertions(+), 19 deletions(-)

At least with QEMU the EFI protocol adds quite a lot of overhead.

Is there any actual need for this?

Regards,
Simon

Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by H. Peter Anvin 2 weeks, 3 days ago
On January 22, 2026 10:57:39 AM PST, Simon Glass <sjg@chromium.org> wrote:
>Hi Peter,
>
>On Tue, Jan 20, 2026 at 8:54 PM H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> The EFI stub code is mature, most current x86 systems require EFI to
>> boot, and as it is exclusively preboot code, it doesn't affect the
>> runtime memory footprint at all.
>> 
>> It makes absolutely no sense to omit it anymore, so make it
>> unconditional.
>> 
>> Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
>> ---
>>  arch/x86/Kconfig                  | 14 ++------------
>>  arch/x86/boot/compressed/Makefile |  2 --
>>  arch/x86/boot/compressed/error.c  |  2 --
>>  arch/x86/boot/header.S            |  3 ---
>>  4 files changed, 2 insertions(+), 19 deletions(-)
>
>At least with QEMU the EFI protocol adds quite a lot of overhead.
>
>Is there any actual need for this?
>
>Regards,
>Simon
>

Including the EFI stub doesn't mean using EFI to boot is required.
Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by Simon Glass 1 week, 6 days ago
Hi Peter,

On Fri, 23 Jan 2026 at 13:11, H. Peter Anvin <hpa@zytor.com> wrote:
>
> On January 22, 2026 10:57:39 AM PST, Simon Glass <sjg@chromium.org> wrote:
> >Hi Peter,
> >
> >On Tue, Jan 20, 2026 at 8:54 PM H. Peter Anvin <hpa@zytor.com> wrote:
> >>
> >> The EFI stub code is mature, most current x86 systems require EFI to
> >> boot, and as it is exclusively preboot code, it doesn't affect the
> >> runtime memory footprint at all.
> >>
> >> It makes absolutely no sense to omit it anymore, so make it
> >> unconditional.
> >>
> >> Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
> >> ---
> >>  arch/x86/Kconfig                  | 14 ++------------
> >>  arch/x86/boot/compressed/Makefile |  2 --
> >>  arch/x86/boot/compressed/error.c  |  2 --
> >>  arch/x86/boot/header.S            |  3 ---
> >>  4 files changed, 2 insertions(+), 19 deletions(-)
> >
> >At least with QEMU the EFI protocol adds quite a lot of overhead.
> >
> >Is there any actual need for this?
> >
> >Regards,
> >Simon
> >
>
> Including the EFI stub doesn't mean using EFI to boot is required.

Yes, understood, but it adds bloat. More importantly it will lead to
people assuming that the stub is always used and thus unwittingly blur
the boundary between the stub and the kernel itself.

What is the actual need for this?

Regards,
Simon
Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by H. Peter Anvin 1 week, 6 days ago
On 2026-01-26 13:19, Simon Glass wrote:
>>
>> Including the EFI stub doesn't mean using EFI to boot is required.
> 
> Yes, understood, but it adds bloat. More importantly it will lead to
> people assuming that the stub is always used and thus unwittingly blur
> the boundary between the stub and the kernel itself.
> 
> What is the actual need for this?
> 

I would argue that the opposite is more likely: someone inadvertently builds a
kernel without the stub, the bootloader goes down a legacy support path and
things seem to work... except for some platform subtleties.

The bloat is there, but it is small and is only in the on-disk kernel image;
it is zero at runtime.

As such, I don't think this option is a particularly good idea anymore. If
necessary, it could be hidden behind an EXPERT option, but I first wanted to
see who, if anyone, actually cares in a meaningful way to maintain this option.
Every option, after all, adds maintenance burden.

Note that the BIOS stub is unconditionally compiled and included, and that has
not been an issue.

	-hpa
Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by Simon Glass 1 week, 6 days ago
Hi Peter,

On Tue, 27 Jan 2026 at 11:21, H. Peter Anvin <hpa@zytor.com> wrote:
>
> On 2026-01-26 13:19, Simon Glass wrote:
> >>
> >> Including the EFI stub doesn't mean using EFI to boot is required.
> >
> > Yes, understood, but it adds bloat. More importantly it will lead to
> > people assuming that the stub is always used and thus unwittingly blur
> > the boundary between the stub and the kernel itself.
> >
> > What is the actual need for this?
> >
>
> I would argue that the opposite is more likely: someone inadvertently builds a
> kernel without the stub, the bootloader goes down a legacy support path and
> things seems to work... except for some platform subtleties.
>
> The bloat is there, but it is small and is only in the on-disk kernel image;
> it is zero at runtime.
>
> As such, I don't think this option is a particularly good idea anymore. If
> necessary, it could be hidden behind an EXPERT option, but I first wanted to
> see who if anyone actually cares in a meaningful way to maintain this option.
> Every option, after all, adds maintenance burden.
>
> Note that the BIOS stub is unconditionally compiled and included, and that has
> not been an issue.

What is the maintenance burden here? I could potentially take that on,
but I would first want to understand what is involved.

The use of the word 'legacy' worries me too. Is this patch a step
towards removing the non-EFI path?

Regards,
Simon
Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by H. Peter Anvin 1 week, 6 days ago
On January 26, 2026 5:44:43 PM PST, Simon Glass <sjg@chromium.org> wrote:
>Hi Peter,
>
>On Tue, 27 Jan 2026 at 11:21, H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> On 2026-01-26 13:19, Simon Glass wrote:
>> >>
>> >> Including the EFI stub doesn't mean using EFI to boot is required.
>> >
>> > Yes, understood, but it adds bloat. More importantly it will lead to
>> > people assuming that the stub is always used and thus unwittingly blur
>> > the boundary between the stub and the kernel itself.
>> >
>> > What is the actual need for this?
>> >
>>
>> I would argue that the opposite is more likely: someone inadvertently builds a
>> kernel without the stub, the bootloader goes down a legacy support path and
>> things seems to work... except for some platform subtleties.
>>
>> The bloat is there, but it is small and is only in the on-disk kernel image;
>> it is zero at runtime.
>>
>> As such, I don't think this option is a particularly good idea anymore. If
>> necessary, it could be hidden behind an EXPERT option, but I first wanted to
>> see who if anyone actually cares in a meaningful way to maintain this option.
>> Every option, after all, adds maintenance burden.
>>
>> Note that the BIOS stub is unconditionally compiled and included, and that has
>> not been an issue.
>
>What is the maintenance burden here? I could potentially take that on,
>but I would first want to understand what is involved.
>
>The use of the word 'legacy' worries me too. Is this patch a step
>towards removing the non-EFI path?
>
>Regards,
>Simon

I just realized you are the U-Boot maintainer, so I'm assuming you are thinking of the case where there is no UEFI or BIOS firmware. In that case, just as for kexec, the current entry point will continue to work, of course.

What we don't want is having to suffer being on a BIOS or EFI system but not being able to leverage it for the benefit of the kernel. The kernel image is much easier to upgrade.
Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by Simon Glass 1 week, 6 days ago
Hi Peter,

On Tue, 27 Jan 2026 at 15:55, H. Peter Anvin <hpa@zytor.com> wrote:
>
> On January 26, 2026 5:44:43 PM PST, Simon Glass <sjg@chromium.org> wrote:
> >Hi Peter,
> >
> >On Tue, 27 Jan 2026 at 11:21, H. Peter Anvin <hpa@zytor.com> wrote:
> >>
> >> On 2026-01-26 13:19, Simon Glass wrote:
> >> >>
> >> >> Including the EFI stub doesn't mean using EFI to boot is required.
> >> >
> >> > Yes, understood, but it adds bloat. More importantly it will lead to
> >> > people assuming that the stub is always used and thus unwittingly blur
> >> > the boundary between the stub and the kernel itself.
> >> >
> >> > What is the actual need for this?
> >> >
> >>
> >> I would argue that the opposite is more likely: someone inadvertently builds a
> >> kernel without the stub, the bootloader goes down a legacy support path and
> >> things seems to work... except for some platform subtleties.
> >>
> >> The bloat is there, but it is small and is only in the on-disk kernel image;
> >> it is zero at runtime.
> >>
> >> As such, I don't think this option is a particularly good idea anymore. If
> >> necessary, it could be hidden behind an EXPERT option, but I first wanted to
> >> see who if anyone actually cares in a meaningful way to maintain this option.
> >> Every option, after all, adds maintenance burden.
> >>
> >> Note that the BIOS stub is unconditionally compiled and included, and that has
> >> not been an issue.
> >
> >What is the maintenance burden here? I could potentially take that on,
> >but I would first want to understand what is involved.
> >
> >The use of the word 'legacy' worries me too. Is this patch a step
> >towards removing the non-EFI path?
> >
> >Regards,
> >Simon

(joining the threads)

> Bypassing the firmware stub (BIOS or EFI) is really only appropriate for special user cases, like kexec, because it removes the ability for the kernel to deal with system issues at an early point.

Does this dealing happen in the EFI stub, or later? Are you referring
to ACPI fix-ups or something else?

>
> However, kexec needs it, and it's not going to go away. However, that doesn't mean we should encourage this in cases which doesn't need it (no matter what the Grub maintainers tell you.)
>
> Now, if you are using KVM without EFI you are probably doing BIOS boot (regardless of if you know it or not), entering via the BIOS firmware stub.

I am thinking of the 64-bit entry point to the kernel, with everything
laid out in memory and ready to go.

>
> I just realized you are the U-boot maintainer, so I'm assuming you are thinking of the case where there is no UEFI or BIOS firmware. In that case, just like as for kexec, the current entry point will continue to work, of course.
>
> What we don't want is having to suffer being on a BIOS or EFI system but not being able to leverage it for the benefit of the kernel. The kernel image is much easier to upgrade.

More generally I am thinking about a simple and clean API for the
kernel that doesn't involve having to provide 30K lines of firmware
code just to boot. The BIOS entry point (if that is what it is called)
is quite close to this ideal, even though I know it has shortcomings.

Regards,
Simon
Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by H. Peter Anvin 1 week, 6 days ago
On January 26, 2026 7:14:27 PM PST, Simon Glass <sjg@chromium.org> wrote:
>Hi Peter,
>
>On Tue, 27 Jan 2026 at 15:55, H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> On January 26, 2026 5:44:43 PM PST, Simon Glass <sjg@chromium.org> wrote:
>> >Hi Peter,
>> >
>> >On Tue, 27 Jan 2026 at 11:21, H. Peter Anvin <hpa@zytor.com> wrote:
>> >>
>> >> On 2026-01-26 13:19, Simon Glass wrote:
>> >> >>
>> >> >> Including the EFI stub doesn't mean using EFI to boot is required.
>> >> >
>> >> > Yes, understood, but it adds bloat. More importantly it will lead to
>> >> > people assuming that the stub is always used and thus unwittingly blur
>> >> > the boundary between the stub and the kernel itself.
>> >> >
>> >> > What is the actual need for this?
>> >> >
>> >>
>> >> I would argue that the opposite is more likely: someone inadvertently builds a
>> >> kernel without the stub, the bootloader goes down a legacy support path and
>> >> things seems to work... except for some platform subtleties.
>> >>
>> >> The bloat is there, but it is small and is only in the on-disk kernel image;
>> >> it is zero at runtime.
>> >>
>> >> As such, I don't think this option is a particularly good idea anymore. If
>> >> necessary, it could be hidden behind an EXPERT option, but I first wanted to
>> >> see who if anyone actually cares in a meaningful way to maintain this option.
>> >> Every option, after all, adds maintenance burden.
>> >>
>> >> Note that the BIOS stub is unconditionally compiled and included, and that has
>> >> not been an issue.
>> >
>> >What is the maintenance burden here? I could potentially take that on,
>> >but I would first want to understand what is involved.
>> >
>> >The use of the word 'legacy' worries me too. Is this patch a step
>> >towards removing the non-EFI path?
>> >
>> >Regards,
>> >Simon
>
>(joining the threads)
>
>> Bypassing the firmware stub (BIOS or EFI) is really only appropriate for special user cases, like kexec, because it removes the ability for the kernel to deal with system issues at an early point.
>
>Does this dealing happen in the EFI stub, or later? Are you referring
>to ACPI fix-ups or something else?
>
>>
>> However, kexec needs it, and it's not going to go away. However, that doesn't mean we should encourage this in cases which doesn't need it (no matter what the Grub maintainers tell you.)
>>
>> Now, if you are using KVM without EFI you are probably doing BIOS boot (regardless of if you know it or not), entering via the BIOS firmware stub.
>
>I am thinking of the 64-bit entry point to the kernel. everything
>being laid out in memory ready to go.
>
>>
>> I just realized you are the U-boot maintainer, so I'm assuming you are thinking of the case where there is no UEFI or BIOS firmware. In that case, just like as for kexec, the current entry point will continue to work, of course.
>>
>> What we don't want is having to suffer being on a BIOS or EFI system but not being able to leverage it for the benefit of the kernel. The kernel image is much easier to upgrade.
>
>More generally I am thinking about a simple and clean API for the
>kernel that doesn't involve having to provide 30K lines of firmware
>code just to boot. The BIOS entry point (if that is what it is called)
>is quite close to this ideal, even though I know it has shortcomings.
>
>Regards,
>Simon

Yes, I understand. 

The 32/64-bit entrypoints aren't going away; it would be impossible to do so. 

The general rule is: do things as late in the boot process as possible, but no later. 
Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by Simon Glass 1 week, 3 days ago
Hi Peter,

On Tue, 27 Jan 2026 at 16:22, H. Peter Anvin <hpa@zytor.com> wrote:
>
> On January 26, 2026 7:14:27 PM PST, Simon Glass <sjg@chromium.org> wrote:
> >Hi Peter,
> >
> >On Tue, 27 Jan 2026 at 15:55, H. Peter Anvin <hpa@zytor.com> wrote:
> >>
> >> On January 26, 2026 5:44:43 PM PST, Simon Glass <sjg@chromium.org> wrote:
> >> >Hi Peter,
> >> >
> >> >On Tue, 27 Jan 2026 at 11:21, H. Peter Anvin <hpa@zytor.com> wrote:
> >> >>
> >> >> On 2026-01-26 13:19, Simon Glass wrote:
> >> >> >>
> >> >> >> Including the EFI stub doesn't mean using EFI to boot is required.
> >> >> >
> >> >> > Yes, understood, but it adds bloat. More importantly it will lead to
> >> >> > people assuming that the stub is always used and thus unwittingly blur
> >> >> > the boundary between the stub and the kernel itself.
> >> >> >
> >> >> > What is the actual need for this?
> >> >> >
> >> >>
> >> >> I would argue that the opposite is more likely: someone inadvertently builds a
> >> >> kernel without the stub, the bootloader goes down a legacy support path and
> >> >> things seems to work... except for some platform subtleties.
> >> >>
> >> >> The bloat is there, but it is small and is only in the on-disk kernel image;
> >> >> it is zero at runtime.
> >> >>
> >> >> As such, I don't think this option is a particularly good idea anymore. If
> >> >> necessary, it could be hidden behind an EXPERT option, but I first wanted to
> >> >> see who if anyone actually cares in a meaningful way to maintain this option.
> >> >> Every option, after all, adds maintenance burden.
> >> >>
> >> >> Note that the BIOS stub is unconditionally compiled and included, and that has
> >> >> not been an issue.
> >> >
> >> >What is the maintenance burden here? I could potentially take that on,
> >> >but I would first want to understand what is involved.
> >> >
> >> >The use of the word 'legacy' worries me too. Is this patch a step
> >> >towards removing the non-EFI path?
> >> >
> >> >Regards,
> >> >Simon
> >
> >(joining the threads)
> >
> >> Bypassing the firmware stub (BIOS or EFI) is really only appropriate for special user cases, like kexec, because it removes the ability for the kernel to deal with system issues at an early point.
> >
> >Does this dealing happen in the EFI stub, or later? Are you referring
> >to ACPI fix-ups or something else?
> >
> >>
> >> However, kexec needs it, and it's not going to go away. However, that doesn't mean we should encourage this in cases which doesn't need it (no matter what the Grub maintainers tell you.)
> >>
> >> Now, if you are using KVM without EFI you are probably doing BIOS boot (regardless of if you know it or not), entering via the BIOS firmware stub.
> >
> >I am thinking of the 64-bit entry point to the kernel. everything
> >being laid out in memory ready to go.
> >
> >>
> >> I just realized you are the U-boot maintainer, so I'm assuming you are thinking of the case where there is no UEFI or BIOS firmware. In that case, just like as for kexec, the current entry point will continue to work, of course.
> >>
> >> What we don't want is having to suffer being on a BIOS or EFI system but not being able to leverage it for the benefit of the kernel. The kernel image is much easier to upgrade.
> >
> >More generally I am thinking about a simple and clean API for the
> >kernel that doesn't involve having to provide 30K lines of firmware
> >code just to boot. The BIOS entry point (if that is what it is called)
> >is quite close to this ideal, even though I know it has shortcomings.
> >
> >Regards,
> >Simon
>
> Yes, I understand.
>
> The 32/64-bit entrypoints aren't going away; it would be impossible to do so.
>

OK, so 'depend on EXPERT' seems good to me.

> The general rule is: do things as late in the boot process as possible, but no later.
>

Just on this point, I wonder how we should define 'late', in the
context of EFI. For example, if the kernel stub reads a file, meaning
it calls back into Tianocore, it is using both early and late code.

Regards,
Simon
Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by H. Peter Anvin 1 week, 3 days ago
On January 29, 2026 2:13:13 PM PST, Simon Glass <sjg@chromium.org> wrote:
>Hi Peter,
>
>On Tue, 27 Jan 2026 at 16:22, H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> On January 26, 2026 7:14:27 PM PST, Simon Glass <sjg@chromium.org> wrote:
>> >Hi Peter,
>> >
>> >On Tue, 27 Jan 2026 at 15:55, H. Peter Anvin <hpa@zytor.com> wrote:
>> >>
>> >> On January 26, 2026 5:44:43 PM PST, Simon Glass <sjg@chromium.org> wrote:
>> >> >Hi Peter,
>> >> >
>> >> >On Tue, 27 Jan 2026 at 11:21, H. Peter Anvin <hpa@zytor.com> wrote:
>> >> >>
>> >> >> On 2026-01-26 13:19, Simon Glass wrote:
>> >> >> >>
>> >> >> >> Including the EFI stub doesn't mean using EFI to boot is required.
>> >> >> >
>> >> >> > Yes, understood, but it adds bloat. More importantly it will lead to
>> >> >> > people assuming that the stub is always used and thus unwittingly blur
>> >> >> > the boundary between the stub and the kernel itself.
>> >> >> >
>> >> >> > What is the actual need for this?
>> >> >> >
>> >> >>
>> >> >> I would argue that the opposite is more likely: someone inadvertently builds a
>> >> >> kernel without the stub, the bootloader goes down a legacy support path and
>> >> >> things seems to work... except for some platform subtleties.
>> >> >>
>> >> >> The bloat is there, but it is small and is only in the on-disk kernel image;
>> >> >> it is zero at runtime.
>> >> >>
>> >> >> As such, I don't think this option is a particularly good idea anymore. If
>> >> >> necessary, it could be hidden behind an EXPERT option, but I first wanted to
>> >> >> see who if anyone actually cares in a meaningful way to maintain this option.
>> >> >> Every option, after all, adds maintenance burden.
>> >> >>
>> >> >> Note that the BIOS stub is unconditionally compiled and included, and that has
>> >> >> not been an issue.
>> >> >
>> >> >What is the maintenance burden here? I could potentially take that on,
>> >> >but I would first want to understand what is involved.
>> >> >
>> >> >The use of the word 'legacy' worries me too. Is this patch a step
>> >> >towards removing the non-EFI path?
>> >> >
>> >> >Regards,
>> >> >Simon
>> >
>> >(joining the threads)
>> >
>> >> Bypassing the firmware stub (BIOS or EFI) is really only appropriate for special user cases, like kexec, because it removes the ability for the kernel to deal with system issues at an early point.
>> >
>> >Does this dealing happen in the EFI stub, or later? Are you referring
>> >to ACPI fix-ups or something else?
>> >
>> >>
>> >> However, kexec needs it, and it's not going to go away. However, that doesn't mean we should encourage this in cases which doesn't need it (no matter what the Grub maintainers tell you.)
>> >>
>> >> Now, if you are using KVM without EFI you are probably doing BIOS boot (regardless of if you know it or not), entering via the BIOS firmware stub.
>> >
>> >I am thinking of the 64-bit entry point to the kernel. everything
>> >being laid out in memory ready to go.
>> >
>> >>
>> >> I just realized you are the U-boot maintainer, so I'm assuming you are thinking of the case where there is no UEFI or BIOS firmware. In that case, just like as for kexec, the current entry point will continue to work, of course.
>> >>
>> >> What we don't want is having to suffer being on a BIOS or EFI system but not being able to leverage it for the benefit of the kernel. The kernel image is much easier to upgrade.
>> >
>> >More generally I am thinking about a simple and clean API for the
>> >kernel that doesn't involve having to provide 30K lines of firmware
>> >code just to boot. The BIOS entry point (if that is what it is called)
>> >is quite close to this ideal, even though I know it has shortcomings.
>> >
>> >Regards,
>> >Simon
>>
>> Yes, I understand.
>>
>> The 32/64-bit entrypoints aren't going away; it would be impossible to do so.
>>
>
>OK, so 'depend on EXPERT' seems good to me.
>
>> The general rule is: do things as late in the boot process as possible, but no later.
>>
>
>Just on this point, I wonder how we should define 'late', in the
>context of EFI. For example, if the kernel stub reads a file, meaning
>it calls back into Tianocore, it is using both early and late code.
>
>Regards,
>Simon

"Early" in this context is before ExitBootServices().
Re: [PATCH v1 08/14] x86: make CONFIG_EFI_STUB unconditional
Posted by H. Peter Anvin 1 week, 6 days ago
On January 26, 2026 5:44:43 PM PST, Simon Glass <sjg@chromium.org> wrote:
>Hi Peter,
>
>On Tue, 27 Jan 2026 at 11:21, H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> On 2026-01-26 13:19, Simon Glass wrote:
>> >>
>> >> Including the EFI stub doesn't mean using EFI to boot is required.
>> >
>> > Yes, understood, but it adds bloat. More importantly it will lead to
>> > people assuming that the stub is always used and thus unwittingly blur
>> > the boundary between the stub and the kernel itself.
>> >
>> > What is the actual need for this?
>> >
>>
>> I would argue that the opposite is more likely: someone inadvertently builds a
>> kernel without the stub, the bootloader goes down a legacy support path and
>> things seems to work... except for some platform subtleties.
>>
>> The bloat is there, but it is small and is only in the on-disk kernel image;
>> it is zero at runtime.
>>
>> As such, I don't think this option is a particularly good idea anymore. If
>> necessary, it could be hidden behind an EXPERT option, but I first wanted to
>> see who if anyone actually cares in a meaningful way to maintain this option.
>> Every option, after all, adds maintenance burden.
>>
>> Note that the BIOS stub is unconditionally compiled and included, and that has
>> not been an issue.
>
>What is the maintenance burden here? I could potentially take that on,
>but I would first want to understand what is involved.
>
>The use of the word 'legacy' worries me too. Is this patch a step
>towards removing the non-EFI path?
>
>Regards,
>Simon

Bypassing the firmware stub (BIOS or EFI) is really only appropriate for special use cases, like kexec, because it removes the ability for the kernel to deal with system issues at an early point.

However, kexec needs it, and it's not going to go away. However, that doesn't mean we should encourage this in cases which don't need it (no matter what the Grub maintainers tell you.)

Now, if you are using KVM without EFI you are probably doing BIOS boot (regardless of if you know it or not), entering via the BIOS firmware stub.
Re: [PATCH v1 11/14] x86/boot: use __seg_fs and __seg_gs in the real-mode boot code
Posted by H. Peter Anvin 2 weeks, 4 days ago
On January 21, 2026 12:56:39 AM PST, Uros Bizjak <ubizjak@gmail.com> wrote:
>On Tue, Jan 20, 2026 at 8:54 PM H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> All supported versions of gcc support __seg_fs and __seg_gs now.
>> All supported versions of clang support __seg_fs and __seg_gs too,
>> except for two bugs (as of clang 21, at least):
>>
>> 1. The %fs: and %gs: prefix does not get emitted in inline assembly.
>> 2. An internal compiler error when addressing symbols directly.
>>
>> However, none of these are required in the boot code. Furthermore,
>> this makes it possible to remove the absolute_pointer() hack in the
>> fs/gs access functions.
>>
>> This requires adding a barrier() to a20.c, to prevent the compiler
>> from eliding the load from the aliased memory address.
>>
>> Remove the unused memcmp_[fg]s() functions.
>>
>> Finally, ds() is by necessity constant, so mark the function as such.
>>
>> Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
>> ---
>>  arch/x86/boot/a20.c  |  1 +
>>  arch/x86/boot/boot.h | 81 ++++++++++++++------------------------------
>>  2 files changed, 27 insertions(+), 55 deletions(-)
>>
>> diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
>> index 3ab6cd8eaa31..52c3fccdcb70 100644
>> --- a/arch/x86/boot/a20.c
>> +++ b/arch/x86/boot/a20.c
>> @@ -63,6 +63,7 @@ static int a20_test(int loops)
>>         while (loops--) {
>>                 wrgs32(++ctr, A20_TEST_ADDR);
>>                 io_delay();     /* Serialize and make delay constant */
>> +               barrier();      /* Compiler won't know about fs/gs overlap */
>>                 ok = rdfs32(A20_TEST_ADDR+0x10) ^ ctr;
>>                 if (ok)
>>                         break;
>
>This particular issue is not due to the compiler not knowing about
>fs/gs overlap (if the compiler determines that write and read are to
>the same non-volatile address, it would simply remove both, load and
>store), but due to the compiler performing load hoisting and store
>sinking from the loop. The compiler considers these two addresses as
>two different addresses (they are also defined in two different named
>address spaces), and optimizes access to them. So, without barrier(),
>it simply loads the value from A20_TEST_ADDR and A20_TEST_ADDR+0x10
>before the loop, resulting in:
>
>  9:   65 8b 0d 00 02 00 00    mov    %gs:0x200,%ecx
> ...
> 16:   64 8b 35 10 02 00 00    mov    %fs:0x210,%esi
> ...
> 28:   83 ea 01                sub    $0x1,%edx
> 2b:   74 0a                   je     37 <a20_test_ref+0x37>
> 2d:   e6 80                   out    %al,$0x80
> 2f:   89 d9                   mov    %ebx,%ecx
> 31:   29 d1                   sub    %edx,%ecx
> 33:   39 ce                   cmp    %ecx,%esi
> 35:   74 f1                   je     28 <a20_test_ref+0x28>
>
>The solution with barrier() introduces a memory clobber between store
>and load, so the compiler is now forced to load and store the values
>due to the side effects of the barrier() in between. This kind of
>works, but is just a workaround for what really happens. In reality,
>the value at the test address changes "behind the compiler's back", IOW
>- the variable’s value can change in ways the compiler cannot predict.
>
>My proposal is to use a volatile pointer to an absolute address, so
>the unwanted optimizations are suppressed. The generated code is the
>same as with barrier(), but now the code tells the compiler that every
>read and write to this address must happen exactly as written in the
>source code. Before your patch, the accessors were defined with
>volatile asm, and this is the place where volatile qualifier matters.
>So, my proposed code would read:
>
>#define A20_TEST_ADDR    (4*0x80)
>
>#define A20_TEST_GS (*(volatile __seg_gs u32 *)A20_TEST_ADDR)
>#define A20_TEST_FS (*(volatile __seg_fs u32 *)(A20_TEST_ADDR+0x10))
>
>static int a20_test(int loops)
>{
>    int saved, ctr;
>
>    set_fs(0xffff);
>
>    saved = ctr = A20_TEST_GS;
>
>    do {
>        A20_TEST_GS = ++ctr;
>        io_delay();    /* Make constant delay */
>        if (A20_TEST_FS != ctr)
>            break;
>    } while (--loops);
>
>    A20_TEST_GS = saved;
>    return loops;
>}
>
>BR,
>Uros.
>

I disagree with that being the preferred solution, and it isn't really the Linux style.

Not only does it require making more changes to the macros, but the barrier() construct is well established in Linux as the way to indicate that a memory variable is subject to examination and/or modification by another system agent, while still being a memory variable. 

It also generally produces better code. 

So no, your analysis is, in my opinion, incorrect in light of the way the Linux memory model is already used.
Re: [PATCH v1 11/14] x86/boot: use __seg_fs and __seg_gs in the real-mode boot code
Posted by Uros Bizjak 2 weeks, 4 days ago
On Wed, Jan 21, 2026 at 4:14 PM H. Peter Anvin <hpa@zytor.com> wrote:

> >My proposal is to use a volatile pointer to an absolute address, so
> >the unwanted optimizations are suppressed. The generated code is the
> >same as with barrier(), but now the code tells the compiler that every
> >read and write to this address must happen exactly as written in the
> >source code. Before your patch, the accessors were defined with
> >volatile asm, and this is the place where volatile qualifier matters.
> >So, my proposed code would read:
> >
> >#define A20_TEST_ADDR    (4*0x80)
> >
> >#define A20_TEST_GS (*(volatile __seg_gs u32 *)A20_TEST_ADDR)
> >#define A20_TEST_FS (*(volatile __seg_fs u32 *)(A20_TEST_ADDR+0x10))
> >
> >static int a20_test(int loops)
> >{
> >    int saved, ctr;
> >
> >    set_fs(0xffff);
> >
> >    saved = ctr = A20_TEST_GS;
> >
> >    do {
> >        A20_TEST_GS = ++ctr;
> >        io_delay();    /* Make constant delay */
> >        if (A20_TEST_FS != ctr)
> >            break;
> >    } while (--loops);
> >
> >    A20_TEST_GS = saved;
> >    return loops;
> >}
> >
> >BR,
> >Uros.
> >
>
> I disagree with that being the preferred solution, and it isn't really the Linux style.
>
> Not only does it require making more changes to the macros, but the barrier() construct is well established in Linux as the way to indicate that a memory variable is subject to examination and/or modification by another system agent, while still being a memory variable.
>
> It also generally produces better code.
>
> So no, your analysis is, in my opinion, incorrect in light of the way the Linux memory model is already used.

Thanks for explaining to me the Linux way! If this is the case, I will
withdraw my proposed solution.

Best regards,
Uros.
Re: [PATCH v1 11/14] x86/boot: use __seg_fs and __seg_gs in the real-mode boot code
Posted by Uros Bizjak 2 weeks, 5 days ago
On Wed, Jan 21, 2026 at 9:56 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Tue, Jan 20, 2026 at 8:54 PM H. Peter Anvin <hpa@zytor.com> wrote:
> >
> > All supported versions of gcc support __seg_fs and __seg_gs now.
> > All supported versions of clang support __seg_fs and __seg_gs too,
> > except for two bugs (as of clang 21, at least):
> >
> > 1. The %fs: and %gs: prefix does not get emitted in inline assembly.
> > 2. An internal compiler error when addressing symbols directly.
> >
> > However, none of these are required in the boot code. Furthermore,
> > this makes it possible to remove the absolute_pointer() hack in the
> > fs/gs access functions.
> >
> > This requires adding a barrier() to a20.c, to prevent the compiler
> > from eliding the load from the aliased memory address.
> >
> > Remove the unused memcmp_[fg]s() functions.
> >
> > Finally, ds() is by necessity constant, so mark the function as such.
> >
> > Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
> > ---
> >  arch/x86/boot/a20.c  |  1 +
> >  arch/x86/boot/boot.h | 81 ++++++++++++++------------------------------
> >  2 files changed, 27 insertions(+), 55 deletions(-)
> >
> > diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
> > index 3ab6cd8eaa31..52c3fccdcb70 100644
> > --- a/arch/x86/boot/a20.c
> > +++ b/arch/x86/boot/a20.c
> > @@ -63,6 +63,7 @@ static int a20_test(int loops)
> >         while (loops--) {
> >                 wrgs32(++ctr, A20_TEST_ADDR);
> >                 io_delay();     /* Serialize and make delay constant */
> > +               barrier();      /* Compiler won't know about fs/gs overlap */
> >                 ok = rdfs32(A20_TEST_ADDR+0x10) ^ ctr;
> >                 if (ok)
> >                         break;
>
> This particular issue is not due to the compiler not knowing about
> fs/gs overlap (if the compiler determines that write and read are to
> the same non-volatile address, it would simply remove both, load and
> store), but due to the compiler performing load hoisting and store
> sinking from the loop. The compiler considers these two addresses as
> two different addresses (they are also defined in two different named
> address spaces), and optimizes access to them. So, without barrier(),
> it simply loads the value from A20_TEST_ADDR and A20_TEST_ADDR+0x10
> before the loop, resulting in:
>
>   9:   65 8b 0d 00 02 00 00    mov    %gs:0x200,%ecx
>  ...
>  16:   64 8b 35 10 02 00 00    mov    %fs:0x210,%esi
>  ...
>  28:   83 ea 01                sub    $0x1,%edx
>  2b:   74 0a                   je     37 <a20_test_ref+0x37>
>  2d:   e6 80                   out    %al,$0x80
>  2f:   89 d9                   mov    %ebx,%ecx
>  31:   29 d1                   sub    %edx,%ecx
>  33:   39 ce                   cmp    %ecx,%esi
>  35:   74 f1                   je     28 <a20_test_ref+0x28>
>
> The solution with barrier() introduces a memory clobber between store
> and load, so the compiler is now forced to load and store the values
> due to the side effects of the barrier() in between. This kind of
> works, but is just a workaround for what really happens. In reality,
> the value at the test address changes "behind the compiler's back", IOW
> - the variable’s value can change in ways the compiler cannot predict.
>
> My proposal is to use a volatile pointer to an absolute address, so
> the unwanted optimizations are suppressed. The generated code is the
> same as with barrier(), but now the code tells the compiler that every
> read and write to this address must happen exactly as written in the
> source code. Before your patch, the accessors were defined with
> volatile asm, and this is the place where volatile qualifier matters.
> So, my proposed code would read:
>
> #define A20_TEST_ADDR    (4*0x80)
>
> #define A20_TEST_GS (*(volatile __seg_gs u32 *)A20_TEST_ADDR)
> #define A20_TEST_FS (*(volatile __seg_fs u32 *)(A20_TEST_ADDR+0x10))
>
> static int a20_test(int loops)
> {
>     int saved, ctr;
>
>     set_fs(0xffff);
>
>     saved = ctr = A20_TEST_GS;
>
>     do {
>         A20_TEST_GS = ++ctr;
>         io_delay();    /* Make constant delay */
>         if (A20_TEST_FS != ctr)
>             break;
>     } while (--loops);
>
>     A20_TEST_GS = saved;
>     return loops;
> }

Now also in the form of the attached patch vs. your git hpa/boot2
branch, tested with gcc-15.2.1 and clang-21.1.8.

BR,
Uros.
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 38a1cad8a553..ff51b0ce4dfd 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -48,8 +48,9 @@ static int empty_8042(void)
    used as a test is the int $0x80 vector, which should be safe. */
 
 #define A20_TEST_ADDR	(4*0x80)
-#define A20_TEST_SHORT  32
-#define A20_TEST_LONG	2097152	/* 2^21 */
+
+#define A20_TEST_GS (*(volatile __seg_gs u32 *)A20_TEST_ADDR)
+#define A20_TEST_FS (*(volatile __seg_fs u32 *)(A20_TEST_ADDR+0x10))
 
 static int a20_test(int loops)
 {
@@ -57,20 +58,22 @@ static int a20_test(int loops)
 
 	set_fs(0xffff);
 
-	saved = ctr = rdgs32(A20_TEST_ADDR);
+	saved = ctr = A20_TEST_GS;
 
 	do {
-		wrgs32(++ctr, A20_TEST_ADDR);
-		io_delay();	/* Serialize and make delay constant */
-		barrier();	/* Compiler won't know about fs/gs overlap */
-		if (rdfs32(A20_TEST_ADDR+0x10) != ctr)
+		A20_TEST_GS = ++ctr;
+		io_delay();	/* Make constant delay */
+		if (A20_TEST_FS != ctr)
 			break;
 	} while (--loops);
 
-	wrgs32(saved, A20_TEST_ADDR);
+	A20_TEST_GS = saved;
 	return loops;
 }
 
+#define A20_TEST_SHORT  32
+#define A20_TEST_LONG	2097152	/* 2^21 */
+
 /* Quick test to see if A20 is already enabled */
 static int a20_test_short(void)
 {