x86/cet: Remove writeable mapping of the BSPs shadow stack

[PATCH] x86/cet: Remove writeable mapping of the BSPs shadow stack

Posted by Andrew Cooper 2 years, 2 months ago

An unintended consequence of the BSP using cpu0_stack[] is that writeable
mappings to the BSP's shadow stacks are retained in the bss.  This renders
CET-SS almost useless, as an attacker can update both return addresses and the
ret will not fault.

We specifically don't want to shatter the superpage mapping of .data/.bss, so
the only way to fix this is to not have the BSP stack in the main Xen image.

Break cpu_alloc_stack() out of cpu_smpboot_alloc(), and dynamically allocate
the BSP stack as early as reasonable in __start_xen().  As a consequence,
there is no need to delay the BSP's memguard_guard_stack() call.

Copy the cpu info block from the top of the current stack just before
switching to use the new stack.  Fix a latent bug by setting %rsp to
info->guest_cpu_user_regs rather than ->es; this would be buggy if
reinit_bsp_stack() called schedule() (which rewrites the GPR block) directly,
but luckily it doesn't.

Finally, move cpu0_stack[] into .init, so it can be reclaimed after boot.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
---
 xen/arch/x86/include/asm/smp.h |  2 ++
 xen/arch/x86/setup.c           | 20 +++++++++++++-------
 xen/arch/x86/smpboot.c         | 26 +++++++++++++++++++-------
 xen/arch/x86/xen.lds.S         |  4 ++--
 4 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/include/asm/smp.h b/xen/arch/x86/include/asm/smp.h
index 1747772d232e..41a3b6a0dadf 100644
--- a/xen/arch/x86/include/asm/smp.h
+++ b/xen/arch/x86/include/asm/smp.h
@@ -85,6 +85,8 @@ extern cpumask_t **socket_cpumask;
 extern unsigned int disabled_cpus;
 extern bool unaccounted_cpus;
 
+void *cpu_alloc_stack(unsigned int cpu);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 22a9885dee5c..1f816ce05a07 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -148,7 +148,7 @@ cpumask_t __read_mostly cpu_present_map;
 
 unsigned long __read_mostly xen_phys_start;
 
-char __section(".bss.stack_aligned") __aligned(STACK_SIZE)
+char __section("init.bss.stack_aligned") __aligned(STACK_SIZE)
     cpu0_stack[STACK_SIZE];
 
 /* Used by the BSP/AP paths to find the higher half stack mapping to use. */
@@ -712,7 +712,6 @@ static void __init noreturn reinit_bsp_stack(void)
     percpu_traps_init();
 
     stack_base[0] = stack;
-    memguard_guard_stack(stack);
 
     rc = setup_cpu_root_pgt(0);
     if ( rc )
@@ -886,6 +885,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
     char *cmdline, *kextra, *loader;
+    void *bsp_stack;
+    struct cpu_info *info = get_cpu_info(), *bsp_info;
     unsigned int initrdidx, num_parked = 0;
     multiboot_info_t *mbi;
     module_t *mod;
@@ -918,7 +919,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     /* Full exception support from here on in. */
 
     rdmsrl(MSR_EFER, this_cpu(efer));
-    asm volatile ( "mov %%cr4,%0" : "=r" (get_cpu_info()->cr4) );
+    asm volatile ( "mov %%cr4,%0" : "=r" (info->cr4) );
 
     /* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
     enable_nmis();
@@ -1703,6 +1704,10 @@ void __init noreturn __start_xen(unsigned long mbi_p)
      */
     vm_init();
 
+    bsp_stack = cpu_alloc_stack(0);
+    if ( !bsp_stack )
+        panic("No memory for BSP stack\n");
+
     console_init_ring();
     vesa_init();
 
@@ -1974,17 +1979,18 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 
     if ( bsp_delay_spec_ctrl )
     {
-        struct cpu_info *info = get_cpu_info();
-
         info->spec_ctrl_flags &= ~SCF_use_shadow;
         barrier();
         wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl);
         info->last_spec_ctrl = default_xen_spec_ctrl;
     }
 
-    /* Jump to the 1:1 virtual mappings of cpu0_stack. */
+    /* Copy the cpu info block, and move onto the BSP stack. */
+    bsp_info = get_cpu_info_from_stack((unsigned long)bsp_stack);
+    *bsp_info = *info;
+
     asm volatile ("mov %[stk], %%rsp; jmp %c[fn]" ::
-                  [stk] "g" (__va(__pa(get_stack_bottom()))),
+                  [stk] "g" (&bsp_info->guest_cpu_user_regs),
                   [fn] "i" (reinit_bsp_stack) : "memory");
     unreachable();
 }
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 709704d71ada..b46fd9ab18e4 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -1023,6 +1023,23 @@ static void cpu_smpboot_free(unsigned int cpu, bool remove)
     }
 }
 
+void *cpu_alloc_stack(unsigned int cpu)
+{
+    nodeid_t node = cpu_to_node(cpu);
+    unsigned int memflags = 0;
+    void *stack;
+
+    if ( node != NUMA_NO_NODE )
+        memflags = MEMF_node(node);
+
+    stack = alloc_xenheap_pages(STACK_ORDER, memflags);
+
+    if ( stack )
+        memguard_guard_stack(stack);
+
+    return stack;
+}
+
 static int cpu_smpboot_alloc(unsigned int cpu)
 {
     struct cpu_info *info;
@@ -1035,15 +1052,10 @@ static int cpu_smpboot_alloc(unsigned int cpu)
     if ( node != NUMA_NO_NODE )
         memflags = MEMF_node(node);
 
-    if ( stack_base[cpu] == NULL )
-    {
-        stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, memflags);
-        if ( !stack_base[cpu] )
+    if ( stack_base[cpu] == NULL &&
+         (stack_base[cpu] = cpu_alloc_stack(cpu)) == NULL )
             goto out;
 
-        memguard_guard_stack(stack_base[cpu]);
-    }
-
     info = get_cpu_info_from_stack((unsigned long)stack_base[cpu]);
     info->processor_id = cpu;
     info->per_cpu_offset = __per_cpu_offset[cpu];
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 4103763f6391..9cd4fe417e14 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -215,8 +215,9 @@ SECTIONS
   } PHDR(text)
   DECL_SECTION(.init.data) {
 #endif
+       . = ALIGN(STACK_SIZE);
+       *(.init.bss.stack_aligned)
 
-       . = ALIGN(POINTER_ALIGN);
        __initdata_cf_clobber_start = .;
        *(.init.data.cf_clobber)
        *(.init.rodata.cf_clobber)
@@ -300,7 +301,6 @@ SECTIONS
 
   DECL_SECTION(.bss) {
        __bss_start = .;
-       *(.bss.stack_aligned)
        *(.bss.page_aligned*)
        . = ALIGN(PAGE_SIZE);
        __per_cpu_start = .;
-- 
2.11.0

Re: [PATCH] x86/cet: Remove writeable mapping of the BSPs shadow stack

Posted by Jan Beulich 2 years, 2 months ago

On 15.03.2022 17:53, Andrew Cooper wrote:
> An unintended consequence of the BSP using cpu0_stack[] is that writeable
> mappings to the BSPs shadow stacks are retained in the bss.  This renders
> CET-SS almost useless, as an attacker can update both return addresses and the
> ret will not fault.
> 
> We specifically don't want the shatter the superpage mapping .data/.bss, so
> the only way to fix this is to not have the BSP stack in the main Xen image.
> 
> Break cpu_alloc_stack() out of cpu_smpboot_alloc(), and dynamically allocate
> the BSP stack as early as reasonable in __start_xen().  As a consequence,
> there is no need to delay the BSP's memguard_guard_stack() call.
> 
> Copy the top of cpu info block just before switching to use the new stack.
> Fix a latent bug by setting %rsp to info->guest_cpu_user_regs rather than
> ->es; this would be buggy if reinit_bsp_stack() called schedule() (which
> rewrites the GPR block) directly, but luckily it doesn't.

While I don't mind the change, I also don't view the original code as
latently buggy. (Just a remark, not a request to change anything.)

> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -148,7 +148,7 @@ cpumask_t __read_mostly cpu_present_map;
>  
>  unsigned long __read_mostly xen_phys_start;
>  
> -char __section(".bss.stack_aligned") __aligned(STACK_SIZE)
> +char __section("init.bss.stack_aligned") __aligned(STACK_SIZE)
>      cpu0_stack[STACK_SIZE];

I guess the section name was meant to start with a dot, matching
the linker script change? You should actually have seen
--orphan-handling in action here ...

> --- a/xen/arch/x86/xen.lds.S
> +++ b/xen/arch/x86/xen.lds.S
> @@ -215,8 +215,9 @@ SECTIONS
>    } PHDR(text)
>    DECL_SECTION(.init.data) {
>  #endif
> +       . = ALIGN(STACK_SIZE);
> +       *(.init.bss.stack_aligned)

No real need for the ALIGN() here - it's the contributions to the
section which ought to come with proper alignment. Imo ALIGN()
should only ever be there ahead of a symbol definition, as otherwise
the symbol might not mark what it is intended to mark due to padding
which might be inserted. See also 01fe4da6243b ("x86: force suitable
alignment in sources rather than in linker script").

Really we should consider using

    *(SORT_BY_ALIGNMENT(.init.data .init.data.* .init.bss.*))

While I can see your point against forcing sorting by alignment
globally, this very argument doesn't apply here (at least until
there appeared a way for the section attribute and -fdata-sections
to actually interact, such that .init.* could also become per-
function/object).

Then again - this block of zeroes doesn't need to occupy space in
the binary. It could very well live in a @nobits .init.bss in the
final ELF binary. But sadly the section isn't @nobits in the object
file, and with that there would need to be a way to make the linker
convert it to @nobits (and I'm unaware of such). What would work is
naming the section .bss.init.stack_aligned (or e.g.
.bss..init.stack_aligned to make it easier to separate it from
.bss.* in the linker script) - that'll make gcc mark it @nobits.

> -       . = ALIGN(POINTER_ALIGN);
>         __initdata_cf_clobber_start = .;

As a consequence, this ALIGN() shouldn't go away. The only present
contribution to the section is as large as its alignment, but that's
not generally a requirement.

Jan

Re: [PATCH] x86/cet: Remove writeable mapping of the BSPs shadow stack

Posted by Andrew Cooper 2 years, 2 months ago

On 16/03/2022 08:33, Jan Beulich wrote:
> On 15.03.2022 17:53, Andrew Cooper wrote:
>> An unintended consequence of the BSP using cpu0_stack[] is that writeable
>> mappings to the BSPs shadow stacks are retained in the bss.  This renders
>> CET-SS almost useless, as an attacker can update both return addresses and the
>> ret will not fault.
>>
>> We specifically don't want the shatter the superpage mapping .data/.bss, so
>> the only way to fix this is to not have the BSP stack in the main Xen image.
>>
>> Break cpu_alloc_stack() out of cpu_smpboot_alloc(), and dynamically allocate
>> the BSP stack as early as reasonable in __start_xen().  As a consequence,
>> there is no need to delay the BSP's memguard_guard_stack() call.
>>
>> Copy the top of cpu info block just before switching to use the new stack.
>> Fix a latent bug by setting %rsp to info->guest_cpu_user_regs rather than
>> ->es; this would be buggy if reinit_bsp_stack() called schedule() (which
>> rewrites the GPR block) directly, but luckily it doesn't.
> While I don't mind the change, I also don't view the original code as
> latently buggy. (Just a remark, not a request to change anything.)

This is practically a textbook definition of a latent bug.  Using one of
a number of functions in Xen will either read utter garbage off the
stack, or clobber the stack frame and most likely a return address, and
the reason this hasn't exploded is luck, not design.

>
>> --- a/xen/arch/x86/setup.c
>> +++ b/xen/arch/x86/setup.c
>> @@ -148,7 +148,7 @@ cpumask_t __read_mostly cpu_present_map;
>>  
>>  unsigned long __read_mostly xen_phys_start;
>>  
>> -char __section(".bss.stack_aligned") __aligned(STACK_SIZE)
>> +char __section("init.bss.stack_aligned") __aligned(STACK_SIZE)
>>      cpu0_stack[STACK_SIZE];
> I guess the section name was meant to start with a dot, matching
> the linker script change? You should actually have seen
> --orphan-handling in action here ...

It does, now that I've rebased on staging.


>> --- a/xen/arch/x86/xen.lds.S
>> +++ b/xen/arch/x86/xen.lds.S
>> @@ -215,8 +215,9 @@ SECTIONS
>>    } PHDR(text)
>>    DECL_SECTION(.init.data) {
>>  #endif
>> +       . = ALIGN(STACK_SIZE);
>> +       *(.init.bss.stack_aligned)
> No real need for the ALIGN() here - it's the contributions to the
> section which ought to come with proper alignment. Imo ALIGN()
> should only ever be there ahead of a symbol definition, as otherwise
> the symbol might not mark what it is intended to mark due to padding
> which might be inserted. See also 01fe4da6243b ("x86: force suitable
> alignment in sources rather than in linker script").
>
> Really we should consider using
>
>     *(SORT_BY_ALIGNMENT(.init.data .init.data.* .init.bss.*))
>
> While I can see your point against forcing sorting by alignment
> globally, this very argument doesn't apply here (at least until
> there appeared a way for the section attribute and -fdata-sections
> to actually interact, such that .init.* could also become per-
> function/object).
>
> Then again - this block of zeroes doesn't need to occupy space in
> the binary.

It already occupies space, because of mkelf32.

>  It could very well live in a @nobits .init.bss in the
> final ELF binary. But sadly the section isn't @nobits in the object
> file, and with that there would need to be a way to make the linker
> convert it to @nobits (and I'm unaware of such). What would work is
> naming the section .bss.init.stack_aligned (or e.g.
> .bss..init.stack_aligned to make it easier to separate it from
> .bss.* in the linker script) - that'll make gcc mark it @nobits.

Living in .bss would prevent it from being reclaimed.  We've got several
nasty bugs from shooting holes in the Xen image, and too many special
cases already.

Furthermore, we're talking about initdata here.  Size is not a concern,
especially when it's 7-9 orders of magnitude smaller than typical systems.

The choice here is between (theoretically but not in practice)
extra space on disk, and not reclaiming 32k of init data after boot.

>> -       . = ALIGN(POINTER_ALIGN);
>>         __initdata_cf_clobber_start = .;
> As a consequence, this ALIGN() shouldn't go away. The only present
> contribution to the section is as large as its alignment, but that's
> not generally a requirement.

It would actually be a severe error for there to be anything in here
with less than pointer alignment, because of how the section gets
walked.  But I'll keep the ALIGN() in.

~Andrew

Re: [PATCH] x86/cet: Remove writeable mapping of the BSPs shadow stack

Posted by Jan Beulich 2 years, 2 months ago

On 16.03.2022 20:23, Andrew Cooper wrote:
> On 16/03/2022 08:33, Jan Beulich wrote:
>> On 15.03.2022 17:53, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/xen.lds.S
>>> +++ b/xen/arch/x86/xen.lds.S
>>> @@ -215,8 +215,9 @@ SECTIONS
>>>    } PHDR(text)
>>>    DECL_SECTION(.init.data) {
>>>  #endif
>>> +       . = ALIGN(STACK_SIZE);
>>> +       *(.init.bss.stack_aligned)
>> No real need for the ALIGN() here - it's the contributions to the
>> section which ought to come with proper alignment. Imo ALIGN()
>> should only ever be there ahead of a symbol definition, as otherwise
>> the symbol might not mark what it is intended to mark due to padding
>> which might be inserted. See also 01fe4da6243b ("x86: force suitable
>> alignment in sources rather than in linker script").
>>
>> Really we should consider using
>>
>>     *(SORT_BY_ALIGNMENT(.init.data .init.data.* .init.bss.*))
>>
>> While I can see your point against forcing sorting by alignment
>> globally, this very argument doesn't apply here (at least until
>> there appeared a way for the section attribute and -fdata-sections
>> to actually interact, such that .init.* could also become per-
>> function/object).
>>
>> Then again - this block of zeroes doesn't need to occupy space in
>> the binary.
> 
> It already occupies space, because of mkelf32.

Hmm, yes, and not just because of mkelf32: Since we munge everything
in a single PT_LOAD segment in the linker script, all of .init.*
necessarily has space allocated.

>>  It could very well live in a @nobits .init.bss in the
>> final ELF binary. But sadly the section isn't @nobits in the object
>> file, and with that there would need to be a way to make the linker
>> convert it to @nobits (and I'm unaware of such). What would work is
>> naming the section .bss.init.stack_aligned (or e.g.
>> .bss..init.stack_aligned to make it easier to separate it from
>> .bss.* in the linker script) - that'll make gcc mark it @nobits.
> 
> Living in .bss would prevent it from being reclaimed.  We've got several
> nasty bugs from shooting holes in the Xen image, and too many special
> cases already.

I didn't suggest to put it in .bss; the suggested name was merely so
that gcc would mark the section @nobits and we could exclude the
section from what makes it into .bss in the final image independent
of .init.* vs .bss.* ordering in the linker script. But with the
above this aspect is now moot anyway.

Jan

Re: [PATCH] x86/cet: Remove writeable mapping of the BSPs shadow stack

Posted by Andrew Cooper 2 years, 2 months ago

On 17/03/2022 09:17, Jan Beulich wrote:
> On 16.03.2022 20:23, Andrew Cooper wrote:
>> On 16/03/2022 08:33, Jan Beulich wrote:
>>> On 15.03.2022 17:53, Andrew Cooper wrote:
>>>> --- a/xen/arch/x86/xen.lds.S
>>>> +++ b/xen/arch/x86/xen.lds.S
>>>> @@ -215,8 +215,9 @@ SECTIONS
>>>>    } PHDR(text)
>>>>    DECL_SECTION(.init.data) {
>>>>  #endif
>>>> +       . = ALIGN(STACK_SIZE);
>>>> +       *(.init.bss.stack_aligned)
>>> No real need for the ALIGN() here - it's the contributions to the
>>> section which ought to come with proper alignment. Imo ALIGN()
>>> should only ever be there ahead of a symbol definition, as otherwise
>>> the symbol might not mark what it is intended to mark due to padding
>>> which might be inserted. See also 01fe4da6243b ("x86: force suitable
>>> alignment in sources rather than in linker script").
>>>
>>> Really we should consider using
>>>
>>>     *(SORT_BY_ALIGNMENT(.init.data .init.data.* .init.bss.*))
>>>
>>> While I can see your point against forcing sorting by alignment
>>> globally, this very argument doesn't apply here (at least until
>>> there appeared a way for the section attribute and -fdata-sections
>>> to actually interact, such that .init.* could also become per-
>>> function/object).
>>>
>>> Then again - this block of zeroes doesn't need to occupy space in
>>> the binary.
>> It already occupies space, because of mkelf32.
> Hmm, yes, and not just because of mkelf32: Since we munge everything
> in a single PT_LOAD segment in the linker script, all of .init.*
> necessarily has space allocated.
>
>>>  It could very well live in a @nobits .init.bss in the
>>> final ELF binary. But sadly the section isn't @nobits in the object
>>> file, and with that there would need to be a way to make the linker
>>> convert it to @nobits (and I'm unaware of such). What would work is
>>> naming the section .bss.init.stack_aligned (or e.g.
>>> .bss..init.stack_aligned to make it easier to separate it from
>>> .bss.* in the linker script) - that'll make gcc mark it @nobits.
>> Living in .bss would prevent it from being reclaimed.  We've got several
>> nasty bugs from shooting holes in the Xen image, and too many special
>> cases already.
> I didn't suggest to put it in .bss; the suggested name was merely so
> that gcc would mark the section @nobits and we could exclude the
> section from what makes in into .bss in the final image independent
> of .init.* vs .bss.* ordering in the linker script. But anyway - with
> the above this aspect is now moot anyway.

So can I take this as an ack with the .init typo fixed?

~Andrew

Re: [PATCH] x86/cet: Remove writeable mapping of the BSPs shadow stack

Posted by Jan Beulich 2 years, 2 months ago

On 17.03.2022 17:19, Andrew Cooper wrote:
> On 17/03/2022 09:17, Jan Beulich wrote:
>> On 16.03.2022 20:23, Andrew Cooper wrote:
>>> On 16/03/2022 08:33, Jan Beulich wrote:
>>>> On 15.03.2022 17:53, Andrew Cooper wrote:
>>>>> --- a/xen/arch/x86/xen.lds.S
>>>>> +++ b/xen/arch/x86/xen.lds.S
>>>>> @@ -215,8 +215,9 @@ SECTIONS
>>>>>    } PHDR(text)
>>>>>    DECL_SECTION(.init.data) {
>>>>>  #endif
>>>>> +       . = ALIGN(STACK_SIZE);
>>>>> +       *(.init.bss.stack_aligned)
>>>> No real need for the ALIGN() here - it's the contributions to the
>>>> section which ought to come with proper alignment. Imo ALIGN()
>>>> should only ever be there ahead of a symbol definition, as otherwise
>>>> the symbol might not mark what it is intended to mark due to padding
>>>> which might be inserted. See also 01fe4da6243b ("x86: force suitable
>>>> alignment in sources rather than in linker script").
>>>>
>>>> Really we should consider using
>>>>
>>>>     *(SORT_BY_ALIGNMENT(.init.data .init.data.* .init.bss.*))
>>>>
>>>> While I can see your point against forcing sorting by alignment
>>>> globally, this very argument doesn't apply here (at least until
>>>> there appeared a way for the section attribute and -fdata-sections
>>>> to actually interact, such that .init.* could also become per-
>>>> function/object).
>>>>
>>>> Then again - this block of zeroes doesn't need to occupy space in
>>>> the binary.
>>> It already occupies space, because of mkelf32.
>> Hmm, yes, and not just because of mkelf32: Since we munge everything
>> in a single PT_LOAD segment in the linker script, all of .init.*
>> necessarily has space allocated.
>>
>>>>  It could very well live in a @nobits .init.bss in the
>>>> final ELF binary. But sadly the section isn't @nobits in the object
>>>> file, and with that there would need to be a way to make the linker
>>>> convert it to @nobits (and I'm unaware of such). What would work is
>>>> naming the section .bss.init.stack_aligned (or e.g.
>>>> .bss..init.stack_aligned to make it easier to separate it from
>>>> .bss.* in the linker script) - that'll make gcc mark it @nobits.
>>> Living in .bss would prevent it from being reclaimed.  We've got several
>>> nasty bugs from shooting holes in the Xen image, and too many special
>>> cases already.
>> I didn't suggest to put it in .bss; the suggested name was merely so
>> that gcc would mark the section @nobits and we could exclude the
>> section from what makes in into .bss in the final image independent
>> of .init.* vs .bss.* ordering in the linker script. But anyway - with
>> the above this aspect is now moot anyway.
> 
> So can I take this as an ack with the .init typo fixed?

R-b, yes, as long as the ALIGN(STACK_SIZE) is also dropped and the
other ALIGN() is retained (the latter you did already indicate you
would do).

Jan

Re: [PATCH] x86/cet: Remove writeable mapping of the BSPs shadow stack

Posted by Andrew Cooper 2 years, 2 months ago

On 17/03/2022 16:28, Jan Beulich wrote:
> On 17.03.2022 17:19, Andrew Cooper wrote:
>> On 17/03/2022 09:17, Jan Beulich wrote:
>>> On 16.03.2022 20:23, Andrew Cooper wrote:
>>>> On 16/03/2022 08:33, Jan Beulich wrote:
>>>>> On 15.03.2022 17:53, Andrew Cooper wrote:
>>>>>> --- a/xen/arch/x86/xen.lds.S
>>>>>> +++ b/xen/arch/x86/xen.lds.S
>>>>>> @@ -215,8 +215,9 @@ SECTIONS
>>>>>>    } PHDR(text)
>>>>>>    DECL_SECTION(.init.data) {
>>>>>>  #endif
>>>>>> +       . = ALIGN(STACK_SIZE);
>>>>>> +       *(.init.bss.stack_aligned)
>>>>> No real need for the ALIGN() here - it's the contributions to the
>>>>> section which ought to come with proper alignment. Imo ALIGN()
>>>>> should only ever be there ahead of a symbol definition, as otherwise
>>>>> the symbol might not mark what it is intended to mark due to padding
>>>>> which might be inserted. See also 01fe4da6243b ("x86: force suitable
>>>>> alignment in sources rather than in linker script").
>>>>>
>>>>> Really we should consider using
>>>>>
>>>>>     *(SORT_BY_ALIGNMENT(.init.data .init.data.* .init.bss.*))
>>>>>
>>>>> While I can see your point against forcing sorting by alignment
>>>>> globally, this very argument doesn't apply here (at least until
>>>>> there appeared a way for the section attribute and -fdata-sections
>>>>> to actually interact, such that .init.* could also become per-
>>>>> function/object).
>>>>>
>>>>> Then again - this block of zeroes doesn't need to occupy space in
>>>>> the binary.
>>>> It already occupies space, because of mkelf32.
>>> Hmm, yes, and not just because of mkelf32: Since we munge everything
>>> in a single PT_LOAD segment in the linker script, all of .init.*
>>> necessarily has space allocated.
>>>
>>>>>  It could very well live in a @nobits .init.bss in the
>>>>> final ELF binary. But sadly the section isn't @nobits in the object
>>>>> file, and with that there would need to be a way to make the linker
>>>>> convert it to @nobits (and I'm unaware of such). What would work is
>>>>> naming the section .bss.init.stack_aligned (or e.g.
>>>>> .bss..init.stack_aligned to make it easier to separate it from
>>>>> .bss.* in the linker script) - that'll make gcc mark it @nobits.
>>>> Living in .bss would prevent it from being reclaimed.  We've got several
>>>> nasty bugs from shooting holes in the Xen image, and too many special
>>>> cases already.
>>> I didn't suggest to put it in .bss; the suggested name was merely so
>>> that gcc would mark the section @nobits and we could exclude the
>>> section from what makes in into .bss in the final image independent
>>> of .init.* vs .bss.* ordering in the linker script. But anyway - with
>>> the above this aspect is now moot anyway.
>> So can I take this as an ack with the .init typo fixed?
> R-b, yes, as long as the ALIGN(STACK_SIZE) is also dropped and the
> other ALIGN() is retained (the latter you did already indicate you
> would do).

Ah yes.  Thanks.

~Andrew