[PATCH] Avoid additional relocations in trampoline code

Frediano Ziglio posted 1 patch 3 months ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20240822152953.489136-1-frediano.ziglio@cloud.com
xen/arch/x86/boot/trampoline.S | 4 ++--
xen/arch/x86/boot/wakeup.S     | 6 +++---
xen/arch/x86/xen.lds.S         | 7 ++++++-
3 files changed, 11 insertions(+), 6 deletions(-)
[PATCH] Avoid additional relocations in trampoline code
Posted by Frediano Ziglio 3 months ago
The trampoline could have "manual" relocation entries (created
by assembly macros and some code to use them) and (in case of PE)
normal executable relocations.
Remove all normal executable ones. In this way we don't have to
worry about applying both correctly (they need proper order
which is hard to spot looking at the code).

Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>
---
 xen/arch/x86/boot/trampoline.S | 4 ++--
 xen/arch/x86/boot/wakeup.S     | 6 +++---
 xen/arch/x86/xen.lds.S         | 7 ++++++-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
index b8ab0ffdcb..0724c953e2 100644
--- a/xen/arch/x86/boot/trampoline.S
+++ b/xen/arch/x86/boot/trampoline.S
@@ -73,7 +73,7 @@ trampoline_protmode_entry:
         mov     %ecx,%cr4
 
         /* Load pagetable base register. */
-        mov     $sym_offs(idle_pg_table),%eax
+        mov     $idle_pg_table_pa, %eax
         add     bootsym_rel(trampoline_xen_phys_start,4,%eax)
         mov     %eax,%cr3
 
@@ -113,7 +113,7 @@ trampoline_protmode_entry:
         .code64
 start64:
         /* Jump to high mappings. */
-        movabs  $__high_start, %rdi
+        movabs  $__high_start_pa + __XEN_VIRT_START, %rdi
         jmpq    *%rdi
 
 #include "video.h"
diff --git a/xen/arch/x86/boot/wakeup.S b/xen/arch/x86/boot/wakeup.S
index 08447e1934..ebe72e1fdd 100644
--- a/xen/arch/x86/boot/wakeup.S
+++ b/xen/arch/x86/boot/wakeup.S
@@ -99,7 +99,7 @@ wakeup_32:
         mov     $bootsym_rel(wakeup_stack, 4, %esp)
 
         # check saved magic again
-        mov     $sym_offs(saved_magic),%eax
+        mov     $saved_magic_pa, %eax
         add     bootsym_rel(trampoline_xen_phys_start, 4, %eax)
         mov     (%eax), %eax
         cmp     $0x9abcdef0, %eax
@@ -112,7 +112,7 @@ wakeup_32:
         mov     %ecx, %cr4
 
         /* Load pagetable base register */
-        mov     $sym_offs(idle_pg_table),%eax
+        mov     $idle_pg_table_pa ,%eax
         add     bootsym_rel(trampoline_xen_phys_start,4,%eax)
         mov     %eax,%cr3
 
@@ -156,7 +156,7 @@ wakeup_32:
         .code64
 wakeup_64:
         /* Jump to high mappings and the higher-level wakeup code. */
-        movabs  $s3_resume, %rbx
+        movabs  $s3_resume_pa + __XEN_VIRT_START, %rbx
         jmp     *%rbx
 
 bogus_saved_magic:
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 9a1dfe1b34..8e604dde48 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -71,7 +71,12 @@ SECTIONS
   __2M_text_start = .;         /* Start of 2M superpages, mapped RX. */
 #endif
 
-  start_pa = ABSOLUTE(start - __XEN_VIRT_START);
+#define DEFINE_PA_ADDRESS(sym) sym ## _pa = ABSOLUTE(sym - __XEN_VIRT_START)
+  DEFINE_PA_ADDRESS(start);
+  DEFINE_PA_ADDRESS(saved_magic);
+  DEFINE_PA_ADDRESS(idle_pg_table);
+  DEFINE_PA_ADDRESS(__high_start);
+  DEFINE_PA_ADDRESS(s3_resume);
 
   . = __XEN_VIRT_START + XEN_IMG_OFFSET;
   _start = .;
-- 
2.46.0
Re: [PATCH] Avoid additional relocations in trampoline code
Posted by Jan Beulich 2 months, 3 weeks ago
On 22.08.2024 17:29, Frediano Ziglio wrote:
> The trampoline could have "manual" relocation entries (created
> by assembly macros and some code to use them) and (in case of PE)
> normal executable relocations.
> Remove all normal executable ones. In this way we don't have to
> worry about applying both correctly (they need proper order
> which is hard to spot looking at the code).
> 
> Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>

I think this wants splitting into one patch replacing sym_offs() and a
2nd one introducing the hand-crafted __XEN_VIRT_START additions (which
may want macro-izing). Plus the justification for the change wants
extending, to actually explain what the problem is - after all there's
no issue anywhere right now.

> --- a/xen/arch/x86/boot/trampoline.S
> +++ b/xen/arch/x86/boot/trampoline.S
> @@ -73,7 +73,7 @@ trampoline_protmode_entry:
>          mov     %ecx,%cr4
>  
>          /* Load pagetable base register. */
> -        mov     $sym_offs(idle_pg_table),%eax
> +        mov     $idle_pg_table_pa, %eax
>          add     bootsym_rel(trampoline_xen_phys_start,4,%eax)
>          mov     %eax,%cr3
>  
> @@ -113,7 +113,7 @@ trampoline_protmode_entry:
>          .code64
>  start64:
>          /* Jump to high mappings. */
> -        movabs  $__high_start, %rdi
> +        movabs  $__high_start_pa + __XEN_VIRT_START, %rdi
>          jmpq    *%rdi
>  
>  #include "video.h"
> --- a/xen/arch/x86/boot/wakeup.S
> +++ b/xen/arch/x86/boot/wakeup.S
> @@ -99,7 +99,7 @@ wakeup_32:
>          mov     $bootsym_rel(wakeup_stack, 4, %esp)
>  
>          # check saved magic again
> -        mov     $sym_offs(saved_magic),%eax
> +        mov     $saved_magic_pa, %eax
>          add     bootsym_rel(trampoline_xen_phys_start, 4, %eax)
>          mov     (%eax), %eax
>          cmp     $0x9abcdef0, %eax
> @@ -112,7 +112,7 @@ wakeup_32:
>          mov     %ecx, %cr4
>  
>          /* Load pagetable base register */
> -        mov     $sym_offs(idle_pg_table),%eax
> +        mov     $idle_pg_table_pa ,%eax
>          add     bootsym_rel(trampoline_xen_phys_start,4,%eax)
>          mov     %eax,%cr3
>  
> @@ -156,7 +156,7 @@ wakeup_32:
>          .code64
>  wakeup_64:
>          /* Jump to high mappings and the higher-level wakeup code. */
> -        movabs  $s3_resume, %rbx
> +        movabs  $s3_resume_pa + __XEN_VIRT_START, %rbx
>          jmp     *%rbx
>  
>  bogus_saved_magic:

With the sym_offs() uses gone, I think it would be best if the macro was
#undef-ed ahead of the inclusion of trampoline.S. Since x86_64.S uses the
macro, that'll require careful re-arrangement of #include order.

> --- a/xen/arch/x86/xen.lds.S
> +++ b/xen/arch/x86/xen.lds.S
> @@ -71,7 +71,12 @@ SECTIONS
>    __2M_text_start = .;         /* Start of 2M superpages, mapped RX. */
>  #endif
>  
> -  start_pa = ABSOLUTE(start - __XEN_VIRT_START);
> +#define DEFINE_PA_ADDRESS(sym) sym ## _pa = ABSOLUTE(sym - __XEN_VIRT_START)
> +  DEFINE_PA_ADDRESS(start);
> +  DEFINE_PA_ADDRESS(saved_magic);
> +  DEFINE_PA_ADDRESS(idle_pg_table);
> +  DEFINE_PA_ADDRESS(__high_start);
> +  DEFINE_PA_ADDRESS(s3_resume);
>  
>    . = __XEN_VIRT_START + XEN_IMG_OFFSET;
>    _start = .;

For the cases where in assembly code you add __XEN_VIRT_START this is pretty
odd: You subtract the value here just to add it back there. Did you consider
a more straightforward approach, like introducing absolute xxx_va symbols?

Also, as a general request: Can you please become used to adding meaningful
subject prefixes to your patches?

Jan
Re: [PATCH] Avoid additional relocations in trampoline code
Posted by Frediano Ziglio 2 months, 3 weeks ago
On Mon, Aug 26, 2024 at 9:21 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 22.08.2024 17:29, Frediano Ziglio wrote:
> > The trampoline could have "manual" relocation entries (created
> > by assembly macros and some code to use them) and (in case of PE)
> > normal executable relocations.
> > Remove all normal executable ones. In this way we don't have to
> > worry about applying both correctly (they need proper order
> > which is hard to spot looking at the code).
> >
> > Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>
>
> I think this wants splitting into one patch replacing sym_offs() and a
> 2nd one introducing the hand-crafted __XEN_VIRT_START additions (which
> may want macro-izing). Plus the justification for the change wants
> extending, to actually explain what the problem is - after all there's
> no issue anywhere right now.
>

Should I explain the time dependency issue with source code?
I suppose I can describe where currently is and why it would be better
to have it removed (honestly I though I did but reading the commit
message I didn't).
Maybe for search reasons it would be better to define 2 macros like
the following?

#define phys_addr(sym) sym ## _pa
#define virt_addr(sym) sym ## _va

This way, for instance, searching for the "__high_start" word (like
"grep -rw __high_start") would produce a result.

> > --- a/xen/arch/x86/boot/trampoline.S
> > +++ b/xen/arch/x86/boot/trampoline.S
> > @@ -73,7 +73,7 @@ trampoline_protmode_entry:
> >          mov     %ecx,%cr4
> >
> >          /* Load pagetable base register. */
> > -        mov     $sym_offs(idle_pg_table),%eax
> > +        mov     $idle_pg_table_pa, %eax
> >          add     bootsym_rel(trampoline_xen_phys_start,4,%eax)
> >          mov     %eax,%cr3
> >
> > @@ -113,7 +113,7 @@ trampoline_protmode_entry:
> >          .code64
> >  start64:
> >          /* Jump to high mappings. */
> > -        movabs  $__high_start, %rdi
> > +        movabs  $__high_start_pa + __XEN_VIRT_START, %rdi
> >          jmpq    *%rdi
> >
> >  #include "video.h"
> > --- a/xen/arch/x86/boot/wakeup.S
> > +++ b/xen/arch/x86/boot/wakeup.S
> > @@ -99,7 +99,7 @@ wakeup_32:
> >          mov     $bootsym_rel(wakeup_stack, 4, %esp)
> >
> >          # check saved magic again
> > -        mov     $sym_offs(saved_magic),%eax
> > +        mov     $saved_magic_pa, %eax
> >          add     bootsym_rel(trampoline_xen_phys_start, 4, %eax)
> >          mov     (%eax), %eax
> >          cmp     $0x9abcdef0, %eax
> > @@ -112,7 +112,7 @@ wakeup_32:
> >          mov     %ecx, %cr4
> >
> >          /* Load pagetable base register */
> > -        mov     $sym_offs(idle_pg_table),%eax
> > +        mov     $idle_pg_table_pa ,%eax
> >          add     bootsym_rel(trampoline_xen_phys_start,4,%eax)
> >          mov     %eax,%cr3
> >
> > @@ -156,7 +156,7 @@ wakeup_32:
> >          .code64
> >  wakeup_64:
> >          /* Jump to high mappings and the higher-level wakeup code. */
> > -        movabs  $s3_resume, %rbx
> > +        movabs  $s3_resume_pa + __XEN_VIRT_START, %rbx
> >          jmp     *%rbx
> >
> >  bogus_saved_magic:
>
> With the sym_offs() uses gone, I think it would be best if the macro was
> #undef-ed ahead of the inclusion of trampoline.S. Since x86_64.S uses the
> macro, that'll require careful re-arrangement of #include order.
>

I think you mean including the trampoline after including x86_64.S in
head.S. I can do it.

> > --- a/xen/arch/x86/xen.lds.S
> > +++ b/xen/arch/x86/xen.lds.S
> > @@ -71,7 +71,12 @@ SECTIONS
> >    __2M_text_start = .;         /* Start of 2M superpages, mapped RX. */
> >  #endif
> >
> > -  start_pa = ABSOLUTE(start - __XEN_VIRT_START);
> > +#define DEFINE_PA_ADDRESS(sym) sym ## _pa = ABSOLUTE(sym - __XEN_VIRT_START)
> > +  DEFINE_PA_ADDRESS(start);
> > +  DEFINE_PA_ADDRESS(saved_magic);
> > +  DEFINE_PA_ADDRESS(idle_pg_table);
> > +  DEFINE_PA_ADDRESS(__high_start);
> > +  DEFINE_PA_ADDRESS(s3_resume);
> >
> >    . = __XEN_VIRT_START + XEN_IMG_OFFSET;
> >    _start = .;
>
> For the cases where in assembly code you add __XEN_VIRT_START this is pretty
> odd: You subtract the value here just to add it back there. Did you consider
> a more straightforward approach, like introducing absolute xxx_va symbols?
>

I didn't consider. Would something like

#define DEFINE_ABS_ADDRESSES(sym) \
   sym ## _pa = ABSOLUTE(sym - __XEN_VIRT_START); \
   sym ## _va = ABSOLUTE(sym)

make sense? Maybe the _pa and _va suffixes are too similar? Maybe
_physaddr and _virtaddr? Or use capical letters and macros (as above)
to avoid possible clashes?

> Also, as a general request: Can you please become used to adding meaningful
> subject prefixes to your patches?
>

Sure

> Jan

Frediano
Re: [PATCH] Avoid additional relocations in trampoline code
Posted by Jan Beulich 2 months, 3 weeks ago
On 27.08.2024 15:56, Frediano Ziglio wrote:
> On Mon, Aug 26, 2024 at 9:21 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 22.08.2024 17:29, Frediano Ziglio wrote:
>>> The trampoline could have "manual" relocation entries (created
>>> by assembly macros and some code to use them) and (in case of PE)
>>> normal executable relocations.
>>> Remove all normal executable ones. In this way we don't have to
>>> worry about applying both correctly (they need proper order
>>> which is hard to spot looking at the code).
>>>
>>> Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>
>>
>> I think this wants splitting into one patch replacing sym_offs() and a
>> 2nd one introducing the hand-crafted __XEN_VIRT_START additions (which
>> may want macro-izing). Plus the justification for the change wants
>> extending, to actually explain what the problem is - after all there's
>> no issue anywhere right now.
> 
> Should I explain the time dependency issue with source code?

Time dependency? I guess I don't understand ...

> I suppose I can describe where currently is and why it would be better
> to have it removed (honestly I though I did but reading the commit
> message I didn't).
> Maybe for search reasons it would be better to define 2 macros like
> the following?
> 
> #define phys_addr(sym) sym ## _pa
> #define virt_addr(sym) sym ## _va
> 
> This way, for instance, searching for the "__high_start" word (like
> "grep -rw __high_start") would produce a result.

I assume it's best to keep everything there together, so see below.

>> With the sym_offs() uses gone, I think it would be best if the macro was
>> #undef-ed ahead of the inclusion of trampoline.S. Since x86_64.S uses the
>> macro, that'll require careful re-arrangement of #include order.
>>
> 
> I think you mean including the trampoline after including x86_64.S in
> head.S.

Yes.

>>> --- a/xen/arch/x86/xen.lds.S
>>> +++ b/xen/arch/x86/xen.lds.S
>>> @@ -71,7 +71,12 @@ SECTIONS
>>>    __2M_text_start = .;         /* Start of 2M superpages, mapped RX. */
>>>  #endif
>>>
>>> -  start_pa = ABSOLUTE(start - __XEN_VIRT_START);
>>> +#define DEFINE_PA_ADDRESS(sym) sym ## _pa = ABSOLUTE(sym - __XEN_VIRT_START)
>>> +  DEFINE_PA_ADDRESS(start);
>>> +  DEFINE_PA_ADDRESS(saved_magic);
>>> +  DEFINE_PA_ADDRESS(idle_pg_table);
>>> +  DEFINE_PA_ADDRESS(__high_start);
>>> +  DEFINE_PA_ADDRESS(s3_resume);
>>>
>>>    . = __XEN_VIRT_START + XEN_IMG_OFFSET;
>>>    _start = .;
>>
>> For the cases where in assembly code you add __XEN_VIRT_START this is pretty
>> odd: You subtract the value here just to add it back there. Did you consider
>> a more straightforward approach, like introducing absolute xxx_va symbols?
> 
> I didn't consider. Would something like
> 
> #define DEFINE_ABS_ADDRESSES(sym) \
>    sym ## _pa = ABSOLUTE(sym - __XEN_VIRT_START); \
>    sym ## _va = ABSOLUTE(sym)
> 
> make sense? Maybe the _pa and _va suffixes are too similar? Maybe
> _physaddr and _virtaddr? Or use capical letters and macros (as above)
> to avoid possible clashes?

I'd like to ask that we don't introduce symbols we don't actually use. Hence
a single macro defining both is probably not going to be overly helpful. As
to capital letters: I'm struggling with the "(as above)" - I don't see any
use of capital letters in symbol names being generated. But yes, I was going
to suggest to consider _VA and _PA tags, precisely to reduce the risk of
clashes.

Jan

Re: [PATCH] Avoid additional relocations in trampoline code
Posted by Frediano Ziglio 2 months, 3 weeks ago
On Tue, Aug 27, 2024 at 4:50 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 27.08.2024 15:56, Frediano Ziglio wrote:
> > On Mon, Aug 26, 2024 at 9:21 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 22.08.2024 17:29, Frediano Ziglio wrote:
> >>> The trampoline could have "manual" relocation entries (created
> >>> by assembly macros and some code to use them) and (in case of PE)
> >>> normal executable relocations.
> >>> Remove all normal executable ones. In this way we don't have to
> >>> worry about applying both correctly (they need proper order
> >>> which is hard to spot looking at the code).
> >>>
> >>> Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>
> >>
> >> I think this wants splitting into one patch replacing sym_offs() and a
> >> 2nd one introducing the hand-crafted __XEN_VIRT_START additions (which
> >> may want macro-izing). Plus the justification for the change wants
> >> extending, to actually explain what the problem is - after all there's
> >> no issue anywhere right now.
> >
> > Should I explain the time dependency issue with source code?
>
> Time dependency? I guess I don't understand ...
>

You have a time dependency between 2 code path when one has to be
executed before/after another.
Some patterns are expected (you don't call a method on a destroyed
object) but nasty hidden ones makes code more complicated and less
robust, making code changes harder and program fragile.
In that specific case the copy of the trampoline, for EFI path, is
done after relocation rollback. In that specific code path, you are
executing code which is not meant to be executed at that location (now
should be executed at higher locations) hoping that compiler/binary
code can cope with it. The code should relocate back and switch to
higher locations as quick as possible.

> > I suppose I can describe where currently is and why it would be better
> > to have it removed (honestly I though I did but reading the commit
> > message I didn't).
> > Maybe for search reasons it would be better to define 2 macros like
> > the following?
> >
> > #define phys_addr(sym) sym ## _pa
> > #define virt_addr(sym) sym ## _va
> >
> > This way, for instance, searching for the "__high_start" word (like
> > "grep -rw __high_start") would produce a result.
>
> I assume it's best to keep everything there together, so see below.
>
> >> With the sym_offs() uses gone, I think it would be best if the macro was
> >> #undef-ed ahead of the inclusion of trampoline.S. Since x86_64.S uses the
> >> macro, that'll require careful re-arrangement of #include order.
> >>
> >
> > I think you mean including the trampoline after including x86_64.S in
> > head.S.
>
> Yes.
>

That worked like a charm!

> >>> --- a/xen/arch/x86/xen.lds.S
> >>> +++ b/xen/arch/x86/xen.lds.S
> >>> @@ -71,7 +71,12 @@ SECTIONS
> >>>    __2M_text_start = .;         /* Start of 2M superpages, mapped RX. */
> >>>  #endif
> >>>
> >>> -  start_pa = ABSOLUTE(start - __XEN_VIRT_START);
> >>> +#define DEFINE_PA_ADDRESS(sym) sym ## _pa = ABSOLUTE(sym - __XEN_VIRT_START)
> >>> +  DEFINE_PA_ADDRESS(start);
> >>> +  DEFINE_PA_ADDRESS(saved_magic);
> >>> +  DEFINE_PA_ADDRESS(idle_pg_table);
> >>> +  DEFINE_PA_ADDRESS(__high_start);
> >>> +  DEFINE_PA_ADDRESS(s3_resume);
> >>>
> >>>    . = __XEN_VIRT_START + XEN_IMG_OFFSET;
> >>>    _start = .;
> >>
> >> For the cases where in assembly code you add __XEN_VIRT_START this is pretty
> >> odd: You subtract the value here just to add it back there. Did you consider
> >> a more straightforward approach, like introducing absolute xxx_va symbols?
> >
> > I didn't consider. Would something like
> >
> > #define DEFINE_ABS_ADDRESSES(sym) \
> >    sym ## _pa = ABSOLUTE(sym - __XEN_VIRT_START); \
> >    sym ## _va = ABSOLUTE(sym)
> >
> > make sense? Maybe the _pa and _va suffixes are too similar? Maybe
> > _physaddr and _virtaddr? Or use capical letters and macros (as above)
> > to avoid possible clashes?
>
> I'd like to ask that we don't introduce symbols we don't actually use. Hence
> a single macro defining both is probably not going to be overly helpful. As
> to capital letters: I'm struggling with the "(as above)" - I don't see any
> use of capital letters in symbol names being generated. But yes, I was going
> to suggest to consider _VA and _PA tags, precisely to reduce the risk of
> clashes.
>

Unfortunately this worked but is producing these warnings which are
pretty annoying:

ld: ./.xen.efi.0xffff82d040000000.0: stripping non-representable
symbol 'saved_magic_va' (value 0xffff82d040816018)

I tried to remove them somehow, but I didn't find any way (no hidden
symbols, not been able to force the linker to strip it and not
complain).
I think I'll get back to the single "pa" symbol, this time with _PA
suffix, maybe hidden, and adding macros (phys_addr/virt_addr) to make
clear the purpose.

> Jan

I'll try to split the patch as suggested, although it will be hard to
come with a sensible message for the second commit.

Regards,
  Frediano