[PATCH 5/5] x86: Rollback relocation in case of EFI multiboot

Alejandro Vallejo posted 5 patches 3 months, 2 weeks ago
[PATCH 5/5] x86: Rollback relocation in case of EFI multiboot
Posted by Alejandro Vallejo 3 months, 2 weeks ago
In the non-multiboot EFI case, relocations are rolled back in
efi_arch_post_exit_boot(), which is called by efi_start(); however,
this is not done in the multiboot code path.
Do it for this path as well so that it works correctly.

Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>
---
 xen/arch/x86/boot/head.S  | 29 +++++++++++++++---
 xen/arch/x86/boot/reloc.c | 63 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index abfa3d82f7..75ac74a589 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -352,6 +352,7 @@ __efi64_mb2_start:
         and     $~15,%rsp
 
         /* Save Multiboot2 magic on the stack. */
+        shlq    $32, %rax
         push    %rax
 
         /* Save EFI ImageHandle on the stack. */
@@ -382,11 +383,24 @@ __efi64_mb2_start:
         /* Just pop an item from the stack. */
         pop     %rax
 
-        /* Restore Multiboot2 magic. */
-        pop     %rax
+        /* Prepare stack for relocation call */
+        subq    $16, %rsp
+        lea     l2_bootmap(%rip), %ecx
+        movl    %ecx, 16(%rsp)
+        lea     l3_bootmap(%rip), %ecx
+        movl    %ecx, 12(%rsp)
+        lea     __base_relocs_end(%rip), %ecx
+        movl    %ecx, 8(%rsp)
+        lea     __base_relocs_start(%rip), %ecx
+        movl    %ecx, 4(%rsp)
+        lea     __image_base__(%rip),%rsi
+        movl    %esi, (%rsp)
+        movabsq $__XEN_VIRT_START, %rcx
+        subq    %rsi, %rcx
+        push    %rcx
 
-        /* Jump to trampoline_setup after switching CPU to x86_32 mode. */
-        lea     trampoline_setup(%rip),%r15
+        /* Jump to trampoline_efi_setup after switching CPU to x86_32 mode. */
+        lea     trampoline_efi_setup(%rip),%r15
 
 x86_32_switch:
         mov     %r15,%rdi
@@ -557,6 +571,12 @@ __start:
         and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
         jmp     .Lmb2_tsize
 
+trampoline_efi_setup:
+        movb    $1, %al
+        call    reloc
+        pop     %eax
+        jmp     trampoline_setup
+
 trampoline_bios_setup:
         /*
          * Called on legacy BIOS platforms only.
@@ -627,6 +647,7 @@ trampoline_setup:
         push    %ecx                /* Bottom-most low-memory stack address. */
         push    %ebx                /* Multiboot / PVH information address. */
         push    %eax                /* Magic number. */
+        movb    $0, %al
         call    reloc
 #ifdef CONFIG_PVH_GUEST
         cmpb    $0, sym_esi(pvh_boot)
diff --git a/xen/arch/x86/boot/reloc.c b/xen/arch/x86/boot/reloc.c
index 4033557481..3aa97a99d0 100644
--- a/xen/arch/x86/boot/reloc.c
+++ b/xen/arch/x86/boot/reloc.c
@@ -23,7 +23,9 @@ asm (
     "    .text                         \n"
     "    .globl _start                 \n"
     "_start:                           \n"
-    "    jmp  reloc                    \n"
+    "    cmpb $0, %al                  \n"
+    "    je   reloc                    \n"
+    "    jmp  reloc_pe_back            \n"
     );
 
 #include "defs.h"
@@ -375,6 +377,65 @@ void *__stdcall reloc(uint32_t magic, uint32_t in, uint32_t trampoline,
     }
 }
 
+struct pe_base_relocs {
+    u32 rva;
+    u32 size;
+    u16 entries[];
+};
+
+#define PE_BASE_RELOC_ABS      0
+#define PE_BASE_RELOC_HIGHLOW  3
+#define PE_BASE_RELOC_DIR64   10
+
+void __stdcall reloc_pe_back(long long delta,
+                             uint32_t xen_phys_start,
+                             const struct pe_base_relocs *__base_relocs_start,
+                             const struct pe_base_relocs *__base_relocs_end,
+                             char *l3_bootmap, char *l2_bootmap)
+{
+    const struct pe_base_relocs *base_relocs;
+
+    for ( base_relocs = __base_relocs_start; base_relocs < __base_relocs_end; )
+    {
+        unsigned int i = 0, n;
+
+        n = (base_relocs->size - sizeof(*base_relocs)) /
+            sizeof(*base_relocs->entries);
+
+        /*
+         * Relevant l{2,3}_bootmap entries get initialized explicitly in
+         * efi_arch_memory_setup(), so we must not apply relocations there.
+         * l2_directmap's first slot, otoh, should be handled normally, as
+         * efi_arch_memory_setup() won't touch it (xen_phys_start should
+         * never be zero).
+         */
+        if ( xen_phys_start + base_relocs->rva == (unsigned long)l3_bootmap ||
+             xen_phys_start + base_relocs->rva == (unsigned long)l2_bootmap )
+            i = n;
+
+        for ( ; i < n; ++i )
+        {
+            unsigned long addr = xen_phys_start + base_relocs->rva +
+                                 (base_relocs->entries[i] & 0xfff);
+
+            switch ( base_relocs->entries[i] >> 12 )
+            {
+            case PE_BASE_RELOC_ABS:
+                break;
+            case PE_BASE_RELOC_HIGHLOW:
+                if ( delta )
+                    *(u32 *)addr += delta;
+                break;
+            case PE_BASE_RELOC_DIR64:
+                if ( delta )
+                    *(u64 *)addr += delta;
+                break;
+            }
+        }
+        base_relocs = (const void *)(base_relocs->entries + i + (i & 1));
+    }
+}
+
 /*
  * Local variables:
  * mode: C
-- 
2.45.2
Re: [PATCH 5/5] x86: Rollback relocation in case of EFI multiboot
Posted by Jan Beulich 3 months, 2 weeks ago
On 07.08.2024 15:48, Alejandro Vallejo wrote:
> --- a/xen/arch/x86/boot/head.S
> +++ b/xen/arch/x86/boot/head.S
> @@ -352,6 +352,7 @@ __efi64_mb2_start:
>          and     $~15,%rsp
>  
>          /* Save Multiboot2 magic on the stack. */
> +        shlq    $32, %rax

As indicated for the earlier patch: No insn suffixes please when they're
not actually needed to clarify operand size. (Or else at the very least
be consistent and have suffixes everywhere. Comment applies throughout
the patch.)

Additionally there's some trickery going on here which absolutely needs
commenting, at least lightly. Aiui ...

>          push    %rax
>  
>          /* Save EFI ImageHandle on the stack. */
> @@ -382,11 +383,24 @@ __efi64_mb2_start:
>          /* Just pop an item from the stack. */
>          pop     %rax
>  
> -        /* Restore Multiboot2 magic. */
> -        pop     %rax

... you eliminate this in favor of ...

> +        /* Prepare stack for relocation call */
> +        subq    $16, %rsp

... using the low half of that stack slot here for the last function
argument, then POPing %eax entirely elsewhere, in trampoline_efi_setup.

> +        lea     l2_bootmap(%rip), %ecx
> +        movl    %ecx, 16(%rsp)
> +        lea     l3_bootmap(%rip), %ecx
> +        movl    %ecx, 12(%rsp)
> +        lea     __base_relocs_end(%rip), %ecx
> +        movl    %ecx, 8(%rsp)
> +        lea     __base_relocs_start(%rip), %ecx
> +        movl    %ecx, 4(%rsp)
> +        lea     __image_base__(%rip),%rsi

Nit: Please consistently put a blank after each comma in new code.

> +        movl    %esi, (%rsp)

Since a 32-bit value suffices, why a 64-bit LEA above?

> +        movabsq $__XEN_VIRT_START, %rcx
> +        subq    %rsi, %rcx
> +        push    %rcx
>  
> -        /* Jump to trampoline_setup after switching CPU to x86_32 mode. */
> -        lea     trampoline_setup(%rip),%r15
> +        /* Jump to trampoline_efi_setup after switching CPU to x86_32 mode. */
> +        lea     trampoline_efi_setup(%rip),%r15
>  
>  x86_32_switch:
>          mov     %r15,%rdi

As I understand it, all of the changes here are benign to the existing
MB2/EFI code path only because __base_relocs_start[] is empty there. That
should be made explicit in the description. Initially I meant to point out
that you're apparently breaking that path.

> --- a/xen/arch/x86/boot/reloc.c
> +++ b/xen/arch/x86/boot/reloc.c
> @@ -23,7 +23,9 @@ asm (
>      "    .text                         \n"
>      "    .globl _start                 \n"
>      "_start:                           \n"
> -    "    jmp  reloc                    \n"
> +    "    cmpb $0, %al                  \n"
> +    "    je   reloc                    \n"

While minor here, I think we should generally prefer TEST (and then JZ)
over CMP when checking for 0. I wonder though whether we really want to
go with this kind of multiplexing. A new reloc-pe.c may be a cleaner
approach. This may then (possibly later) also make it easier to
exclude this code when linking xen-syms.

> @@ -375,6 +377,65 @@ void *__stdcall reloc(uint32_t magic, uint32_t in, uint32_t trampoline,
>      }
>  }
>  
> +struct pe_base_relocs {
> +    u32 rva;
> +    u32 size;
> +    u16 entries[];

uint<N>_t please in new code (but see also at the bottom).

> +};
> +
> +#define PE_BASE_RELOC_ABS      0
> +#define PE_BASE_RELOC_HIGHLOW  3
> +#define PE_BASE_RELOC_DIR64   10
> +
> +void __stdcall reloc_pe_back(long long delta,
> +                             uint32_t xen_phys_start,
> +                             const struct pe_base_relocs *__base_relocs_start,
> +                             const struct pe_base_relocs *__base_relocs_end,
> +                             char *l3_bootmap, char *l2_bootmap)

You only ever use the last two when cast to unsigned long. What's wrong
with declaring them as unsigned long right away? xen_phys_start may also
want to have unsigned long type.

> +{
> +    const struct pe_base_relocs *base_relocs;
> +
> +    for ( base_relocs = __base_relocs_start; base_relocs < __base_relocs_end; )
> +    {
> +        unsigned int i = 0, n;
> +
> +        n = (base_relocs->size - sizeof(*base_relocs)) /
> +            sizeof(*base_relocs->entries);
> +
> +        /*
> +         * Relevant l{2,3}_bootmap entries get initialized explicitly in
> +         * efi_arch_memory_setup(), so we must not apply relocations there.
> +         * l2_directmap's first slot, otoh, should be handled normally, as
> +         * efi_arch_memory_setup() won't touch it (xen_phys_start should
> +         * never be zero).
> +         */
> +        if ( xen_phys_start + base_relocs->rva == (unsigned long)l3_bootmap ||
> +             xen_phys_start + base_relocs->rva == (unsigned long)l2_bootmap )
> +            i = n;
> +
> +        for ( ; i < n; ++i )
> +        {
> +            unsigned long addr = xen_phys_start + base_relocs->rva +
> +                                 (base_relocs->entries[i] & 0xfff);
> +
> +            switch ( base_relocs->entries[i] >> 12 )
> +            {
> +            case PE_BASE_RELOC_ABS:
> +                break;
> +            case PE_BASE_RELOC_HIGHLOW:
> +                if ( delta )
> +                    *(u32 *)addr += delta;
> +                break;
> +            case PE_BASE_RELOC_DIR64:
> +                if ( delta )
> +                    *(u64 *)addr += delta;
> +                break;
> +            }

Except for the dropped default case (which imo needs to be there, just
that you can't use blexit() here), the body of the function looks like
a plain copy of efi_arch_relocate_image(). We want to avoid such
(source) duplication, and rather put the logic in e.g. a header
included by both parties.

Jan