[PATCH v2 5/5] x86: Rollback relocation in case of EFI multiboot

Frediano Ziglio posted 5 patches 3 months, 1 week ago
[PATCH v2 5/5] x86: Rollback relocation in case of EFI multiboot
Posted by Frediano Ziglio 3 months, 1 week ago
In case EFI not multiboot rolling back relocation is done in
efi_arch_post_exit_boot, called by efi_start however this is
not done in multiboot code path.
Do it also for this path to make it work correctly.

Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>
---
 xen/arch/x86/boot/Makefile          |  2 +-
 xen/arch/x86/boot/efi-reloc-image.c | 40 ++++++++++++++
 xen/arch/x86/boot/efi-reloc-image.h | 85 +++++++++++++++++++++++++++++
 xen/arch/x86/boot/head.S            | 44 ++++++++++++---
 xen/arch/x86/efi/efi-boot.h         | 64 ++--------------------
 5 files changed, 168 insertions(+), 67 deletions(-)
 create mode 100644 xen/arch/x86/boot/efi-reloc-image.c
 create mode 100644 xen/arch/x86/boot/efi-reloc-image.h
---
Changes since v1:
- many style updates;
- split file for 32 bit relocation;
- reuse code from header avoiding duplication;
- add some more comment to assembly code;

diff --git a/xen/arch/x86/boot/Makefile b/xen/arch/x86/boot/Makefile
index 03d8ce3a9e..49792e0acf 100644
--- a/xen/arch/x86/boot/Makefile
+++ b/xen/arch/x86/boot/Makefile
@@ -1,6 +1,6 @@
 obj-bin-y += head.o
 
-head-bin-objs := cmdline.o reloc.o
+head-bin-objs := cmdline.o reloc.o efi-reloc-image.o
 
 nocov-y   += $(head-bin-objs)
 noubsan-y += $(head-bin-objs)
diff --git a/xen/arch/x86/boot/efi-reloc-image.c b/xen/arch/x86/boot/efi-reloc-image.c
new file mode 100644
index 0000000000..b103e37cd7
--- /dev/null
+++ b/xen/arch/x86/boot/efi-reloc-image.c
@@ -0,0 +1,40 @@
+/*
+ * efi-reloc-image.c
+ *
+ * 32-bit flat memory-map routines for relocating back PE executable.
+ * This is done with paging disabled to avoid permission issues.
+ *
+ * Copyright (c) 2024, Citrix Systems, Inc.
+ */
+
+/*
+ * This entry point is entered from xen/arch/x86/boot/head.S with:
+ *   - 0x04(%esp) = __XEN_VIRT_START - xen_phys_start
+ *   - 0x0c(%esp) = xen_phys_start
+ *   - 0x10(%esp) = __base_relocs_start
+ *   - 0x14(%esp) = __base_relocs_end
+ */
+asm (
+    "    .text                         \n"
+    "    .globl _start                 \n"
+    "_start:                           \n"
+    "    jmp    reloc_pe_back          \n"
+    );
+
+#include "defs.h"
+
+/* Do not patch page tables. */
+#define in_page_tables(v) false
+
+#define EFI_RELOC_IMAGE_EARLY 1
+#include "efi-reloc-image.h"
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/boot/efi-reloc-image.h b/xen/arch/x86/boot/efi-reloc-image.h
new file mode 100644
index 0000000000..999dd2d2c8
--- /dev/null
+++ b/xen/arch/x86/boot/efi-reloc-image.h
@@ -0,0 +1,85 @@
+/*
+ * efi-reloc-image.h
+ *
+ * Code for relocating back PE executable.
+ * This code is common between 64 bit and 32 bit.
+ *
+ * Copyright (c) 2024, Citrix Systems, Inc.
+ */
+
+#if EFI_RELOC_IMAGE_EARLY != 0 && EFI_RELOC_IMAGE_EARLY != 1
+#error EFI_RELOC_IMAGE_EARLY must be defined either 0 or 1
+#endif
+
+typedef struct pe_base_relocs {
+    uint32_t rva;
+    uint32_t size;
+    uint16_t entries[];
+} pe_base_relocs;
+
+#define PE_BASE_RELOC_ABS      0
+#define PE_BASE_RELOC_HIGHLOW  3
+#define PE_BASE_RELOC_DIR64   10
+
+#if EFI_RELOC_IMAGE_EARLY
+bool __stdcall
+#else
+static bool
+#endif
+reloc_pe_back(long long delta,
+              unsigned long xen_phys_start,
+              const pe_base_relocs *__base_relocs_start,
+              const pe_base_relocs *__base_relocs_end)
+{
+    const struct pe_base_relocs *base_relocs;
+
+    for ( base_relocs = __base_relocs_start; base_relocs < __base_relocs_end; )
+    {
+        unsigned int i = 0, n;
+
+        n = (base_relocs->size - sizeof(*base_relocs)) /
+            sizeof(*base_relocs->entries);
+
+        for ( ; i < n; ++i )
+        {
+            unsigned long addr = xen_phys_start + base_relocs->rva +
+                                 (base_relocs->entries[i] & 0xfff);
+
+            switch ( base_relocs->entries[i] >> 12 )
+            {
+            case PE_BASE_RELOC_ABS:
+                break;
+            case PE_BASE_RELOC_HIGHLOW:
+                if ( delta )
+                {
+                    *(uint32_t *)addr += delta;
+                    if ( in_page_tables(addr) )
+                        *(uint32_t *)addr += xen_phys_start;
+                }
+                break;
+            case PE_BASE_RELOC_DIR64:
+                if ( delta )
+                {
+                    *(uint64_t *)addr += delta;
+                    if ( in_page_tables(addr) )
+                        *(uint64_t *)addr += xen_phys_start;
+                }
+                break;
+            default:
+                return false;
+            }
+        }
+        base_relocs = (const void *)(base_relocs->entries + i + (i & 1));
+    }
+    return true;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index 86805389f9..dd3600c14b 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -332,7 +332,8 @@ __efi64_mb2_start:
          */
         and     $~15,%rsp
 
-        /* Save Multiboot2 magic on the stack. */
+        /* Save Multiboot2 magic on the stack for a later 32bit call */
+        shl     $32, %rax
         push    %rax
 
         /* Save EFI ImageHandle on the stack. */
@@ -363,11 +364,25 @@ __efi64_mb2_start:
         /* Just pop an item from the stack. */
         pop     %rax
 
-        /* Restore Multiboot2 magic. */
-        pop     %rax
+        /*
+         * Prepare stack for relocation call.
+         * Note that we are in 64bit mode but we are going to call a
+         * function in 32bit mode so the stack is not written with
+         * push instructions.
+         */
+        sub     $16, %rsp
+        lea     __base_relocs_end(%rip), %ecx
+        mov     %ecx, 16(%rsp)
+        lea     __base_relocs_start(%rip), %ecx
+        mov     %ecx, 12(%rsp)
+        lea     __image_base__(%rip), %esi
+        mov     %esi, 8(%rsp)
+        movabs  $__XEN_VIRT_START, %rcx
+        sub     %rsi, %rcx
+        mov     %rcx, (%rsp)
 
-        /* Jump to trampoline_setup after switching CPU to x86_32 mode. */
-        lea     trampoline_setup(%rip),%r15
+        /* Jump to trampoline_efi_setup after switching CPU to x86_32 mode. */
+        lea     trampoline_efi_setup(%rip), %r15
 
 x86_32_switch:
         mov     %r15,%rdi
@@ -539,6 +554,17 @@ __start:
         and     $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
         jmp     .Lmb2_tsize
 
+trampoline_efi_setup:
+        call    reloc_pe_back
+        pop     %eax
+
+        /* Calculate the load base address again, adjusting to sym_esi needs */
+        call    1f
+1:      pop     %esi
+        sub     $sym_offs(1b), %esi
+
+        jmp     trampoline_setup
+
 trampoline_bios_setup:
         /*
          * Called on legacy BIOS platforms only.
@@ -867,8 +893,8 @@ trampoline_setup:
         lret
 
         /*
-         * cmdline and reloc are written in C, and linked to be 32bit PIC with
-         * entrypoints at 0 and using the stdcall convention.
+         * cmdline, reloc and reloc_pe_back are written in C, and linked to be
+         * 32bit PIC with entrypoints at 0 and using the stdcall convention.
          */
         ALIGN
 cmdline_parse_early:
@@ -878,6 +904,10 @@ cmdline_parse_early:
 reloc:
         .incbin "reloc.bin"
 
+        ALIGN
+reloc_pe_back:
+        .incbin "efi-reloc-image.bin"
+
         .section .init.data, "aw", @progbits
 
 ENTRY(trampoline_start)
diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
index f282358435..4f473a287e 100644
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -36,69 +36,15 @@ extern const intpte_t __page_tables_start[], __page_tables_end[];
 #define in_page_tables(v) ((intpte_t *)(v) >= __page_tables_start && \
                            (intpte_t *)(v) < __page_tables_end)
 
-#define PE_BASE_RELOC_ABS      0
-#define PE_BASE_RELOC_HIGHLOW  3
-#define PE_BASE_RELOC_DIR64   10
+#define EFI_RELOC_IMAGE_EARLY 0
+#include "../boot/efi-reloc-image.h"
 
-extern const struct pe_base_relocs {
-    u32 rva;
-    u32 size;
-    u16 entries[];
-} __base_relocs_start[], __base_relocs_end[];
+extern pe_base_relocs __base_relocs_start[], __base_relocs_end[];
 
 static void __init efi_arch_relocate_image(unsigned long delta)
 {
-    const struct pe_base_relocs *base_relocs;
-
-    for ( base_relocs = __base_relocs_start; base_relocs < __base_relocs_end; )
-    {
-        unsigned int i = 0, n;
-
-        n = (base_relocs->size - sizeof(*base_relocs)) /
-            sizeof(*base_relocs->entries);
-
-        /*
-         * Relevant l{2,3}_bootmap entries get initialized explicitly in
-         * efi_arch_memory_setup(), so we must not apply relocations there.
-         * l2_directmap's first slot, otoh, should be handled normally, as
-         * efi_arch_memory_setup() won't touch it (xen_phys_start should
-         * never be zero).
-         */
-        if ( xen_phys_start + base_relocs->rva == (unsigned long)l3_bootmap ||
-             xen_phys_start + base_relocs->rva == (unsigned long)l2_bootmap )
-            i = n;
-
-        for ( ; i < n; ++i )
-        {
-            unsigned long addr = xen_phys_start + base_relocs->rva +
-                                 (base_relocs->entries[i] & 0xfff);
-
-            switch ( base_relocs->entries[i] >> 12 )
-            {
-            case PE_BASE_RELOC_ABS:
-                break;
-            case PE_BASE_RELOC_HIGHLOW:
-                if ( delta )
-                {
-                    *(u32 *)addr += delta;
-                    if ( in_page_tables(addr) )
-                        *(u32 *)addr += xen_phys_start;
-                }
-                break;
-            case PE_BASE_RELOC_DIR64:
-                if ( delta )
-                {
-                    *(u64 *)addr += delta;
-                    if ( in_page_tables(addr) )
-                        *(u64 *)addr += xen_phys_start;
-                }
-                break;
-            default:
-                blexit(L"Unsupported relocation type");
-            }
-        }
-        base_relocs = (const void *)(base_relocs->entries + i + (i & 1));
-    }
+    if (!reloc_pe_back(delta, xen_phys_start, __base_relocs_start, __base_relocs_end))
+        blexit(L"Unsupported relocation type");
 }
 
 extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[];
-- 
2.45.2
Re: [PATCH v2 5/5] x86: Rollback relocation in case of EFI multiboot
Posted by Jan Beulich 3 months ago
On 14.08.2024 10:34, Frediano Ziglio wrote:
> In case EFI not multiboot rolling back relocation is done in
> efi_arch_post_exit_boot, called by efi_start however this is
> not done in multiboot code path.
> Do it also for this path to make it work correctly.
> 
> Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>
> ---
>  xen/arch/x86/boot/Makefile          |  2 +-
>  xen/arch/x86/boot/efi-reloc-image.c | 40 ++++++++++++++
>  xen/arch/x86/boot/efi-reloc-image.h | 85 +++++++++++++++++++++++++++++
>  xen/arch/x86/boot/head.S            | 44 ++++++++++++---
>  xen/arch/x86/efi/efi-boot.h         | 64 ++--------------------
>  5 files changed, 168 insertions(+), 67 deletions(-)
>  create mode 100644 xen/arch/x86/boot/efi-reloc-image.c
>  create mode 100644 xen/arch/x86/boot/efi-reloc-image.h

Would there be anything wrong with using just efi-reloc.[ch]? I'm sorry, but
I'm a little averse to long names when shorter ones are as unambiguous.

> --- /dev/null
> +++ b/xen/arch/x86/boot/efi-reloc-image.c
> @@ -0,0 +1,40 @@
> +/*
> + * efi-reloc-image.c
> + *
> + * 32-bit flat memory-map routines for relocating back PE executable.
> + * This is done with paging disabled to avoid permission issues.
> + *
> + * Copyright (c) 2024, Citrix Systems, Inc.
> + */

Just curious: Is "Citrix" still the right name to use in places like this one?

> +/*
> + * This entry point is entered from xen/arch/x86/boot/head.S with:
> + *   - 0x04(%esp) = __XEN_VIRT_START - xen_phys_start

This could to with adding "(two slots)" or "(64 bits)".

> + *   - 0x0c(%esp) = xen_phys_start
> + *   - 0x10(%esp) = __base_relocs_start
> + *   - 0x14(%esp) = __base_relocs_end
> + */
> +asm (
> +    "    .text                         \n"
> +    "    .globl _start                 \n"
> +    "_start:                           \n"
> +    "    jmp    reloc_pe_back          \n"
> +    );
> +
> +#include "defs.h"
> +
> +/* Do not patch page tables. */
> +#define in_page_tables(v) false

If you want what the comment says, this can't yield "false" for every
possible input. Didn't you even have page table related logic in v1?

> --- /dev/null
> +++ b/xen/arch/x86/boot/efi-reloc-image.h
> @@ -0,0 +1,85 @@
> +/*
> + * efi-reloc-image.h
> + *
> + * Code for relocating back PE executable.
> + * This code is common between 64 bit and 32 bit.
> + *
> + * Copyright (c) 2024, Citrix Systems, Inc.
> + */
> +
> +#if EFI_RELOC_IMAGE_EARLY != 0 && EFI_RELOC_IMAGE_EARLY != 1
> +#error EFI_RELOC_IMAGE_EARLY must be defined either 0 or 1
> +#endif

Depending on compiler type and version, EFI_RELOC_IMAGE_EARLY simply not
being defined may or may not raise a warning, but would otherwise satisfy
EFI_RELOC_IMAGE_EARLY == 0. I think you want to also guard against
un-defined-ness.

> +typedef struct pe_base_relocs {
> +    uint32_t rva;
> +    uint32_t size;
> +    uint16_t entries[];
> +} pe_base_relocs;
> +
> +#define PE_BASE_RELOC_ABS      0
> +#define PE_BASE_RELOC_HIGHLOW  3
> +#define PE_BASE_RELOC_DIR64   10
> +
> +#if EFI_RELOC_IMAGE_EARLY
> +bool __stdcall
> +#else
> +static bool
> +#endif
> +reloc_pe_back(long long delta,
> +              unsigned long xen_phys_start,
> +              const pe_base_relocs *__base_relocs_start,
> +              const pe_base_relocs *__base_relocs_end)
> +{
> +    const struct pe_base_relocs *base_relocs;
> +
> +    for ( base_relocs = __base_relocs_start; base_relocs < __base_relocs_end; )
> +    {
> +        unsigned int i = 0, n;
> +
> +        n = (base_relocs->size - sizeof(*base_relocs)) /
> +            sizeof(*base_relocs->entries);
> +
> +        for ( ; i < n; ++i )
> +        {
> +            unsigned long addr = xen_phys_start + base_relocs->rva +
> +                                 (base_relocs->entries[i] & 0xfff);
> +
> +            switch ( base_relocs->entries[i] >> 12 )
> +            {
> +            case PE_BASE_RELOC_ABS:
> +                break;
> +            case PE_BASE_RELOC_HIGHLOW:
> +                if ( delta )
> +                {
> +                    *(uint32_t *)addr += delta;
> +                    if ( in_page_tables(addr) )
> +                        *(uint32_t *)addr += xen_phys_start;
> +                }
> +                break;
> +            case PE_BASE_RELOC_DIR64:
> +                if ( delta )
> +                {
> +                    *(uint64_t *)addr += delta;
> +                    if ( in_page_tables(addr) )
> +                        *(uint64_t *)addr += xen_phys_start;
> +                }
> +                break;
> +            default:
> +                return false;
> +            }

As you're moving this code, please put blank lines between case blocks.

> +        }
> +        base_relocs = (const void *)(base_relocs->entries + i + (i & 1));
> +    }
> +    return true;
> +}

Nit: Blank line please ahead of a function's main "return".

> --- a/xen/arch/x86/boot/head.S
> +++ b/xen/arch/x86/boot/head.S
> @@ -332,7 +332,8 @@ __efi64_mb2_start:
>           */
>          and     $~15,%rsp
>  
> -        /* Save Multiboot2 magic on the stack. */
> +        /* Save Multiboot2 magic on the stack for a later 32bit call */
> +        shl     $32, %rax
>          push    %rax

I see you're now extending the comment here. However, ...

> @@ -363,11 +364,25 @@ __efi64_mb2_start:
>          /* Just pop an item from the stack. */
>          pop     %rax
>  
> -        /* Restore Multiboot2 magic. */

... this comment shouldn't be lost (wants to move down), and ...

> -        pop     %rax
> +        /*
> +         * Prepare stack for relocation call.
> +         * Note that we are in 64bit mode but we are going to call a
> +         * function in 32bit mode so the stack is not written with
> +         * push instructions.
> +         */
> +        sub     $16, %rsp
> +        lea     __base_relocs_end(%rip), %ecx
> +        mov     %ecx, 16(%rsp)

... the re-using of half a 64-bit slot here still isn't present in
commentary (in fact the comment is slightly wrong as is, because that
re-used half slot _is_ written by a PUSH, just higher up).

> --- a/xen/arch/x86/efi/efi-boot.h
> +++ b/xen/arch/x86/efi/efi-boot.h
> @@ -36,69 +36,15 @@ extern const intpte_t __page_tables_start[], __page_tables_end[];
>  #define in_page_tables(v) ((intpte_t *)(v) >= __page_tables_start && \
>                             (intpte_t *)(v) < __page_tables_end)
>  
> -#define PE_BASE_RELOC_ABS      0
> -#define PE_BASE_RELOC_HIGHLOW  3
> -#define PE_BASE_RELOC_DIR64   10
> +#define EFI_RELOC_IMAGE_EARLY 0
> +#include "../boot/efi-reloc-image.h"
>  
> -extern const struct pe_base_relocs {
> -    u32 rva;
> -    u32 size;
> -    u16 entries[];
> -} __base_relocs_start[], __base_relocs_end[];
> +extern pe_base_relocs __base_relocs_start[], __base_relocs_end[];

You've lost the const.

>  static void __init efi_arch_relocate_image(unsigned long delta)
>  {
> -    const struct pe_base_relocs *base_relocs;
> -
> -    for ( base_relocs = __base_relocs_start; base_relocs < __base_relocs_end; )
> -    {
> -        unsigned int i = 0, n;
> -
> -        n = (base_relocs->size - sizeof(*base_relocs)) /
> -            sizeof(*base_relocs->entries);
> -
> -        /*
> -         * Relevant l{2,3}_bootmap entries get initialized explicitly in
> -         * efi_arch_memory_setup(), so we must not apply relocations there.
> -         * l2_directmap's first slot, otoh, should be handled normally, as
> -         * efi_arch_memory_setup() won't touch it (xen_phys_start should
> -         * never be zero).
> -         */
> -        if ( xen_phys_start + base_relocs->rva == (unsigned long)l3_bootmap ||
> -             xen_phys_start + base_relocs->rva == (unsigned long)l2_bootmap )
> -            i = n;

I can't spot the replacement for this code.

> -        for ( ; i < n; ++i )
> -        {
> -            unsigned long addr = xen_phys_start + base_relocs->rva +
> -                                 (base_relocs->entries[i] & 0xfff);
> -
> -            switch ( base_relocs->entries[i] >> 12 )
> -            {
> -            case PE_BASE_RELOC_ABS:
> -                break;
> -            case PE_BASE_RELOC_HIGHLOW:
> -                if ( delta )
> -                {
> -                    *(u32 *)addr += delta;
> -                    if ( in_page_tables(addr) )
> -                        *(u32 *)addr += xen_phys_start;
> -                }
> -                break;
> -            case PE_BASE_RELOC_DIR64:
> -                if ( delta )
> -                {
> -                    *(u64 *)addr += delta;
> -                    if ( in_page_tables(addr) )
> -                        *(u64 *)addr += xen_phys_start;
> -                }
> -                break;
> -            default:
> -                blexit(L"Unsupported relocation type");
> -            }
> -        }
> -        base_relocs = (const void *)(base_relocs->entries + i + (i & 1));
> -    }
> +    if (!reloc_pe_back(delta, xen_phys_start, __base_relocs_start, __base_relocs_end))

Nit: Style.

> +        blexit(L"Unsupported relocation type");
>  }
>  
>  extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[];