[PATCH RFC] x86/boot: Call efi_multiboot2() at its linked address

Andrew Cooper posted 1 patch 5 days, 9 hours ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20240930120628.1072839-1-andrew.cooper3@citrix.com
xen/arch/x86/boot/head.S   | 69 +++++++++++++++++++++++++++++++++++++-
xen/arch/x86/boot/x86_64.S |  4 +++
2 files changed, 72 insertions(+), 1 deletion(-)
[PATCH RFC] x86/boot: Call efi_multiboot2() at its linked address
Posted by Andrew Cooper 5 days, 9 hours ago
When entering via MB2+EFI, the early EFI code hasn't been relocated down to
its load address.  As a consequence, efi_multiboot2() is still expecting to
run at a high address.

To set this up, we need Xen's high mappings, while also retaining the EFI
physical-mode mappings in the low half.  Introduce a new efi_l4_bootmap[] for
the purpose.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Frediano Ziglio <frediano.ziglio@cloud.com>
CC: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
CC: Daniel Smith <dpsmith@apertussolutions.com>

Very RFC, compile tested only.
---
 xen/arch/x86/boot/head.S   | 69 +++++++++++++++++++++++++++++++++++++-
 xen/arch/x86/boot/x86_64.S |  4 +++
 2 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
index e0901ee40044..ef07f30d13da 100644
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -344,6 +344,66 @@ __efi64_mb2_start:
         lea     .Lmb2_no_ih(%rip),%r15
         jz      x86_32_switch
 
+        push    %rax
+        push    %rcx
+        push    %rdx
+        push    %rsi
+        push    %rdi
+
+        /* Merge lower half of EFI pagtables with upper half of Xen pagetables */
+        mov     %cr3, %rsi
+        lea     efi_l4_bootmap(%rip), %rdi
+        mov     $L4_PAGETABLE_ENTRIES / 2, %ecx
+        rep movsq
+        lea     ((L4_PAGETABLE_ENTRIES / 2) * 8) + idle_pg_table(%rip), %rsi
+        mov     $L4_PAGETABLE_ENTRIES / 2, %ecx
+        rep movsq
+
+        /* Switch to merged pagetables */
+        lea     efi_l4_bootmap(%rip), %rax
+        mov     %rax, %cr3
+
+        lea     __image_base__(%rip), %esi
+
+        /* Map Xen into the higher mappings using 2M superpages. */
+        lea     _PAGE_PSE + PAGE_HYPERVISOR_RWX + sym_esi(_start), %eax
+        mov     $sym_offs(_start),   %ecx   /* %eax = PTE to write ^      */
+        mov     $sym_offs(_end - 1), %edx
+        shr     $L2_PAGETABLE_SHIFT, %ecx   /* %ecx = First slot to write */
+        shr     $L2_PAGETABLE_SHIFT, %edx   /* %edx = Final slot to write */
+
+1:      mov     %eax, sym_offs(l2_xenmap)(%esi, %ecx, 8)
+        add     $1, %ecx
+        add     $1 << L2_PAGETABLE_SHIFT, %eax
+
+        cmp     %edx, %ecx
+        jbe     1b
+
+        /*
+         * Map Xen into the directmap (needed for early-boot pagetable
+         * handling/walking), and identity map Xen into bootmap (needed for
+         * the transition into long mode), using 2M superpages.
+         */
+        lea     sym_esi(_start), %ecx
+        lea     -1 + sym_esi(_end), %edx
+        lea     _PAGE_PSE + PAGE_HYPERVISOR_RWX(%ecx), %eax /* PTE to write. */
+        shr     $L2_PAGETABLE_SHIFT, %ecx                   /* First slot to write. */
+        shr     $L2_PAGETABLE_SHIFT, %edx                   /* Final slot to write. */
+
+1:      mov     %eax, sym_offs(l2_bootmap)  (%esi, %ecx, 8)
+        mov     %eax, sym_offs(l2_directmap)(%esi, %ecx, 8)
+        add     $1, %ecx
+        add     $1 << L2_PAGETABLE_SHIFT, %eax
+
+        cmp     %edx, %ecx
+        jbe     1b
+
+        pop     %rdi
+        pop     %rsi
+        pop     %rdx
+        pop     %rcx
+        pop     %rax
+
         /* Save Multiboot2 magic on the stack. */
         push    %rax
 
@@ -354,8 +414,15 @@ __efi64_mb2_start:
          * efi_multiboot2() is called according to System V AMD64 ABI:
          *   - IN:  %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
          *          %rdx - MB2 cmdline
+         *
+         * Call via the high mappings
          */
-        call    efi_multiboot2
+        lea     __image_base__(%rip), %r10
+        lea     efi_multiboot2(%rip), %rax
+        sub     %r10, %rax
+        mov     $__XEN_VIRT_START, %r10
+        addq    %r10, %rax
+        call    *%rax
 
         /* Just pop an item from the stack. */
         pop     %rax
diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
index 04bb62ae8680..93938d0b03f8 100644
--- a/xen/arch/x86/boot/x86_64.S
+++ b/xen/arch/x86/boot/x86_64.S
@@ -206,3 +206,7 @@ GLOBAL(l2_bootmap)
 GLOBAL(l3_bootmap)
         .fill L3_PAGETABLE_ENTRIES, 8, 0
         .size l3_bootmap, . - l3_bootmap
+
+efi_l4_bootmap:
+        .fill L4_PAGETABLE_ENTRIES, 8, 0
+        .size efi_l4_bootmap, . - efi_l4_bootmap

base-commit: 457052167b4dbcda59e06300039302479cc1debf
-- 
2.39.5


Re: [PATCH RFC] x86/boot: Call efi_multiboot2() at its linked address
Posted by Frediano Ziglio 5 days, 7 hours ago
On Mon, Sep 30, 2024 at 1:06 PM Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>
> When entering via MB2+EFI, the early EFI code hasn't been relocated down to
> it's load address.  As a consequence, efi_multboot2() is still expecting to
> run at high address.
>
> To set this up, we need Xen's high mappings, while also retaining the EFI
> physical-mode mappings in the low half.  Introduce a new efi_l4_bootmap[] for
> the purpose.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Frediano Ziglio <frediano.ziglio@cloud.com>
> CC: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
> CC: Daniel Smith <dpsmith@apertussolutions.com>
>
> Very RFC, compile tested only.

Tested; it crashes. But that is beside the point for an RFC.

> ---
>  xen/arch/x86/boot/head.S   | 69 +++++++++++++++++++++++++++++++++++++-
>  xen/arch/x86/boot/x86_64.S |  4 +++
>  2 files changed, 72 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
> index e0901ee40044..ef07f30d13da 100644
> --- a/xen/arch/x86/boot/head.S
> +++ b/xen/arch/x86/boot/head.S
> @@ -344,6 +344,66 @@ __efi64_mb2_start:
>          lea     .Lmb2_no_ih(%rip),%r15
>          jz      x86_32_switch
>
> +        push    %rax
> +        push    %rcx
> +        push    %rdx
> +        push    %rsi
> +        push    %rdi
> +
> +        /* Merge lower half of EFI pagtables with upper half of Xen pagetables */
> +        mov     %cr3, %rsi
> +        lea     efi_l4_bootmap(%rip), %rdi
> +        mov     $L4_PAGETABLE_ENTRIES / 2, %ecx
> +        rep movsq
> +        lea     ((L4_PAGETABLE_ENTRIES / 2) * 8) + idle_pg_table(%rip), %rsi
> +        mov     $L4_PAGETABLE_ENTRIES / 2, %ecx
> +        rep movsq
> +
> +        /* Switch to merged pagetables */
> +        lea     efi_l4_bootmap(%rip), %rax
> +        mov     %rax, %cr3
> +

I agree with Jan: we are presuming the firmware behaves in a way we
can't guarantee.
For instance, it could assume that the pages after the one %cr3 points to
are leaves of the first.
Or it could, for some reason, restore %cr3 at some point, making your code crash.
This could be alleviated with a trampoline that sets %cr3 back before
calling EFI code, but that looks like quite some work to do.

Ignoring those, it could work.

> +        lea     __image_base__(%rip), %esi
> +
> +        /* Map Xen into the higher mappings using 2M superpages. */
> +        lea     _PAGE_PSE + PAGE_HYPERVISOR_RWX + sym_esi(_start), %eax
> +        mov     $sym_offs(_start),   %ecx   /* %eax = PTE to write ^      */
> +        mov     $sym_offs(_end - 1), %edx
> +        shr     $L2_PAGETABLE_SHIFT, %ecx   /* %ecx = First slot to write */
> +        shr     $L2_PAGETABLE_SHIFT, %edx   /* %edx = Final slot to write */
> +
> +1:      mov     %eax, sym_offs(l2_xenmap)(%esi, %ecx, 8)
> +        add     $1, %ecx
> +        add     $1 << L2_PAGETABLE_SHIFT, %eax
> +
> +        cmp     %edx, %ecx
> +        jbe     1b
> +
> +        /*
> +         * Map Xen into the directmap (needed for early-boot pagetable
> +         * handling/walking), and identity map Xen into bootmap (needed for
> +         * the transition into long mode), using 2M superpages.
> +         */
> +        lea     sym_esi(_start), %ecx
> +        lea     -1 + sym_esi(_end), %edx
> +        lea     _PAGE_PSE + PAGE_HYPERVISOR_RWX(%ecx), %eax /* PTE to write. */
> +        shr     $L2_PAGETABLE_SHIFT, %ecx                   /* First slot to write. */
> +        shr     $L2_PAGETABLE_SHIFT, %edx                   /* Final slot to write. */
> +
> +1:      mov     %eax, sym_offs(l2_bootmap)  (%esi, %ecx, 8)
> +        mov     %eax, sym_offs(l2_directmap)(%esi, %ecx, 8)
> +        add     $1, %ecx
> +        add     $1 << L2_PAGETABLE_SHIFT, %eax
> +
> +        cmp     %edx, %ecx
> +        jbe     1b
> +

I think in addition to these settings, you need to adjust the pointers
already present in the half table you are copying (I suppose that's
why it crashes).

> +        pop     %rdi
> +        pop     %rsi
> +        pop     %rdx
> +        pop     %rcx
> +        pop     %rax
> +
>          /* Save Multiboot2 magic on the stack. */
>          push    %rax
>
> @@ -354,8 +414,15 @@ __efi64_mb2_start:
>           * efi_multiboot2() is called according to System V AMD64 ABI:
>           *   - IN:  %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
>           *          %rdx - MB2 cmdline
> +         *
> +         * Call via the high mappings
>           */
> -        call    efi_multiboot2
> +        lea     __image_base__(%rip), %r10
> +        lea     efi_multiboot2(%rip), %rax
> +        sub     %r10, %rax
> +        mov     $__XEN_VIRT_START, %r10
> +        addq    %r10, %rax
> +        call    *%rax
>
>          /* Just pop an item from the stack. */
>          pop     %rax
> diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
> index 04bb62ae8680..93938d0b03f8 100644
> --- a/xen/arch/x86/boot/x86_64.S
> +++ b/xen/arch/x86/boot/x86_64.S
> @@ -206,3 +206,7 @@ GLOBAL(l2_bootmap)
>  GLOBAL(l3_bootmap)
>          .fill L3_PAGETABLE_ENTRIES, 8, 0
>          .size l3_bootmap, . - l3_bootmap
> +
> +efi_l4_bootmap:
> +        .fill L4_PAGETABLE_ENTRIES, 8, 0
> +        .size efi_l4_bootmap, . - efi_l4_bootmap
>
> base-commit: 457052167b4dbcda59e06300039302479cc1debf

Another way to alleviate the "relocation issues" could be to relocate
to the lower addresses, but this could end up setting some pointers
that won't be relocated back to upper addresses.

Another way would be to consider this code as a separate loader,
making sure all of its data and code are position independent (sorry, no
compiler option can do that, which is why I wrote that RPC script to check
it).

Note: not saying this patch could not work.

Frediano
Re: [PATCH RFC] x86/boot: Call efi_multiboot2() at its linked address
Posted by Jan Beulich 5 days, 8 hours ago
On 30.09.2024 14:06, Andrew Cooper wrote:
> --- a/xen/arch/x86/boot/head.S
> +++ b/xen/arch/x86/boot/head.S
> @@ -344,6 +344,66 @@ __efi64_mb2_start:
>          lea     .Lmb2_no_ih(%rip),%r15
>          jz      x86_32_switch
>  
> +        push    %rax
> +        push    %rcx
> +        push    %rdx
> +        push    %rsi
> +        push    %rdi
> +
> +        /* Merge lower half of EFI pagtables with upper half of Xen pagetables */
> +        mov     %cr3, %rsi
> +        lea     efi_l4_bootmap(%rip), %rdi
> +        mov     $L4_PAGETABLE_ENTRIES / 2, %ecx
> +        rep movsq
> +        lea     ((L4_PAGETABLE_ENTRIES / 2) * 8) + idle_pg_table(%rip), %rsi
> +        mov     $L4_PAGETABLE_ENTRIES / 2, %ecx
> +        rep movsq
> +
> +        /* Switch to merged pagetables */
> +        lea     efi_l4_bootmap(%rip), %rax
> +        mov     %rax, %cr3

While it may appear to work in some environments, I don't think we're allowed
to replace page tables prior to calling ExitBootServices(). IOW we may not
complain if somewhere this fails to work.

This also builds on the assumption that no new L4 entries would be made by
firmware, and that it would not put anything in the upper half of the L4 table.

> +        lea     __image_base__(%rip), %esi
> +
> +        /* Map Xen into the higher mappings using 2M superpages. */
> +        lea     _PAGE_PSE + PAGE_HYPERVISOR_RWX + sym_esi(_start), %eax

I'm surprised to see you put in place new RWX mappings, when the goal
supposedly is to get rid of any such.

> +        mov     $sym_offs(_start),   %ecx   /* %eax = PTE to write ^      */
> +        mov     $sym_offs(_end - 1), %edx
> +        shr     $L2_PAGETABLE_SHIFT, %ecx   /* %ecx = First slot to write */
> +        shr     $L2_PAGETABLE_SHIFT, %edx   /* %edx = Final slot to write */
> +
> +1:      mov     %eax, sym_offs(l2_xenmap)(%esi, %ecx, 8)
> +        add     $1, %ecx
> +        add     $1 << L2_PAGETABLE_SHIFT, %eax
> +
> +        cmp     %edx, %ecx
> +        jbe     1b
> +
> +        /*
> +         * Map Xen into the directmap (needed for early-boot pagetable
> +         * handling/walking), and identity map Xen into bootmap (needed for
> +         * the transition into long mode), using 2M superpages.
> +         */
> +        lea     sym_esi(_start), %ecx
> +        lea     -1 + sym_esi(_end), %edx
> +        lea     _PAGE_PSE + PAGE_HYPERVISOR_RWX(%ecx), %eax /* PTE to write. */
> +        shr     $L2_PAGETABLE_SHIFT, %ecx                   /* First slot to write. */
> +        shr     $L2_PAGETABLE_SHIFT, %edx                   /* Final slot to write. */
> +
> +1:      mov     %eax, sym_offs(l2_bootmap)  (%esi, %ecx, 8)
> +        mov     %eax, sym_offs(l2_directmap)(%esi, %ecx, 8)
> +        add     $1, %ecx
> +        add     $1 << L2_PAGETABLE_SHIFT, %eax
> +
> +        cmp     %edx, %ecx
> +        jbe     1b
> +
> +        pop     %rdi
> +        pop     %rsi
> +        pop     %rdx
> +        pop     %rcx
> +        pop     %rax
> +
>          /* Save Multiboot2 magic on the stack. */
>          push    %rax
>  
> @@ -354,8 +414,15 @@ __efi64_mb2_start:
>           * efi_multiboot2() is called according to System V AMD64 ABI:
>           *   - IN:  %rdi - EFI ImageHandle, %rsi - EFI SystemTable,
>           *          %rdx - MB2 cmdline
> +         *
> +         * Call via the high mappings
>           */
> -        call    efi_multiboot2
> +        lea     __image_base__(%rip), %r10
> +        lea     efi_multiboot2(%rip), %rax
> +        sub     %r10, %rax
> +        mov     $__XEN_VIRT_START, %r10
> +        addq    %r10, %rax
> +        call    *%rax

While with this you arrange for all %rip-relative addressing to result in
using the linked-at positions, static pointers requiring base relocations
will still point at the loaded-at locations. Things would get particularly
interesting if the difference of two such (dissimilar) pointers would then
be calculated.

Jan