[PATCH] xen/arm: alternative: Don't call vmap() within stop_machine_run()

Julien Grall posted 1 patch 2 years ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20220426200629.58921-1-julien@xen.org
Test gitlab-ci passed
xen/arch/arm/alternative.c | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)
[PATCH] xen/arm: alternative: Don't call vmap() within stop_machine_run()
Posted by Julien Grall 2 years ago
From: Julien Grall <jgrall@amazon.com>

Commit 88a037e2cfe1 "page_alloc: assert IRQs are enabled in heap
alloc/free" extended the checks in the buddy allocator to catch
any use of the helpers from context with interrupts disabled.

Unfortunately, the rule is not followed in the alternative code and
this will result to crash at boot with debug enabled:

(XEN) Xen call trace:
(XEN)    [<0022a510>] alloc_xenheap_pages+0x120/0x150 (PC)
(XEN)    [<00000000>] 00000000 (LR)
(XEN)    [<002736ac>] arch/arm/mm.c#xen_pt_update+0x144/0x6e4
(XEN)    [<002740d4>] map_pages_to_xen+0x10/0x20
(XEN)    [<00236864>] __vmap+0x400/0x4a4
(XEN)    [<0026aee8>] arch/arm/alternative.c#__apply_alternatives_multi_stop+0x144/0x1ec
(XEN)    [<0022fe40>] stop_machine_run+0x23c/0x300
(XEN)    [<002c40c4>] apply_alternatives_all+0x34/0x5c
(XEN)    [<002ce3e8>] start_xen+0xcb8/0x1024
(XEN)    [<00200068>] arch/arm/arm32/head.o#primary_switched+0xc/0x1c

The interrupts will be disabled by the state machine in stop_machine_run(),
hence why the ASSERT is hit.

For now the patch extending the checks has been reverted, but it would
be good to re-introduce it (allocation with interrupts disabled is not
desirable).

So move the re-mapping of Xen to the caller of stop_machine_run().

Signed-off-by: Julien Grall <jgrall@amazon.com>
Cc: David Vrabel <dvrabel@amazon.co.uk>

---

I managed to successfully boot Xen with this patch and dropping the
revert.
---
 xen/arch/arm/alternative.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/xen/arch/arm/alternative.c b/xen/arch/arm/alternative.c
index 237c4e564209..f03cd943c636 100644
--- a/xen/arch/arm/alternative.c
+++ b/xen/arch/arm/alternative.c
@@ -170,7 +170,7 @@ static int __apply_alternatives(const struct alt_region *region,
  * We might be patching the stop_machine state machine, so implement a
  * really simple polling protocol here.
  */
-static int __apply_alternatives_multi_stop(void *unused)
+static int __apply_alternatives_multi_stop(void *xenmap)
 {
     static int patched = 0;
 
@@ -185,22 +185,9 @@ static int __apply_alternatives_multi_stop(void *unused)
     {
         int ret;
         struct alt_region region;
-        mfn_t xen_mfn = virt_to_mfn(_start);
-        paddr_t xen_size = _end - _start;
-        unsigned int xen_order = get_order_from_bytes(xen_size);
-        void *xenmap;
 
         BUG_ON(patched);
 
-        /*
-         * The text and inittext section are read-only. So re-map Xen to
-         * be able to patch the code.
-         */
-        xenmap = __vmap(&xen_mfn, 1U << xen_order, 1, 1, PAGE_HYPERVISOR,
-                        VMAP_DEFAULT);
-        /* Re-mapping Xen is not expected to fail during boot. */
-        BUG_ON(!xenmap);
-
         region.begin = __alt_instructions;
         region.end = __alt_instructions_end;
 
@@ -208,8 +195,6 @@ static int __apply_alternatives_multi_stop(void *unused)
         /* The patching is not expected to fail during boot. */
         BUG_ON(ret != 0);
 
-        vunmap(xenmap);
-
         /* Barriers provided by the cache flushing */
         write_atomic(&patched, 1);
     }
@@ -224,14 +209,29 @@ static int __apply_alternatives_multi_stop(void *unused)
 void __init apply_alternatives_all(void)
 {
     int ret;
+    mfn_t xen_mfn = virt_to_mfn(_start);
+    paddr_t xen_size = _end - _start;
+    unsigned int xen_order = get_order_from_bytes(xen_size);
+    void *xenmap;
 
     ASSERT(system_state != SYS_STATE_active);
 
+    /*
+     * The text and inittext section are read-only. So re-map Xen to
+     * be able to patch the code.
+     */
+    xenmap = __vmap(&xen_mfn, 1U << xen_order, 1, 1, PAGE_HYPERVISOR,
+                    VMAP_DEFAULT);
+    /* Re-mapping Xen is not expected to fail during boot. */
+    BUG_ON(!xenmap);
+
 	/* better not try code patching on a live SMP system */
-    ret = stop_machine_run(__apply_alternatives_multi_stop, NULL, NR_CPUS);
+    ret = stop_machine_run(__apply_alternatives_multi_stop, xenmap, NR_CPUS);
 
     /* stop_machine_run should never fail at this stage of the boot */
     BUG_ON(ret);
+
+    vunmap(xenmap);
 }
 
 int apply_alternatives(const struct alt_instr *start, const struct alt_instr *end)
-- 
2.32.0
Re: [PATCH] xen/arm: alternative: Don't call vmap() within stop_machine_run()
Posted by Jan Beulich 2 years ago
On 26.04.2022 22:06, Julien Grall wrote:
> From: Julien Grall <jgrall@amazon.com>
> 
> Commit 88a037e2cfe1 "page_alloc: assert IRQs are enabled in heap
> alloc/free" extended the checks in the buddy allocator to catch
> any use of the helpers from context with interrupts disabled.
> 
> Unfortunately, the rule is not followed in the alternative code and
> this will result to crash at boot with debug enabled:
> 
> (XEN) Xen call trace:
> (XEN)    [<0022a510>] alloc_xenheap_pages+0x120/0x150 (PC)
> (XEN)    [<00000000>] 00000000 (LR)
> (XEN)    [<002736ac>] arch/arm/mm.c#xen_pt_update+0x144/0x6e4
> (XEN)    [<002740d4>] map_pages_to_xen+0x10/0x20
> (XEN)    [<00236864>] __vmap+0x400/0x4a4
> (XEN)    [<0026aee8>] arch/arm/alternative.c#__apply_alternatives_multi_stop+0x144/0x1ec
> (XEN)    [<0022fe40>] stop_machine_run+0x23c/0x300
> (XEN)    [<002c40c4>] apply_alternatives_all+0x34/0x5c
> (XEN)    [<002ce3e8>] start_xen+0xcb8/0x1024
> (XEN)    [<00200068>] arch/arm/arm32/head.o#primary_switched+0xc/0x1c
> 
> The interrupts will be disabled by the state machine in stop_machine_run(),
> hence why the ASSERT is hit.
> 
> For now the patch extending the checks has been reverted, but it would
> be good to re-introduce it (allocation with interrupts disabled is not
> desirable).

We definitely should re-apply that patch once the one here went in.

Jan
Re: [PATCH] xen/arm: alternative: Don't call vmap() within stop_machine_run()
Posted by Julien Grall 2 years ago
Hi Jan,

On 27/04/2022 07:42, Jan Beulich wrote:
> On 26.04.2022 22:06, Julien Grall wrote:
>> From: Julien Grall <jgrall@amazon.com>
>>
>> Commit 88a037e2cfe1 "page_alloc: assert IRQs are enabled in heap
>> alloc/free" extended the checks in the buddy allocator to catch
>> any use of the helpers from context with interrupts disabled.
>>
>> Unfortunately, the rule is not followed in the alternative code and
>> this will result to crash at boot with debug enabled:
>>
>> (XEN) Xen call trace:
>> (XEN)    [<0022a510>] alloc_xenheap_pages+0x120/0x150 (PC)
>> (XEN)    [<00000000>] 00000000 (LR)
>> (XEN)    [<002736ac>] arch/arm/mm.c#xen_pt_update+0x144/0x6e4
>> (XEN)    [<002740d4>] map_pages_to_xen+0x10/0x20
>> (XEN)    [<00236864>] __vmap+0x400/0x4a4
>> (XEN)    [<0026aee8>] arch/arm/alternative.c#__apply_alternatives_multi_stop+0x144/0x1ec
>> (XEN)    [<0022fe40>] stop_machine_run+0x23c/0x300
>> (XEN)    [<002c40c4>] apply_alternatives_all+0x34/0x5c
>> (XEN)    [<002ce3e8>] start_xen+0xcb8/0x1024
>> (XEN)    [<00200068>] arch/arm/arm32/head.o#primary_switched+0xc/0x1c
>>
>> The interrupts will be disabled by the state machine in stop_machine_run(),
>> hence why the ASSERT is hit.
>>
>> For now the patch extending the checks has been reverted, but it would
>> be good to re-introduce it (allocation with interrupts disabled is not
>> desirable).
> 
> We definitely should re-apply that patch once the one here went in.

I have commit this patch and also re-apply David's patch.

Cheers,

-- 
Julien Grall
Re: [PATCH] xen/arm: alternative: Don't call vmap() within stop_machine_run()
Posted by Stefano Stabellini 2 years ago
On Tue, 26 Apr 2022, Julien Grall wrote:
> From: Julien Grall <jgrall@amazon.com>
> 
> Commit 88a037e2cfe1 "page_alloc: assert IRQs are enabled in heap
> alloc/free" extended the checks in the buddy allocator to catch
> any use of the helpers from context with interrupts disabled.
> 
> Unfortunately, the rule is not followed in the alternative code and
> this will result to crash at boot with debug enabled:
> 
> (XEN) Xen call trace:
> (XEN)    [<0022a510>] alloc_xenheap_pages+0x120/0x150 (PC)
> (XEN)    [<00000000>] 00000000 (LR)
> (XEN)    [<002736ac>] arch/arm/mm.c#xen_pt_update+0x144/0x6e4
> (XEN)    [<002740d4>] map_pages_to_xen+0x10/0x20
> (XEN)    [<00236864>] __vmap+0x400/0x4a4
> (XEN)    [<0026aee8>] arch/arm/alternative.c#__apply_alternatives_multi_stop+0x144/0x1ec
> (XEN)    [<0022fe40>] stop_machine_run+0x23c/0x300
> (XEN)    [<002c40c4>] apply_alternatives_all+0x34/0x5c
> (XEN)    [<002ce3e8>] start_xen+0xcb8/0x1024
> (XEN)    [<00200068>] arch/arm/arm32/head.o#primary_switched+0xc/0x1c
> 
> The interrupts will be disabled by the state machine in stop_machine_run(),
> hence why the ASSERT is hit.
> 
> For now the patch extending the checks has been reverted, but it would
> be good to re-introduce it (allocation with interrupts disabled is not
> desirable).
> 
> So move the re-mapping of Xen to the caller of stop_machine_run().
> 
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> Cc: David Vrabel <dvrabel@amazon.co.uk>

Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>


> ---
> 
> I managed to successfully boot Xen with this patch and dropping the
> revert.
> ---
>  xen/arch/arm/alternative.c | 34 +++++++++++++++++-----------------
>  1 file changed, 17 insertions(+), 17 deletions(-)
> 
> diff --git a/xen/arch/arm/alternative.c b/xen/arch/arm/alternative.c
> index 237c4e564209..f03cd943c636 100644
> --- a/xen/arch/arm/alternative.c
> +++ b/xen/arch/arm/alternative.c
> @@ -170,7 +170,7 @@ static int __apply_alternatives(const struct alt_region *region,
>   * We might be patching the stop_machine state machine, so implement a
>   * really simple polling protocol here.
>   */
> -static int __apply_alternatives_multi_stop(void *unused)
> +static int __apply_alternatives_multi_stop(void *xenmap)
>  {
>      static int patched = 0;
>  
> @@ -185,22 +185,9 @@ static int __apply_alternatives_multi_stop(void *unused)
>      {
>          int ret;
>          struct alt_region region;
> -        mfn_t xen_mfn = virt_to_mfn(_start);
> -        paddr_t xen_size = _end - _start;
> -        unsigned int xen_order = get_order_from_bytes(xen_size);
> -        void *xenmap;
>  
>          BUG_ON(patched);
>  
> -        /*
> -         * The text and inittext section are read-only. So re-map Xen to
> -         * be able to patch the code.
> -         */
> -        xenmap = __vmap(&xen_mfn, 1U << xen_order, 1, 1, PAGE_HYPERVISOR,
> -                        VMAP_DEFAULT);
> -        /* Re-mapping Xen is not expected to fail during boot. */
> -        BUG_ON(!xenmap);
> -
>          region.begin = __alt_instructions;
>          region.end = __alt_instructions_end;
>  
> @@ -208,8 +195,6 @@ static int __apply_alternatives_multi_stop(void *unused)
>          /* The patching is not expected to fail during boot. */
>          BUG_ON(ret != 0);
>  
> -        vunmap(xenmap);
> -
>          /* Barriers provided by the cache flushing */
>          write_atomic(&patched, 1);
>      }
> @@ -224,14 +209,29 @@ static int __apply_alternatives_multi_stop(void *unused)
>  void __init apply_alternatives_all(void)
>  {
>      int ret;
> +    mfn_t xen_mfn = virt_to_mfn(_start);
> +    paddr_t xen_size = _end - _start;
> +    unsigned int xen_order = get_order_from_bytes(xen_size);
> +    void *xenmap;
>  
>      ASSERT(system_state != SYS_STATE_active);
>  
> +    /*
> +     * The text and inittext section are read-only. So re-map Xen to
> +     * be able to patch the code.
> +     */
> +    xenmap = __vmap(&xen_mfn, 1U << xen_order, 1, 1, PAGE_HYPERVISOR,
> +                    VMAP_DEFAULT);
> +    /* Re-mapping Xen is not expected to fail during boot. */
> +    BUG_ON(!xenmap);
> +
>  	/* better not try code patching on a live SMP system */
> -    ret = stop_machine_run(__apply_alternatives_multi_stop, NULL, NR_CPUS);
> +    ret = stop_machine_run(__apply_alternatives_multi_stop, xenmap, NR_CPUS);
>  
>      /* stop_machine_run should never fail at this stage of the boot */
>      BUG_ON(ret);
> +
> +    vunmap(xenmap);
>  }
>  
>  int apply_alternatives(const struct alt_instr *start, const struct alt_instr *end)
> -- 
> 2.32.0
>