[PATCH v6 04/13] xen/arm: Don't release IRQs on suspend

Mykola Kvach posted 13 patches 1 month, 4 weeks ago
[PATCH v6 04/13] xen/arm: Don't release IRQs on suspend
Posted by Mykola Kvach 1 month, 4 weeks ago
From: Mykola Kvach <mykola_kvach@epam.com>

If we call disable_nonboot_cpus on ARM64 with system_state set
to SYS_STATE_suspend, the following assertion will be triggered:

```
(XEN) [   25.582712] Disabling non-boot CPUs ...
(XEN) [   25.587032] Assertion '!in_irq() && (local_irq_is_enabled() || num_online_cpus() <= 1)' failed at common/xmalloc_tlsf.c:714
[...]
(XEN) [   25.975069] Xen call trace:
(XEN) [   25.978353]    [<00000a000022e098>] xfree+0x130/0x1a4 (PC)
(XEN) [   25.984314]    [<00000a000022e08c>] xfree+0x124/0x1a4 (LR)
(XEN) [   25.990276]    [<00000a00002747d4>] release_irq+0xe4/0xe8
(XEN) [   25.996152]    [<00000a0000278588>] time.c#cpu_time_callback+0x44/0x60
(XEN) [   26.003150]    [<00000a000021d678>] notifier_call_chain+0x7c/0xa0
(XEN) [   26.009717]    [<00000a00002018e0>] cpu.c#cpu_notifier_call_chain+0x24/0x48
(XEN) [   26.017148]    [<00000a000020192c>] cpu.c#_take_cpu_down+0x28/0x34
(XEN) [   26.023801]    [<00000a0000201944>] cpu.c#take_cpu_down+0xc/0x18
(XEN) [   26.030281]    [<00000a0000225c5c>] stop_machine.c#stopmachine_action+0xbc/0xe4
(XEN) [   26.038057]    [<00000a00002264bc>] tasklet.c#do_tasklet_work+0xb8/0x100
(XEN) [   26.045229]    [<00000a00002268a4>] do_tasklet+0x68/0xb0
(XEN) [   26.051018]    [<00000a000026e120>] domain.c#idle_loop+0x7c/0x194
(XEN) [   26.057585]    [<00000a0000277e30>] start_secondary+0x21c/0x220
(XEN) [   26.063978]    [<00000a0000361258>] 00000a0000361258
```

This happens because before invoking take_cpu_down via the stop_machine_run
function on the target CPU, stop_machine_run requests
the STOPMACHINE_DISABLE_IRQ state on that CPU. Releasing memory in
the release_irq function then triggers the assertion:

/*
 * Heap allocations may need TLB flushes which may require IRQs to be
 * enabled (except when only 1 PCPU is online).
 */

This patch adds system state checks to guard calls to request_irq
and release_irq. These calls are now skipped when system_state is
SYS_STATE_{resume,suspend}, preventing unsafe operations during
suspend/resume handling.

Signed-off-by: Mykola Kvach <mykola_kvach@epam.com>
---
Changes in V6:
- skipping of IRQ release during system suspend is now handled
  inside release_irq().

Changes in V4:
  - removed the prior tasklet-based workaround in favor of a more
    straightforward and safer solution
  - reworked the approach by adding explicit system state checks around
    request_irq and release_irq calls, skips these calls during suspend
    and resume states to avoid unsafe memory operations when IRQs are
    disabled
---
 xen/arch/arm/gic.c           |  3 +++
 xen/arch/arm/irq.c           |  3 +++
 xen/arch/arm/tee/ffa_notif.c |  2 +-
 xen/arch/arm/time.c          | 11 +++++++----
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index a018bd7715..c64481faa7 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -388,6 +388,9 @@ void gic_dump_info(struct vcpu *v)
 
 void init_maintenance_interrupt(void)
 {
+    if ( system_state == SYS_STATE_resume )
+        return;
+
     request_irq(gic_hw_ops->info->maintenance_irq, 0, maintenance_interrupt,
                 "irq-maintenance", NULL);
 }
diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
index 02ca82c089..361496a6d0 100644
--- a/xen/arch/arm/irq.c
+++ b/xen/arch/arm/irq.c
@@ -300,6 +300,9 @@ void release_irq(unsigned int irq, const void *dev_id)
     unsigned long flags;
     struct irqaction *action, **action_ptr;
 
+    if ( system_state == SYS_STATE_suspend )
+        return;
+
     desc = irq_to_desc(irq);
 
     spin_lock_irqsave(&desc->lock,flags);
diff --git a/xen/arch/arm/tee/ffa_notif.c b/xen/arch/arm/tee/ffa_notif.c
index 86bef6b3b2..4835e25619 100644
--- a/xen/arch/arm/tee/ffa_notif.c
+++ b/xen/arch/arm/tee/ffa_notif.c
@@ -363,7 +363,7 @@ void ffa_notif_init_interrupt(void)
 {
     int ret;
 
-    if ( fw_notif_enabled && notif_sri_irq < NR_GIC_SGI )
+    if ( fw_notif_enabled && notif_sri_irq < NR_GIC_SGI && system_state != SYS_STATE_resume )
     {
         /*
          * An error here is unlikely since the primary CPU has already
diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
index ad984fdfdd..8267fa5191 100644
--- a/xen/arch/arm/time.c
+++ b/xen/arch/arm/time.c
@@ -320,10 +320,13 @@ void init_timer_interrupt(void)
     WRITE_SYSREG(CNTHCTL_EL2_EL1PCTEN, CNTHCTL_EL2);
     disable_physical_timers();
 
-    request_irq(timer_irq[TIMER_HYP_PPI], 0, htimer_interrupt,
-                "hyptimer", NULL);
-    request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt,
-                   "virtimer", NULL);
+    if ( system_state != SYS_STATE_resume )
+    {
+        request_irq(timer_irq[TIMER_HYP_PPI], 0, htimer_interrupt,
+                    "hyptimer", NULL);
+        request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt,
+                    "virtimer", NULL);
+    }
 
     check_timer_irq_cfg(timer_irq[TIMER_HYP_PPI], "hypervisor");
     check_timer_irq_cfg(timer_irq[TIMER_VIRT_PPI], "virtual");
-- 
2.48.1
Re: [PATCH v6 04/13] xen/arm: Don't release IRQs on suspend
Posted by Julien Grall 1 month, 2 weeks ago
Hi Mykola,

On 01/09/2025 23:10, Mykola Kvach wrote:
> From: Mykola Kvach <mykola_kvach@epam.com>
> 
> If we call disable_nonboot_cpus on ARM64 with system_state set
> to SYS_STATE_suspend, the following assertion will be triggered:

Looking at the stack trace, I don't understand why this error would not 
happen when offlining a CPU. Can you clarify?

Anyway, I am not very happy to special case suspend/resume in the IRQ 
code. So I would strongly prefer if we follow a different approach.

The one that come to my mind is to switch from request_irq() to 
setup_irq() and allocate the action in a per-cpu variable. With that, 
there should be no free happening with the stop_machine helper.

Cheers,

-- 
Julien Grall
Re: [PATCH v6 04/13] xen/arm: Don't release IRQs on suspend
Posted by Mykola Kvach 1 month, 1 week ago
Hi Julien,

Thank you for the review.

On Sat, Sep 13, 2025 at 2:45 AM Julien Grall <julien@xen.org> wrote:
>
> Hi Mykola,
>
> On 01/09/2025 23:10, Mykola Kvach wrote:
> > From: Mykola Kvach <mykola_kvach@epam.com>
> >
> > If we call disable_nonboot_cpus on ARM64 with system_state set
> > to SYS_STATE_suspend, the following assertion will be triggered:
>
> Looking at the stack trace, I don't understand why this error would not
> happen when offlining a CPU. Can you clarify?
>
> Anyway, I am not very happy to special case suspend/resume in the IRQ
> code. So I would strongly prefer if we follow a different approach.
>
> The one that come to my mind is to switch from request_irq() to
> setup_irq() and allocate the action in a per-cpu variable. With that,
> there should be no free happening with the stop_machine helper.

Yes, this should help in my case and it also looks like a cleaner
solution, thank you.

Interestingly, my teammate Mykyta Poturai came up with the same idea a
few days ago when he faced a similar problem during CPU hotplug
implementation.

So I will just reuse his commits this is the one of the commits:

https://github.com/Deedone/xen/commit/3817601c2f437453035839f29e94c069a770817d

>
> Cheers,
>
> --
> Julien Grall
>

Best regards,
Mykola
Re: [PATCH v6 04/13] xen/arm: Don't release IRQs on suspend
Posted by Volodymyr Babchuk 1 month, 4 weeks ago
Hi Mykola,

Mykola Kvach <xakep.amatop@gmail.com> writes:

> From: Mykola Kvach <mykola_kvach@epam.com>
>
> If we call disable_nonboot_cpus on ARM64 with system_state set
> to SYS_STATE_suspend, the following assertion will be triggered:
>
> ```
> (XEN) [   25.582712] Disabling non-boot CPUs ...
> (XEN) [   25.587032] Assertion '!in_irq() && (local_irq_is_enabled() || num_online_cpus() <= 1)' failed at common/xmalloc_tlsf.c:714
> [...]
> (XEN) [   25.975069] Xen call trace:
> (XEN) [   25.978353]    [<00000a000022e098>] xfree+0x130/0x1a4 (PC)
> (XEN) [   25.984314]    [<00000a000022e08c>] xfree+0x124/0x1a4 (LR)
> (XEN) [   25.990276]    [<00000a00002747d4>] release_irq+0xe4/0xe8
> (XEN) [   25.996152]    [<00000a0000278588>] time.c#cpu_time_callback+0x44/0x60
> (XEN) [   26.003150]    [<00000a000021d678>] notifier_call_chain+0x7c/0xa0
> (XEN) [   26.009717]    [<00000a00002018e0>] cpu.c#cpu_notifier_call_chain+0x24/0x48
> (XEN) [   26.017148]    [<00000a000020192c>] cpu.c#_take_cpu_down+0x28/0x34
> (XEN) [   26.023801]    [<00000a0000201944>] cpu.c#take_cpu_down+0xc/0x18
> (XEN) [   26.030281]    [<00000a0000225c5c>] stop_machine.c#stopmachine_action+0xbc/0xe4
> (XEN) [   26.038057]    [<00000a00002264bc>] tasklet.c#do_tasklet_work+0xb8/0x100
> (XEN) [   26.045229]    [<00000a00002268a4>] do_tasklet+0x68/0xb0
> (XEN) [   26.051018]    [<00000a000026e120>] domain.c#idle_loop+0x7c/0x194
> (XEN) [   26.057585]    [<00000a0000277e30>] start_secondary+0x21c/0x220
> (XEN) [   26.063978]    [<00000a0000361258>] 00000a0000361258
> ```
>
> This happens because before invoking take_cpu_down via the stop_machine_run
> function on the target CPU, stop_machine_run requests
> the STOPMACHINE_DISABLE_IRQ state on that CPU. Releasing memory in
> the release_irq function then triggers the assertion:
>
> /*
>  * Heap allocations may need TLB flushes which may require IRQs to be
>  * enabled (except when only 1 PCPU is online).
>  */
>
> This patch adds system state checks to guard calls to request_irq
> and release_irq. These calls are now skipped when system_state is
> SYS_STATE_{resume,suspend}, preventing unsafe operations during
> suspend/resume handling.
>
> Signed-off-by: Mykola Kvach <mykola_kvach@epam.com>

Reviewed-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>

> ---
> Changes in V6:
> - skipping of IRQ release during system suspend is now handled
>   inside release_irq().
> Changes in V4:
>   - removed the prior tasklet-based workaround in favor of a more
>     straightforward and safer solution
>   - reworked the approach by adding explicit system state checks around
>     request_irq and release_irq calls, skips these calls during suspend
>     and resume states to avoid unsafe memory operations when IRQs are
>     disabled
> ---
>  xen/arch/arm/gic.c           |  3 +++
>  xen/arch/arm/irq.c           |  3 +++
>  xen/arch/arm/tee/ffa_notif.c |  2 +-
>  xen/arch/arm/time.c          | 11 +++++++----
>  4 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index a018bd7715..c64481faa7 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -388,6 +388,9 @@ void gic_dump_info(struct vcpu *v)
>  
>  void init_maintenance_interrupt(void)
>  {
> +    if ( system_state == SYS_STATE_resume )
> +        return;
> +
>      request_irq(gic_hw_ops->info->maintenance_irq, 0, maintenance_interrupt,
>                  "irq-maintenance", NULL);
>  }
> diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
> index 02ca82c089..361496a6d0 100644
> --- a/xen/arch/arm/irq.c
> +++ b/xen/arch/arm/irq.c
> @@ -300,6 +300,9 @@ void release_irq(unsigned int irq, const void *dev_id)
>      unsigned long flags;
>      struct irqaction *action, **action_ptr;
>  
> +    if ( system_state == SYS_STATE_suspend )
> +        return;
> +
>      desc = irq_to_desc(irq);
>  
>      spin_lock_irqsave(&desc->lock,flags);
> diff --git a/xen/arch/arm/tee/ffa_notif.c b/xen/arch/arm/tee/ffa_notif.c
> index 86bef6b3b2..4835e25619 100644
> --- a/xen/arch/arm/tee/ffa_notif.c
> +++ b/xen/arch/arm/tee/ffa_notif.c
> @@ -363,7 +363,7 @@ void ffa_notif_init_interrupt(void)
>  {
>      int ret;
>  
> -    if ( fw_notif_enabled && notif_sri_irq < NR_GIC_SGI )
> +    if ( fw_notif_enabled && notif_sri_irq < NR_GIC_SGI && system_state != SYS_STATE_resume )
>      {
>          /*
>           * An error here is unlikely since the primary CPU has already
> diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
> index ad984fdfdd..8267fa5191 100644
> --- a/xen/arch/arm/time.c
> +++ b/xen/arch/arm/time.c
> @@ -320,10 +320,13 @@ void init_timer_interrupt(void)
>      WRITE_SYSREG(CNTHCTL_EL2_EL1PCTEN, CNTHCTL_EL2);
>      disable_physical_timers();
>  
> -    request_irq(timer_irq[TIMER_HYP_PPI], 0, htimer_interrupt,
> -                "hyptimer", NULL);
> -    request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt,
> -                   "virtimer", NULL);
> +    if ( system_state != SYS_STATE_resume )
> +    {
> +        request_irq(timer_irq[TIMER_HYP_PPI], 0, htimer_interrupt,
> +                    "hyptimer", NULL);
> +        request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt,
> +                    "virtimer", NULL);
> +    }
>  
>      check_timer_irq_cfg(timer_irq[TIMER_HYP_PPI], "hypervisor");
>      check_timer_irq_cfg(timer_irq[TIMER_VIRT_PPI], "virtual");

-- 
WBR, Volodymyr