[PATCH v8 13/13] xen/arm: Add support for system suspend triggered by hardware domain

Mykola Kvach posted 13 patches 5 days, 19 hours ago
[PATCH v8 13/13] xen/arm: Add support for system suspend triggered by hardware domain
Posted by Mykola Kvach 5 days, 19 hours ago
From: Mirela Simonovic <mirela.simonovic@aggios.com>

Trigger Xen suspend when the hardware domain initiates suspend via
SHUTDOWN_suspend. Redirect system suspend to CPU#0 to ensure the
suspend logic runs on the boot CPU, as required.

Introduce full suspend/resume infrastructure gated by CONFIG_SYSTEM_SUSPEND,
including logic to:
 - disable and enable non-boot physical CPUs
 - freeze and thaw domains
 - suspend and resume the GIC, timer, iommu and console
 - maintain system state before and after suspend

On boot, init_ttbr is normally initialized during secondary CPU hotplug.
On uniprocessor systems, this would leave init_ttbr uninitialized,
causing resume to fail. To address this, the boot CPU now sets init_ttbr
during suspend.

Remove the restriction in the vPSCI interface preventing suspend from the
hardware domain.

Select HAS_SYSTEM_SUSPEND for ARM_64.

Introduce CONFIG_HAS_HWDOM_SYSTEM_SUSPEND as an architecture-selected
capability for platforms where the hardware domain survives
SHUTDOWN_suspend without hwdom_shutdown(). ARM_64 selects it with
SYSTEM_SUSPEND enabled; other architectures keep the existing behaviour.

Note: the code is behind CONFIG_HAS_SYSTEM_SUSPEND, which is currently only
selected when UNSUPPORTED is set and when MPU isn't set, so the functionality
is built but disabled by default.

Signed-off-by: Mirela Simonovic <mirela.simonovic@aggios.com>
Signed-off-by: Saeed Nowshadi <saeed.nowshadi@xilinx.com>
Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
Signed-off-by: Mykola Kvach <mykola_kvach@epam.com>
---
Changes in V8:
- Add a pre-suspend check in system_suspend() after scheduler_disable()
  to require all domains to be in the shut down state with
  SHUTDOWN_suspend before proceeding with the global suspend flow.
- Drop the common-level depends on !ARM_64 || !SYSTEM_SUSPEND from
  CONFIG_HAS_HWDOM_SHUTDOWN_ON_SUSPEND and model the ARM64 suspend
  case with an arch-selected capability instead.
- Rename CONFIG_HAS_HWDOM_SHUTDOWN_ON_SUSPEND to CONFIG_HAS_HWDOM_SYSTEM_SUSPEND.
- Rename need_hwdom_shutdown() to want_hwdom_shutdown().

Changes in V7:
- Control domain is responsible for host suspend
- Move the is_hardware_domain check into host_system_suspend()
- Add an empty inline host_system_suspend() function when SYSTEM_SUSPEND
  config is disabled
- Use IS_ENABLED() for config checking instead of #ifdef
- Replace #ifdef checks in domain_shutdown() with IS_ENABLED() to simplify
  control flow.
- Factor hardware domain shutdown condition into a helper
  (need_hwdom_shutdown()) to avoid preprocessor directives inside the function.
- Squash with iommu suspend/resume commit
---
 xen/arch/arm/Kconfig               |   2 +
 xen/arch/arm/include/asm/mm.h      |   2 +
 xen/arch/arm/include/asm/suspend.h |   7 +-
 xen/arch/arm/mmu/smpboot.c         |   2 +-
 xen/arch/arm/suspend.c             | 181 +++++++++++++++++++++++++++++
 xen/arch/arm/vpsci.c               |  12 +-
 xen/common/Kconfig                 |   3 +
 xen/common/domain.c                |   7 +-
 xen/drivers/passthrough/arm/smmu.c |  10 ++
 9 files changed, 220 insertions(+), 6 deletions(-)

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 2f2b501fda..c2e63ce8ff 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -8,6 +8,8 @@ config ARM_64
 	depends on !ARM_32
 	select 64BIT
 	select HAS_FAST_MULTIPLY
+	select HAS_HWDOM_SYSTEM_SUSPEND if SYSTEM_SUSPEND
+	select HAS_SYSTEM_SUSPEND if !MPU && UNSUPPORTED
 	select HAS_VPCI_GUEST_SUPPORT if PCI_PASSTHROUGH
 
 config ARM
diff --git a/xen/arch/arm/include/asm/mm.h b/xen/arch/arm/include/asm/mm.h
index 72a6928624..87b54a55dc 100644
--- a/xen/arch/arm/include/asm/mm.h
+++ b/xen/arch/arm/include/asm/mm.h
@@ -360,6 +360,8 @@ static inline void page_set_xenheap_gfn(struct page_info *p, gfn_t gfn)
     } while ( (y = cmpxchg(&p->u.inuse.type_info, x, nx)) != x );
 }
 
+void set_init_ttbr(lpae_t *root);
+
 #endif /*  __ARCH_ARM_MM__ */
 /*
  * Local variables:
diff --git a/xen/arch/arm/include/asm/suspend.h b/xen/arch/arm/include/asm/suspend.h
index c127fa3d78..c36ba23b10 100644
--- a/xen/arch/arm/include/asm/suspend.h
+++ b/xen/arch/arm/include/asm/suspend.h
@@ -38,7 +38,12 @@ extern struct cpu_context cpu_context;
 
 int prepare_resume_ctx(struct cpu_context *ptr);
 void hyp_resume(void);
-#endif /* CONFIG_SYSTEM_SUSPEND */
+void host_system_suspend(struct domain *d);
+
+#else /* !CONFIG_SYSTEM_SUSPEND */
+
+static inline void host_system_suspend(struct domain *d) { (void)d; }
+#endif
 
 #endif /* ARM_SUSPEND_H */
 
diff --git a/xen/arch/arm/mmu/smpboot.c b/xen/arch/arm/mmu/smpboot.c
index 37e91d72b7..ff508ecf40 100644
--- a/xen/arch/arm/mmu/smpboot.c
+++ b/xen/arch/arm/mmu/smpboot.c
@@ -72,7 +72,7 @@ static void clear_boot_pagetables(void)
     clear_table(boot_third);
 }
 
-static void set_init_ttbr(lpae_t *root)
+void set_init_ttbr(lpae_t *root)
 {
     /*
      * init_ttbr is part of the identity mapping which is read-only. So
diff --git a/xen/arch/arm/suspend.c b/xen/arch/arm/suspend.c
index e38566b0b7..4d1289776b 100644
--- a/xen/arch/arm/suspend.c
+++ b/xen/arch/arm/suspend.c
@@ -1,9 +1,190 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 
+#include <asm/psci.h>
 #include <asm/suspend.h>
 
+#include <public/sched.h>
+#include <xen/console.h>
+#include <xen/cpu.h>
+#include <xen/errno.h>
+#include <xen/iommu.h>
+#include <xen/sched.h>
+#include <xen/tasklet.h>
+
 struct cpu_context cpu_context = {};
 
+static int can_system_suspend(void)
+{
+    int ret = 0;
+    struct domain *d;
+
+    rcu_read_lock(&domlist_read_lock);
+
+    for_each_domain ( d )
+    {
+        bool domain_suspended;
+
+        spin_lock(&d->shutdown_lock);
+        domain_suspended = d->is_shut_down &&
+                           d->shutdown_code == SHUTDOWN_suspend;
+        spin_unlock(&d->shutdown_lock);
+
+        if ( domain_suspended )
+            continue;
+
+        printk(XENLOG_ERR
+               "System suspend requires all domains to be shut down for suspend (dom%d: isn't in suspend state)\n",
+               d->domain_id);
+
+        ret = -EBUSY;
+        break;
+    }
+
+    rcu_read_unlock(&domlist_read_lock);
+
+    return ret;
+}
+
+/* Xen suspend. data identifies the domain that initiated suspend. */
+static void system_suspend(void *data)
+{
+    int status;
+    unsigned long flags;
+    struct domain *d = (struct domain *)data;
+
+    BUG_ON(system_state != SYS_STATE_active);
+
+    system_state = SYS_STATE_suspend;
+
+    printk("Xen suspending...\n");
+
+    freeze_domains();
+    scheduler_disable();
+
+    status = can_system_suspend();
+    if ( status )
+    {
+        system_state = SYS_STATE_resume;
+        goto resume_scheduler;
+    }
+
+    /*
+     * Non-boot CPUs have to be disabled on suspend and enabled on resume
+     * (hotplug-based mechanism). Disabling non-boot CPUs will lead to PSCI
+     * CPU_OFF to be called by each non-boot CPU. Depending on the underlying
+     * platform capabilities, this may lead to the physical powering down of
+     * CPUs.
+     */
+    status = disable_nonboot_cpus();
+    if ( status )
+    {
+        system_state = SYS_STATE_resume;
+        goto resume_nonboot_cpus;
+    }
+
+    time_suspend();
+
+    status = iommu_suspend();
+    if ( status )
+    {
+        system_state = SYS_STATE_resume;
+        goto resume_time;
+    }
+
+    console_start_sync();
+    status = console_suspend();
+    if ( status )
+    {
+        dprintk(XENLOG_ERR, "Failed to suspend the console, err=%d\n", status);
+        system_state = SYS_STATE_resume;
+        goto resume_end_sync;
+    }
+
+    local_irq_save(flags);
+    status = gic_suspend();
+    if ( status )
+    {
+        system_state = SYS_STATE_resume;
+        goto resume_irqs;
+    }
+
+    set_init_ttbr(xen_pgtable);
+
+    /*
+     * Enable identity mapping before entering suspend to simplify
+     * the resume path
+     */
+    update_boot_mapping(true);
+
+    if ( prepare_resume_ctx(&cpu_context) )
+    {
+        status = call_psci_system_suspend();
+        /*
+         * If suspend is finalized properly by above system suspend PSCI call,
+         * the code below in this 'if' branch will never execute. Execution
+         * will continue from hyp_resume which is the hypervisor's resume point.
+         * In hyp_resume CPU context will be restored and since link-register is
+         * restored as well, it will appear to return from prepare_resume_ctx.
+         * The difference in returning from prepare_resume_ctx on system suspend
+         * versus resume is in function's return value: on suspend, the return
+         * value is a non-zero value, on resume it is zero. That is why the
+         * control flow will not re-enter this 'if' branch on resume.
+         */
+        if ( status )
+            dprintk(XENLOG_WARNING, "PSCI system suspend failed, err=%d\n",
+                    status);
+    }
+
+    system_state = SYS_STATE_resume;
+    update_boot_mapping(false);
+
+    gic_resume();
+
+ resume_irqs:
+    local_irq_restore(flags);
+
+    console_resume();
+ resume_end_sync:
+    console_end_sync();
+
+    iommu_resume();
+
+ resume_time:
+    time_resume();
+
+ resume_nonboot_cpus:
+    /*
+     * The rcu_barrier() has to be added to ensure that the per cpu area is
+     * freed before a non-boot CPU tries to initialize it (_free_percpu_area()
+     * has to be called before the init_percpu_area()). This scenario occurs
+     * when non-boot CPUs are hot-unplugged on suspend and hotplugged on resume.
+     */
+    rcu_barrier();
+    enable_nonboot_cpus();
+
+ resume_scheduler:
+    scheduler_enable();
+    thaw_domains();
+
+    system_state = SYS_STATE_active;
+
+    printk("Resume (status %d)\n", status);
+
+    domain_resume(d);
+}
+
+static DECLARE_TASKLET(system_suspend_tasklet, system_suspend, NULL);
+
+void host_system_suspend(struct domain *d)
+{
+    system_suspend_tasklet.data = (void *)d;
+    /*
+     * The suspend procedure has to be finalized by the pCPU#0 (non-boot pCPUs
+     * will be disabled during the suspend).
+     */
+    tasklet_schedule_on_cpu(&system_suspend_tasklet, 0);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/vpsci.c b/xen/arch/arm/vpsci.c
index bd87ec430d..8fb9172186 100644
--- a/xen/arch/arm/vpsci.c
+++ b/xen/arch/arm/vpsci.c
@@ -5,6 +5,7 @@
 
 #include <asm/current.h>
 #include <asm/domain.h>
+#include <asm/suspend.h>
 #include <asm/vgic.h>
 #include <asm/vpsci.h>
 #include <asm/event.h>
@@ -232,8 +233,7 @@ static int32_t do_psci_1_0_system_suspend(register_t epoint, register_t cid)
     if ( is_64bit_domain(d) && is_thumb )
         return PSCI_INVALID_ADDRESS;
 
-    /* SYSTEM_SUSPEND is not supported for the hardware domain yet */
-    if ( is_hardware_domain(d) )
+    if ( !IS_ENABLED(CONFIG_SYSTEM_SUSPEND) && is_hardware_domain(d) )
         return PSCI_NOT_SUPPORTED;
 
     /* Ensure that all CPUs other than the calling one are offline */
@@ -266,6 +266,9 @@ static int32_t do_psci_1_0_system_suspend(register_t epoint, register_t cid)
             "SYSTEM_SUSPEND requested, epoint=%#"PRIregister", cid=%#"PRIregister"\n",
             epoint, cid);
 
+    if ( is_control_domain(d) )
+        host_system_suspend(d);
+
     return rc;
 }
 
@@ -290,7 +293,10 @@ static int32_t do_psci_1_0_features(uint32_t psci_func_id)
         return 0;
     case PSCI_1_0_FN32_SYSTEM_SUSPEND:
     case PSCI_1_0_FN64_SYSTEM_SUSPEND:
-        return is_hardware_domain(current->domain) ? PSCI_NOT_SUPPORTED : 0;
+        if ( IS_ENABLED(CONFIG_SYSTEM_SUSPEND) ||
+             !is_hardware_domain(current->domain) )
+            return 0;
+        fallthrough;
     default:
         return PSCI_NOT_SUPPORTED;
     }
diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index 0a20aa0a12..feb1336f46 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -137,6 +137,9 @@ config HAS_EX_TABLE
 config HAS_FAST_MULTIPLY
 	bool
 
+config HAS_HWDOM_SYSTEM_SUSPEND
+	bool
+
 config HAS_IOPORTS
 	bool
 
diff --git a/xen/common/domain.c b/xen/common/domain.c
index bb9e210c28..d3edfb2a13 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -1375,6 +1375,11 @@ void __domain_crash(struct domain *d)
     domain_shutdown(d, SHUTDOWN_crash);
 }
 
+static inline bool want_hwdom_shutdown(uint8_t reason)
+{
+    return !IS_ENABLED(CONFIG_HAS_HWDOM_SYSTEM_SUSPEND) ||
+           reason != SHUTDOWN_suspend;
+}
 
 int domain_shutdown(struct domain *d, u8 reason)
 {
@@ -1391,7 +1396,7 @@ int domain_shutdown(struct domain *d, u8 reason)
         d->shutdown_code = reason;
     reason = d->shutdown_code;
 
-    if ( is_hardware_domain(d) )
+    if ( is_hardware_domain(d) && want_hwdom_shutdown(reason) )
         hwdom_shutdown(reason);
 
     if ( d->is_shutting_down )
diff --git a/xen/drivers/passthrough/arm/smmu.c b/xen/drivers/passthrough/arm/smmu.c
index 22d306d0cb..45f29ef8ec 100644
--- a/xen/drivers/passthrough/arm/smmu.c
+++ b/xen/drivers/passthrough/arm/smmu.c
@@ -2947,6 +2947,13 @@ static void arm_smmu_iommu_domain_teardown(struct domain *d)
 	xfree(xen_domain);
 }
 
+#ifdef CONFIG_SYSTEM_SUSPEND
+static int arm_smmu_suspend(void)
+{
+	return -ENOSYS;
+}
+#endif
+
 static const struct iommu_ops arm_smmu_iommu_ops = {
     .page_sizes = PAGE_SIZE_4K,
     .init = arm_smmu_iommu_domain_init,
@@ -2960,6 +2967,9 @@ static const struct iommu_ops arm_smmu_iommu_ops = {
     .map_page = arm_iommu_map_page,
     .unmap_page = arm_iommu_unmap_page,
     .dt_xlate = arm_smmu_dt_xlate_generic,
+#ifdef CONFIG_SYSTEM_SUSPEND
+    .suspend = arm_smmu_suspend,
+#endif
 };
 
 static struct arm_smmu_device *find_smmu(const struct device *dev)
-- 
2.43.0
Re: [PATCH v8 13/13] xen/arm: Add support for system suspend triggered by hardware domain
Posted by Jan Beulich 5 days, 18 hours ago
On 02.04.2026 12:45, Mykola Kvach wrote:
> +/* Xen suspend. data identifies the domain that initiated suspend. */
> +static void system_suspend(void *data)
> +{
> +    int status;
> +    unsigned long flags;
> +    struct domain *d = (struct domain *)data;
> +
> +    BUG_ON(system_state != SYS_STATE_active);
> +
> +    system_state = SYS_STATE_suspend;
> +
> +    printk("Xen suspending...\n");
> +
> +    freeze_domains();
> +    scheduler_disable();
> +
> +    status = can_system_suspend();
> +    if ( status )
> +    {
> +        system_state = SYS_STATE_resume;
> +        goto resume_scheduler;
> +    }
> +
> +    /*
> +     * Non-boot CPUs have to be disabled on suspend and enabled on resume
> +     * (hotplug-based mechanism). Disabling non-boot CPUs will lead to PSCI
> +     * CPU_OFF to be called by each non-boot CPU. Depending on the underlying
> +     * platform capabilities, this may lead to the physical powering down of
> +     * CPUs.
> +     */
> +    status = disable_nonboot_cpus();
> +    if ( status )
> +    {
> +        system_state = SYS_STATE_resume;
> +        goto resume_nonboot_cpus;
> +    }
> +
> +    time_suspend();
> +
> +    status = iommu_suspend();
> +    if ( status )
> +    {
> +        system_state = SYS_STATE_resume;
> +        goto resume_time;
> +    }

So you've frozen the system just to get ...

> --- a/xen/drivers/passthrough/arm/smmu.c
> +++ b/xen/drivers/passthrough/arm/smmu.c
> @@ -2947,6 +2947,13 @@ static void arm_smmu_iommu_domain_teardown(struct domain *d)
>  	xfree(xen_domain);
>  }
>  
> +#ifdef CONFIG_SYSTEM_SUSPEND
> +static int arm_smmu_suspend(void)
> +{
> +	return -ENOSYS;
> +}
> +#endif

... unconditional failure from here?

Also, ENOSYS is clearly inappropriate to use here. EOPNOTSUPP or something yet
better distinguishable, please (if this can't be dropped altogether).

Jan