From: Mirela Simonovic <mirela.simonovic@aggios.com>
Trigger Xen suspend when the hardware domain initiates suspend via
SHUTDOWN_suspend. Redirect system suspend to CPU#0 to ensure the
suspend logic runs on the boot CPU, as required.
Introduce full suspend/resume infrastructure gated by CONFIG_SYSTEM_SUSPEND,
including logic to:
- disable and enable non-boot physical CPUs
- freeze and thaw domains
- suspend and resume the GIC, timer, iommu and console
- maintain system state before and after suspend
On boot, init_ttbr is normally initialized during secondary CPU hotplug.
On uniprocessor systems, this would leave init_ttbr uninitialized,
causing resume to fail. To address this, the boot CPU now sets init_ttbr
during suspend.
Remove the restriction in the vPSCI interface preventing suspend from the
hardware domain.
Select HAS_SYSTEM_SUSPEND for ARM_64.
Introduce CONFIG_HAS_HWDOM_SYSTEM_SUSPEND as an architecture-selected
capability for platforms where the hardware domain survives
SHUTDOWN_suspend without hwdom_shutdown(). ARM_64 selects it with
SYSTEM_SUSPEND enabled; other architectures keep the existing behaviour.
Note: the code is behind CONFIG_HAS_SYSTEM_SUSPEND, which is currently only
selected when UNSUPPORTED is set and when MPU isn't set, so the functionality
is neither built nor enabled by default.
Signed-off-by: Mirela Simonovic <mirela.simonovic@aggios.com>
Signed-off-by: Saeed Nowshadi <saeed.nowshadi@xilinx.com>
Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
Signed-off-by: Mykola Kvach <mykola_kvach@epam.com>
---
Changes in V8:
- Add a pre-suspend check in system_suspend() after scheduler_disable()
to require all domains to be in the shut down state with
SHUTDOWN_suspend before proceeding with the global suspend flow.
- Drop the common-level depends on !ARM_64 || !SYSTEM_SUSPEND from
CONFIG_HAS_HWDOM_SHUTDOWN_ON_SUSPEND and model the ARM64 suspend
case with an arch-selected capability instead.
- Rename CONFIG_HAS_HWDOM_SHUTDOWN_ON_SUSPEND to CONFIG_HAS_HWDOM_SYSTEM_SUSPEND.
- Rename need_hwdom_shutdown() to want_hwdom_shutdown().
Changes in V7:
- Control domain is responsible for host suspend
- Move the is_hardware_domain check into host_system_suspend()
- Add an empty inline host_system_suspend() function when SYSTEM_SUSPEND
config is disabled
- Use IS_ENABLED() for config checking instead of #ifdef
- Replace #ifdef checks in domain_shutdown() with IS_ENABLED() to simplify
control flow.
- Factor hardware domain shutdown condition into a helper
(need_hwdom_shutdown()) to avoid preprocessor directives inside the function.
- Squash with iommu suspend/resume commit
---
xen/arch/arm/Kconfig | 2 +
xen/arch/arm/include/asm/mm.h | 2 +
xen/arch/arm/include/asm/suspend.h | 7 +-
xen/arch/arm/mmu/smpboot.c | 2 +-
xen/arch/arm/suspend.c | 181 +++++++++++++++++++++++++++++
xen/arch/arm/vpsci.c | 12 +-
xen/common/Kconfig | 3 +
xen/common/domain.c | 7 +-
xen/drivers/passthrough/arm/smmu.c | 10 ++
9 files changed, 220 insertions(+), 6 deletions(-)
diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 2f2b501fda..c2e63ce8ff 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -8,6 +8,8 @@ config ARM_64
depends on !ARM_32
select 64BIT
select HAS_FAST_MULTIPLY
+ select HAS_HWDOM_SYSTEM_SUSPEND if SYSTEM_SUSPEND
+ select HAS_SYSTEM_SUSPEND if !MPU && UNSUPPORTED
select HAS_VPCI_GUEST_SUPPORT if PCI_PASSTHROUGH
config ARM
diff --git a/xen/arch/arm/include/asm/mm.h b/xen/arch/arm/include/asm/mm.h
index 72a6928624..87b54a55dc 100644
--- a/xen/arch/arm/include/asm/mm.h
+++ b/xen/arch/arm/include/asm/mm.h
@@ -360,6 +360,8 @@ static inline void page_set_xenheap_gfn(struct page_info *p, gfn_t gfn)
} while ( (y = cmpxchg(&p->u.inuse.type_info, x, nx)) != x );
}
+void set_init_ttbr(lpae_t *root);
+
#endif /* __ARCH_ARM_MM__ */
/*
* Local variables:
diff --git a/xen/arch/arm/include/asm/suspend.h b/xen/arch/arm/include/asm/suspend.h
index c127fa3d78..c36ba23b10 100644
--- a/xen/arch/arm/include/asm/suspend.h
+++ b/xen/arch/arm/include/asm/suspend.h
@@ -38,7 +38,12 @@ extern struct cpu_context cpu_context;
int prepare_resume_ctx(struct cpu_context *ptr);
void hyp_resume(void);
-#endif /* CONFIG_SYSTEM_SUSPEND */
+void host_system_suspend(struct domain *d);
+
+#else /* !CONFIG_SYSTEM_SUSPEND */
+
+/* No-op stub used when CONFIG_SYSTEM_SUSPEND is disabled. */
+static inline void host_system_suspend(struct domain *d) { (void)d; }
+#endif
#endif /* ARM_SUSPEND_H */
diff --git a/xen/arch/arm/mmu/smpboot.c b/xen/arch/arm/mmu/smpboot.c
index 37e91d72b7..ff508ecf40 100644
--- a/xen/arch/arm/mmu/smpboot.c
+++ b/xen/arch/arm/mmu/smpboot.c
@@ -72,7 +72,7 @@ static void clear_boot_pagetables(void)
clear_table(boot_third);
}
-static void set_init_ttbr(lpae_t *root)
+void set_init_ttbr(lpae_t *root)
{
/*
* init_ttbr is part of the identity mapping which is read-only. So
diff --git a/xen/arch/arm/suspend.c b/xen/arch/arm/suspend.c
index e38566b0b7..4d1289776b 100644
--- a/xen/arch/arm/suspend.c
+++ b/xen/arch/arm/suspend.c
@@ -1,9 +1,190 @@
/* SPDX-License-Identifier: GPL-2.0-only */
+#include <asm/psci.h>
#include <asm/suspend.h>
+#include <public/sched.h>
+#include <xen/console.h>
+#include <xen/cpu.h>
+#include <xen/errno.h>
+#include <xen/iommu.h>
+#include <xen/sched.h>
+#include <xen/tasklet.h>
+
struct cpu_context cpu_context = {};
+/*
+ * Check whether every domain is ready for a host suspend.
+ *
+ * Walks the domain list under the domlist RCU read lock and, for each
+ * domain, samples is_shut_down and shutdown_code under d->shutdown_lock.
+ *
+ * Returns 0 if all domains are shut down with SHUTDOWN_suspend; otherwise
+ * logs the first domain that is not and returns -EBUSY.
+ */
+static int can_system_suspend(void)
+{
+ int ret = 0;
+ struct domain *d;
+
+ rcu_read_lock(&domlist_read_lock);
+
+ for_each_domain ( d )
+ {
+ bool domain_suspended;
+
+ spin_lock(&d->shutdown_lock);
+ domain_suspended = d->is_shut_down &&
+ d->shutdown_code == SHUTDOWN_suspend;
+ spin_unlock(&d->shutdown_lock);
+
+ if ( domain_suspended )
+ continue;
+
+ printk(XENLOG_ERR
+ "System suspend requires all domains to be shut down for suspend (dom%d: isn't in suspend state)\n",
+ d->domain_id);
+
+ ret = -EBUSY;
+ break;
+ }
+
+ rcu_read_unlock(&domlist_read_lock);
+
+ return ret;
+}
+
+/*
+ * Xen suspend. data identifies the domain that initiated suspend.
+ *
+ * The suspend steps run in a fixed order: freeze domains, disable the
+ * scheduler, check that all domains are suspended, offline non-boot CPUs,
+ * then suspend time, IOMMU, console and GIC before issuing the PSCI system
+ * suspend call. A failure at any step jumps to the label that undoes only
+ * the steps already completed, so the labels at the end of the function
+ * must stay in the reverse order of the steps above.
+ *
+ * On every path (successful resume or failed suspend) system_state is
+ * returned to SYS_STATE_active and the initiating domain is resumed via
+ * domain_resume().
+ */
+static void system_suspend(void *data)
+{
+ int status;
+ unsigned long flags;
+ struct domain *d = (struct domain *)data;
+
+ BUG_ON(system_state != SYS_STATE_active);
+
+ system_state = SYS_STATE_suspend;
+
+ printk("Xen suspending...\n");
+
+ freeze_domains();
+ scheduler_disable();
+
+ status = can_system_suspend();
+ if ( status )
+ {
+ system_state = SYS_STATE_resume;
+ goto resume_scheduler;
+ }
+
+ /*
+ * Non-boot CPUs have to be disabled on suspend and enabled on resume
+ * (hotplug-based mechanism). Disabling non-boot CPUs will lead to PSCI
+ * CPU_OFF to be called by each non-boot CPU. Depending on the underlying
+ * platform capabilities, this may lead to the physical powering down of
+ * CPUs.
+ */
+ status = disable_nonboot_cpus();
+ if ( status )
+ {
+ system_state = SYS_STATE_resume;
+ goto resume_nonboot_cpus;
+ }
+
+ time_suspend();
+
+ status = iommu_suspend();
+ if ( status )
+ {
+ system_state = SYS_STATE_resume;
+ goto resume_time;
+ }
+
+ console_start_sync();
+ status = console_suspend();
+ if ( status )
+ {
+ dprintk(XENLOG_ERR, "Failed to suspend the console, err=%d\n", status);
+ system_state = SYS_STATE_resume;
+ goto resume_end_sync;
+ }
+
+ local_irq_save(flags);
+ status = gic_suspend();
+ if ( status )
+ {
+ system_state = SYS_STATE_resume;
+ goto resume_irqs;
+ }
+
+ /*
+ * init_ttbr is normally set up during secondary CPU hotplug; set it here
+ * so that resume also works on uniprocessor systems.
+ */
+ set_init_ttbr(xen_pgtable);
+
+ /*
+ * Enable identity mapping before entering suspend to simplify
+ * the resume path
+ */
+ update_boot_mapping(true);
+
+ if ( prepare_resume_ctx(&cpu_context) )
+ {
+ status = call_psci_system_suspend();
+ /*
+ * If suspend is finalized properly by above system suspend PSCI call,
+ * the code below in this 'if' branch will never execute. Execution
+ * will continue from hyp_resume which is the hypervisor's resume point.
+ * In hyp_resume CPU context will be restored and since link-register is
+ * restored as well, it will appear to return from prepare_resume_ctx.
+ * The difference in returning from prepare_resume_ctx on system suspend
+ * versus resume is in function's return value: on suspend, the return
+ * value is a non-zero value, on resume it is zero. That is why the
+ * control flow will not re-enter this 'if' branch on resume.
+ */
+ if ( status )
+ dprintk(XENLOG_WARNING, "PSCI system suspend failed, err=%d\n",
+ status);
+ }
+
+ system_state = SYS_STATE_resume;
+ update_boot_mapping(false);
+
+ gic_resume();
+
+ resume_irqs:
+ local_irq_restore(flags);
+
+ console_resume();
+ resume_end_sync:
+ console_end_sync();
+
+ iommu_resume();
+
+ resume_time:
+ time_resume();
+
+ resume_nonboot_cpus:
+ /*
+ * The rcu_barrier() has to be added to ensure that the per cpu area is
+ * freed before a non-boot CPU tries to initialize it (_free_percpu_area()
+ * has to be called before the init_percpu_area()). This scenario occurs
+ * when non-boot CPUs are hot-unplugged on suspend and hotplugged on resume.
+ */
+ rcu_barrier();
+ enable_nonboot_cpus();
+
+ resume_scheduler:
+ scheduler_enable();
+ thaw_domains();
+
+ system_state = SYS_STATE_active;
+
+ printk("Resume (status %d)\n", status);
+
+ domain_resume(d);
+}
+
+static DECLARE_TASKLET(system_suspend_tasklet, system_suspend, NULL);
+
+/*
+ * Request a host (Xen) suspend on behalf of domain d.
+ *
+ * The suspend itself is performed by system_suspend(), which is deferred to
+ * a tasklet; this function only records d as the tasklet argument and
+ * schedules the tasklet.
+ */
+void host_system_suspend(struct domain *d)
+{
+ system_suspend_tasklet.data = (void *)d;
+ /*
+ * The suspend procedure has to be finalized by the pCPU#0 (non-boot pCPUs
+ * will be disabled during the suspend).
+ */
+ tasklet_schedule_on_cpu(&system_suspend_tasklet, 0);
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/arm/vpsci.c b/xen/arch/arm/vpsci.c
index bd87ec430d..8fb9172186 100644
--- a/xen/arch/arm/vpsci.c
+++ b/xen/arch/arm/vpsci.c
@@ -5,6 +5,7 @@
#include <asm/current.h>
#include <asm/domain.h>
+#include <asm/suspend.h>
#include <asm/vgic.h>
#include <asm/vpsci.h>
#include <asm/event.h>
@@ -232,8 +233,7 @@ static int32_t do_psci_1_0_system_suspend(register_t epoint, register_t cid)
if ( is_64bit_domain(d) && is_thumb )
return PSCI_INVALID_ADDRESS;
- /* SYSTEM_SUSPEND is not supported for the hardware domain yet */
- if ( is_hardware_domain(d) )
+ if ( !IS_ENABLED(CONFIG_SYSTEM_SUSPEND) && is_hardware_domain(d) )
return PSCI_NOT_SUPPORTED;
/* Ensure that all CPUs other than the calling one are offline */
@@ -266,6 +266,9 @@ static int32_t do_psci_1_0_system_suspend(register_t epoint, register_t cid)
"SYSTEM_SUSPEND requested, epoint=%#"PRIregister", cid=%#"PRIregister"\n",
epoint, cid);
+ if ( is_control_domain(d) )
+ host_system_suspend(d);
+
return rc;
}
@@ -290,7 +293,10 @@ static int32_t do_psci_1_0_features(uint32_t psci_func_id)
return 0;
case PSCI_1_0_FN32_SYSTEM_SUSPEND:
case PSCI_1_0_FN64_SYSTEM_SUSPEND:
- return is_hardware_domain(current->domain) ? PSCI_NOT_SUPPORTED : 0;
+ if ( IS_ENABLED(CONFIG_SYSTEM_SUSPEND) ||
+ !is_hardware_domain(current->domain) )
+ return 0;
+ fallthrough;
default:
return PSCI_NOT_SUPPORTED;
}
diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index 0a20aa0a12..feb1336f46 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -137,6 +137,9 @@ config HAS_EX_TABLE
config HAS_FAST_MULTIPLY
bool
+config HAS_HWDOM_SYSTEM_SUSPEND
+ bool
+
config HAS_IOPORTS
bool
diff --git a/xen/common/domain.c b/xen/common/domain.c
index bb9e210c28..d3edfb2a13 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -1375,6 +1375,11 @@ void __domain_crash(struct domain *d)
domain_shutdown(d, SHUTDOWN_crash);
}
+/*
+ * Tell whether a hardware domain shutdown with the given reason should go
+ * through hwdom_shutdown(). This is false only for SHUTDOWN_suspend when the
+ * architecture selects HAS_HWDOM_SYSTEM_SUSPEND.
+ */
+static inline bool want_hwdom_shutdown(uint8_t reason)
+{
+ return !IS_ENABLED(CONFIG_HAS_HWDOM_SYSTEM_SUSPEND) ||
+ reason != SHUTDOWN_suspend;
+}
int domain_shutdown(struct domain *d, u8 reason)
{
@@ -1391,7 +1396,7 @@ int domain_shutdown(struct domain *d, u8 reason)
d->shutdown_code = reason;
reason = d->shutdown_code;
- if ( is_hardware_domain(d) )
+ if ( is_hardware_domain(d) && want_hwdom_shutdown(reason) )
hwdom_shutdown(reason);
if ( d->is_shutting_down )
diff --git a/xen/drivers/passthrough/arm/smmu.c b/xen/drivers/passthrough/arm/smmu.c
index 22d306d0cb..45f29ef8ec 100644
--- a/xen/drivers/passthrough/arm/smmu.c
+++ b/xen/drivers/passthrough/arm/smmu.c
@@ -2947,6 +2947,13 @@ static void arm_smmu_iommu_domain_teardown(struct domain *d)
xfree(xen_domain);
}
+#ifdef CONFIG_SYSTEM_SUSPEND
+/*
+ * Suspend is not supported by this driver: unconditionally report failure.
+ *
+ * -EOPNOTSUPP is used rather than -ENOSYS, since the latter denotes a
+ * missing hypercall/system call and not an unsupported driver operation.
+ */
+static int arm_smmu_suspend(void)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
static const struct iommu_ops arm_smmu_iommu_ops = {
.page_sizes = PAGE_SIZE_4K,
.init = arm_smmu_iommu_domain_init,
@@ -2960,6 +2967,9 @@ static const struct iommu_ops arm_smmu_iommu_ops = {
.map_page = arm_iommu_map_page,
.unmap_page = arm_iommu_unmap_page,
.dt_xlate = arm_smmu_dt_xlate_generic,
+#ifdef CONFIG_SYSTEM_SUSPEND
+ .suspend = arm_smmu_suspend,
+#endif
};
static struct arm_smmu_device *find_smmu(const struct device *dev)
--
2.43.0
On 02.04.2026 12:45, Mykola Kvach wrote:
> +/* Xen suspend. data identifies the domain that initiated suspend. */
> +static void system_suspend(void *data)
> +{
> + int status;
> + unsigned long flags;
> + struct domain *d = (struct domain *)data;
> +
> + BUG_ON(system_state != SYS_STATE_active);
> +
> + system_state = SYS_STATE_suspend;
> +
> + printk("Xen suspending...\n");
> +
> + freeze_domains();
> + scheduler_disable();
> +
> + status = can_system_suspend();
> + if ( status )
> + {
> + system_state = SYS_STATE_resume;
> + goto resume_scheduler;
> + }
> +
> + /*
> + * Non-boot CPUs have to be disabled on suspend and enabled on resume
> + * (hotplug-based mechanism). Disabling non-boot CPUs will lead to PSCI
> + * CPU_OFF to be called by each non-boot CPU. Depending on the underlying
> + * platform capabilities, this may lead to the physical powering down of
> + * CPUs.
> + */
> + status = disable_nonboot_cpus();
> + if ( status )
> + {
> + system_state = SYS_STATE_resume;
> + goto resume_nonboot_cpus;
> + }
> +
> + time_suspend();
> +
> + status = iommu_suspend();
> + if ( status )
> + {
> + system_state = SYS_STATE_resume;
> + goto resume_time;
> + }
So you've frozen the system just to get ...
> --- a/xen/drivers/passthrough/arm/smmu.c
> +++ b/xen/drivers/passthrough/arm/smmu.c
> @@ -2947,6 +2947,13 @@ static void arm_smmu_iommu_domain_teardown(struct domain *d)
> xfree(xen_domain);
> }
>
> +#ifdef CONFIG_SYSTEM_SUSPEND
> +static int arm_smmu_suspend(void)
> +{
> + return -ENOSYS;
> +}
> +#endif
... unconditional failure from here?
Also, ENOSYS is clearly inappropriate to use here. EOPNOTSUPP or something yet
better distinguishable, please (if this can't be dropped altogether).
Jan
© 2016 - 2026 Red Hat, Inc.