SME2 support adds the following state for HVF guests:
- Vector registers Z0, ... , Z31 (introduced by FEAT_SVE but HVF does
not support it)
- Predicate registers P0, .., P15 (also FEAT_SVE)
- ZA register
- ZT0 register
- PSTATE.{SM,ZA} bits (SVCR pseudo-register)
- SMPRI_EL1 which handles the PE's priority in the SMCU
- TPIDR2_EL0 the thread local ID register for SME
Signed-off-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
---
target/arm/hvf/hvf.c | 293 +++++++++++++++++++++++++++++++++++++++++++-
target/arm/hvf/sysreg.c.inc | 10 ++
target/arm/hvf_arm.h | 46 +++++++
target/arm/machine.c | 2 +-
4 files changed, 348 insertions(+), 3 deletions(-)
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index fa26f63a61ad8fc2ffb675bd6d3c7fd21f1a9dae..42547b175c30c01c2b7e060dcc01d1e11efb0d1b 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -397,6 +397,62 @@ static const struct hvf_reg_match hvf_fpreg_match[] = {
{ HV_SIMD_FP_REG_Q31, offsetof(CPUARMState, vfp.zregs[31]) },
};
+#if HVF_SME2_AVAILABLE
+static const struct hvf_reg_match hvf_sme2_zreg_match[] = {
+ { HV_SME_Z_REG_0, offsetof(CPUARMState, vfp.zregs[0]) },
+ { HV_SME_Z_REG_1, offsetof(CPUARMState, vfp.zregs[1]) },
+ { HV_SME_Z_REG_2, offsetof(CPUARMState, vfp.zregs[2]) },
+ { HV_SME_Z_REG_3, offsetof(CPUARMState, vfp.zregs[3]) },
+ { HV_SME_Z_REG_4, offsetof(CPUARMState, vfp.zregs[4]) },
+ { HV_SME_Z_REG_5, offsetof(CPUARMState, vfp.zregs[5]) },
+ { HV_SME_Z_REG_6, offsetof(CPUARMState, vfp.zregs[6]) },
+ { HV_SME_Z_REG_7, offsetof(CPUARMState, vfp.zregs[7]) },
+ { HV_SME_Z_REG_8, offsetof(CPUARMState, vfp.zregs[8]) },
+ { HV_SME_Z_REG_9, offsetof(CPUARMState, vfp.zregs[9]) },
+ { HV_SME_Z_REG_10, offsetof(CPUARMState, vfp.zregs[10]) },
+ { HV_SME_Z_REG_11, offsetof(CPUARMState, vfp.zregs[11]) },
+ { HV_SME_Z_REG_12, offsetof(CPUARMState, vfp.zregs[12]) },
+ { HV_SME_Z_REG_13, offsetof(CPUARMState, vfp.zregs[13]) },
+ { HV_SME_Z_REG_14, offsetof(CPUARMState, vfp.zregs[14]) },
+ { HV_SME_Z_REG_15, offsetof(CPUARMState, vfp.zregs[15]) },
+ { HV_SME_Z_REG_16, offsetof(CPUARMState, vfp.zregs[16]) },
+ { HV_SME_Z_REG_17, offsetof(CPUARMState, vfp.zregs[17]) },
+ { HV_SME_Z_REG_18, offsetof(CPUARMState, vfp.zregs[18]) },
+ { HV_SME_Z_REG_19, offsetof(CPUARMState, vfp.zregs[19]) },
+ { HV_SME_Z_REG_20, offsetof(CPUARMState, vfp.zregs[20]) },
+ { HV_SME_Z_REG_21, offsetof(CPUARMState, vfp.zregs[21]) },
+ { HV_SME_Z_REG_22, offsetof(CPUARMState, vfp.zregs[22]) },
+ { HV_SME_Z_REG_23, offsetof(CPUARMState, vfp.zregs[23]) },
+ { HV_SME_Z_REG_24, offsetof(CPUARMState, vfp.zregs[24]) },
+ { HV_SME_Z_REG_25, offsetof(CPUARMState, vfp.zregs[25]) },
+ { HV_SME_Z_REG_26, offsetof(CPUARMState, vfp.zregs[26]) },
+ { HV_SME_Z_REG_27, offsetof(CPUARMState, vfp.zregs[27]) },
+ { HV_SME_Z_REG_28, offsetof(CPUARMState, vfp.zregs[28]) },
+ { HV_SME_Z_REG_29, offsetof(CPUARMState, vfp.zregs[29]) },
+ { HV_SME_Z_REG_30, offsetof(CPUARMState, vfp.zregs[30]) },
+ { HV_SME_Z_REG_31, offsetof(CPUARMState, vfp.zregs[31]) },
+};
+
+static const struct hvf_reg_match hvf_sme2_preg_match[] = {
+ { HV_SME_P_REG_0, offsetof(CPUARMState, vfp.pregs[0]) },
+ { HV_SME_P_REG_1, offsetof(CPUARMState, vfp.pregs[1]) },
+ { HV_SME_P_REG_2, offsetof(CPUARMState, vfp.pregs[2]) },
+ { HV_SME_P_REG_3, offsetof(CPUARMState, vfp.pregs[3]) },
+ { HV_SME_P_REG_4, offsetof(CPUARMState, vfp.pregs[4]) },
+ { HV_SME_P_REG_5, offsetof(CPUARMState, vfp.pregs[5]) },
+ { HV_SME_P_REG_6, offsetof(CPUARMState, vfp.pregs[6]) },
+ { HV_SME_P_REG_7, offsetof(CPUARMState, vfp.pregs[7]) },
+ { HV_SME_P_REG_8, offsetof(CPUARMState, vfp.pregs[8]) },
+ { HV_SME_P_REG_9, offsetof(CPUARMState, vfp.pregs[9]) },
+ { HV_SME_P_REG_10, offsetof(CPUARMState, vfp.pregs[10]) },
+ { HV_SME_P_REG_11, offsetof(CPUARMState, vfp.pregs[11]) },
+ { HV_SME_P_REG_12, offsetof(CPUARMState, vfp.pregs[12]) },
+ { HV_SME_P_REG_13, offsetof(CPUARMState, vfp.pregs[13]) },
+ { HV_SME_P_REG_14, offsetof(CPUARMState, vfp.pregs[14]) },
+ { HV_SME_P_REG_15, offsetof(CPUARMState, vfp.pregs[15]) },
+};
+#endif /* HVF_SME2_AVAILABLE */
+
/*
* QEMU uses KVM system register ids in the migration format.
* Conveniently, HVF uses the same encoding of the op* and cr* parameters
@@ -408,22 +464,203 @@ static const struct hvf_reg_match hvf_fpreg_match[] = {
#define HVF_TO_KVMID(HVF) \
(CP_REG_ARM64 | CP_REG_SIZE_U64 | CP_REG_ARM64_SYSREG | (HVF))
-/* Verify this at compile-time. */
+/*
+ * Verify this at compile-time.
+ *
+ * SME2 registers are guarded by a runtime availability attribute instead of a
+ * compile-time def, so verify those at runtime in hvf_arch_init_vcpu() below.
+ */
#define DEF_SYSREG(HVF_ID, ...) \
QEMU_BUILD_BUG_ON(HVF_ID != KVMID_TO_HVF(KVMID_AA64_SYS_REG64(__VA_ARGS__)));
+#define DEF_SYSREG_15_02(...)
#include "sysreg.c.inc"
#undef DEF_SYSREG
+#undef DEF_SYSREG_15_02
#define DEF_SYSREG(HVF_ID, op0, op1, crn, crm, op2) HVF_ID,
+#define DEF_SYSREG_15_02(...)
static const hv_sys_reg_t hvf_sreg_list[] = {
#include "sysreg.c.inc"
};
#undef DEF_SYSREG
+#undef DEF_SYSREG_15_02
+
+/*
+ * Expand sysreg.c.inc once more, this time keeping only the
+ * DEF_SYSREG_15_02() entries: DEF_SYSREG() expands to nothing, so the
+ * resulting array holds just the HVF ids of the macOS 15.2 registers.
+ */
+#define DEF_SYSREG_15_02(HVF_ID, op0, op1, crn, crm, op2) HVF_ID,
+#define DEF_SYSREG(...)
+
+API_AVAILABLE(macos(15.2))
+static const hv_sys_reg_t hvf_sreg_list_sme2[] = {
+#include "sysreg.c.inc"
+};
+
+#undef DEF_SYSREG_15_02
+#undef DEF_SYSREG
+
+/*
+ * For FEAT_SME2 migration, we need to store the PSTATE.{SM,ZA} bits, which
+ * are accessible through the SVCR pseudo-register. However, the HVF API does
+ * not expose SVCR as a system register (there is no HV_SYS_REG_SVCR); it uses
+ * a custom struct, hv_vcpu_sme_state_t, instead. So we need to define our own
+ * KVMID in order to store it in cpreg_values and make it migratable.
+ */
+#define SVCR KVMID_AA64_SYS_REG64(3, 3, 4, 2, 2)
+
+#if HVF_SME2_AVAILABLE
+/*
+ * hvf_arch_put_sme() - push QEMU's SME state into the HVF vCPU.
+ *
+ * PSTATE.{SM,ZA} is taken from the SVCR pseudo-register slot, which
+ * hvf_arch_init_vcpu() appends as the last cpreg entry. The value is mirrored
+ * into env->svcr and programmed with hv_vcpu_set_sme_state(). Z0-Z31 and
+ * P0-P15 are written only while PSTATE.SM is set; ZT0 and ZA only while
+ * PSTATE.ZA is set. Every HVF call is checked with assert_hvf_ok().
+ */
+API_AVAILABLE(macos(15.2))
+static void hvf_arch_put_sme(CPUState *cpu)
+{
+    ARMCPU *arm_cpu = ARM_CPU(cpu);
+    CPUARMState *env = &arm_cpu->env;
+    const size_t svl_bytes = hvf_arm_sme2_get_svl();
+    const size_t z_size = svl_bytes;
+    const size_t preg_size = DIV_ROUND_UP(z_size, 8);
+    const size_t za_size = svl_bytes * svl_bytes;
+    hv_vcpu_sme_state_t sme_state = { 0 };
+    hv_return_t ret;
+    size_t svcr_idx;
+    size_t n;
+
+    /*
+     * All transfers below are sized from the host's streaming vector length,
+     * so make sure they cannot run past the fixed-size buffers in env.
+     */
+    assert(z_size <= sizeof(env->vfp.zregs[0]));
+    assert(preg_size <= sizeof(env->vfp.pregs[0]));
+    assert(za_size <= sizeof(env->za_state.za));
+
+    /*
+     * Set PSTATE.{SM,ZA} bits. SVCR is expected to be the final cpreg entry;
+     * verify that before trusting the slot.
+     */
+    assert(arm_cpu->cpreg_array_len > 0);
+    svcr_idx = arm_cpu->cpreg_array_len - 1;
+    assert(arm_cpu->cpreg_indexes[svcr_idx] == HVF_TO_KVMID(SVCR));
+    env->svcr = arm_cpu->cpreg_values[svcr_idx];
+
+    /*
+     * Construct SVCR (PSTATE.{SM,ZA}) state to pass to HVF:
+     */
+    sme_state.streaming_sve_mode_enabled = FIELD_EX64(env->svcr, SVCR, SM) > 0;
+    sme_state.za_storage_enabled = FIELD_EX64(env->svcr, SVCR, ZA) > 0;
+    ret = hv_vcpu_set_sme_state(cpu->accel->fd, &sme_state);
+    assert_hvf_ok(ret);
+
+    /*
+     * We only care about Z/P registers if we're in streaming SVE mode, i.e.
+     * PSTATE.SM is set, because only then can instructions that access them be
+     * used. We don't care about the register values otherwise. This is because
+     * when the processing unit exits/enters this mode, it zeroes out those
+     * registers.
+     */
+    if (sme_state.streaming_sve_mode_enabled) {
+        for (n = 0; n < ARRAY_SIZE(hvf_sme2_zreg_match); ++n) {
+            ret = hv_vcpu_set_sme_z_reg(cpu->accel->fd,
+                                        hvf_sme2_zreg_match[n].reg,
+                                        (uint8_t *)&env->vfp.zregs[n].d[0],
+                                        z_size);
+            assert_hvf_ok(ret);
+        }
+
+        for (n = 0; n < ARRAY_SIZE(hvf_sme2_preg_match); ++n) {
+            ret = hv_vcpu_set_sme_p_reg(cpu->accel->fd,
+                                        hvf_sme2_preg_match[n].reg,
+                                        (uint8_t *)&env->vfp.pregs[n].p[0],
+                                        preg_size);
+            assert_hvf_ok(ret);
+        }
+    }
+
+    /*
+     * If PSTATE.ZA bit is set then ZA and ZT0 are valid, otherwise they are
+     * zeroed out.
+     */
+    if (sme_state.za_storage_enabled) {
+        hv_sme_zt0_uchar64_t tmp = { 0 };
+
+        /* ZT0 is staged through HVF's vector type; the sizes must agree. */
+        QEMU_BUILD_BUG_ON(sizeof(tmp) != sizeof(env->za_state.zt0));
+        memcpy(&tmp, &env->za_state.zt0, sizeof(tmp));
+        ret = hv_vcpu_set_sme_zt0_reg(cpu->accel->fd, &tmp);
+        assert_hvf_ok(ret);
+
+        ret = hv_vcpu_set_sme_za_reg(cpu->accel->fd,
+                                     (uint8_t *)&env->za_state.za,
+                                     za_size);
+        assert_hvf_ok(ret);
+    }
+}
+
+/*
+ * hvf_arch_get_sme() - pull the SME state of the HVF vCPU into QEMU.
+ *
+ * PSTATE.{SM,ZA} is read with hv_vcpu_get_sme_state(), applied through
+ * aarch64_set_svcr() and stored in the SVCR pseudo-register slot, which
+ * hvf_arch_init_vcpu() appends as the last cpreg entry. Z0-Z31 and P0-P15
+ * are read only while PSTATE.SM is set; ZT0 and ZA only while PSTATE.ZA is
+ * set. Every HVF call is checked with assert_hvf_ok().
+ */
+API_AVAILABLE(macos(15.2))
+static void hvf_arch_get_sme(CPUState *cpu)
+{
+    ARMCPU *arm_cpu = ARM_CPU(cpu);
+    CPUARMState *env = &arm_cpu->env;
+    const size_t svl_bytes = hvf_arm_sme2_get_svl();
+    const size_t z_size = svl_bytes;
+    const size_t preg_size = DIV_ROUND_UP(z_size, 8);
+    const size_t za_size = svl_bytes * svl_bytes;
+    hv_vcpu_sme_state_t sme_state = { 0 };
+    hv_return_t ret;
+    uint64_t svcr;
+    size_t svcr_idx;
+    size_t n;
+
+    /*
+     * HVF writes SVL-sized chunks straight into env below, so make sure they
+     * cannot run past the fixed-size buffers there.
+     */
+    assert(z_size <= sizeof(env->vfp.zregs[0]));
+    assert(preg_size <= sizeof(env->vfp.pregs[0]));
+    assert(za_size <= sizeof(env->za_state.za));
+
+    /* SVCR is expected to be the final cpreg entry; verify before use. */
+    assert(arm_cpu->cpreg_array_len > 0);
+    svcr_idx = arm_cpu->cpreg_array_len - 1;
+    assert(arm_cpu->cpreg_indexes[svcr_idx] == HVF_TO_KVMID(SVCR));
+
+    /*
+     * Get SVCR (PSTATE.{SM,ZA}) state from HVF:
+     */
+    ret = hv_vcpu_get_sme_state(cpu->accel->fd, &sme_state);
+    assert_hvf_ok(ret);
+
+    /*
+     * Set SVCR first because changing it will zero out Z/P regs
+     */
+    svcr = (sme_state.za_storage_enabled ? R_SVCR_ZA_MASK : 0)
+         | (sme_state.streaming_sve_mode_enabled ? R_SVCR_SM_MASK : 0);
+
+    aarch64_set_svcr(env, svcr, R_SVCR_ZA_MASK | R_SVCR_SM_MASK);
+    arm_cpu->cpreg_values[svcr_idx] = svcr;
+
+    /*
+     * We only care about Z/P registers if we're in streaming SVE mode, i.e.
+     * PSTATE.SM is set, because only then can instructions that access them be
+     * used. We don't care about the register values otherwise. This is because
+     * when the processing unit exits/enters this mode, it zeroes out those
+     * registers.
+     */
+    if (sme_state.streaming_sve_mode_enabled) {
+        for (n = 0; n < ARRAY_SIZE(hvf_sme2_zreg_match); ++n) {
+            ret = hv_vcpu_get_sme_z_reg(cpu->accel->fd,
+                                        hvf_sme2_zreg_match[n].reg,
+                                        (uint8_t *)&env->vfp.zregs[n].d[0],
+                                        z_size);
+            assert_hvf_ok(ret);
+        }
+
+        for (n = 0; n < ARRAY_SIZE(hvf_sme2_preg_match); ++n) {
+            ret = hv_vcpu_get_sme_p_reg(cpu->accel->fd,
+                                        hvf_sme2_preg_match[n].reg,
+                                        (uint8_t *)&env->vfp.pregs[n].p[0],
+                                        preg_size);
+            assert_hvf_ok(ret);
+        }
+    }
+
+    /*
+     * If PSTATE.ZA bit is set then ZA and ZT0 are valid, otherwise they are
+     * zeroed out.
+     */
+    if (sme_state.za_storage_enabled) {
+        hv_sme_zt0_uchar64_t tmp = { 0 };
+
+        /* ZT0 is staged through HVF's vector type; the sizes must agree. */
+        QEMU_BUILD_BUG_ON(sizeof(tmp) != sizeof(env->za_state.zt0));
+
+        /* Get ZT0 in a tmp vector, and then copy it to env.za_state.zt0 */
+        ret = hv_vcpu_get_sme_zt0_reg(cpu->accel->fd, &tmp);
+        assert_hvf_ok(ret);
+        memcpy(&env->za_state.zt0, &tmp, sizeof(tmp));
+
+        ret = hv_vcpu_get_sme_za_reg(cpu->accel->fd,
+                                     (uint8_t *)&env->za_state.za,
+                                     za_size);
+        assert_hvf_ok(ret);
+    }
+}
+#endif /* HVF_SME2_AVAILABLE */
int hvf_arch_get_registers(CPUState *cpu)
{
@@ -465,6 +702,10 @@ int hvf_arch_get_registers(CPUState *cpu)
uint64_t kvm_id = arm_cpu->cpreg_indexes[i];
int hvf_id = KVMID_TO_HVF(kvm_id);
+ if (kvm_id == HVF_TO_KVMID(SVCR)) {
+ continue;
+ }
+
if (cpu->accel->guest_debug_enabled) {
/* Handle debug registers */
switch (hvf_id) {
@@ -558,6 +799,13 @@ int hvf_arch_get_registers(CPUState *cpu)
arm_cpu->cpreg_values[i] = val;
}
+#if HVF_SME2_AVAILABLE
+ if (__builtin_available(macOS 15.2, *)) {
+ if (cpu_isar_feature(aa64_sme, arm_cpu)) {
+ hvf_arch_get_sme(cpu);
+ }
+ }
+#endif /* HVF_SME2_AVAILABLE */
assert(write_list_to_cpustate(arm_cpu));
aarch64_restore_sp(env, arm_current_el(env));
@@ -603,6 +851,10 @@ int hvf_arch_put_registers(CPUState *cpu)
uint64_t kvm_id = arm_cpu->cpreg_indexes[i];
int hvf_id = KVMID_TO_HVF(kvm_id);
+ if (kvm_id == HVF_TO_KVMID(SVCR)) {
+ continue;
+ }
+
if (cpu->accel->guest_debug_enabled) {
/* Handle debug registers */
switch (hvf_id) {
@@ -687,6 +939,13 @@ int hvf_arch_put_registers(CPUState *cpu)
ret = hv_vcpu_set_vtimer_offset(cpu->accel->fd, hvf_state->vtimer_offset);
assert_hvf_ok(ret);
+#if HVF_SME2_AVAILABLE
+ if (__builtin_available(macOS 15.2, *)) {
+ if (cpu_isar_feature(aa64_sme, arm_cpu)) {
+ hvf_arch_put_sme(cpu);
+ }
+ }
+#endif /* HVF_SME2_AVAILABLE */
return 0;
}
@@ -909,6 +1168,18 @@ int hvf_arch_init_vcpu(CPUState *cpu)
hv_return_t ret;
int i;
+ if (__builtin_available(macOS 15.2, *)) {
+ sregs_match_len += ARRAY_SIZE(hvf_sreg_list_sme2) + 1;
+
+#define DEF_SYSREG_15_02(HVF_ID, ...) \
+ g_assert(HVF_ID == KVMID_TO_HVF(KVMID_AA64_SYS_REG64(__VA_ARGS__)));
+#define DEF_SYSREG(...)
+
+#include "sysreg.c.inc"
+
+#undef DEF_SYSREG
+#undef DEF_SYSREG_15_02
+ }
env->aarch64 = true;
asm volatile("mrs %0, cntfrq_el0" : "=r"(arm_cpu->gt_cntfrq_hz));
@@ -927,7 +1198,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
memset(arm_cpu->cpreg_values, 0, sregs_match_len * sizeof(uint64_t));
/* Populate cp list for all known sysregs */
- for (i = 0; i < sregs_match_len; i++) {
+ for (i = 0; i < ARRAY_SIZE(hvf_sreg_list); i++) {
hv_sys_reg_t hvf_id = hvf_sreg_list[i];
uint64_t kvm_id = HVF_TO_KVMID(hvf_id);
uint32_t key = kvm_to_cpreg_id(kvm_id);
@@ -938,6 +1209,24 @@ int hvf_arch_init_vcpu(CPUState *cpu)
arm_cpu->cpreg_indexes[sregs_cnt++] = kvm_id;
}
}
+ if (__builtin_available(macOS 15.2, *)) {
+ for (i = 0; i < ARRAY_SIZE(hvf_sreg_list_sme2); i++) {
+ hv_sys_reg_t hvf_id = hvf_sreg_list_sme2[i];
+ uint64_t kvm_id = HVF_TO_KVMID(hvf_id);
+ uint32_t key = kvm_to_cpreg_id(kvm_id);
+ const ARMCPRegInfo *ri = get_arm_cp_reginfo(arm_cpu->cp_regs, key);
+
+ if (ri) {
+ assert(!(ri->type & ARM_CP_NO_RAW));
+ arm_cpu->cpreg_indexes[sregs_cnt++] = kvm_id;
+ }
+ }
+ /*
+ * Add SVCR last. It is elsewhere assumed its index is after
+ * hvf_sreg_list and hvf_sreg_list_sme2.
+ */
+ arm_cpu->cpreg_indexes[sregs_cnt++] = HVF_TO_KVMID(SVCR);
+ }
arm_cpu->cpreg_array_len = sregs_cnt;
arm_cpu->cpreg_vmstate_array_len = sregs_cnt;
diff --git a/target/arm/hvf/sysreg.c.inc b/target/arm/hvf/sysreg.c.inc
index 067a8603fa785593ed0879cea26d036b0ec2823e..fb973ec19b747b445b57d7fc15e8d0a05336f941 100644
--- a/target/arm/hvf/sysreg.c.inc
+++ b/target/arm/hvf/sysreg.c.inc
@@ -145,3 +145,13 @@ DEF_SYSREG(HV_SYS_REG_TPIDRRO_EL0, 3, 3, 13, 0, 3)
DEF_SYSREG(HV_SYS_REG_CNTV_CTL_EL0, 3, 3, 14, 3, 1)
DEF_SYSREG(HV_SYS_REG_CNTV_CVAL_EL0, 3, 3, 14, 3, 2)
DEF_SYSREG(HV_SYS_REG_SP_EL1, 3, 4, 4, 1, 0)
+
+#if HVF_SME2_AVAILABLE
+/*
+ * System registers only present in the macOS 15.2 SDK. Each register must be
+ * listed exactly once (every entry becomes one cpreg slot); the entries are
+ * sorted by encoding.
+ */
+DEF_SYSREG_15_02(HV_SYS_REG_ID_AA64ZFR0_EL1, 3, 0, 0, 4, 4)
+DEF_SYSREG_15_02(HV_SYS_REG_ID_AA64SMFR0_EL1, 3, 0, 0, 4, 5)
+DEF_SYSREG_15_02(HV_SYS_REG_SMPRI_EL1, 3, 0, 1, 2, 4)
+DEF_SYSREG_15_02(HV_SYS_REG_SMCR_EL1, 3, 0, 1, 2, 6)
+DEF_SYSREG_15_02(HV_SYS_REG_TPIDR2_EL0, 3, 3, 13, 0, 5)
+#endif /* HVF_SME2_AVAILABLE */
diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h
index ea82f2691dfcce780d9ab7d580c8a7946e3abaa7..adb282f02d0fc059b6ff0ce5270ad9f447f37927 100644
--- a/target/arm/hvf_arm.h
+++ b/target/arm/hvf_arm.h
@@ -13,6 +13,14 @@
#include "target/arm/cpu-qom.h"
+#ifdef __MAC_OS_X_VERSION_MAX_ALLOWED
+ #define HVF_SME2_AVAILABLE (__MAC_OS_X_VERSION_MAX_ALLOWED >= 150200)
+ #include "system/hvf_int.h"
+#else
+ #define HVF_SME2_AVAILABLE 0
+#endif
+
+
/**
* hvf_arm_init_debug() - initialize guest debug capabilities
*
@@ -25,4 +33,42 @@ void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu);
uint32_t hvf_arm_get_default_ipa_bit_size(void);
uint32_t hvf_arm_get_max_ipa_bit_size(void);
+#if HVF_SME2_AVAILABLE
+/**
+ * hvf_arm_sme2_supported() - report whether HVF offers SME2 on this host
+ *
+ * Returns false when running on a macOS older than 15.2 or when HVF answers
+ * HV_UNSUPPORTED to the maximum-SVL query; any other HVF error is fatal.
+ * Otherwise returns whether the reported maximum SVL is non-zero.
+ */
+static inline bool hvf_arm_sme2_supported(void)
+{
+    if (__builtin_available(macOS 15.2, *)) {
+        size_t max_svl;
+        hv_return_t rc = hv_sme_config_get_max_svl_bytes(&max_svl);
+
+        if (rc == HV_UNSUPPORTED) {
+            return false;
+        }
+        assert_hvf_ok(rc);
+        return max_svl > 0;
+    }
+
+    return false;
+}
+
+/**
+ * hvf_arm_sme2_get_svl() - query the maximum streaming vector length
+ *
+ * Returns the value reported by hv_sme_config_get_max_svl_bytes(). Aborts
+ * when running on a macOS older than 15.2; an HVF error is fatal as well.
+ */
+static inline uint32_t hvf_arm_sme2_get_svl(void)
+{
+    if (__builtin_available(macOS 15.2, *)) {
+        size_t max_svl;
+        hv_return_t rc = hv_sme_config_get_max_svl_bytes(&max_svl);
+
+        assert_hvf_ok(rc);
+        return max_svl;
+    }
+
+    abort();
+}
+#else /* HVF_SME2_AVAILABLE */
+/* Built against an SDK without the SME2 API: SME2 is never available. */
+static inline bool hvf_arm_sme2_supported(void)
+{
+    return false;
+}
+
+/* Built against an SDK without the SME2 API: there is no SVL to report. */
+static inline uint32_t hvf_arm_sme2_get_svl(void)
+{
+    abort();
+}
+#endif /* HVF_SME2_AVAILABLE */
+
#endif
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 0befdb0b28ad3f45b0bd83575dd9fd4fecaf3db1..7e3339081bf1f9f57bb0514d795c8a4254975c6a 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -231,7 +231,7 @@ static bool sve_needed(void *opaque)
{
ARMCPU *cpu = opaque;
- return cpu_isar_feature(aa64_sve, cpu);
+ return cpu_isar_feature(aa64_sve, cpu) || cpu_isar_feature(aa64_sme, cpu);
}
/* The first two words of each Zreg is stored in VFP state. */
--
2.47.3
Hi Manos!
On 14.01.26 09:35, Manos Pitsidianakis wrote:
> SME2 support adds the following state for HVF guests:
>
> - Vector registers Z0, ... , Z31 (introduced by FEAT_SVE but HVF does
> not support it)
> - Predicate registers P0, .., P15 (also FEAT_SVE)
> - ZA register
> - ZT0 register
> - PSTATE.{SM,ZA} bits (SVCR pseudo-register)
> - SMPRI_EL1 which handles the PE's priority in the SMCU
> - TPIDR2_EL0 the thread local ID register for SME
>
> Signed-off-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
Thanks a lot for the patches. I don't have an M4/M5 handy (yet), so I
can't test the code works correctly. But it sounds like you did that, so
I have no concerns on functionality.
However, I have concerns on a few maintainability aspects. You #ifdef
out a lot of code conditionally on the target macOS version. Any of that
code that is in ifdef may or may not end up getting compiled in CI or
other test builds, which means you are missing out on a lot of
compilation test coverage. As a general rule of thumb, please reduce
#ifdef to the bare minimum.
One thing I like to do (when possible) is to use the ifdef to define a
global const variable or an inline function. That way the compiler's
dead code analysis will eliminate the non-active aspects of your code,
but all previous compiler phases still run which means you get syntax
checks.
How much of the code down here really does require #ifdefs? And if it's
a lot, maybe we just bump the minimum required macOS version instead.
Alex
On Wed, Jan 14, 2026 at 1:19 PM Alexander Graf <agraf@csgraf.de> wrote:
>
> Hi Manos!
>
> On 14.01.26 09:35, Manos Pitsidianakis wrote:
> > SME2 support adds the following state for HVF guests:
> >
> > - Vector registers Z0, ... , Z31 (introduced by FEAT_SVE but HVF does
> > not support it)
> > - Predicate registers P0, .., P15 (also FEAT_SVE)
> > - ZA register
> > - ZT0 register
> > - PSTATE.{SM,ZA} bits (SVCR pseudo-register)
> > - SMPRI_EL1 which handles the PE's priority in the SMCU
> > - TPIDR2_EL0 the thread local ID register for SME
> >
> > Signed-off-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
>
>
> Thanks a lot for the patches. I don't have an M4/M5 handy (yet), so I
> can't test the code works correctly. But it sounds like you did that, so
> I have no concerns on functionality.
Yes, I tested it on Linaro's M4 machine. Adding a functional test
using Arm's benchmark is possible, but I couldn't be bothered to
create vm image assets just for this :)
Note: testing migration is tricky because the CPU has to be in SME
mode for the Z/P registers to have meaningful values, so you need to
savevm while SME instructions are executed to get a useful migration
state. I modified Arm's benchmark to compare the SME matrix
calculations against the non-SME calculation to ensure they are the
same and I savevm/loadvm a bunch of times while it ran with added
printfs in the get/put methods to print whether SME was active or not.
>
> However, I have concerns on a few maintainability aspects. You #ifdef
> out a lot of code conditionally on the target macOS version. Any of that
> code that is in ifdef may or may not end up getting compiled in CI or
> other test builds, which means you are missing out on a lot of
> compilation test coverage. As a general rule of thumb, please reduce
> #ifdef to the bare minimum.
I agree completely. The problem is two-fold here: HVF exposes some APIs
with API_AVAILABLE(...) (clang's availability attribute) and others
behind plain ifdefs (__MAC_OS_X_VERSION_MAX_ALLOWED >= 150200),
specifically the HV_SME_FOO_REG variants, which cannot be protected
with the availability attribute.
So every time an SME type such as hv_vcpu_sme_state_t is used, it has
to be guarded :/ __builtin_available(...) check is not sufficient
because it will not compile due to undefined types.
>
> One thing I like to do (when possible) is to use the ifdef to define a
> global const variable or an inline function. That way the compiler's
> dead code analysis will eliminate the non-active aspects of your code,
> but all previous compiler phases still run which means you get syntax
> checks.
>
> How much of the code down here really does require #ifdefs? And if it's
> a lot, maybe we just bump the minimum required macOS version instead.
>
The missing type definitions and missing function declarations will
still raise compiler errors unfortunately. We could add those missing
type definitions as stubs if they are not defined so that it compiles,
but I chose to ifdef instead. What would you prefer?
>
> Alex
>
--
Manos Pitsidianakis
Emulation and Virtualization Engineer at Linaro Ltd
On 14.01.26 12:51, Manos Pitsidianakis wrote:
> On Wed, Jan 14, 2026 at 1:19 PM Alexander Graf <agraf@csgraf.de> wrote:
>> Hi Manos!
>>
>> On 14.01.26 09:35, Manos Pitsidianakis wrote:
>>> SME2 support adds the following state for HVF guests:
>>>
>>> - Vector registers Z0, ... , Z31 (introduced by FEAT_SVE but HVF does
>>> not support it)
>>> - Predicate registers P0, .., P15 (also FEAT_SVE)
>>> - ZA register
>>> - ZT0 register
>>> - PSTATE.{SM,ZA} bits (SVCR pseudo-register)
>>> - SMPRI_EL1 which handles the PE's priority in the SMCU
>>> - TPIDR2_EL0 the thread local ID register for SME
>>>
>>> Signed-off-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
>>
>> Thanks a lot for the patches. I don't have an M4/M5 handy (yet), so I
>> can't test the code works correctly. But it sounds like you did that, so
>> I have no concerns on functionality.
> Yes, I tested it on Linaro's M4 machine. Adding a functional test
> using Arm's benchmark is possible, but I couldn't be bothered to
> create vm image assets just for this :)
>
> Note: testing migration is tricky because the CPU has to be in SME
> mode for the Z/P registers to have meaningful values, so you need to
> savevm while SME instructions are executed to get a useful migration
> state. I modified Arm's benchmark to compare the SME matrix
> calculations against the non-SME calculation to ensure they are the
> same and I savevm/loadvm a bunch of times while it ran with added
> printfs in the get/put methods to print whether SME was active or not.
>
>> However, I have concerns on a few maintainability aspects. You #ifdef
>> out a lot of code conditionally on the target macOS version. Any of that
>> code that is in ifdef may or may not end up getting compiled in CI or
>> other test builds, which means you are missing out on a lot of
>> compilation test coverage. As a general rule of thumb, please reduce
>> #ifdef to the bare minimum.
> I agree completely, the problem is two-fold here: the HVF exposes APIs
> with API_AVAILABLE(...) (clang's availability attribute) and also with
> plain ifdefs (__MAC_OS_X_VERSION_MAX_ALLOWED >= 150200), and
> specifically the HV_SME_FOO_REG variants, which cannot be protected
> with the availability attribute. )
>
> So every time an SME type such as hv_vcpu_sme_state_t is used, it has
> to be guarded :/ __builtin_available(...) check is not sufficient
> because it will not compile due to undefined types.
>
>
>> One thing I like to do (when possible) is to use the ifdef to define a
>> global const variable or an inline function. That way the compiler's
>> dead code analysis will eliminate the non-active aspects of your code,
>> but all previous compiler phases still run which means you get syntax
>> checks.
>>
>> How much of the code down here really does require #ifdefs? And if it's
>> a lot, maybe we just bump the minimum required macOS version instead.
>>
> The missing type definitions and missing function declarations will
> still raise compiler errors unfortunately. We could add those missing
> type definitions as stubs if they are not defined so that it compiles,
> but I chose to ifdef instead. What would you prefer?
How about you create a separate #include'd header that provides stubs
and defines the same way the original headers would? You can #ifdef your
way inside there all you want. And then you write the generic code with
the base assumption that all definitions are available.
Alex
On Wed, Jan 14, 2026 at 4:14 PM Alexander Graf <agraf@csgraf.de> wrote:
>
>
> On 14.01.26 12:51, Manos Pitsidianakis wrote:
> > On Wed, Jan 14, 2026 at 1:19 PM Alexander Graf <agraf@csgraf.de> wrote:
> >> Hi Manos!
> >>
> >> On 14.01.26 09:35, Manos Pitsidianakis wrote:
> >>> SME2 support adds the following state for HVF guests:
> >>>
> >>> - Vector registers Z0, ... , Z31 (introduced by FEAT_SVE but HVF does
> >>> not support it)
> >>> - Predicate registers P0, .., P15 (also FEAT_SVE)
> >>> - ZA register
> >>> - ZT0 register
> >>> - PSTATE.{SM,ZA} bits (SVCR pseudo-register)
> >>> - SMPRI_EL1 which handles the PE's priority in the SMCU
> >>> - TPIDR2_EL0 the thread local ID register for SME
> >>>
> >>> Signed-off-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
> >>
> >> Thanks a lot for the patches. I don't have an M4/M5 handy (yet), so I
> >> can't test the code works correctly. But it sounds like you did that, so
> >> I have no concerns on functionality.
> > Yes, I tested it on Linaro's M4 machine. Adding a functional test
> > using Arm's benchmark is possible, but I couldn't be bothered to
> > create vm image assets just for this :)
> >
> > Note: testing migration is tricky because the CPU has to be in SME
> > mode for the Z/P registers to have meaningful values, so you need to
> > savevm while SME instructions are executed to get a useful migration
> > state. I modified Arm's benchmark to compare the SME matrix
> > calculations against the non-SME calculation to ensure they are the
> > same and I savevm/loadvm a bunch of times while it ran with added
> > printfs in the get/put methods to print whether SME was active or not.
> >
> >> However, I have concerns on a few maintainability aspects. You #ifdef
> >> out a lot of code conditionally on the target macOS version. Any of that
> >> code that is in ifdef may or may not end up getting compiled in CI or
> >> other test builds, which means you are missing out on a lot of
> >> compilation test coverage. As a general rule of thumb, please reduce
> >> #ifdef to the bare minimum.
> > I agree completely, the problem is two-fold here: the HVF exposes APIs
> > with API_AVAILABLE(...) (clang's availability attribute) and also with
> > plain ifdefs (__MAC_OS_X_VERSION_MAX_ALLOWED >= 150200), and
> > specifically the HV_SME_FOO_REG variants, which cannot be protected
> > with the availability attribute. )
> >
> > So every time an SME type such as hv_vcpu_sme_state_t is used, it has
> > to be guarded :/ __builtin_available(...) check is not sufficient
> > because it will not compile due to undefined types.
> >
> >
> >> One thing I like to do (when possible) is to use the ifdef to define a
> >> global const variable or an inline function. That way the compiler's
> >> dead code analysis will eliminate the non-active aspects of your code,
> >> but all previous compiler phases still run which means you get syntax
> >> checks.
> >>
> >> How much of the code down here really does require #ifdefs? And if it's
> >> a lot, maybe we just bump the minimum required macOS version instead.
> >>
> > The missing type definitions and missing function declarations will
> > still raise compiler errors unfortunately. We could add those missing
> > type definitions as stubs if they are not defined so that it compiles,
> > but I chose to ifdef instead. What would you prefer?
>
>
> How about you create a separate #include'd header that provides stubs
> and defines the same way the original headers would? You can #ifdef your
> way inside there all you want. And then you write the generic code with
> the base assumption that all definitions are available.
>
> Alex
I will do that, and add a comment to remind us to remove the
duplication once we raise the minimum macOS version. Will send a v2.
Thanks!
--
Manos Pitsidianakis
Emulation and Virtualization Engineer at Linaro Ltd
> On 14. Jan 2026, at 12:51, Manos Pitsidianakis <manos.pitsidianakis@linaro.org> wrote:
>
> I agree completely, the problem is two-fold here: the HVF exposes APIs
> with API_AVAILABLE(...) (clang's availability attribute) and also with
> plain ifdefs (__MAC_OS_X_VERSION_MAX_ALLOWED >= 150200), and
> specifically the HV_SME_FOO_REG variants, which cannot be protected
> with the availability attribute. )
>
> So every time an SME type such as hv_vcpu_sme_state_t is used, it has
> to be guarded :/ __builtin_available(...) check is not sufficient
> because it will not compile due to undefined types.

Hello,

Newer SDKs can still target older OSes, so I think we might get away with
just requiring a new SDK while still running on older OSes?
© 2016 - 2026 Red Hat, Inc.