:p
atchew
Login
Most of the patches here are from Tao Su's v1. The main issue in his version were two: - overlooking kvm_cpu_xsave_init(), which currently looks at ExtSaveArea. This would get a bit ugly for extended save states that are enabled by both AVX512 and AVX10. Patches 1-2 change kvm_cpu_xsave_init() to look at ExtSaveArea's size field instead of testing features. - downgrading silently to KVM reported value if the avx10_version property is >= kvm reported value. Xiaoyao Li suggested basing this on cpu->check_cpuid and cpu->enforce_cpuid. Also, the check must accept any accelerator and not just KVM. I moved the check to x86_cpu_filter_features in patch 4. I don't have a Granite Rapids machine, so please test! :) Paolo Paolo Bonzini (3): target/i386: cpu: set correct supported XCR0 features for TCG target/i386: do not rely on ExtSaveArea for accelerator-supported XCR0 bits target/i386: return bool from x86_cpu_filter_features Tao Su (5): target/i386: add AVX10 feature and AVX10 version property target/i386: add CPUID.24 features for AVX10 target/i386: Add feature dependencies for AVX10 target/i386: Add AVX512 state when AVX10 is supported target/i386: Introduce GraniteRapids-v2 model target/i386/cpu.h | 16 ++++ target/i386/cpu.c | 175 ++++++++++++++++++++++++++++++++++---- target/i386/kvm/kvm-cpu.c | 8 -- target/i386/kvm/kvm.c | 3 +- 4 files changed, 175 insertions(+), 27 deletions(-) -- 2.47.0
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/cpu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .needs_ecx = true, .ecx = 0, .reg = R_EAX, }, - .tcg_features = ~0U, + .tcg_features = XSTATE_FP_MASK | XSTATE_SSE_MASK | + XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | + XSTATE_PKRU_MASK, .migratable_flags = XSTATE_FP_MASK | XSTATE_SSE_MASK | XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK | XSTATE_Hi16_ZMM_MASK | @@ -XXX,XX +XXX,XX @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .needs_ecx = true, .ecx = 0, .reg = R_EDX, }, - .tcg_features = ~0U, + .tcg_features = 0U, }, /*Below are MSR exposed features*/ [FEAT_ARCH_CAPABILITIES] = { -- 2.47.0
Right now, QEMU is using the "feature" and "bits" fields of ExtSaveArea to query the accelerator for the support status of extended save areas. This is a problem for AVX10, which attaches two feature bits (AVX512F and AVX10) to the same extended save states. To keep the AVX10 hacks to the minimum, limit usage of esa->features and esa->bits. Instead, just query the accelerator for the 0xD leaf. Do it in common code and clear esa->size if an extended save state is unsupported. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/cpu.c | 33 +++++++++++++++++++++++++++++++-- target/i386/kvm/kvm-cpu.c | 4 ---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ static void x86_cpu_set_sgxlepubkeyhash(CPUX86State *env) #endif } +static bool cpuid_has_xsave_feature(CPUX86State *env, const ExtSaveArea *esa) +{ + if (!esa->size) { + return false; + } + + return (env->features[esa->feature] & esa->bits); +} + static void x86_cpu_reset_hold(Object *obj, ResetType type) { CPUState *cs = CPU(obj); @@ -XXX,XX +XXX,XX @@ static void x86_cpu_reset_hold(Object *obj, ResetType type) if (!((1 << i) & CPUID_XSTATE_XCR0_MASK)) { continue; } - if (env->features[esa->feature] & esa->bits) { + if (cpuid_has_xsave_feature(env, esa)) { xcr0 |= 1ull << i; } } @@ -XXX,XX +XXX,XX @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) mask = 0; for (i = 0; i < ARRAY_SIZE(x86_ext_save_areas); i++) { const ExtSaveArea *esa = &x86_ext_save_areas[i]; - if (env->features[esa->feature] & esa->bits) { + if (cpuid_has_xsave_feature(env, esa)) { mask |= (1ULL << i); } } @@ -XXX,XX +XXX,XX @@ static void x86_cpu_register_feature_bit_props(X86CPUClass *xcc, static void x86_cpu_post_initfn(Object *obj) { + static bool first = true; + uint64_t supported_xcr0; + int i; + + if (first) { + first = false; + + supported_xcr0 = + ((uint64_t) x86_cpu_get_supported_feature_word(NULL, FEAT_XSAVE_XCR0_HI) << 32) | + x86_cpu_get_supported_feature_word(NULL, FEAT_XSAVE_XCR0_LO); + + for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) { + ExtSaveArea *esa = &x86_ext_save_areas[i]; + + if (!(supported_xcr0 & (1 << i))) { + esa->size = 0; + } + } + } + accel_cpu_instance_init(CPU(obj)); } diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -XXX,XX +XXX,XX @@ static void kvm_cpu_xsave_init(void) if (!esa->size) { continue; } - if ((x86_cpu_get_supported_feature_word(NULL, esa->feature) & esa->bits) - != esa->bits) { - continue; - } host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); if (eax != 0) { assert(esa->size == eax); -- 2.47.0
Prepare for filtering non-boolean features such as AVX10 version. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/cpu.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, } } -static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); +static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose); /* Build a list with the name of all features on a feature word array */ static void x86_cpu_list_feature_names(FeatureWordArray features, @@ -XXX,XX +XXX,XX @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * * Returns: 0 if all flags are supported by the host, non-zero otherwise. */ -static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) +static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) { CPUX86State *env = &cpu->env; FeatureWord w; @@ -XXX,XX +XXX,XX @@ static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) mark_unavailable_features(cpu, FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT, prefix); } } + + return x86_cpu_have_filtered_features(cpu); } static void x86_cpu_hyperv_realize(X86CPU *cpu) @@ -XXX,XX +XXX,XX @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) } } - x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid); - - if (cpu->enforce_cpuid && x86_cpu_have_filtered_features(cpu)) { - error_setg(&local_err, - accel_uses_host_cpuid() ? + if (x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid)) { + if (cpu->enforce_cpuid) { + error_setg(&local_err, + accel_uses_host_cpuid() ? "Host doesn't support requested features" : "TCG doesn't support requested features"); - goto out; + goto out; + } } /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on -- 2.47.0
From: Tao Su <tao1.su@linux.intel.com> When AVX10 enable bit is set, the 0x24 leaf will be present as "AVX10 Converged Vector ISA leaf" containing fields for the version number and the supported vector bit lengths. Introduce avx10-version property so that avx10 version can be controlled by user and cpu model. Per spec, avx10 version can never be 0, the default value of avx10-version is set to 0 to determine whether it is specified by user. The default can come from the device model or, for the max model, from KVM's reported value. Signed-off-by: Tao Su <tao1.su@linux.intel.com> Link: https://lore.kernel.org/r/20241028024512.156724-3-tao1.su@linux.intel.com Link: https://lore.kernel.org/r/20241028024512.156724-4-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/cpu.h | 4 +++ target/i386/cpu.c | 64 ++++++++++++++++++++++++++++++++++++++----- target/i386/kvm/kvm.c | 3 +- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -XXX,XX +XXX,XX @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_1_EDX_AMX_COMPLEX (1U << 8) /* PREFETCHIT0/1 Instructions */ #define CPUID_7_1_EDX_PREFETCHITI (1U << 14) +/* Support for Advanced Vector Extensions 10 */ +#define CPUID_7_1_EDX_AVX10 (1U << 19) /* Flexible return and event delivery (FRED) */ #define CPUID_7_1_EAX_FRED (1U << 17) /* Load into IA32_KERNEL_GS_BASE (LKGS) */ @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { uint32_t cpuid_vendor3; uint32_t cpuid_version; FeatureWordArray features; + /* AVX10 version */ + uint8_t avx10_version; /* Features that were explicitly enabled/disabled */ FeatureWordArray user_features; uint32_t cpuid_model[12]; diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ #include "cpu-internal.h" static void x86_cpu_realizefn(DeviceState *dev, Error **errp); +static void x86_cpu_get_supported_cpuid(uint32_t func, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx); /* Helpers for building CPUID[2] descriptors: */ @@ -XXX,XX +XXX,XX @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "avx-vnni-int8", "avx-ne-convert", NULL, NULL, "amx-complex", NULL, "avx-vnni-int16", NULL, NULL, NULL, "prefetchiti", NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "avx10", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -XXX,XX +XXX,XX @@ typedef struct X86CPUDefinition { int family; int model; int stepping; + int avx10_version; FeatureWordArray features; const char *model_id; const CPUCaches *const cache_info; @@ -XXX,XX +XXX,XX @@ static Property max_x86_cpu_properties[] = { static void max_x86_cpu_realize(DeviceState *dev, Error **errp) { Object *obj = OBJECT(dev); + X86CPU *cpu = X86_CPU(dev); + CPUX86State *env = &cpu->env; if (!object_property_get_int(obj, "family", &error_abort)) { if (X86_CPU(obj)->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { @@ -XXX,XX +XXX,XX @@ static void max_x86_cpu_realize(DeviceState *dev, Error **errp) } } + if ((env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) && !env->avx10_version) { + uint32_t eax, ebx, ecx, edx; + x86_cpu_get_supported_cpuid(0x24, 0, + &eax, &ebx, &ecx, &edx); + + env->avx10_version = ebx & 0xff; + } + x86_cpu_realizefn(dev, errp); } @@ -XXX,XX +XXX,XX @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) */ object_property_set_str(OBJECT(cpu), "vendor", def->vendor, &error_abort); + object_property_set_int(OBJECT(cpu), "avx10-version", def->avx10_version, + &error_abort); + x86_cpu_apply_version_props(cpu, model); /* @@ -XXX,XX +XXX,XX @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x24: { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) { + *ebx = env->avx10_version; + } + break; + } case 0x40000000: /* * CPUID code in kvm_arch_init_vcpu() ignores stuff @@ -XXX,XX +XXX,XX @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } + /* Advanced Vector Extensions 10 (AVX10) requires CPUID[0x24] */ + if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x24); + } + /* SVM requires CPUID[0x8000000A] */ if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) { x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000000A); @@ -XXX,XX +XXX,XX @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) CPUX86State *env = &cpu->env; FeatureWord w; const char *prefix = NULL; + bool have_filtered_features; + + uint32_t eax_0, ebx_0, ecx_0, edx_0; + uint32_t eax_1, ebx_1, ecx_1, edx_1; if (verbose) { prefix = accel_uses_host_cpuid() @@ -XXX,XX +XXX,XX @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) */ if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && kvm_enabled()) { - uint32_t eax_0, ebx_0, ecx_0, edx_0_unused; - uint32_t eax_1, ebx_1, ecx_1_unused, edx_1_unused; - x86_cpu_get_supported_cpuid(0x14, 0, - &eax_0, &ebx_0, &ecx_0, &edx_0_unused); + &eax_0, &ebx_0, &ecx_0, &edx_0); x86_cpu_get_supported_cpuid(0x14, 1, - &eax_1, &ebx_1, &ecx_1_unused, &edx_1_unused); + &eax_1, &ebx_1, &ecx_1, &edx_1); if (!eax_0 || ((ebx_0 & INTEL_PT_MINIMAL_EBX) != INTEL_PT_MINIMAL_EBX) || @@ -XXX,XX +XXX,XX @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) } } - return x86_cpu_have_filtered_features(cpu); + have_filtered_features = x86_cpu_have_filtered_features(cpu); + + if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) { + x86_cpu_get_supported_cpuid(0x24, 0, + &eax_0, &ebx_0, &ecx_0, &edx_0); + uint8_t version = ebx_0 & 0xff; + + if (version < env->avx10_version) { + if (prefix) { + warn_report("%s: avx10.%d", prefix, env->avx10_version); + } + env->avx10_version = version; + have_filtered_features = true; + } + } + + return have_filtered_features; } static void x86_cpu_hyperv_realize(X86CPU *cpu) @@ -XXX,XX +XXX,XX @@ static Property x86_cpu_properties[] = { DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), + DEFINE_PROP_UINT8("avx10-version", X86CPU, env.avx10_version, 0), DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -XXX,XX +XXX,XX @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, case 0x7: case 0x14: case 0x1d: - case 0x1e: { + case 0x1e: + case 0x24: { uint32_t times; c->function = i; -- 2.47.0
From: Tao Su <tao1.su@linux.intel.com> Introduce features for the supported vector bit lengths. Signed-off-by: Tao Su <tao1.su@linux.intel.com> Link: https://lore.kernel.org/r/20241028024512.156724-3-tao1.su@linux.intel.com Link: https://lore.kernel.org/r/20241028024512.156724-4-tao1.su@linux.intel.com --- target/i386/cpu.h | 8 ++++++++ target/i386/cpu.c | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -XXX,XX +XXX,XX @@ typedef enum FeatureWord { FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */ + FEAT_24_0_EBX, /* CPUID[EAX=0x24,ECX=0].EBX */ FEATURE_WORDS, } FeatureWord; @@ -XXX,XX +XXX,XX @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Packets which contain IP payload have LIP values */ #define CPUID_14_0_ECX_LIP (1U << 31) +/* AVX10 128-bit vector support is present */ +#define CPUID_24_0_EBX_AVX10_128 (1U << 16) +/* AVX10 256-bit vector support is present */ +#define CPUID_24_0_EBX_AVX10_256 (1U << 17) +/* AVX10 512-bit vector support is present */ +#define CPUID_24_0_EBX_AVX10_512 (1U << 18) + /* RAS Features */ #define CPUID_8000_0007_EBX_OVERFLOW_RECOV (1U << 0) #define CPUID_8000_0007_EBX_SUCCOR (1U << 1) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_SGX_12_0_EAX_FEATURES 0 #define TCG_SGX_12_0_EBX_FEATURES 0 #define TCG_SGX_12_1_EAX_FEATURES 0 +#define TCG_24_0_EBX_FEATURES 0 #if defined CONFIG_USER_ONLY #define CPUID_8000_0008_EBX_KERNEL_FEATURES (CPUID_8000_0008_EBX_IBPB | \ @@ -XXX,XX +XXX,XX @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_7_2_EDX_FEATURES, }, + [FEAT_24_0_EBX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + [16] = "avx10-128", + [17] = "avx10-256", + [18] = "avx10-512", + }, + .cpuid = { + .eax = 0x24, + .needs_ecx = true, .ecx = 0, + .reg = R_EBX, + }, + .tcg_features = TCG_24_0_EBX_FEATURES, + }, [FEAT_8000_0007_EDX] = { .type = CPUID_FEATURE_WORD, .feat_names = { -- 2.47.0
From: Tao Su <tao1.su@linux.intel.com> Since the highest supported vector length for a processor implies that all lesser vector lengths are also supported, add the dependencies of the supported vector lengths. If all vector lengths aren't supported, clear AVX10 enable bit as well. Note that the order of AVX10 related dependencies should be kept as: CPUID_24_0_EBX_AVX10_128 -> CPUID_24_0_EBX_AVX10_256, CPUID_24_0_EBX_AVX10_256 -> CPUID_24_0_EBX_AVX10_512, CPUID_24_0_EBX_AVX10_VL_MASK -> CPUID_7_1_EDX_AVX10, CPUID_7_1_EDX_AVX10 -> CPUID_24_0_EBX, so that prevent user from setting weird CPUID combinations, e.g. 256-bits and 512-bits are supported but 128-bits is not, no vector lengths are supported but AVX10 enable bit is still set. Since AVX10_128 will be reserved as 1, adding these dependencies has the bonus that when user sets -cpu host,-avx10-128, CPUID_7_1_EDX_AVX10 and CPUID_24_0_EBX will be disabled automatically. Tested-by: Xuelian Guo <xuelian.guo@intel.com> Signed-off-by: Tao Su <tao1.su@linux.intel.com> Link: https://lore.kernel.org/r/20241028024512.156724-5-tao1.su@linux.intel.com Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/cpu.h | 4 ++++ target/i386/cpu.c | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -XXX,XX +XXX,XX @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_24_0_EBX_AVX10_256 (1U << 17) /* AVX10 512-bit vector support is present */ #define CPUID_24_0_EBX_AVX10_512 (1U << 18) +/* AVX10 vector length support mask */ +#define CPUID_24_0_EBX_AVX10_VL_MASK (CPUID_24_0_EBX_AVX10_128 | \ + CPUID_24_0_EBX_AVX10_256 | \ + CPUID_24_0_EBX_AVX10_512) /* RAS Features */ #define CPUID_8000_0007_EBX_OVERFLOW_RECOV (1U << 0) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ static FeatureDep feature_dependencies[] = { .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX }, .to = { FEAT_SGX_12_1_EAX, ~0ull }, }, + { + .from = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_128 }, + .to = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_256 }, + }, + { + .from = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_256 }, + .to = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_512 }, + }, + { + .from = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_VL_MASK }, + .to = { FEAT_7_1_EDX, CPUID_7_1_EDX_AVX10 }, + }, + { + .from = { FEAT_7_1_EDX, CPUID_7_1_EDX_AVX10 }, + .to = { FEAT_24_0_EBX, ~0ull }, + }, }; typedef struct X86RegisterInfo32 { -- 2.47.0
From: Tao Su <tao1.su@linux.intel.com> AVX10 state enumeration in CPUID leaf D and enabling in XCR0 register are identical to AVX512 state regardless of the supported vector lengths. Given that some E-cores will support AVX10 but not support AVX512, add AVX512 state components to guest when AVX10 is enabled. Based on a patch by Tao Su <tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/cpu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ static bool cpuid_has_xsave_feature(CPUX86State *env, const ExtSaveArea *esa) return false; } - return (env->features[esa->feature] & esa->bits); + if (env->features[esa->feature] & esa->bits) { + return true; + } + if (esa->feature == FEAT_7_0_EBX && esa->bits = CPUID_7_0_EBX_AVX512F + && (features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10)) { + return true; + } + + return false; } static void x86_cpu_reset_hold(Object *obj, ResetType type) -- 2.47.0
From: Tao Su <tao1.su@linux.intel.com> Update GraniteRapids CPU model to add AVX10 and the missing features(ss, tsc-adjust, cldemote, movdiri, movdir64b). Tested-by: Xuelian Guo <xuelian.guo@intel.com> Signed-off-by: Tao Su <tao1.su@linux.intel.com> Link: https://lore.kernel.org/r/20241028024512.156724-7-tao1.su@linux.intel.com Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/cpu.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ static const X86CPUDefinition builtin_x86_defs[] = { .model_id = "Intel Xeon Processor (GraniteRapids)", .versions = (X86CPUVersionDefinition[]) { { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "ss", "on" }, + { "tsc-adjust", "on" }, + { "cldemote", "on" }, + { "movdiri", "on" }, + { "movdir64b", "on" }, + { "avx10", "on" }, + { "avx10-128", "on" }, + { "avx10-256", "on" }, + { "avx10-512", "on" }, + { "avx10-version", "1" }, + { "stepping", "1" }, + { /* end of list */ } + } + }, { /* end of list */ }, }, }, -- 2.47.0
Add AVX10.1 CPUID support, i.e. add AVX10 support bit via CPUID.(EAX=07H, ECX=01H):EDX[bit 19] and new CPUID leaf 0x24H so that guest OS and applications can query the AVX10 CPUIDs directly. The AVX10.1 spec can be found in [1], it is worth mentioning that VL128 (CPUID.(EAX=24H, ECX=00H):EBX[bit 16]) was dropped in rev3.0, but it will be added back and reserved as 1 (SDM and AVX10.2 spec[2] have already described). Since GraniteRapids (stepping 1) is the first platform to support AVX10, introduce GraniteRapids-v2 CPU model to add AVX10 in this patch set, and add some missing features as well. [1] https://cdrdv2.intel.com/v1/dl/getContent/671200 [2] https://cdrdv2.intel.com/v1/dl/getContent/828965 --- Changelog: v2->v3: - Move assigning avx10_version from max_x86_cpu_realize() to x86_cpu_expand_features(). - Add CPUIDs of vector lengths back in cpu_x86_cpuid(). - Update comment on x86_cpu_expand_features(). (Zhao) - Tell user about revised version if version is invalid. (Zhao) - Handle when AVX10 enable bit is disabled but user sets avx10-verion. (Zhao) - Add Zhao's Reviewed-by and Xuelian's Tested-by. v2: https://lore.kernel.org/all/20241029151858.550269-1-pbonzini@redhat.com/ v1: https://lore.kernel.org/all/20241028024512.156724-1-tao1.su@linux.intel.com/ --- Paolo Bonzini (3): target/i386: cpu: set correct supported XCR0 features for TCG target/i386: do not rely on ExtSaveArea for accelerator-supported XCR0 bits target/i386: return bool from x86_cpu_filter_features Tao Su (5): target/i386: add AVX10 feature and AVX10 version property target/i386: add CPUID.24 features for AVX10 target/i386: Add feature dependencies for AVX10 target/i386: Add AVX512 state when AVX10 is supported target/i386: Introduce GraniteRapids-v2 model target/i386/cpu.c | 177 ++++++++++++++++++++++++++++++++++---- target/i386/cpu.h | 16 ++++ target/i386/kvm/kvm-cpu.c | 4 - target/i386/kvm/kvm.c | 3 +- 4 files changed, 176 insertions(+), 24 deletions(-) base-commit: 58d49b5895f2e0b5cfe4b2901bf24f3320b74f29 -- 2.34.1
From: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> --- target/i386/cpu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .needs_ecx = true, .ecx = 0, .reg = R_EAX, }, - .tcg_features = ~0U, + .tcg_features = XSTATE_FP_MASK | XSTATE_SSE_MASK | + XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | + XSTATE_PKRU_MASK, .migratable_flags = XSTATE_FP_MASK | XSTATE_SSE_MASK | XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK | XSTATE_Hi16_ZMM_MASK | @@ -XXX,XX +XXX,XX @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .needs_ecx = true, .ecx = 0, .reg = R_EDX, }, - .tcg_features = ~0U, + .tcg_features = 0U, }, /*Below are MSR exposed features*/ [FEAT_ARCH_CAPABILITIES] = { -- 2.34.1
From: Paolo Bonzini <pbonzini@redhat.com> Right now, QEMU is using the "feature" and "bits" fields of ExtSaveArea to query the accelerator for the support status of extended save areas. This is a problem for AVX10, which attaches two feature bits (AVX512F and AVX10) to the same extended save states. To keep the AVX10 hacks to the minimum, limit usage of esa->features and esa->bits. Instead, just query the accelerator for the 0xD leaf. Do it in common code and clear esa->size if an extended save state is unsupported. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> --- target/i386/cpu.c | 33 +++++++++++++++++++++++++++++++-- target/i386/kvm/kvm-cpu.c | 4 ---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ static void x86_cpu_set_sgxlepubkeyhash(CPUX86State *env) #endif } +static bool cpuid_has_xsave_feature(CPUX86State *env, const ExtSaveArea *esa) +{ + if (!esa->size) { + return false; + } + + return (env->features[esa->feature] & esa->bits); +} + static void x86_cpu_reset_hold(Object *obj, ResetType type) { CPUState *cs = CPU(obj); @@ -XXX,XX +XXX,XX @@ static void x86_cpu_reset_hold(Object *obj, ResetType type) if (!((1 << i) & CPUID_XSTATE_XCR0_MASK)) { continue; } - if (env->features[esa->feature] & esa->bits) { + if (cpuid_has_xsave_feature(env, esa)) { xcr0 |= 1ull << i; } } @@ -XXX,XX +XXX,XX @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) mask = 0; for (i = 0; i < ARRAY_SIZE(x86_ext_save_areas); i++) { const ExtSaveArea *esa = &x86_ext_save_areas[i]; - if (env->features[esa->feature] & esa->bits) { + if (cpuid_has_xsave_feature(env, esa)) { mask |= (1ULL << i); } } @@ -XXX,XX +XXX,XX @@ static void x86_cpu_register_feature_bit_props(X86CPUClass *xcc, static void x86_cpu_post_initfn(Object *obj) { + static bool first = true; + uint64_t supported_xcr0; + int i; + + if (first) { + first = false; + + supported_xcr0 = + ((uint64_t) x86_cpu_get_supported_feature_word(NULL, FEAT_XSAVE_XCR0_HI) << 32) | + x86_cpu_get_supported_feature_word(NULL, FEAT_XSAVE_XCR0_LO); + + for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) { + ExtSaveArea *esa = &x86_ext_save_areas[i]; + + if (!(supported_xcr0 & (1 << i))) { + esa->size = 0; + } + } + } + accel_cpu_instance_init(CPU(obj)); } diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -XXX,XX +XXX,XX @@ static void kvm_cpu_xsave_init(void) if (!esa->size) { continue; } - if ((x86_cpu_get_supported_feature_word(NULL, esa->feature) & esa->bits) - != esa->bits) { - continue; - } host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); if (eax != 0) { assert(esa->size == eax); -- 2.34.1
From: Paolo Bonzini <pbonzini@redhat.com> Prepare for filtering non-boolean features such as AVX10 version. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Signed-off-by: Tao Su <tao1.su@linux.intel.com> --- target/i386/cpu.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, } } -static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); +static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose); /* Build a list with the name of all features on a feature word array */ static void x86_cpu_list_feature_names(FeatureWordArray features, @@ -XXX,XX +XXX,XX @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * Finishes initialization of CPUID data, filters CPU feature * words based on host availability of each feature. * - * Returns: 0 if all flags are supported by the host, non-zero otherwise. + * Returns: true if any flag is not supported by the host, false otherwise. */ -static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) +static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) { CPUX86State *env = &cpu->env; FeatureWord w; @@ -XXX,XX +XXX,XX @@ static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) mark_unavailable_features(cpu, FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT, prefix); } } + + return x86_cpu_have_filtered_features(cpu); } static void x86_cpu_hyperv_realize(X86CPU *cpu) @@ -XXX,XX +XXX,XX @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) } } - x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid); - - if (cpu->enforce_cpuid && x86_cpu_have_filtered_features(cpu)) { - error_setg(&local_err, - accel_uses_host_cpuid() ? + if (x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid)) { + if (cpu->enforce_cpuid) { + error_setg(&local_err, + accel_uses_host_cpuid() ? "Host doesn't support requested features" : "TCG doesn't support requested features"); - goto out; + goto out; + } } /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on -- 2.34.1
When AVX10 enable bit is set, the 0x24 leaf will be present as "AVX10 Converged Vector ISA leaf" containing fields for the version number and the supported vector bit lengths. Introduce avx10-version property so that avx10 version can be controlled by user and cpu model. Per spec, avx10 version can never be 0, the default value of avx10-version is set to 0 to determine whether it is specified by user. The default can come from the device model or, for the max model, from KVM's reported value. Signed-off-by: Tao Su <tao1.su@linux.intel.com> Link: https://lore.kernel.org/r/20241028024512.156724-3-tao1.su@linux.intel.com Link: https://lore.kernel.org/r/20241028024512.156724-4-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Tested-by: Xuelian Guo <xuelian.guo@intel.com> --- target/i386/cpu.c | 64 ++++++++++++++++++++++++++++++++++++++----- target/i386/cpu.h | 4 +++ target/i386/kvm/kvm.c | 3 +- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ #include "cpu-internal.h" static void x86_cpu_realizefn(DeviceState *dev, Error **errp); +static void x86_cpu_get_supported_cpuid(uint32_t func, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx); /* Helpers for building CPUID[2] descriptors: */ @@ -XXX,XX +XXX,XX @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "avx-vnni-int8", "avx-ne-convert", NULL, NULL, "amx-complex", NULL, "avx-vnni-int16", NULL, NULL, NULL, "prefetchiti", NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "avx10", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -XXX,XX +XXX,XX @@ typedef struct X86CPUDefinition { int family; int model; int stepping; + uint8_t avx10_version; FeatureWordArray features; const char *model_id; const CPUCaches *const cache_info; @@ -XXX,XX +XXX,XX @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) */ object_property_set_str(OBJECT(cpu), "vendor", def->vendor, &error_abort); + object_property_set_uint(OBJECT(cpu), "avx10-version", def->avx10_version, + &error_abort); + x86_cpu_apply_version_props(cpu, model); /* @@ -XXX,XX +XXX,XX @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x24: { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + if ((env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) && count == 0) { + *ebx = env->features[FEAT_24_0_EBX] | env->avx10_version; + } + break; + } case 0x40000000: /* * CPUID code in kvm_arch_init_vcpu() ignores stuff @@ -XXX,XX +XXX,XX @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) ~env->user_features[w] & ~feature_word_info[w].no_autoenable_flags; } + + if ((env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) && !env->avx10_version) { + uint32_t eax, ebx, ecx, edx; + x86_cpu_get_supported_cpuid(0x24, 0, &eax, &ebx, &ecx, &edx); + env->avx10_version = ebx & 0xff; + } } for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { @@ -XXX,XX +XXX,XX @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } + /* Advanced Vector Extensions 10 (AVX10) requires CPUID[0x24] */ + if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x24); + } + /* SVM requires CPUID[0x8000000A] */ if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) { x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000000A); @@ -XXX,XX +XXX,XX @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) CPUX86State *env = &cpu->env; FeatureWord w; const char *prefix = NULL; + bool have_filtered_features; + + uint32_t eax_0, ebx_0, ecx_0, edx_0; + uint32_t eax_1, ebx_1, ecx_1, edx_1; if (verbose) { prefix = accel_uses_host_cpuid() @@ -XXX,XX +XXX,XX @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) */ if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && kvm_enabled()) { - uint32_t eax_0, ebx_0, ecx_0, edx_0_unused; - uint32_t eax_1, ebx_1, ecx_1_unused, edx_1_unused; - x86_cpu_get_supported_cpuid(0x14, 0, - &eax_0, &ebx_0, &ecx_0, &edx_0_unused); + &eax_0, &ebx_0, &ecx_0, &edx_0); x86_cpu_get_supported_cpuid(0x14, 1, - &eax_1, &ebx_1, &ecx_1_unused, &edx_1_unused); + &eax_1, &ebx_1, &ecx_1, &edx_1); if (!eax_0 || ((ebx_0 & INTEL_PT_MINIMAL_EBX) != INTEL_PT_MINIMAL_EBX) || @@ -XXX,XX +XXX,XX @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) } } - return x86_cpu_have_filtered_features(cpu); + have_filtered_features = x86_cpu_have_filtered_features(cpu); + + if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) { + x86_cpu_get_supported_cpuid(0x24, 0, + &eax_0, &ebx_0, &ecx_0, &edx_0); + uint8_t version = ebx_0 & 0xff; + + if (version < env->avx10_version) { + if (prefix) { + warn_report("%s: avx10.%d. Adjust to avx10.%d", + prefix, env->avx10_version, version); + } + env->avx10_version = version; + have_filtered_features = true; + } + } else if (env->avx10_version && prefix) { + warn_report("%s: avx10.%d.", prefix, env->avx10_version); + have_filtered_features = true; + } + + return have_filtered_features; } static void x86_cpu_hyperv_realize(X86CPU *cpu) @@ -XXX,XX +XXX,XX @@ static Property x86_cpu_properties[] = { DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), + DEFINE_PROP_UINT8("avx10-version", X86CPU, env.avx10_version, 0), DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -XXX,XX +XXX,XX @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_1_EDX_AMX_COMPLEX (1U << 8) /* PREFETCHIT0/1 Instructions */ #define CPUID_7_1_EDX_PREFETCHITI (1U << 14) +/* Support for Advanced Vector Extensions 10 */ +#define CPUID_7_1_EDX_AVX10 (1U << 19) /* Flexible return and event delivery (FRED) */ #define CPUID_7_1_EAX_FRED (1U << 17) /* Load into IA32_KERNEL_GS_BASE (LKGS) */ @@ -XXX,XX +XXX,XX @@ typedef struct CPUArchState { uint32_t cpuid_vendor3; uint32_t cpuid_version; FeatureWordArray features; + /* AVX10 version */ + uint8_t avx10_version; /* Features that were explicitly enabled/disabled */ FeatureWordArray user_features; uint32_t cpuid_model[12]; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -XXX,XX +XXX,XX @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, case 0x7: case 0x14: case 0x1d: - case 0x1e: { + case 0x1e: + case 0x24: { uint32_t times; c->function = i; -- 2.34.1
Introduce features for the supported vector bit lengths. Signed-off-by: Tao Su <tao1.su@linux.intel.com> Link: https://lore.kernel.org/r/20241028024512.156724-3-tao1.su@linux.intel.com Link: https://lore.kernel.org/r/20241028024512.156724-4-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Tested-by: Xuelian Guo <xuelian.guo@intel.com> --- target/i386/cpu.c | 15 +++++++++++++++ target/i386/cpu.h | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_SGX_12_0_EAX_FEATURES 0 #define TCG_SGX_12_0_EBX_FEATURES 0 #define TCG_SGX_12_1_EAX_FEATURES 0 +#define TCG_24_0_EBX_FEATURES 0 #if defined CONFIG_USER_ONLY #define CPUID_8000_0008_EBX_KERNEL_FEATURES (CPUID_8000_0008_EBX_IBPB | \ @@ -XXX,XX +XXX,XX @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_7_2_EDX_FEATURES, }, + [FEAT_24_0_EBX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + [16] = "avx10-128", + [17] = "avx10-256", + [18] = "avx10-512", + }, + .cpuid = { + .eax = 0x24, + .needs_ecx = true, .ecx = 0, + .reg = R_EBX, + }, + .tcg_features = TCG_24_0_EBX_FEATURES, + }, [FEAT_8000_0007_EDX] = { .type = CPUID_FEATURE_WORD, .feat_names = { diff --git a/target/i386/cpu.h b/target/i386/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -XXX,XX +XXX,XX @@ typedef enum FeatureWord { FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */ + FEAT_24_0_EBX, /* CPUID[EAX=0x24,ECX=0].EBX */ FEATURE_WORDS, } FeatureWord; @@ -XXX,XX +XXX,XX @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Packets which contain IP payload have LIP values */ #define CPUID_14_0_ECX_LIP (1U << 31) +/* AVX10 128-bit vector support is present */ +#define CPUID_24_0_EBX_AVX10_128 (1U << 16) +/* AVX10 256-bit vector support is present */ +#define CPUID_24_0_EBX_AVX10_256 (1U << 17) +/* AVX10 512-bit vector support is present */ +#define CPUID_24_0_EBX_AVX10_512 (1U << 18) + /* RAS Features */ #define CPUID_8000_0007_EBX_OVERFLOW_RECOV (1U << 0) #define CPUID_8000_0007_EBX_SUCCOR (1U << 1) -- 2.34.1
Since the highest supported vector length for a processor implies that all lesser vector lengths are also supported, add the dependencies of the supported vector lengths. If all vector lengths aren't supported, clear AVX10 enable bit as well. Note that the order of AVX10 related dependencies should be kept as: CPUID_24_0_EBX_AVX10_128 -> CPUID_24_0_EBX_AVX10_256, CPUID_24_0_EBX_AVX10_256 -> CPUID_24_0_EBX_AVX10_512, CPUID_24_0_EBX_AVX10_VL_MASK -> CPUID_7_1_EDX_AVX10, CPUID_7_1_EDX_AVX10 -> CPUID_24_0_EBX, so that prevent user from setting weird CPUID combinations, e.g. 256-bits and 512-bits are supported but 128-bits is not, no vector lengths are supported but AVX10 enable bit is still set. Since AVX10_128 will be reserved as 1, adding these dependencies has the bonus that when user sets -cpu host,-avx10-128, CPUID_7_1_EDX_AVX10 and CPUID_24_0_EBX will be disabled automatically. Tested-by: Xuelian Guo <xuelian.guo@intel.com> Signed-off-by: Tao Su <tao1.su@linux.intel.com> Link: https://lore.kernel.org/r/20241028024512.156724-5-tao1.su@linux.intel.com Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/cpu.c | 16 ++++++++++++++++ target/i386/cpu.h | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ static FeatureDep feature_dependencies[] = { .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX }, .to = { FEAT_SGX_12_1_EAX, ~0ull }, }, + { + .from = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_128 }, + .to = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_256 }, + }, + { + .from = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_256 }, + .to = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_512 }, + }, + { + .from = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_VL_MASK }, + .to = { FEAT_7_1_EDX, CPUID_7_1_EDX_AVX10 }, + }, + { + .from = { FEAT_7_1_EDX, CPUID_7_1_EDX_AVX10 }, + .to = { FEAT_24_0_EBX, ~0ull }, + }, }; typedef struct X86RegisterInfo32 { diff --git a/target/i386/cpu.h b/target/i386/cpu.h index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -XXX,XX +XXX,XX @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_24_0_EBX_AVX10_256 (1U << 17) /* AVX10 512-bit vector support is present */ #define CPUID_24_0_EBX_AVX10_512 (1U << 18) +/* AVX10 vector length support mask */ +#define CPUID_24_0_EBX_AVX10_VL_MASK (CPUID_24_0_EBX_AVX10_128 | \ + CPUID_24_0_EBX_AVX10_256 | \ + CPUID_24_0_EBX_AVX10_512) /* RAS Features */ #define CPUID_8000_0007_EBX_OVERFLOW_RECOV (1U << 0) -- 2.34.1
AVX10 state enumeration in CPUID leaf D and enabling in XCR0 register are identical to AVX512 state regardless of the supported vector lengths. Given that some E-cores will support AVX10 but not support AVX512, add AVX512 state components to guest when AVX10 is enabled. Based on a patch by Tao Su <tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Tested-by: Xuelian Guo <xuelian.guo@intel.com> Signed-off-by: Tao Su <tao1.su@linux.intel.com> --- target/i386/cpu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ static bool cpuid_has_xsave_feature(CPUX86State *env, const ExtSaveArea *esa) return false; } - return (env->features[esa->feature] & esa->bits); + if (env->features[esa->feature] & esa->bits) { + return true; + } + if (esa->feature == FEAT_7_0_EBX && esa->bits == CPUID_7_0_EBX_AVX512F + && (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10)) { + return true; + } + + return false; } static void x86_cpu_reset_hold(Object *obj, ResetType type) -- 2.34.1
Update GraniteRapids CPU model to add AVX10 and the missing features(ss, tsc-adjust, cldemote, movdiri, movdir64b). Tested-by: Xuelian Guo <xuelian.guo@intel.com> Signed-off-by: Tao Su <tao1.su@linux.intel.com> Link: https://lore.kernel.org/r/20241028024512.156724-7-tao1.su@linux.intel.com Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target/i386/cpu.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index XXXXXXX..XXXXXXX 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -XXX,XX +XXX,XX @@ static const X86CPUDefinition builtin_x86_defs[] = { .model_id = "Intel Xeon Processor (GraniteRapids)", .versions = (X86CPUVersionDefinition[]) { { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "ss", "on" }, + { "tsc-adjust", "on" }, + { "cldemote", "on" }, + { "movdiri", "on" }, + { "movdir64b", "on" }, + { "avx10", "on" }, + { "avx10-128", "on" }, + { "avx10-256", "on" }, + { "avx10-512", "on" }, + { "avx10-version", "1" }, + { "stepping", "1" }, + { /* end of list */ } + } + }, { /* end of list */ }, }, }, -- 2.34.1