While I've also already coded up the patch to actually support the new BFloat16 insns, there's little point in submitting this without having tested it. But the two preparatory patches may turn out useful earlier on. They're based on the full AVX512 emulator series, but shouldn't be overly difficult to re-base ahead of it. 1: x86/CPUID: support leaf 7 subleaf 1 / AVX512_BF16 2: x86emul: support CPUID subleaves for vcpu_has_*() Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 17/05/2019 13:21, Jan Beulich wrote: > While I've also already coded up the patch to actually support > the new BFloat16 insns, there's little point in submitting this > without having tested it. But the two preparatory patches may > turn out useful earlier on. They're based on the full AVX512 > emulator series, but shouldn't be overly difficult to re-base > ahead of it. To follow up on the IRC conversations which occurred because of my email problems... > 1: x86/CPUID: support leaf 7 subleaf 1 / AVX512_BF16 Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> > 2: x86emul: support CPUID subleaves for vcpu_has_*() This patch has been obsoleted following the completion of a series of mine from before I got bogged down in XSA-297, which has just been committed. https://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff;h=346666c4bdf72ca1d908bbcdb9185981aac7e749 ~Andrew _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
The AVX512_BF16 feature flag resides in this so far blank sub-leaf. Expand infrastructure accordingly. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/tools/libxl/libxl_cpuid.c +++ b/tools/libxl/libxl_cpuid.c @@ -218,6 +218,8 @@ int libxl_cpuid_parse_config(libxl_cpuid {"arch-caps", 0x00000007, 0, CPUID_REG_EDX, 29, 1}, {"ssbd", 0x00000007, 0, CPUID_REG_EDX, 31, 1}, + {"avx512-bf16", 0x00000007, 1, CPUID_REG_EAX, 5, 1}, + {"lahfsahf", 0x80000001, NA, CPUID_REG_ECX, 0, 1}, {"cmplegacy", 0x80000001, NA, CPUID_REG_ECX, 1, 1}, {"svm", 0x80000001, NA, CPUID_REG_ECX, 2, 1}, --- a/tools/misc/xen-cpuid.c +++ b/tools/misc/xen-cpuid.c @@ -170,6 +170,11 @@ static const char *const str_7d0[32] = /* 30 */ [31] = "ssbd", }; +static const char *const str_7a1[32] = +{ + /* 4 */ [ 5] = "avx512_bf16", +}; + static const struct { const char *name; const char *abbr; @@ -186,6 +191,7 @@ static const struct { { "0x80000007.edx", "e7d", str_e7d }, { "0x80000008.ebx", "e8b", str_e8b }, { "0x00000007:0.edx", "7d0", str_7d0 }, + { "0x00000007:1.eax", "7a1", str_7a1 }, }; #define COL_ALIGN "18" --- a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -391,11 +391,17 @@ static void generic_identify(struct cpui = cpuid_ebx(0x80000008); /* Intel-defined flags: level 0x00000007 */ - if ( c->cpuid_level >= 0x00000007 ) - cpuid_count(0x00000007, 0, &tmp, + if ( c->cpuid_level >= 0x00000007 ) { + cpuid_count(0x00000007, 0, &eax, &c->x86_capability[cpufeat_word(X86_FEATURE_FSGSBASE)], &c->x86_capability[cpufeat_word(X86_FEATURE_PKU)], &c->x86_capability[cpufeat_word(X86_FEATURE_AVX512_4VNNIW)]); + if (eax > 0) + cpuid_count(0x00000007, 1, + &c->x86_capability[cpufeat_word(X86_FEATURE_AVX512_BF16)], + &tmp, &tmp, &tmp); + } + if (c->cpuid_level >= 0xd) cpuid_count(0xd, 1, &c->x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)], --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -267,6 +267,9 @@ XEN_CPUFEATURE(L1D_FLUSH, 9*32+28) / 
XEN_CPUFEATURE(ARCH_CAPS, 9*32+29) /* IA32_ARCH_CAPABILITIES MSR */ XEN_CPUFEATURE(SSBD, 9*32+31) /*A MSR_SPEC_CTRL.SSBD available */ +/* Intel-defined CPU features, CPUID level 0x00000007:1.eax, word 10 */ +XEN_CPUFEATURE(AVX512_BF16, 10*32+ 5) /* AVX512 BFloat16 Instructions */ + #endif /* XEN_CPUFEATURE */ /* Clean up from a default include. Close the enum (for C). */ --- a/xen/include/xen/lib/x86/cpuid.h +++ b/xen/include/xen/lib/x86/cpuid.h @@ -14,6 +14,7 @@ #define FEATURESET_e7d 7 /* 0x80000007.edx */ #define FEATURESET_e8b 8 /* 0x80000008.ebx */ #define FEATURESET_7d0 9 /* 0x00000007:0.edx */ +#define FEATURESET_7a1 10 /* 0x00000007:1.eax */ struct cpuid_leaf { @@ -79,7 +80,7 @@ const char *x86_cpuid_vendor_to_str(unsi #define CPUID_GUEST_NR_BASIC (0xdu + 1) #define CPUID_GUEST_NR_CACHE (5u + 1) -#define CPUID_GUEST_NR_FEAT (0u + 1) +#define CPUID_GUEST_NR_FEAT (1u + 1) #define CPUID_GUEST_NR_TOPO (1u + 1) #define CPUID_GUEST_NR_XSTATE (62u + 1) #define CPUID_GUEST_NR_EXTD_INTEL (0x8u + 1) @@ -177,6 +178,13 @@ struct cpuid_policy struct { DECL_BITFIELD(7d0); }; }; }; + struct { + /* Subleaf 1. */ + union { + uint32_t _7a1; + struct { DECL_BITFIELD(7a1); }; + }; + }; } feat; /* Extended topology enumeration: 0x0000000B[xx] */ @@ -280,6 +288,7 @@ static inline void cpuid_policy_to_featu fs[FEATURESET_e7d] = p->extd.e7d; fs[FEATURESET_e8b] = p->extd.e8b; fs[FEATURESET_7d0] = p->feat._7d0; + fs[FEATURESET_7a1] = p->feat._7a1; } /* Fill in a CPUID policy from a featureset bitmap. 
*/ @@ -296,6 +305,7 @@ static inline void cpuid_featureset_to_p p->extd.e7d = fs[FEATURESET_e7d]; p->extd.e8b = fs[FEATURESET_e8b]; p->feat._7d0 = fs[FEATURESET_7d0]; + p->feat._7a1 = fs[FEATURESET_7a1]; } const uint32_t *x86_cpuid_lookup_deep_deps(uint32_t feature); --- a/xen/tools/gen-cpuid.py +++ b/xen/tools/gen-cpuid.py @@ -267,7 +267,7 @@ def crunch_numbers(state): # AVX512 extensions acting (solely) on vectors of bytes/words are made # dependents of AVX512BW (as to requiring wider than 16-bit mask # registers), despite the SDM not formally making this connection. - AVX512BW: [AVX512_VBMI, AVX512_BITALG, AVX512_VBMI2], + AVX512BW: [AVX512_VBMI, AVX512_BF16, AVX512_BITALG, AVX512_VBMI2], # The features: # * Single Thread Indirect Branch Predictors _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
The AVX512_BF16 feature flag resides in leaf 7 sub-leaf 1. Expand infrastructure accordingly before enabling support for those insns. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1845,6 +1845,7 @@ in_protmode( static bool vcpu_has( unsigned int eax, + unsigned int ecx, unsigned int reg, unsigned int bit, struct x86_emulate_ctxt *ctxt, @@ -1854,7 +1855,7 @@ static bool vcpu_has( int rc = X86EMUL_OKAY; fail_if(!ops->cpuid); - rc = ops->cpuid(eax, 0, &res, ctxt); + rc = ops->cpuid(eax, ecx, &res, ctxt); if ( rc == X86EMUL_OKAY ) { switch ( reg ) @@ -1873,76 +1874,78 @@ static bool vcpu_has( return rc == X86EMUL_OKAY; } -#define vcpu_has_fpu() vcpu_has( 1, EDX, 0, ctxt, ops) -#define vcpu_has_sep() vcpu_has( 1, EDX, 11, ctxt, ops) -#define vcpu_has_cx8() vcpu_has( 1, EDX, 8, ctxt, ops) -#define vcpu_has_cmov() vcpu_has( 1, EDX, 15, ctxt, ops) -#define vcpu_has_clflush() vcpu_has( 1, EDX, 19, ctxt, ops) -#define vcpu_has_mmx() vcpu_has( 1, EDX, 23, ctxt, ops) -#define vcpu_has_sse() vcpu_has( 1, EDX, 25, ctxt, ops) -#define vcpu_has_sse2() vcpu_has( 1, EDX, 26, ctxt, ops) -#define vcpu_has_sse3() vcpu_has( 1, ECX, 0, ctxt, ops) -#define vcpu_has_pclmulqdq() vcpu_has( 1, ECX, 1, ctxt, ops) -#define vcpu_has_ssse3() vcpu_has( 1, ECX, 9, ctxt, ops) -#define vcpu_has_fma() vcpu_has( 1, ECX, 12, ctxt, ops) -#define vcpu_has_cx16() vcpu_has( 1, ECX, 13, ctxt, ops) -#define vcpu_has_sse4_1() vcpu_has( 1, ECX, 19, ctxt, ops) -#define vcpu_has_sse4_2() vcpu_has( 1, ECX, 20, ctxt, ops) -#define vcpu_has_movbe() vcpu_has( 1, ECX, 22, ctxt, ops) -#define vcpu_has_popcnt() vcpu_has( 1, ECX, 23, ctxt, ops) -#define vcpu_has_aesni() vcpu_has( 1, ECX, 25, ctxt, ops) -#define vcpu_has_avx() vcpu_has( 1, ECX, 28, ctxt, ops) -#define vcpu_has_f16c() vcpu_has( 1, ECX, 29, ctxt, ops) -#define vcpu_has_rdrand() vcpu_has( 1, ECX, 30, ctxt, ops) -#define vcpu_has_mmxext() (vcpu_has(0x80000001, EDX, 
22, ctxt, ops) || \ +#define NA 0 + +#define vcpu_has_fpu() vcpu_has( 1, NA, EDX, 0, ctxt, ops) +#define vcpu_has_sep() vcpu_has( 1, NA, EDX, 11, ctxt, ops) +#define vcpu_has_cx8() vcpu_has( 1, NA, EDX, 8, ctxt, ops) +#define vcpu_has_cmov() vcpu_has( 1, NA, EDX, 15, ctxt, ops) +#define vcpu_has_clflush() vcpu_has( 1, NA, EDX, 19, ctxt, ops) +#define vcpu_has_mmx() vcpu_has( 1, NA, EDX, 23, ctxt, ops) +#define vcpu_has_sse() vcpu_has( 1, NA, EDX, 25, ctxt, ops) +#define vcpu_has_sse2() vcpu_has( 1, NA, EDX, 26, ctxt, ops) +#define vcpu_has_sse3() vcpu_has( 1, NA, ECX, 0, ctxt, ops) +#define vcpu_has_pclmulqdq() vcpu_has( 1, NA, ECX, 1, ctxt, ops) +#define vcpu_has_ssse3() vcpu_has( 1, NA, ECX, 9, ctxt, ops) +#define vcpu_has_fma() vcpu_has( 1, NA, ECX, 12, ctxt, ops) +#define vcpu_has_cx16() vcpu_has( 1, NA, ECX, 13, ctxt, ops) +#define vcpu_has_sse4_1() vcpu_has( 1, NA, ECX, 19, ctxt, ops) +#define vcpu_has_sse4_2() vcpu_has( 1, NA, ECX, 20, ctxt, ops) +#define vcpu_has_movbe() vcpu_has( 1, NA, ECX, 22, ctxt, ops) +#define vcpu_has_popcnt() vcpu_has( 1, NA, ECX, 23, ctxt, ops) +#define vcpu_has_aesni() vcpu_has( 1, NA, ECX, 25, ctxt, ops) +#define vcpu_has_avx() vcpu_has( 1, NA, ECX, 28, ctxt, ops) +#define vcpu_has_f16c() vcpu_has( 1, NA, ECX, 29, ctxt, ops) +#define vcpu_has_rdrand() vcpu_has( 1, NA, ECX, 30, ctxt, ops) +#define vcpu_has_mmxext() (vcpu_has(0x80000001, NA, EDX, 22, ctxt, ops) || \ vcpu_has_sse()) -#define vcpu_has_3dnow_ext() vcpu_has(0x80000001, EDX, 30, ctxt, ops) -#define vcpu_has_3dnow() vcpu_has(0x80000001, EDX, 31, ctxt, ops) -#define vcpu_has_lahf_lm() vcpu_has(0x80000001, ECX, 0, ctxt, ops) -#define vcpu_has_cr8_legacy() vcpu_has(0x80000001, ECX, 4, ctxt, ops) -#define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops) -#define vcpu_has_sse4a() vcpu_has(0x80000001, ECX, 6, ctxt, ops) -#define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX, 7, ctxt, ops) -#define vcpu_has_xop() vcpu_has(0x80000001, ECX, 12, ctxt, ops) -#define 
vcpu_has_fma4() vcpu_has(0x80000001, ECX, 16, ctxt, ops) -#define vcpu_has_tbm() vcpu_has(0x80000001, ECX, 21, ctxt, ops) -#define vcpu_has_monitorx() vcpu_has(0x80000001, ECX, 29, ctxt, ops) -#define vcpu_has_bmi1() vcpu_has( 7, EBX, 3, ctxt, ops) -#define vcpu_has_hle() vcpu_has( 7, EBX, 4, ctxt, ops) -#define vcpu_has_avx2() vcpu_has( 7, EBX, 5, ctxt, ops) -#define vcpu_has_bmi2() vcpu_has( 7, EBX, 8, ctxt, ops) -#define vcpu_has_invpcid() vcpu_has( 7, EBX, 10, ctxt, ops) -#define vcpu_has_rtm() vcpu_has( 7, EBX, 11, ctxt, ops) -#define vcpu_has_mpx() vcpu_has( 7, EBX, 14, ctxt, ops) -#define vcpu_has_avx512f() vcpu_has( 7, EBX, 16, ctxt, ops) -#define vcpu_has_avx512dq() vcpu_has( 7, EBX, 17, ctxt, ops) -#define vcpu_has_rdseed() vcpu_has( 7, EBX, 18, ctxt, ops) -#define vcpu_has_adx() vcpu_has( 7, EBX, 19, ctxt, ops) -#define vcpu_has_smap() vcpu_has( 7, EBX, 20, ctxt, ops) -#define vcpu_has_avx512_ifma() vcpu_has( 7, EBX, 21, ctxt, ops) -#define vcpu_has_clflushopt() vcpu_has( 7, EBX, 23, ctxt, ops) -#define vcpu_has_clwb() vcpu_has( 7, EBX, 24, ctxt, ops) -#define vcpu_has_avx512pf() vcpu_has( 7, EBX, 26, ctxt, ops) -#define vcpu_has_avx512er() vcpu_has( 7, EBX, 27, ctxt, ops) -#define vcpu_has_avx512cd() vcpu_has( 7, EBX, 28, ctxt, ops) -#define vcpu_has_sha() vcpu_has( 7, EBX, 29, ctxt, ops) -#define vcpu_has_avx512bw() vcpu_has( 7, EBX, 30, ctxt, ops) -#define vcpu_has_avx512vl() vcpu_has( 7, EBX, 31, ctxt, ops) -#define vcpu_has_avx512_vbmi() vcpu_has( 7, ECX, 1, ctxt, ops) -#define vcpu_has_avx512_vbmi2() vcpu_has( 7, ECX, 6, ctxt, ops) -#define vcpu_has_gfni() vcpu_has( 7, ECX, 8, ctxt, ops) -#define vcpu_has_vaes() vcpu_has( 7, ECX, 9, ctxt, ops) -#define vcpu_has_vpclmulqdq() vcpu_has( 7, ECX, 10, ctxt, ops) -#define vcpu_has_avx512_vnni() vcpu_has( 7, ECX, 11, ctxt, ops) -#define vcpu_has_avx512_bitalg() vcpu_has( 7, ECX, 12, ctxt, ops) -#define vcpu_has_avx512_vpopcntdq() vcpu_has( 7, ECX, 14, ctxt, ops) -#define vcpu_has_rdpid() vcpu_has( 7, ECX, 
22, ctxt, ops) -#define vcpu_has_movdiri() vcpu_has( 7, ECX, 27, ctxt, ops) -#define vcpu_has_movdir64b() vcpu_has( 7, ECX, 28, ctxt, ops) -#define vcpu_has_avx512_4vnniw() vcpu_has( 7, EDX, 2, ctxt, ops) -#define vcpu_has_avx512_4fmaps() vcpu_has( 7, EDX, 3, ctxt, ops) -#define vcpu_has_clzero() vcpu_has(0x80000008, EBX, 0, ctxt, ops) -#define vcpu_has_wbnoinvd() vcpu_has(0x80000008, EBX, 9, ctxt, ops) +#define vcpu_has_3dnow_ext() vcpu_has(0x80000001, NA, EDX, 30, ctxt, ops) +#define vcpu_has_3dnow() vcpu_has(0x80000001, NA, EDX, 31, ctxt, ops) +#define vcpu_has_lahf_lm() vcpu_has(0x80000001, NA, ECX, 0, ctxt, ops) +#define vcpu_has_cr8_legacy() vcpu_has(0x80000001, NA, ECX, 4, ctxt, ops) +#define vcpu_has_lzcnt() vcpu_has(0x80000001, NA, ECX, 5, ctxt, ops) +#define vcpu_has_sse4a() vcpu_has(0x80000001, NA, ECX, 6, ctxt, ops) +#define vcpu_has_misalignsse() vcpu_has(0x80000001, NA, ECX, 7, ctxt, ops) +#define vcpu_has_xop() vcpu_has(0x80000001, NA, ECX, 12, ctxt, ops) +#define vcpu_has_fma4() vcpu_has(0x80000001, NA, ECX, 16, ctxt, ops) +#define vcpu_has_tbm() vcpu_has(0x80000001, NA, ECX, 21, ctxt, ops) +#define vcpu_has_monitorx() vcpu_has(0x80000001, NA, ECX, 29, ctxt, ops) +#define vcpu_has_bmi1() vcpu_has( 7, 0, EBX, 3, ctxt, ops) +#define vcpu_has_hle() vcpu_has( 7, 0, EBX, 4, ctxt, ops) +#define vcpu_has_avx2() vcpu_has( 7, 0, EBX, 5, ctxt, ops) +#define vcpu_has_bmi2() vcpu_has( 7, 0, EBX, 8, ctxt, ops) +#define vcpu_has_invpcid() vcpu_has( 7, 0, EBX, 10, ctxt, ops) +#define vcpu_has_rtm() vcpu_has( 7, 0, EBX, 11, ctxt, ops) +#define vcpu_has_mpx() vcpu_has( 7, 0, EBX, 14, ctxt, ops) +#define vcpu_has_avx512f() vcpu_has( 7, 0, EBX, 16, ctxt, ops) +#define vcpu_has_avx512dq() vcpu_has( 7, 0, EBX, 17, ctxt, ops) +#define vcpu_has_rdseed() vcpu_has( 7, 0, EBX, 18, ctxt, ops) +#define vcpu_has_adx() vcpu_has( 7, 0, EBX, 19, ctxt, ops) +#define vcpu_has_smap() vcpu_has( 7, 0, EBX, 20, ctxt, ops) +#define vcpu_has_avx512_ifma() vcpu_has( 7, 0, EBX, 21, ctxt, 
ops) +#define vcpu_has_clflushopt() vcpu_has( 7, 0, EBX, 23, ctxt, ops) +#define vcpu_has_clwb() vcpu_has( 7, 0, EBX, 24, ctxt, ops) +#define vcpu_has_avx512pf() vcpu_has( 7, 0, EBX, 26, ctxt, ops) +#define vcpu_has_avx512er() vcpu_has( 7, 0, EBX, 27, ctxt, ops) +#define vcpu_has_avx512cd() vcpu_has( 7, 0, EBX, 28, ctxt, ops) +#define vcpu_has_sha() vcpu_has( 7, 0, EBX, 29, ctxt, ops) +#define vcpu_has_avx512bw() vcpu_has( 7, 0, EBX, 30, ctxt, ops) +#define vcpu_has_avx512vl() vcpu_has( 7, 0, EBX, 31, ctxt, ops) +#define vcpu_has_avx512_vbmi() vcpu_has( 7, 0, ECX, 1, ctxt, ops) +#define vcpu_has_avx512_vbmi2() vcpu_has( 7, 0, ECX, 6, ctxt, ops) +#define vcpu_has_gfni() vcpu_has( 7, 0, ECX, 8, ctxt, ops) +#define vcpu_has_vaes() vcpu_has( 7, 0, ECX, 9, ctxt, ops) +#define vcpu_has_vpclmulqdq() vcpu_has( 7, 0, ECX, 10, ctxt, ops) +#define vcpu_has_avx512_vnni() vcpu_has( 7, 0, ECX, 11, ctxt, ops) +#define vcpu_has_avx512_bitalg() vcpu_has( 7, 0, ECX, 12, ctxt, ops) +#define vcpu_has_avx512_vpopcntdq() vcpu_has( 7, 0, ECX, 14, ctxt, ops) +#define vcpu_has_rdpid() vcpu_has( 7, 0, ECX, 22, ctxt, ops) +#define vcpu_has_movdiri() vcpu_has( 7, 0, ECX, 27, ctxt, ops) +#define vcpu_has_movdir64b() vcpu_has( 7, 0, ECX, 28, ctxt, ops) +#define vcpu_has_avx512_4vnniw() vcpu_has( 7, 0, EDX, 2, ctxt, ops) +#define vcpu_has_avx512_4fmaps() vcpu_has( 7, 0, EDX, 3, ctxt, ops) +#define vcpu_has_clzero() vcpu_has(0x80000008, NA, EBX, 0, ctxt, ops) +#define vcpu_has_wbnoinvd() vcpu_has(0x80000008, NA, EBX, 9, ctxt, ops) #define vcpu_must_have(feat) \ generate_exception_if(!vcpu_has_##feat(), EXC_UD) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
© 2016 - 2024 Red Hat, Inc.