[PULL 139/153] i386/cpu: Support AVX10.2 with AVX10 feature models

Paolo Bonzini posted 153 patches 1 week, 4 days ago
Maintainers: "Alex Bennée" <alex.bennee@linaro.org>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Thomas Huth <thuth@redhat.com>, Yonggang Luo <luoyonggang@gmail.com>, Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>, Cameron Esfahani <dirty@apple.com>, Roman Bolshakov <rbolshakov@ddn.com>, Phil Dennis-Jordan <phil@philjordan.eu>, Mads Ynddal <mads@ynddal.dk>, Magnus Kulke <magnus.kulke@linux.microsoft.com>, Wei Liu <wei.liu@kernel.org>, Stefano Stabellini <sstabellini@kernel.org>, Anthony PERARD <anthony@xenproject.org>, Paul Durrant <paul@xen.org>, "Edgar E. Iglesias" <edgar.iglesias@gmail.com>, David Hildenbrand <david@kernel.org>, Igor Mammedov <imammedo@redhat.com>, Alistair Francis <alistair.francis@wdc.com>, Stefan Berger <stefanb@linux.vnet.ibm.com>, John Snow <jsnow@redhat.com>, Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, Fam Zheng <fam@euphon.net>, Aarushi Mehta <mehta.aaru20@gmail.com>, Julia Suvorova <jusual@redhat.com>, Stefano Garzarella <sgarzare@redhat.com>, Stefan Weil <sw@weilnetz.de>, Samuel Thibault <samuel.thibault@ens-lyon.org>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, Zhao Liu <zhao1.liu@intel.com>, Peter Maydell <peter.maydell@linaro.org>, Mauro Carvalho Chehab <mchehab+huawei@kernel.org>, "Daniel P. Berrangé" <berrange@redhat.com>, Luc Michel <luc@lmichel.fr>, Damien Hedde <damien.hedde@dahe.fr>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Markus Armbruster <armbru@redhat.com>, Eduardo Habkost <eduardo@habkost.net>, Ani Sinha <anisinha@redhat.com>, Christian Schoenebeck <qemu_oss@crudebyte.com>, Greg Kurz <groug@kaod.org>, "Michael S. Tsirkin" <mst@redhat.com>, Dongjiu Geng <gengdongjiu1@gmail.com>, Aurelien Jarno <aurelien@aurel32.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Cédric Le Goater" <clg@kaod.org>, Steven Lee <steven_lee@aspeedtech.com>, Troy Lee <leetroy@gmail.com>, Jamin Lin <jamin_lin@aspeedtech.com>, Andrew Jeffery <andrew@codeconstruct.com.au>, Joel Stanley <joel@jms.id.au>, Tyrone Ting <kfting@nuvoton.com>, Hao Wu <wuhaotsh@google.com>, Beniamino Galvani <b.galvani@gmail.com>, Strahinja Jankovic <strahinja.p.jankovic@gmail.com>, Niek Linnenbank <nieklinnenbank@gmail.com>, Samuel Tardieu <sam@rfc1149.net>, Antony Pavlov <antonynpavlov@gmail.com>, Igor Mitsyanko <i.mitsyanko@gmail.com>, Jean-Christophe Dubois <jcd@tribudubois.net>, Andrey Smirnov <andrew.smirnov@gmail.com>, Bernhard Beschow <shentey@gmail.com>, Rob Herring <robh@kernel.org>, Subbaraya Sundeep <sundeep.lkml@gmail.com>, Jan Kiszka <jan.kiszka@web.de>, Felipe Balbi <balbi@kernel.org>, Radoslaw Biernacki <rad@semihalf.com>, Leif Lindholm <leif.lindholm@oss.qualcomm.com>, Eric Auger <eric.auger@redhat.com>, Alexandre Iooss <erdnaxe@crans.org>, Shannon Zhao <shannon.zhaosl@gmail.com>, Gerd Hoffmann <kraxel@redhat.com>, Laurent Vivier <laurent@vivier.eu>, Michael Rolnik <mrolnik@gmail.com>, Raphael Norwitz <raphael@enfabrica.net>, Helge Deller <deller@gmx.de>, "Clément Chigot" <chigot@adacore.com>, Frederic Konrad <konrad.frederic@yahoo.fr>, Alberto Garcia <berto@igalia.com>, Yoshinori Sato <yoshinori.sato@nifty.com>, Magnus Damm <magnus.damm@gmail.com>, Palmer Dabbelt <palmer@dabbelt.com>, Weiwei Li <liwei1518@gmail.com>, Daniel Henrique Barboza <dbarboza@ventanamicro.com>, Liu Zhiwei <zhiwei_liu@linux.alibaba.com>, Halil Pasic <pasic@linux.ibm.com>, Christian Borntraeger <borntraeger@linux.ibm.com>, Jason Herne <jjherne@linux.ibm.com>, Vijai Kumar K <vijai@behindbytes.com>, Nicholas Piggin <npiggin@gmail.com>, Harsh Prateek Bora <harshpb@linux.ibm.com>, "Collin L. Walling" <walling@linux.ibm.com>, Amit Shah <amit@kernel.org>, Yanan Wang <wangyanan55@huawei.com>, Riku Voipio <riku.voipio@iki.fi>, Paul Burton <paulburton@kernel.org>, Aleksandar Rikalo <arikalo@gmail.com>, Jonathan Cameron <jonathan.cameron@huawei.com>, Fan Ni <fan.ni@samsung.com>, Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>, "Hervé Poussineau" <hpoussin@reactos.org>, BALATON Zoltan <balaton@eik.bme.hu>, Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>, Dmitry Osipenko <dmitry.osipenko@collabora.com>, Dmitry Fleytman <dmitry.fleytman@gmail.com>, Ninad Palsule <ninad@linux.ibm.com>, Glenn Miles <milesg@linux.ibm.com>, "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>, Corey Minyard <cminyard@mvista.com>, Patrick Leis <venture@google.com>, Alejandro Jimenez <alejandro.j.jimenez@oracle.com>, Sairaj Kodilkar <sarunkod@amd.com>, Jason Wang <jasowang@redhat.com>, Yi Liu <yi.l.liu@intel.com>, "Clément Mathieu--Drif" <clement.mathieu--drif@eviden.com>, David Woodhouse <dwmw2@infradead.org>, Sergio Lopez <slp@redhat.com>, Alexander Graf <graf@amazon.com>, Dorjoy Chowdhury <dorjoychy111@gmail.com>, Song Gao <gaosong@loongson.cn>, Bibo Mao <maobibo@loongson.cn>, Jiaxun Yang <jiaxun.yang@flygoat.com>, Huacai Chen <chenhuacai@kernel.org>, Jia Liu <proljc@gmail.com>, Gautam Menghani <gautam@linux.ibm.com>, Aditya Gupta <adityag@linux.ibm.com>, Xiao Guangrong <xiaoguangrong.eric@gmail.com>, Alexander Bulekov <alxndr@bu.edu>, Bandan Das <bsd@redhat.com>, Darren Kenny <darren.kenny@oracle.com>, Qiuhao Li <Qiuhao.Li@outlook.com>, Artyom Tarasenko <atar4qemu@gmail.com>, Gustavo Romero <gustavo.romero@linaro.org>, Francisco Iglesias <francisco.iglesias@amd.com>, Pavel Pisa <pisa@cmp.felk.cvut.cz>, Vikram Garhwal <vikram.garhwal@bytedance.com>, Sriram Yagnaraman <sriram.yagnaraman@ericsson.com>, Max Filippov <jcmvbkbc@gmail.com>, Jiri Pirko <jiri@resnulli.us>, Sven Schnelle <svens@stackframe.org>, Stafford Horne <shorne@gmail.com>, Elena Ufimtseva <elena.ufimtseva@oracle.com>, Jagannathan Raman <jag.raman@oracle.com>, Chinmay Rath <rathc@linux.ibm.com>, Sai Pavan Boddu <sai.pavan.boddu@amd.com>, Ran Wang <wangran@bosc.ac.cn>, Anup Patel <anup.patel@wdc.com>, Eric Farman <farman@linux.ibm.com>, Matthew Rosato <mjrosato@linux.ibm.com>, Ilya Leoshkevich <iii@linux.ibm.com>, Tony Krowiak <akrowiak@linux.ibm.com>, Farhan Ali <alifm@linux.ibm.com>, Nina Schoetterl-Glausch <nsg@linux.ibm.com>, Jared Rossi <jrossi@linux.ibm.com>, Zhuoying Cai <zycai@linux.ibm.com>, Cornelia Huck <cohuck@redhat.com>, Hannes Reinecke <hare@suse.com>, Bin Meng <bmeng.cn@gmail.com>, Titus Rwantare <titusr@google.com>, Bastian Koppelmann <kbastian@mail.uni-paderborn.de>, Jeuk Kim <jeuk20.kim@samsung.com>, "Hongren (Zenithal) Zheng" <i@zenithal.me>, "Canokeys.org" <contact@canokeys.org>, John Levon <john.levon@nutanix.com>, Thanos Makatos <thanos.makatos@nutanix.com>, Alex Williamson <alex@shazbot.org>, Tomita Moeko <tomitamoeko@gmail.com>, Viresh Kumar <viresh.kumar@linaro.org>, Mathieu Poirier <mathieu.poirier@linaro.org>, Manos Pitsidianakis <manos.pitsidianakis@linaro.org>, "Gonglei (Arei)" <arei.gonglei@huawei.com>, Eric Blake <eblake@redhat.com>, Keith Busch <kbusch@kernel.org>, Klaus Jensen <its@irrelevant.dk>, Jesper Devantier <foss@defmacro.it>, Huai-Cheng Kuo <hchkuo@avery-design.com.tw>, Chris Browy <cbrowy@avery-design.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, "Dr. David Alan Gilbert" <dave@treblig.org>, Zhang Chen <zhangckid@gmail.com>, Li Zhijian <lizhijian@fujitsu.com>, Mahmoud Mandour <ma.mandourr@gmail.com>, Pierrick Bouvier <pierrick.bouvier@linaro.org>, Cleber Rosa <crosa@redhat.com>, Michael Roth <michael.roth@amd.com>, Maksim Davydov <davydov-max@yandex-team.ru>, David Gibson <david@gibson.dropbear.id.au>, Hyman Huang <yong.huang@smartx.com>, Brian Cain <brian.cain@oss.qualcomm.com>, Marcelo Tosatti <mtosatti@redhat.com>, Pedro Barbuda <pbarbuda@microsoft.com>, Mohamed Mediouni <mohamed@unpredictable.fr>, Coiby Xu <Coiby.Xu@gmail.com>
[PULL 139/153] i386/cpu: Support AVX10.2 with AVX10 feature models
Posted by Paolo Bonzini 1 week, 3 days ago
From: Zhao Liu <zhao1.liu@intel.com>

Intel AVX10 Version 2 (Intel AVX10.2) includes a suite of new
instructions delivering new AI features and performance, accelerated
media processing, expanded Web Assembly, and Cryptography support, along
with enhancements to existing legacy instructions for completeness and
efficiency, and it is enumerated as version 2 in CPUID 0x24.0x0.EBX[bits
0-7] [*].

Considerring "Intel CPUs which support Intel AVX10.2 will include an
enumeration for AVX10_VNNI_INT (CPUID.24H.01H:ECX.AVX10_VNNI_INT[2])"
[*] and EVEX VPDP* instructions for INT8/INT16 (AVX10_VNNI_INT) are
detected by either AVX10.2 OR AVX10_VNNI_INT, AVX10_VNNI_INT is part of
AVX10.2, so any Intel AVX10.2 implementation lacking the AVX10_VNNI_INT
enumeration should be considered buggy hardware.

Therefore, it's necessary to set AVX10_VNNI_INT enumeration for Guest
when the user specifies AVX10 version 2. For this, introduce AVX10
models to explicitly define the feature bits included in different AVX10
versions.

[*]: Intel Advanced Vector Extensions 10.2 Architecture Specification
     (rev 5.0).

Tested-by: Xudong Hao <xudong.hao@intel.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20251215073743.4055227-5-zhao1.liu@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.h |   2 +
 target/i386/cpu.c | 120 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 115 insertions(+), 7 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 653f14e516c..a4a8bc61a99 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2211,6 +2211,8 @@ typedef struct CPUArchState {
     FeatureWordArray features;
     /* AVX10 version */
     uint8_t avx10_version;
+    /* AVX10 (CPUID 0x24) maximum supported sub-leaf. */
+    uint8_t avx10_max_subleaf;
     /* Features that were explicitly enabled/disabled */
     FeatureWordArray user_features;
     uint32_t cpuid_model[12];
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index e0d75d170fa..9c81e4e1f5a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2382,6 +2382,40 @@ x86_cpu_def_get_versions(const X86CPUDefinition *def)
     return def->versions ?: default_version_list;
 }
 
+/* CPUID 0x24.0x0 (EAX, EBX, ECX, EDX) and 0x24.0x1 (EAX, EBX, ECX, EDX) */
+#define AVX10_FEATURE_WORDS 8
+
+typedef struct AVX10VersionDefinition {
+    const char *name;
+    /* AVX10 version */
+    uint8_t version;
+    /* AVX10 (CPUID 0x24) maximum supported sub-leaf. */
+    uint8_t max_subleaf;
+    FeatureMask *features;
+} AVX10VersionDefinition;
+
+static const AVX10VersionDefinition builtin_avx10_defs[] = {
+    {
+        .name = "avx10.1",
+        .version = 1,
+        .max_subleaf = 0,
+        .features = (FeatureMask[]) {
+            { FEAT_7_1_EDX,         CPUID_7_1_EDX_AVX10 },
+            { FEAT_24_0_EBX,        CPUID_24_0_EBX_AVX10_VL_MASK },
+            { /* end of list */ }
+        }
+    },
+    {
+        .name = "avx10.2",
+        .version = 2,
+        .max_subleaf = 1,
+        .features = (FeatureMask[]) {
+            { FEAT_24_1_ECX,         CPUID_24_1_ECX_AVX10_VNNI_INT },
+            { /* end of list */ }
+        }
+    },
+};
+
 static const CPUCaches epyc_cache_info = {
     .l1d_cache = &(CPUCacheInfo) {
         .type = DATA_CACHE,
@@ -7242,6 +7276,65 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor *v, const char *name,
     cpu->env.tsc_khz = cpu->env.user_tsc_khz = value / 1000;
 }
 
+static void x86_cpuid_get_avx10_version(Object *obj, Visitor *v,
+                                        const char *name, void *opaque,
+                                        Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+    uint8_t value;
+
+    value = cpu->env.avx10_version;
+    visit_type_uint8(v, name, &value, errp);
+}
+
+static bool x86_cpu_apply_avx10_features(X86CPU *cpu, uint8_t version,
+                                         Error **errp)
+{
+    const AVX10VersionDefinition *def;
+    CPUX86State *env = &cpu->env;
+
+    if (!version) {
+        env->avx10_version = 0;
+        env->avx10_max_subleaf = 0;
+        return true;
+    }
+
+    for (int i = 0; i < ARRAY_SIZE(builtin_avx10_defs); i++) {
+        FeatureMask *f;
+
+        def = &builtin_avx10_defs[i];
+        for (f = def->features; f && f->mask; f++) {
+            env->features[f->index] |= f->mask;
+        }
+
+        if (def->version == version) {
+            env->avx10_version = version;
+            env->avx10_max_subleaf = def->max_subleaf;
+            break;
+        }
+    }
+
+    if (def->version < version) {
+        error_setg(errp, "avx10-version can be at most %d", def->version);
+        return false;
+    }
+    return true;
+}
+
+static void x86_cpuid_set_avx10_version(Object *obj, Visitor *v,
+                                        const char *name, void *opaque,
+                                        Error **errp)
+{
+    X86CPU *cpu = X86_CPU(obj);
+    uint8_t value;
+
+    if (!visit_type_uint8(v, name, &value, errp)) {
+        return;
+    }
+
+    x86_cpu_apply_avx10_features(cpu, value, errp);
+}
+
 /* Generic getter for "feature-words" and "filtered-features" properties */
 static void x86_cpu_get_feature_words(Object *obj, Visitor *v,
                                       const char *name, void *opaque,
@@ -7932,8 +8025,10 @@ static void x86_cpu_load_model(X86CPU *cpu, const X86CPUModel *model)
      */
     object_property_set_str(OBJECT(cpu), "vendor", def->vendor, &error_abort);
 
-    object_property_set_uint(OBJECT(cpu), "avx10-version", def->avx10_version,
-                             &error_abort);
+    if (def->avx10_version) {
+        object_property_set_uint(OBJECT(cpu), "avx10-version",
+                                 def->avx10_version, &error_abort);
+    }
 
     x86_cpu_apply_version_props(cpu, model);
 
@@ -8480,9 +8575,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
             break;
         }
         if (count == 0) {
-            uint32_t unused;
-            x86_cpu_get_supported_cpuid(0x1E, 0, eax, &unused,
-                                        &unused, &unused);
+            *eax = env->avx10_max_subleaf;
             *ebx = env->features[FEAT_24_0_EBX] | env->avx10_version;
         } else if (count == 1) {
             *ecx = env->features[FEAT_24_1_ECX];
@@ -9164,7 +9257,11 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
         if ((env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) && !env->avx10_version) {
             uint32_t eax, ebx, ecx, edx;
             x86_cpu_get_supported_cpuid(0x24, 0, &eax, &ebx, &ecx, &edx);
-            env->avx10_version = ebx & 0xff;
+
+            if (!object_property_set_uint(OBJECT(cpu), "avx10-version",
+                                          ebx & 0xff, errp)) {
+                return;
+            }
         }
     }
 
@@ -9393,6 +9490,11 @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose)
                 warn_report("%s: avx10.%d. Adjust to avx10.%d",
                             prefix, env->avx10_version, version);
             }
+            /*
+             * Discrete feature bits have been checked and filtered based on
+             * host support. So it's safe to change version without reverting
+             * other feature bits.
+             */
             env->avx10_version = version;
             have_filtered_features = true;
         }
@@ -10229,7 +10331,6 @@ static const Property x86_cpu_properties[] = {
     DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0),
     DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0),
     DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0),
-    DEFINE_PROP_UINT8("avx10-version", X86CPU, env.avx10_version, 0),
     DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0),
     DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true),
     DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor),
@@ -10371,6 +10472,11 @@ static void x86_cpu_common_class_init(ObjectClass *oc, const void *data)
                               x86_cpu_get_unavailable_features,
                               NULL, NULL, NULL);
 
+    object_class_property_add(oc, "avx10-version",  "uint8",
+                              x86_cpuid_get_avx10_version,
+                              x86_cpuid_set_avx10_version,
+                              NULL, NULL);
+
 #if !defined(CONFIG_USER_ONLY)
     object_class_property_add(oc, "crash-information", "GuestPanicInformation",
                               x86_cpu_get_crash_info_qom, NULL, NULL, NULL);
-- 
2.52.0