From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713531; cv=none; d=zohomail.com; s=zohoarc; b=SWBaLhNadsC5li+grKCQNjv/I2h1iNc6sYBCyzjZ7H3pml9qdYO5B0r6hFdeewEkWeasxnwi2Pcn9Mb7LVTAuD3XDoRGUWfARghr2ir/ErnLVAupYx/nCDTEIRRJompBoJgwKVGO/ZrfKFzVDWnFrQqJf04qMRNB1K/0nLWIzlk= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713531; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=8qNlguD+njmm2618TYLP1T4Fj6fykhZdMOk4HXO6te4=; b=knWDf0At7V+2upeOsTfZ4EMo20Fe1V+x0wDyRdJKhL+OF1vgtx52ReRnHYh1S8wjc0FsRMRIVYSULBg8DAd5XyKf7ZDSpF2xnzfmDYd6VsQpo+OJ0hJPIqXBUN0zlfIBaBrcWjXvt2asm187HkiIEFii+mfGNB6LXFxAhJgl7yo= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663713531551839.3206349671508; Tue, 20 Sep 2022 15:38:51 -0700 (PDT) Received: from localhost ([::1]:34122 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oaltC-0007Fu-Hl for importer@patchew.org; Tue, 20 Sep 2022 18:38:50 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:39842) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzn-0006H2-9K for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:19 -0400 Received: from 
us-smtp-delivery-124.mimecast.com ([170.10.129.124]:40295) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzk-00022Q-4a for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:17 -0400 Received: from mail-ed1-f71.google.com (mail-ed1-f71.google.com [209.85.208.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-551-opfu15tQM9SSS8t3dlpUAQ-1; Tue, 20 Sep 2022 13:25:13 -0400 Received: by mail-ed1-f71.google.com with SMTP id h13-20020a056402280d00b004528c8400afso2343728ede.6 for ; Tue, 20 Sep 2022 10:25:13 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id n16-20020a1709062bd000b00730bfe6adc4sm200151ejg.37.2022.09.20.10.25.10 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:10 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694715; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=8qNlguD+njmm2618TYLP1T4Fj6fykhZdMOk4HXO6te4=; b=VpY7Y91iRsdH3rda3J2/MXga088W/lpD2zRYtcsDgu13R18dvPXGynhvsA8T3phgmGIRR+ x+2vv8mKNIai7gvgRfzTseKcOOoaq62UaOK07RIErvbPlsQLiA3pn9/IQ2VKyjYSTFWeYE KB3T8/pFijtgA5hpfJgj0y3kaYhe1yI= X-MC-Unique: opfu15tQM9SSS8t3dlpUAQ-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=8qNlguD+njmm2618TYLP1T4Fj6fykhZdMOk4HXO6te4=; b=zAlqA7+tvm7GhC+x+Y8i9iEEhhLE9ZwMFHXurKn2XSs1y+mOZ8eh7+WDawLxKLwm/V KjsiupHGBmf5XTlYJUv0fXP5k/PyPZ1QTBSqL72yaJ3un+f/zhPuj8fGGunLuJ1eLWDO 
PvRuo9hACD0MNAmgARxkkLeywoZUbh98fNbdbTur0Nk+s1Cp9Tjl2ubqCpNyrm7piGAg O3CFqy3sLgZq6ukj49a3+KLx1VKLTvvwBObVoLdIX2Ezkxtmo+uB/G3zsD1EqBTVhkES X7hvcrKivM4iZ/qWm1jR5h8sWTSjUGMXFRBD5cTlgVngiugzIeo8kZX341MJXIFN2xqJ j76Q== X-Gm-Message-State: ACrzQf0ER8Sl7vmn4nXG8sj6S/gamMySYaOkBlF1j8kO1b3nQ6TO9uj0 iC6AN/ARUQtVjlYEiffUFtfZf+/v0z5x9aLQMUv/YX0DXK3eP9hwYQhUT145w1TahZOJuxi3s+W G+UCNviRnA8C6+ud6wIzyzmk8vCuSoWJ5b+MHtqfwFOIRmjAtEg1YIiLAyBq7Qnle6sg= X-Received: by 2002:a17:907:724b:b0:780:49ab:4b66 with SMTP id ds11-20020a170907724b00b0078049ab4b66mr18029476ejc.67.1663694711772; Tue, 20 Sep 2022 10:25:11 -0700 (PDT) X-Google-Smtp-Source: AMsMyM5zyfXra/VcooDJi91fwqUYX7OjlSobMq3RdXDN1J6geGN2NKFLzZ9SSQPWMWJ8cWTEqa3VKg== X-Received: by 2002:a17:907:724b:b0:780:49ab:4b66 with SMTP id ds11-20020a170907724b00b0078049ab4b66mr18029454ejc.67.1663694711444; Tue, 20 Sep 2022 10:25:11 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 01/37] target/i386: Define XMMReg and access macros, align ZMM registers Date: Tue, 20 Sep 2022 19:24:31 +0200 Message-Id: <20220920172507.95568-2-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no 
X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713532517100001 Content-Type: text/plain; charset="utf-8" From: Richard Henderson This will be used for emission and endian adjustments of gvec operations. Signed-off-by: Richard Henderson Message-Id: <20220822223722.1697758-2-richard.henderson@linaro.org> Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 56 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 82004b65b9..8311b69c88 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1233,18 +1233,34 @@ typedef struct SegmentCache { uint32_t flags; } SegmentCache; =20 -#define MMREG_UNION(n, bits) \ - union n { \ - uint8_t _b_##n[(bits)/8]; \ - uint16_t _w_##n[(bits)/16]; \ - uint32_t _l_##n[(bits)/32]; \ - uint64_t _q_##n[(bits)/64]; \ - float32 _s_##n[(bits)/32]; \ - float64 _d_##n[(bits)/64]; \ - } +typedef union MMXReg { + uint8_t _b_MMXReg[64 / 8]; + uint16_t _w_MMXReg[64 / 16]; + uint32_t _l_MMXReg[64 / 32]; + uint64_t _q_MMXReg[64 / 64]; + float32 _s_MMXReg[64 / 32]; + float64 _d_MMXReg[64 / 64]; +} MMXReg; =20 -typedef MMREG_UNION(ZMMReg, 512) ZMMReg; -typedef MMREG_UNION(MMXReg, 64) MMXReg; +typedef union XMMReg { + uint64_t _q_XMMReg[128 / 64]; +} XMMReg; + +typedef union YMMReg { + uint64_t _q_YMMReg[256 / 64]; + XMMReg _x_YMMReg[256 / 128]; +} YMMReg; + +typedef union ZMMReg { + uint8_t _b_ZMMReg[512 / 8]; + uint16_t _w_ZMMReg[512 / 16]; + uint32_t _l_ZMMReg[512 / 32]; + uint64_t _q_ZMMReg[512 / 64]; + float32 _s_ZMMReg[512 / 32]; + float64 _d_ZMMReg[512 / 64]; + XMMReg _x_ZMMReg[512 / 128]; + YMMReg _y_ZMMReg[512 / 256]; +} ZMMReg; =20 typedef struct BNDReg { 
uint64_t lb; @@ -1267,6 +1283,13 @@ typedef struct BNDCSReg { #define ZMM_S(n) _s_ZMMReg[15 - (n)] #define ZMM_Q(n) _q_ZMMReg[7 - (n)] #define ZMM_D(n) _d_ZMMReg[7 - (n)] +#define ZMM_X(n) _x_ZMMReg[3 - (n)] +#define ZMM_Y(n) _y_ZMMReg[1 - (n)] + +#define XMM_Q(n) _q_XMMReg[1 - (n)] + +#define YMM_Q(n) _q_YMMReg[3 - (n)] +#define YMM_X(n) _x_YMMReg[1 - (n)] =20 #define MMX_B(n) _b_MMXReg[7 - (n)] #define MMX_W(n) _w_MMXReg[3 - (n)] @@ -1279,6 +1302,13 @@ typedef struct BNDCSReg { #define ZMM_S(n) _s_ZMMReg[n] #define ZMM_Q(n) _q_ZMMReg[n] #define ZMM_D(n) _d_ZMMReg[n] +#define ZMM_X(n) _x_ZMMReg[n] +#define ZMM_Y(n) _y_ZMMReg[n] + +#define XMM_Q(n) _q_XMMReg[n] + +#define YMM_Q(n) _q_YMMReg[n] +#define YMM_X(n) _x_YMMReg[n] =20 #define MMX_B(n) _b_MMXReg[n] #define MMX_W(n) _w_MMXReg[n] @@ -1556,8 +1586,8 @@ typedef struct CPUArchState { float_status mmx_status; /* for 3DNow! float ops */ float_status sse_status; uint32_t mxcsr; - ZMMReg xmm_regs[CPU_NB_REGS =3D=3D 8 ? 8 : 32]; - ZMMReg xmm_t0; + ZMMReg xmm_regs[CPU_NB_REGS =3D=3D 8 ? 
8 : 32] QEMU_ALIGNED(16); + ZMMReg xmm_t0 QEMU_ALIGNED(16); MMXReg mmx_t0; =20 uint64_t opmask_regs[NB_OPMASK_REGS]; --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663709771; cv=none; d=zohomail.com; s=zohoarc; b=Mfi6rnfvUTX7tmtD6v05lLRjhLO4NMqxrbJZnLBgoLDNBBOj+OnSpPZvZdettuOut1Hs6LfPpTibxZ9sP4nn+FhmeRbQ17oPKlgvWfSN7EhAFsESRO68qk0BpHZmii3w8V5hsbTsMWuU1OLqror6EKaDsNYgXd0sOcVbrfJpzKk= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663709771; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=18z5vF7zZEAhYiakuZ5NSGXaQJ0lYrU3mqUdyfIIYa4=; b=KAR6fN8iP0aB/Db+gmLDpVYvU9hnq8lXXeNkaBFO83A+cAAjUlb+hqBM0X5UefgsEP5fb9d12XzIqmShhddS9/a2BeiCdErnzRNLRK1fDa8hLukqEu0I9ZhA1mSNNRYfSpS+8NsGqwARCKGvDQs8DegAAKHXnbSr4QRPS6WIekM= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663709771277442.8835853041418; Tue, 20 Sep 2022 14:36:11 -0700 (PDT) Received: from localhost ([::1]:54192 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oakuX-0003Gy-3i for importer@patchew.org; Tue, 20 Sep 2022 17:36:09 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:39846) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 
4.90_1) (envelope-from ) id 1oagzp-0006Ja-EK for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:31 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:35769) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzm-00022T-Gz for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:20 -0400 Received: from mail-ej1-f70.google.com (mail-ej1-f70.google.com [209.85.218.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-280-43j1AU_rNe2iPtyLfT9q9w-1; Tue, 20 Sep 2022 13:25:14 -0400 Received: by mail-ej1-f70.google.com with SMTP id sb32-20020a1709076da000b0077faea20701so1777090ejc.10 for ; Tue, 20 Sep 2022 10:25:14 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id t17-20020aa7db11000000b00453a49ea65bsm154699eds.86.2022.09.20.10.25.12 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:12 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694715; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=18z5vF7zZEAhYiakuZ5NSGXaQJ0lYrU3mqUdyfIIYa4=; b=F87vfb4UPiurq6AgoTroURfloV0vBE47TIX8MbM2Sxu0NbU7j62y8dFaN5YBYYk63Aefhx y5/Ct+Yy4aEeGZE5r/BFWejAtjiCKzDG6dKrnB85yKzo4o7B8e6pPDV31/KVkfEDpzq6Av WRPhLMVSmeb6itv/WNJKPxz+HG+FJQI= X-MC-Unique: 43j1AU_rNe2iPtyLfT9q9w-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=18z5vF7zZEAhYiakuZ5NSGXaQJ0lYrU3mqUdyfIIYa4=; b=tZpy1AVSivV9fQEti/bISrczzS8e9p8f1lgdHjtpCMVoGWWNFGguPcrU4bXlHFEuIm 
/G7D9qO4syfmzPSRESUGX25PYMbY0FAqSd8oIriWkfjyT6RPrwq3qdUNYiLx1a2FP64T FoLf8sQgkna4g5tnMC7TzKZHOyROwE9kYD916g5DxIfxoCcs8fA/xUZkZ0WBkvcn9t5W SllUy/rwW+9bqWm0iEnCNjDRwaV0sA0redSb6GOtDbWnXZMbXaCPXy+ALH+ecSUcl965 WPfyqsCBxn6IwezZKNM54W4lfOhw9NpIr9roTj3RtJBU2k+Yzl8DcVHhMjKiBPoS3goi I5qQ== X-Gm-Message-State: ACrzQf1aWUzBJJT+/+FgCOMKmK2NDPzZbosYU8/vs+AGaIfa3RBk2BvW t4j4Hyvs+izCaBauyGVZeJzkL9qSc7MHACLlar3HTBDHD7dGmoGmOR02hxSzvlfA9kgj0HYO5Gp xEwnzBnOcPjm6X1bmrC0RUJ43eGsUfoC9dF/Y6yP67+i2tLjtq1eoAunsfAxwdz95HRA= X-Received: by 2002:a05:6402:428c:b0:440:8259:7a2b with SMTP id g12-20020a056402428c00b0044082597a2bmr20578276edc.329.1663694713204; Tue, 20 Sep 2022 10:25:13 -0700 (PDT) X-Google-Smtp-Source: AMsMyM5F8p9se+aE17/voopZugN2PLS2HevN4jQLpd9okvj82bBh26EvNRCf6mpQdzRrLSJCsdMPzg== X-Received: by 2002:a05:6402:428c:b0:440:8259:7a2b with SMTP id g12-20020a056402428c00b0044082597a2bmr20578242edc.329.1663694712790; Tue, 20 Sep 2022 10:25:12 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 02/37] target/i386: make ldo/sto operations consistent with ldq Date: Tue, 20 Sep 2022 19:24:32 +0200 Message-Id: <20220920172507.95568-3-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, 
SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663709772247100001 Content-Type: text/plain; charset="utf-8" ldq takes the offset (within CPUX86State) of the first byte of the 64-bit word to load, whereas ldo takes the offset of the first byte of the enclosing ZMMReg. Make ldo/sto consistent with ldq by passing the offset of the 128-bit XMMReg itself; this will be useful in the new SSE decoder's load/writeback routines. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/translate.c | 43 +++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index b25109ea24..6802c5a96d 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2762,29 +2762,29 @@ static inline void gen_ldo_env_A0(DisasContext *s, = int offset, bool align) int mem_index =3D s->mem_index; tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ | (align ? 
MO_ALIGN_16 : 0)); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0= ))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0= ))); tcg_gen_addi_tl(s->tmp0, s->A0, 8); tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1= ))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1= ))); } =20 static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align) { int mem_index =3D s->mem_index; - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0= ))); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0= ))); tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ | (align ? MO_ALIGN_16 : 0)); tcg_gen_addi_tl(s->tmp0, s->A0, 8); - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1= ))); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1= ))); tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ); } =20 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset) { - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q= (0))); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q= (0))); - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q= (1))); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q= (1))); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q= (0))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q= (0))); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q= (1))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q= (1))); } =20 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset) @@ -2806,6 +2806,7 @@ static inline void gen_op_movq_env_0(DisasContext *s,= int d_offset) } =20 #define ZMM_OFFSET(reg) 
offsetof(CPUX86State, xmm_regs[reg]) +#define XMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg].ZMM_X(0)) =20 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg); typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg); @@ -3319,13 +3320,13 @@ static void gen_sse(CPUX86State *env, DisasContext = *s, int b, if (mod =3D=3D 3) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_sto_env_A0(s, ZMM_OFFSET(reg), true); + gen_sto_env_A0(s, XMM_OFFSET(reg), true); break; case 0x3f0: /* lddqu */ if (mod =3D=3D 3) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, ZMM_OFFSET(reg), false); + gen_ldo_env_A0(s, XMM_OFFSET(reg), false); break; case 0x22b: /* movntss */ case 0x32b: /* movntsd */ @@ -3394,12 +3395,12 @@ static void gen_sse(CPUX86State *env, DisasContext = *s, int b, case 0x26f: /* movdqu xmm, ea */ if (mod !=3D 3) { gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, ZMM_OFFSET(reg), + gen_ldo_env_A0(s, XMM_OFFSET(reg), /* movaps, movapd, movdqa */ b =3D=3D 0x028 || b =3D=3D 0x128 || b =3D= =3D 0x16f); } else { rm =3D (modrm & 7) | REX_B(s); - gen_op_movo(s, ZMM_OFFSET(reg), ZMM_OFFSET(rm)); + gen_op_movo(s, XMM_OFFSET(reg), XMM_OFFSET(rm)); } break; case 0x210: /* movss xmm, ea */ @@ -3455,7 +3456,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s= , int b, case 0x212: /* movsldup */ if (mod !=3D 3) { gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, ZMM_OFFSET(reg), true); + gen_ldo_env_A0(s, XMM_OFFSET(reg), true); } else { rm =3D (modrm & 7) | REX_B(s); gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0= )), @@ -3497,7 +3498,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s= , int b, case 0x216: /* movshdup */ if (mod !=3D 3) { gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, ZMM_OFFSET(reg), true); + gen_ldo_env_A0(s, XMM_OFFSET(reg), true); } else { rm =3D (modrm & 7) | REX_B(s); gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1= )), @@ -3601,12 +3602,12 @@ static void 
gen_sse(CPUX86State *env, DisasContext = *s, int b, case 0x27f: /* movdqu ea, xmm */ if (mod !=3D 3) { gen_lea_modrm(env, s, modrm); - gen_sto_env_A0(s, ZMM_OFFSET(reg), + gen_sto_env_A0(s, XMM_OFFSET(reg), /* movaps, movapd, movdqa */ b =3D=3D 0x029 || b =3D=3D 0x129 || b =3D= =3D 0x17f); } else { rm =3D (modrm & 7) | REX_B(s); - gen_op_movo(s, ZMM_OFFSET(rm), ZMM_OFFSET(reg)); + gen_op_movo(s, XMM_OFFSET(rm), XMM_OFFSET(reg)); } break; case 0x211: /* movss ea, xmm */ @@ -3758,7 +3759,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s= , int b, gen_helper_enter_mmx(cpu_env); if (mod !=3D 3) { gen_lea_modrm(env, s, modrm); - op2_offset =3D offsetof(CPUX86State,xmm_t0); + op2_offset =3D offsetof(CPUX86State, xmm_t0.ZMM_X(0)); /* FIXME: should be 64-bit access if b1 =3D=3D 0. */ gen_ldo_env_A0(s, op2_offset, !!b1); } else { @@ -3950,10 +3951,10 @@ static void gen_sse(CPUX86State *env, DisasContext = *s, int b, offsetof(ZMMReg, ZMM_W(0))); break; case 0x2a: /* movntdqa */ - gen_ldo_env_A0(s, op1_offset, true); + gen_ldo_env_A0(s, op1_offset + offsetof(ZMMReg, ZM= M_X(0)), true); return; default: - gen_ldo_env_A0(s, op2_offset, true); + gen_ldo_env_A0(s, op2_offset + offsetof(ZMMReg, ZM= M_X(0)), true); } } if (!op6->fn[b1].op1) { @@ -4535,7 +4536,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s= , int b, } else { op2_offset =3D offsetof(CPUX86State, xmm_t0); gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, op2_offset, true); + gen_ldo_env_A0(s, op2_offset + offsetof(ZMMReg, ZMM_X(0)),= true); } =20 val =3D x86_ldub_code(env, s); @@ -4642,7 +4643,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s= , int b, break; default: /* 128 bit access */ - gen_ldo_env_A0(s, op2_offset, true); + gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_X(0= )), true); break; } } else { --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org 
designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663709979; cv=none; d=zohomail.com; s=zohoarc; b=XMtyY/obk9Mt7htRGEF/3Wm489ldNEIZJoHufhKk393diLlQOkQH5Fw3gbuXbm53zc8mkyumj402AjOP+oQ6IH9YX54lRnIbg3UIiZKqMxS6B5loMCSUTXnkgFpJBftmj2AZ981qxMu9KIVVjy95OVWyUkoicTy8/y1Jatit9mw= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663709979; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=zmNqQLse27todSNojwG6cPQ7ioarN00Tl0IZY7IVvsY=; b=ab5bPsZDk4IuaGY6Xrt5YjMa4Zl+wg2PkMjHuNNZpl7P0uOvh5BmZnwkR7TBTL5XtZgqSq8aL12xx2gtxif8IuYyTmI1mehGVBMoxXcOIXs6rhv9bXzekVZoLq9AH0YfzE9xOMU2xhE1v0dHjbNAkq5/46JgCOZIMaL9a7KkOWE= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663709979232127.28893116117808; Tue, 20 Sep 2022 14:39:39 -0700 (PDT) Received: from localhost ([::1]:51482 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oakxt-00028x-W2 for importer@patchew.org; Tue, 20 Sep 2022 17:39:38 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:39844) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzp-0006HF-D8 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:26 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:54691) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 
1oagzl-00022d-TI for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:19 -0400 Received: from mail-ej1-f71.google.com (mail-ej1-f71.google.com [209.85.218.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-634-ts6OWSYPPBquvCS08LXckA-1; Tue, 20 Sep 2022 13:25:15 -0400 Received: by mail-ej1-f71.google.com with SMTP id gn33-20020a1709070d2100b007815bfd2c44so1782352ejc.16 for ; Tue, 20 Sep 2022 10:25:15 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id o3-20020a170906768300b0073ddff7e432sm210543ejm.14.2022.09.20.10.25.13 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:13 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694716; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=zmNqQLse27todSNojwG6cPQ7ioarN00Tl0IZY7IVvsY=; b=OB2sMWZ9kfEHrMcSO2mWQIWUe9pw4ZfDITrUgH3t7SCpZ4XHMvLbpUzyYYtCYgWEAAN49s ulZvAmQM+aXPyMLIb8BiK+sz8IGRD+UpQvENy1D7XQHEGXQADmlmj/cZissQeZDXTZo+Kr Ik2TsYYo9u/+/prt5wktDvVqfOFxkQY= X-MC-Unique: ts6OWSYPPBquvCS08LXckA-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=zmNqQLse27todSNojwG6cPQ7ioarN00Tl0IZY7IVvsY=; b=rlRE9LmsTY/oSfioDl+f13Xrm1UXyMchuFfphvXUR635lz0F3kK/IrDEAYW/V77FCi q4uAXeIUhWFB7X/MvBASH3o8zEnyAM12LjbjMev15sea0oZxEWbx0VVhhhAlap+UbE2N CSInQExfa4hJQDpHW7P9snl3zKUkXgwJJhYTeJNYWVBRUmGCLEXxhP5Hl50gN83t+loS j7NX4XPt+0x7y01il/MsWkeAkL3Yb6ry2TWDb0nx1HAlGTQsfWmQ/ixc3IcIHh0jTdCU CIwFuiSkVNUBUCUkzmNGjQAeJkYde97lK/aaUxBGFbyV7WG+eiROhVSnqGZObqrm/HgT 8kiQ== 
X-Gm-Message-State: ACrzQf21hxzwnxnpfnzJklaJjprV6aMO+6K3lkFYJvgBAVwuMXAaz53s F7RRFENrq5CqKHwn6cNooyA9WKZN707a7sFZzAJgrau7giynhujRL3pORMKguW1uQaMfFrC8/qj oMPZa5l4B7LAepuGrlbtMBuT7LH8cqCFvCDn+k8axq+ewVBTdpdBI0p+dNdCgd9SWUAk= X-Received: by 2002:a17:907:70c:b0:740:33f3:cbab with SMTP id xb12-20020a170907070c00b0074033f3cbabmr17669743ejb.600.1663694714379; Tue, 20 Sep 2022 10:25:14 -0700 (PDT) X-Google-Smtp-Source: AMsMyM65Py8nKyfgdAw0fcgsYuE6ahIQgbZp7x6ONOJfBJfYaEuAbNgBwAx+vxct6yOLse5b2PT9Kw== X-Received: by 2002:a17:907:70c:b0:740:33f3:cbab with SMTP id xb12-20020a170907070c00b0074033f3cbabmr17669716ejb.600.1663694714101; Tue, 20 Sep 2022 10:25:14 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 03/37] target/i386: REPZ and REPNZ are mutually exclusive Date: Tue, 20 Sep 2022 19:24:33 +0200 Message-Id: <20220920172507.95568-4-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: 
qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663709980255100001 Content-Type: text/plain; charset="utf-8" The later prefix wins if both are present, make it show in s->prefix too. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 6802c5a96d..0f45a9f5ad 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4753,9 +4753,11 @@ static target_ulong disas_insn(DisasContext *s, CPUS= tate *cpu) switch (b) { case 0xf3: prefixes |=3D PREFIX_REPZ; + prefixes &=3D ~PREFIX_REPNZ; goto next_byte; case 0xf2: prefixes |=3D PREFIX_REPNZ; + prefixes &=3D ~PREFIX_REPZ; goto next_byte; case 0xf0: prefixes |=3D PREFIX_LOCK; --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663712530; cv=none; d=zohomail.com; s=zohoarc; b=f9IOsan+f8Ibuh8u/Zuj06BOeDEGpCozATIwqSo5CDxuUon6JLArUYlU8zRqpxJ/U5aEPQJUWtuK+b2uyxAxrojKeHgMbhkmuOUJWMlmYLrvSCkTtSC3DoZCvr8j/SMqYaCuMENPPRPSMLjE0A3U6CXLCndDg2iK6alPmzwNTmQ= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663712530; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=y75+M5IOAZvf3uuFV/0juokikMOyYtt8TC4O+3/T2Y4=; b=Eq67ZhaPjWSfkGJxcqPFqJLU93Wg7C2JAtwT0QXIfNpZZ53WPpcRoF8sqZ+abRMPfRrwcZMMojIzTqNbH4OAPSvyJaB81Sqj6kCXlELPAKYX3VnXHp7naBAWLDEp+HBzwtshBiLy7JGJsU/KuwkTGFxC94Stsu3AhL4LrnR10zk= 
ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663712530982522.8577545359495; Tue, 20 Sep 2022 15:22:10 -0700 (PDT) Received: from localhost ([::1]:38082 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oald2-0000nH-D6 for importer@patchew.org; Tue, 20 Sep 2022 18:22:08 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:39848) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzp-0006Jc-51 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:21 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:49692) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzn-000233-CO for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:20 -0400 Received: from mail-ej1-f69.google.com (mail-ej1-f69.google.com [209.85.218.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-504-A_K99hgMPjSIxEV-v1HKww-1; Tue, 20 Sep 2022 13:25:17 -0400 Received: by mail-ej1-f69.google.com with SMTP id ne4-20020a1709077b8400b0078114a1a6d8so1772043ejc.9 for ; Tue, 20 Sep 2022 10:25:17 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id ky10-20020a170907778a00b0077826b92d99sm216090ejc.12.2022.09.20.10.25.14 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:15 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694718; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: 
to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=y75+M5IOAZvf3uuFV/0juokikMOyYtt8TC4O+3/T2Y4=; b=Y8HpQCi386hsC3waXqFhynQtrjI9nHZ+5PW1YeT9FNQrl3unMVQXnwQvGB866W4Kky8Ou5 poL09ZSzNvgCutGnZ34PtnF5P59xPuJPav8Crnuyf4FhfCnQQF0jnOCn90xCLTKTsyaVgz U5lAIbMD8MlOjQDawalXlVcQLSRApS4= X-MC-Unique: A_K99hgMPjSIxEV-v1HKww-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=y75+M5IOAZvf3uuFV/0juokikMOyYtt8TC4O+3/T2Y4=; b=FCMSXRAS6zGGYGX+7f9Pi5QZolpIY45KSVKYE8NN2RjHPHQHqucOSfMxKCadFKKjjd 2M7drRcqHNP3S/ZcDZcJ/25JQYTOlqmbdBc15d4UcCZt62Bh434R6cybr9XLjFSacPXj U08n6O44Y6YYTsDdEYTFsStrVGtKW+qZaS3LkkTRlTfxrXRi0zCiXozZ4cQ5JjUPcqdT iDM4V3ijArqcrbJ5z+Hlygvot0s7uIwN5bHJfYLXOnJnLCEdYCYB1fE8IGR569HAD3Mf ckC2FFmq8v5Q7jKiO/LBFn+sGtxI00USVICRqC8tlKvkpNouRHzOwd3Lhy7OvZ3bLEKD ZnGA== X-Gm-Message-State: ACrzQf0JkoZtFf0tv7y0fqCOAXLDARnw/OaalW3Nibe3mIUYxjZhH3Mk 6MV1ZbQI4b1cNGX2clFxL8Ascgw+L+v3qZLZPJzLdrD9kdrjZuDRhz9Rmjo6aS4ZKzc1sbHNmPq NaRWEoY8LNlHpzkR/d1Fg+3U5iQ7OvHW6vOCw9+cMVLBj8dXJfGkNXT41bZPqMdw1oVQ= X-Received: by 2002:a17:907:1c90:b0:77f:b1ae:9f44 with SMTP id nb16-20020a1709071c9000b0077fb1ae9f44mr17441179ejc.304.1663694716073; Tue, 20 Sep 2022 10:25:16 -0700 (PDT) X-Google-Smtp-Source: AMsMyM5noaO8xznaYkresX4qtUFY98Etw9auKCtvFLC1l+omJHBnYyLSGnV5j9HHWAXLxJrAj3ZfBw== X-Received: by 2002:a17:907:1c90:b0:77f:b1ae:9f44 with SMTP id nb16-20020a1709071c9000b0077fb1ae9f44mr17441160ejc.304.1663694715733; Tue, 20 Sep 2022 10:25:15 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 04/37] target/i386: introduce insn_get_addr Date: Tue, 20 Sep 2022 19:24:34 +0200 Message-Id: <20220920172507.95568-5-pbonzini@redhat.com> X-Mailer: 
git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663712531988100001 Content-Type: text/plain; charset="utf-8" The "O" operand type in the Intel SDM needs to load an 8- to 64-bit unsigned value, while insn_get is limited to 32 bits. Extract the code out of disas_insn and into a separate function. 
Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 0f45a9f5ad..9d0e128a6a 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2308,6 +2308,31 @@ static void gen_ldst_modrm(CPUX86State *env, DisasCo= ntext *s, int modrm, } } =20 +static target_ulong insn_get_addr(CPUX86State *env, DisasContext *s, MemOp= ot) +{ + target_ulong ret; + + switch (ot) { + case MO_8: + ret =3D x86_ldub_code(env, s); + break; + case MO_16: + ret =3D x86_lduw_code(env, s); + break; + case MO_32: + ret =3D x86_ldl_code(env, s); + break; +#ifdef TARGET_X86_64 + case MO_64: + ret =3D x86_ldq_code(env, s); + break; +#endif + default: + g_assert_not_reached(); + } + return ret; +} + static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp o= t) { uint32_t ret; @@ -5882,16 +5907,7 @@ static target_ulong disas_insn(DisasContext *s, CPUS= tate *cpu) target_ulong offset_addr; =20 ot =3D mo_b_d(b, dflag); - switch (s->aflag) { -#ifdef TARGET_X86_64 - case MO_64: - offset_addr =3D x86_ldq_code(env, s); - break; -#endif - default: - offset_addr =3D insn_get(env, s, s->aflag); - break; - } + offset_addr =3D insn_get_addr(env, s, s->aflag); tcg_gen_movi_tl(s->A0, offset_addr); gen_add_A0_ds_seg(s); if ((b & 2) =3D=3D 0) { --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663710885; cv=none; d=zohomail.com; s=zohoarc; 
b=EaJ/SVOeGqTF8I2NtqiTAPRs/xzDnsDI2Cv7xWSCPhdOSmdags7ZiuDSgnb/Jnn1OPnGfBGw3Ase9xJn33qxAaXKG6UmST73qCII00qpRDNOq3YzI5ytGcaVOhjsYSnfxoVWG2D4BpO4WLUHOSXb2K+LaNzlybdA/bqEdcTMqhk= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663710885; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=p3LbZl9wQFR3dLNSxB0++lqg/soBADHPQV6A3GRvxxM=; b=j8uugD6nudYmBBX8eFw+i/g+4xYnzPe7umivnCh+q3ZJUOQjq5Tv+lYS6yNe9y8Vp9g7n+DAN8glgJ1o5GDppkZbaL7HerAnM0yh4Q+Du/tdxpae4WRlMqY5ETaPUTuzYKXsoIdQVn3bKOdrabKM2fjEB8OCe/l/G4XoAaHBOO4= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663710885464521.9384922099135; Tue, 20 Sep 2022 14:54:45 -0700 (PDT) Received: from localhost ([::1]:47210 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalCV-0005IM-DW for importer@patchew.org; Tue, 20 Sep 2022 17:54:44 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:60404) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzu-0006KU-2T for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:36 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:24109) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzq-000250-Dp for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:25 -0400 Received: from mail-ej1-f70.google.com (mail-ej1-f70.google.com [209.85.218.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id 
us-mta-136-5TNFgU7qOc2CHy_yViA5GQ-1; Tue, 20 Sep 2022 13:25:20 -0400 Received: by mail-ej1-f70.google.com with SMTP id hr12-20020a1709073f8c00b0077e8371f847so1805872ejc.20 for ; Tue, 20 Sep 2022 10:25:20 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id 18-20020a170906309200b0073dc5bb7c32sm185419ejv.64.2022.09.20.10.25.16 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:16 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694721; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=p3LbZl9wQFR3dLNSxB0++lqg/soBADHPQV6A3GRvxxM=; b=bjNTMD+Np2x4b3EgeU3EfAjOuNY2Q1HArvoTA4HnKSPcOQQQIa+LNc/3phHUX5Avs8O3aK Lsw5LwB3zRrFXnaPyGP0caYbHOXUiTCleVq+phckaQ4NGwQbyh2/yh4ac7uXGxoJhRIZXm lZJkjv1CYIhAW1NFr8pdKm/1pvqAE5E= X-MC-Unique: 5TNFgU7qOc2CHy_yViA5GQ-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=p3LbZl9wQFR3dLNSxB0++lqg/soBADHPQV6A3GRvxxM=; b=NXjucV6N6boUB5obMILP5NHBPlw+wmLN9ywWQz1jMGG+9JhmZ1jRZ8FVZvp7ofTgiY +FSJgbdHCPhEknmBoNs9J4YjMw0N3XJdNz9wb+PJy2n8VO1+5HHsFW0SaHNcZo8wuid9 ZyzaVqRZWzwzWT0ucIHvBT7c7iP+aoQ8LaW+YFuOJJVCtzsoiGMhsskWbOFWDd1slF34 cz/xruiOBagN7WXFO1NBMz3pOmCbZ2RaH1e81KWvIHBTDWhBvtx5MH1JihdkDqIhtfOZ BrBKFIDSM4W527MWO/+YqSyB1z7Y4BAVnEmRknf66M6NuHH5JBstjRjV/PGvDX1DS1wl ImUw== X-Gm-Message-State: ACrzQf3ceSWltelXfybI0S/jcS1gttLcYWRDMyanS5aKd9eHwRDSj1Z+ pKyyvFoz9nBAkOU0RpETeZ7ADrkOmNY5kDVJRZzNSNYcguSJTEDtqD6WBEhukryFcQA16qlUmjW PLBc8yIl3HZst6xMTFTfANIHILYJ4xY1XXIRln4QPSchnzBYtlEX+QKfSbR10ipV7WSc= X-Received: by 
2002:a17:906:fd8b:b0:779:dcbe:3a9d with SMTP id xa11-20020a170906fd8b00b00779dcbe3a9dmr18077161ejb.235.1663694718223; Tue, 20 Sep 2022 10:25:18 -0700 (PDT) X-Google-Smtp-Source: AMsMyM4Pw+5QUWrYBcWrsQjC+18gdmd9q0ZzIWAvOCKPRhRsSoN8Kb2OPrb4no8//uXyVIDhS0UfXQ== X-Received: by 2002:a17:906:fd8b:b0:779:dcbe:3a9d with SMTP id xa11-20020a170906fd8b00b00779dcbe3a9dmr18077086ejb.235.1663694717066; Tue, 20 Sep 2022 10:25:17 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 05/37] target/i386: add core of new i386 decoder Date: Tue, 20 Sep 2022 19:24:35 +0200 Message-Id: <20220920172507.95568-6-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663710886788100001 Content-Type: text/plain; charset="utf-8" The new decoder is based on three principles: - use mostly table-driven decoding, using 
tables derived as much as possible from the Intel manual. Centralizing the decoding of the operands makes it more homogeneous; for example, all immediates are signed. All modrm handling is in one function, and can be shared between SSE and ALU instructions (including XMM<->GPR instructions). The SSE/AVX decoder will also not have duplicated code between the 0F, 0F38 and 0F3A tables. - keep the code as "non-branchy" as possible. Generally, the code for the new decoder is more verbose, but the control flow is simpler. Conditionals are not nested and have small bodies. All instruction groups are resolved even before operands are decoded, and code generation is separated as much as possible within small functions that only handle one instruction each. - keep address generation and (for ALU operands) memory loads and writeback as much in common code as possible. All ALU operations, for example, are implemented as T0=3Df(T0,T1). For non-ALU instructions, read-modify-write memory operations are rare, but registers do not have TCGv equivalents: therefore, the common logic sets up pointer temporaries with the operands, while load and writeback are handled by gvec or by helpers. These principles make future code review and extensibility simpler, at the cost of having a relatively large amount of code in the form of this patch. Even EVEX should not be _too_ hard to implement (it's just a crazy large number of possibilities). This patch introduces the main decoder flow, and integrates the old decoder with the new one. The old decoder takes care of parsing prefixes and then optionally drops to the new one. The changes to the old decoder are minimal and allow it to be replaced incrementally with the new one. There is a debugging mechanism through a "LIMIT" environment variable. In user-mode emulation, the variable is the number of instructions decoded by the new decoder before permanently switching to the old one.
In system emulation, the variable is the highest opcode that is decoded by the new decoder (this is less friendly, but it's the best that can be done without requiring deterministic execution). Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 748 +++++++++++++++++++++++++++++++ target/i386/tcg/decode-new.h | 181 ++++++++ target/i386/tcg/emit.c.inc | 31 ++ target/i386/tcg/translate.c | 68 ++- 4 files changed, 1020 insertions(+), 8 deletions(-) create mode 100644 target/i386/tcg/decode-new.c.inc create mode 100644 target/i386/tcg/decode-new.h create mode 100644 target/i386/tcg/emit.c.inc diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc new file mode 100644 index 0000000000..a908e8b086 --- /dev/null +++ b/target/i386/tcg/decode-new.c.inc @@ -0,0 +1,748 @@ +/* + * New-style decoder for i386 instructions + * + * Copyright (c) 2022 Red Hat, Inc. + * + * Author: Paolo Bonzini + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +/* + * The decoder is mostly based on tables copied from the Intel SDM. As + * a result, most operand load and writeback is done entirely in common + * table-driven code using the same operand type (X86_TYPE_*) and + * size (X86_SIZE_*) codes used in the manual. 
+ * + * The main difference is that the V, U and W types are extended to + * cover MMX as well; if an instruction is like + * + * por Pq, Qq + * 66 por Vx, Hx, Wx + * + * only the second row is included and the instruction is marked as a + * valid MMX instruction. The MMX flag directs the decoder to rewrite + * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing + * "x" to "q" if there is no prefix. + * + * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x" + * if the difference is expressed via prefixes. Individual instructions + * are separated by prefix in the generator functions. + * + * There are a couple cases in which instructions (e.g. MOVD) write the + * whole XMM or MM register but are established incorrectly in the manual + * as "d" or "q". These have to be fixed for the decoder to work correctl= y. + */ + +#define X86_OP_NONE { 0 }, + +#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \ + .decode =3D glue(decode_, op), \ + .op0 =3D glue(X86_TYPE_, op0_), \ + .s0 =3D glue(X86_SIZE_, s0_), \ + .op1 =3D glue(X86_TYPE_, op1_), \ + .s1 =3D glue(X86_SIZE_, s1_), \ + .op2 =3D glue(X86_TYPE_, op2_), \ + .s2 =3D glue(X86_SIZE_, s2_), \ + .is_decode =3D true, \ + ## __VA_ARGS__ \ +} + +#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...) \ + X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__) +#define X86_OP_GROUP0(op, ...) \ + X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__) + +#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \ + .gen =3D glue(gen_, op), \ + .op0 =3D glue(X86_TYPE_, op0_), \ + .s0 =3D glue(X86_SIZE_, s0_), \ + .op1 =3D glue(X86_TYPE_, op1_), \ + .s1 =3D glue(X86_SIZE_, s1_), \ + .op2 =3D glue(X86_TYPE_, op2_), \ + .s2 =3D glue(X86_SIZE_, s2_), \ + ## __VA_ARGS__ \ +} + +#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) 
\ + X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, \ + .op3 =3D X86_TYPE_I, .s3 =3D X86_SIZE_b, \ + ## __VA_ARGS__) + +#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...) \ + X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__) +#define X86_OP_ENTRY0(op, ...) \ + X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__) + +#define i64 .special =3D X86_SPECIAL_i64, +#define o64 .special =3D X86_SPECIAL_o64, +#define xchg .special =3D X86_SPECIAL_Locked, +#define mmx .special =3D X86_SPECIAL_MMX, +#define zext0 .special =3D X86_SPECIAL_ZExtOp0, +#define zext2 .special =3D X86_SPECIAL_ZExtOp2, + +static uint8_t get_modrm(DisasContext *s, CPUX86State *env) +{ + if (!s->has_modrm) { + s->modrm =3D x86_ldub_code(env, s); + s->has_modrm =3D true; + } + return s->modrm; +} + +static const X86OpEntry opcodes_0F38_00toEF[240] =3D { +}; + +/* five rows for no prefix, 66, F3, F2, 66+F2 */ +static const X86OpEntry opcodes_0F38_F0toFF[16][5] =3D { +}; + +static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + *b =3D x86_ldub_code(env, s); + if (*b < 0xf0) { + *entry =3D opcodes_0F38_00toEF[*b]; + } else { + int row =3D 0; + if (s->prefix & PREFIX_REPZ) { + /* The REPZ (F3) prefix has priority over 66 */ + row =3D 2; + } else { + row +=3D s->prefix & PREFIX_REPNZ ? 3 : 0; + row +=3D s->prefix & PREFIX_DATA ? 
1 : 0; + } + *entry =3D opcodes_0F38_F0toFF[*b & 15][row]; + } +} + +static const X86OpEntry opcodes_0F3A[256] =3D { +}; + +static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + *b =3D x86_ldub_code(env, s); + *entry =3D opcodes_0F3A[*b]; +} + +static const X86OpEntry opcodes_0F[256] =3D { + [0x38] =3D X86_OP_GROUP0(0F38), + [0x3a] =3D X86_OP_GROUP0(0F3A), +}; + +static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *en= try, uint8_t *b) +{ + *entry =3D opcodes_0F[*b]; +} + +static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry= , uint8_t *b) +{ + *b =3D x86_ldub_code(env, s); + do_decode_0F(s, env, entry, b); +} + +static const X86OpEntry opcodes_root[256] =3D { + [0x0F] =3D X86_OP_GROUP0(0F), +}; + +#undef mmx + +/* + * Decode the fixed part of the opcode and place the last + * in b. + */ +static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + *entry =3D opcodes_root[*b]; +} + + +static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode, + X86DecodedOp *op, X86OpType type) +{ + int modrm =3D get_modrm(s, env); + if ((modrm >> 6) =3D=3D 3) { + if (s->prefix & PREFIX_LOCK) { + decode->e.gen =3D gen_illegal; + return 0xff; + } + op->n =3D (modrm & 7); + if (type !=3D X86_TYPE_Q && type !=3D X86_TYPE_N) { + op->n |=3D REX_B(s); + } + } else { + op->has_ea =3D true; + op->n =3D -1; + decode->mem =3D gen_lea_modrm_0(env, s, get_modrm(s, env)); + } + return modrm; +} + +static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size,= MemOp *ot) +{ + switch (size) { + case X86_SIZE_b: /* byte */ + *ot =3D MO_8; + return true; + + case X86_SIZE_d: /* 32-bit */ + case X86_SIZE_ss: /* SSE/AVX scalar single precision */ + *ot =3D MO_32; + return true; + + case X86_SIZE_p: /* Far pointer, return offset size */ + case X86_SIZE_s: /* Descriptor, return offset size */ + case X86_SIZE_v: /* 16/32/64-bit, based on operand 
size */ + *ot =3D s->dflag; + return true; + + case X86_SIZE_pi: /* MMX */ + case X86_SIZE_q: /* 64-bit */ + case X86_SIZE_sd: /* SSE/AVX scalar double precision */ + *ot =3D MO_64; + return true; + + case X86_SIZE_w: /* 16-bit */ + *ot =3D MO_16; + return true; + + case X86_SIZE_y: /* 32/64-bit, based on operand size */ + *ot =3D s->dflag =3D=3D MO_16 ? MO_32 : s->dflag; + return true; + + case X86_SIZE_z: /* 16-bit for 16-bit operand size, else 32-bit */ + *ot =3D s->dflag =3D=3D MO_16 ? MO_16 : MO_32; + return true; + + case X86_SIZE_dq: /* SSE/AVX 128-bit */ + if (e->special =3D=3D X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + *ot =3D MO_64; + return true; + } + if (s->vex_l && e->s0 !=3D X86_SIZE_qq && e->s1 !=3D X86_SIZE_qq) { + return false; + } + *ot =3D MO_128; + return true; + + case X86_SIZE_qq: /* AVX 256-bit */ + if (!s->vex_l) { + return false; + } + *ot =3D MO_256; + return true; + + case X86_SIZE_x: /* 128/256-bit, based on operand size */ + if (e->special =3D=3D X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + *ot =3D MO_64; + return true; + } + /* fall through */ + case X86_SIZE_ps: /* SSE/AVX packed single precision */ + case X86_SIZE_pd: /* SSE/AVX packed double precision */ + *ot =3D s->vex_l ? MO_256 : MO_128; + return true; + + case X86_SIZE_d64: /* Default to 64-bit in 64-bit mode */ + *ot =3D CODE64(s) && s->dflag =3D=3D MO_32 ? MO_64 : s->dflag; + return true; + + case X86_SIZE_f64: /* Ignore size override prefix in 64-bit mode */ + *ot =3D CODE64(s) ? 
MO_64 : s->dflag; + return true; + + default: + *ot =3D -1; + return true; + } +} + +static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode, + X86DecodedOp *op, X86OpType type, int b) +{ + int modrm; + + switch (type) { + case X86_TYPE_None: /* Implicit or absent */ + case X86_TYPE_A: /* Implicit */ + case X86_TYPE_F: /* EFLAGS/RFLAGS */ + break; + + case X86_TYPE_B: /* VEX.vvvv selects a GPR */ + op->unit =3D X86_OP_INT; + op->n =3D s->vex_v; + break; + + case X86_TYPE_C: /* REG in the modrm byte selects a control register = */ + op->unit =3D X86_OP_CR; + goto get_reg; + + case X86_TYPE_D: /* REG in the modrm byte selects a debug register */ + op->unit =3D X86_OP_DR; + goto get_reg; + + case X86_TYPE_G: /* REG in the modrm byte selects a GPR */ + op->unit =3D X86_OP_INT; + goto get_reg; + + case X86_TYPE_S: /* reg selects a segment register */ + op->unit =3D X86_OP_SEG; + goto get_reg; + + case X86_TYPE_P: + op->unit =3D X86_OP_MMX; + goto get_reg; + + case X86_TYPE_V: /* reg in the modrm byte selects an XMM/YMM register= */ + if (decode->e.special =3D=3D X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + op->unit =3D X86_OP_MMX; + } else { + op->unit =3D X86_OP_SSE; + } + get_reg: + op->n =3D ((get_modrm(s, env) >> 3) & 7) | REX_R(s); + break; + + case X86_TYPE_E: /* ALU modrm operand */ + op->unit =3D X86_OP_INT; + goto get_modrm; + + case X86_TYPE_Q: /* MMX modrm operand */ + op->unit =3D X86_OP_MMX; + goto get_modrm; + + case X86_TYPE_W: /* XMM/YMM modrm operand */ + if (decode->e.special =3D=3D X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + op->unit =3D X86_OP_MMX; + } else { + op->unit =3D X86_OP_SSE; + } + goto get_modrm; + + case X86_TYPE_N: /* R/M in the modrm byte selects an MMX register */ + op->unit =3D X86_OP_MMX; + goto get_modrm_reg; + + case X86_TYPE_U: /* R/M in the modrm byte selects an XMM/YMM register= */ + if (decode->e.special =3D=3D 
X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + op->unit =3D X86_OP_MMX; + } else { + op->unit =3D X86_OP_SSE; + } + goto get_modrm_reg; + + case X86_TYPE_R: /* R/M in the modrm byte selects a register */ + op->unit =3D X86_OP_INT; + get_modrm_reg: + modrm =3D get_modrm(s, env); + if ((modrm >> 6) !=3D 3) { + return false; + } + goto get_modrm; + + case X86_TYPE_M: /* modrm byte selects a memory operand */ + modrm =3D get_modrm(s, env); + if ((modrm >> 6) =3D=3D 3) { + return false; + } + get_modrm: + decode_modrm(s, env, decode, op, type); + break; + + case X86_TYPE_O: /* Absolute address encoded in the instruction */ + op->unit =3D X86_OP_INT; + op->has_ea =3D true; + op->n =3D -1; + decode->mem =3D (AddressParts) { + .def_seg =3D R_DS, + .base =3D -1, + .index =3D -1, + .disp =3D insn_get_addr(env, s, s->aflag) + }; + break; + + case X86_TYPE_H: /* For AVX, VEX.vvvv selects an XMM/YMM register */ + if ((s->prefix & PREFIX_VEX)) { + op->unit =3D X86_OP_SSE; + op->n =3D s->vex_v; + break; + } + if (op =3D=3D &decode->op[0]) { + /* shifts place the destination in VEX.vvvv, use modrm */ + return decode_op(s, env, decode, op, decode->e.op1, b); + } else { + return decode_op(s, env, decode, op, decode->e.op0, b); + } + + case X86_TYPE_I: /* Immediate */ + op->unit =3D X86_OP_IMM; + decode->immediate =3D insn_get_signed(env, s, op->ot); + break; + + case X86_TYPE_J: /* Relative offset for a jump */ + op->unit =3D X86_OP_IMM; + decode->immediate =3D insn_get_signed(env, s, op->ot); + decode->immediate +=3D s->pc - s->cs_base; + if (s->dflag =3D=3D MO_16) { + decode->immediate &=3D 0xffff; + } else if (!CODE64(s)) { + decode->immediate &=3D 0xffffffffu; + } + break; + + case X86_TYPE_L: /* The upper 4 bits of the immediate select a 128-bi= t register */ + op->n =3D insn_get(env, s, op->ot) >> 4; + break; + + case X86_TYPE_X: /* string source */ + op->n =3D -1; + decode->mem =3D (AddressParts) { + .def_seg =3D R_DS, + .base =3D R_ESI, + 
.index =3D -1, + }; + break; + + case X86_TYPE_Y: /* string destination */ + op->n =3D -1; + decode->mem =3D (AddressParts) { + .def_seg =3D R_ES, + .base =3D R_EDI, + .index =3D -1, + }; + break; + + case X86_TYPE_2op: + *op =3D decode->op[0]; + break; + + case X86_TYPE_LoBits: + op->n =3D (b & 7) | REX_B(s); + op->unit =3D X86_OP_INT; + break; + + case X86_TYPE_0 ... X86_TYPE_7: + op->n =3D type - X86_TYPE_0; + op->unit =3D X86_OP_INT; + break; + + case X86_TYPE_ES ... X86_TYPE_GS: + op->n =3D type - X86_TYPE_ES; + op->unit =3D X86_OP_SEG; + break; + } + + return true; +} + +static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc d= ecode_func, + X86DecodedInsn *decode) +{ + X86OpEntry *e =3D &decode->e; + + decode_func(s, env, e, &decode->b); + while (e->is_decode) { + e->is_decode =3D false; + e->decode(s, env, e, &decode->b); + } + + /* First compute size of operands in order to initialize s->rip_offset= . */ + if (e->op0 !=3D X86_TYPE_None) { + if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) { + return false; + } + if (e->op0 =3D=3D X86_TYPE_I) { + s->rip_offset +=3D 1 << decode->op[0].ot; + } + } + if (e->op1 !=3D X86_TYPE_None) { + if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) { + return false; + } + if (e->op1 =3D=3D X86_TYPE_I) { + s->rip_offset +=3D 1 << decode->op[1].ot; + } + } + if (e->op2 !=3D X86_TYPE_None) { + if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) { + return false; + } + if (e->op2 =3D=3D X86_TYPE_I) { + s->rip_offset +=3D 1 << decode->op[2].ot; + } + } + if (e->op3 !=3D X86_TYPE_None) { + assert(e->op3 =3D=3D X86_TYPE_I && e->s3 =3D=3D X86_SIZE_b); + s->rip_offset +=3D 1; + } + + if (e->op0 !=3D X86_TYPE_None && + !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) { + return false; + } + + if (e->op1 !=3D X86_TYPE_None && + !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) { + return false; + } + + if (e->op2 !=3D X86_TYPE_None && + !decode_op(s, env, decode, &decode->op[2], 
e->op2, decode->b)) { + return false; + } + + if (e->op3 !=3D X86_TYPE_None) { + decode->immediate =3D insn_get_signed(env, s, MO_8); + } + + return true; +} + +/* + * Convert one instruction. s->base.is_jmp is set if the translation must + * be stopped. + */ +static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) +{ + CPUX86State *env =3D cpu->env_ptr; + bool first =3D true; + X86DecodedInsn decode; + X86DecodeFunc decode_func =3D decode_root; + +#ifdef CONFIG_USER_ONLY + if (limit) { --limit; } +#endif + s->has_modrm =3D false; + + next_byte: + if (first) { + first =3D false; + } else { + b =3D x86_ldub_code(env, s); + } + /* Collect prefixes. */ + switch (b) { + case 0xf3: + s->prefix |=3D PREFIX_REPZ; + s->prefix &=3D ~PREFIX_REPNZ; + goto next_byte; + case 0xf2: + s->prefix |=3D PREFIX_REPNZ; + s->prefix &=3D ~PREFIX_REPZ; + goto next_byte; + case 0xf0: + s->prefix |=3D PREFIX_LOCK; + goto next_byte; + case 0x2e: + s->override =3D R_CS; + goto next_byte; + case 0x36: + s->override =3D R_SS; + goto next_byte; + case 0x3e: + s->override =3D R_DS; + goto next_byte; + case 0x26: + s->override =3D R_ES; + goto next_byte; + case 0x64: + s->override =3D R_FS; + goto next_byte; + case 0x65: + s->override =3D R_GS; + goto next_byte; + case 0x66: + s->prefix |=3D PREFIX_DATA; + goto next_byte; + case 0x67: + s->prefix |=3D PREFIX_ADR; + goto next_byte; +#ifdef TARGET_X86_64 + case 0x40 ... 0x4f: + if (CODE64(s)) { + /* REX prefix */ + s->prefix |=3D PREFIX_REX; + s->rex_w =3D (b >> 3) & 1; + s->rex_r =3D (b & 0x4) << 1; + s->rex_x =3D (b & 0x2) << 2; + s->rex_b =3D (b & 0x1) << 3; + goto next_byte; + } + break; +#endif + case 0xc5: /* 2-byte VEX */ + case 0xc4: /* 3-byte VEX */ + /* + * VEX prefixes cannot be used except in 32-bit mode. + * Otherwise the instruction is LES or LDS. 
+ */ + if (CODE32(s) && !VM86(s)) { + static const int pp_prefix[4] =3D { + 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ + }; + int vex3, vex2 =3D x86_ldub_code(env, s); + + if (!CODE64(s) && (vex2 & 0xc0) !=3D 0xc0) { + /* + * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b, + * otherwise the instruction is LES or LDS. + */ + s->pc--; /* rewind the advance_pc() x86_ldub_code() did */ + break; + } + + /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes= . */ + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ + | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) { + goto illegal_op; + } +#ifdef TARGET_X86_64 + s->rex_r =3D (~vex2 >> 4) & 8; +#endif + if (b =3D=3D 0xc5) { + /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode = byte */ + vex3 =3D vex2; + decode_func =3D decode_0F; + } else { + /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */ + vex3 =3D x86_ldub_code(env, s); +#ifdef TARGET_X86_64 + s->rex_x =3D (~vex2 >> 3) & 8; + s->rex_b =3D (~vex2 >> 2) & 8; + s->rex_w =3D (vex3 >> 7) & 1; +#endif + switch (vex2 & 0x1f) { + case 0x01: /* Implied 0f leading opcode bytes. */ + decode_func =3D decode_0F; + break; + case 0x02: /* Implied 0f 38 leading opcode bytes. */ + decode_func =3D decode_0F38; + break; + case 0x03: /* Implied 0f 3a leading opcode bytes. */ + decode_func =3D decode_0F3A; + break; + default: /* Reserved for future use. */ + goto unknown_op; + } + } + s->vex_v =3D (~vex3 >> 3) & 0xf; + s->vex_l =3D (vex3 >> 2) & 1; + s->prefix |=3D pp_prefix[vex3 & 3] | PREFIX_VEX; + } + break; + default: + if (b >=3D 0x100) { + b -=3D 0x100; + decode_func =3D do_decode_0F; + } + break; + } + + /* Post-process prefixes. */ + if (CODE64(s)) { + /* + * In 64-bit mode, the default data size is 32-bit. Select 64-bit + * data with rex_w, and 16-bit data with 0x66; rex_w takes precede= nce + * over 0x66 if both are present. + */ + s->dflag =3D (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 := MO_32); + /* In 64-bit mode, 0x67 selects 32-bit addressing. 
*/ + s->aflag =3D (s->prefix & PREFIX_ADR ? MO_32 : MO_64); + } else { + /* In 16/32-bit mode, 0x66 selects the opposite data size. */ + if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) !=3D 0)) { + s->dflag =3D MO_32; + } else { + s->dflag =3D MO_16; + } + /* In 16/32-bit mode, 0x67 selects the opposite addressing. */ + if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) !=3D 0)) { + s->aflag =3D MO_32; + } else { + s->aflag =3D MO_16; + } + } + + memset(&decode, 0, sizeof(decode)); + decode.b =3D b; + if (!decode_insn(s, env, decode_func, &decode)) { + goto illegal_op; + } + if (!decode.e.gen) { + goto unknown_op; + } + + switch (decode.e.special) { + case X86_SPECIAL_None: + break; + + case X86_SPECIAL_Locked: + if (decode.op[0].has_ea) { + s->prefix |=3D PREFIX_LOCK; + } + break; + + case X86_SPECIAL_ProtMode: + if (!PE(s) || VM86(s)) { + goto illegal_op; + } + break; + + case X86_SPECIAL_i64: + if (CODE64(s)) { + goto illegal_op; + } + break; + case X86_SPECIAL_o64: + if (!CODE64(s)) { + goto illegal_op; + } + break; + + case X86_SPECIAL_ZExtOp0: + assert(decode.op[0].unit =3D=3D X86_OP_INT); + if (!decode.op[0].has_ea) { + decode.op[0].ot =3D MO_32; + } + break; + + case X86_SPECIAL_ZExtOp2: + assert(decode.op[2].unit =3D=3D X86_OP_INT); + if (!decode.op[2].has_ea) { + decode.op[2].ot =3D MO_32; + } + break; + + case X86_SPECIAL_MMX: + if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) { + gen_helper_enter_mmx(cpu_env); + } + break; + } + + if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)= { + gen_load_ea(s, &decode.mem); + } + decode.e.gen(s, env, &decode); + return; + illegal_op: + gen_illegal_opcode(s); + return; + unknown_op: + gen_unknown_opcode(env, s); +} diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h new file mode 100644 index 0000000000..2f22d4d22e --- /dev/null +++ b/target/i386/tcg/decode-new.h @@ -0,0 +1,181 @@ +/* + * Decode table flags, mostly based on Intel SDM. 
+ * + * Copyright (c) 2022 Red Hat, Inc. + * + * Author: Paolo Bonzini + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +typedef enum X86OpType { + X86_TYPE_None, + + X86_TYPE_A, /* Implicit */ + X86_TYPE_B, /* VEX.vvvv selects a GPR */ + X86_TYPE_C, /* REG in the modrm byte selects a control register */ + X86_TYPE_D, /* REG in the modrm byte selects a debug register */ + X86_TYPE_E, /* ALU modrm operand */ + X86_TYPE_F, /* EFLAGS/RFLAGS */ + X86_TYPE_G, /* REG in the modrm byte selects a GPR */ + X86_TYPE_H, /* For AVX, VEX.vvvv selects an XMM/YMM register */ + X86_TYPE_I, /* Immediate */ + X86_TYPE_J, /* Relative offset for a jump */ + X86_TYPE_L, /* The upper 4 bits of the immediate select a 128-bit regi= ster */ + X86_TYPE_M, /* modrm byte selects a memory operand */ + X86_TYPE_N, /* R/M in the modrm byte selects an MMX register */ + X86_TYPE_O, /* Absolute address encoded in the instruction */ + X86_TYPE_P, /* reg in the modrm byte selects an MMX register */ + X86_TYPE_Q, /* MMX modrm operand */ + X86_TYPE_R, /* R/M in the modrm byte selects a register */ + X86_TYPE_S, /* reg selects a segment register */ + X86_TYPE_U, /* R/M in the modrm byte selects an XMM/YMM register */ + X86_TYPE_V, /* reg in the modrm byte selects an XMM/YMM register */ + X86_TYPE_W, /* XMM/YMM modrm operand */ + X86_TYPE_X, /* string source */ + X86_TYPE_Y, /* string destination */ + + 
/* Custom */ + X86_TYPE_2op, /* 2-operand RMW instruction */ + X86_TYPE_LoBits, /* encoded in bits 0-2 of the operand + REX.B */ + X86_TYPE_0, /* Hard-coded GPRs (RAX..RDI) */ + X86_TYPE_1, + X86_TYPE_2, + X86_TYPE_3, + X86_TYPE_4, + X86_TYPE_5, + X86_TYPE_6, + X86_TYPE_7, + X86_TYPE_ES, /* Hard-coded segment registers */ + X86_TYPE_CS, + X86_TYPE_SS, + X86_TYPE_DS, + X86_TYPE_FS, + X86_TYPE_GS, +} X86OpType; + +typedef enum X86OpSize { + X86_SIZE_None, + + X86_SIZE_a, /* BOUND operand */ + X86_SIZE_b, /* byte */ + X86_SIZE_d, /* 32-bit */ + X86_SIZE_dq, /* SSE/AVX 128-bit */ + X86_SIZE_p, /* Far pointer */ + X86_SIZE_pd, /* SSE/AVX packed double precision */ + X86_SIZE_pi, /* MMX */ + X86_SIZE_ps, /* SSE/AVX packed single precision */ + X86_SIZE_q, /* 64-bit */ + X86_SIZE_qq, /* AVX 256-bit */ + X86_SIZE_s, /* Descriptor */ + X86_SIZE_sd, /* SSE/AVX scalar double precision */ + X86_SIZE_ss, /* SSE/AVX scalar single precision */ + X86_SIZE_si, /* 32-bit GPR */ + X86_SIZE_v, /* 16/32/64-bit, based on operand size */ + X86_SIZE_w, /* 16-bit */ + X86_SIZE_x, /* 128/256-bit, based on operand size */ + X86_SIZE_y, /* 32/64-bit, based on operand size */ + X86_SIZE_z, /* 16-bit for 16-bit operand size, else 32-bit */ + + /* Custom */ + X86_SIZE_d64, + X86_SIZE_f64, +} X86OpSize; + +/* Execution flags */ + +typedef enum X86OpUnit { + X86_OP_SKIP, /* not valid or managed by emission function */ + X86_OP_SEG, /* segment selector */ + X86_OP_CR, /* control register */ + X86_OP_DR, /* debug register */ + X86_OP_INT, /* loaded into/stored from s->T0/T1 */ + X86_OP_IMM, /* immediate */ + X86_OP_SSE, /* address in either s->ptrX or s->A0 depending on has= _ea */ + X86_OP_MMX, /* address in either s->ptrX or s->A0 depending on has= _ea */ +} X86OpUnit; + +typedef enum X86InsnSpecial { + X86_SPECIAL_None, + + /* Always locked if it has a memory operand (XCHG) */ + X86_SPECIAL_Locked, + + /* Fault outside protected mode */ + X86_SPECIAL_ProtMode, + + /* + * Register operand 0/2 is 
zero extended to 32 bits. Rd/Mb or Rd/Mw + * in the manual. + */ + X86_SPECIAL_ZExtOp0, + X86_SPECIAL_ZExtOp2, + + /* + * MMX instruction exists with no prefix; if there is no prefix, V/H/W= /U operands + * become P/P/Q/N, and size "x" becomes "q". + */ + X86_SPECIAL_MMX, + + /* Illegal or exclusive to 64-bit mode */ + X86_SPECIAL_i64, + X86_SPECIAL_o64, +} X86InsnSpecial; + +typedef struct X86OpEntry X86OpEntry; +typedef struct X86DecodedInsn X86DecodedInsn; + +/* Decode function for multibyte opcodes. */ +typedef void (*X86DecodeFunc)(DisasContext *s, CPUX86State *env, X86OpEntr= y *entry, uint8_t *b); + +/* Code generation function. */ +typedef void (*X86GenFunc)(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode); + +struct X86OpEntry { + /* Based on the is_decode flags. */ + union { + X86GenFunc gen; + X86DecodeFunc decode; + }; + /* op0 is always written, op1 and op2 are always read. */ + X86OpType op0:8; + X86OpSize s0:8; + X86OpType op1:8; + X86OpSize s1:8; + X86OpType op2:8; + X86OpSize s2:8; + /* Must be I and b respectively if present. */ + X86OpType op3:8; + X86OpSize s3:8; + + X86InsnSpecial special:8; + bool is_decode:1; +}; + +typedef struct X86DecodedOp { + int8_t n; + MemOp ot; /* For b/c/d/p/s/q/v/w/y/z */ + X86OpUnit unit; + bool has_ea; +} X86DecodedOp; + +struct X86DecodedInsn { + X86OpEntry e; + X86DecodedOp op[3]; + target_ulong immediate; + AddressParts mem; + + uint8_t b; +}; + diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc new file mode 100644 index 0000000000..e86364ffc1 --- /dev/null +++ b/target/i386/tcg/emit.c.inc @@ -0,0 +1,31 @@ +/* + * New-style TCG opcode generator for i386 instructions + * + * Copyright (c) 2022 Red Hat, Inc. 
+ * + * Author: Paolo Bonzini + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +static void gen_illegal(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + gen_illegal_opcode(s); +} + +static void gen_load_ea(DisasContext *s, AddressParts *mem) +{ + TCGv ea =3D gen_lea_modrm_1(s, *mem); + gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override); +} diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 9d0e128a6a..ad14f56a31 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -85,6 +85,9 @@ typedef struct DisasContext { int8_t override; /* -1 if no override, else R_CS, R_DS, etc */ uint8_t prefix; =20 + bool has_modrm; + uint8_t modrm; + #ifndef CONFIG_USER_ONLY uint8_t cpl; /* code priv level */ uint8_t iopl; /* i/o priv level */ @@ -2356,6 +2359,31 @@ static inline uint32_t insn_get(CPUX86State *env, Di= sasContext *s, MemOp ot) return ret; } =20 +static target_long insn_get_signed(CPUX86State *env, DisasContext *s, MemO= p ot) +{ + target_long ret; + + switch (ot) { + case MO_8: + ret =3D (int8_t) x86_ldub_code(env, s); + break; + case MO_16: + ret =3D (int16_t) x86_lduw_code(env, s); + break; + case MO_32: + ret =3D (int32_t) x86_ldl_code(env, s); + break; +#ifdef TARGET_X86_64 + case MO_64: + ret =3D x86_ldq_code(env, s); + break; +#endif + default: + g_assert_not_reached(); + } + return ret; +} + 
static inline int insn_const_size(MemOp ot) { if (ot <=3D MO_32) { @@ -2846,6 +2874,11 @@ typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_p= tr reg_b, TCGv_i32 val); typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b, TCGv val); =20 +static bool first =3D true; static unsigned long limit; +#include "decode-new.h" +#include "emit.c.inc" +#include "decode-new.c.inc" + #define SSE_OPF_CMP (1 << 1) /* does not write for first operand */ #define SSE_OPF_SPECIAL (1 << 3) /* magic */ #define SSE_OPF_3DNOW (1 << 4) /* 3DNow! instruction */ @@ -4772,10 +4805,35 @@ static target_ulong disas_insn(DisasContext *s, CPU= State *cpu) =20 prefixes =3D 0; =20 + if (first) first =3D false, limit =3D getenv("LIMIT") ? atol(getenv("L= IMIT")) : -1; + bool use_new =3D true; +#ifdef CONFIG_USER_ONLY + use_new &=3D limit > 0; +#endif next_byte: + s->prefix =3D prefixes; b =3D x86_ldub_code(env, s); /* Collect prefixes. */ switch (b) { + default: +#ifndef CONFIG_USER_ONLY + use_new &=3D b <=3D limit; +#endif + if (use_new && 0) { + disas_insn_new(s, cpu, b); + return s->pc; + } + break; + case 0x0f: + b =3D x86_ldub_code(env, s) + 0x100; +#ifndef CONFIG_USER_ONLY + use_new &=3D b <=3D limit; +#endif + if (use_new && 0) { + disas_insn_new(s, cpu, b + 0x100); + return s->pc; + } + break; case 0xf3: prefixes |=3D PREFIX_REPZ; prefixes &=3D ~PREFIX_REPNZ; @@ -4826,6 +4884,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) #endif case 0xc5: /* 2-byte VEX */ case 0xc4: /* 3-byte VEX */ + use_new =3D false; /* VEX prefixes cannot be used except in 32-bit mode. Otherwise the instruction is LES or LDS. 
*/ if (CODE32(s) && !VM86(s)) { @@ -4910,14 +4969,7 @@ static target_ulong disas_insn(DisasContext *s, CPUS= tate *cpu) s->dflag =3D dflag; =20 /* now check op code */ - reswitch: - switch(b) { - case 0x0f: - /**************************/ - /* extended op code */ - b =3D x86_ldub_code(env, s) | 0x100; - goto reswitch; - + switch (b) { /**************************/ /* arith & logic */ case 0x00 ... 0x05: --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663710176; cv=none; d=zohomail.com; s=zohoarc; b=DoL4dY18iH8U3AeafCp38i+JXvNQR8As0an534TyIXGYU9O7ObGW2ZrEqU0nySBKl0PM4SF8RtCuHayoY6aG4ZgNJ4/u/MdnIzClKBkGs60VjYqFqssY4p5fZScUWzS2APNdMW4VUEuF5k05V2kTVILnDB7ILQ1oJrsEfbSlet8= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663710176; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=5yNNSCZf7PAfY9a2N0s5ZJwEMS+GSiiy9aGwbxeMwC0=; b=k81sRMv9H7/0ZHQfEGwzKpy0Z7qGTrYTeoBNQJVagrwRNHECZiXu/LNIE9zQrJoVI94qtGQ9CcgKvakMRneJ0glbLKK4KtRlI8oZJLIm7aQNklpsu4iBxSU0co9ZmW4cKONfVNRN71/4OF36u5KHab1hSX2V89KlrXceQgLOv2g= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663710176614543.4607501964156; Tue, 20 Sep 2022 14:42:56 -0700 (PDT) Received: from localhost ([::1]:38740 
helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oal15-0008CW-AV for importer@patchew.org; Tue, 20 Sep 2022 17:42:55 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:60406) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzv-0006Ki-DI for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:36 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:22665) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzt-00026K-Bu for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:27 -0400 Received: from mail-ed1-f70.google.com (mail-ed1-f70.google.com [209.85.208.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-284-OeSd5-l_PN28H4rzX5xuUw-1; Tue, 20 Sep 2022 13:25:20 -0400 Received: by mail-ed1-f70.google.com with SMTP id h13-20020a056402280d00b004528c8400afso2343980ede.6 for ; Tue, 20 Sep 2022 10:25:19 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id ti5-20020a170907c20500b007801ce34311sm198979ejc.19.2022.09.20.10.25.17 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:17 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694723; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=5yNNSCZf7PAfY9a2N0s5ZJwEMS+GSiiy9aGwbxeMwC0=; b=N6l8vFYwpfBOEOev91BaWaNbpFn7Mf/UiNSEJtI6QX+OjSmz+4k+3dlymVp2zMJoc7z7cP Oeq4aswug0B3WY7ChAq/b9E0yU4pQcTbWHCVlSfRLVExpnhSzQjMOkT4F8KVxhnFh8gCbY 1fxOoT9Y1x5akuhwAMPuukuajNXge9A= X-MC-Unique: OeSd5-l_PN28H4rzX5xuUw-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; 
d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=5yNNSCZf7PAfY9a2N0s5ZJwEMS+GSiiy9aGwbxeMwC0=; b=IEjI4EVvX8pTccKJQuuo6q8zqu3SGSj0ljCXMQd27K6Qj+7q9W8joWITt8I3ECNleF htUFzRGI1EPKvmrsd5pzZpo/OhuLZPJHMxfz+ZOsu10pnwZrhu7Lpig8gB/8SwXfJlvZ VdLQI/9O9IBv3dRMILlJQbzo/RqaAD+rqyYWgk39gVPnvzllL4fnDx3J1LLyWHXtTItE yZlLib/Ju/YwyRoB7nTmHX+gRwf+DPljv0ywRiSwADdy6ehfJkNvn36MFRjfQ5FKAQ73 B646NgvalRTkrn9e36rEHgahBbGeTSF9P2aJk4NPpeOrDCZf1VK5B+g4h6ObFOwXr0d5 X4fw== X-Gm-Message-State: ACrzQf27PyKjgW8zeLaaKhrY4kJskvSrYZ+qFOei44RHl4zpCCPyIRS9 Dh+CFDvCssmUNfV+5Zf3Gdew+i+8dGrw6vTwF+GS+FgpIGXdKvE2dQkk0MdeZ/wsJ3NLIfKgKvF E1BcuR2sFCo0PNRz/rhNxgjvhk+ZRoZmP3mcw9pUuX5V9R9GOQ0ANHRU6pTY8pztt+iQ= X-Received: by 2002:a05:6402:350a:b0:44e:9da7:2afb with SMTP id b10-20020a056402350a00b0044e9da72afbmr21683436edd.290.1663694718587; Tue, 20 Sep 2022 10:25:18 -0700 (PDT) X-Google-Smtp-Source: AMsMyM53sY0ACYsLl4FY6dLFdVq4CSxwCDyPV/bpzd89HziyjHSyeHTt6t0b4anlAxG4z7Rl4YN1Wg== X-Received: by 2002:a05:6402:350a:b0:44e:9da7:2afb with SMTP id b10-20020a056402350a00b0044e9da72afbmr21683422edd.290.1663694718273; Tue, 20 Sep 2022 10:25:18 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 06/37] target/i386: add ALU load/writeback core Date: Tue, 20 Sep 2022 19:24:36 +0200 Message-Id: <20220920172507.95568-7-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; 
helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663710177542100003 Content-Type: text/plain; charset="utf-8" Add generic code generation that takes care of preparing operands around calls to decode.e.gen in a table-driven manner, so that ALU operations need not take care of that. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 33 ++++++- target/i386/tcg/decode-new.h | 7 ++ target/i386/tcg/emit.c.inc | 155 +++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 18 ++++ 4 files changed, 212 insertions(+), 1 deletion(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index a908e8b086..be4e5705ed 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -513,6 +513,20 @@ static bool decode_insn(DisasContext *s, CPUX86State *= env, X86DecodeFunc decode_ return true; } =20 +static void decode_temp_free(X86DecodedOp *op) +{ + if (op->v_ptr) { + tcg_temp_free_ptr(op->v_ptr); + } +} + +static void decode_temps_free(X86DecodedInsn *decode) +{ + decode_temp_free(&decode->op[0]); + decode_temp_free(&decode->op[1]); + decode_temp_free(&decode->op[2]); +} + /* * Convert one instruction. s->base.is_jmp is set if the translation must * be stopped. 
@@ -738,7 +752,24 @@ static void disas_insn_new(DisasContext *s, CPUState *= cpu, int b) if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)= { gen_load_ea(s, &decode.mem); } - decode.e.gen(s, env, &decode); + if (s->prefix & PREFIX_LOCK) { + if (decode.op[0].unit !=3D X86_OP_INT || !decode.op[0].has_ea) { + goto illegal_op; + } + gen_load(s, &decode, 2, s->T1); + decode.e.gen(s, env, &decode); + } else { + if (decode.op[0].unit =3D=3D X86_OP_MMX) { + compute_mmx_offset(&decode.op[0]); + } else if (decode.op[0].unit =3D=3D X86_OP_SSE) { + compute_xmm_offset(&decode.op[0]); + } + gen_load(s, &decode, 1, s->T0); + gen_load(s, &decode, 2, s->T1); + decode.e.gen(s, env, &decode); + gen_writeback(s, &decode, 0, s->T0); + } + decode_temps_free(&decode); return; illegal_op: gen_illegal_opcode(s); diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index 2f22d4d22e..3a856b48e7 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -168,6 +168,13 @@ typedef struct X86DecodedOp { MemOp ot; /* For b/c/d/p/s/q/v/w/y/z */ X86OpUnit unit; bool has_ea; + int offset; /* For MMX and SSE */ + + /* + * This field is used internally by macros OP0_PTR/OP1_PTR/OP2_PTR, + * do not access directly! 
+ */ + TCGv_ptr v_ptr; } X86DecodedOp; =20 struct X86DecodedInsn { diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index e86364ffc1..8f60658537 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -29,3 +29,158 @@ static void gen_load_ea(DisasContext *s, AddressParts *= mem) TCGv ea =3D gen_lea_modrm_1(s, *mem); gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override); } + +static inline int mmx_offset(MemOp ot) +{ + switch (ot) { + case MO_8: + return offsetof(MMXReg, MMX_B(0)); + case MO_16: + return offsetof(MMXReg, MMX_W(0)); + case MO_32: + return offsetof(MMXReg, MMX_L(0)); + case MO_64: + return offsetof(MMXReg, MMX_Q(0)); + default: + g_assert_not_reached(); + } +} + +static inline int xmm_offset(MemOp ot) +{ + switch (ot) { + case MO_8: + return offsetof(ZMMReg, ZMM_B(0)); + case MO_16: + return offsetof(ZMMReg, ZMM_W(0)); + case MO_32: + return offsetof(ZMMReg, ZMM_L(0)); + case MO_64: + return offsetof(ZMMReg, ZMM_Q(0)); + case MO_128: + return offsetof(ZMMReg, ZMM_X(0)); + case MO_256: + return offsetof(ZMMReg, ZMM_Y(0)); + default: + g_assert_not_reached(); + } +} + +static void compute_mmx_offset(X86DecodedOp *op) +{ + if (!op->has_ea) { + op->offset =3D offsetof(CPUX86State, fpregs[op->n].mmx) + mmx_offs= et(op->ot); + } else { + op->offset =3D offsetof(CPUX86State, mmx_t0) + mmx_offset(op->ot); + } +} + +static void compute_xmm_offset(X86DecodedOp *op) +{ + if (!op->has_ea) { + op->offset =3D ZMM_OFFSET(op->n) + xmm_offset(op->ot); + } else { + op->offset =3D offsetof(CPUX86State, xmm_t0) + xmm_offset(op->ot); + } +} + +static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_of= s, bool aligned) +{ + if (ot =3D=3D MO_8) { + gen_op_ld_v(s, MO_8, temp, s->A0); + tcg_gen_st8_tl(temp, cpu_env, dest_ofs); + } else if (ot =3D=3D MO_16) { + gen_op_ld_v(s, MO_16, temp, s->A0); + tcg_gen_st16_tl(temp, cpu_env, dest_ofs); + } else if (ot =3D=3D MO_32) { + gen_op_ld_v(s, MO_32, temp, s->A0); + 
tcg_gen_st32_tl(temp, cpu_env, dest_ofs); + } else if (ot =3D=3D MO_64) { + gen_ldq_env_A0(s, dest_ofs); + } else if (ot =3D=3D MO_128) { + gen_ldo_env_A0(s, dest_ofs, aligned); + } else if (ot =3D=3D MO_256) { + gen_ldy_env_A0(s, dest_ofs, aligned); + } +} + +static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCG= v v) +{ + X86DecodedOp *op =3D &decode->op[opn]; + + switch (op->unit) { + case X86_OP_SKIP: + return; + case X86_OP_SEG: + tcg_gen_ld32u_tl(v, cpu_env, + offsetof(CPUX86State,segs[op->n].selector)); + break; + case X86_OP_CR: + tcg_gen_ld_tl(v, cpu_env, offsetof(CPUX86State, cr[op->n])); + break; + case X86_OP_DR: + tcg_gen_ld_tl(v, cpu_env, offsetof(CPUX86State, dr[op->n])); + break; + case X86_OP_INT: + if (op->has_ea) { + gen_op_ld_v(s, op->ot, v, s->A0); + } else { + gen_op_mov_v_reg(s, op->ot, v, op->n); + } + break; + case X86_OP_IMM: + tcg_gen_movi_tl(v, decode->immediate); + break; + + case X86_OP_MMX: + compute_mmx_offset(op); + goto load_vector; + + case X86_OP_SSE: + compute_xmm_offset(op); + load_vector: + if (op->has_ea) { + gen_load_sse(s, v, op->ot, op->offset, true); + } + break; + + default: + g_assert_not_reached(); + } +} + +static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn= , TCGv v) +{ + X86DecodedOp *op =3D &decode->op[opn]; + switch (op->unit) { + case X86_OP_SKIP: + break; + case X86_OP_SEG: + /* Note that reg =3D=3D R_SS in gen_movl_seg_T0 always sets is_jmp= . 
*/ + gen_movl_seg_T0(s, op->n); + if (s->base.is_jmp) { + gen_jmp_im(s, s->pc - s->cs_base); + if (op->n =3D=3D R_SS) { + s->flags &=3D ~HF_TF_MASK; + gen_eob_inhibit_irq(s, true); + } else { + gen_eob(s); + } + } + break; + case X86_OP_INT: + if (op->has_ea) { + gen_op_st_v(s, op->ot, v, s->A0); + } else { + gen_op_mov_reg_v(s, op->ot, op->n, v); + } + break; + case X86_OP_MMX: + case X86_OP_SSE: + break; + case X86_OP_CR: + case X86_OP_DR: + default: + g_assert_not_reached(); + } +} diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index ad14f56a31..e6e82f32cb 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2832,6 +2832,24 @@ static inline void gen_sto_env_A0(DisasContext *s, i= nt offset, bool align) tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ); } =20 +static void gen_ldy_env_A0(DisasContext *s, int offset, bool align) +{ + int mem_index =3D s->mem_index; + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, + MO_LEUQ | (align ? 
MO_ALIGN_32 : 0)); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(0= ))); + tcg_gen_addi_tl(s->tmp0, s->A0, 8); + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(1= ))); + + tcg_gen_addi_tl(s->tmp0, s->A0, 16); + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(2= ))); + tcg_gen_addi_tl(s->tmp0, s->A0, 24); + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(3= ))); +} + static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset) { tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q= (0))); --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=fail; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=redhat.com Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 166371271087868.47701220248177; Tue, 20 Sep 2022 15:25:10 -0700 (PDT) Received: from localhost ([::1]:57176 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalfw-0006Zp-Gu for importer@patchew.org; Tue, 20 Sep 2022 18:25:08 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:60408) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzv-0006Kj-DS for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:36 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:25947) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzt-00026V-GS for qemu-devel@nongnu.org; Tue, 20 Sep 
2022 13:25:26 -0400 Received: from mail-ed1-f70.google.com (mail-ed1-f70.google.com [209.85.208.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-272-OOjDHFK0N_qimoQkqJqsOw-1; Tue, 20 Sep 2022 13:25:21 -0400 Received: by mail-ed1-f70.google.com with SMTP id y14-20020a056402440e00b0044301c7ccd9so2347316eda.19 for ; Tue, 20 Sep 2022 10:25:21 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id t18-20020a17090605d200b0073da32b7db0sm128228ejt.199.2022.09.20.10.25.19 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:19 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694723; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=pmay+KXHKAZiRDPwIYrcS61+XW1Aa8B74QrdkvyUTEc=; b=cBBtJolhpLBXvlDyGRy+R39Oj//ffMqLGQTsdD9GfC3Ot1pWi6NBgXR+qWvaIjNLQ/rj+W 8FH1nzsLBWSUgMAajIaQRMaAPA6k2Hxe4+8dy4i+ohgkyz/HSAW/lmM6KsQKAIcSZq1/3n kWSx8fDOk6Xuq76e9enhSMusqfL+8KU= X-MC-Unique: OOjDHFK0N_qimoQkqJqsOw-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=pmay+KXHKAZiRDPwIYrcS61+XW1Aa8B74QrdkvyUTEc=; b=EDddJsYcRaGrtH+ZfSKIwydkRurUM09gkZupKuBcgVxqDE64AHP+uySEfA0ihtTnG9 TJhOkZf2DsvDPATgAgNDhRrALGYMK4RKY/jjZ+HZk3iAZ+bRX+udme4hmquCFhEKQz3j i1Er5gd4jUDxyo7SsWouNGJHHGz9aIMLhnfaNBnpo4e4v1vOeBBoiZVPz8miAGuLY+WY 1f/3UCOpqL0cfDFXf65rJqcHSpgpdIYmP9msdm1VYfP4o2zN4IEfY1aczbaSvm9IQnI8 +ChMQ/D2CLJfWxuZllGSLve7HeaArEBkREKIqZf88dgBGImCsdWqblugqCc/t0697iAn UMPA== X-Gm-Message-State: 
ACrzQf1WOX0OxkyDiQhto3F850Gs8BTJzRL7iU0/JKRGxC7ZXT0QVpZ1 9qSAkhGw4HKwe9NX70793tE8NiSYvjSatQhUp+1LsLIqqhUUFp7Gx1SYdp2nDYJ2/FNjOwL6Ha4 /6wHdam5ZmMAQWsHGxk/cQsgFiRK4wTe+JSO7YEHUQ0nDBVMzpJ/U586J3d6cmYGKrPg= X-Received: by 2002:a17:907:9714:b0:77b:e7a8:2f66 with SMTP id jg20-20020a170907971400b0077be7a82f66mr18154800ejc.107.1663694720348; Tue, 20 Sep 2022 10:25:20 -0700 (PDT) X-Google-Smtp-Source: AMsMyM4Aoy/QtKxICN9IO17gGlah/cdhOra84clJKOhtVqLKEX9SdUxru4JROWQdwOr3RDVXDEOATg== X-Received: by 2002:a17:907:9714:b0:77b:e7a8:2f66 with SMTP id jg20-20020a170907971400b0077be7a82f66mr18154787ejc.107.1663694720089; Tue, 20 Sep 2022 10:25:20 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 07/37] target/i386: add CPUID[EAX=7, ECX=0].ECX to DisasContext Date: Tue, 20 Sep 2022 19:24:37 +0200 Message-Id: <20220920172507.95568-8-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org 
Sender: "Qemu-devel" X-ZohoMail-DKIM: fail (Header signature does not verify) X-ZM-MESSAGEID: 1663712712832100001 Content-Type: text/plain; charset="utf-8" TCG will shortly implement VAES instructions, so add the relevant feature word to the DisasContext. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index e6e82f32cb..4dcd276e80 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -115,6 +115,7 @@ typedef struct DisasContext { int cpuid_ext2_features; int cpuid_ext3_features; int cpuid_7_0_ebx_features; + int cpuid_7_0_ecx_features; int cpuid_xsave_features; =20 /* TCG local temps */ @@ -8871,6 +8872,7 @@ static void i386_tr_init_disas_context(DisasContextBa= se *dcbase, CPUState *cpu) dc->cpuid_ext2_features =3D env->features[FEAT_8000_0001_EDX]; dc->cpuid_ext3_features =3D env->features[FEAT_8000_0001_ECX]; dc->cpuid_7_0_ebx_features =3D env->features[FEAT_7_0_EBX]; + dc->cpuid_7_0_ecx_features =3D env->features[FEAT_7_0_ECX]; dc->cpuid_xsave_features =3D env->features[FEAT_XSAVE]; dc->jmp_opt =3D !((cflags & CF_NO_GOTO_TB) || (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK))); --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663711447; cv=none; d=zohomail.com; s=zohoarc; b=Tj6Hstte2KosVl3v96Vd4tRLwrLPTmQggH70YKKyKiCvvN+eXi86s9ce391CHRd6iZ+wGgNjwcmumlEbRIMf/XGokiWl6NaTlLJPpkIJto0PdFx7ufWbJNA1Wl7rCbDzvzG61FdqU+H+0fyvTleKCvoFkvtAsq779xGjmyp/rXY= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663711447; 
h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=mhC9Y/3lDBwkh9XPslwUY7nTmD9g2+Zei/ClVTt/d6A=; b=AbAYR4B7wAJfPlhjZWRAFVU8DPalL+/RVy2wGeZQUlHet8UAkRIsaeNYUpyPiOIXHYBy+7gVzthgKMJ8DcYMEcIDrMFRTvP6lpFT2PlAiTSHkR0m9e7B1rLXE6GxGLBM2XTKQhpPeW7tLqA50mUOb5e/jB23pHS2AEe/ywnrdX0= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663711447325166.0366384095836; Tue, 20 Sep 2022 15:04:07 -0700 (PDT) Received: from localhost ([::1]:60658 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalLa-0006Ez-2X for importer@patchew.org; Tue, 20 Sep 2022 18:04:06 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:60410) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzw-0006Kt-LT for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:45 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:39894) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzt-00026w-HG for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:27 -0400 Received: from mail-ed1-f72.google.com (mail-ed1-f72.google.com [209.85.208.72]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-408-nskiom1iO3qIGkJthWugXQ-1; Tue, 20 Sep 2022 13:25:23 -0400 Received: by mail-ed1-f72.google.com with SMTP id f10-20020a0564021e8a00b00451be6582d5so2376739edf.15 for ; Tue, 20 Sep 2022 10:25:22 -0700 (PDT) Received: from [192.168.10.118] 
([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id w8-20020a50fa88000000b0045256570210sm254449edr.3.2022.09.20.10.25.20 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:20 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694724; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=mhC9Y/3lDBwkh9XPslwUY7nTmD9g2+Zei/ClVTt/d6A=; b=XwVgv1H5CJ2nk7jcuf6oi0G1260WS0eSFE+MYxFUkWDkSPi+IGfAoJTVwY4LMdQihtZtgW Cs6/BGKEOeUl+egPs8RSfWw0CEyZSqnVWKhm/fOPQ3hyKiy82os5YE0rJlSx9KoS99Q9wa XmiusDzouif0c+tjb8O6+AnxIAVe+xA= X-MC-Unique: nskiom1iO3qIGkJthWugXQ-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=mhC9Y/3lDBwkh9XPslwUY7nTmD9g2+Zei/ClVTt/d6A=; b=nOpbvoZPGNRtk0JHLyWlBiL/2TVWRmZZdkEViCYULvSwFnpkbGc9LW1TRHYnSvCd88 X0agCPnKuuJte+ED/psaF0pxaVvmdOUQO+lGr53nLpxTTwm4T6QFzWYC2ZeSdzVH5oUA FmmmR7VsovxCQR4JK4eccAAStqFIu2Z3f16CNiA1jQmRBpKIyn+MSB7rL8mM9cZ6Ll8K DCL3j6zXtTCXWVvnfj9Pq8wYXphAFKhjKDCGtmqawE7gGp6HaxWYb8XK6mJOj7rtyFRd nz6TbUgp23y/Xp/WPLI6SNjT0dUx7Erv1pfJO8YLVVGl2BazwuQ9BxRGfgApRM4eu8vW tzRA== X-Gm-Message-State: ACrzQf2hGAgKietmTUNH81tsqIAHGSM6EMRLvAVVyWDUEc4DFhWMKa/U d4d49sLGTHhVN6lLFztjF8+1UflUeh5GrRQRwHFYSzVMY7Rh3M/bYbMmttg22t9eIUg7+FLf80z XCwB53DUvWzTXsXFz4rQJhovMtILBqC3WFrFD/1PnXPrPW+FRWmrrBMz55KVAn/ssX1U= X-Received: by 2002:a17:907:16a3:b0:777:be43:7670 with SMTP id hc35-20020a17090716a300b00777be437670mr17935269ejc.552.1663694721623; Tue, 20 Sep 2022 10:25:21 -0700 (PDT) X-Google-Smtp-Source: AMsMyM6p2YWNiRQ7e06dBsKSTn3FEHFLBUAv4inzp7+Z4AV70VVSpuOTcOvhHr1+QNdIACciaot++g== X-Received: by 
2002:a17:907:16a3:b0:777:be43:7670 with SMTP id hc35-20020a17090716a300b00777be437670mr17935250ejc.552.1663694721352; Tue, 20 Sep 2022 10:25:21 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 08/37] target/i386: add CPUID feature checks to new decoder Date: Tue, 20 Sep 2022 19:24:38 +0200 Message-Id: <20220920172507.95568-9-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663711448016100001 Content-Type: text/plain; charset="utf-8" Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/decode-new.c.inc | 55 ++++++++++++++++++++++++++++++++ target/i386/tcg/decode-new.h | 20 ++++++++++++ 2 files changed, 75 insertions(+) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index be4e5705ed..e268b5fb48 100644 --- 
a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -85,6 +85,7 @@ #define X86_OP_ENTRY0(op, ...) \ X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__) =20 +#define cpuid(feat) .cpuid =3D X86_FEAT_##feat, #define i64 .special =3D X86_SPECIAL_i64, #define o64 .special =3D X86_SPECIAL_o64, #define xchg .special =3D X86_SPECIAL_Locked, @@ -513,6 +514,56 @@ static bool decode_insn(DisasContext *s, CPUX86State *= env, X86DecodeFunc decode_ return true; } =20 +static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) +{ + switch (cpuid) { + case X86_FEAT_None: + return true; + case X86_FEAT_MOVBE: + return (s->cpuid_ext_features & CPUID_EXT_MOVBE); + case X86_FEAT_PCLMULQDQ: + return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ); + case X86_FEAT_SSE: + return (s->cpuid_ext_features & CPUID_SSE); + case X86_FEAT_SSE2: + return (s->cpuid_ext_features & CPUID_SSE2); + case X86_FEAT_SSE3: + return (s->cpuid_ext_features & CPUID_EXT_SSE3); + case X86_FEAT_SSSE3: + return (s->cpuid_ext_features & CPUID_EXT_SSSE3); + case X86_FEAT_SSE41: + return (s->cpuid_ext_features & CPUID_EXT_SSE41); + case X86_FEAT_SSE42: + return (s->cpuid_ext_features & CPUID_EXT_SSE42); + case X86_FEAT_AES: + if (!(s->cpuid_ext_features & CPUID_EXT_AES)) { + return false; + } else if (!(s->prefix & PREFIX_VEX)) { + return true; + } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) { + return false; + } else { + return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX= _VAES); + } + + case X86_FEAT_AVX: + return (s->cpuid_ext_features & CPUID_EXT_AVX); + + case X86_FEAT_SSE4A: + return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A); + + case X86_FEAT_ADX: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX); + case X86_FEAT_BMI1: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1); + case X86_FEAT_BMI2: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2); + case X86_FEAT_AVX2: + return (s->cpuid_7_0_ebx_features & 
CPUID_7_0_EBX_AVX2); + } + g_assert_not_reached(); +} + static void decode_temp_free(X86DecodedOp *op) { if (op->v_ptr) { @@ -701,6 +752,10 @@ static void disas_insn_new(DisasContext *s, CPUState *= cpu, int b) goto unknown_op; } =20 + if (!has_cpuid_feature(s, decode.e.cpuid)) { + goto illegal_op; + } + switch (decode.e.special) { case X86_SPECIAL_None: break; diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index 3a856b48e7..e62e9c9d87 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -93,6 +93,25 @@ typedef enum X86OpSize { X86_SIZE_f64, } X86OpSize; =20 +typedef enum X86CPUIDFeature { + X86_FEAT_None, + X86_FEAT_ADX, + X86_FEAT_AES, + X86_FEAT_AVX, + X86_FEAT_AVX2, + X86_FEAT_BMI1, + X86_FEAT_BMI2, + X86_FEAT_MOVBE, + X86_FEAT_PCLMULQDQ, + X86_FEAT_SSE, + X86_FEAT_SSE2, + X86_FEAT_SSE3, + X86_FEAT_SSSE3, + X86_FEAT_SSE41, + X86_FEAT_SSE42, + X86_FEAT_SSE4A, +} X86CPUIDFeature; + /* Execution flags */ =20 typedef enum X86OpUnit { @@ -160,6 +179,7 @@ struct X86OpEntry { X86OpSize s3:8; =20 X86InsnSpecial special:8; + X86CPUIDFeature cpuid:8; bool is_decode:1; }; =20 --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713709; cv=none; d=zohomail.com; s=zohoarc; b=k2oxxyyIi+hQHPxnJ4vWf4s24Z1Ur/ZTNu040QUipidPofCI30JBxFLtptIK21pZvTDD/5kkQF/VMjYHYG37c0j1z9w/Cq/1tbr8icGzyr6U9bWY2C09x8Z7661tK48G1zTv9zLhw1ehsTchUMRYngp0F63v1IupLnGjtMd0bmI= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713709; 
h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=Yc6DxNmSj3Qg1jfzzJSXZqJn2BBTcGOivU6NJ/hq6Pw=; b=QFVQ8Z4cvawoD+Z1AFnpnuuIYWFFVS5Ne0WqEcO8FRS0fNW8FsUfiEDzeEfN/abjr3xRfpdMjQVZiSDM6UFfXsG7SUboeACGyw4IyyV0RrlAvKQaTbf8LrvgiPOlgAUjMMICWC4E+FTTXZvRmfzxnTuQ9SY0CO4Izy1Sf+HDXTU= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663713709507494.27291940316013; Tue, 20 Sep 2022 15:41:49 -0700 (PDT) Received: from localhost ([::1]:57536 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalw3-0004lz-T5 for importer@patchew.org; Tue, 20 Sep 2022 18:41:47 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:60412) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzw-0006Ku-Le for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:42 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:21816) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzu-00027j-7w for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:27 -0400 Received: from mail-ej1-f69.google.com (mail-ej1-f69.google.com [209.85.218.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-448-NVUfrrhbPmSO0AW7zhTrbg-1; Tue, 20 Sep 2022 13:25:24 -0400 Received: by mail-ej1-f69.google.com with SMTP id xj11-20020a170906db0b00b0077b6ecb23fcso1770790ejb.5 for ; Tue, 20 Sep 2022 10:25:24 -0700 (PDT) Received: from [192.168.10.118] 
([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id a8-20020a50c308000000b00445e930e20esm180920edb.64.2022.09.20.10.25.21 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:22 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694725; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=Yc6DxNmSj3Qg1jfzzJSXZqJn2BBTcGOivU6NJ/hq6Pw=; b=UYjJ0Aphy+ftVbWvP4JAMhEb8EswrXG6a9MS9oiVNGy5w+lzQ+m2T2PVBMRplthO8TX2F3 lz7lwGrgaf0Obvn1FUKq+uohet3zjQRPEyauAGKnBHKbVQUmfUiglLtsLvFBBfjZwvcQE6 G64inewzlS2HKxHREFvqH20zOo49VnE= X-MC-Unique: NVUfrrhbPmSO0AW7zhTrbg-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=Yc6DxNmSj3Qg1jfzzJSXZqJn2BBTcGOivU6NJ/hq6Pw=; b=ox8gbuzcOU0A+7HxwdwzwiCDTtYa4HvtgrkcP6Htc+nQcPhtS6XSM6heWYlN1VP9k7 uO2/XDe3bgnPDp8z68NF4s7R5l2o2PkwQXskIb+aP3UpDGUDbaSMtVC95b9Zkb4FcplH 42CbgBJtmPmlpZXYUDYwDN5FiyG+ftxht+J++PT29PEmQzfeRLOF5O8wyBixEnjPzsc6 qIcntCb8/rfhTg5Vq/g3G7hUCDLxU+y/q6xXenfV+tCdxlAs+3e4W9Ways2Ysc/xwcLJ lkjw3K1CcKQpoWlKUnrKcv16G7Q+hWasrVPFeqRHoG+Z21jugWqzVfanmiBTf1cYP6Oh 1QoA== X-Gm-Message-State: ACrzQf0zZUmZ+gO9CKxiOBTCdtdNFmiksG7GO/lPv/SHh4BbAGp948tF Tvds51Er8q80nD9XgCm45iUc+px6yOl1/cY2Pw9b1QORxma2UupuFiuxEsRUcWHyzT88U2yFsN0 jJvag5ySMvdV8b7N/xNei1GKuRBymifQbAFYsf/ZpG07Xb8ogRoOGZS0AugwcajM66e8= X-Received: by 2002:a17:906:b106:b0:780:83a8:773 with SMTP id u6-20020a170906b10600b0078083a80773mr17597670ejy.758.1663694722995; Tue, 20 Sep 2022 10:25:22 -0700 (PDT) X-Google-Smtp-Source: AMsMyM5zgTwzmTT3x4+VMPo9x89B6aDIaEnB2u8IEf5bkGsz6hPGyZgqAUs1sEzI5soaPhfjSLaI2A== X-Received: by 
2002:a17:906:b106:b0:780:83a8:773 with SMTP id u6-20020a170906b10600b0078083a80773mr17597650ejy.758.1663694722714; Tue, 20 Sep 2022 10:25:22 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 09/37] target/i386: add AVX_EN hflag Date: Tue, 20 Sep 2022 19:24:39 +0200 Message-Id: <20220920172507.95568-10-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713711611100001 Content-Type: text/plain; charset="utf-8" From: Paul Brook Add a new hflag bit to determine whether AVX instructions are allowed Signed-off-by: Paul Brook Message-Id: <20220424220204.2493824-4-paul@nowt.org> Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 3 +++ target/i386/helper.c | 12 ++++++++++++ target/i386/tcg/fpu_helper.c | 1 + 3 files changed, 16 insertions(+) diff --git 
a/target/i386/cpu.h b/target/i386/cpu.h index 8311b69c88..ff1df4ea53 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -169,6 +169,7 @@ typedef enum X86Seg { #define HF_MPX_EN_SHIFT 25 /* MPX Enabled (CR4+XCR0+BNDCFGx) */ #define HF_MPX_IU_SHIFT 26 /* BND registers in-use */ #define HF_UMIP_SHIFT 27 /* CR4.UMIP */ +#define HF_AVX_EN_SHIFT 28 /* AVX Enabled (CR4+XCR0) */ =20 #define HF_CPL_MASK (3 << HF_CPL_SHIFT) #define HF_INHIBIT_IRQ_MASK (1 << HF_INHIBIT_IRQ_SHIFT) @@ -195,6 +196,7 @@ typedef enum X86Seg { #define HF_MPX_EN_MASK (1 << HF_MPX_EN_SHIFT) #define HF_MPX_IU_MASK (1 << HF_MPX_IU_SHIFT) #define HF_UMIP_MASK (1 << HF_UMIP_SHIFT) +#define HF_AVX_EN_MASK (1 << HF_AVX_EN_SHIFT) =20 /* hflags2 */ =20 @@ -2121,6 +2123,7 @@ void host_cpuid(uint32_t function, uint32_t count, =20 /* helper.c */ void x86_cpu_set_a20(X86CPU *cpu, int a20_state); +void cpu_sync_avx_hflag(CPUX86State *env); =20 #ifndef CONFIG_USER_ONLY static inline int x86_asidx_from_attrs(CPUState *cs, MemTxAttrs attrs) diff --git a/target/i386/helper.c b/target/i386/helper.c index fa409e9c44..30083c9cff 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -29,6 +29,17 @@ #endif #include "qemu/log.h" =20 +void cpu_sync_avx_hflag(CPUX86State *env) +{ + if ((env->cr[4] & CR4_OSXSAVE_MASK) + && (env->xcr0 & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) + =3D=3D (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) { + env->hflags |=3D HF_AVX_EN_MASK; + } else{ + env->hflags &=3D ~HF_AVX_EN_MASK; + } +} + void cpu_sync_bndcs_hflags(CPUX86State *env) { uint32_t hflags =3D env->hflags; @@ -209,6 +220,7 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_= cr4) env->hflags =3D hflags; =20 cpu_sync_bndcs_hflags(env); + cpu_sync_avx_hflag(env); } =20 #if !defined(CONFIG_USER_ONLY) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 30bc44fcf8..48bf0c5cf8 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2943,6 +2943,7 @@ void 
helper_xsetbv(CPUX86State *env, uint32_t ecx, ui= nt64_t mask) =20 env->xcr0 =3D mask; cpu_sync_bndcs_hflags(env); + cpu_sync_avx_hflag(env); return; =20 do_gpf: --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663711031; cv=none; d=zohomail.com; s=zohoarc; b=YEAy2qGBQkpqMxZopLJFvIlXzCpXqNwGfLex+Mx2m5LI+U8kJvaFxS15yOSLg3T85D5UJY6ZiQ+LEGVbz8x1gXO5JmjcaeOrH76hxLxxVneo+HhOPBcRtCQmn7LWTPCYhCeCx+nhFqyjGN2AE4rZaqem0AzevR96L5ABIfAIUdE= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663711031; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=r3BqA3UdIPAOyX412zN1DWCXCJRBRB/o7GJRTryks30=; b=DGJe1UPu2Q3mrH6YMaCgFJ7xHolHK10KgnhA4RpfHpLF12G/eVxSX8r4urusnSPW17p3qiKqJ36cIs46+c+kk55o5il0o7/B2rrIhESVymnna8Ar5iE7aHFKhLS6KTjozTD3/4rJCCvfYm6pwmU4C8Q+g7WrlYiD8ZMNEYp6Ghg= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663711031050885.2675794239331; Tue, 20 Sep 2022 14:57:11 -0700 (PDT) Received: from localhost ([::1]:53438 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalEr-0002em-Tx for importer@patchew.org; Tue, 20 Sep 2022 17:57:09 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:60416) by lists.gnu.org with esmtps 
(TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah00-0006MD-OY for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:42 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:43739) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzw-00029D-Ev for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:30 -0400 Received: from mail-ed1-f70.google.com (mail-ed1-f70.google.com [209.85.208.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-613-YjPPvHoZO3GiSvqMbiZi8g-1; Tue, 20 Sep 2022 13:25:26 -0400 Received: by mail-ed1-f70.google.com with SMTP id h13-20020a056402280d00b004528c8400afso2344139ede.6 for ; Tue, 20 Sep 2022 10:25:25 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id e4-20020aa7d7c4000000b0044ec76521a1sm199191eds.55.2022.09.20.10.25.23 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:23 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694727; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=r3BqA3UdIPAOyX412zN1DWCXCJRBRB/o7GJRTryks30=; b=J1auotKI/D2x7T6cb/JjaYkW0SYOpwmyxpvOodZ7LfGFwiAITnbc+2T4zofSbCtGBu9fIl g9CAcdEoajqx/F79CGFHP6YikgMBMDi9VSD3l6rCCp8ui5NCN1TWTBVmPcpYbuMuR/iuQr LhX6di9yMJgzGQd0gEy846obGAVkX6I= X-MC-Unique: YjPPvHoZO3GiSvqMbiZi8g-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=r3BqA3UdIPAOyX412zN1DWCXCJRBRB/o7GJRTryks30=; 
b=HpKissnTwldSFFGtqO3aZT6dFjYMyQyFn8pb37JrSi5o/Jxc/GMofi4I4roQKzKTWf g0gGdRUe+18DDTl9759+mwNqFk2QqNSjCqofodvARYXIzTUXRgbUJvw/9ETCQoS4GGxy x/unOz53ckaV9LQYDnir5G8X+QriGI5VPC2fu+4TuqHt3LN0JVOUKp3QW+2maTbw6pWd wpN2EAXNqnJCjUyk/n8rGeB09qZJ9bXQCJoOPZVNA4iXYPUGnnOnmvSWRRWHElzHn6Qy IpTooGvw0p0adY3dG46CLSxW8aCWT8K8ju5xahVNq4w1RkRRhRWraW1PCTpOZDsxP+dN EYkA== X-Gm-Message-State: ACrzQf1ZK4aM2PIJZTf0dROqesYRPaunlqJm27oF163OolmKrfAX/Dot 5Et0SE89HUrBDiPZFI4GDKVtoOXLr+zRSgQjgDgha11mIKs6XD0b+0rdZe2N1DS7gxvzTnlokzw gt+szxIxaGWmtCT12Eo6d01vp9SUM/KkovDOAswOC4CSI/7li3AVCv4qAjkFsyLdWnSk= X-Received: by 2002:aa7:c997:0:b0:454:232d:514a with SMTP id c23-20020aa7c997000000b00454232d514amr8932381edt.416.1663694724408; Tue, 20 Sep 2022 10:25:24 -0700 (PDT) X-Google-Smtp-Source: AMsMyM7/aDNaO4Jb4l18q/L5cyL385kVCgLWD6CF9LubOL/3PsUL9AQoRxoIPYAS8j35EyZS8BZnpA== X-Received: by 2002:aa7:c997:0:b0:454:232d:514a with SMTP id c23-20020aa7c997000000b00454232d514amr8932340edt.416.1663694723965; Tue, 20 Sep 2022 10:25:23 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 10/37] target/i386: validate VEX prefixes via the instructions' exception classes Date: Tue, 20 Sep 2022 19:24:40 +0200 Message-Id: <20220920172507.95568-11-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, 
DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663711032654100001 Content-Type: text/plain; charset="utf-8" Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 160 ++++++++++++++++++++++++++++++- target/i386/tcg/decode-new.h | 32 +++++++ target/i386/tcg/emit.c.inc | 37 ++++++- target/i386/tcg/translate.c | 18 ++-- 4 files changed, 235 insertions(+), 12 deletions(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index e268b5fb48..f56c654e08 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -93,6 +93,23 @@ #define zext0 .special =3D X86_SPECIAL_ZExtOp0, #define zext2 .special =3D X86_SPECIAL_ZExtOp2, =20 +#define vex1 .vex_class =3D 1, +#define vex1_rep3 .vex_class =3D 1, .vex_special =3D X86_VEX_REPScalar, +#define vex2 .vex_class =3D 2, +#define vex2_rep3 .vex_class =3D 2, .vex_special =3D X86_VEX_REPScalar, +#define vex3 .vex_class =3D 3, +#define vex4 .vex_class =3D 4, +#define vex4_unal .vex_class =3D 4, .vex_special =3D X86_VEX_SSEUnaligned, +#define vex5 .vex_class =3D 5, +#define vex6 .vex_class =3D 6, +#define vex7 .vex_class =3D 7, +#define vex8 .vex_class =3D 8, +#define vex11 .vex_class =3D 11, +#define vex12 .vex_class =3D 12, +#define vex13 .vex_class =3D 13, + +#define avx2_256 .vex_special =3D X86_VEX_AVX2_256, + static uint8_t get_modrm(DisasContext *s, CPUX86State *env) { if (!s->has_modrm) { @@ -157,6 +174,18 @@ static const X86OpEntry opcodes_root[256] =3D { }; =20 #undef mmx +#undef vex1 +#undef vex2 
+#undef vex3 +#undef vex4 +#undef vex4_unal +#undef vex5 +#undef vex6 +#undef vex7 +#undef vex8 +#undef vex11 +#undef vex12 +#undef vex13 =20 /* * Decode the fixed part of the opcode and place the last @@ -564,6 +593,132 @@ static bool has_cpuid_feature(DisasContext *s, X86CPU= IDFeature cpuid) g_assert_not_reached(); } =20 +static bool validate_vex(DisasContext *s, X86DecodedInsn *decode) +{ + X86OpEntry *e =3D &decode->e; + + switch (e->vex_special) { + case X86_VEX_REPScalar: + /* + * Instructions which differ between 00/66 and F2/F3 in the + * exception classification and the size of the memory operand. + */ + assert(e->vex_class =3D=3D 1 || e->vex_class =3D=3D 2); + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + e->vex_class =3D 3; + if (s->vex_l) { + goto illegal; + } + assert(decode->e.s2 =3D=3D X86_SIZE_x); + if (decode->op[2].has_ea) { + decode->op[2].ot =3D s->prefix & PREFIX_REPZ ? MO_32 : MO_= 64; + } + } + break; + + case X86_VEX_SSEUnaligned: + /* handled in sse_needs_alignment. */ + break; + + case X86_VEX_AVX2_256: + if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, = X86_FEAT_AVX2)) { + goto illegal; + } + } + + /* TODO: instructions that require VEX.W=3D0 (Table 2-16) */ + + switch (e->vex_class) { + case 0: + if (s->prefix & PREFIX_VEX) { + goto illegal; + } + return true; + case 1: + case 2: + case 3: + case 4: + case 5: + case 7: + if (s->prefix & PREFIX_VEX) { + if (!(s->flags & HF_AVX_EN_MASK)) { + goto illegal; + } + } else { + if (!(s->flags & HF_OSFXSR_MASK)) { + goto illegal; + } + } + break; + case 12: + assert(s->has_modrm); + /* Must have a VSIB byte and no address prefix. */ + if ((s->modrm & 7) !=3D 4 || s->aflag =3D=3D MO_16) { + goto illegal; + } + /* Check no overlap between registers. 
*/ + if (decode->op[0].unit =3D=3D decode->op[1].unit && decode->op[0].= n =3D=3D decode->op[1].n) { + goto illegal; + } + if (decode->op[0].unit =3D=3D X86_OP_SSE && decode->op[0].n =3D=3D= decode->mem.index) { + goto illegal; + } + if (decode->op[1].unit =3D=3D X86_OP_SSE && decode->op[1].n =3D=3D= decode->mem.index) { + goto illegal; + } + /* fall through */ + case 6: + case 11: + if (!(s->prefix & PREFIX_VEX)) { + goto illegal; + } + if (!(s->flags & HF_AVX_EN_MASK)) { + goto illegal; + } + break; + case 8: + if (!(s->prefix & PREFIX_VEX)) { + /* EMMS */ + return true; + } + if (!(s->flags & HF_AVX_EN_MASK)) { + goto illegal; + } + break; + case 13: + if (!(s->prefix & PREFIX_VEX)) { + goto illegal; + } + if (s->vex_l) { + goto illegal; + } + /* All integer instructions use VEX.vvvv, so exit. */ + return true; + } + + if (s->vex_v !=3D 0 && + e->op0 !=3D X86_TYPE_H && e->op0 !=3D X86_TYPE_B && + e->op1 !=3D X86_TYPE_H && e->op1 !=3D X86_TYPE_B && + e->op2 !=3D X86_TYPE_H && e->op2 !=3D X86_TYPE_B) { + goto illegal; + } + + if (s->flags & HF_TS_MASK) { + goto nm_exception; + } + if (s->flags & HF_EM_MASK) { + goto illegal; + } + return true; + +nm_exception: + gen_NM_exception(s); + return false; +illegal: + gen_illegal_opcode(s); + return false; +} + static void decode_temp_free(X86DecodedOp *op) { if (op->v_ptr) { @@ -804,8 +959,11 @@ static void disas_insn_new(DisasContext *s, CPUState *= cpu, int b) break; } =20 + if (!validate_vex(s, &decode)) { + return; + } if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)= { - gen_load_ea(s, &decode.mem); + gen_load_ea(s, &decode.mem, decode.e.vex_class =3D=3D 12); } if (s->prefix & PREFIX_LOCK) { if (decode.op[0].unit !=3D X86_OP_INT || !decode.op[0].has_ea) { diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index e62e9c9d87..8431057769 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -152,6 +152,36 @@ typedef enum X86InsnSpecial { 
X86_SPECIAL_o64, } X86InsnSpecial; =20 +/* + * Special cases for instructions that operate on XMM/YMM registers. Intel + * retconned all of them to have VEX exception classes other than 0 and 13= , so + * all these only matter for instructions that have a VEX exception class. + * Based on tables in the "AVX and SSE Instruction Exception Specification" + * section of the manual. + */ +typedef enum X86VEXSpecial { + /* Legacy SSE instructions that allow unaligned operands */ + X86_VEX_SSEUnaligned, + + /* + * Used for instructions that distinguish the XMM operand type with an + * instruction prefix; legacy SSE encodings will allow unaligned opera= nds + * for scalar operands only (identified by a REP prefix). In this cas= e, + * the decoding table uses "x" for the vector operands instead of spec= ifying + * pd/ps/sd/ss individually. + */ + X86_VEX_REPScalar, + + /* + * VEX instructions that only support 256-bit operands with AVX2 (Tabl= e 2-17 + * column 3). Columns 2 and 4 (instructions limited to 256- and 127-b= it + * operands respectively) are implicit in the presence of dq and qq + * operands, and thus handled by decode_op_size. + */ + X86_VEX_AVX2_256, +} X86VEXSpecial; + + typedef struct X86OpEntry X86OpEntry; typedef struct X86DecodedInsn X86DecodedInsn; =20 @@ -180,6 +210,8 @@ struct X86OpEntry { =20 X86InsnSpecial special:8; X86CPUIDFeature cpuid:8; + uint8_t vex_class:8; + X86VEXSpecial vex_special:8; bool is_decode:1; }; =20 diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 8f60658537..0cba106f74 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,14 +19,19 @@ * License along with this library; if not, see . 
*/ =20 +static void gen_NM_exception(DisasContext *s) +{ + gen_exception(s, EXCP07_PREX, s->pc_start - s->cs_base); +} + static void gen_illegal(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) { gen_illegal_opcode(s); } =20 -static void gen_load_ea(DisasContext *s, AddressParts *mem) +static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib) { - TCGv ea =3D gen_lea_modrm_1(s, *mem); + TCGv ea =3D gen_lea_modrm_1(s, *mem, is_vsib); gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override); } =20 @@ -104,6 +109,25 @@ static void gen_load_sse(DisasContext *s, TCGv temp, M= emOp ot, int dest_ofs, boo } } =20 +static bool sse_needs_alignment(DisasContext *s, X86DecodedInsn *decode, M= emOp ot) +{ + switch (decode->e.vex_class) { + case 2: + case 4: + if ((s->prefix & PREFIX_VEX) || + decode->e.vex_special =3D=3D X86_VEX_SSEUnaligned) { + /* MOST legacy SSE instructions require aligned memory operand= s, but not all. */ + return false; + } + /* fall through */ + case 1: + return ot >=3D MO_128; + + default: + return false; + } +} + static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCG= v v) { X86DecodedOp *op =3D &decode->op[opn]; @@ -140,7 +164,8 @@ static void gen_load(DisasContext *s, X86DecodedInsn *d= ecode, int opn, TCGv v) compute_xmm_offset(op); load_vector: if (op->has_ea) { - gen_load_sse(s, v, op->ot, op->offset, true); + bool aligned =3D sse_needs_alignment(s, decode, op->ot); + gen_load_sse(s, v, op->ot, op->offset, aligned); } break; =20 @@ -176,7 +201,13 @@ static void gen_writeback(DisasContext *s, X86DecodedI= nsn *decode, int opn, TCGv } break; case X86_OP_MMX: + break; case X86_OP_SSE: + if ((s->prefix & PREFIX_VEX) && op->ot =3D=3D MO_128) { + tcg_gen_gvec_dup_imm(MO_64, + offsetof(CPUX86State, xmm_regs[op->n].ZMM= _X(1)), + 16, 16, 0); + } break; case X86_OP_CR: case X86_OP_DR: diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 4dcd276e80..aeda520f35 100644 --- 
a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -23,6 +23,7 @@ #include "disas/disas.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" +#include "tcg/tcg-op-gvec.h" #include "exec/cpu_ldst.h" #include "exec/translator.h" =20 @@ -2217,11 +2218,11 @@ static AddressParts gen_lea_modrm_0(CPUX86State *en= v, DisasContext *s, } =20 /* Compute the address, with a minimum number of TCG ops. */ -static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a) +static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a, bool is_vsib) { TCGv ea =3D NULL; =20 - if (a.index >=3D 0) { + if (a.index >=3D 0 && !is_vsib) { if (a.scale =3D=3D 0) { ea =3D cpu_regs[a.index]; } else { @@ -2249,7 +2250,7 @@ static TCGv gen_lea_modrm_1(DisasContext *s, AddressP= arts a) static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm) { AddressParts a =3D gen_lea_modrm_0(env, s, modrm); - TCGv ea =3D gen_lea_modrm_1(s, a); + TCGv ea =3D gen_lea_modrm_1(s, a, false); gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override); } =20 @@ -2262,7 +2263,8 @@ static void gen_nop_modrm(CPUX86State *env, DisasCont= ext *s, int modrm) static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm, TCGCond cond, TCGv_i64 bndv) { - TCGv ea =3D gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm)); + AddressParts a =3D gen_lea_modrm_0(env, s, modrm); + TCGv ea =3D gen_lea_modrm_1(s, a, false); =20 tcg_gen_extu_tl_i64(s->tmp1_i64, ea); if (!CODE64(s)) { @@ -5964,7 +5966,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) reg =3D ((modrm >> 3) & 7) | REX_R(s); { AddressParts a =3D gen_lea_modrm_0(env, s, modrm); - TCGv ea =3D gen_lea_modrm_1(s, a); + TCGv ea =3D gen_lea_modrm_1(s, a, false); gen_lea_v_seg(s, s->aflag, ea, -1, -1); gen_op_mov_reg_v(s, dflag, reg, s->A0); } @@ -6191,7 +6193,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) if (mod !=3D 3) { /* memory op */ AddressParts a =3D gen_lea_modrm_0(env, s, modrm); - TCGv ea =3D 
gen_lea_modrm_1(s, a); + TCGv ea =3D gen_lea_modrm_1(s, a, false); TCGv last_addr =3D tcg_temp_new(); bool update_fdp =3D true; =20 @@ -7221,7 +7223,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) gen_exts(ot, s->T1); tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot); tcg_gen_shli_tl(s->tmp0, s->tmp0, ot); - tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0); + tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a, false), s->tmp0); gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); if (!(s->prefix & PREFIX_LOCK)) { gen_op_ld_v(s, ot, s->T0, s->A0); @@ -8292,7 +8294,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) /* rip-relative generates #ud */ goto illegal_op; } - tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a)); + tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a, false)); if (!CODE64(s)) { tcg_gen_ext32u_tl(s->A0, s->A0); } --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713958; cv=none; d=zohomail.com; s=zohoarc; b=Pzjh0iuH1MiFEwjuWzM6nCHthxkmEdC9YaGBkMpAc+8TlVXx///UdLZz5hOi0uaja84K5CKG47sr0CGFrQHCbwCQrDakSQ+Ee6ktkmdORNfVVqsRpGG/mjzf9w23ELtuuSa4QE+uEu0h7/wO/wizmzogxwtPPOyQ1BnjdpYVUUo= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713958; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=71v9UCBxG+vsAGEHFzGyYzgryy33ZHT4EeoLr6WO0sw=; b=SypUiPlk9Dz4oKLqah67PACLQlFVPh4d4/uDf9hqytI8XJscAA524wf4iuxn1umj0vJTWCsLvtdTpA0uLsMBw6iSW+eI72ntws55b3N39ZMQhbsNfzrxSp6qzuTDgevLL1bcBoHxHbgbbGbuqC+4wixyefQI4tBGfK7//bslAXI= 
ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663713958274913.1203727207119; Tue, 20 Sep 2022 15:45:58 -0700 (PDT) Received: from localhost ([::1]:42504 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oam04-0005bF-ST for importer@patchew.org; Tue, 20 Sep 2022 18:45:56 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:60418) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah03-0006No-O5 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:45 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:49122) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oagzz-00029g-5R for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:33 -0400 Received: from mail-ej1-f70.google.com (mail-ej1-f70.google.com [209.85.218.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-290-es43iWSDPUeD2qODJWsmPA-1; Tue, 20 Sep 2022 13:25:28 -0400 Received: by mail-ej1-f70.google.com with SMTP id xc12-20020a170907074c00b007416699ea14so1758468ejb.19 for ; Tue, 20 Sep 2022 10:25:28 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id d23-20020a05640208d700b0044f2564c28csm223733edz.20.2022.09.20.10.25.26 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:26 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694729; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: 
to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=71v9UCBxG+vsAGEHFzGyYzgryy33ZHT4EeoLr6WO0sw=; b=bb03pEu1zNJ9qQlR8wu8KNVFXdkSfn22rzink/kLspy37RA+C+mUsM+4Eiw/TCurn5aZSL WjlKJq3JXLbNu5XLuS7t0owvuFQUZB4TveUkz35K+3DQtpax41PiYF5TzPWdsn6H6CgUfM sMfSbBz0RQx2sCOePnm9Nn3UA7+lpns= X-MC-Unique: es43iWSDPUeD2qODJWsmPA-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=71v9UCBxG+vsAGEHFzGyYzgryy33ZHT4EeoLr6WO0sw=; b=PwGqGTEsTsUuef4A098tJnLlxUEePdhDc6GXUcZo+eI/KOcRLfyDjtl2VmrVkdK+am R+19n3zpSxuomlVxsX0ZpRXcrX5tFiUD97ZqGs4fTyFA5CKueySRHJ8eqCJFcRS+YtDb 4mFYu6fyEjwqmwBfITzUNxO3uVvxGXQTWCCQqz5LbHcXFs/5ZdCEuWu+iB1U+MeNfrpM kxhdQBOTABGs4PO3PgkEJKJZyGZqWkTZ+seLhSflBifDfZTw5fHhBqqWBKEWCcBgHp19 ZJtZ896dCFqXbqu41fdMAgZALXaqA1T1PzccCP7nmNebztYgh8wv+PowBJ2feg1UD/Xb Ko9w== X-Gm-Message-State: ACrzQf3UFPM6s8OOOxB+QmyiwpKJdCsRThhkAIf+Y4pWZ1shcf/o89Ga C5telXg7I50+6ERfTn6QZFjCqA6GO8/sFqB1WM+lDde7j16qpclFQ0kUJmL7JupqvFBbbdxBRkI vD649TIs3i2BWp/hfIUw9x4NijcZDQOanHAkCLsxA4For8B1D/Cak4IvHOn2stdAjW6M= X-Received: by 2002:a17:907:dac:b0:775:58d2:defe with SMTP id go44-20020a1709070dac00b0077558d2defemr17972646ejc.231.1663694727167; Tue, 20 Sep 2022 10:25:27 -0700 (PDT) X-Google-Smtp-Source: AMsMyM7cAamLt+YpubthN+RlF28rkU+yLP8uLiMt3gZJuNOCiRrWLS7e937hlxn2Kt/p09/1b7MwJQ== X-Received: by 2002:a17:907:dac:b0:775:58d2:defe with SMTP id go44-20020a1709070dac00b0077558d2defemr17972616ejc.231.1663694726821; Tue, 20 Sep 2022 10:25:26 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 11/37] target/i386: validate SSE prefixes directly in the decoding table Date: Tue, 20 Sep 2022 19:24:41 +0200 Message-Id: 
<20220920172507.95568-12-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713960234100003 Content-Type: text/plain; charset="utf-8" Many SSE and AVX instructions are only valid with specific prefixes (none, 66, F3, F2). Introduce a direct way to encode this in the decoding table to avoid using decode groups too much. 
Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 37 ++++++++++++++++++++++++++++++++ target/i386/tcg/decode-new.h | 1 + 2 files changed, 38 insertions(+) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index f56c654e08..4dc67e6d37 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -110,6 +110,22 @@ =20 #define avx2_256 .vex_special =3D X86_VEX_AVX2_256, =20 +#define P_00 1 +#define P_66 (1 << PREFIX_DATA) +#define P_F3 (1 << PREFIX_REPZ) +#define P_F2 (1 << PREFIX_REPNZ) + +#define p_00 .valid_prefix =3D P_00, +#define p_66 .valid_prefix =3D P_66, +#define p_f3 .valid_prefix =3D P_F3, +#define p_f2 .valid_prefix =3D P_F2, +#define p_00_66 .valid_prefix =3D P_00 | P_66, +#define p_00_f3 .valid_prefix =3D P_00 | P_F3, +#define p_66_f2 .valid_prefix =3D P_66 | P_F2, +#define p_00_66_f3 .valid_prefix =3D P_00 | P_66 | P_F3, +#define p_66_f3_f2 .valid_prefix =3D P_66 | P_F3 | P_F2, +#define p_00_66_f3_f2 .valid_prefix =3D P_00 | P_66 | P_F3 | P_F2, + static uint8_t get_modrm(DisasContext *s, CPUX86State *env) { if (!s->has_modrm) { @@ -480,6 +496,23 @@ static bool decode_op(DisasContext *s, CPUX86State *en= v, X86DecodedInsn *decode, return true; } =20 +static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e) +{ + uint16_t sse_prefixes; + + if (!e->valid_prefix) { + return true; + } + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66. */ + s->prefix &=3D ~PREFIX_DATA; + } + + /* Now, either zero or one bit is set in sse_prefixes. 
*/ + sse_prefixes =3D s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA= ); + return e->valid_prefix & (1 << sse_prefixes); +} + static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc d= ecode_func, X86DecodedInsn *decode) { @@ -491,6 +524,10 @@ static bool decode_insn(DisasContext *s, CPUX86State *= env, X86DecodeFunc decode_ e->decode(s, env, e, &decode->b); } =20 + if (!validate_sse_prefix(s, e)) { + return false; + } + /* First compute size of operands in order to initialize s->rip_offset= . */ if (e->op0 !=3D X86_TYPE_None) { if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) { diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index 8431057769..5fb68a365c 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -212,6 +212,7 @@ struct X86OpEntry { X86CPUIDFeature cpuid:8; uint8_t vex_class:8; X86VEXSpecial vex_special:8; + uint16_t valid_prefix:16; bool is_decode:1; }; =20 --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663711177; cv=none; d=zohomail.com; s=zohoarc; b=gGCFwFs7xMOaNxeWfCvmy/aLN6H5RXI9GMSyomCt4qhFIMNM6vTPwHK+T+ZT621VRT6dlYYwX/LwScsOw1XUnlgpv/5spcKGRiuBAtTxcIseesFmlq93Jm/BdvW7OYOsvVyr/tJYqyz5e1qFEPuQMPYFI4Otlg+P1RgyZXubIpM= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663711177; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=bvfMevmXrR9/Cf/4Gb7PWDBX+h4Rt+ZSzbLf23Fobh4=; 
b=fPnODIIs41WMZqjlmbPHVVSiZEXHnZiV9VjBxY9y0N1MxVpZbp1qDUulBwOnb/AFQbMvQD877vDNqmaCtnLtG/PFLHrA1psIv42pi8v42MvSZdVl8H5WuJhiM+sGCMPPu78cZERyiimjntbVmUq6EAI3m9tSeN+Xuh0vjJ0W/rY= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663711177429248.01002386591074; Tue, 20 Sep 2022 14:59:37 -0700 (PDT) Received: from localhost ([::1]:50108 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalHE-0000Ev-0T for importer@patchew.org; Tue, 20 Sep 2022 17:59:36 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58684) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah08-0006QQ-Oi for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:49 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:53234) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah02-0002A2-Fq for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:40 -0400 Received: from mail-ed1-f72.google.com (mail-ed1-f72.google.com [209.85.208.72]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-141-5-h9MWanOI-lQh9b3YlxnA-1; Tue, 20 Sep 2022 13:25:31 -0400 Received: by mail-ed1-f72.google.com with SMTP id h13-20020a056402280d00b004528c8400afso2344288ede.6 for ; Tue, 20 Sep 2022 10:25:30 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id d3-20020a170906344300b00779dc23be62sm151764ejb.120.2022.09.20.10.25.27 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:27 -0700 (PDT) DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694733; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=bvfMevmXrR9/Cf/4Gb7PWDBX+h4Rt+ZSzbLf23Fobh4=; b=CHiNmRUwExkViIdv7kGKxCRKVnLPfQt6PyCcRgrLjK0Xdh6t09LlBaR984n1D3TZZ2caig BrzkmnfWA1Ngj4csIOjCvkFcTZ4o7vK+HjZU2ZJlNO7tf9BtbSLP+Ppwzqf5xCo+D9R0C8 zDVPQ6TTN+wW4TFQi2tOC3mCgFCA7aE= X-MC-Unique: 5-h9MWanOI-lQh9b3YlxnA-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=bvfMevmXrR9/Cf/4Gb7PWDBX+h4Rt+ZSzbLf23Fobh4=; b=PFIi8JktjxvlbqwTmGBHGJr8JxCEssRe0FqUTvaxz2GveghpggiNnDAHiem63RSZaW rprv3IgV803QcRPG3MAhb7YOt0XFATnelhrfRxh0OqNN50H40p8m1H7svHzZLayd7XhC VSE3E+j4lYc1/rOSWsGEMOMMvmZ7eaK7pBshzOfBeOCohm7O9+nYBHquKKzCkD7mt5n2 HXlWke8IPRQu78K+IfMD3qqZCFl/2PYDT1TdKbBqdrIdXV00svaDTg8PHvcsH0/e5XFL vBppVlGAhToK543TWlmIau3jr/Y6LFkF/VewHy4mXVUBCBSNdcesNeyweqqgTryjJ52g i15Q== X-Gm-Message-State: ACrzQf2f7i4t6WhMn7BlySn1TuK3i+zEKry73tysQQQ1exyUn7X7kP5j JP7w/2SIgGM6oj7nwaZT2iOEubYR6Ur/3DqzGc/vRa86ClPyZCFHSTJrsklhnofBsSBHRYzy7/W Fx6IKWW/hSY7BZaw/2u9RFM1LyCopAtixDVsGhNueJ1/uoA8U0RE1/segmMowyenCL3Q= X-Received: by 2002:a17:906:5a45:b0:77a:406c:6867 with SMTP id my5-20020a1709065a4500b0077a406c6867mr17382252ejc.437.1663694729086; Tue, 20 Sep 2022 10:25:29 -0700 (PDT) X-Google-Smtp-Source: AMsMyM7v4tKkHsETfNRhJVKbSGBKIsbcyQkDRnmGhbC3eMno9aDQdVpczl4MwgTLmFz6PI30ir/nEg== X-Received: by 2002:a17:906:5a45:b0:77a:406c:6867 with SMTP id my5-20020a1709065a4500b0077a406c6867mr17382204ejc.437.1663694728382; Tue, 20 Sep 2022 10:25:28 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 12/37] 
target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder Date: Tue, 20 Sep 2022 19:24:42 +0200 Message-Id: <20220920172507.95568-13-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663711178320100003 Content-Type: text/plain; charset="utf-8" Because these are the only VEX instructions that QEMU supports, the new decoder is entered on the first byte of a valid VEX prefix, and VEX decoding only needs to be done in decode-new.c.inc. 
Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/decode-new.c.inc | 59 +++++++ target/i386/tcg/emit.c.inc | 258 +++++++++++++++++++++++++++ target/i386/tcg/translate.c | 293 +------------------------------ 3 files changed, 321 insertions(+), 289 deletions(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index 4dc67e6d37..4344bcb40c 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -135,11 +135,69 @@ static uint8_t get_modrm(DisasContext *s, CPUX86State= *env) return s->modrm; } =20 +static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *= entry, uint8_t *b) +{ + static const X86GenFunc group17_gen[8] =3D { + NULL, gen_BLSR, gen_BLSMSK, gen_BLSI, + }; + int op =3D (get_modrm(s, env) >> 3) & 7; + entry->gen =3D group17_gen[op]; +} + static const X86OpEntry opcodes_0F38_00toEF[240] =3D { }; =20 /* five rows for no prefix, 66, F3, F2, 66+F2 */ static const X86OpEntry opcodes_0F38_F0toFF[16][5] =3D { + [0] =3D { + X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)), + X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)), + {}, + X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)), + X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)), + }, + [1] =3D { + X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)), + X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)), + {}, + X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)), + X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)), + }, + [2] =3D { + X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)), + {}, + {}, + {}, + {}, + }, + [3] =3D { + X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)), + {}, + {}, + {}, + {}, + }, + [5] =3D { + X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)), + {}, + X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)), + X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)), + {}, + }, + [6] =3D { + {}, + X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)), + X86_OP_ENTRY2(ADOX, 
G,y, E,y, cpuid(ADX)), + X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)), + {}, + }, + [7] =3D { + X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)), + X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)), + X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)), + X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)), + {}, + }, }; =20 static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) @@ -161,6 +219,7 @@ static void decode_0F38(DisasContext *s, CPUX86State *e= nv, X86OpEntry *entry, ui } =20 static const X86OpEntry opcodes_0F3A[256] =3D { + [0xF0] =3D X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2), }; =20 static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 0cba106f74..862da3c84a 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -215,3 +215,261 @@ static void gen_writeback(DisasContext *s, X86Decoded= Insn *decode, int opn, TCGv g_assert_not_reached(); } } + +static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_= op) +{ + TCGv carry_in =3D NULL; + TCGv carry_out =3D (cc_op =3D=3D CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2= ); + TCGv zero; + + if (cc_op =3D=3D s->cc_op || s->cc_op =3D=3D CC_OP_ADCOX) { + /* Re-use the carry-out from a previous round. */ + carry_in =3D carry_out; + cc_op =3D s->cc_op; + } else if (s->cc_op =3D=3D CC_OP_ADCX || s->cc_op =3D=3D CC_OP_ADOX) { + /* Merge with the carry-out from the opposite instruction. */ + cc_op =3D CC_OP_ADCOX; + } + + /* If we don't have a carry-in, get it out of EFLAGS. */ + if (!carry_in) { + if (s->cc_op !=3D CC_OP_ADCX && s->cc_op !=3D CC_OP_ADOX) { + gen_compute_eflags(s); + } + carry_in =3D s->tmp0; + tcg_gen_extract_tl(carry_in, cpu_cc_src, + ctz32(cc_op =3D=3D CC_OP_ADCX ? 
CC_C : CC_O), 1); + } + + switch (ot) { +#ifdef TARGET_X86_64 + case MO_32: + /* If TL is 64-bit just do everything in 64-bit arithmetic. */ + tcg_gen_add_i64(s->T0, s->T0, s->T1); + tcg_gen_add_i64(s->T0, s->T0, carry_in); + tcg_gen_shri_i64(carry_out, s->T0, 32); + break; +#endif + default: + zero =3D tcg_constant_tl(0); + tcg_gen_add2_tl(s->T0, carry_out, s->T0, zero, carry_in, zero); + tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero); + break; + } + set_cc_op(s, cc_op); +} + +static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX); +} + +static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX); +} + +static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[0].ot; + + tcg_gen_andc_tl(s->T0, s->T1, s->T0); + gen_op_update1_cc(s); + set_cc_op(s, CC_OP_LOGICB + ot); +} + +static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) +{ + MemOp ot =3D decode->op[0].ot; + TCGv bound, zero; + + /* + * Extract START, and shift the operand. + * Shifts larger than operand size get zeros. + */ + tcg_gen_ext8u_tl(s->A0, s->T1); + tcg_gen_shr_tl(s->T0, s->T0, s->A0); + + bound =3D tcg_constant_tl(ot =3D=3D MO_64 ? 63 : 31); + zero =3D tcg_constant_tl(0); + tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero); + + /* + * Extract the LEN into a mask. Lengths larger than + * operand size get all ones. 
+ */ + tcg_gen_extract_tl(s->A0, s->T1, 8, 8); + tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound); + + tcg_gen_movi_tl(s->T1, 1); + tcg_gen_shl_tl(s->T1, s->T1, s->A0); + tcg_gen_subi_tl(s->T1, s->T1, 1); + tcg_gen_and_tl(s->T0, s->T0, s->T1); + + gen_op_update1_cc(s); + set_cc_op(s, CC_OP_LOGICB + ot); +} + +static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[0].ot; + + tcg_gen_neg_tl(s->T1, s->T0); + tcg_gen_and_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + set_cc_op(s, CC_OP_BMILGB + ot); +} + +static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) +{ + MemOp ot =3D decode->op[0].ot; + + tcg_gen_subi_tl(s->T1, s->T0, 1); + tcg_gen_xor_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + set_cc_op(s, CC_OP_BMILGB + ot); +} + +static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[0].ot; + + tcg_gen_subi_tl(s->T1, s->T0, 1); + tcg_gen_and_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + set_cc_op(s, CC_OP_BMILGB + ot); +} + +static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[0].ot; + TCGv bound; + + tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]); + bound =3D tcg_constant_tl(ot =3D=3D MO_64 ? 63 : 31); + + /* + * Note that since we're using BMILG (in order to get O + * cleared) we need to store the inverse into C. 
+ */ + tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound); + tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1); + + tcg_gen_movi_tl(s->A0, -1); + tcg_gen_shl_tl(s->A0, s->A0, s->T1); + tcg_gen_andc_tl(s->T0, s->T0, s->A0); + + gen_op_update1_cc(s); + set_cc_op(s, CC_OP_BMILGB + ot); +} + +static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) +{ + MemOp ot =3D decode->op[2].ot; + + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot)); +} + +static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) +{ + MemOp ot =3D decode->op[0].ot; + + /* M operand type does not load/store */ + if (decode->e.op0 =3D=3D X86_TYPE_M) { + tcg_gen_qemu_st_tl(s->T0, s->A0, s->mem_index, ot | MO_BE); + } else { + tcg_gen_qemu_ld_tl(s->T0, s->A0, s->mem_index, ot | MO_BE); + } +} + +static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[0].ot; + + /* low part of result in VEX.vvvv, high in MODRM */ + switch (ot) { + default: + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); + tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, + s->tmp2_i32, s->tmp3_i32); + tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); + tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32); + break; +#ifdef TARGET_X86_64 + case MO_64: + tcg_gen_mulu2_i64(cpu_regs[s->vex_v], s->T0, s->T0, s->T1); + break; +#endif + } + +} + +static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[1].ot; + if (ot < MO_64) { + tcg_gen_ext32u_tl(s->T0, s->T0); + } + gen_helper_pdep(s->T0, s->T0, s->T1); +} + +static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[1].ot; + if (ot < MO_64) { + tcg_gen_ext32u_tl(s->T0, s->T0); + } + gen_helper_pext(s->T0, s->T0, s->T1); +} + +static void gen_RORX(DisasContext *s, 
CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[0].ot; + int b =3D decode->immediate; + + if (ot =3D=3D MO_64) { + tcg_gen_rotri_tl(s->T0, s->T0, b & 63); + } else { + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + } +} + +static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[0].ot; + int mask; + + mask =3D ot =3D=3D MO_64 ? 63 : 31; + tcg_gen_andi_tl(s->T1, s->T1, mask); + if (ot !=3D MO_64) { + tcg_gen_ext32s_tl(s->T0, s->T0); + } + tcg_gen_sar_tl(s->T0, s->T0, s->T1); +} + +static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[0].ot; + int mask; + + mask =3D ot =3D=3D MO_64 ? 63 : 31; + tcg_gen_andi_tl(s->T1, s->T1, mask); + tcg_gen_shl_tl(s->T0, s->T0, s->T1); +} + +static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + MemOp ot =3D decode->op[0].ot; + int mask; + + mask =3D ot =3D=3D MO_64 ? 
63 : 31; + tcg_gen_andi_tl(s->T1, s->T1, mask); + if (ot !=3D MO_64) { + tcg_gen_ext32u_tl(s->T0, s->T0); + } + tcg_gen_shr_tl(s->T0, s->T0, s->T1); +} diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index aeda520f35..cc2e6f0906 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4130,151 +4130,6 @@ static void gen_sse(CPUX86State *env, DisasContext = *s, int b, s->mem_index, ot | MO_BE); } break; - - case 0x0f2: /* andn Gy, By, Ey */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) - || !(s->prefix & PREFIX_VEX) - || s->vex_l !=3D 0) { - goto illegal_op; - } - ot =3D mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]); - gen_op_mov_reg_v(s, ot, reg, s->T0); - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); - break; - - case 0x0f7: /* bextr Gy, Ey, By */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) - || !(s->prefix & PREFIX_VEX) - || s->vex_l !=3D 0) { - goto illegal_op; - } - ot =3D mo_64_32(s->dflag); - { - TCGv bound, zero; - - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - /* Extract START, and shift the operand. - Shifts larger than operand size get zeros. */ - tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]); - tcg_gen_shr_tl(s->T0, s->T0, s->A0); - - bound =3D tcg_const_tl(ot =3D=3D MO_64 ? 63 : 31); - zero =3D tcg_const_tl(0); - tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, - s->T0, zero); - tcg_temp_free(zero); - - /* Extract the LEN into a mask. Lengths larger than - operand size get all ones. 
*/ - tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8); - tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, - s->A0, bound); - tcg_temp_free(bound); - tcg_gen_movi_tl(s->T1, 1); - tcg_gen_shl_tl(s->T1, s->T1, s->A0); - tcg_gen_subi_tl(s->T1, s->T1, 1); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - - gen_op_mov_reg_v(s, ot, reg, s->T0); - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); - } - break; - - case 0x0f5: /* bzhi Gy, Ey, By */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l !=3D 0) { - goto illegal_op; - } - ot =3D mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]); - { - TCGv bound =3D tcg_const_tl(ot =3D=3D MO_64 ? 63 : 31); - /* Note that since we're using BMILG (in order to get O - cleared) we need to store the inverse into C. */ - tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, - s->T1, bound); - tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, - bound, bound, s->T1); - tcg_temp_free(bound); - } - tcg_gen_movi_tl(s->A0, -1); - tcg_gen_shl_tl(s->A0, s->A0, s->T1); - tcg_gen_andc_tl(s->T0, s->T0, s->A0); - gen_op_mov_reg_v(s, ot, reg, s->T0); - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_BMILGB + ot); - break; - - case 0x3f6: /* mulx By, Gy, rdx, Ey */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l !=3D 0) { - goto illegal_op; - } - ot =3D mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - switch (ot) { - default: - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]); - tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, - s->tmp2_i32, s->tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); - tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32); - break; -#ifdef TARGET_X86_64 - case MO_64: - tcg_gen_mulu2_i64(s->T0, s->T1, - s->T0, cpu_regs[R_EDX]); - tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0); - 
tcg_gen_mov_i64(cpu_regs[reg], s->T1); - break; -#endif - } - break; - - case 0x3f5: /* pdep Gy, By, Ey */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l !=3D 0) { - goto illegal_op; - } - ot =3D mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - /* Note that by zero-extending the source operand, we - automatically handle zero-extending the result. */ - if (ot =3D=3D MO_64) { - tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]); - } else { - tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]); - } - gen_helper_pdep(cpu_regs[reg], s->T1, s->T0); - break; - - case 0x2f5: /* pext Gy, By, Ey */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l !=3D 0) { - goto illegal_op; - } - ot =3D mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - /* Note that by zero-extending the source operand, we - automatically handle zero-extending the result. */ - if (ot =3D=3D MO_64) { - tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]); - } else { - tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]); - } - gen_helper_pext(cpu_regs[reg], s->T1, s->T0); - break; - case 0x1f6: /* adcx Gy, Ey */ case 0x2f6: /* adox Gy, Ey */ CHECK_NO_VEX(s); @@ -4354,73 +4209,6 @@ static void gen_sse(CPUX86State *env, DisasContext *= s, int b, } break; =20 - case 0x1f7: /* shlx Gy, Ey, By */ - case 0x2f7: /* sarx Gy, Ey, By */ - case 0x3f7: /* shrx Gy, Ey, By */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l !=3D 0) { - goto illegal_op; - } - ot =3D mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - if (ot =3D=3D MO_64) { - tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63); - } else { - tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31); - } - if (b =3D=3D 0x1f7) { - tcg_gen_shl_tl(s->T0, s->T0, s->T1); - } else if (b =3D=3D 0x2f7) { - if (ot !=3D MO_64) { - tcg_gen_ext32s_tl(s->T0, s->T0); - } - tcg_gen_sar_tl(s->T0, s->T0, 
s->T1); - } else { - if (ot !=3D MO_64) { - tcg_gen_ext32u_tl(s->T0, s->T0); - } - tcg_gen_shr_tl(s->T0, s->T0, s->T1); - } - gen_op_mov_reg_v(s, ot, reg, s->T0); - break; - - case 0x0f3: - case 0x1f3: - case 0x2f3: - case 0x3f3: /* Group 17 */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) - || !(s->prefix & PREFIX_VEX) - || s->vex_l !=3D 0) { - goto illegal_op; - } - ot =3D mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - - tcg_gen_mov_tl(cpu_cc_src, s->T0); - switch (reg & 7) { - case 1: /* blsr By,Ey */ - tcg_gen_subi_tl(s->T1, s->T0, 1); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - break; - case 2: /* blsmsk By,Ey */ - tcg_gen_subi_tl(s->T1, s->T0, 1); - tcg_gen_xor_tl(s->T0, s->T0, s->T1); - break; - case 3: /* blsi By, Ey */ - tcg_gen_neg_tl(s->T1, s->T0); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - break; - default: - goto unknown_op; - } - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - gen_op_mov_reg_v(s, ot, s->vex_v, s->T0); - set_cc_op(s, CC_OP_BMILGB + ot); - break; - - default: - goto unknown_op; } break; =20 @@ -4636,37 +4424,6 @@ static void gen_sse(CPUX86State *env, DisasContext *= s, int b, } break; =20 - case 0x33a: - /* Various integer extensions at 0f 3a f[0-f]. 
*/ - b =3D modrm | (b1 << 8); - modrm =3D x86_ldub_code(env, s); - reg =3D ((modrm >> 3) & 7) | REX_R(s); - - switch (b) { - case 0x3f0: /* rorx Gy,Ey, Ib */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l !=3D 0) { - goto illegal_op; - } - ot =3D mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - b =3D x86_ldub_code(env, s); - if (ot =3D=3D MO_64) { - tcg_gen_rotri_tl(s->T0, s->T0, b & 63); - } else { - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); - } - gen_op_mov_reg_v(s, ot, reg, s->T0); - break; - - default: - goto unknown_op; - } - break; - default: unknown_op: gen_unknown_opcode(env, s); @@ -4905,59 +4662,17 @@ static target_ulong disas_insn(DisasContext *s, CPU= State *cpu) #endif case 0xc5: /* 2-byte VEX */ case 0xc4: /* 3-byte VEX */ - use_new =3D false; - /* VEX prefixes cannot be used except in 32-bit mode. - Otherwise the instruction is LES or LDS. */ if (CODE32(s) && !VM86(s)) { - static const int pp_prefix[4] =3D { - 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ - }; - int vex3, vex2 =3D x86_ldub_code(env, s); + int vex2 =3D x86_ldub_code(env, s); + s->pc--; /* rewind the advance_pc() x86_ldub_code() did */ =20 if (!CODE64(s) && (vex2 & 0xc0) !=3D 0xc0) { /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b, otherwise the instruction is LES or LDS. */ - s->pc--; /* rewind the advance_pc() x86_ldub_code() did */ break; } - - /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes= . 
*/ - if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ - | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) { - goto illegal_op; - } -#ifdef TARGET_X86_64 - s->rex_r =3D (~vex2 >> 4) & 8; -#endif - if (b =3D=3D 0xc5) { - /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode = byte */ - vex3 =3D vex2; - b =3D x86_ldub_code(env, s) | 0x100; - } else { - /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */ - vex3 =3D x86_ldub_code(env, s); -#ifdef TARGET_X86_64 - s->rex_x =3D (~vex2 >> 3) & 8; - s->rex_b =3D (~vex2 >> 2) & 8; - s->rex_w =3D (vex3 >> 7) & 1; -#endif - switch (vex2 & 0x1f) { - case 0x01: /* Implied 0f leading opcode bytes. */ - b =3D x86_ldub_code(env, s) | 0x100; - break; - case 0x02: /* Implied 0f 38 leading opcode bytes. */ - b =3D 0x138; - break; - case 0x03: /* Implied 0f 3a leading opcode bytes. */ - b =3D 0x13a; - break; - default: /* Reserved for future use. */ - goto unknown_op; - } - } - s->vex_v =3D (~vex3 >> 3) & 0xf; - s->vex_l =3D (vex3 >> 2) & 1; - prefixes |=3D pp_prefix[vex3 & 3] | PREFIX_VEX; + disas_insn_new(s, cpu, b); + return s->pc; } break; } --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713741; cv=none; d=zohomail.com; s=zohoarc; b=fDiRSPYa4YBSsGydqDzhpjfdgsvmcY9VpIPrvZ6EvGo5HzR/ZKJg1NbfmrujSw6i2ebx9XvpubujQc+TtYSdh7QD0DDiY5FenlbHcVqQktul463gpYy6vBmPtaZ1f1j8N/aiNgcOiCFHqJXF0djG6t6E5CetdDIM/qfs++E97j0= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713741; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; 
bh=7PT7xATaVA8JzO+2GaxsONZZ33cSzYJhz8pmwMgRcJ4=; b=kl6S6GNycv1ZSaIpZvt1yDxEe0iZge14kyvrtnlsPZ4XGb6e+ikoqncLMTDXIOypJ+nd+vDaIHExltBOI9JdrwgH12BP4TdepCAk7nOiJUFZcTo9h9GZ5nkVVhUUv9YJxHSuSrNDKNh++WKW6jmEi6lxDEo/JXqNY5JuNGxDKCQ= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663713741608920.3336466165061; Tue, 20 Sep 2022 15:42:21 -0700 (PDT) Received: from localhost ([::1]:41508 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalwZ-0006RA-C5 for importer@patchew.org; Tue, 20 Sep 2022 18:42:19 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58680) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah06-0006Q7-Au for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:45 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:28945) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah01-00029x-FN for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:36 -0400 Received: from mail-ed1-f72.google.com (mail-ed1-f72.google.com [209.85.208.72]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-483-EFEXrpj0PbCiOKCMD_BtVA-1; Tue, 20 Sep 2022 13:25:31 -0400 Received: by mail-ed1-f72.google.com with SMTP id e15-20020a056402190f00b0044f41e776a0so2397363edz.0 for ; Tue, 20 Sep 2022 10:25:31 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id h25-20020aa7cdd9000000b0044ee2869ef7sm244684edw.4.2022.09.20.10.25.28 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 
10:25:29 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694732; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=7PT7xATaVA8JzO+2GaxsONZZ33cSzYJhz8pmwMgRcJ4=; b=IlmVAC/aIhR5+6PwtmyLhhPR4yqbc8dF0H/GwYMCsTpVHJq6No5Q5WcBSiBrrbF2OrGTUR b+xo0q9JES3hPzZDbT5SpiQnAr1n4sjQh1hQOneRknd0t09MZV9Zp8IPOFxBlCL4Lfk6Dh cmh5Pq9ZGNGGd/n4ihYm20qEb6b6lCY= X-MC-Unique: EFEXrpj0PbCiOKCMD_BtVA-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=7PT7xATaVA8JzO+2GaxsONZZ33cSzYJhz8pmwMgRcJ4=; b=n6rg8/eEveuc3JGZdtRTiAYMRbThW8PB26fOR7ZdG7hfTB1KxVqWw6h4erjfo7KU3E aJFfcHvZaAvRcmwMHq9Q6Yg98OkQ7GT6FgWJs6r7MQVlgPSkf2Ehh8WgN5e82FDrd9FJ wfHBoDjZ2PufAIGUGd2StKd1urUqvBRFPYbvMcEFagNe06QvQmz0Npn6KKYPlFo/ZAn/ NOw+efd1hvDZ44aEnU/rAxo4SiyFkPDKQqKFBJsVWQ88MYg23TNjaH0Upr2jp5c9w/C9 cikSe25+SRfrJe0/15MztVqfOcvA7IcWn1hXUVaSA94Y00f21emcA1T5mplFvS+hTDGz wM6g== X-Gm-Message-State: ACrzQf3P6uLsdJ2a6u8F8v9hIfP1sCVuVasEJbGZq4uWsauCs5yeRctr r06z1qFC7x3YR2gUAnbTUHSjhUNvgfqRu3gzVJaxYRVMj1jDbkTHOjdWjG0usZuoynrZz2SkNAz GsIUrQiVUy6hLYfS1YciqxDSoQcqPwpAtb/Ced74SLpiNJVKpy358MjaLubTgZhdiAt0= X-Received: by 2002:a17:907:74a:b0:77e:9455:b4e3 with SMTP id xc10-20020a170907074a00b0077e9455b4e3mr18044417ejb.471.1663694729927; Tue, 20 Sep 2022 10:25:29 -0700 (PDT) X-Google-Smtp-Source: AMsMyM65k9Stszkw5pSsfCoGFt+QRHMtAzH65xregGLbbgAjUSpirKfdmyQ/0lwGlMQzuUNXZmnCng== X-Received: by 2002:a17:907:74a:b0:77e:9455:b4e3 with SMTP id xc10-20020a170907074a00b0077e9455b4e3mr18044391ejb.471.1663694729528; Tue, 20 Sep 2022 10:25:29 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: 
richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 13/37] target/i386: Prepare ops_sse_header.h for 256 bit AVX Date: Tue, 20 Sep 2022 19:24:43 +0200 Message-Id: <20220920172507.95568-14-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713741799100001 Content-Type: text/plain; charset="utf-8" From: Paul Brook Adjust all #ifdefs to match the ones in ops_sse.h. 
Signed-off-by: Paul Brook Message-Id: <20220424220204.2493824-23-paul@nowt.org> Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/ops_sse_header.h | 114 +++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 39 deletions(-) diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 400b24c091..9d9a115df4 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -43,7 +43,7 @@ DEF_HELPER_3(glue(pslld, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(psrlq, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(psllq, SUFFIX), void, env, Reg, Reg) =20 -#if SHIFT =3D=3D 1 +#if SHIFT >=3D 1 DEF_HELPER_3(glue(psrldq, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg) #endif @@ -101,7 +101,7 @@ SSE_HELPER_L(pcmpeql, FCMPEQ) =20 SSE_HELPER_W(pmullw, FMULLW) #if SHIFT =3D=3D 0 -SSE_HELPER_W(pmulhrw, FMULHRW) +DEF_HELPER_3(glue(pmulhrw, SUFFIX), void, env, Reg, Reg) #endif SSE_HELPER_W(pmulhuw, FMULHUW) SSE_HELPER_W(pmulhw, FMULHW) @@ -113,7 +113,9 @@ DEF_HELPER_3(glue(pmuludq, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pmaddwd, SUFFIX), void, env, Reg, Reg) =20 DEF_HELPER_3(glue(psadbw, SUFFIX), void, env, Reg, Reg) +#if SHIFT < 2 DEF_HELPER_4(glue(maskmov, SUFFIX), void, env, Reg, Reg, tl) +#endif DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32) #ifdef TARGET_X86_64 DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64) @@ -122,38 +124,63 @@ DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64) #if SHIFT =3D=3D 0 DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int) #else -DEF_HELPER_3(glue(shufps, SUFFIX), void, Reg, Reg, int) -DEF_HELPER_3(glue(shufpd, SUFFIX), void, Reg, Reg, int) DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int) DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int) DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int) #endif =20 -#if SHIFT =3D=3D 1 +#if SHIFT >=3D 1 /* FPU ops */ /* XXX: not accurate */ =20 -#define 
SSE_HELPER_S(name, F) \ - DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \ - DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ - DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg) \ +#define SSE_HELPER_P4(name) \ + DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \ + DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg) + +#define SSE_HELPER_P3(name, ...) \ + DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \ + DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg) + +#if SHIFT =3D=3D 1 +#define SSE_HELPER_S4(name) \ + SSE_HELPER_P4(name) \ + DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ DEF_HELPER_3(name ## sd, void, env, Reg, Reg) +#define SSE_HELPER_S3(name) \ + SSE_HELPER_P3(name) \ + DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ + DEF_HELPER_3(name ## sd, void, env, Reg, Reg) +#else +#define SSE_HELPER_S4(name, ...) SSE_HELPER_P4(name) +#define SSE_HELPER_S3(name, ...) SSE_HELPER_P3(name) +#endif =20 -SSE_HELPER_S(add, FPU_ADD) -SSE_HELPER_S(sub, FPU_SUB) -SSE_HELPER_S(mul, FPU_MUL) -SSE_HELPER_S(div, FPU_DIV) -SSE_HELPER_S(min, FPU_MIN) -SSE_HELPER_S(max, FPU_MAX) -SSE_HELPER_S(sqrt, FPU_SQRT) +DEF_HELPER_3(glue(shufps, SUFFIX), void, Reg, Reg, int) +DEF_HELPER_3(glue(shufpd, SUFFIX), void, Reg, Reg, int) =20 +SSE_HELPER_S4(add) +SSE_HELPER_S4(sub) +SSE_HELPER_S4(mul) +SSE_HELPER_S4(div) +SSE_HELPER_S4(min) +SSE_HELPER_S4(max) + +SSE_HELPER_S3(sqrt) =20 DEF_HELPER_3(glue(cvtps2pd, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(cvtpd2ps, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(cvtss2sd, void, env, Reg, Reg) -DEF_HELPER_3(cvtsd2ss, void, env, Reg, Reg) DEF_HELPER_3(glue(cvtdq2ps, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(cvtdq2pd, SUFFIX), void, env, Reg, Reg) + +DEF_HELPER_3(glue(cvtps2dq, SUFFIX), void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(glue(cvtpd2dq, SUFFIX), void, env, ZMMReg, ZMMReg) + +DEF_HELPER_3(glue(cvttps2dq, SUFFIX), void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(glue(cvttpd2dq, SUFFIX), 
void, env, ZMMReg, ZMMReg) + +#if SHIFT =3D=3D 1 +DEF_HELPER_3(cvtss2sd, void, env, Reg, Reg) +DEF_HELPER_3(cvtsd2ss, void, env, Reg, Reg) DEF_HELPER_3(cvtpi2ps, void, env, ZMMReg, MMXReg) DEF_HELPER_3(cvtpi2pd, void, env, ZMMReg, MMXReg) DEF_HELPER_3(cvtsi2ss, void, env, ZMMReg, i32) @@ -164,8 +191,6 @@ DEF_HELPER_3(cvtsq2ss, void, env, ZMMReg, i64) DEF_HELPER_3(cvtsq2sd, void, env, ZMMReg, i64) #endif =20 -DEF_HELPER_3(glue(cvtps2dq, SUFFIX), void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(glue(cvtpd2dq, SUFFIX), void, env, ZMMReg, ZMMReg) DEF_HELPER_3(cvtps2pi, void, env, MMXReg, ZMMReg) DEF_HELPER_3(cvtpd2pi, void, env, MMXReg, ZMMReg) DEF_HELPER_2(cvtss2si, s32, env, ZMMReg) @@ -175,8 +200,6 @@ DEF_HELPER_2(cvtss2sq, s64, env, ZMMReg) DEF_HELPER_2(cvtsd2sq, s64, env, ZMMReg) #endif =20 -DEF_HELPER_3(glue(cvttps2dq, SUFFIX), void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(glue(cvttpd2dq, SUFFIX), void, env, ZMMReg, ZMMReg) DEF_HELPER_3(cvttps2pi, void, env, MMXReg, ZMMReg) DEF_HELPER_3(cvttpd2pi, void, env, MMXReg, ZMMReg) DEF_HELPER_2(cvttss2si, s32, env, ZMMReg) @@ -185,27 +208,24 @@ DEF_HELPER_2(cvttsd2si, s32, env, ZMMReg) DEF_HELPER_2(cvttss2sq, s64, env, ZMMReg) DEF_HELPER_2(cvttsd2sq, s64, env, ZMMReg) #endif +#endif =20 DEF_HELPER_3(glue(rsqrtps, SUFFIX), void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(rsqrtss, void, env, ZMMReg, ZMMReg) DEF_HELPER_3(glue(rcpps, SUFFIX), void, env, ZMMReg, ZMMReg) +#if SHIFT =3D=3D 1 +DEF_HELPER_3(rsqrtss, void, env, ZMMReg, ZMMReg) DEF_HELPER_3(rcpss, void, env, ZMMReg, ZMMReg) DEF_HELPER_3(extrq_r, void, env, ZMMReg, ZMMReg) DEF_HELPER_4(extrq_i, void, env, ZMMReg, int, int) DEF_HELPER_3(insertq_r, void, env, ZMMReg, ZMMReg) DEF_HELPER_5(insertq_i, void, env, ZMMReg, ZMMReg, int, int) -DEF_HELPER_3(glue(haddps, SUFFIX), void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(glue(haddpd, SUFFIX), void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(glue(hsubps, SUFFIX), void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(glue(hsubpd, SUFFIX), void, env, ZMMReg, ZMMReg) 
-DEF_HELPER_3(glue(addsubps, SUFFIX), void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(glue(addsubpd, SUFFIX), void, env, ZMMReg, ZMMReg) +#endif =20 -#define SSE_HELPER_CMP(name, F, C) \ - DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \ - DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ - DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg) \ - DEF_HELPER_3(name ## sd, void, env, Reg, Reg) +SSE_HELPER_P4(hadd) +SSE_HELPER_P4(hsub) +SSE_HELPER_P4(addsub) + +#define SSE_HELPER_CMP(name, F, C) SSE_HELPER_S4(name) =20 SSE_HELPER_CMP(cmpeq, FPU_CMPQ, FPU_EQ) SSE_HELPER_CMP(cmplt, FPU_CMPS, FPU_LT) @@ -216,10 +236,13 @@ SSE_HELPER_CMP(cmpnlt, FPU_CMPS, !FPU_LT) SSE_HELPER_CMP(cmpnle, FPU_CMPS, !FPU_LE) SSE_HELPER_CMP(cmpord, FPU_CMPQ, !FPU_UNORD) =20 +#if SHIFT =3D=3D 1 DEF_HELPER_3(ucomiss, void, env, Reg, Reg) DEF_HELPER_3(comiss, void, env, Reg, Reg) DEF_HELPER_3(ucomisd, void, env, Reg, Reg) DEF_HELPER_3(comisd, void, env, Reg, Reg) +#endif + DEF_HELPER_2(glue(movmskps, SUFFIX), i32, env, Reg) DEF_HELPER_2(glue(movmskpd, SUFFIX), i32, env, Reg) #endif @@ -236,7 +259,7 @@ DEF_HELPER_3(glue(packssdw, SUFFIX), void, env, Reg, Re= g) UNPCK_OP(l, 0) UNPCK_OP(h, 1) =20 -#if SHIFT =3D=3D 1 +#if SHIFT >=3D 1 DEF_HELPER_3(glue(punpcklqdq, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(punpckhqdq, SUFFIX), void, env, Reg, Reg) #endif @@ -283,7 +306,7 @@ DEF_HELPER_3(glue(psignd, SUFFIX), void, env, Reg, Reg) DEF_HELPER_4(glue(palignr, SUFFIX), void, env, Reg, Reg, s32) =20 /* SSE4.1 op helpers */ -#if SHIFT =3D=3D 1 +#if SHIFT >=3D 1 DEF_HELPER_3(glue(pblendvb, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(blendvps, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(blendvpd, SUFFIX), void, env, Reg, Reg) @@ -312,22 +335,30 @@ DEF_HELPER_3(glue(pmaxsd, SUFFIX), void, env, Reg, Re= g) DEF_HELPER_3(glue(pmaxuw, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pmaxud, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pmulld, SUFFIX), void, env, Reg, Reg) +#if SHIFT 
=3D=3D 1 DEF_HELPER_3(glue(phminposuw, SUFFIX), void, env, Reg, Reg) +#endif DEF_HELPER_4(glue(roundps, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(roundpd, SUFFIX), void, env, Reg, Reg, i32) +#if SHIFT =3D=3D 1 DEF_HELPER_4(glue(roundss, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(roundsd, SUFFIX), void, env, Reg, Reg, i32) +#endif DEF_HELPER_4(glue(blendps, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(blendpd, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(pblendw, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(dpps, SUFFIX), void, env, Reg, Reg, i32) +#if SHIFT =3D=3D 1 DEF_HELPER_4(glue(dppd, SUFFIX), void, env, Reg, Reg, i32) +#endif DEF_HELPER_4(glue(mpsadbw, SUFFIX), void, env, Reg, Reg, i32) #endif =20 /* SSE4.2 op helpers */ -#if SHIFT =3D=3D 1 +#if SHIFT >=3D 1 DEF_HELPER_3(glue(pcmpgtq, SUFFIX), void, env, Reg, Reg) +#endif +#if SHIFT =3D=3D 1 DEF_HELPER_4(glue(pcmpestri, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(pcmpestrm, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(pcmpistri, SUFFIX), void, env, Reg, Reg, i32) @@ -336,13 +367,15 @@ DEF_HELPER_3(crc32, tl, i32, tl, i32) #endif =20 /* AES-NI op helpers */ -#if SHIFT =3D=3D 1 +#if SHIFT >=3D 1 DEF_HELPER_3(glue(aesdec, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(aesdeclast, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(aesenc, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(aesenclast, SUFFIX), void, env, Reg, Reg) +#if SHIFT =3D=3D 1 DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg) DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32) +#endif DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32) #endif =20 @@ -354,6 +387,9 @@ DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, R= eg, i32) #undef SSE_HELPER_W #undef SSE_HELPER_L #undef SSE_HELPER_Q -#undef SSE_HELPER_S +#undef SSE_HELPER_S3 +#undef SSE_HELPER_S4 +#undef SSE_HELPER_P3 +#undef SSE_HELPER_P4 #undef SSE_HELPER_CMP #undef UNPCK_OP --=20 2.37.2 
From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663714057; cv=none; d=zohomail.com; s=zohoarc; b=SsPHrY8nC6DhLxcG4oOM2z18e5p89en3LXqHDZk6JqFYZrYxh5z4ZaDCP7eIR1vXkoADHx+/NR/f5JT8fimQOdKyNTdW3Vdo6TR/7+HV584XSLQUqAzgErcvc5nJC3HNRg4Tr+2h+pidm5LwUo0GWt2ik/yMdYb5CkOivNKN/kU= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663714057; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=ZNn2sURHRNH2zJJinDCr9480EXSZSfTGYomBNJfzEzI=; b=NB/LjqqLEGHVpQeP8P9UpToLiSTyJ4sAgSwZ90NQDIDLPM6u5eJY7s/YxFDdU3oo8sZxDFWJm5HgQeCRLbnODoCca/Wk88FIfp0CaUzL2uf1znBLWLv4YsSyqUA1bkRQH6Km3WWNTqG+x/tHMpdmy/wVXqzsijgktxmPJctFGe4= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663714057915274.10930820816907; Tue, 20 Sep 2022 15:47:37 -0700 (PDT) Received: from localhost ([::1]:57682 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oam1g-0007ui-HW for importer@patchew.org; Tue, 20 Sep 2022 18:47:36 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58690) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0B-0006QW-Dx for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:54 -0400 Received: from 
us-smtp-delivery-124.mimecast.com ([170.10.129.124]:49090) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah06-0002AJ-2u for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:43 -0400 Received: from mail-ej1-f70.google.com (mail-ej1-f70.google.com [209.85.218.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-495-s0hOu4T-PcSHqL6xhHh_Ew-1; Tue, 20 Sep 2022 13:25:34 -0400 Received: by mail-ej1-f70.google.com with SMTP id qb30-20020a1709077e9e00b0077d1271283eso1784364ejc.2 for ; Tue, 20 Sep 2022 10:25:34 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id w8-20020a50fa88000000b0045256570210sm254618edr.3.2022.09.20.10.25.30 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:30 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694736; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=ZNn2sURHRNH2zJJinDCr9480EXSZSfTGYomBNJfzEzI=; b=XiRtSi9UtJ2tcMcH+V13M7VxLylhHVPMbXt3UWZW/0Fsi3mf/Sd22M1sKuTF9dhvUW56Vz ADlCbKyMxckMgvqtzBBXZuRLc1C0vN+csYhnO95QtFCMqHjYENKL55/cs69uqtkmIsTY0s R0f0UgcaabITlr8DXMS8nMfQDpW3mVw= X-MC-Unique: s0hOu4T-PcSHqL6xhHh_Ew-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=ZNn2sURHRNH2zJJinDCr9480EXSZSfTGYomBNJfzEzI=; b=sjJ0sRY6OkxSNQVfiGvzZG4wC5V7qDmcCGcafxnZ8tW/MmMdEqMO4ievCgjJBBoOVt ROVGc9uKTG9SFaSuuGdzknYmwHQRsDEytPPlpXYIS8XzuFKZZNLBJa8tIzuuAJf9Nsjq 
0JSKoE2sYDZChiCFqCuLskzqdwlFkHDadqGv0QliqJGL6OFm6N/2CHm4XB24rva5No6A ZZH8P1kF76+vthNl0PziOUiuFrlXPCuoBF+auoTTLjrzC5O2tU/0XB8zrFvT98+Y+gnC eeQlh/37XiVDNGxsD90f+PJAJ4YmnBp7wC0hQKotTqW01JT+n/skGcE8kitFe1gm++fv 0MlA== X-Gm-Message-State: ACrzQf2EeI4P0GExnNMsTUSWzVpSMsa2Wmb4Hm+y3bPZ7aIkBLFuDiyF rmpgtUwohuA4Fr+nAO8zv4GsiP3XmOIlOXzqUxeoqqFAfhn5fCkDwFYNy5M+NvLMNkGeiG4BORr QNipENZidCV7rZxIcXRFcUzOlJgxj1FV+uTMtzqk78rakC3k2YISAyP0qvGrXZXUdRo8= X-Received: by 2002:a05:6402:4cb:b0:453:b9f1:f10a with SMTP id n11-20020a05640204cb00b00453b9f1f10amr13750262edw.47.1663694731979; Tue, 20 Sep 2022 10:25:31 -0700 (PDT) X-Google-Smtp-Source: AMsMyM5+WmaE3pOU6UqcksucwE9l9iZqK1zbIECnmsxDR0OB4bLV3VsaNQlz6ZAtbV2x+lPRErj1Aw== X-Received: by 2002:a05:6402:4cb:b0:453:b9f1:f10a with SMTP id n11-20020a05640204cb00b00453b9f1f10amr13750185edw.47.1663694730935; Tue, 20 Sep 2022 10:25:30 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 14/37] target/i386: extend helpers to support VEX.V 3- and 4- operand encodings Date: Tue, 20 Sep 2022 19:24:44 +0200 Message-Id: <20220920172507.95568-15-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no 
X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663714059187100001 Content-Type: text/plain; charset="utf-8" Add to the helpers all the operands that are needed to implement AVX. Extracted from a patch by Paul Brook . Message-Id: <20220424220204.2493824-26-paul@nowt.org> Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/ops_sse.h | 173 +++++++++++++-------------------- target/i386/ops_sse_header.h | 149 ++++++++++++++-------------- target/i386/tcg/translate.c | 181 ++++++++++++++++++++++++----------- 3 files changed, 265 insertions(+), 238 deletions(-) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 7bf8bb967d..5f0ee9db52 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -48,9 +48,8 @@ #define FPSLL(x, c) ((x) << shift) #endif =20 -void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c) +void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - Reg *s =3D d; int shift; if (c->Q(0) > 15) { for (int i =3D 0; i < 1 << SHIFT; i++) { @@ -64,9 +63,8 @@ void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d,= Reg *c) } } =20 -void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c) +void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - Reg *s =3D d; int shift; if (c->Q(0) > 15) { for (int i =3D 0; i < 1 << SHIFT; i++) { @@ -80,9 +78,8 @@ void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d,= Reg *c) } } =20 -void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c) +void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - Reg *s =3D d; int shift; if (c->Q(0) > 15) { shift =3D 15; @@ -94,9 +91,8 @@ void glue(helper_psraw, SUFFIX)(CPUX86State 
*env, Reg *d,= Reg *c) } } =20 -void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c) +void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - Reg *s =3D d; int shift; if (c->Q(0) > 31) { for (int i =3D 0; i < 1 << SHIFT; i++) { @@ -110,9 +106,8 @@ void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *= d, Reg *c) } } =20 -void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c) +void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - Reg *s =3D d; int shift; if (c->Q(0) > 31) { for (int i =3D 0; i < 1 << SHIFT; i++) { @@ -126,9 +121,8 @@ void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *= d, Reg *c) } } =20 -void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *c) +void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - Reg *s =3D d; int shift; if (c->Q(0) > 31) { shift =3D 31; @@ -140,9 +134,8 @@ void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *= d, Reg *c) } } =20 -void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c) +void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - Reg *s =3D d; int shift; if (c->Q(0) > 63) { for (int i =3D 0; i < 1 << SHIFT; i++) { @@ -156,9 +149,8 @@ void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *= d, Reg *c) } } =20 -void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c) +void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - Reg *s =3D d; int shift; if (c->Q(0) > 63) { for (int i =3D 0; i < 1 << SHIFT; i++) { @@ -173,9 +165,8 @@ void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *= d, Reg *c) } =20 #if SHIFT >=3D 1 -void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c) +void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - Reg *s =3D d; int shift, i, j; =20 shift =3D c->L(0); @@ -192,9 +183,8 @@ void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg = *d, Reg *c) } } =20 -void glue(helper_pslldq, 
SUFFIX)(CPUX86State *env, Reg *d, Reg *c) +void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - Reg *s =3D d; int shift, i, j; =20 shift =3D c->L(0); @@ -222,9 +212,8 @@ void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg = *d, Reg *c) } =20 #define SSE_HELPER_2(name, elem, num, F) \ - void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ + void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) \ { \ - Reg *v =3D d; \ int n =3D num; \ for (int i =3D 0; i < n; i++) { \ d->elem(i) =3D F(v->elem(i), s->elem(i)); \ @@ -362,18 +351,24 @@ SSE_HELPER_W(helper_pcmpeqw, FCMPEQ) SSE_HELPER_L(helper_pcmpeql, FCMPEQ) =20 SSE_HELPER_W(helper_pmullw, FMULLW) -#if SHIFT =3D=3D 0 -SSE_HELPER_W(helper_pmulhrw, FMULHRW) -#endif SSE_HELPER_W(helper_pmulhuw, FMULHUW) SSE_HELPER_W(helper_pmulhw, FMULHW) =20 +#if SHIFT =3D=3D 0 +void glue(helper_pmulhrw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + d->W(0) =3D FMULHRW(d->W(0), s->W(0)); + d->W(1) =3D FMULHRW(d->W(1), s->W(1)); + d->W(2) =3D FMULHRW(d->W(2), s->W(2)); + d->W(3) =3D FMULHRW(d->W(3), s->W(3)); +} +#endif + SSE_HELPER_B(helper_pavgb, FAVG) SSE_HELPER_W(helper_pavgw, FAVG) =20 -void glue(helper_pmuludq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pmuludq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - Reg *v =3D d; int i; =20 for (i =3D 0; i < (1 << SHIFT); i++) { @@ -381,9 +376,8 @@ void glue(helper_pmuludq, SUFFIX)(CPUX86State *env, Reg= *d, Reg *s) } } =20 -void glue(helper_pmaddwd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pmaddwd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - Reg *v =3D d; int i; =20 for (i =3D 0; i < (2 << SHIFT); i++) { @@ -402,10 +396,8 @@ static inline int abs1(int a) } } #endif - -void glue(helper_psadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_psadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - Reg *v =3D d; int i; =20 for (i =3D 0; i < (1 << SHIFT); i++) { @@ -478,9 
+470,8 @@ void glue(helper_pshufw, SUFFIX)(Reg *d, Reg *s, int or= der) SHUFFLE4(W, s, s, 0); } #else -void glue(helper_shufps, SUFFIX)(Reg *d, Reg *s, int order) +void glue(helper_shufps, SUFFIX)(Reg *d, Reg *v, Reg *s, int order) { - Reg *v =3D d; uint32_t r0, r1, r2, r3; int i; =20 @@ -489,9 +480,8 @@ void glue(helper_shufps, SUFFIX)(Reg *d, Reg *s, int or= der) } } =20 -void glue(helper_shufpd, SUFFIX)(Reg *d, Reg *s, int order) +void glue(helper_shufpd, SUFFIX)(Reg *d, Reg *v, Reg *s, int order) { - Reg *v =3D d; uint64_t r0, r1; int i; =20 @@ -543,9 +533,8 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int o= rder) =20 #define SSE_HELPER_P(name, F) \ void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, \ - Reg *d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg *v =3D d; \ int i; \ for (i =3D 0; i < 2 << SHIFT; i++) { \ d->ZMM_S(i) =3D F(32, v->ZMM_S(i), s->ZMM_S(i)); \ @@ -553,9 +542,8 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int o= rder) } \ \ void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, \ - Reg *d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg *v =3D d; \ int i; \ for (i =3D 0; i < 1 << SHIFT; i++) { \ d->ZMM_D(i) =3D F(64, v->ZMM_D(i), s->ZMM_D(i)); \ @@ -567,15 +555,13 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int= order) #define SSE_HELPER_S(name, F) \ SSE_HELPER_P(name, F) \ \ - void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s)\ + void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *v, Reg *s)\ { \ - Reg *v =3D d; \ d->ZMM_S(0) =3D F(32, v->ZMM_S(0), s->ZMM_S(0)); \ } \ \ - void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s)\ + void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *v, Reg *s)\ { \ - Reg *v =3D d; \ d->ZMM_D(0) =3D F(64, v->ZMM_D(0), s->ZMM_D(0)); \ } =20 @@ -958,9 +944,8 @@ void helper_insertq_i(CPUX86State *env, ZMMReg *d, ZMMR= eg *s, int index, int len #endif =20 #define SSE_HELPER_HPS(name, F) \ -void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg 
*d, Reg *s) \ +void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *= s) \ { \ - Reg *v =3D d; \ float32 r[2 << SHIFT]; \ int i, j, k; \ for (k =3D 0; k < 2 << SHIFT; k +=3D LANE_WIDTH / 4) { \ @@ -980,9 +965,8 @@ SSE_HELPER_HPS(haddps, float32_add) SSE_HELPER_HPS(hsubps, float32_sub) =20 #define SSE_HELPER_HPD(name, F) \ -void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ +void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *= s) \ { \ - Reg *v =3D d; \ float64 r[1 << SHIFT]; \ int i, j, k; \ for (k =3D 0; k < 1 << SHIFT; k +=3D LANE_WIDTH / 8) { \ @@ -1001,9 +985,8 @@ void glue(helper_ ## name, SUFFIX)(CPUX86State *env, R= eg *d, Reg *s) \ SSE_HELPER_HPD(haddpd, float64_add) SSE_HELPER_HPD(hsubpd, float64_sub) =20 -void glue(helper_addsubps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_addsubps, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *= s) { - Reg *v =3D d; int i; for (i =3D 0; i < 2 << SHIFT; i +=3D 2) { d->ZMM_S(i) =3D float32_sub(v->ZMM_S(i), s->ZMM_S(i), &env->sse_st= atus); @@ -1011,9 +994,8 @@ void glue(helper_addsubps, SUFFIX)(CPUX86State *env, R= eg *d, Reg *s) } } =20 -void glue(helper_addsubpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_addsubpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *= s) { - Reg *v =3D d; int i; for (i =3D 0; i < 1 << SHIFT; i +=3D 2) { d->ZMM_D(i) =3D float64_sub(v->ZMM_D(i), s->ZMM_D(i), &env->sse_st= atus); @@ -1023,9 +1005,8 @@ void glue(helper_addsubpd, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s) =20 #define SSE_HELPER_CMP_P(name, F, C) \ void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, \ - Reg *d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg *v =3D d; \ int i; \ for (i =3D 0; i < 2 << SHIFT; i++) { \ d->ZMM_L(i) =3D C(F(32, v->ZMM_S(i), s->ZMM_S(i))) ? 
-1 : 0; \ @@ -1033,9 +1014,8 @@ void glue(helper_addsubpd, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s) } \ \ void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, \ - Reg *d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg *v =3D d; \ int i; \ for (i =3D 0; i < 1 << SHIFT; i++) { \ d->ZMM_Q(i) =3D C(F(64, v->ZMM_D(i), s->ZMM_D(i))) ? -1 : 0; \ @@ -1045,15 +1025,13 @@ void glue(helper_addsubpd, SUFFIX)(CPUX86State *env= , Reg *d, Reg *s) #if SHIFT =3D=3D 1 #define SSE_HELPER_CMP(name, F, C) = \ SSE_HELPER_CMP_P(name, F, C) = \ - void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s) \ + void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *v, Reg *s) = \ { = \ - Reg *v =3D d; = \ d->ZMM_L(0) =3D C(F(32, v->ZMM_S(0), s->ZMM_S(0))) ? -1 : 0; = \ } = \ = \ - void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s) \ + void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *v, Reg *s) = \ { = \ - Reg *v =3D d; = \ d->ZMM_Q(0) =3D C(F(64, v->ZMM_D(0), s->ZMM_D(0))) ? -1 : 0; = \ } =20 @@ -1179,9 +1157,8 @@ uint32_t glue(helper_pmovmskb, SUFFIX)(CPUX86State *e= nv, Reg *s) =20 #define PACK_HELPER_B(name, F) \ void glue(helper_pack ## name, SUFFIX)(CPUX86State *env, \ - Reg *d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg *v =3D d; \ uint8_t r[PACK_WIDTH * 2]; \ int j, k; \ for (j =3D 0; j < 4 << SHIFT; j +=3D PACK_WIDTH) { \ @@ -1200,9 +1177,8 @@ void glue(helper_pack ## name, SUFFIX)(CPUX86State *e= nv, \ PACK_HELPER_B(sswb, satsb) PACK_HELPER_B(uswb, satub) =20 -void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *= s) { - Reg *v =3D d; uint16_t r[PACK_WIDTH]; int j, k; =20 @@ -1222,9 +1198,8 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s) #define UNPCK_OP(base_name, base) \ \ void glue(helper_punpck ## base_name ## bw, SUFFIX)(CPUX86State *env,\ - Reg *d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg *v =3D d; \ uint8_t 
r[PACK_WIDTH * 2]; \ int j, i; \ \ @@ -1241,9 +1216,8 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s) } \ \ void glue(helper_punpck ## base_name ## wd, SUFFIX)(CPUX86State *env,\ - Reg *d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg *v =3D d; \ uint16_t r[PACK_WIDTH]; \ int j, i; \ \ @@ -1260,9 +1234,8 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s) } \ \ void glue(helper_punpck ## base_name ## dq, SUFFIX)(CPUX86State *env,\ - Reg *d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg *v =3D d; \ uint32_t r[PACK_WIDTH / 2]; \ int j, i; \ \ @@ -1280,9 +1253,8 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s) \ XMM_ONLY( \ void glue(helper_punpck ## base_name ## qdq, SUFFIX)( \ - CPUX86State *env, Reg *d, Reg *s) \ + CPUX86State *env, Reg *d, Reg *v, Reg *s) \ { \ - Reg *v =3D d; \ uint64_t r[2]; \ int i; \ \ @@ -1453,9 +1425,8 @@ void helper_pswapd(CPUX86State *env, MMXReg *d, MMXRe= g *s) #endif =20 /* SSSE3 op helpers */ -void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - Reg *v =3D d; int i; #if SHIFT =3D=3D 0 uint8_t r[8]; @@ -1480,9 +1451,8 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Re= g *d, Reg *s) } =20 #define SSE_HELPER_HW(name, F) \ -void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ +void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *= s) \ { \ - Reg *v =3D d; \ uint16_t r[4 << SHIFT]; \ int i, j, k; \ for (k =3D 0; k < 4 << SHIFT; k +=3D LANE_WIDTH / 2) { \ @@ -1499,9 +1469,8 @@ void glue(helper_ ## name, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s) \ } =20 #define SSE_HELPER_HL(name, F) \ -void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ +void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *= s) \ { \ - Reg *v =3D d; \ uint32_t r[2 << SHIFT]; \ int i, j, k; \ for (k =3D 0; k < 2 << SHIFT; 
k +=3D LANE_WIDTH / 4) { \ @@ -1527,9 +1496,8 @@ SSE_HELPER_HL(phsubd, FSUB) #undef SSE_HELPER_HW #undef SSE_HELPER_HL =20 -void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg = *s) { - Reg *v =3D d; int i; for (i =3D 0; i < 4 << SHIFT; i++) { d->W(i) =3D satsw((int8_t)s->B(i * 2) * (uint8_t)v->B(i * 2) + @@ -1554,10 +1522,9 @@ SSE_HELPER_B(helper_psignb, FSIGNB) SSE_HELPER_W(helper_psignw, FSIGNW) SSE_HELPER_L(helper_psignd, FSIGNL) =20 -void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, +void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, int32_t shift) { - Reg *v =3D d; int i; =20 /* XXX could be checked during translation */ @@ -1594,10 +1561,9 @@ void glue(helper_palignr, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s, #if SHIFT >=3D 1 =20 #define SSE_HELPER_V(name, elem, num, F) \ - void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ + void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, \ + Reg *m) \ { \ - Reg *v =3D d; \ - Reg *m =3D &env->xmm_regs[0]; \ int i; \ for (i =3D 0; i < num; i++) { \ d->elem(i) =3D F(v->elem(i), s->elem(i), m->elem(i)); \ @@ -1605,10 +1571,9 @@ void glue(helper_palignr, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s, } =20 #define SSE_HELPER_I(name, elem, num, F) \ - void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, \ + void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, \ uint32_t imm) \ { \ - Reg *v =3D d; \ int i; \ for (i =3D 0; i < num; i++) { \ int j =3D i & 7; \ @@ -1660,9 +1625,8 @@ SSE_HELPER_F(helper_pmovzxwq, Q, 1 << SHIFT, s->W) SSE_HELPER_F(helper_pmovzxdq, Q, 1 << SHIFT, s->L) #endif =20 -void glue(helper_pmuldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pmuldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - Reg *v =3D d; int i; =20 for (i =3D 0; i < 1 << SHIFT; i++) { @@ -1673,9 +1637,8 @@ void glue(helper_pmuldq, 
SUFFIX)(CPUX86State *env, Re= g *d, Reg *s) #define FCMPEQQ(d, s) (d =3D=3D s ? -1 : 0) SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ) =20 -void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *= s) { - Reg *v =3D d; uint16_t r[8]; int i, j, k; =20 @@ -1893,10 +1856,9 @@ SSE_HELPER_I(helper_blendps, L, 2 << SHIFT, FBLENDP) SSE_HELPER_I(helper_blendpd, Q, 1 << SHIFT, FBLENDP) SSE_HELPER_I(helper_pblendw, W, 4 << SHIFT, FBLENDP) =20 -void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, +void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mask) { - Reg *v =3D d; float32 prod1, prod2, temp2, temp3, temp4; int i; =20 @@ -1939,9 +1901,8 @@ void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg = *d, Reg *s, #if SHIFT =3D=3D 1 /* Oddly, there is no ymm version of dppd */ void glue(helper_dppd, SUFFIX)(CPUX86State *env, - Reg *d, Reg *s, uint32_t mask) + Reg *d, Reg *v, Reg *s, uint32_t mask) { - Reg *v =3D d; float64 prod1, prod2, temp2; =20 if (mask & (1 << 4)) { @@ -1960,10 +1921,9 @@ void glue(helper_dppd, SUFFIX)(CPUX86State *env, } #endif =20 -void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, +void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t offset) { - Reg *v =3D d; int i, j; uint16_t r[8]; =20 @@ -2236,10 +2196,9 @@ static void clmulq(uint64_t *dest_l, uint64_t *dest_= h, } #endif =20 -void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, +void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg = *s, uint32_t ctrl) { - Reg *v =3D d; uint64_t a, b; int i; =20 @@ -2250,10 +2209,10 @@ void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *en= v, Reg *d, Reg *s, } } =20 -void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { int i; - Reg st =3D *d; + Reg st =3D *v; Reg rk =3D *s; 
=20 for (i =3D 0 ; i < 2 << SHIFT ; i++) { @@ -2265,10 +2224,10 @@ void glue(helper_aesdec, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s) } } =20 -void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg= *s) { int i; - Reg st =3D *d; + Reg st =3D *v; Reg rk =3D *s; =20 for (i =3D 0; i < 8 << SHIFT; i++) { @@ -2276,10 +2235,10 @@ void glue(helper_aesdeclast, SUFFIX)(CPUX86State *e= nv, Reg *d, Reg *s) } } =20 -void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { int i; - Reg st =3D *d; + Reg st =3D *v; Reg rk =3D *s; =20 for (i =3D 0 ; i < 2 << SHIFT ; i++) { @@ -2291,10 +2250,10 @@ void glue(helper_aesenc, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s) } } =20 -void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg= *s) { int i; - Reg st =3D *d; + Reg st =3D *v; Reg rk =3D *s; =20 for (i =3D 0; i < 8 << SHIFT; i++) { diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 9d9a115df4..b60fe2f0d4 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -34,31 +34,31 @@ #define dh_typecode_ZMMReg dh_typecode_ptr #define dh_typecode_MMXReg dh_typecode_ptr =20 -DEF_HELPER_3(glue(psrlw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psraw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psllw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psrld, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psrad, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pslld, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psrlq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psllq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(psrlw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psraw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psllw, SUFFIX), void, 
env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psrld, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psrad, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pslld, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psrlq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psllq, SUFFIX), void, env, Reg, Reg, Reg) =20 #if SHIFT >=3D 1 -DEF_HELPER_3(glue(psrldq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(psrldq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pslldq, SUFFIX), void, env, Reg, Reg, Reg) #endif =20 #define SSE_HELPER_B(name, F)\ - DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg) + DEF_HELPER_4(glue(name, SUFFIX), void, env, Reg, Reg, Reg) =20 #define SSE_HELPER_W(name, F)\ - DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg) + DEF_HELPER_4(glue(name, SUFFIX), void, env, Reg, Reg, Reg) =20 #define SSE_HELPER_L(name, F)\ - DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg) + DEF_HELPER_4(glue(name, SUFFIX), void, env, Reg, Reg, Reg) =20 #define SSE_HELPER_Q(name, F)\ - DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg) + DEF_HELPER_4(glue(name, SUFFIX), void, env, Reg, Reg, Reg) =20 SSE_HELPER_B(paddb, FADD) SSE_HELPER_W(paddw, FADD) @@ -109,10 +109,10 @@ SSE_HELPER_W(pmulhw, FMULHW) SSE_HELPER_B(pavgb, FAVG) SSE_HELPER_W(pavgw, FAVG) =20 -DEF_HELPER_3(glue(pmuludq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaddwd, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(pmuludq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pmaddwd, SUFFIX), void, env, Reg, Reg, Reg) =20 -DEF_HELPER_3(glue(psadbw, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(psadbw, SUFFIX), void, env, Reg, Reg, Reg) #if SHIFT < 2 DEF_HELPER_4(glue(maskmov, SUFFIX), void, env, Reg, Reg, tl) #endif @@ -134,8 +134,8 @@ DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int) /* XXX: not accurate */ =20 #define SSE_HELPER_P4(name) \ - DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, 
Reg, Reg) \ - DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg) + DEF_HELPER_4(glue(name ## ps, SUFFIX), void, env, Reg, Reg, Reg) \ + DEF_HELPER_4(glue(name ## pd, SUFFIX), void, env, Reg, Reg, Reg) =20 #define SSE_HELPER_P3(name, ...) \ DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \ @@ -144,8 +144,8 @@ DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int) #if SHIFT =3D=3D 1 #define SSE_HELPER_S4(name) \ SSE_HELPER_P4(name) \ - DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ - DEF_HELPER_3(name ## sd, void, env, Reg, Reg) + DEF_HELPER_4(name ## ss, void, env, Reg, Reg, Reg) \ + DEF_HELPER_4(name ## sd, void, env, Reg, Reg, Reg) #define SSE_HELPER_S3(name) \ SSE_HELPER_P3(name) \ DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ @@ -155,8 +155,8 @@ DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int) #define SSE_HELPER_S3(name, ...) SSE_HELPER_P3(name) #endif =20 -DEF_HELPER_3(glue(shufps, SUFFIX), void, Reg, Reg, int) -DEF_HELPER_3(glue(shufpd, SUFFIX), void, Reg, Reg, int) +DEF_HELPER_4(glue(shufps, SUFFIX), void, Reg, Reg, Reg, int) +DEF_HELPER_4(glue(shufpd, SUFFIX), void, Reg, Reg, Reg, int) =20 SSE_HELPER_S4(add) SSE_HELPER_S4(sub) @@ -212,6 +212,7 @@ DEF_HELPER_2(cvttsd2sq, s64, env, ZMMReg) =20 DEF_HELPER_3(glue(rsqrtps, SUFFIX), void, env, ZMMReg, ZMMReg) DEF_HELPER_3(glue(rcpps, SUFFIX), void, env, ZMMReg, ZMMReg) + #if SHIFT =3D=3D 1 DEF_HELPER_3(rsqrtss, void, env, ZMMReg, ZMMReg) DEF_HELPER_3(rcpss, void, env, ZMMReg, ZMMReg) @@ -248,20 +249,20 @@ DEF_HELPER_2(glue(movmskpd, SUFFIX), i32, env, Reg) #endif =20 DEF_HELPER_2(glue(pmovmskb, SUFFIX), i32, env, Reg) -DEF_HELPER_3(glue(packsswb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(packuswb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(packssdw, SUFFIX), void, env, Reg, Reg) -#define UNPCK_OP(base_name, base) \ - DEF_HELPER_3(glue(punpck ## base_name ## bw, SUFFIX), void, env, Reg, = Reg) \ - DEF_HELPER_3(glue(punpck ## base_name ## wd, SUFFIX), void, env, Reg, = 
Reg) \ - DEF_HELPER_3(glue(punpck ## base_name ## dq, SUFFIX), void, env, Reg, = Reg) +DEF_HELPER_4(glue(packsswb, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(packuswb, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(packssdw, SUFFIX), void, env, Reg, Reg, Reg) +#define UNPCK_OP(name, base) \ + DEF_HELPER_4(glue(punpck ## name ## bw, SUFFIX), void, env, Reg, Reg, = Reg) \ + DEF_HELPER_4(glue(punpck ## name ## wd, SUFFIX), void, env, Reg, Reg, = Reg) \ + DEF_HELPER_4(glue(punpck ## name ## dq, SUFFIX), void, env, Reg, Reg, = Reg) =20 UNPCK_OP(l, 0) UNPCK_OP(h, 1) =20 #if SHIFT >=3D 1 -DEF_HELPER_3(glue(punpcklqdq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(punpckhqdq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(punpcklqdq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(punpckhqdq, SUFFIX), void, env, Reg, Reg, Reg) #endif =20 /* 3DNow! float ops */ @@ -288,28 +289,28 @@ DEF_HELPER_3(pswapd, void, env, MMXReg, MMXReg) #endif =20 /* SSSE3 op helpers */ -DEF_HELPER_3(glue(phaddw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phaddd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phaddsw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phsubw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phsubd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phsubsw, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(phaddw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(phaddd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(phaddsw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(phsubw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(phsubd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(phsubsw, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_3(glue(pabsb, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pabsw, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pabsd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaddubsw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmulhrsw, SUFFIX), void, env, Reg, Reg) 
-DEF_HELPER_3(glue(pshufb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psignb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psignw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psignd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_4(glue(palignr, SUFFIX), void, env, Reg, Reg, s32) +DEF_HELPER_4(glue(pmaddubsw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pmulhrsw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pshufb, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psignb, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psignw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psignd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_5(glue(palignr, SUFFIX), void, env, Reg, Reg, Reg, s32) =20 /* SSE4.1 op helpers */ #if SHIFT >=3D 1 -DEF_HELPER_3(glue(pblendvb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(blendvps, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(blendvpd, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_5(glue(pblendvb, SUFFIX), void, env, Reg, Reg, Reg, Reg) +DEF_HELPER_5(glue(blendvps, SUFFIX), void, env, Reg, Reg, Reg, Reg) +DEF_HELPER_5(glue(blendvpd, SUFFIX), void, env, Reg, Reg, Reg, Reg) DEF_HELPER_3(glue(ptest, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pmovsxbw, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pmovsxbd, SUFFIX), void, env, Reg, Reg) @@ -323,40 +324,40 @@ DEF_HELPER_3(glue(pmovzxbq, SUFFIX), void, env, Reg, = Reg) DEF_HELPER_3(glue(pmovzxwd, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pmovzxwq, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pmovzxdq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmuldq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pcmpeqq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(packusdw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pminsb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pminsd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pminuw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pminud, SUFFIX), void, env, Reg, Reg) 
-DEF_HELPER_3(glue(pmaxsb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaxsd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaxuw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaxud, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmulld, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(pmuldq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pcmpeqq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(packusdw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pminsb, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pminsd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pminuw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pminud, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pmaxsb, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pmaxsd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pmaxuw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pmaxud, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pmulld, SUFFIX), void, env, Reg, Reg, Reg) #if SHIFT =3D=3D 1 DEF_HELPER_3(glue(phminposuw, SUFFIX), void, env, Reg, Reg) #endif DEF_HELPER_4(glue(roundps, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(roundpd, SUFFIX), void, env, Reg, Reg, i32) #if SHIFT =3D=3D 1 -DEF_HELPER_4(glue(roundss, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(roundsd, SUFFIX), void, env, Reg, Reg, i32) +DEF_HELPER_4(roundss_xmm, void, env, Reg, Reg, i32) +DEF_HELPER_4(roundsd_xmm, void, env, Reg, Reg, i32) #endif -DEF_HELPER_4(glue(blendps, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(blendpd, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(pblendw, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(dpps, SUFFIX), void, env, Reg, Reg, i32) +DEF_HELPER_5(glue(blendps, SUFFIX), void, env, Reg, Reg, Reg, i32) +DEF_HELPER_5(glue(blendpd, SUFFIX), void, env, Reg, Reg, Reg, i32) +DEF_HELPER_5(glue(pblendw, SUFFIX), void, env, Reg, Reg, Reg, i32) +DEF_HELPER_5(glue(dpps, SUFFIX), void, env, 
Reg, Reg, Reg, i32) #if SHIFT =3D=3D 1 -DEF_HELPER_4(glue(dppd, SUFFIX), void, env, Reg, Reg, i32) +DEF_HELPER_5(glue(dppd, SUFFIX), void, env, Reg, Reg, Reg, i32) #endif -DEF_HELPER_4(glue(mpsadbw, SUFFIX), void, env, Reg, Reg, i32) +DEF_HELPER_5(glue(mpsadbw, SUFFIX), void, env, Reg, Reg, Reg, i32) #endif =20 /* SSE4.2 op helpers */ #if SHIFT >=3D 1 -DEF_HELPER_3(glue(pcmpgtq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(pcmpgtq, SUFFIX), void, env, Reg, Reg, Reg) #endif #if SHIFT =3D=3D 1 DEF_HELPER_4(glue(pcmpestri, SUFFIX), void, env, Reg, Reg, i32) @@ -368,15 +369,15 @@ DEF_HELPER_3(crc32, tl, i32, tl, i32) =20 /* AES-NI op helpers */ #if SHIFT >=3D 1 -DEF_HELPER_3(glue(aesdec, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(aesdeclast, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(aesenc, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(aesenclast, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(aesdec, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(aesdeclast, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(aesenc, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(aesenclast, SUFFIX), void, env, Reg, Reg, Reg) #if SHIFT =3D=3D 1 DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg) DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32) #endif -DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32) +DEF_HELPER_5(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, Reg, i32) #endif =20 #undef SHIFT diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index cc2e6f0906..d20835f96d 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -130,6 +130,7 @@ typedef struct DisasContext { TCGv tmp4; TCGv_ptr ptr0; TCGv_ptr ptr1; + TCGv_ptr ptr2; TCGv_i32 tmp2_i32; TCGv_i32 tmp3_i32; TCGv_i64 tmp1_i64; @@ -2889,18 +2890,28 @@ typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_pt= r reg, TCGv_i64 val); typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b= ); typedef 
void (*SSEFunc_0_eppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b, TCGv_ptr reg_c); +typedef void (*SSEFunc_0_epppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg= _b, + TCGv_ptr reg_c, TCGv_ptr reg_d); typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b, TCGv_i32 val); +typedef void (*SSEFunc_0_epppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg= _b, + TCGv_ptr reg_c, TCGv_i32 val); typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val= ); +typedef void (*SSEFunc_0_pppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr re= g_c, + TCGv_i32 val); typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b, TCGv val); +typedef void (*SSEFunc_0_epppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg= _b, + TCGv_ptr reg_c, TCGv val); =20 static bool first =3D true; static unsigned long limit; #include "decode-new.h" #include "emit.c.inc" #include "decode-new.c.inc" =20 +#define SSE_OPF_V0 (1 << 0) /* vex.v must be 1111b (only 2 operands= ) */ #define SSE_OPF_CMP (1 << 1) /* does not write for first operand */ +#define SSE_OPF_BLENDV (1 << 2) /* blendv* instruction */ #define SSE_OPF_SPECIAL (1 << 3) /* magic */ #define SSE_OPF_3DNOW (1 << 4) /* 3DNow! 
instruction */ #define SSE_OPF_MMX (1 << 5) /* MMX/integer/AVX2 instruction */ @@ -2910,10 +2921,10 @@ static bool first =3D true; static unsigned long li= mit; #define OP(op, flags, a, b, c, d) \ {flags, {{.op =3D a}, {.op =3D b}, {.op =3D c}, {.op =3D d} } } =20 -#define MMX_OP(x) OP(op1, SSE_OPF_MMX, \ +#define MMX_OP(x) OP(op2, SSE_OPF_MMX, \ gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm, NULL, NULL) =20 -#define SSE_FOP(name) OP(op1, SSE_OPF_SCALAR, \ +#define SSE_FOP(name) OP(op2, SSE_OPF_SCALAR, \ gen_helper_##name##ps##_xmm, gen_helper_##name##pd##_xmm, \ gen_helper_##name##ss, gen_helper_##name##sd) #define SSE_OP(sname, dname, op, flags) OP(op, flags, \ @@ -2923,6 +2934,9 @@ typedef union SSEFuncs { SSEFunc_0_epp op1; SSEFunc_0_ppi op1i; SSEFunc_0_eppt op1t; + SSEFunc_0_eppp op2; + SSEFunc_0_pppi op2i; + SSEFunc_0_epppp op3; } SSEFuncs; =20 struct SSEOpHelper_table1 { @@ -2942,8 +2956,8 @@ static const struct SSEOpHelper_table1 sse_op_table1[= 256] =3D { [0x11] =3D SSE_SPECIAL, /* movups, movupd, movss, movsd */ [0x12] =3D SSE_SPECIAL, /* movlps, movlpd, movsldup, movddup */ [0x13] =3D SSE_SPECIAL, /* movlps, movlpd */ - [0x14] =3D SSE_OP(punpckldq, punpcklqdq, op1, 0), /* unpcklps, unpcklp= d */ - [0x15] =3D SSE_OP(punpckhdq, punpckhqdq, op1, 0), /* unpckhps, unpckhp= d */ + [0x14] =3D SSE_OP(punpckldq, punpcklqdq, op2, 0), /* unpcklps, unpcklp= d */ + [0x15] =3D SSE_OP(punpckhdq, punpckhqdq, op2, 0), /* unpckhps, unpckhp= d */ [0x16] =3D SSE_SPECIAL, /* movhps, movhpd, movshdup */ [0x17] =3D SSE_SPECIAL, /* movhps, movhpd */ =20 @@ -2953,28 +2967,28 @@ static const struct SSEOpHelper_table1 sse_op_table= 1[256] =3D { [0x2b] =3D SSE_SPECIAL, /* movntps, movntpd, movntss, movntsd */ [0x2c] =3D SSE_SPECIAL, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si = */ [0x2d] =3D SSE_SPECIAL, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */ - [0x2e] =3D OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR, + [0x2e] =3D OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR | SSE_OPF_V0, 
gen_helper_ucomiss, gen_helper_ucomisd, NULL, NULL), - [0x2f] =3D OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR, + [0x2f] =3D OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR | SSE_OPF_V0, gen_helper_comiss, gen_helper_comisd, NULL, NULL), [0x50] =3D SSE_SPECIAL, /* movmskps, movmskpd */ - [0x51] =3D OP(op1, SSE_OPF_SCALAR, + [0x51] =3D OP(op1, SSE_OPF_SCALAR | SSE_OPF_V0, gen_helper_sqrtps_xmm, gen_helper_sqrtpd_xmm, gen_helper_sqrtss, gen_helper_sqrtsd), - [0x52] =3D OP(op1, SSE_OPF_SCALAR, + [0x52] =3D OP(op1, SSE_OPF_SCALAR | SSE_OPF_V0, gen_helper_rsqrtps_xmm, NULL, gen_helper_rsqrtss, NULL), - [0x53] =3D OP(op1, SSE_OPF_SCALAR, + [0x53] =3D OP(op1, SSE_OPF_SCALAR | SSE_OPF_V0, gen_helper_rcpps_xmm, NULL, gen_helper_rcpss, NULL), - [0x54] =3D SSE_OP(pand, pand, op1, 0), /* andps, andpd */ - [0x55] =3D SSE_OP(pandn, pandn, op1, 0), /* andnps, andnpd */ - [0x56] =3D SSE_OP(por, por, op1, 0), /* orps, orpd */ - [0x57] =3D SSE_OP(pxor, pxor, op1, 0), /* xorps, xorpd */ + [0x54] =3D SSE_OP(pand, pand, op2, 0), /* andps, andpd */ + [0x55] =3D SSE_OP(pandn, pandn, op2, 0), /* andnps, andnpd */ + [0x56] =3D SSE_OP(por, por, op2, 0), /* orps, orpd */ + [0x57] =3D SSE_OP(pxor, pxor, op2, 0), /* xorps, xorpd */ [0x58] =3D SSE_FOP(add), [0x59] =3D SSE_FOP(mul), - [0x5a] =3D OP(op1, SSE_OPF_SCALAR, + [0x5a] =3D OP(op1, SSE_OPF_SCALAR | SSE_OPF_V0, gen_helper_cvtps2pd_xmm, gen_helper_cvtpd2ps_xmm, gen_helper_cvtss2sd, gen_helper_cvtsd2ss), - [0x5b] =3D OP(op1, 0, + [0x5b] =3D OP(op1, SSE_OPF_V0, gen_helper_cvtdq2ps_xmm, gen_helper_cvtps2dq_xmm, gen_helper_cvttps2dq_xmm, NULL), [0x5c] =3D SSE_FOP(sub), @@ -2983,7 +2997,7 @@ static const struct SSEOpHelper_table1 sse_op_table1[= 256] =3D { [0x5f] =3D SSE_FOP(max), =20 [0xc2] =3D SSE_FOP(cmpeq), /* sse_op_table4 */ - [0xc6] =3D SSE_OP(shufps, shufpd, op1i, SSE_OPF_SHUF), + [0xc6] =3D SSE_OP(shufps, shufpd, op2i, SSE_OPF_SHUF), =20 /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. 
*/ [0x38] =3D SSE_SPECIAL, @@ -3002,13 +3016,13 @@ static const struct SSEOpHelper_table1 sse_op_table= 1[256] =3D { [0x69] =3D MMX_OP(punpckhwd), [0x6a] =3D MMX_OP(punpckhdq), [0x6b] =3D MMX_OP(packssdw), - [0x6c] =3D OP(op1, SSE_OPF_MMX, + [0x6c] =3D OP(op2, SSE_OPF_MMX, NULL, gen_helper_punpcklqdq_xmm, NULL, NULL), - [0x6d] =3D OP(op1, SSE_OPF_MMX, + [0x6d] =3D OP(op2, SSE_OPF_MMX, NULL, gen_helper_punpckhqdq_xmm, NULL, NULL), [0x6e] =3D SSE_SPECIAL, /* movd mm, ea */ [0x6f] =3D SSE_SPECIAL, /* movq, movdqa, , movqdu */ - [0x70] =3D OP(op1i, SSE_OPF_SHUF | SSE_OPF_MMX, + [0x70] =3D OP(op1i, SSE_OPF_SHUF | SSE_OPF_MMX | SSE_OPF_V0, gen_helper_pshufw_mmx, gen_helper_pshufd_xmm, gen_helper_pshufhw_xmm, gen_helper_pshuflw_xmm), [0x71] =3D SSE_SPECIAL, /* shiftw */ @@ -3019,17 +3033,17 @@ static const struct SSEOpHelper_table1 sse_op_table= 1[256] =3D { [0x76] =3D MMX_OP(pcmpeql), [0x77] =3D SSE_SPECIAL, /* emms */ [0x78] =3D SSE_SPECIAL, /* extrq_i, insertq_i (sse4a) */ - [0x79] =3D OP(op1, 0, + [0x79] =3D OP(op1, SSE_OPF_V0, NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r), - [0x7c] =3D OP(op1, 0, + [0x7c] =3D OP(op2, 0, NULL, gen_helper_haddpd_xmm, NULL, gen_helper_haddps_xmm), - [0x7d] =3D OP(op1, 0, + [0x7d] =3D OP(op2, 0, NULL, gen_helper_hsubpd_xmm, NULL, gen_helper_hsubps_xmm), [0x7e] =3D SSE_SPECIAL, /* movd, movd, , movq */ [0x7f] =3D SSE_SPECIAL, /* movq, movdqa, movdqu */ [0xc4] =3D SSE_SPECIAL, /* pinsrw */ [0xc5] =3D SSE_SPECIAL, /* pextrw */ - [0xd0] =3D OP(op1, 0, + [0xd0] =3D OP(op2, 0, NULL, gen_helper_addsubpd_xmm, NULL, gen_helper_addsubps_x= mm), [0xd1] =3D MMX_OP(psrlw), [0xd2] =3D MMX_OP(psrld), @@ -3052,7 +3066,7 @@ static const struct SSEOpHelper_table1 sse_op_table1[= 256] =3D { [0xe3] =3D MMX_OP(pavgw), [0xe4] =3D MMX_OP(pmulhuw), [0xe5] =3D MMX_OP(pmulhw), - [0xe6] =3D OP(op1, 0, + [0xe6] =3D OP(op1, SSE_OPF_V0, NULL, gen_helper_cvttpd2dq_xmm, gen_helper_cvtdq2pd_xmm, gen_helper_cvtpd2dq_xmm), [0xe7] =3D SSE_SPECIAL, /* movntq, movntq 
*/ @@ -3071,7 +3085,7 @@ static const struct SSEOpHelper_table1 sse_op_table1[= 256] =3D { [0xf4] =3D MMX_OP(pmuludq), [0xf5] =3D MMX_OP(pmaddwd), [0xf6] =3D MMX_OP(psadbw), - [0xf7] =3D OP(op1t, SSE_OPF_MMX, + [0xf7] =3D OP(op1t, SSE_OPF_MMX | SSE_OPF_V0, gen_helper_maskmov_mmx, gen_helper_maskmov_xmm, NULL, NULL= ), [0xf8] =3D MMX_OP(psubb), [0xf9] =3D MMX_OP(psubw), @@ -3089,7 +3103,7 @@ static const struct SSEOpHelper_table1 sse_op_table1[= 256] =3D { =20 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm } =20 -static const SSEFunc_0_epp sse_op_table2[3 * 8][2] =3D { +static const SSEFunc_0_eppp sse_op_table2[3 * 8][2] =3D { [0 + 2] =3D MMX_OP2(psrlw), [0 + 4] =3D MMX_OP2(psraw), [0 + 6] =3D MMX_OP2(psllw), @@ -3133,7 +3147,7 @@ static const SSEFunc_l_ep sse_op_table3bq[] =3D { #define SSE_CMP(x) { \ gen_helper_ ## x ## ps ## _xmm, gen_helper_ ## x ## pd ## _xmm, \ gen_helper_ ## x ## ss, gen_helper_ ## x ## sd} -static const SSEFunc_0_epp sse_op_table4[8][4] =3D { +static const SSEFunc_0_eppp sse_op_table4[8][4] =3D { SSE_CMP(cmpeq), SSE_CMP(cmplt), SSE_CMP(cmple), @@ -3145,6 +3159,11 @@ static const SSEFunc_0_epp sse_op_table4[8][4] =3D { }; #undef SSE_CMP =20 +static void gen_helper_pavgusb(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b) +{ + gen_helper_pavgb_mmx(env, reg_a, reg_a, reg_b); +} + static const SSEFunc_0_epp sse_op_table5[256] =3D { [0x0c] =3D gen_helper_pi2fw, [0x0d] =3D gen_helper_pi2fd, @@ -3169,7 +3188,7 @@ static const SSEFunc_0_epp sse_op_table5[256] =3D { [0xb6] =3D gen_helper_movq, /* pfrcpit2 */ [0xb7] =3D gen_helper_pmulhrw_mmx, [0xbb] =3D gen_helper_pswapd, - [0xbf] =3D gen_helper_pavgb_mmx, + [0xbf] =3D gen_helper_pavgusb, }; =20 struct SSEOpHelper_table6 { @@ -3181,6 +3200,8 @@ struct SSEOpHelper_table6 { struct SSEOpHelper_table7 { union { SSEFunc_0_eppi op1; + SSEFunc_0_epppi op2; + SSEFunc_0_epppp op3; } fn[2]; uint32_t ext_mask; int flags; @@ -3192,15 +3213,15 @@ struct SSEOpHelper_table7 { {{{.op =3D 
mmx_name}, {.op =3D gen_helper_ ## name ## _xmm} }, \ CPUID_EXT_ ## ext, flags} #define BINARY_OP_MMX(name, ext) \ - OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx) + OP(name, op2, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx) #define BINARY_OP(name, ext, flags) \ - OP(name, op1, flags, ext, NULL) + OP(name, op2, flags, ext, NULL) #define UNARY_OP_MMX(name, ext) \ - OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx) + OP(name, op1, SSE_OPF_V0 | SSE_OPF_MMX, ext, gen_helper_ ## name ## _m= mx) #define UNARY_OP(name, ext, flags) \ - OP(name, op1, flags, ext, NULL) -#define BLENDV_OP(name, ext, flags) OP(name, op1, 0, ext, NULL) -#define CMP_OP(name, ext) OP(name, op1, SSE_OPF_CMP, ext, NULL) + OP(name, op1, SSE_OPF_V0 | flags, ext, NULL) +#define BLENDV_OP(name, ext, flags) OP(name, op3, SSE_OPF_BLENDV, ext, NUL= L) +#define CMP_OP(name, ext) OP(name, op1, SSE_OPF_CMP | SSE_OPF_V0, ext, NUL= L) #define SPECIAL_OP(ext) OP(special, op1, SSE_OPF_SPECIAL, ext, NULL) =20 /* prefix [66] 0f 38 */ @@ -3758,7 +3779,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s= , int b, op1_offset =3D offsetof(CPUX86State,mmx_t0); } assert(b1 < 2); - SSEFunc_0_epp fn =3D sse_op_table2[((b - 1) & 3) * 8 + + SSEFunc_0_eppp fn =3D sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1]; if (!fn) { goto unknown_op; @@ -3771,8 +3792,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s= , int b, op2_offset =3D offsetof(CPUX86State,fpregs[rm].mmx); } tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset); - fn(cpu_env, s->ptr0, s->ptr1); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + tcg_gen_addi_ptr(s->ptr2, cpu_env, op1_offset); + fn(cpu_env, s->ptr0, s->ptr1, s->ptr2); break; case 0x050: /* movmskps */ rm =3D (modrm & 7) | REX_B(s); @@ -4041,7 +4063,21 @@ static void gen_sse(CPUX86State *env, DisasContext *= s, int b, } tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); tcg_gen_addi_ptr(s->ptr1, cpu_env, 
op2_offset); - op6->fn[b1].op1(cpu_env, s->ptr0, s->ptr1); + if (op6->flags & SSE_OPF_V0) { + op6->fn[b1].op1(cpu_env, s->ptr0, s->ptr1); + } else { + tcg_gen_addi_ptr(s->ptr2, cpu_env, op1_offset); + if (op6->flags & SSE_OPF_BLENDV) { + TCGv_ptr mask =3D tcg_temp_new_ptr(); + tcg_gen_addi_ptr(mask, cpu_env, ZMM_OFFSET(0)); + op6->fn[b1].op3(cpu_env, s->ptr0, s->ptr2, s->ptr1, + mask); + tcg_temp_free_ptr(mask); + } else { + SSEFunc_0_eppp fn =3D op6->fn[b1].op2; + fn(cpu_env, s->ptr0, s->ptr2, s->ptr1); + } + } } else { CHECK_NO_VEX(s); if ((op6->flags & SSE_OPF_MMX) =3D=3D 0) { @@ -4057,7 +4093,11 @@ static void gen_sse(CPUX86State *env, DisasContext *= s, int b, } tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - op6->fn[0].op1(cpu_env, s->ptr0, s->ptr1); + if (op6->flags & SSE_OPF_V0) { + op6->fn[0].op1(cpu_env, s->ptr0, s->ptr1); + } else { + op6->fn[0].op2(cpu_env, s->ptr0, s->ptr0, s->ptr1); + } } =20 if (op6->flags & SSE_OPF_CMP) { @@ -4391,7 +4431,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s= , int b, /* We only actually have one MMX instuction (palignr) */ assert(b =3D=3D 0x0f); =20 - op7->fn[0].op1(cpu_env, s->ptr0, s->ptr1, + op7->fn[0].op2(cpu_env, s->ptr0, s->ptr0, s->ptr1, tcg_const_i32(val)); break; } @@ -4418,7 +4458,13 @@ static void gen_sse(CPUX86State *env, DisasContext *= s, int b, =20 tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - op7->fn[b1].op1(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val)); + if (op7->flags & SSE_OPF_V0) { + op7->fn[b1].op1(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(v= al)); + } else { + tcg_gen_addi_ptr(s->ptr2, cpu_env, op1_offset); + op7->fn[b1].op2(cpu_env, s->ptr0, s->ptr2, s->ptr1, + tcg_const_i32(val)); + } if (op7->flags & SSE_OPF_CMP) { set_cc_op(s, CC_OP_EFLAGS); } @@ -4510,26 +4556,46 @@ static void gen_sse(CPUX86State *env, DisasContext = *s, int b, return; } } + + tcg_gen_addi_ptr(s->ptr0, cpu_env, 
op1_offset); tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - if (sse_op_flags & SSE_OPF_SHUF) { - val =3D x86_ldub_code(env, s); - sse_op_fn.op1i(s->ptr0, s->ptr1, tcg_const_i32(val)); - } else if (b =3D=3D 0xf7) { - /* maskmov : we must prepare A0 */ - if (mod !=3D 3) { - goto illegal_op; + if (sse_op_flags & SSE_OPF_V0) { + if (sse_op_flags & SSE_OPF_SHUF) { + val =3D x86_ldub_code(env, s); + sse_op_fn.op1i(s->ptr0, s->ptr1, tcg_const_i32(val)); + } else if (b =3D=3D 0xf7) { + /* maskmov : we must prepare A0 */ + if (mod !=3D 3) { + goto illegal_op; + } + tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); + gen_extu(s->aflag, s->A0); + gen_add_A0_ds_seg(s); + + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + sse_op_fn.op1t(cpu_env, s->ptr0, s->ptr1, s->A0); + /* Does not write to the fist operand */ + return; + } else { + sse_op_fn.op1(cpu_env, s->ptr0, s->ptr1); } - tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); - gen_extu(s->aflag, s->A0); - gen_add_A0_ds_seg(s); - sse_op_fn.op1t(cpu_env, s->ptr0, s->ptr1, s->A0); - } else if (b =3D=3D 0xc2) { - /* compare insns, bits 7:3 (7:5 for AVX) are ignored */ - val =3D x86_ldub_code(env, s) & 7; - sse_op_table4[val][b1](cpu_env, s->ptr0, s->ptr1); } else { - sse_op_fn.op1(cpu_env, s->ptr0, s->ptr1); + tcg_gen_addi_ptr(s->ptr2, cpu_env, op1_offset); + if (sse_op_flags & SSE_OPF_SHUF) { + val =3D x86_ldub_code(env, s); + sse_op_fn.op2i(s->ptr0, s->ptr2, s->ptr1, + tcg_const_i32(val)); + } else { + SSEFunc_0_eppp fn =3D sse_op_fn.op2; + if (b =3D=3D 0xc2) { + /* compare insns */ + val =3D x86_ldub_code(env, s) & 7; + fn =3D sse_op_table4[val][b1]; + } + fn(cpu_env, s->ptr0, s->ptr2, s->ptr1); + } } =20 if (sse_op_flags & SSE_OPF_CMP) { @@ -8611,6 +8677,7 @@ static void i386_tr_init_disas_context(DisasContextBa= se *dcbase, CPUState *cpu) dc->tmp4 =3D tcg_temp_new(); dc->ptr0 =3D tcg_temp_new_ptr(); dc->ptr1 =3D tcg_temp_new_ptr(); + dc->ptr2 =3D tcg_temp_new_ptr(); dc->cc_srcT =3D 
tcg_temp_local_new(); } =20 --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713386; cv=none; d=zohomail.com; s=zohoarc; b=n498fWYKXecGG4YahMNsy1wi3q/7GsrUZvSyKRZE4zWG0R2U63mWDv2/Z8uA9GVdiJ3PrCZtakxWuqKRAC/Jn+eaQJa/VAFNkzmJjMTxtlKLTy3kAvAW6gRhWRDyFkBOMVfg0tM373HjVtc57Akw716SRRci2jKy5DgJ8h6qCV8= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713386; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=GZE0N8PNvqQrTS2RMXVe32gG4ZjMIl14Sabo0b1XCdY=; b=Oh29PlIEbHEoUBIjQM9UI7Synuzz8qX15+77DHTbwsqUqnPGjSJ0hWGpvEXfAmVreSasOkRfvfT6gJN9d0YuWO5uTGwk5UMYRN4Y+DlZMJLpoN1PEEInm2c62pJF1IuWR34Fvek4831AWHckABHkHwz3xdfLIA7bTEe/jVmfan8= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663713386665332.1991335990285; Tue, 20 Sep 2022 15:36:26 -0700 (PDT) Received: from localhost ([::1]:58462 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalqq-0002yE-DU for importer@patchew.org; Tue, 20 Sep 2022 18:36:24 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58682) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah08-0006QP-Ha for qemu-devel@nongnu.org; Tue, 20 Sep 2022 
13:25:45 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:22565) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah06-0002AL-2L for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:39 -0400 Received: from mail-ej1-f69.google.com (mail-ej1-f69.google.com [209.85.218.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-575-yUapoQk7NbC5dge5CUxeDg-1; Tue, 20 Sep 2022 13:25:35 -0400 Received: by mail-ej1-f69.google.com with SMTP id sb32-20020a1709076da000b0077faea20701so1777578ejc.10 for ; Tue, 20 Sep 2022 10:25:34 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id f5-20020a056402004500b0044f1e64e9f4sm217059edu.17.2022.09.20.10.25.31 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:31 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694736; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=GZE0N8PNvqQrTS2RMXVe32gG4ZjMIl14Sabo0b1XCdY=; b=Z1nrS935LIUXgIqA6qcNXombVMVAOuzNVoxx+8gcYYiRl8AuRYxg2piHKve28ZuukLqexM gjciUwJZwclsDZBCVrtJB+vuif0AdA8bt06kjxkWKof80mly/OyqPU3lIUEy5ICyRJ4qfR PCnTEEeHXfSiILSiZaMbEFguj3hKYkY= X-MC-Unique: yUapoQk7NbC5dge5CUxeDg-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=GZE0N8PNvqQrTS2RMXVe32gG4ZjMIl14Sabo0b1XCdY=; b=QqPPmmIhhvIvhfxxiVsKHeu5NjiN3XikLx4+hdVvuw3o20OlbvUn8+64+P9SN2zzM1 UXZUd2TFxNqWaXsEE7xSDWZ/1gyY9zNTunoNrgFXi7nMHjp559l4/2uFtcN0doUYcw4w 
mv186adr7Z2nYf5M5AwfjzVTzb3b/rNKKCP7q3w1N4XbRr3tAENZUiGDKRF64lps1qLE q2ERNz7TjGkTy69TZRQQRIu4NswiPLUKiFNQk+KsJ7+sakQS6LHfTt9pIO3nZ367eTSI reWtTvbg/y9sJsfk1u6OudcYm7MEEnwCpAzS9iEiHGx/BLPEt692cwlD0/K9mhBITZaL ZaHg== X-Gm-Message-State: ACrzQf2IfWFQXNOj78M+KZO6dXd6wb7oCC02v3huS0Fgx7/jpZv7Zrzv chx+Rq8spfVaB7WYt8MdK9dxZt1FrBlue5IyFWbTURPGT1zULOt4jL6rWPKk1mbCRdrjWvZ6lUk cH7IoOeOGaBRwTo1sVvqAYrbq7I+2Rl5M0TQxz2TMDDhc9/G8TFcw7RkOGmn5tTPta8E= X-Received: by 2002:a17:907:701:b0:780:2c44:e4dd with SMTP id xb1-20020a170907070100b007802c44e4ddmr17654894ejb.589.1663694733239; Tue, 20 Sep 2022 10:25:33 -0700 (PDT) X-Google-Smtp-Source: AMsMyM6Ke4OonRd5c+N6/JoXAhjWe9E+29KHweDUJz6t71DjDxwdLZxztDMB3z0O8LhaBcNP5JhZvg== X-Received: by 2002:a17:907:701:b0:780:2c44:e4dd with SMTP id xb1-20020a170907070100b007802c44e4ddmr17654835ejb.589.1663694732249; Tue, 20 Sep 2022 10:25:32 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 15/37] target/i386: support operand merging in binary scalar helpers Date: Tue, 20 Sep 2022 19:24:45 +0200 Message-Id: <20220920172507.95568-16-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no 
X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713387919100001 Content-Type: text/plain; charset="utf-8" Compared to Paul's implementation, the new decoder will use a different app= roach to implement AVX's merging of dst with src1 on scalar operations. Adjust t= he helpers to provide this functionality. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/ops_sse.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 5f0ee9db52..ddedc46f36 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -557,12 +557,20 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int= order) \ void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *v, Reg *s)\ { \ + int i; \ d->ZMM_S(0) =3D F(32, v->ZMM_S(0), s->ZMM_S(0)); \ + for (i =3D 1; i < 2 << SHIFT; i++) { \ + d->ZMM_L(i) =3D v->ZMM_L(i); \ + } \ } \ \ void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *v, Reg *s)\ { \ + int i; \ d->ZMM_D(0) =3D F(64, v->ZMM_D(0), s->ZMM_D(0)); \ + for (i =3D 1; i < 1 << SHIFT; i++) { \ + d->ZMM_Q(i) =3D v->ZMM_Q(i); \ + } \ } =20 #else @@ -1027,12 +1035,20 @@ void glue(helper_addsubpd, SUFFIX)(CPUX86State *env= , Reg *d, Reg *v, Reg *s) SSE_HELPER_CMP_P(name, F, C) = \ void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *v, Reg *s) = \ { = \ + int i; = \ d->ZMM_L(0) =3D C(F(32, v->ZMM_S(0), s->ZMM_S(0))) ? -1 : 0; = \ + for (i =3D 1; i < 2 << SHIFT; i++) { = \ + d->ZMM_L(i) =3D v->ZMM_L(i); = \ + } = \ } = \ = \ void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *v, Reg *s) = \ { = \ + int i; = \ d->ZMM_Q(0) =3D C(F(64, v->ZMM_D(0), s->ZMM_D(0))) ? 
-1 : 0; = \ + for (i =3D 1; i < 1 << SHIFT; i++) { = \ + d->ZMM_Q(i) =3D v->ZMM_Q(i); = \ + } = \ } =20 #define FPU_EQ(x) (x =3D=3D float_relation_equal) --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713834; cv=none; d=zohomail.com; s=zohoarc; b=ZH6qarsYAyWozEsGT4j8eHSJb4Sl+mlfT9KhwDnCbpV8YGHrVsXw4OjTXnOz0bxOjdmnF+RFRn1rUXOrjKJDTg5/e2tzcD8wsy+ipTP5+itVe4eg69U8DPBEuSrN5wgcLsXMkqWD/UFuagYJKReP6BcS/laWjuqgcyhyPFpSdGg= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713834; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=63P6n3TZUMN2plBMebwY6xrAOJMypTNGGdJKWhGkiUk=; b=mZeLjhke2BxrpNLM8BrG/zz3JKvjHLN7mSBksiBkSWNS3cL0L/lE0miw1QuNmt5ZjnS5DPGfPtyPH2qyXqlHYNSBxpurhnrlMeUUQ2O4bBmbL9yU/2Tn1JkFKc5HYRkedWAeUyzlvqfeo1wmUldoHyX4+YS0Ylr1gxkxZxzKY+o= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 166371383431489.93307535058477; Tue, 20 Sep 2022 15:43:54 -0700 (PDT) Received: from localhost ([::1]:56516 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oaly3-00026O-0t for importer@patchew.org; Tue, 20 Sep 2022 18:43:51 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58686) by lists.gnu.org with esmtps 
(TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah09-0006QR-E8 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:54 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:28826) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah06-0002AR-2l for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:41 -0400 Received: from mail-ed1-f69.google.com (mail-ed1-f69.google.com [209.85.208.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-604-viFc7qDZOYOn4mg8e19COg-1; Tue, 20 Sep 2022 13:25:35 -0400 Received: by mail-ed1-f69.google.com with SMTP id x5-20020a05640226c500b00451ec193793so2378195edd.16 for ; Tue, 20 Sep 2022 10:25:35 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id d9-20020a170906304900b0078128c89439sm209651ejd.6.2022.09.20.10.25.32 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:33 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694736; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=63P6n3TZUMN2plBMebwY6xrAOJMypTNGGdJKWhGkiUk=; b=TbSXYonxDY6Da4Pp5YNN3Xe529UDIOt265nDaQNjcuB6rjtar8jBNZ1EKZGCnuyvkGrIrk 3sqQWQNBzeqK+74wXjkcqdHDA38ULJuIui3wTNssg3lBz5T1FozoLDH8/2DUkWzrwlWSCO C08qOeZnq5S0Yx1WIELK6GtTBoJyMEU= X-MC-Unique: viFc7qDZOYOn4mg8e19COg-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=63P6n3TZUMN2plBMebwY6xrAOJMypTNGGdJKWhGkiUk=; 
b=AVaTY/BfeSUWepc5tbMNOr3mjOSe7ZgvsLoey5SqwzQ6CRNr8RWa8UxoUl8tl7sGKM Gte1KoaL/dbtTG9i54CHRw39b9jAFj3InhaqZvLaA6nGQcfNbkBg705uA9jIIw3scZ6y CMJV4MCMEAl7KN3YpKQXseD34wXqa6aRw2cgCfzK1ePbVu3RnCTrE84Pm4dCBJ4rWgm/ +GtFRbMjlwQs1z3dXpBm5MI//oOxUCzn/YN2C+nty6+99fT3Ab7Zkbz6xh003ASBQHyA /AdRLQiT+m3KJ36dSVHezVpLRC/lImw9bRgM4tcj5Stp3ZlbNbWvkC54iPpwPrjDg5VB +crA== X-Gm-Message-State: ACrzQf0BMSjqQbNqMvNB12OFb32IFQ2MsOw9Gp/wAJcsdy0DI4LAQO+B oDFxgVwlHwneCcixSZzkoB3l4VhNChdCNuOEIZV4CPd3DAXgpN0l8MR2r4GU11/6DL3CduGkbd3 7Xoek1QzBlbBaJmbjZXW+E6Rf2GRoqARNQsZ7m6KAJL/4kvoBlDNCwuKtG/6azDM0xbY= X-Received: by 2002:a05:6402:d0b:b0:443:df38:9df with SMTP id eb11-20020a0564020d0b00b00443df3809dfmr20826972edb.9.1663694734067; Tue, 20 Sep 2022 10:25:34 -0700 (PDT) X-Google-Smtp-Source: AMsMyM5guyN5XK+qTl1uf5oBK0I4cSeg9JJfBT4FuCXNHj3MDM2rjvZk7RuZByBYjxB/dDGK46j7qg== X-Received: by 2002:a05:6402:d0b:b0:443:df38:9df with SMTP id eb11-20020a0564020d0b00b00443df3809dfmr20826951edb.9.1663694733771; Tue, 20 Sep 2022 10:25:33 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 16/37] target/i386: provide 3-operand versions of unary scalar helpers Date: Tue, 20 Sep 2022 19:24:46 +0200 Message-Id: <20220920172507.95568-17-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, 
DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713836373100001 Content-Type: text/plain; charset="utf-8" Compared to Paul's implementation, the new decoder will use a different app= roach to implement AVX's merging of dst with src1 on scalar operations. Adjust t= he old SSE decoder to be compatible with new-style helpers. The affected instructions are CVTSx2Sx, ROUNDSx, RSQRTSx, SQRTSx, RCPSx. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/ops_sse.h | 48 ++++++++++++++++++++++++++++++------ target/i386/ops_sse_header.h | 16 ++++++------ target/i386/tcg/translate.c | 22 ++++++++++------- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index ddedc46f36..8bb7293975 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -617,14 +617,22 @@ void glue(helper_sqrtpd, SUFFIX)(CPUX86State *env, Re= g *d, Reg *s) } =20 #if SHIFT =3D=3D 1 -void helper_sqrtss(CPUX86State *env, Reg *d, Reg *s) +void helper_sqrtss(CPUX86State *env, Reg *d, Reg *v, Reg *s) { + int i; d->ZMM_S(0) =3D float32_sqrt(s->ZMM_S(0), &env->sse_status); + for (i =3D 1; i < 2 << SHIFT; i++) { + d->ZMM_L(i) =3D v->ZMM_L(i); + } } =20 -void helper_sqrtsd(CPUX86State *env, Reg *d, Reg *s) +void helper_sqrtsd(CPUX86State *env, Reg *d, Reg *v, Reg *s) { + int i; d->ZMM_D(0) =3D float64_sqrt(s->ZMM_D(0), &env->sse_status); + for (i =3D 1; i < 1 << SHIFT; i++) { + d->ZMM_Q(i) =3D v->ZMM_Q(i); + } } #endif =20 @@ -649,14 +657,22 @@ void glue(helper_cvtpd2ps, SUFFIX)(CPUX86State *env, = Reg *d, Reg *s) } 
=20 #if SHIFT =3D=3D 1 -void helper_cvtss2sd(CPUX86State *env, Reg *d, Reg *s) +void helper_cvtss2sd(CPUX86State *env, Reg *d, Reg *v, Reg *s) { + int i; d->ZMM_D(0) =3D float32_to_float64(s->ZMM_S(0), &env->sse_status); + for (i =3D 1; i < 1 << SHIFT; i++) { + d->ZMM_Q(i) =3D v->ZMM_Q(i); + } } =20 -void helper_cvtsd2ss(CPUX86State *env, Reg *d, Reg *s) +void helper_cvtsd2ss(CPUX86State *env, Reg *d, Reg *v, Reg *s) { + int i; d->ZMM_S(0) =3D float64_to_float32(s->ZMM_D(0), &env->sse_status); + for (i =3D 1; i < 2 << SHIFT; i++) { + d->ZMM_L(i) =3D v->ZMM_L(i); + } } #endif =20 @@ -876,13 +892,17 @@ void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, Z= MMReg *d, ZMMReg *s) } =20 #if SHIFT =3D=3D 1 -void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *s) +void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { uint8_t old_flags =3D get_float_exception_flags(&env->sse_status); + int i; d->ZMM_S(0) =3D float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), &env->sse_status); set_float_exception_flags(old_flags, &env->sse_status); + for (i =3D 1; i < 2 << SHIFT; i++) { + d->ZMM_L(i) =3D v->ZMM_L(i); + } } #endif =20 @@ -897,10 +917,14 @@ void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMM= Reg *d, ZMMReg *s) } =20 #if SHIFT =3D=3D 1 -void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *s) +void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { uint8_t old_flags =3D get_float_exception_flags(&env->sse_status); + int i; d->ZMM_S(0) =3D float32_div(float32_one, s->ZMM_S(0), &env->sse_status= ); + for (i =3D 1; i < 2 << SHIFT; i++) { + d->ZMM_L(i) =3D v->ZMM_L(i); + } set_float_exception_flags(old_flags, &env->sse_status); } #endif @@ -1798,11 +1822,12 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env,= Reg *d, Reg *s, } =20 #if SHIFT =3D=3D 1 -void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, +void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { uint8_t 
old_flags =3D get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; + int i; =20 prev_rounding_mode =3D env->sse_status.float_rounding_mode; if (!(mode & (1 << 2))) { @@ -1823,6 +1848,9 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, R= eg *d, Reg *s, } =20 d->ZMM_S(0) =3D float32_round_to_int(s->ZMM_S(0), &env->sse_status); + for (i =3D 1; i < 2 << SHIFT; i++) { + d->ZMM_L(i) =3D v->ZMM_L(i); + } =20 if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_stat= us) & @@ -1832,11 +1860,12 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env,= Reg *d, Reg *s, env->sse_status.float_rounding_mode =3D prev_rounding_mode; } =20 -void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, +void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { uint8_t old_flags =3D get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; + int i; =20 prev_rounding_mode =3D env->sse_status.float_rounding_mode; if (!(mode & (1 << 2))) { @@ -1857,6 +1886,9 @@ void glue(helper_roundsd, SUFFIX)(CPUX86State *env, R= eg *d, Reg *s, } =20 d->ZMM_D(0) =3D float64_round_to_int(s->ZMM_D(0), &env->sse_status); + for (i =3D 1; i < 1 << SHIFT; i++) { + d->ZMM_Q(i) =3D v->ZMM_Q(i); + } =20 if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_stat= us) & diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index b60fe2f0d4..32ffa8445b 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -148,8 +148,8 @@ DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int) DEF_HELPER_4(name ## sd, void, env, Reg, Reg, Reg) #define SSE_HELPER_S3(name) \ SSE_HELPER_P3(name) \ - DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ - DEF_HELPER_3(name ## sd, void, env, Reg, Reg) + DEF_HELPER_4(name ## ss, void, env, Reg, Reg, Reg) \ + DEF_HELPER_4(name 
## sd, void, env, Reg, Reg, Reg) #else #define SSE_HELPER_S4(name, ...) SSE_HELPER_P4(name) #define SSE_HELPER_S3(name, ...) SSE_HELPER_P3(name) @@ -179,8 +179,8 @@ DEF_HELPER_3(glue(cvttps2dq, SUFFIX), void, env, ZMMReg= , ZMMReg) DEF_HELPER_3(glue(cvttpd2dq, SUFFIX), void, env, ZMMReg, ZMMReg) =20 #if SHIFT =3D=3D 1 -DEF_HELPER_3(cvtss2sd, void, env, Reg, Reg) -DEF_HELPER_3(cvtsd2ss, void, env, Reg, Reg) +DEF_HELPER_4(cvtss2sd, void, env, Reg, Reg, Reg) +DEF_HELPER_4(cvtsd2ss, void, env, Reg, Reg, Reg) DEF_HELPER_3(cvtpi2ps, void, env, ZMMReg, MMXReg) DEF_HELPER_3(cvtpi2pd, void, env, ZMMReg, MMXReg) DEF_HELPER_3(cvtsi2ss, void, env, ZMMReg, i32) @@ -214,8 +214,8 @@ DEF_HELPER_3(glue(rsqrtps, SUFFIX), void, env, ZMMReg, = ZMMReg) DEF_HELPER_3(glue(rcpps, SUFFIX), void, env, ZMMReg, ZMMReg) =20 #if SHIFT =3D=3D 1 -DEF_HELPER_3(rsqrtss, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(rcpss, void, env, ZMMReg, ZMMReg) +DEF_HELPER_4(rsqrtss, void, env, ZMMReg, ZMMReg, ZMMReg) +DEF_HELPER_4(rcpss, void, env, ZMMReg, ZMMReg, ZMMReg) DEF_HELPER_3(extrq_r, void, env, ZMMReg, ZMMReg) DEF_HELPER_4(extrq_i, void, env, ZMMReg, int, int) DEF_HELPER_3(insertq_r, void, env, ZMMReg, ZMMReg) @@ -342,8 +342,8 @@ DEF_HELPER_3(glue(phminposuw, SUFFIX), void, env, Reg, = Reg) DEF_HELPER_4(glue(roundps, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(roundpd, SUFFIX), void, env, Reg, Reg, i32) #if SHIFT =3D=3D 1 -DEF_HELPER_4(roundss_xmm, void, env, Reg, Reg, i32) -DEF_HELPER_4(roundsd_xmm, void, env, Reg, Reg, i32) +DEF_HELPER_5(roundss_xmm, void, env, Reg, Reg, Reg, i32) +DEF_HELPER_5(roundsd_xmm, void, env, Reg, Reg, Reg, i32) #endif DEF_HELPER_5(glue(blendps, SUFFIX), void, env, Reg, Reg, Reg, i32) DEF_HELPER_5(glue(blendpd, SUFFIX), void, env, Reg, Reg, Reg, i32) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index d20835f96d..a974fb28f9 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2930,6 +2930,9 @@ static bool first =3D 
true; static unsigned long limi= t; #define SSE_OP(sname, dname, op, flags) OP(op, flags, \ gen_helper_##sname##_xmm, gen_helper_##dname##_xmm, NULL, NULL) =20 +#define SSE_OP_UNARY(a, b, c, d) \ + {SSE_OPF_SCALAR | SSE_OPF_V0, {{.op1 =3D a}, {.op1 =3D b}, {.op2 =3D c= }, {.op2 =3D d} } } + typedef union SSEFuncs { SSEFunc_0_epp op1; SSEFunc_0_ppi op1i; @@ -2972,12 +2975,12 @@ static const struct SSEOpHelper_table1 sse_op_table= 1[256] =3D { [0x2f] =3D OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR | SSE_OPF_V0, gen_helper_comiss, gen_helper_comisd, NULL, NULL), [0x50] =3D SSE_SPECIAL, /* movmskps, movmskpd */ - [0x51] =3D OP(op1, SSE_OPF_SCALAR | SSE_OPF_V0, + [0x51] =3D SSE_OP_UNARY( gen_helper_sqrtps_xmm, gen_helper_sqrtpd_xmm, gen_helper_sqrtss, gen_helper_sqrtsd), - [0x52] =3D OP(op1, SSE_OPF_SCALAR | SSE_OPF_V0, + [0x52] =3D SSE_OP_UNARY( gen_helper_rsqrtps_xmm, NULL, gen_helper_rsqrtss, NULL), - [0x53] =3D OP(op1, SSE_OPF_SCALAR | SSE_OPF_V0, + [0x53] =3D SSE_OP_UNARY( gen_helper_rcpps_xmm, NULL, gen_helper_rcpss, NULL), [0x54] =3D SSE_OP(pand, pand, op2, 0), /* andps, andpd */ [0x55] =3D SSE_OP(pandn, pandn, op2, 0), /* andnps, andnpd */ @@ -2985,9 +2988,9 @@ static const struct SSEOpHelper_table1 sse_op_table1[= 256] =3D { [0x57] =3D SSE_OP(pxor, pxor, op2, 0), /* xorps, xorpd */ [0x58] =3D SSE_FOP(add), [0x59] =3D SSE_FOP(mul), - [0x5a] =3D OP(op1, SSE_OPF_SCALAR | SSE_OPF_V0, - gen_helper_cvtps2pd_xmm, gen_helper_cvtpd2ps_xmm, - gen_helper_cvtss2sd, gen_helper_cvtsd2ss), + [0x5a] =3D SSE_OP_UNARY( + gen_helper_cvtps2pd_xmm, gen_helper_cvtpd2ps_xmm, + gen_helper_cvtss2sd, gen_helper_cvtsd2ss), [0x5b] =3D OP(op1, SSE_OPF_V0, gen_helper_cvtdq2ps_xmm, gen_helper_cvtps2dq_xmm, gen_helper_cvttps2dq_xmm, NULL), @@ -3283,8 +3286,8 @@ static const struct SSEOpHelper_table6 sse_op_table6[= 256] =3D { static const struct SSEOpHelper_table7 sse_op_table7[256] =3D { [0x08] =3D UNARY_OP(roundps, SSE41, 0), [0x09] =3D UNARY_OP(roundpd, SSE41, 0), - [0x0a] =3D UNARY_OP(roundss, 
SSE41, SSE_OPF_SCALAR), - [0x0b] =3D UNARY_OP(roundsd, SSE41, SSE_OPF_SCALAR), + [0x0a] =3D BINARY_OP(roundss, SSE41, SSE_OPF_SCALAR), + [0x0b] =3D BINARY_OP(roundsd, SSE41, SSE_OPF_SCALAR), [0x0c] =3D BINARY_OP(blendps, SSE41, 0), [0x0d] =3D BINARY_OP(blendpd, SSE41, 0), [0x0e] =3D BINARY_OP(pblendw, SSE41, SSE_OPF_MMX), @@ -4560,7 +4563,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s= , int b, =20 tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - if (sse_op_flags & SSE_OPF_V0) { + if ((sse_op_flags & SSE_OPF_V0) && + !((sse_op_flags & SSE_OPF_SCALAR) && b1 >=3D 2)) { if (sse_op_flags & SSE_OPF_SHUF) { val =3D x86_ldub_code(env, s); sse_op_fn.op1i(s->ptr0, s->ptr1, tcg_const_i32(val)); --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663711876; cv=none; d=zohomail.com; s=zohoarc; b=TrIRdCtKOlxq1mSsIeHa77YALeA7n5+8u+AVkauWEGfLBoOjewtTjuvpr2FR3FS5N7v5b9BRw9cCRDfNZvMfB1PudxEhlzhBccPA4nMU/bdPtSfhr04i7HFWz7WbahTwLmPj4v5wstnJDsQpqyq2t9TjgJ6kUjEeUDo/7hLxYXk= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663711876; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=KT2NOlwaQokd6chNCwWU1zFutgO7jY0xwNMqsV7fX8o=; b=IPnNQ01oJssuKuqnGiq0vHNOfuu0t9wpNMPWaVDotRu2+ro6JfKC/EOUAu+v6jGMvsFj3JihkiwSfo0Wg8VnB/v+9c5zZv9LWPtOvcMgLjY7PNibsP+FvQMG6BdRdlTo8zDSPG67A+adk/oo1TMXzA6T5fJvJNweJ32YDatq3Ww= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 
as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663711876209129.27275004557396; Tue, 20 Sep 2022 15:11:16 -0700 (PDT) Received: from localhost ([::1]:35862 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalSV-0001PM-5U for importer@patchew.org; Tue, 20 Sep 2022 18:11:15 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58688) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0A-0006QS-EZ for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:55 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:26637) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah08-0002Aq-Mm for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:42 -0400 Received: from mail-ej1-f71.google.com (mail-ej1-f71.google.com [209.85.218.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-648-zXkLDW-6Pci0uKkopfJn-A-1; Tue, 20 Sep 2022 13:25:36 -0400 Received: by mail-ej1-f71.google.com with SMTP id du20-20020a17090772d400b0077df99c5ab3so1785745ejc.3 for ; Tue, 20 Sep 2022 10:25:36 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id s20-20020a508d14000000b0044657ecfbb5sm225981eds.13.2022.09.20.10.25.34 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:34 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694740; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: 
in-reply-to:in-reply-to:references:references; bh=KT2NOlwaQokd6chNCwWU1zFutgO7jY0xwNMqsV7fX8o=; b=K9rBzjBTkbaMC53VoC3nPuta9PDB7iuJia/Tisntbe1w6J2xmzgVt9cc+G7TSMPqOehZ14 G1ZUfGccZz4kqOiOrZYVFYGBVlFn429tZRz1Lw2nHvH+wSBYium2UJYVa2f2Jd4yBYRoI3 Quj7t1fTFuwv+bsrvraToP9Ae3gavzg= X-MC-Unique: zXkLDW-6Pci0uKkopfJn-A-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=KT2NOlwaQokd6chNCwWU1zFutgO7jY0xwNMqsV7fX8o=; b=QJGNbNeZl6H61rwsUBLcv6E36mjfE5DfEuBSethaixs+BT6PImntKIvK7D8LnxrZys 3Dbvp3upaaaGDF3pyKuguU60DhhY9v9Uh2kctPEeX3HLiDzPiYoOKYglQEDk3ZgKUBBY ag7C2MT+5ELqiVCTB1n4kFLnyi9LKg7sF7d5TOgG+u6wVvCCWYogazzoW0yMRxeqLcsJ oqm0dI3udn3S/L/kHFdKLdF1qpK6AoRlUOp5nSt2Cyv3N0jYWepq+gKzpBBhRtDb28DQ bUJYjUKV3Lf0EBRFAf4Zexn2w8XZrxU9Re2Qb8M0oUj9to9hOBEDlJnNBykWYM3/XekI lj2w== X-Gm-Message-State: ACrzQf0xJJ8PkorO9YvNkGnsnv1l865UA39+pb1wJHXz922OSGDLzdwQ E7vkDn264FbtBhZ1o8yNV7eCa7m5BSNXSH1ShIk4Q77T1hmKKU6I0xauTtjD3pCfBLSuWGBjY7j QbQ8L86uG9Lpkw2S8U0hP16qzG2Yh2uStV0rhMl+aYB3FHvNkrD+uqQ03bHPBolEAdVA= X-Received: by 2002:a17:907:ea0:b0:779:6c9d:7355 with SMTP id ho32-20020a1709070ea000b007796c9d7355mr17271820ejc.542.1663694735578; Tue, 20 Sep 2022 10:25:35 -0700 (PDT) X-Google-Smtp-Source: AMsMyM52pWp+U2TgEWK8zFXICCb2ufYsOJe4GWMYDS9HjjnyASrtl+eRZGCSoaKiYXPGQBE+XXIDFg== X-Received: by 2002:a17:907:ea0:b0:779:6c9d:7355 with SMTP id ho32-20020a1709070ea000b007796c9d7355mr17271799ejc.542.1663694735281; Tue, 20 Sep 2022 10:25:35 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 17/37] target/i386: implement additional AVX comparison operators Date: Tue, 20 Sep 2022 19:24:47 +0200 Message-Id: <20220920172507.95568-18-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: 
<20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663711877034100001 Content-Type: text/plain; charset="utf-8" The new implementation of SSE will cover AVX from the get go, so include the 24 extra comparison operators that are only available with the VEX prefix. Based on a patch by Paul Brook . 
Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/ops_sse.h | 38 ++++++++++++++++++++++++++++++++++++ target/i386/ops_sse_header.h | 27 +++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 8bb7293975..18d217ebf5 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -1075,10 +1075,21 @@ void glue(helper_addsubpd, SUFFIX)(CPUX86State *env= , Reg *d, Reg *v, Reg *s) } = \ } =20 +static inline bool FPU_EQU(FloatRelation x) +{ + return (x =3D=3D float_relation_equal || x =3D=3D float_relation_unord= ered); +} +static inline bool FPU_GE(FloatRelation x) +{ + return (x =3D=3D float_relation_equal || x =3D=3D float_relation_great= er); +} #define FPU_EQ(x) (x =3D=3D float_relation_equal) #define FPU_LT(x) (x =3D=3D float_relation_less) #define FPU_LE(x) (x <=3D float_relation_equal) +#define FPU_GT(x) (x =3D=3D float_relation_greater) #define FPU_UNORD(x) (x =3D=3D float_relation_unordered) +/* We must make sure we evaluate the argument in case it is a signalling N= AN */ +#define FPU_FALSE(x) (x =3D=3D float_relation_equal && 0) =20 #define FPU_CMPQ(size, a, b) \ float ## size ## _compare_quiet(a, b, &env->sse_status) @@ -1098,6 +1109,33 @@ SSE_HELPER_CMP(cmpnlt, FPU_CMPS, !FPU_LT) SSE_HELPER_CMP(cmpnle, FPU_CMPS, !FPU_LE) SSE_HELPER_CMP(cmpord, FPU_CMPQ, !FPU_UNORD) =20 +SSE_HELPER_CMP(cmpequ, FPU_CMPQ, FPU_EQU) +SSE_HELPER_CMP(cmpnge, FPU_CMPS, !FPU_GE) +SSE_HELPER_CMP(cmpngt, FPU_CMPS, !FPU_GT) +SSE_HELPER_CMP(cmpfalse, FPU_CMPQ, FPU_FALSE) +SSE_HELPER_CMP(cmpnequ, FPU_CMPQ, !FPU_EQU) +SSE_HELPER_CMP(cmpge, FPU_CMPS, FPU_GE) +SSE_HELPER_CMP(cmpgt, FPU_CMPS, FPU_GT) +SSE_HELPER_CMP(cmptrue, FPU_CMPQ, !FPU_FALSE) + +SSE_HELPER_CMP(cmpeqs, FPU_CMPS, FPU_EQ) +SSE_HELPER_CMP(cmpltq, FPU_CMPQ, FPU_LT) +SSE_HELPER_CMP(cmpleq, FPU_CMPQ, FPU_LE) +SSE_HELPER_CMP(cmpunords, FPU_CMPS, FPU_UNORD) +SSE_HELPER_CMP(cmpneqq, FPU_CMPS, !FPU_EQ) +SSE_HELPER_CMP(cmpnltq, FPU_CMPQ, 
!FPU_LT) +SSE_HELPER_CMP(cmpnleq, FPU_CMPQ, !FPU_LE) +SSE_HELPER_CMP(cmpords, FPU_CMPS, !FPU_UNORD) + +SSE_HELPER_CMP(cmpequs, FPU_CMPS, FPU_EQU) +SSE_HELPER_CMP(cmpngeq, FPU_CMPQ, !FPU_GE) +SSE_HELPER_CMP(cmpngtq, FPU_CMPQ, !FPU_GT) +SSE_HELPER_CMP(cmpfalses, FPU_CMPS, FPU_FALSE) +SSE_HELPER_CMP(cmpnequs, FPU_CMPS, !FPU_EQU) +SSE_HELPER_CMP(cmpgeq, FPU_CMPQ, FPU_GE) +SSE_HELPER_CMP(cmpgtq, FPU_CMPQ, FPU_GT) +SSE_HELPER_CMP(cmptrues, FPU_CMPS, !FPU_FALSE) + #undef SSE_HELPER_CMP =20 #if SHIFT =3D=3D 1 diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 32ffa8445b..e7866a8395 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -237,6 +237,33 @@ SSE_HELPER_CMP(cmpnlt, FPU_CMPS, !FPU_LT) SSE_HELPER_CMP(cmpnle, FPU_CMPS, !FPU_LE) SSE_HELPER_CMP(cmpord, FPU_CMPQ, !FPU_UNORD) =20 +SSE_HELPER_CMP(cmpequ, FPU_CMPQ, FPU_EQU) +SSE_HELPER_CMP(cmpnge, FPU_CMPS, !FPU_GE) +SSE_HELPER_CMP(cmpngt, FPU_CMPS, !FPU_GT) +SSE_HELPER_CMP(cmpfalse, FPU_CMPQ, FPU_FALSE) +SSE_HELPER_CMP(cmpnequ, FPU_CMPQ, !FPU_EQU) +SSE_HELPER_CMP(cmpge, FPU_CMPS, FPU_GE) +SSE_HELPER_CMP(cmpgt, FPU_CMPS, FPU_GT) +SSE_HELPER_CMP(cmptrue, FPU_CMPQ, !FPU_FALSE) + +SSE_HELPER_CMP(cmpeqs, FPU_CMPS, FPU_EQ) +SSE_HELPER_CMP(cmpltq, FPU_CMPQ, FPU_LT) +SSE_HELPER_CMP(cmpleq, FPU_CMPQ, FPU_LE) +SSE_HELPER_CMP(cmpunords, FPU_CMPS, FPU_UNORD) +SSE_HELPER_CMP(cmpneqq, FPU_CMPS, !FPU_EQ) +SSE_HELPER_CMP(cmpnltq, FPU_CMPQ, !FPU_LT) +SSE_HELPER_CMP(cmpnleq, FPU_CMPQ, !FPU_LE) +SSE_HELPER_CMP(cmpords, FPU_CMPS, !FPU_UNORD) + +SSE_HELPER_CMP(cmpequs, FPU_CMPS, FPU_EQU) +SSE_HELPER_CMP(cmpngeq, FPU_CMPQ, !FPU_GE) +SSE_HELPER_CMP(cmpngtq, FPU_CMPQ, !FPU_GT) +SSE_HELPER_CMP(cmpfalses, FPU_CMPS, FPU_FALSE) +SSE_HELPER_CMP(cmpnequs, FPU_CMPS, !FPU_EQU) +SSE_HELPER_CMP(cmpgeq, FPU_CMPQ, FPU_GE) +SSE_HELPER_CMP(cmpgtq, FPU_CMPQ, FPU_GT) +SSE_HELPER_CMP(cmptrues, FPU_CMPS, !FPU_FALSE) + #if SHIFT =3D=3D 1 DEF_HELPER_3(ucomiss, void, env, Reg, Reg) DEF_HELPER_3(comiss, void, 
env, Reg, Reg) --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663712379; cv=none; d=zohomail.com; s=zohoarc; b=Vwk7Qq3Zwl01LQvO/TCVYZpeN1cato1HQcmIqzZYPpJIdJ5nofeX5WD7iA84FAP6/pjbDcxbjIhrAjuYaRzpwChMlaWBf9nzzznMC9dNXVRnDyNUdI00/4cmSnaanSwxFVC9LwrC6f9ow0IRoOoraTriBIchLGlh+RdXeCXjtvQ= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663712379; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=Ki83e2Sfs73t8nh59NwsARPUhdXZ3icuXqbcNfYW5rk=; b=WzcmPCEaqOa/XeDutDpCXNTFr2vMz+oyQtG7LJwNmigj3n4xDN1sNnOWwC9ddA6qy8eXPowSTskjr5ekBJQJrSUwRMr3jHF6l1uTmzDlR6WWz314Iim3FQB+7v2TDIoBnby2GVLeref7MPMnWV/mP5tVtY4+s8o7fQHOyPQS1tA= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663712379637504.8263267827391; Tue, 20 Sep 2022 15:19:39 -0700 (PDT) Received: from localhost ([::1]:35940 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalab-0003kv-Gh for importer@patchew.org; Tue, 20 Sep 2022 18:19:37 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58694) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0C-0006Qc-O5 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:55 -0400 
Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:20418) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0A-0002BE-72 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:43 -0400 Received: from mail-ej1-f70.google.com (mail-ej1-f70.google.com [209.85.218.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-126-3jRzBHMVOPKZzM2MS63sjg-1; Tue, 20 Sep 2022 13:25:39 -0400 Received: by mail-ej1-f70.google.com with SMTP id gv43-20020a1709072beb00b0077c3f58a03eso1770309ejc.4 for ; Tue, 20 Sep 2022 10:25:38 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id hb19-20020a170906b89300b0077fc47605b4sm108711ejb.217.2022.09.20.10.25.35 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:36 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694741; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=Ki83e2Sfs73t8nh59NwsARPUhdXZ3icuXqbcNfYW5rk=; b=BttnwrwpVnBhLgrn8L6c1963KVaDnaOw72JISqu3+LupH9ho9r7Vtyxk2g71PyNYaw1FPq FptsA6FTzezGGpImjmYOx5DGRYxlriYjgV8erDD5ELzu8u/jauwq0rLTdaumkP3hm8Z+jM eX53S/ANNnitHUcRYCX7frBk9Gs/beU= X-MC-Unique: 3jRzBHMVOPKZzM2MS63sjg-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=Ki83e2Sfs73t8nh59NwsARPUhdXZ3icuXqbcNfYW5rk=; b=ETwAq8U7tyg3DNICmlfzj5fJh2aibkHfTYGgHZ9vEIlD+eK1KBCdh6dZjgFttXMCIt /Ga2Cr2yQ9iQBvHsT9rA4OYNExkkwvNI1wY9tyF2haEyPgrj1eByQwIc7Xud6wQLvRWP 
XN7uxFKNA6JKuxZVEN6mLb3cGxHjaXQScmTSMowcGIm6wGsb+hkdO4pWbkmQs5rbBhEd QN7QpavpWGA0vZizu0SwL/btMUsJw+R+Co/hbQUTkt205MhkVtoEZoEalKZe01Pab0qL P5K0KQ+VL9yrQZTtlSJQRNEdYVTs2VLC0yfhSb1oADmlnjadmvobNr6m+LDVoecFsp3p JM6w== X-Gm-Message-State: ACrzQf3ps5t0F1Bcb/dGDz+zvILR9nPYM5n7n6U17tAkxmTaP8F/vBaa osgvFT309Qh7nViebKpZIwkPcPKRkfA5fyWuBbutmF73bqQk3EDwYArC3/iU6xnBVK2fFcYxwOa vUfqVwAeiGxV0yxQtGtacv7csXd5i/2visJpkSTsDoJGgSUsF9mNeUc4wxAcwD84hFzg= X-Received: by 2002:a17:907:2cd3:b0:77c:3e23:7bec with SMTP id hg19-20020a1709072cd300b0077c3e237becmr18514406ejc.380.1663694736944; Tue, 20 Sep 2022 10:25:36 -0700 (PDT) X-Google-Smtp-Source: AMsMyM66oYPDyCt3VUrlKqJFeZ7fc3ay75aWSQDl+KQCg16la7lDOzmzC4xuNqs8QLXQAd407CQ/FQ== X-Received: by 2002:a17:907:2cd3:b0:77c:3e23:7bec with SMTP id hg19-20020a1709072cd300b0077c3e237becmr18514377ejc.380.1663694736646; Tue, 20 Sep 2022 10:25:36 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 18/37] target/i386: Introduce 256-bit vector helpers Date: Tue, 20 Sep 2022 19:24:48 +0200 Message-Id: <20220920172507.95568-19-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no 
action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663712380721100001 Content-Type: text/plain; charset="utf-8" The new implementation of SSE will cover AVX from the get go, because all the work for the helper functions is already done. We just need to build them. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/helper.h | 2 ++ target/i386/ops_sse.h | 5 +++++ target/i386/ops_sse_header.h | 4 ++++ target/i386/tcg/fpu_helper.c | 3 +++ 4 files changed, 14 insertions(+) diff --git a/target/i386/helper.h b/target/i386/helper.h index ac3b4d1ee3..3da5df98b9 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -218,6 +218,8 @@ DEF_HELPER_3(movq, void, env, ptr, ptr) #include "ops_sse_header.h" #define SHIFT 1 #include "ops_sse_header.h" +#define SHIFT 2 +#include "ops_sse_header.h" =20 DEF_HELPER_3(rclb, tl, env, tl, tl) DEF_HELPER_3(rclw, tl, env, tl, tl) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 18d217ebf5..090ba013b3 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -35,7 +35,11 @@ #define W(n) ZMM_W(n) #define L(n) ZMM_L(n) #define Q(n) ZMM_Q(n) +#if SHIFT =3D=3D 1 #define SUFFIX _xmm +#else +#define SUFFIX _ymm +#endif #endif =20 #define LANE_WIDTH (SHIFT ? 
16 : 8) @@ -2379,6 +2383,7 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State= *env, Reg *d, Reg *s, =20 #undef SSE_HELPER_S =20 +#undef LANE_WIDTH #undef SHIFT #undef XMM_ONLY #undef Reg diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index e7866a8395..440f1c0e78 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -21,7 +21,11 @@ #define SUFFIX _mmx #else #define Reg ZMMReg +#if SHIFT =3D=3D 1 #define SUFFIX _xmm +#else +#define SUFFIX _ymm +#endif #endif =20 #define dh_alias_Reg ptr diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 48bf0c5cf8..819e920ec6 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -3053,3 +3053,6 @@ void helper_movq(CPUX86State *env, void *d, void *s) =20 #define SHIFT 1 #include "ops_sse.h" + +#define SHIFT 2 +#include "ops_sse.h" --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663710710; cv=none; d=zohomail.com; s=zohoarc; b=hAd72+Yzgi2DBVKdLcCscgPtsw1+H7NHn5uDqYu17D0+JhZVURloUqD7Jy9XoYklfguOwW0LovvycEAGNZsLMnhQIy2qnLjL7In91TX/9JHMT0OB88RksXxezbWDuXyLp1IDiw5z2CN8ZrFpdrzgYtff544BspDEiqAtDjrZLrI= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663710710; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=Fnp/J9qGLyC/xVWjo8F4/x/2VDjblWvAu1wxTikTieQ=; b=H0UNJrgOrFPAELfm+vQQUIbfLmhw+v0jcXvCcYGs54ROPjlLp7pHM60wpvdHXTvWl5wb7WS/ApfldFu3jRkEaXj60a6Bct2FNzpbX8kBjlqIsDDw2yzos2VLf0xMRlKEsN2shooFUJ2Hev+vaDKWx4FY2H8clAnGuV6XQDCGRpM= 
ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 16637107100671005.6001249667149; Tue, 20 Sep 2022 14:51:50 -0700 (PDT) Received: from localhost ([::1]:45708 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oal9g-0000Sf-RI for importer@patchew.org; Tue, 20 Sep 2022 17:51:48 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58692) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0C-0006Qb-Ny for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:57 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:52989) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah09-0002BA-SO for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:43 -0400 Received: from mail-ej1-f69.google.com (mail-ej1-f69.google.com [209.85.218.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-261-IDcOZwZaN7G_Tqa5pRcSPg-1; Tue, 20 Sep 2022 13:25:40 -0400 Received: by mail-ej1-f69.google.com with SMTP id qb30-20020a1709077e9e00b0077d1271283eso1784524ejc.2 for ; Tue, 20 Sep 2022 10:25:39 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id 1-20020a170906210100b0073dd8e5a39fsm134376ejt.156.2022.09.20.10.25.37 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:37 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694741; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: 
to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=Fnp/J9qGLyC/xVWjo8F4/x/2VDjblWvAu1wxTikTieQ=; b=Chela0SDOr9e8jWt9zQ6G0o4292uJk8JxQ8qrkkQNVBoc/YWHZwwUTJZd1QrQvAf/oSWcr qMzYGNBK9HKQ75MO3cNe2iHtZOvtUIe+B60DxMXKX8UtN8s1ruMgFgrSrq3D8BTsNZP/v1 Ht7B9nEQj5C/1+7VPfvLklU5fiH4/8Y= X-MC-Unique: IDcOZwZaN7G_Tqa5pRcSPg-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=Fnp/J9qGLyC/xVWjo8F4/x/2VDjblWvAu1wxTikTieQ=; b=HHnprdI3mlqWlwmeuUxuZf6zYRt3ndVdsUJoOEVaENTwsgWA+KPz1HZaZs/EET/wAX okvElnlhc5g/xP3cpxD1EqXVubSHF1Hr2swcQ+hTl410b5smfHRy5aS7+kyS3H3W+3ZJ blXxC1geskuXoxAfrgyCjxvn8Bn9sX77AZbHEf9ysuWLWuIw6lyt186jT6oz6WeKhFkV Mz3225Q4cUa3wK/68wTMEO6hKX/n65xBWfbyNlIdQFmqnoQ1MpN0fWWAp0qhKf56f51G T5DJvp63Jc7Z6u00IdildM0wOUvBCOOffT98b3jBzEX4X/b1ncDcaCvmVJ+O8Lfhimiq 5T5g== X-Gm-Message-State: ACrzQf1zkoG7gjzb9UNkm7GFB8xXHN5bS81rXFnam06R0A9zQ//XGF1o jgtM5xi02DoC5/D48THXmNKS1NWC6O/9rDFfdFTN7sdSMNr9CryHrmt0a9khG82br5j9Hek1e70 z0sHyt0gxLfgUEo7rbRfTj/n/uTqoRL39dFVISdJ5PNy9zL1ziH2GMHgu33geW8GNdEo= X-Received: by 2002:aa7:cfd1:0:b0:451:de20:3392 with SMTP id r17-20020aa7cfd1000000b00451de203392mr21184411edy.16.1663694738538; Tue, 20 Sep 2022 10:25:38 -0700 (PDT) X-Google-Smtp-Source: AMsMyM5zE5jwnftWYqhkdzo+/s0fISetUwQu/BJrmoT8gqVG9X6WhzN0LqwA25c/5FX53o7eTyrZiQ== X-Received: by 2002:aa7:cfd1:0:b0:451:de20:3392 with SMTP id r17-20020aa7cfd1000000b00451de203392mr21184370edy.16.1663694738155; Tue, 20 Sep 2022 10:25:38 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 19/37] target/i386: reimplement 0x0f 0x60-0x6f, add AVX Date: Tue, 20 Sep 2022 19:24:49 +0200 Message-Id: <20220920172507.95568-20-pbonzini@redhat.com> X-Mailer: 
git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663710711573100001 Content-Type: text/plain; charset="utf-8" These are both MMX and SSE/AVX instructions, except for vmovdqu. In both cases the inputs and output are in s->ptr{0,1,2}, so the only difference between MMX, SSE, and AVX is which helper to call. 
Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 42 +++++++ target/i386/tcg/emit.c.inc | 203 +++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 19 ++- 3 files changed, 263 insertions(+), 1 deletion(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index 4344bcb40c..6635c41a2f 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -135,6 +135,19 @@ static uint8_t get_modrm(DisasContext *s, CPUX86State = *env) return s->modrm; } =20 +static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X8= 6OpEntry entries[4]) +{ + if (s->prefix & PREFIX_REPNZ) { + return &entries[3]; + } else if (s->prefix & PREFIX_REPZ) { + return &entries[2]; + } else if (s->prefix & PREFIX_DATA) { + return &entries[1]; + } else { + return &entries[0]; + } +} + static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *= entry, uint8_t *b) { static const X86GenFunc group17_gen[8] =3D { @@ -144,6 +157,17 @@ static void decode_group17(DisasContext *s, CPUX86Stat= e *env, X86OpEntry *entry, entry->gen =3D group17_gen[op]; } =20 +static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F6F[4] =3D { + X86_OP_ENTRY3(MOVDQ, P,q, None,None, Q,q, vex1 mmx), /* mov= q */ + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1), /* mov= dqa */ + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* mov= dqu */ + {}, + }; + *entry =3D *decode_by_prefix(s, opcodes_0F6F); +} + static const X86OpEntry opcodes_0F38_00toEF[240] =3D { }; =20 @@ -229,8 +253,26 @@ static void decode_0F3A(DisasContext *s, CPUX86State *= env, X86OpEntry *entry, ui } =20 static const X86OpEntry opcodes_0F[256] =3D { + [0x60] =3D X86_OP_ENTRY3(PUNPCKLBW, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x61] =3D X86_OP_ENTRY3(PUNPCKLWD, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x62] =3D 
X86_OP_ENTRY3(PUNPCKLDQ, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x63] =3D X86_OP_ENTRY3(PACKSSWB, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x64] =3D X86_OP_ENTRY3(PCMPGTB, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x65] =3D X86_OP_ENTRY3(PCMPGTW, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x66] =3D X86_OP_ENTRY3(PCMPGTD, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x67] =3D X86_OP_ENTRY3(PACKUSWB, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x38] =3D X86_OP_GROUP0(0F38), [0x3a] =3D X86_OP_GROUP0(0F3A), + + [0x68] =3D X86_OP_ENTRY3(PUNPCKHBW, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x69] =3D X86_OP_ENTRY3(PUNPCKHWD, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x6a] =3D X86_OP_ENTRY3(PUNPCKHDQ, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x6b] =3D X86_OP_ENTRY3(PACKSSDW, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x6c] =3D X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x, vex4 p_66 avx2_25= 6), + [0x6d] =3D X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x, vex4 p_66 avx2_25= 6), + [0x6e] =3D X86_OP_ENTRY3(MOVD_to, V,x, None,None, E,y, vex5 mmx p_0= 0_66), /* wrong dest Vy on SDM! 
*/ + [0x6f] =3D X86_OP_GROUP0(0F6F), }; =20 static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *en= try, uint8_t *b) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 862da3c84a..60b80e9d30 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -71,6 +71,56 @@ static inline int xmm_offset(MemOp ot) } } =20 +static int vector_reg_offset(X86DecodedOp *op) +{ + assert(op->unit =3D=3D X86_OP_MMX || op->unit =3D=3D X86_OP_SSE); + + if (op->unit =3D=3D X86_OP_MMX) { + return op->offset - mmx_offset(op->ot); + } else { + return op->offset - xmm_offset(op->ot); + } +} + +static int vector_elem_offset(X86DecodedOp *op, MemOp ot, int n) +{ + int base_ofs =3D vector_reg_offset(op); + switch(ot) { + case MO_8: + if (op->unit =3D=3D X86_OP_MMX) { + return base_ofs + offsetof(MMXReg, MMX_B(n)); + } else { + return base_ofs + offsetof(ZMMReg, ZMM_B(n)); + } + case MO_16: + if (op->unit =3D=3D X86_OP_MMX) { + return base_ofs + offsetof(MMXReg, MMX_W(n)); + } else { + return base_ofs + offsetof(ZMMReg, ZMM_W(n)); + } + case MO_32: + if (op->unit =3D=3D X86_OP_MMX) { + return base_ofs + offsetof(MMXReg, MMX_L(n)); + } else { + return base_ofs + offsetof(ZMMReg, ZMM_L(n)); + } + case MO_64: + if (op->unit =3D=3D X86_OP_MMX) { + return base_ofs; + } else { + return base_ofs + offsetof(ZMMReg, ZMM_Q(n)); + } + case MO_128: + assert(op->unit =3D=3D X86_OP_SSE); + return base_ofs + offsetof(ZMMReg, ZMM_X(n)); + case MO_256: + assert(op->unit =3D=3D X86_OP_SSE); + return base_ofs + offsetof(ZMMReg, ZMM_Y(n)); + default: + g_assert_not_reached(); + } +} + static void compute_mmx_offset(X86DecodedOp *op) { if (!op->has_ea) { @@ -174,6 +224,23 @@ static void gen_load(DisasContext *s, X86DecodedInsn *= decode, int opn, TCGv v) } } =20 +static TCGv_ptr op_ptr(X86DecodedInsn *decode, int opn) +{ + X86DecodedOp *op =3D &decode->op[opn]; + if (op->v_ptr) { + return op->v_ptr; + } + op->v_ptr =3D tcg_temp_new_ptr(); + + /* The 
temporary points to the MMXReg or ZMMReg. */ + tcg_gen_addi_ptr(op->v_ptr, cpu_env, vector_reg_offset(op)); + return op->v_ptr; +} + +#define OP_PTR0 op_ptr(decode, 0) +#define OP_PTR1 op_ptr(decode, 1) +#define OP_PTR2 op_ptr(decode, 2) + static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn= , TCGv v) { X86DecodedOp *op =3D &decode->op[opn]; @@ -216,6 +283,114 @@ static void gen_writeback(DisasContext *s, X86Decoded= Insn *decode, int opn, TCGv } } =20 +static inline int vector_len(DisasContext *s, X86DecodedInsn *decode) +{ + if (decode->e.special =3D=3D X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + return 8; + } + return s->vex_l ? 32 : 16; +} + +static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src= _ofs) +{ + MemOp ot =3D decode->op[0].ot; + int vec_len =3D vector_len(s, decode); + bool aligned =3D sse_needs_alignment(s, decode, ot); + + if (!decode->op[0].has_ea) { + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, src_ofs, vec_len, ve= c_len); + return; + } + + switch (ot) { + case MO_64: + gen_stq_env_A0(s, src_ofs); + break; + case MO_128: + gen_sto_env_A0(s, src_ofs, aligned); + break; + case MO_256: + gen_sty_env_A0(s, src_ofs, aligned); + break; + default: + g_assert_not_reached(); + } +} + +#define BINARY_INT_GVEC(uname, func, ...) 
= \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + int vec_len =3D vector_len(s, decode); = \ + = \ + func(__VA_ARGS__, = \ + decode->op[0].offset, decode->op[1].offset, = \ + decode->op[2].offset, vec_len, vec_len); = \ +} + +BINARY_INT_GVEC(PCMPGTB, tcg_gen_gvec_cmp, TCG_COND_GT, MO_8) +BINARY_INT_GVEC(PCMPGTW, tcg_gen_gvec_cmp, TCG_COND_GT, MO_16) +BINARY_INT_GVEC(PCMPGTD, tcg_gen_gvec_cmp, TCG_COND_GT, MO_32) + + +/* + * 00 =3D p* Pq, Qq (if mmx not NULL; no VEX) + * 66 =3D vp* Vx, Hx, Wx + * + * These are really the same encoding, because 1) V is the same as P when = VEX.V + * is not present 2) P and Q are the same as H and W apart from MM/XMM + */ +static inline void gen_binary_int_sse(DisasContext *s, CPUX86State *env, X= 86DecodedInsn *decode, + SSEFunc_0_eppp mmx, SSEFunc_0_eppp x= mm, SSEFunc_0_eppp ymm) +{ + assert(!!mmx =3D=3D !!(decode->e.special =3D=3D X86_SPECIAL_MMX)); + + if (mmx && (s->prefix & PREFIX_VEX) && !(s->prefix & PREFIX_DATA)) { + /* VEX encoding is not applicable to MMX instructions. */ + gen_illegal_opcode(s); + return; + } + if (!(s->prefix & PREFIX_DATA)) { + mmx(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); + } else if (!s->vex_l) { + xmm(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); + } else { + ymm(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); + } +} + + +#define BINARY_INT_MMX(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_binary_int_sse(s, env, decode, = \ + gen_helper_##lname##_mmx, = \ + gen_helper_##lname##_xmm, = \ + gen_helper_##lname##_ymm); = \ +} +BINARY_INT_MMX(PUNPCKLBW, punpcklbw) +BINARY_INT_MMX(PUNPCKLWD, punpcklwd) +BINARY_INT_MMX(PUNPCKLDQ, punpckldq) +BINARY_INT_MMX(PACKSSWB, packsswb) +BINARY_INT_MMX(PACKUSWB, packuswb) +BINARY_INT_MMX(PUNPCKHBW, punpckhbw) +BINARY_INT_MMX(PUNPCKHWD, punpckhwd) +BINARY_INT_MMX(PUNPCKHDQ, punpckhdq) +BINARY_INT_MMX(PACKSSDW, packssdw) + +/* Instructions with no MMX equivalent. 
*/ +#define BINARY_INT_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_binary_int_sse(s, env, decode, = \ + NULL, = \ + gen_helper_##lname##_xmm, = \ + gen_helper_##lname##_ymm); = \ +} + +BINARY_INT_SSE(PUNPCKLQDQ, punpcklqdq) +BINARY_INT_SSE(PUNPCKHQDQ, punpckhqdq) + static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_= op) { TCGv carry_in =3D NULL; @@ -383,6 +558,34 @@ static void gen_MOVBE(DisasContext *s, CPUX86State *en= v, X86DecodedInsn *decode) } } =20 +static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + MemOp ot =3D decode->op[2].ot; + int vec_len =3D vector_len(s, decode); + int lo_ofs =3D vector_elem_offset(&decode->op[0], ot, 0); + + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + + switch (ot) { + case MO_32: +#ifdef TARGET_X86_64 + tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); + tcg_gen_st_i32(s->tmp3_i32, cpu_env, lo_ofs); + break; + case MO_64: +#endif + tcg_gen_st_tl(s->T1, cpu_env, lo_ofs); + break; + default: + g_assert_not_reached(); + } +} + +static void gen_MOVDQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) +{ + gen_store_sse(s, decode, decode->op[2].offset); +} + static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) { MemOp ot =3D decode->op[0].ot; diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a974fb28f9..1ffbf2f4f8 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2854,6 +2854,23 @@ static void gen_ldy_env_A0(DisasContext *s, int offs= et, bool align) tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(3= ))); } =20 +static void gen_sty_env_A0(DisasContext *s, int offset, bool align) +{ + int mem_index =3D s->mem_index; + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(0= ))); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, + MO_LEUQ | (align 
? MO_ALIGN_32 : 0)); + tcg_gen_addi_tl(s->tmp0, s->A0, 8); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(1= ))); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ); + tcg_gen_addi_tl(s->tmp0, s->A0, 16); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(2= ))); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ); + tcg_gen_addi_tl(s->tmp0, s->A0, 24); + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(YMMReg, YMM_Q(3= ))); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ); +} + static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset) { tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q= (0))); @@ -4677,7 +4694,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) #ifndef CONFIG_USER_ONLY use_new &=3D b <=3D limit; #endif - if (use_new && 0) { + if (use_new && (b >=3D 0x160 && b <=3D 0x16f)) { disas_insn_new(s, cpu, b + 0x100); return s->pc; } --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663712816; cv=none; d=zohomail.com; s=zohoarc; b=g74AT7Fj4LBx2MQMwxH8X8pjnc2MLsldEnzycrz7G3bG752nxzjVUA4vFvP3FujarAo4l0J4/KGfM2I7pc/ashAsB6EXI6Bdke4WJnPxeHJGQWe1KTZ0dwyZLRzOZPhaxlXQzp8h/NLwXTEptV0N4jiHpK2k5aXXT5BqjGp5i2E= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663712816; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=mGkl/92AJmHGD0OchJP1HzUYvDeID0ABtmt3aekzqts=; 
b=NHdtuKn8QD5tneREqDcqXrY1fDiaHzulwqn3kcU9bPdlfEWV/DxySFoo76zZaqScYCxBLeEq2yvpyszmTBVAL2oewNHb/0Ewxd2jVcu8vomAFGawcxMEaPYHb37qIejdOijLQKni/JfXJoY7Xu5LFuJaHX2WehdIdIhfSJBmI4w= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663712816428928.4210586306458; Tue, 20 Sep 2022 15:26:56 -0700 (PDT) Received: from localhost ([::1]:45884 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalha-0007i7-SC for importer@patchew.org; Tue, 20 Sep 2022 18:26:51 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:57422) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0I-0006SK-69 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:57 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:60593) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0G-0002C7-7l for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:49 -0400 Received: from mail-ej1-f71.google.com (mail-ej1-f71.google.com [209.85.218.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-639-JTkBqOK_O1KXsFzLiz5Nvg-1; Tue, 20 Sep 2022 13:25:41 -0400 Received: by mail-ej1-f71.google.com with SMTP id gt19-20020a1709072d9300b00780ec98af88so1770433ejc.13 for ; Tue, 20 Sep 2022 10:25:41 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id a10-20020a50ff0a000000b00454546561cfsm153144edu.82.2022.09.20.10.25.39 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:39 -0700 (PDT) DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694747; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=mGkl/92AJmHGD0OchJP1HzUYvDeID0ABtmt3aekzqts=; b=HQ3OmqTX+BpTXH5WCXYhNwD3Q5G76LDszKlZEwDpGzMGOJynCIxw2OjUzTSwf46bzb9DHu TisxXISZ+Qk67kM3AcEaV1kk+cBHDIMCZJjxN+INu7q+V3UmFHEXfYDVVw8KW9rXbjmEZU 9lDLVKkFX+d5c7blX1SFmmRWfBEQQA4= X-MC-Unique: JTkBqOK_O1KXsFzLiz5Nvg-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=mGkl/92AJmHGD0OchJP1HzUYvDeID0ABtmt3aekzqts=; b=jPxh8GuJXfIW2ETW0JMsTaoLheXzgQhOtQ4T0fuNWNCcsTzeLohjM77cznFDkRNg92 3c5MOsdjmFbVILFEiJURZgo+jowD6EAa5RO0DjomGAWFIQYbDLNnwdjls3fnSKaXEgm+ OCzEu8v+FsghebJ778uLN4YE3m+PWLdoX+3pEKn81xxqHYiBii+g7KWDAXOLD3sDDXdi WFApLBGc8Lj3Es2EAW9XmO7lShNXQ/TQ69m4EuXMyP2zYLgYqdzHGRuOHFxsBlz8GZVm wObXppyhBXM4o00wXz/i7wcTL2yrV8doxEfciUUFlHZd75UIim7kPIyM8C+Q59325dtN z/Pw== X-Gm-Message-State: ACrzQf1ok+Bqlk9edALAbaeKHGSlPcMTiESUktQa0gJUEDEMRmbzLBV/ DZ3u7Kut3drfrXyn9hu6A5zIPb9EWH5UrU7wIs2jCu9g5chlibjJ+PHAf/pOTrMD7pTi+mkGTyy 3lwK4j3klWz++Pb8X4xSH7uMUv24WTqrbYnudD6QDIN6jxEg1UBD00h79TpJc8JXBEyI= X-Received: by 2002:a05:6402:22a9:b0:453:9ae3:a18b with SMTP id cx9-20020a05640222a900b004539ae3a18bmr15572188edb.242.1663694740493; Tue, 20 Sep 2022 10:25:40 -0700 (PDT) X-Google-Smtp-Source: AMsMyM5SvSB+j/SDlovKtCONVlXzXNxLWaiC20F36XpquyOpJXzOJ12Onn9knEmjYKrsOb+1qd5pfw== X-Received: by 2002:a05:6402:22a9:b0:453:9ae3:a18b with SMTP id cx9-20020a05640222a900b004539ae3a18bmr15572160edb.242.1663694740201; Tue, 20 Sep 2022 10:25:40 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 
20/37] target/i386: reimplement 0x0f 0xd8-0xdf, 0xe8-0xef, 0xf8-0xff, add AVX Date: Tue, 20 Sep 2022 19:24:50 +0200 Message-Id: <20220920172507.95568-21-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663712817226100001 Content-Type: text/plain; charset="utf-8" These are more simple integer instructions present in both MMX and SSE/AVX, with no holes that were later occupied by newer instructions. 
Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 28 ++++++++++++++++++++++++++++ target/i386/tcg/emit.c.inc | 32 ++++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 4 +++- 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index 6635c41a2f..d207a1f0c1 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -273,6 +273,34 @@ static const X86OpEntry opcodes_0F[256] =3D { [0x6d] =3D X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x, vex4 p_66 avx2_25= 6), [0x6e] =3D X86_OP_ENTRY3(MOVD_to, V,x, None,None, E,y, vex5 mmx p_0= 0_66), /* wrong dest Vy on SDM! */ [0x6f] =3D X86_OP_GROUP0(0F6F), + + /* Incorrectly missing from 2-17 */ + [0xd8] =3D X86_OP_ENTRY3(PSUBUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), + [0xd9] =3D X86_OP_ENTRY3(PSUBUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), + [0xda] =3D X86_OP_ENTRY3(PMINUB, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), + [0xdb] =3D X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), + [0xdc] =3D X86_OP_ENTRY3(PADDUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), + [0xdd] =3D X86_OP_ENTRY3(PADDUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), + [0xde] =3D X86_OP_ENTRY3(PMAXUB, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), + [0xdf] =3D X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), + + [0xe8] =3D X86_OP_ENTRY3(PSUBSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_= 00_66), + [0xe9] =3D X86_OP_ENTRY3(PSUBSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_= 00_66), + [0xea] =3D X86_OP_ENTRY3(PMINSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_= 00_66), + [0xeb] =3D X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 mmx avx2_256 p_= 00_66), + [0xec] =3D X86_OP_ENTRY3(PADDSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_= 00_66), + [0xed] =3D X86_OP_ENTRY3(PADDSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_= 00_66), + [0xee] =3D X86_OP_ENTRY3(PMAXSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_= 00_66), + 
[0xef] =3D X86_OP_ENTRY3(PXOR, V,x, H,x, W,x, vex4 mmx avx2_256 p_= 00_66), + + [0xf8] =3D X86_OP_ENTRY3(PSUBB, V,x, H,x, W,x, vex4 mmx avx2_256 p_0= 0_66), + [0xf9] =3D X86_OP_ENTRY3(PSUBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_0= 0_66), + [0xfa] =3D X86_OP_ENTRY3(PSUBD, V,x, H,x, W,x, vex4 mmx avx2_256 p_0= 0_66), + [0xfb] =3D X86_OP_ENTRY3(PSUBQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_0= 0_66), + [0xfc] =3D X86_OP_ENTRY3(PADDB, V,x, H,x, W,x, vex4 mmx avx2_256 p_0= 0_66), + [0xfd] =3D X86_OP_ENTRY3(PADDW, V,x, H,x, W,x, vex4 mmx avx2_256 p_0= 0_66), + [0xfe] =3D X86_OP_ENTRY3(PADDD, V,x, H,x, W,x, vex4 mmx avx2_256 p_0= 0_66), + /* 0xff =3D UD0 */ }; =20 static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *en= try, uint8_t *b) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 60b80e9d30..f1f7397869 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -328,9 +328,31 @@ static void gen_##uname(DisasContext *s, CPUX86State *= env, X86DecodedInsn *decod decode->op[2].offset, vec_len, vec_len); = \ } =20 +BINARY_INT_GVEC(PADDB, tcg_gen_gvec_add, MO_8) +BINARY_INT_GVEC(PADDW, tcg_gen_gvec_add, MO_16) +BINARY_INT_GVEC(PADDD, tcg_gen_gvec_add, MO_32) +BINARY_INT_GVEC(PADDSB, tcg_gen_gvec_ssadd, MO_8) +BINARY_INT_GVEC(PADDSW, tcg_gen_gvec_ssadd, MO_16) +BINARY_INT_GVEC(PADDUSB, tcg_gen_gvec_usadd, MO_8) +BINARY_INT_GVEC(PADDUSW, tcg_gen_gvec_usadd, MO_16) +BINARY_INT_GVEC(PAND, tcg_gen_gvec_and, MO_64) BINARY_INT_GVEC(PCMPGTB, tcg_gen_gvec_cmp, TCG_COND_GT, MO_8) BINARY_INT_GVEC(PCMPGTW, tcg_gen_gvec_cmp, TCG_COND_GT, MO_16) BINARY_INT_GVEC(PCMPGTD, tcg_gen_gvec_cmp, TCG_COND_GT, MO_32) +BINARY_INT_GVEC(PMAXSW, tcg_gen_gvec_smax, MO_16) +BINARY_INT_GVEC(PMAXUB, tcg_gen_gvec_umax, MO_8) +BINARY_INT_GVEC(PMINSW, tcg_gen_gvec_smin, MO_16) +BINARY_INT_GVEC(PMINUB, tcg_gen_gvec_umin, MO_8) +BINARY_INT_GVEC(POR, tcg_gen_gvec_or, MO_64) +BINARY_INT_GVEC(PSUBB, tcg_gen_gvec_sub, MO_8) +BINARY_INT_GVEC(PSUBW, 
tcg_gen_gvec_sub, MO_16) +BINARY_INT_GVEC(PSUBD, tcg_gen_gvec_sub, MO_32) +BINARY_INT_GVEC(PSUBQ, tcg_gen_gvec_sub, MO_64) +BINARY_INT_GVEC(PSUBSB, tcg_gen_gvec_sssub, MO_8) +BINARY_INT_GVEC(PSUBSW, tcg_gen_gvec_sssub, MO_16) +BINARY_INT_GVEC(PSUBUSB, tcg_gen_gvec_ussub, MO_8) +BINARY_INT_GVEC(PSUBUSW, tcg_gen_gvec_ussub, MO_16) +BINARY_INT_GVEC(PXOR, tcg_gen_gvec_xor, MO_64) =20 =20 /* @@ -609,6 +631,16 @@ static void gen_MULX(DisasContext *s, CPUX86State *env= , X86DecodedInsn *decode) =20 } =20 +static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) +{ + int vec_len =3D vector_len(s, decode); + + /* Careful, operand order is reversed! */ + tcg_gen_gvec_andc(MO_64, + decode->op[0].offset, decode->op[2].offset, + decode->op[1].offset, vec_len, vec_len); +} + static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) { MemOp ot =3D decode->op[1].ot; diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 1ffbf2f4f8..fd565650a9 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4694,7 +4694,9 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) #ifndef CONFIG_USER_ONLY use_new &=3D b <=3D limit; #endif - if (use_new && (b >=3D 0x160 && b <=3D 0x16f)) { + if (use_new && + ((b >=3D 0x160 && b <=3D 0x16f) || + (b >=3D 0x1d8 && b <=3D 0x1ff && (b & 8)))) { disas_insn_new(s, cpu, b + 0x100); return s->pc; } --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663710532; cv=none; d=zohomail.com; s=zohoarc; 
b=SnBy+VcvqMcxnJsQ8EYyyZ25RHNp8EAuWGKJ5iPr/Lg58Mxr2jyFUahQjtW2ClaXC9s41ZdCjLDXpNVxQNAiFw08TFM/ZFtZtLR1ZCBRJkeobUsX4Yv1c6Ltg8zEeg45bw80PCsxFMKb5UQJOW7sIjGMov71TJGuE65uBjfDOIQ= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663710532; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=cophxzFqlPbxR5U5SA8zIeC6sut6syB8XLYG2EHM4y0=; b=SBGA/zZT8TgabAuZj0tgXTn+8v8mz6NLXJ62BWeLJhWMmfMAN0c/ng1OgFM+Q6Fb76dXBv3cC4jELrGrs/r4KmF7q1MvHu5o60la81QqyVxCBz2rF7q5CoRZtNkgfzbHcaCCzD3VJJNWCo6Pos1DCh4XQglJgcqBp/gyogywepI= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663710532406242.422841589045; Tue, 20 Sep 2022 14:48:52 -0700 (PDT) Received: from localhost ([::1]:42008 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oal6p-0003AF-6F for importer@patchew.org; Tue, 20 Sep 2022 17:48:51 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:57418) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0G-0006Qi-Fc for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:57 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:53485) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0E-0002Bs-Ba for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:48 -0400 Received: from mail-ej1-f70.google.com (mail-ej1-f70.google.com [209.85.218.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id 
us-mta-408-PT1zH7rfPBy-SggC5RyWqg-1; Tue, 20 Sep 2022 13:25:44 -0400 Received: by mail-ej1-f70.google.com with SMTP id he41-20020a1709073da900b00780bff831ebso1758994ejc.6 for ; Tue, 20 Sep 2022 10:25:43 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id z20-20020a1709067e5400b007778c9b7629sm206210ejr.34.2022.09.20.10.25.40 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:41 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694745; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=cophxzFqlPbxR5U5SA8zIeC6sut6syB8XLYG2EHM4y0=; b=GhFhPG8Rq3KjSDQ/U0WvofemgZuqZ07aEnWQpqEwqa9xOMaHf7qJTNrvtcLmexAiq0cnYo UEZk7l9ZYZ/anB7GG3WTkKo/c7WQ4XtxMUwBnrXGbAcyKWU1v1argIimfDtAJZD1kzqw35 HCCm7AsRdvuFmhI4/VqCPSlrCH3SJ8Q= X-MC-Unique: PT1zH7rfPBy-SggC5RyWqg-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=cophxzFqlPbxR5U5SA8zIeC6sut6syB8XLYG2EHM4y0=; b=P8d2VyPEabUZdYvH9YmXTCIyhkndYwbxS/TUbJIDyc/XpnKrlemisC7WTD5I7dhVU1 ZSFL53+XTb6geK+A4wTYvfVMAAWIWg3I73mCSnq6+UmfJNIC6uj1VPg6plloAHU7qq7o bj10ZlQPzDou7pK3LG8Gqe04KvqLeDDCKJZnHJGZwhz12aAmix3bu2FRs2ap87T2PtBc CvitMr6CQAJH0RwrHac5FVR0GYBRmkCeMcwM5mcvv8QBu7XChfXWiPkKRnoLgociUsiY mQG+guEIKJmhWB3636SU2vT1xDrqQ+1jSeQWl1MoPvZ+tT5uaFJ8VrG+mpdt7hMWAlGB HMBg== X-Gm-Message-State: ACrzQf37FW0naxG87oaajsNN/OIkkMJPpMm2+15lXAKtLei1uqzDT16J 7MVsiO9lRhSLFE/yNXgWav3j8rO694HplrHegMZN+ksCXHIHGl76VsN+zKVORgaAhAFwC7ocwUg yWPyNCRn3DAfLuwfP0gRZNgqEQMULn375cgz9iHrMHM6/1yTNfob4PgPzWEyFBB6zXrk= X-Received: by 
2002:a05:6402:1a4d:b0:44e:221d:d1dd with SMTP id bf13-20020a0564021a4d00b0044e221dd1ddmr21386569edb.267.1663694741942; Tue, 20 Sep 2022 10:25:41 -0700 (PDT) X-Google-Smtp-Source: AMsMyM5HnSmjVM0vysmMKSQ77M1cOVdAEpOgjdups/vOrBTG5SKy8gjaSLeqlHS1U8C/9TSMrOoPRA== X-Received: by 2002:a05:6402:1a4d:b0:44e:221d:d1dd with SMTP id bf13-20020a0564021a4d00b0044e221dd1ddmr21386543edb.267.1663694741593; Tue, 20 Sep 2022 10:25:41 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 21/37] target/i386: reimplement 0x0f 0x50-0x5f, add AVX Date: Tue, 20 Sep 2022 19:24:51 +0200 Message-Id: <20220920172507.95568-22-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663710534477100001 Content-Type: text/plain; charset="utf-8" These are mostly floating-point SSE operations. 
The odd ones out are MOVMSK and CVTxx2yy, the others are straightforward. Unary operations are a bit special in AVX because they have 2 operands for PD/PS operands (VEX.vvvv must be 1111b), and 3 operands for SD/SS. They are handled using X86_OP_GROUP3 for compactness. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 43 ++++++++ target/i386/tcg/emit.c.inc | 166 +++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 2 +- 3 files changed, 210 insertions(+), 1 deletion(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index d207a1f0c1..5b753ea329 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -252,7 +252,41 @@ static void decode_0F3A(DisasContext *s, CPUX86State *= env, X86OpEntry *entry, ui *entry =3D opcodes_0F3A[*b]; } =20 +static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry= *entry, uint8_t *b) +{ + if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) { + entry->op1 =3D X86_TYPE_None; + entry->s1 =3D X86_SIZE_None; + } + switch (*b) { + case 0x51: entry->gen =3D gen_VSQRT; break; + case 0x52: entry->gen =3D gen_VRSQRT; break; + case 0x53: entry->gen =3D gen_VRCP; break; + case 0x5A: entry->gen =3D gen_VCVTfp2fp; break; + } +} + +static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F5B[4] =3D { + X86_OP_ENTRY2(VCVTDQ2PS, V,x, W,x, vex2), + X86_OP_ENTRY2(VCVTPS2DQ, V,x, W,x, vex2), + X86_OP_ENTRY2(VCVTTPS2DQ, V,x, W,x, vex2), + {}, + }; + *entry =3D *decode_by_prefix(s, opcodes_0F5B); +} + static const X86OpEntry opcodes_0F[256] =3D { + [0x50] =3D X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66= ), + [0x51] =3D X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_= f3_f2), + [0x52] =3D X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), + [0x53] =3D X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), + [0x54] =3D 
X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66), /*= vand */ + [0x55] =3D X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 p_00_66), /*= vandn */ + [0x56] =3D X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 p_00_66), /*= vor */ + [0x57] =3D X86_OP_ENTRY3(PXOR, V,x, H,x, W,x, vex4 p_00_66), /*= vxor */ + [0x60] =3D X86_OP_ENTRY3(PUNPCKLBW, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), [0x61] =3D X86_OP_ENTRY3(PUNPCKLWD, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), [0x62] =3D X86_OP_ENTRY3(PUNPCKLDQ, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), @@ -265,6 +299,15 @@ static const X86OpEntry opcodes_0F[256] =3D { [0x38] =3D X86_OP_GROUP0(0F38), [0x3a] =3D X86_OP_GROUP0(0F3A), =20 + [0x58] =3D X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_= f3_f2), + [0x59] =3D X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_= f3_f2), + [0x5a] =3D X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex3 p_00_66_f3_f2= ), + [0x5b] =3D X86_OP_GROUP0(0F5B), + [0x5c] =3D X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_= f3_f2), + [0x5d] =3D X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_= f3_f2), + [0x5e] =3D X86_OP_ENTRY3(VDIV, V,x, H,x, W,x, vex2_rep3 p_00_66_= f3_f2), + [0x5f] =3D X86_OP_ENTRY3(VMAX, V,x, H,x, W,x, vex2_rep3 p_00_66_= f3_f2), + [0x68] =3D X86_OP_ENTRY3(PUNPCKHBW, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), [0x69] =3D X86_OP_ENTRY3(PUNPCKHWD, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), [0x6a] =3D X86_OP_ENTRY3(PUNPCKHDQ, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index f1f7397869..58b2fd7a2a 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -318,6 +318,131 @@ static void gen_store_sse(DisasContext *s, X86Decoded= Insn *decode, int src_ofs) } } =20 +/* + * 00 =3D v*ps Vps, Hps, Wpd + * 66 =3D v*pd Vpd, Hpd, Wps + * f3 =3D v*ss Vss, Hss, Wps + * f2 =3D v*sd Vsd, Hsd, Wps + */ +static inline void gen_unary_fp_sse(DisasContext *s, CPUX86State *env, X86= DecodedInsn *decode, + SSEFunc_0_epp 
pd_xmm, SSEFunc_0_epp ps_xmm, + SSEFunc_0_epp pd_ymm, SSEFunc_0_epp ps_ymm, + SSEFunc_0_eppp sd, SSEFunc_0_eppp ss) +{ + if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) !=3D 0) { + SSEFunc_0_eppp fn =3D s->prefix & PREFIX_REPZ ? ss : sd; + if (!fn) { + gen_illegal_opcode(s); + return; + } + fn(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); + } else { + SSEFunc_0_epp ps, pd, fn; + ps =3D s->vex_l ? ps_ymm : ps_xmm; + pd =3D s->vex_l ? pd_ymm : pd_xmm; + fn =3D s->prefix & PREFIX_DATA ? pd : ps; + if (!fn) { + gen_illegal_opcode(s); + return; + } + fn(cpu_env, OP_PTR0, OP_PTR2); + } +} +#define UNARY_FP_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_unary_fp_sse(s, env, decode, = \ + gen_helper_##lname##pd_xmm, = \ + gen_helper_##lname##ps_xmm, = \ + gen_helper_##lname##pd_ymm, = \ + gen_helper_##lname##ps_ymm, = \ + gen_helper_##lname##sd, = \ + gen_helper_##lname##ss); = \ +} +UNARY_FP_SSE(VSQRT, sqrt) + +/* + * 00 =3D v*ps Vps, Hps, Wpd + * 66 =3D v*pd Vpd, Hpd, Wps + * f3 =3D v*ss Vss, Hss, Wps + * f2 =3D v*sd Vsd, Hsd, Wps + */ +static inline void gen_fp_sse(DisasContext *s, CPUX86State *env, X86Decode= dInsn *decode, + SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm, + SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm, + SSEFunc_0_eppp sd, SSEFunc_0_eppp ss) +{ + SSEFunc_0_eppp ps, pd, fn; + if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) !=3D 0) { + fn =3D s->prefix & PREFIX_REPZ ? ss : sd; + } else { + ps =3D s->vex_l ? ps_ymm : ps_xmm; + pd =3D s->vex_l ? pd_ymm : pd_xmm; + fn =3D s->prefix & PREFIX_DATA ? 
pd : ps; + } + if (fn) { + fn(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); + } else { + gen_illegal_opcode(s); + } +} +#define FP_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_fp_sse(s, env, decode, = \ + gen_helper_##lname##pd_xmm, = \ + gen_helper_##lname##ps_xmm, = \ + gen_helper_##lname##pd_ymm, = \ + gen_helper_##lname##ps_ymm, = \ + gen_helper_##lname##sd, = \ + gen_helper_##lname##ss); = \ +} +FP_SSE(VADD, add) +FP_SSE(VMUL, mul) +FP_SSE(VSUB, sub) +FP_SSE(VMIN, min) +FP_SSE(VDIV, div) +FP_SSE(VMAX, max) + +/* + * 00 =3D v*ps Vps, Wpd + * f3 =3D v*ss Vss, Wps + */ +static inline void gen_unary_fp32_sse(DisasContext *s, CPUX86State *env, X= 86DecodedInsn *decode, + SSEFunc_0_epp ps_xmm, + SSEFunc_0_epp ps_ymm, + SSEFunc_0_eppp ss) +{ + if ((s->prefix & (PREFIX_DATA | PREFIX_REPNZ)) !=3D 0) { + goto illegal_op; + } else if (s->prefix & PREFIX_REPZ) { + if (!ss) { + goto illegal_op; + } + ss(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); + } else { + SSEFunc_0_epp fn =3D s->vex_l ? ps_ymm : ps_xmm; + if (!fn) { + goto illegal_op; + } + fn(cpu_env, OP_PTR0, OP_PTR2); + } + return; + +illegal_op: + gen_illegal_opcode(s); +} +#define UNARY_FP32_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_unary_fp32_sse(s, env, decode, = \ + gen_helper_##lname##ps_xmm, = \ + gen_helper_##lname##ps_ymm, = \ + gen_helper_##lname##ss); = \ +} +UNARY_FP32_SSE(VRSQRT, rsqrt) +UNARY_FP32_SSE(VRCP, rcp) + #define BINARY_INT_GVEC(uname, func, ...) 
= \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ { = \ @@ -413,6 +538,29 @@ static void gen_##uname(DisasContext *s, CPUX86State *= env, X86DecodedInsn *decod BINARY_INT_SSE(PUNPCKLQDQ, punpcklqdq) BINARY_INT_SSE(PUNPCKHQDQ, punpckhqdq) =20 +static inline void gen_unary_int_sse(DisasContext *s, CPUX86State *env, X8= 6DecodedInsn *decode, + SSEFunc_0_epp xmm, SSEFunc_0_epp ymm) +{ + if (!s->vex_l) { + xmm(cpu_env, OP_PTR0, OP_PTR2); + } else { + ymm(cpu_env, OP_PTR0, OP_PTR2); + } +} + +#define UNARY_INT_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_unary_int_sse(s, env, decode, = \ + gen_helper_##lname##_xmm, = \ + gen_helper_##lname##_ymm); = \ +} + +UNARY_INT_SSE(VCVTDQ2PS, cvtdq2ps) +UNARY_INT_SSE(VCVTPS2DQ, cvtps2dq) +UNARY_INT_SSE(VCVTTPS2DQ, cvttps2dq) + + static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_= op) { TCGv carry_in =3D NULL; @@ -608,6 +756,16 @@ static void gen_MOVDQ(DisasContext *s, CPUX86State *en= v, X86DecodedInsn *decode) gen_store_sse(s, decode, decode->op[2].offset); } =20 +static void gen_MOVMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) +{ + typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn; + ps =3D s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm; + pd =3D s->vex_l ? gen_helper_movmskpd_ymm : gen_helper_movmskpd_xmm; + fn =3D s->prefix & PREFIX_DATA ? 
pd : ps; + fn(s->tmp2_i32, cpu_env, OP_PTR2); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); +} + static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) { MemOp ot =3D decode->op[0].ot; @@ -708,3 +866,11 @@ static void gen_SHRX(DisasContext *s, CPUX86State *env= , X86DecodedInsn *decode) } tcg_gen_shr_tl(s->T0, s->T0, s->T1); } + +static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + gen_unary_fp_sse(s, env, decode, + gen_helper_cvtpd2ps_xmm, gen_helper_cvtps2pd_xmm, + gen_helper_cvtpd2ps_ymm, gen_helper_cvtps2pd_ymm, + gen_helper_cvtsd2ss, gen_helper_cvtss2sd); +} diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index fd565650a9..95172c30d0 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4695,7 +4695,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) use_new &=3D b <=3D limit; #endif if (use_new && - ((b >=3D 0x160 && b <=3D 0x16f) || + ((b >=3D 0x150 && b <=3D 0x16f) || (b >=3D 0x1d8 && b <=3D 0x1ff && (b & 8)))) { disas_insn_new(s, cpu, b + 0x100); return s->pc; --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663711719; cv=none; d=zohomail.com; s=zohoarc; b=oFHOI45YFPJ/zgzfA2jpESrOFndz7KxmYRCf20V7vsEQtcNIw/0Md/oEDryDSR90ufJTNNf6arfB5A78sfA83edM48EU3KSwwd/B+JrraA1hWJsTRTKanYMTn+IrMJYmgwHe2d+Pz2UiE86q5A2beLaf6fxop3oAFCPB0yT7s1A= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663711719; 
h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=4ainAkqafQKOkldyIRhNwW+LS+KJbF46htGaGpDJrvY=; b=VcjBGYgdH8gJKE4eUIgujhNIg0BFw6BeipcFPZ25TTd1A2yxlxFgQ7Uea/JnZVqiFb0qdcWNTJgMdoULm8BiFruPnbrpbGCDY+E9v9W+YNfaACNdDtIgF6Ibos8ssSv1qa5874VRQAwpSGujssbmhS9egHcHD9Iz20ntGhHqp0w= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663711719143169.93637060123103; Tue, 20 Sep 2022 15:08:39 -0700 (PDT) Received: from localhost ([::1]:41160 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalPx-0003vs-S3 for importer@patchew.org; Tue, 20 Sep 2022 18:08:37 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:57420) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0G-0006Qj-HD for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:57 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:33893) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0E-0002C0-LC for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:48 -0400 Received: from mail-ej1-f70.google.com (mail-ej1-f70.google.com [209.85.218.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-265-otNiZunnPdWlY1LNk54f3w-1; Tue, 20 Sep 2022 13:25:44 -0400 Received: by mail-ej1-f70.google.com with SMTP id jg32-20020a170907972000b0077ce313a8f0so1781632ejc.15 for ; Tue, 20 Sep 2022 10:25:44 -0700 (PDT) Received: from [192.168.10.118] 
([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id 17-20020a170906201100b0078027e6e92csm181557ejo.84.2022.09.20.10.25.42 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:42 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694746; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=4ainAkqafQKOkldyIRhNwW+LS+KJbF46htGaGpDJrvY=; b=a2CklV8EkwQrKbcLv+g3MqWxIOUsfvDgt0jHYDThbmwn7FOMnb/22m9NvbbX+/aIA78W8a 5jjUaudbLRAHy6pSfPmoZcYYGaAv//lFD5sGcT6bJ2tjfY5RIPQPwl7iYVz++GvL6rDbkh 2GzLmvNKZVy4tlykl2AOO+91PzaAp3E= X-MC-Unique: otNiZunnPdWlY1LNk54f3w-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=4ainAkqafQKOkldyIRhNwW+LS+KJbF46htGaGpDJrvY=; b=C3fsPL9fa+cuDT05SEE/zpzaRqam3FefIhJwBMwNV+PWO1qoWW467ZL5g3B+WyBURs t86W5V2OEUHO4DUeACfibTniuRLh/JwDsFTivqHWGdiyGXIgJpZtHpN74NaJDhZxj3fx PXmJrZd4TBUezMXMimqoyuoVaSP6t24+61e4WXPFnpO/XEIKnnAUB4/hfhw3uPLDvDrA WwKWDNL3/NMEgO+bORXwvFvJUKwlM4Afk/atosbrul9WQUy1XU0YO/4ioiaRgi0EhMH/ OB0TTR25un+D30QB3OiAZyMxKjSOOEyN741NlzfCRoAhMjQkREShsYAVbnl7f/WgN44V NGLg== X-Gm-Message-State: ACrzQf0a8aBbOlX/yGRNnuklwEU0gTk17XjcxrHY342Qu7CYsbVxaacP Pex+Iw9YxlRcow85sqRZ/c3s4Pq4MFDSxrcJQrEbS3kOYjBGykAqkfNskAcsXBRZ8RN7m3en4Cu 7vJ71J6v1JaIp0YRAOnaXi608gRuisMn7QkY+ct8x6gO9QLcuLGgtweR9TBZy1YxPb/Q= X-Received: by 2002:a17:907:1df1:b0:77a:a670:d807 with SMTP id og49-20020a1709071df100b0077aa670d807mr17860667ejc.163.1663694743175; Tue, 20 Sep 2022 10:25:43 -0700 (PDT) X-Google-Smtp-Source: AMsMyM7MNAbeyLkb5Xnl5heJxaNZlOLnMF6OdU0QGJTd3yNZt8YFNQaKmg8pCObEav7xPUE8RVDy6g== X-Received: by 
2002:a17:907:1df1:b0:77a:a670:d807 with SMTP id og49-20020a1709071df100b0077aa670d807mr17860650ejc.163.1663694742863; Tue, 20 Sep 2022 10:25:42 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 22/37] target/i386: reimplement 0x0f 0x78-0x7f, add AVX Date: Tue, 20 Sep 2022 19:24:52 +0200 Message-Id: <20220920172507.95568-23-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663711720294100001 Content-Type: text/plain; charset="utf-8" These are a mixed batch, including the first two horizontal (66 and F2 only) operations, more moves, and SSE4a extract/insert. Because SSE4a is pretty rare, I chose to leave the helper as they are, but it is possible to unify them by loading index and length from the source XMM register and generating deposit or extract TCG ops. 
Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 51 +++++++++++++++++++ target/i386/tcg/emit.c.inc | 86 ++++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 1 + 3 files changed, 138 insertions(+) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index 5b753ea329..6220142cdb 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -168,6 +168,50 @@ static void decode_0F6F(DisasContext *s, CPUX86State *= env, X86OpEntry *entry, ui *entry =3D *decode_by_prefix(s, opcodes_0F6F); } =20 +static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F78[4] =3D { + {}, + X86_OP_ENTRY3(EXTRQ_i, V,x, None,None, I,w, cpuid(SSE4A)), + {}, + X86_OP_ENTRY3(INSERTQ_i, V,x, U,x, I,w, cpuid(SSE4A)), + }; + *entry =3D *decode_by_prefix(s, opcodes_0F78); +} + +static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + if (s->prefix & PREFIX_REPNZ) { + entry->gen =3D gen_INSERTQ_r; + } else if (s->prefix & PREFIX_DATA) { + entry->gen =3D gen_EXTRQ_r; + } else { + entry->gen =3D NULL; + }; +} + +static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F7E[4] =3D { + X86_OP_ENTRY3(MOVD_from, E,y, None,None, P,y, vex5 mmx), + X86_OP_ENTRY3(MOVD_from, E,y, None,None, V,y, vex5), + X86_OP_ENTRY3(MOVQ, V,x, None,None, W,q, vex5), /* wrong de= st Vy on SDM! 
*/ + {}, + }; + *entry =3D *decode_by_prefix(s, opcodes_0F7E); +} + +static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F7F[4] =3D { + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 mmx), /* movq= */ + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1), /* movdqa */ + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4_unal), /* mov= dqu */ + {}, + }; + *entry =3D *decode_by_prefix(s, opcodes_0F7F); +} + static const X86OpEntry opcodes_0F38_00toEF[240] =3D { }; =20 @@ -317,6 +361,13 @@ static const X86OpEntry opcodes_0F[256] =3D { [0x6e] =3D X86_OP_ENTRY3(MOVD_to, V,x, None,None, E,y, vex5 mmx p_0= 0_66), /* wrong dest Vy on SDM! */ [0x6f] =3D X86_OP_GROUP0(0F6F), =20 + [0x78] =3D X86_OP_GROUP0(0F78), + [0x79] =3D X86_OP_GROUP2(0F79, V,x, U,x, cpuid(SSE4A)), + [0x7c] =3D X86_OP_ENTRY3(VHADD, V,x, H,x, W,x, vex2 cpuid(SSE3) = p_66_f2), + [0x7d] =3D X86_OP_ENTRY3(VHSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) = p_66_f2), + [0x7e] =3D X86_OP_GROUP0(0F7E), + [0x7f] =3D X86_OP_GROUP0(0F7F), + /* Incorrectly missing from 2-17 */ [0xd8] =3D X86_OP_ENTRY3(PSUBUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), [0xd9] =3D X86_OP_ENTRY3(PSUBUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 58b2fd7a2a..140a621abf 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -443,6 +443,30 @@ static void gen_##uname(DisasContext *s, CPUX86State *= env, X86DecodedInsn *decod UNARY_FP32_SSE(VRSQRT, rsqrt) UNARY_FP32_SSE(VRCP, rcp) =20 +/* + * 66 =3D v*pd Vpd, Hpd, Wpd + * f2 =3D v*ps Vps, Hps, Wps + */ +static inline void gen_horizontal_fp_sse(DisasContext *s, CPUX86State *env= , X86DecodedInsn *decode, + SSEFunc_0_eppp pd_xmm, SSEFunc_0_= eppp ps_xmm, + SSEFunc_0_eppp pd_ymm, SSEFunc_0_= eppp ps_ymm) +{ + SSEFunc_0_eppp ps, pd, fn; + ps =3D s->vex_l ? ps_ymm : ps_xmm; + pd =3D s->vex_l ? 
pd_ymm : pd_xmm; + fn =3D s->prefix & PREFIX_DATA ? pd : ps; + fn(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); +} +#define HORIZONTAL_FP_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_horizontal_fp_sse(s, env, decode, = \ + gen_helper_##lname##pd_xmm, gen_helper_##lname##= ps_xmm, \ + gen_helper_##lname##pd_ymm, gen_helper_##lname##= ps_ymm); \ +} +HORIZONTAL_FP_SSE(VHADD, hadd) +HORIZONTAL_FP_SSE(VHSUB, hsub) + #define BINARY_INT_GVEC(uname, func, ...) = \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ { = \ @@ -716,6 +740,32 @@ static void gen_CRC32(DisasContext *s, CPUX86State *en= v, X86DecodedInsn *decode) gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot)); } =20 +static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + TCGv_i32 length =3D tcg_constant_i32(decode->immediate & 63); + TCGv_i32 index =3D tcg_constant_i32((decode->immediate >> 8) & 63); + + gen_helper_extrq_i(cpu_env, OP_PTR0, index, length); +} + +static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + gen_helper_extrq_r(cpu_env, OP_PTR0, OP_PTR2); +} + +static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + TCGv_i32 length =3D tcg_constant_i32(decode->immediate & 63); + TCGv_i32 index =3D tcg_constant_i32((decode->immediate >> 8) & 63); + + gen_helper_insertq_i(cpu_env, OP_PTR0, OP_PTR1, index, length); +} + +static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + gen_helper_insertq_r(cpu_env, OP_PTR0, OP_PTR2); +} + static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) { MemOp ot =3D decode->op[0].ot; @@ -728,6 +778,24 @@ static void gen_MOVBE(DisasContext *s, CPUX86State *en= v, X86DecodedInsn *decode) } } =20 +static void gen_MOVD_from(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + 
MemOp ot =3D decode->op[2].ot; + + switch (ot) { + case MO_32: +#ifdef TARGET_X86_64 + tcg_gen_ld32u_tl(s->T0, cpu_env, decode->op[2].offset); + break; + case MO_64: +#endif + tcg_gen_ld_tl(s->T0, cpu_env, decode->op[2].offset); + break; + default: + abort(); + } +} + static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) { MemOp ot =3D decode->op[2].ot; @@ -766,6 +834,24 @@ static void gen_MOVMSK(DisasContext *s, CPUX86State *e= nv, X86DecodedInsn *decode tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); } =20 +static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + int vec_len =3D vector_len(s, decode); + int lo_ofs =3D vector_elem_offset(&decode->op[0], MO_64, 0); + + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset); + /* + * tcg_gen_gvec_dup_i64(MO_64, op0.offset, 8, vec_len, s->tmp1_64) wou= ld + * seem to work, but it does not on big-endian platforms; the cleared = parts + * are always at higher addresses, but cross-endian emulation inverts = the + * byte order so that the cleared parts need to be at *lower* addresse= s. + * Because oprsz is 8, we see this here even for SSE; but more in gene= ral, + * it disqualifies using oprsz < maxsz to emulate VEX128. 
+ */ + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, lo_ofs); +} + static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) { MemOp ot =3D decode->op[0].ot; diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 95172c30d0..4404440d87 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4696,6 +4696,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) #endif if (use_new && ((b >=3D 0x150 && b <=3D 0x16f) || + (b >=3D 0x178 && b <=3D 0x17f) || (b >=3D 0x1d8 && b <=3D 0x1ff && (b & 8)))) { disas_insn_new(s, cpu, b + 0x100); return s->pc; --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713084; cv=none; d=zohomail.com; s=zohoarc; b=CZns06ruP6E73CqoiDgVkL1I6FYPRrDNQ4nrdMpZFev79eRt7QwB59gijGrGHmgiIOIEUocSP6UhW3EJHE2KioRn/B+RxhIg4kdQf4cygOXJSGGnClhuurULG+ueH4RLGzmM84brvLgp4LpPIYENblzh2ZV/Lv+Xcy6+53hqpOE= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713084; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=+1Ckix9z/G+LwwBowTFdrdNWn5MYsnkvODE+d6k9ixQ=; b=htRXsOT7jgtLAWHUJqBYyr3vcBFFtndfSHIPCSTynoM29X1azlL64b4i31jHZN0Xf21ZvxF786gi8GLMqqHueS8d0xHXfaJpU+kwTPT65SFYY1JgAb6pluSxKJQT5sKI+q94z5kHAljWbyqvuAK/bWLCIXQBeVBq0baF1Qpq2/k= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) 
smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663713084871383.32686656219914; Tue, 20 Sep 2022 15:31:24 -0700 (PDT) Received: from localhost ([::1]:40916 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oallz-0005TY-EO for importer@patchew.org; Tue, 20 Sep 2022 18:31:23 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:57424) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0K-0006Sc-51 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:57 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:50258) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0G-0002Ci-EU for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:51 -0400 Received: from mail-ed1-f69.google.com (mail-ed1-f69.google.com [209.85.208.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-657-WRk5zDb3NaqTqhg1t4xuWA-1; Tue, 20 Sep 2022 13:25:46 -0400 Received: by mail-ed1-f69.google.com with SMTP id t13-20020a056402524d00b00452c6289448so2380092edd.17 for ; Tue, 20 Sep 2022 10:25:46 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id si17-20020a170906ced100b00780819f9e61sm157225ejb.131.2022.09.20.10.25.43 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:44 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694747; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; 
bh=+1Ckix9z/G+LwwBowTFdrdNWn5MYsnkvODE+d6k9ixQ=; b=BrZgWlN8a/xSy/ZfuPMj178evedl/bu0XwdtT6D6QJkZRI4Cc7D1TBX1TRKYDpMVHwN2br 4d3AVu6BOHFRWZGcmWMHRQwmYKKi5dHEsB/pJwAMiadEXbnKefU/QlrILSq+FI9L/3XjNg gs9Dek2DQuMvWe3wr/x9UiallGNFpv0= X-MC-Unique: WRk5zDb3NaqTqhg1t4xuWA-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=+1Ckix9z/G+LwwBowTFdrdNWn5MYsnkvODE+d6k9ixQ=; b=gZihz1gqeT5/XeXGZsRlUoqafACeiN7loKDiO9jz3j5mtuvPSP+BUnwknYGMOmlAVh URtWNhZIrIkrKNXbINw7ocG8Law4ih7HU2GjDupAdOgSm3/bs6GKGwKC7mhIjCf266/f o0yDZiiq15kuCvo761wMIYcH3zgR03Dfnnt2DlZn0/7k+joseap5PJL1nTeBYK6Vu4Rj CJKOjD/NSCa6dAD8e1PAIaQCJ1JyQjYjeFZbpG0V5eeKYSsTIKN7LvTVfagnQYvRh+sC 9i0fUkVHwBR0wCsEOFJp79pK9KZ4yNLN/lBuT/LrZFLlXy87DxPHhmNzVI/gwDoJshtE 7Erg== X-Gm-Message-State: ACrzQf3TyFicYNZsjSv1dDfwdFrPvdgG67mqM+sfkle2l06mb4rlUzP3 5okEyNi/26jb5CD+eU9/0mS2uOXL4kxuHNCMrhCi99bRRhVEQqJ1GF0iduThUmSawoAm1XSKv92 G/mOaUQ+V0ZJrMgPtpxQzyKUFB/GepLJHIE0R0au9DhJ4JOHbt0pYI3zHd+oZSbZNDkE= X-Received: by 2002:a17:907:a06:b0:77b:6eca:c089 with SMTP id bb6-20020a1709070a0600b0077b6ecac089mr17108178ejc.362.1663694744868; Tue, 20 Sep 2022 10:25:44 -0700 (PDT) X-Google-Smtp-Source: AMsMyM7Jp1DflISxISm5p7V2SxzsdmwsSbODoAKvJ7e7yWRheNuYRDDz1Dgqa+58ebg9kU2G3MbfPA== X-Received: by 2002:a17:907:a06:b0:77b:6eca:c089 with SMTP id bb6-20020a1709070a0600b0077b6ecac089mr17108153ejc.362.1663694744472; Tue, 20 Sep 2022 10:25:44 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 23/37] target/i386: reimplement 0x0f 0x70-0x77, add AVX Date: Tue, 20 Sep 2022 19:24:53 +0200 Message-Id: <20220920172507.95568-24-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 
Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713086510100001 Content-Type: text/plain; charset="utf-8" This includes shifts by immediate, which use bits 3-5 of the ModRM byte as an opcode extension. With the exception of 128-bit shifts, they are implemented using gvec. This also covers VZEROALL and VZEROUPPER, which use the same opcode as EMMS. If we were wanting to optimize out gen_clear_ymmh then this would be one of the starting points. The implementation of the VZEROALL and VZEROUPPER helpers is by Paul Brook. 
Signed-off-by: Paolo Bonzini --- target/i386/tcg/decode-new.c.inc | 92 +++++++++++++- target/i386/tcg/emit.c.inc | 204 +++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 3 +- 3 files changed, 293 insertions(+), 6 deletions(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index 6220142cdb..02f4063ebc 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -157,6 +157,58 @@ static void decode_group17(DisasContext *s, CPUX86Stat= e *env, X86OpEntry *entry, entry->gen =3D group17_gen[op]; } =20 +static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *= entry, uint8_t *b) +{ + static const X86OpEntry opcodes_group12[8] =3D { + {}, + {}, + X86_OP_ENTRY3(PSRLW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + X86_OP_ENTRY3(PSRAW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + X86_OP_ENTRY3(PSLLW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + }; + + int op =3D (get_modrm(s, env) >> 3) & 7; + *entry =3D opcodes_group12[op]; +} + +static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *= entry, uint8_t *b) +{ + static const X86OpEntry opcodes_group13[8] =3D { + {}, + {}, + X86_OP_ENTRY3(PSRLD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + X86_OP_ENTRY3(PSRAD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + X86_OP_ENTRY3(PSLLD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + }; + + int op =3D (get_modrm(s, env) >> 3) & 7; + *entry =3D opcodes_group13[op]; +} + +static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *= entry, uint8_t *b) +{ + static const X86OpEntry opcodes_group14[8] =3D { + /* grp14 */ + {}, + {}, + X86_OP_ENTRY3(PSRLQ_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66), + {}, + {}, + X86_OP_ENTRY3(PSLLQ_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66), + }; + + int op =3D 
(get_modrm(s, env) >> 3) & 7; + *entry =3D opcodes_group14[op]; +} + static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) { static const X86OpEntry opcodes_0F6F[4] =3D { @@ -168,6 +220,31 @@ static void decode_0F6F(DisasContext *s, CPUX86State *= env, X86OpEntry *entry, ui *entry =3D *decode_by_prefix(s, opcodes_0F6F); } =20 +static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry pshufw[4] =3D { + X86_OP_ENTRY3(PSHUFW, P,q, Q,q, I,b, vex4 mmx), + X86_OP_ENTRY3(PSHUFD, V,x, W,x, I,b, vex4 avx2_256), + X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256), + X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256), + }; + + *entry =3D *decode_by_prefix(s, pshufw); +} + +static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + if (!(s->prefix & PREFIX_VEX)) { + entry->gen =3D gen_EMMS; + } else if (!s->vex_l) { + entry->gen =3D gen_VZEROUPPER; + entry->vex_class =3D 8; + } else { + entry->gen =3D gen_VZEROALL; + entry->vex_class =3D 8; + } +} + static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) { static const X86OpEntry opcodes_0F78[4] =3D { @@ -340,6 +417,15 @@ static const X86OpEntry opcodes_0F[256] =3D { [0x66] =3D X86_OP_ENTRY3(PCMPGTD, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), [0x67] =3D X86_OP_ENTRY3(PACKUSWB, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), =20 + [0x70] =3D X86_OP_GROUP0(0F70), + [0x71] =3D X86_OP_GROUP0(group12), + [0x72] =3D X86_OP_GROUP0(group13), + [0x73] =3D X86_OP_GROUP0(group14), + [0x74] =3D X86_OP_ENTRY3(PCMPEQB, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x75] =3D X86_OP_ENTRY3(PCMPEQW, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x76] =3D X86_OP_ENTRY3(PCMPEQD, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), + [0x77] =3D X86_OP_GROUP0(0F77), + [0x38] =3D X86_OP_GROUP0(0F38), [0x3a] =3D X86_OP_GROUP0(0F3A), =20 @@ -937,10 +1023,8 @@ static bool 
validate_vex(DisasContext *s, X86DecodedI= nsn *decode) } break; case 8: - if (!(s->prefix & PREFIX_VEX)) { - /* EMMS */ - return true; - } + /* Non-VEX case handled in decode_0F77. */ + assert(s->prefix & PREFIX_VEX); if (!(s->flags & HF_AVX_EN_MASK)) { goto illegal; } diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 140a621abf..062d8213b3 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,6 +19,11 @@ * License along with this library; if not, see . */ =20 +static inline TCGv_i32 tcg_constant8u_i32(uint8_t val) +{ + return tcg_constant_i32(val); +} + static void gen_NM_exception(DisasContext *s) { gen_exception(s, EXCP07_PREX, s->pc_start - s->cs_base); @@ -485,6 +490,9 @@ BINARY_INT_GVEC(PADDSW, tcg_gen_gvec_ssadd, MO_16) BINARY_INT_GVEC(PADDUSB, tcg_gen_gvec_usadd, MO_8) BINARY_INT_GVEC(PADDUSW, tcg_gen_gvec_usadd, MO_16) BINARY_INT_GVEC(PAND, tcg_gen_gvec_and, MO_64) +BINARY_INT_GVEC(PCMPEQB, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_8) +BINARY_INT_GVEC(PCMPEQD, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_32) +BINARY_INT_GVEC(PCMPEQW, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_16) BINARY_INT_GVEC(PCMPGTB, tcg_gen_gvec_cmp, TCG_COND_GT, MO_8) BINARY_INT_GVEC(PCMPGTW, tcg_gen_gvec_cmp, TCG_COND_GT, MO_16) BINARY_INT_GVEC(PCMPGTD, tcg_gen_gvec_cmp, TCG_COND_GT, MO_32) @@ -585,6 +593,29 @@ UNARY_INT_SSE(VCVTPS2DQ, cvtps2dq) UNARY_INT_SSE(VCVTTPS2DQ, cvttps2dq) =20 =20 +static inline void gen_unary_imm_sse(DisasContext *s, CPUX86State *env, X8= 6DecodedInsn *decode, + SSEFunc_0_ppi xmm, SSEFunc_0_ppi ymm) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + if (!s->vex_l) { + xmm(OP_PTR0, OP_PTR1, imm); + } else { + ymm(OP_PTR0, OP_PTR1, imm); + } +} + +#define UNARY_IMM_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_unary_imm_sse(s, env, decode, = \ + gen_helper_##lname##_xmm, = \ + gen_helper_##lname##_ymm); = \ +} + +UNARY_IMM_SSE(PSHUFD, pshufd) 
+UNARY_IMM_SSE(PSHUFHW, pshufhw) +UNARY_IMM_SSE(PSHUFLW, pshuflw) + static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_= op) { TCGv carry_in =3D NULL; @@ -740,6 +771,11 @@ static void gen_CRC32(DisasContext *s, CPUX86State *en= v, X86DecodedInsn *decode) gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot)); } =20 +static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + gen_helper_emms(cpu_env); +} + static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) { TCGv_i32 length =3D tcg_constant_i32(decode->immediate & 63); @@ -903,6 +939,154 @@ static void gen_PEXT(DisasContext *s, CPUX86State *en= v, X86DecodedInsn *decode) gen_helper_pext(s->T0, s->T0, s->T1); } =20 +static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + gen_helper_pshufw_mmx(OP_PTR0, OP_PTR1, imm); +} + +static void gen_PSRLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + int vec_len =3D vector_len(s, decode); + + if (decode->immediate >=3D 16) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len= , 0); + } else { + tcg_gen_gvec_shri(MO_16, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static void gen_PSLLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + int vec_len =3D vector_len(s, decode); + + if (decode->immediate >=3D 16) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len= , 0); + } else { + tcg_gen_gvec_shli(MO_16, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static void gen_PSRAW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + int vec_len =3D vector_len(s, decode); + + if (decode->immediate >=3D 16) { + decode->immediate =3D 15; + } + tcg_gen_gvec_sari(MO_16, + decode->op[0].offset, decode->op[1].offset, + 
decode->immediate, vec_len, vec_len); +} + +static void gen_PSRLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + int vec_len =3D vector_len(s, decode); + + if (decode->immediate >=3D 32) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len= , 0); + } else { + tcg_gen_gvec_shri(MO_32, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static void gen_PSLLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + int vec_len =3D vector_len(s, decode); + + if (decode->immediate >=3D 32) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len= , 0); + } else { + tcg_gen_gvec_shli(MO_32, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static void gen_PSRAD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + int vec_len =3D vector_len(s, decode); + + if (decode->immediate >=3D 32) { + decode->immediate =3D 31; + } + tcg_gen_gvec_sari(MO_32, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); +} + +static void gen_PSRLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + int vec_len =3D vector_len(s, decode); + + if (decode->immediate >=3D 64) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len= , 0); + } else { + tcg_gen_gvec_shri(MO_64, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static void gen_PSLLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + int vec_len =3D vector_len(s, decode); + + if (decode->immediate >=3D 64) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len= , 0); + } else { + tcg_gen_gvec_shli(MO_64, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static TCGv_ptr make_imm8u_xmm_vec(uint8_t imm, int vec_len) +{ + MemOp ot =3D vec_len =3D=3D 16 ? 
MO_128 : MO_256; + TCGv_i32 imm_v =3D tcg_constant8u_i32(imm); + TCGv_ptr ptr =3D tcg_temp_new_ptr(); + + tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_t0) + xmm_offset= (ot), + vec_len, vec_len, 0); + + tcg_gen_addi_ptr(ptr, cpu_env, offsetof(CPUX86State, xmm_t0)); + tcg_gen_st_i32(imm_v, ptr, offsetof(ZMMReg, ZMM_L(0))); + return ptr; +} + +static void gen_PSRLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) +{ + int vec_len =3D vector_len(s, decode); + TCGv_ptr imm_vec =3D make_imm8u_xmm_vec(decode->immediate, vec_len); + + if (s->vex_l) { + gen_helper_psrldq_ymm(cpu_env, OP_PTR0, OP_PTR1, imm_vec); + } else { + gen_helper_psrldq_xmm(cpu_env, OP_PTR0, OP_PTR1, imm_vec); + } + tcg_temp_free_ptr(imm_vec); +} + +static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) +{ + int vec_len =3D vector_len(s, decode); + TCGv_ptr imm_vec =3D make_imm8u_xmm_vec(decode->immediate, vec_len); + + if (s->vex_l) { + gen_helper_pslldq_ymm(cpu_env, OP_PTR0, OP_PTR1, imm_vec); + } else { + gen_helper_pslldq_xmm(cpu_env, OP_PTR0, OP_PTR1, imm_vec); + } + tcg_temp_free_ptr(imm_vec); +} + static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) { MemOp ot =3D decode->op[0].ot; @@ -960,3 +1144,23 @@ static void gen_VCVTfp2fp(DisasContext *s, CPUX86Stat= e *env, X86DecodedInsn *dec gen_helper_cvtpd2ps_ymm, gen_helper_cvtps2pd_ymm, gen_helper_cvtsd2ss, gen_helper_cvtss2sd); } + +static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) +{ + TCGv_ptr ptr =3D tcg_temp_new_ptr(); + + tcg_gen_addi_ptr(ptr, cpu_env, offsetof(CPUX86State, xmm_t0)); + gen_helper_memset(ptr, ptr, tcg_constant_i32(0), + tcg_constant_ptr(CPU_NB_REGS * sizeof(ZMMReg))); + tcg_temp_free_ptr(ptr); +} + +static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode) +{ + int i; + + for (i =3D 0; i < CPU_NB_REGS; i++) { + int offset =3D ZMM_OFFSET(i) + offsetof(ZMMReg, ZMM_X(0)); + 
tcg_gen_gvec_mov(MO_64, offset, offset, 16, 32); + } +} diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 4404440d87..4d0bf511fa 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4695,8 +4695,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) use_new &=3D b <=3D limit; #endif if (use_new && - ((b >=3D 0x150 && b <=3D 0x16f) || - (b >=3D 0x178 && b <=3D 0x17f) || + ((b >=3D 0x150 && b <=3D 0x17f) || (b >=3D 0x1d8 && b <=3D 0x1ff && (b & 8)))) { disas_insn_new(s, cpu, b + 0x100); return s->pc; --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663710908; cv=none; d=zohomail.com; s=zohoarc; b=Yp/daO5hzfannhXGxXh1CZuRoG0j0jHv9Wf5fuLIf5Sqfu4ihO39AxT/ZVqyMyQzJ/khSrJjWLBIwdu51iHMOdnn6VlesB08tRDu4R7cmqaLFBkxswDknVO8aBSpxeEfpsdUpC8ww8fJJtkuigbLKiBTIiaPMxLtw1kI1UC8ryc= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663710908; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=nXGMPhyWPieM/QIjTI01XrC9LGHf1/ZCQfetOtjgTZI=; b=BAhMveVLa6BAX/GzjjLgWcb5UY3Rwwqm8c+ryGws2S7q2EF1jXcU0fw4enzrc6NI9zNk7K/ZJ0xKzIYUp2/nvTvm61EVlvq2gCpp9gDENboiYPu2Sid22N/4aVCTgG6EXi8olZdP2jkcEFKGMEJZ/aLJZ2LZVEfJlsPYVQlS7t0= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org 
(lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 166371090870364.09501934014042; Tue, 20 Sep 2022 14:55:08 -0700 (PDT) Received: from localhost ([::1]:32904 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalCt-0006PQ-Bh for importer@patchew.org; Tue, 20 Sep 2022 17:55:07 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:57428) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0M-0006T2-Ke for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:59 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:24586) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0H-0002Dl-Uh for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:53 -0400 Received: from mail-ed1-f69.google.com (mail-ed1-f69.google.com [209.85.208.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-52-D4krkW89NPS-eoDcDKA-xg-1; Tue, 20 Sep 2022 13:25:47 -0400 Received: by mail-ed1-f69.google.com with SMTP id w17-20020a056402269100b0045249bc17a9so2369385edd.9 for ; Tue, 20 Sep 2022 10:25:47 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id o5-20020a170906600500b007708130c287sm200140ejj.40.2022.09.20.10.25.44 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:45 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694749; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=nXGMPhyWPieM/QIjTI01XrC9LGHf1/ZCQfetOtjgTZI=; b=KAA+NeN5O9HAvRXrWTRbzARFNtWzn1bHPZbHPRpraBrS1xhDIcoUBRUUDJEwYbNtGRIdi3 
xvMbAZsHd1ouYINC3zVqi+fLLsuIC9mvIiEfGLK3APvyCKvHHOOYB7oZPfLkkweFbld7kI 31ufPXnP8JDLqJfQwF1LqxAngckgxd4= X-MC-Unique: D4krkW89NPS-eoDcDKA-xg-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=nXGMPhyWPieM/QIjTI01XrC9LGHf1/ZCQfetOtjgTZI=; b=H6PK80fzptfEznVulx9W0JPFJuAwQHa0zYoJ6NbBXxXrA6+M4ZbGdXzh2NAGwRGX/H YRH8BnPgiim7azH3MfGQskNTpazhFdEiGlNhKesFHBK7QTluPWEnWk5HEWa0bUg6eWl/ GR40rO11oT1w20t9tqMyLOyJEYtKHeAZxPRwL/RDzxpITSP2VziVIyje2f5HDoaVl+og LLzDx5ohDcIlUaaUomzZK0bEYq9MYxKqJMKj792ZvmKc3hAoq+UY44Cv+uslKA0bV58E KvUafw1auJJ+to0EU220WwIawVQbEj64xKtX1m+NujAEjRLY3NAoOQ8teLKutyRbbWpH /TYA== X-Gm-Message-State: ACrzQf1mDD1512RtLwAqzXa5/9Opw145Y62N7MHn960oUutLVw5UGMO1 4EGVot6Uwwe2UXI4a+QZ30C9vrzNLtQeI6V5q68tRpt7jkvK5mYtcqarGWUWvqvDG5u5LW7UHQA aDJ0qfN+b7JX+H3Rwwqx1x+rrgjR81RlYZWSg7Er44ACtRLRcvL4RulcZo7qR7JdBor0= X-Received: by 2002:aa7:dc13:0:b0:443:3f15:8440 with SMTP id b19-20020aa7dc13000000b004433f158440mr20811476edu.274.1663694746203; Tue, 20 Sep 2022 10:25:46 -0700 (PDT) X-Google-Smtp-Source: AMsMyM4pmBQfMJWlDBtWqs9kWEKUQYmbAjAzgPSDZHoXFXxuUi4SN1exdvScaG77/o5TDGIuNPAXbw== X-Received: by 2002:aa7:dc13:0:b0:443:3f15:8440 with SMTP id b19-20020aa7dc13000000b004433f158440mr20811451edu.274.1663694745885; Tue, 20 Sep 2022 10:25:45 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 24/37] target/i386: reimplement 0x0f 0xd0-0xd7, 0xe0-0xe7, 0xf0-0xf7, add AVX Date: Tue, 20 Sep 2022 19:24:54 +0200 Message-Id: <20220920172507.95568-25-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 
209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663710910810100001 Content-Type: text/plain; charset="utf-8" The more complicated ones here are d6-d7, e6-e7, f7. The others are trivial. For LDDQU, using gen_load_sse directly might corrupt the register if the second part of the load fails. Therefore, add a custom X86_TYPE_WM value; like X86_TYPE_W it does call gen_load(), but it also rejects a value of 11 in the ModRM field like X86_TYPE_M. 
Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 53 ++++++++++++++++++++++ target/i386/tcg/decode-new.h | 1 + target/i386/tcg/emit.c.inc | 77 +++++++++++++++++++++++++++----- target/i386/tcg/translate.c | 2 +- 4 files changed, 122 insertions(+), 11 deletions(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index 02f4063ebc..0bc41b01c9 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -289,6 +289,18 @@ static void decode_0F7F(DisasContext *s, CPUX86State *= env, X86OpEntry *entry, ui *entry =3D *decode_by_prefix(s, opcodes_0F7F); } =20 +static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry movq[4] =3D { + {}, + X86_OP_ENTRY3(MOVQ, W,x, None, None, V,q, vex5), + X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q), + X86_OP_ENTRY3(MOVq_dq, P,q, None, None, U,q), + }; + + *entry =3D *decode_by_prefix(s, movq); +} + static const X86OpEntry opcodes_0F38_00toEF[240] =3D { }; =20 @@ -398,6 +410,17 @@ static void decode_0F5B(DisasContext *s, CPUX86State *= env, X86OpEntry *entry, ui *entry =3D *decode_by_prefix(s, opcodes_0F5B); } =20 +static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0FE6[4] =3D { + {}, + X86_OP_ENTRY2(VCVTTPD2DQ, V,x, W,x, vex2), + X86_OP_ENTRY2(VCVTDQ2PD, V,x, W,x, vex2), + X86_OP_ENTRY2(VCVTPD2DQ, V,x, W,x, vex2), + }; + *entry =3D *decode_by_prefix(s, opcodes_0FE6); +} + static const X86OpEntry opcodes_0F[256] =3D { [0x50] =3D X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66= ), [0x51] =3D X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_= f3_f2), @@ -454,6 +477,33 @@ static const X86OpEntry opcodes_0F[256] =3D { [0x7e] =3D X86_OP_GROUP0(0F7E), [0x7f] =3D X86_OP_GROUP0(0F7F), =20 + [0xd0] =3D X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(S= SE3) p_66_f2), + [0xd1] =3D 
X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xd2] =3D X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xd3] =3D X86_OP_ENTRY3(PSRLQ_r, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xd4] =3D X86_OP_ENTRY3(PADDQ, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xd5] =3D X86_OP_ENTRY3(PMULLW, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xd6] =3D X86_OP_GROUP0(0FD6), + [0xd7] =3D X86_OP_ENTRY3(PMOVMSKB, G,d, None,None, U,x, vex7 mmx avx= 2_256 p_00_66), + + [0xe0] =3D X86_OP_ENTRY3(PAVGB, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xe1] =3D X86_OP_ENTRY3(PSRAW_r, V,x, H,x, W,x, vex7 mmx avx= 2_256 p_00_66), + [0xe2] =3D X86_OP_ENTRY3(PSRAD_r, V,x, H,x, W,x, vex7 mmx avx= 2_256 p_00_66), + [0xe3] =3D X86_OP_ENTRY3(PAVGW, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xe4] =3D X86_OP_ENTRY3(PMULHUW, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xe5] =3D X86_OP_ENTRY3(PMULHW, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xe6] =3D X86_OP_GROUP0(0FE6), + [0xe7] =3D X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 mmx p_0= 0_66), /* MOVNTQ/MOVNTDQ */ + + [0xf0] =3D X86_OP_ENTRY3(MOVDQ, V,x, None,None, WM,x, vex4_unal cp= uid(SSE3) p_f2), /* LDDQU */ + [0xf1] =3D X86_OP_ENTRY3(PSLLW_r, V,x, H,x, W,x, vex7 mmx avx= 2_256 p_00_66), + [0xf2] =3D X86_OP_ENTRY3(PSLLD_r, V,x, H,x, W,x, vex7 mmx avx= 2_256 p_00_66), + [0xf3] =3D X86_OP_ENTRY3(PSLLQ_r, V,x, H,x, W,x, vex7 mmx avx= 2_256 p_00_66), + [0xf4] =3D X86_OP_ENTRY3(PMULUDQ, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xf5] =3D X86_OP_ENTRY3(PMADDWD, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xf6] =3D X86_OP_ENTRY3(PSADBW, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), + [0xf7] =3D X86_OP_ENTRY3(MASKMOV, None,None, V,dq, U,dq, vex4_unal av= x2_256 mmx p_00_66), + /* Incorrectly missing from 2-17 */ [0xd8] =3D X86_OP_ENTRY3(PSUBUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p= _00_66), [0xd9] =3D X86_OP_ENTRY3(PSUBUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p= 
_00_66), @@ -710,6 +760,9 @@ static bool decode_op(DisasContext *s, CPUX86State *env= , X86DecodedInsn *decode, } goto get_modrm; =20 + case X86_TYPE_WM: /* modrm byte selects an XMM/YMM memory operand */ + op->unit =3D X86_OP_SSE; + /* fall through */ case X86_TYPE_M: /* modrm byte selects a memory operand */ modrm =3D get_modrm(s, env); if ((modrm >> 6) =3D=3D 3) { diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index 5fb68a365c..c248b089b7 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -47,6 +47,7 @@ typedef enum X86OpType { X86_TYPE_Y, /* string destination */ =20 /* Custom */ + X86_TYPE_WM, /* modrm byte selects an XMM/YMM memory operand */ X86_TYPE_2op, /* 2-operand RMW instruction */ X86_TYPE_LoBits, /* encoded in bits 0-2 of the operand + REX.B */ X86_TYPE_0, /* Hard-coded GPRs (RAX..RDI) */ diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 062d8213b3..0e0783416b 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -471,6 +471,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *e= nv, X86DecodedInsn *decod } HORIZONTAL_FP_SSE(VHADD, hadd) HORIZONTAL_FP_SSE(VHSUB, hsub) +HORIZONTAL_FP_SSE(VADDSUB, addsub) =20 #define BINARY_INT_GVEC(uname, func, ...) 
= \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ @@ -485,6 +486,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *e= nv, X86DecodedInsn *decod BINARY_INT_GVEC(PADDB, tcg_gen_gvec_add, MO_8) BINARY_INT_GVEC(PADDW, tcg_gen_gvec_add, MO_16) BINARY_INT_GVEC(PADDD, tcg_gen_gvec_add, MO_32) +BINARY_INT_GVEC(PADDQ, tcg_gen_gvec_add, MO_64) BINARY_INT_GVEC(PADDSB, tcg_gen_gvec_ssadd, MO_8) BINARY_INT_GVEC(PADDSW, tcg_gen_gvec_ssadd, MO_16) BINARY_INT_GVEC(PADDUSB, tcg_gen_gvec_usadd, MO_8) @@ -500,6 +502,7 @@ BINARY_INT_GVEC(PMAXSW, tcg_gen_gvec_smax, MO_16) BINARY_INT_GVEC(PMAXUB, tcg_gen_gvec_umax, MO_8) BINARY_INT_GVEC(PMINSW, tcg_gen_gvec_smin, MO_16) BINARY_INT_GVEC(PMINUB, tcg_gen_gvec_umin, MO_8) +BINARY_INT_GVEC(PMULLW, tcg_gen_gvec_mul, MO_16) BINARY_INT_GVEC(POR, tcg_gen_gvec_or, MO_64) BINARY_INT_GVEC(PSUBB, tcg_gen_gvec_sub, MO_8) BINARY_INT_GVEC(PSUBW, tcg_gen_gvec_sub, MO_16) @@ -557,6 +560,23 @@ BINARY_INT_MMX(PUNPCKHWD, punpckhwd) BINARY_INT_MMX(PUNPCKHDQ, punpckhdq) BINARY_INT_MMX(PACKSSDW, packssdw) =20 +BINARY_INT_MMX(PAVGB, pavgb) +BINARY_INT_MMX(PAVGW, pavgw) +BINARY_INT_MMX(PMADDWD, pmaddwd) +BINARY_INT_MMX(PMULHUW, pmulhuw) +BINARY_INT_MMX(PMULHW, pmulhw) +BINARY_INT_MMX(PMULUDQ, pmuludq) +BINARY_INT_MMX(PSADBW, psadbw) + +BINARY_INT_MMX(PSLLW_r, psllw) +BINARY_INT_MMX(PSLLD_r, pslld) +BINARY_INT_MMX(PSLLQ_r, psllq) +BINARY_INT_MMX(PSRLW_r, psrlw) +BINARY_INT_MMX(PSRLD_r, psrld) +BINARY_INT_MMX(PSRLQ_r, psrlq) +BINARY_INT_MMX(PSRAW_r, psraw) +BINARY_INT_MMX(PSRAD_r, psrad) + /* Instructions with no MMX equivalent. 
*/ #define BINARY_INT_SSE(uname, lname) = \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ @@ -588,6 +608,9 @@ static void gen_##uname(DisasContext *s, CPUX86State *e= nv, X86DecodedInsn *decod gen_helper_##lname##_ymm); = \ } =20 +UNARY_INT_SSE(VCVTDQ2PD, cvtdq2pd) +UNARY_INT_SSE(VCVTPD2DQ, cvtpd2dq) +UNARY_INT_SSE(VCVTTPD2DQ, cvttpd2dq) UNARY_INT_SSE(VCVTDQ2PS, cvtdq2ps) UNARY_INT_SSE(VCVTPS2DQ, cvtps2dq) UNARY_INT_SSE(VCVTTPS2DQ, cvttps2dq) @@ -802,6 +825,19 @@ static void gen_INSERTQ_r(DisasContext *s, CPUX86State= *env, X86DecodedInsn *dec gen_helper_insertq_r(cpu_env, OP_PTR0, OP_PTR2); } =20 +static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); + gen_extu(s->aflag, s->A0); + gen_add_A0_ds_seg(s); + + if (s->prefix & PREFIX_DATA) { + gen_helper_maskmov_xmm(cpu_env, OP_PTR1, OP_PTR2, s->A0); + } else { + gen_helper_maskmov_mmx(cpu_env, OP_PTR1, OP_PTR2, s->A0); + } +} + static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) { MemOp ot =3D decode->op[0].ot; @@ -876,16 +912,27 @@ static void gen_MOVQ(DisasContext *s, CPUX86State *en= v, X86DecodedInsn *decode) int lo_ofs =3D vector_elem_offset(&decode->op[0], MO_64, 0); =20 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset); - /* - * tcg_gen_gvec_dup_i64(MO_64, op0.offset, 8, vec_len, s->tmp1_64) wou= ld - * seem to work, but it does not on big-endian platforms; the cleared = parts - * are always at higher addresses, but cross-endian emulation inverts = the - * byte order so that the cleared parts need to be at *lower* addresse= s. - * Because oprsz is 8, we see this here even for SSE; but more in gene= ral, - * it disqualifies using oprsz < maxsz to emulate VEX128. 
- */ - tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, lo_ofs); + if (decode->op[0].has_ea) { + gen_op_st_v(s, MO_64, s->tmp1_i64, s->A0); + } else { + /* + * tcg_gen_gvec_dup_i64(MO_64, op0.offset, 8, vec_len, s->tmp1_64)= would + * seem to work, but it does not on big-endian platforms; the clea= red parts + * are always at higher addresses, but cross-endian emulation inve= rts the + * byte order so that the cleared parts need to be at *lower* addr= esses. + * Because oprsz is 8, we see this here even for SSE; but more in = general, + * it disqualifies using oprsz < maxsz to emulate VEX128. + */ + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len= , 0); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, lo_ofs); + } +} + +static void gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + gen_helper_enter_mmx(cpu_env); + /* Otherwise the same as any other movq. */ + return gen_MOVQ(s, env, decode); } =20 static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) @@ -939,6 +986,16 @@ static void gen_PEXT(DisasContext *s, CPUX86State *env= , X86DecodedInsn *decode) gen_helper_pext(s->T0, s->T0, s->T1); } =20 +static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) +{ + if (s->prefix & PREFIX_DATA) { + gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, OP_PTR2); + } else { + gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, OP_PTR2); + } + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); +} + static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) { TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 4d0bf511fa..2366fd4211 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4696,7 +4696,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) #endif if (use_new && ((b >=3D 0x150 && b <=3D 0x17f) || 
- (b >=3D 0x1d8 && b <=3D 0x1ff && (b & 8)))) { + (b >=3D 0x1d0 && b <=3D 0x1ff))) { disas_insn_new(s, cpu, b + 0x100); return s->pc; } --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713578; cv=none; d=zohomail.com; s=zohoarc; b=j5b1YLjbyx5oHg8bFjctJuRfurU5zW7aUFOxlrAf/7Wll8fceU7mOXLkXkaiiXWUayHnd7Mnt7xUApNW5PIdmbaxFCTeCF1v1eJq2fYhZvRYw8x/Z7Csr3fAvuVtxuA9olURP0k84WXEjGHFtyfod10vrtVmDRpwFoy9da1f0zQ= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713578; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=v9Lnu9mvyjwzq9QbOxVjShbFP29UAYt4sG34U5TM//s=; b=WjSjgHXA9VZGUeUAJAw13Mt1Q7GcGGg85hXNqZUk7ma4truwglbKX/xvlD5WaWC1L9i2tWEsn2mnRgxNo2YLxGr9rtUF3TPuk55JT6T9B4QwgzAa9QUt9joHZVd5xQ1YYlzIxpH13aNpNB+2ENcSDnQ32QL8KEtITgNU8gr/9eo= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663713578040299.5798525128023; Tue, 20 Sep 2022 15:39:38 -0700 (PDT) Received: from localhost ([::1]:57156 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oaltw-0000gv-St for importer@patchew.org; Tue, 20 Sep 2022 18:39:36 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:57426) by lists.gnu.org with esmtps 
(TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0L-0006Su-Hy for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:57 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:24911) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0I-0002Dy-Ot for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:53 -0400 Received: from mail-ej1-f70.google.com (mail-ej1-f70.google.com [209.85.218.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-114-9CXht9ZFPUuihAQ-UGw4Mw-1; Tue, 20 Sep 2022 13:25:49 -0400 Received: by mail-ej1-f70.google.com with SMTP id qb30-20020a1709077e9e00b0077d1271283eso1784704ejc.2 for ; Tue, 20 Sep 2022 10:25:48 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id h23-20020a170906399700b00773f3cb67ffsm197178eje.28.2022.09.20.10.25.46 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:46 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694750; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=v9Lnu9mvyjwzq9QbOxVjShbFP29UAYt4sG34U5TM//s=; b=QD94fVdwymNCc8YWr8BtdWLBZjE1SPZJ70Op9YIaLdqZ5a5JKS2zWeISi4wpuhfnk02e8g V0xZ9J7jXoh2Bz+EidHqH1RvmTU+FqEj/xZtZwYqXtt+L9VJ7x5RGYwmv+mV4YauL8Mt6w azCuIBu3ojxVWajM/9QrPW/eqjv5Fuo= X-MC-Unique: 9CXht9ZFPUuihAQ-UGw4Mw-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=v9Lnu9mvyjwzq9QbOxVjShbFP29UAYt4sG34U5TM//s=; 
b=CMocuOlN/iqAh+l/Ettz7ptOMjShfLmRtbr46au89b+cDA/cFS9l8vjMzgRFBsIP0Z 4O/Jv0GC+4UdSNjLil2GAVrtovIamCDb1MolCAHv0TbisioSjyK1kFfKS/3t5EvPLPmy Doz/ihdT+le62vISADKIWe0BeQX7CCc75AjjwQAthg7FtTSbknoIaN6PXkE4e/RccpE2 ejCa98TVLslNbKkIJsoawzNwN6XQfVfxuCjRCWqRJrp+XA7/jpWbpYQ1N8/N9RyfymaY WxSa52PYQfOx7yE6nfdZgineSMVxdK0cPPYAieWnEF2oGyQAKVQgw1dyJ6EHFi9Jd2qR 3HpA== X-Gm-Message-State: ACrzQf2aZRKg/aVBhTVakJtnl8NZJAd1f6Tccj33uujDuwFZZ0BugDUd SNY8gYv8XLdtdJiJOPMbw20EujpOPNfqgf9+RehzHnMpoCxnDMu413Zk+PfqSWD8e/l6IhGcwNS 3O3njaSzX+CpYZkQbVV+zUnpGgqUSkDO1J7vFg6GbcTDg6Sw+tFTp2nONWsyW5lV/4AM= X-Received: by 2002:a17:906:9b8f:b0:77b:2b61:ae2d with SMTP id dd15-20020a1709069b8f00b0077b2b61ae2dmr17442068ejc.50.1663694747565; Tue, 20 Sep 2022 10:25:47 -0700 (PDT) X-Google-Smtp-Source: AMsMyM4tTU8Y48HmCCvLb9E4lnxYN+q9w8HH1jG+1B4s8W8Fkjvzv18KmXxygeQ437c2ogixKc390A== X-Received: by 2002:a17:906:9b8f:b0:77b:2b61:ae2d with SMTP id dd15-20020a1709069b8f00b0077b2b61ae2dmr17442046ejc.50.1663694747250; Tue, 20 Sep 2022 10:25:47 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 25/37] target/i386: clarify (un)signedness of immediates from 0F3Ah opcodes Date: Tue, 20 Sep 2022 19:24:55 +0200 Message-Id: <20220920172507.95568-26-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, 
DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713578853100001 Content-Type: text/plain; charset="utf-8" Three-byte opcodes from the 0F3Ah area all have an immediate byte which is usually unsigned. Clarify in the helper code that it is unsigned; the new decoder treats immediates as signed by default, and seeing an intN_t in the prototype might give the wrong impression that one can use decode->immediate directly. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/ops_sse.h | 8 ++++---- target/i386/ops_sse_header.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 090ba013b3..e7830ff277 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -1605,17 +1605,17 @@ SSE_HELPER_W(helper_psignw, FSIGNW) SSE_HELPER_L(helper_psignd, FSIGNL) =20 void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, - int32_t shift) + uint32_t imm) { int i; =20 /* XXX could be checked during translation */ - if (shift >=3D (SHIFT ? 32 : 16)) { + if (imm >=3D (SHIFT ? 32 : 16)) { for (i =3D 0; i < (1 << SHIFT); i++) { d->Q(i) =3D 0; } } else { - shift <<=3D 3; + int shift =3D imm * 8; #define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? 
v >> (i) : (v << -(i)) : 0) #if SHIFT =3D=3D 0 d->Q(0) =3D SHR(s->Q(0), shift - 0) | @@ -2093,7 +2093,7 @@ static inline int pcmp_val(Reg *r, uint8_t ctrl, int = i) } =20 static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s, - int8_t ctrl, int valids, int validd) + uint8_t ctrl, int valids, int validd) { unsigned int res =3D 0; int v; diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 440f1c0e78..98178be148 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -335,7 +335,7 @@ DEF_HELPER_4(glue(pshufb, SUFFIX), void, env, Reg, Reg,= Reg) DEF_HELPER_4(glue(psignb, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(psignw, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(psignd, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_5(glue(palignr, SUFFIX), void, env, Reg, Reg, Reg, s32) +DEF_HELPER_5(glue(palignr, SUFFIX), void, env, Reg, Reg, Reg, i32) =20 /* SSE4.1 op helpers */ #if SHIFT >=3D 1 --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663712549; cv=none; d=zohomail.com; s=zohoarc; b=hV1seyx3dt/6C39fu/chreMXq2bjBgBFAXejw9i61dG7toJkZrjuCmpzfEjZ/+NMlyr4+Skg+f3UIegKaxorjmdbgX29lVeX1uhNF23B8xWP4U33SIOQnpUQstM8PuoRXykbB/S5JFiMOYOIEP3V6usERWJhPr09LzkaWZoEAz0= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663712549; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=sDJqAb6a5pfeDt1eLTOtA+KyzkZgubtGC2La834bVzs=; 
b=nqOEkojrdgr12DYWlMMxVcICoChfmbf6re6FMvcXG4qN9lgQioAvenO7zD9DghJ3jA1MIhYDjGA18Lfm0Ba4K7HLAZM2bSKAquCiN1eUdRlXxzMe7T1EX2XPgAg/Q8rKNb8jJ+j2jvZF7ZjSVwEphSAbObbGWkKAKnWYntabAhQ= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663712549251883.5896026589276; Tue, 20 Sep 2022 15:22:29 -0700 (PDT) Received: from localhost ([::1]:34616 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oaldM-0001o7-1U for importer@patchew.org; Tue, 20 Sep 2022 18:22:28 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:39750) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0N-0006T4-Pv for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:59 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:23026) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0K-0002Ea-UT for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:55 -0400 Received: from mail-ed1-f72.google.com (mail-ed1-f72.google.com [209.85.208.72]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-482-6a0VzYeEMiOID2vhio1_DA-1; Tue, 20 Sep 2022 13:25:51 -0400 Received: by mail-ed1-f72.google.com with SMTP id y14-20020a056402440e00b0044301c7ccd9so2348247eda.19 for ; Tue, 20 Sep 2022 10:25:50 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id k9-20020a17090632c900b0076f08f6b563sm188160ejk.65.2022.09.20.10.25.47 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:48 -0700 (PDT) DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694752; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=sDJqAb6a5pfeDt1eLTOtA+KyzkZgubtGC2La834bVzs=; b=OqhGL4HM6n38NZWd597bNIqvGPG80MjrAbjbTCkahuKxrgS8tO2ewsM+Q4fnRBzjGIqSpk uEiL18qouEqi0k+CvVIscRbHCpOYZ7sxldjRAnYizTGXISVRzLca50FiViQ/C+OmoVeFCA x+mOAZlO4wUCDKIMUaXSbgjqs94n6Us= X-MC-Unique: 6a0VzYeEMiOID2vhio1_DA-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=sDJqAb6a5pfeDt1eLTOtA+KyzkZgubtGC2La834bVzs=; b=lRHziFopXgP6c4F/ZEM0uFi11QycJEAd1mwT+02pRYAR/QhPFyUC3NK8vHpc92wXmI Tdsa8loavLpt82ruvovYwFnYsiUe4mhrEVHFtiAr82Q52vWNbRSEAEVKL5Zf76hYViMK y8kGJvT+gScldQy7hr7Cegei8ubCnac/D1700pJOTG3gGu9lbv1DFJPrcqT67Rz9gNyr WVxkRF/gkyay+Pf6yYytIBEQVR1k77ACbJPBq/VrgvPZsYMDQh+0vGUt20Lv4yup5Qr7 auTj88arw6EBy8QJ7NKyvcxhEV5mRPYnsChECc5sH0BSg413eTp8uaBCLsVKw0JYfEe/ tRzQ== X-Gm-Message-State: ACrzQf1PazTLAVwebiuPnUDPS4tvUU/Nnxl4fMfztzfHOVHWw4oQkDt3 jJv/8BVFqYBGpeAI+as2pQFhek5xDXjZ2Fg75Xhy2FxjIwv9quLkMcZxCZO0CbSNLU2T8X/BRdU 0KBo/zny0L1wGlu0MBnxj6XRhxaolCFeCA2/fjQhgQytPDaHS2nv8vWiJDpK3rK1ZWho= X-Received: by 2002:a17:907:1c29:b0:77d:89d8:f7b2 with SMTP id nc41-20020a1709071c2900b0077d89d8f7b2mr17976136ejc.639.1663694749402; Tue, 20 Sep 2022 10:25:49 -0700 (PDT) X-Google-Smtp-Source: AMsMyM6k1qRDueA9LGkBUOxgfB5BYckE8BWZ08ZKEfF2+2OIORLI/IVx4eQD5bs4KPi8ck1EYJmLSA== X-Received: by 2002:a17:907:1c29:b0:77d:89d8:f7b2 with SMTP id nc41-20020a1709071c2900b0077d89d8f7b2mr17976098ejc.639.1663694748756; Tue, 20 Sep 2022 10:25:48 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 
26/37] target/i386: reimplement 0x0f 0x3a, add AVX Date: Tue, 20 Sep 2022 19:24:56 +0200 Message-Id: <20220920172507.95568-27-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663712550006100001 Content-Type: text/plain; charset="utf-8" The more complicated operations here are insertions and extractions. Otherwise, there are just more entries than usual because the PS/PD/SS/SD variations are encoded in the opcode rather than in the prefixes. These three-byte opcodes also include AVX new instructions, whose implementation in the helpers was originally done by Paul Brook . 
Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/ops_sse.h | 95 ++++++++++ target/i386/ops_sse_header.h | 10 + target/i386/tcg/decode-new.c.inc | 75 ++++++++ target/i386/tcg/emit.c.inc | 309 +++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 3 +- 5 files changed, 491 insertions(+), 1 deletion(-) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index e7830ff277..cb8909adcf 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -2381,6 +2381,101 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86Sta= te *env, Reg *d, Reg *s, #endif #endif =20 +#if SHIFT >=3D 1 +void glue(helper_vpermilpd_imm, SUFFIX)(Reg *d, Reg *s, uint32_t order) +{ + uint64_t r0, r1; + int i; + + for (i =3D 0; i < 1 << SHIFT; i +=3D 2) { + r0 =3D s->Q(i + ((order >> 0) & 1)); + r1 =3D s->Q(i + ((order >> 1) & 1)); + d->Q(i) =3D r0; + d->Q(i+1) =3D r1; + + order >>=3D 2; + } +} + +void glue(helper_vpermilps_imm, SUFFIX)(Reg *d, Reg *s, uint32_t order) +{ + uint32_t r0, r1, r2, r3; + int i; + + for (i =3D 0; i < 2 << SHIFT; i +=3D 4) { + r0 =3D s->L(i + ((order >> 0) & 3)); + r1 =3D s->L(i + ((order >> 2) & 3)); + r2 =3D s->L(i + ((order >> 4) & 3)); + r3 =3D s->L(i + ((order >> 6) & 3)); + d->L(i) =3D r0; + d->L(i+1) =3D r1; + d->L(i+2) =3D r2; + d->L(i+3) =3D r3; + } +} + +#if SHIFT >=3D 2 +void helper_vpermdq_ymm(Reg *d, Reg *v, Reg *s, uint32_t order) +{ + uint64_t r0, r1, r2, r3; + + switch (order & 3) { + case 0: + r0 =3D v->Q(0); + r1 =3D v->Q(1); + break; + case 1: + r0 =3D v->Q(2); + r1 =3D v->Q(3); + break; + case 2: + r0 =3D s->Q(0); + r1 =3D s->Q(1); + break; + case 3: + r0 =3D s->Q(2); + r1 =3D s->Q(3); + break; + } + switch ((order >> 4) & 3) { + case 0: + r2 =3D v->Q(0); + r3 =3D v->Q(1); + break; + case 1: + r2 =3D v->Q(2); + r3 =3D v->Q(3); + break; + case 2: + r2 =3D s->Q(0); + r3 =3D s->Q(1); + break; + case 3: + r2 =3D s->Q(2); + r3 =3D s->Q(3); + break; + } + d->Q(0) =3D r0; + d->Q(1) =3D r1; + d->Q(2) =3D r2; + d->Q(3) 
=3D r3; +} + +void helper_vpermq_ymm(Reg *d, Reg *s, uint32_t order) +{ + uint64_t r0, r1, r2, r3; + r0 =3D s->Q(order & 3); + r1 =3D s->Q((order >> 2) & 3); + r2 =3D s->Q((order >> 4) & 3); + r3 =3D s->Q((order >> 6) & 3); + d->Q(0) =3D r0; + d->Q(1) =3D r1; + d->Q(2) =3D r2; + d->Q(3) =3D r3; +} +#endif +#endif + #undef SSE_HELPER_S =20 #undef LANE_WIDTH diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 98178be148..1afc4ff6a1 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -411,6 +411,16 @@ DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env,= Reg, Reg, i32) DEF_HELPER_5(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, Reg, i32) #endif =20 +/* AVX helpers */ +#if SHIFT >=3D 1 +DEF_HELPER_3(glue(vpermilpd_imm, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(vpermilps_imm, SUFFIX), void, Reg, Reg, i32) +#if SHIFT =3D=3D 2 +DEF_HELPER_4(vpermdq_ymm, void, Reg, Reg, Reg, i32) +DEF_HELPER_3(vpermq_ymm, void, Reg, Reg, i32) +#endif +#endif + #undef SHIFT #undef Reg #undef SUFFIX diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index 0bc41b01c9..a1f0d7a24c 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -375,7 +375,78 @@ static void decode_0F38(DisasContext *s, CPUX86State *= env, X86OpEntry *entry, ui } } =20 +static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry= *entry, uint8_t *b) +{ + static const X86OpEntry + vinsertps_reg =3D X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex= 5 cpuid(SSE41) p_66), + vinsertps_mem =3D X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d, vex= 5 cpuid(SSE41) p_66); + + int modrm =3D get_modrm(s, env); + *entry =3D (modrm >> 6) =3D=3D 3 ? vinsertps_reg : vinsertps_mem; +} + static const X86OpEntry opcodes_0F3A[256] =3D { + /* + * These are VEX-only, but incorrectly listed in the manual as excepti= on type 4. 
+ * Also the "qq" instructions are sometimes omitted by Table 2-17, but= are VEX256 + * only. + */ + [0x00] =3D X86_OP_ENTRY3(VPERMQ, V,qq, W,qq, I,b, vex6 cpuid(AVX= 2) p_66), + [0x01] =3D X86_OP_ENTRY3(VPERMQ, V,qq, W,qq, I,b, vex6 cpuid(AVX= 2) p_66), /* VPERMPD */ + [0x02] =3D X86_OP_ENTRY4(VBLENDPS, V,x, H,x, W,x, vex6 cpuid(AVX= 2) p_66), /* VPBLENDD */ + [0x04] =3D X86_OP_ENTRY3(VPERMILPS_i, V,x, W,x, I,b, vex6 cpuid(AVX= ) p_66), + [0x05] =3D X86_OP_ENTRY3(VPERMILPD_i, V,x, W,x, I,b, vex6 cpuid(AVX= ) p_66), + [0x06] =3D X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 cpuid(AVX= ) p_66), + + [0x14] =3D X86_OP_ENTRY3(PEXTRB, E,b, V,dq, I,b, vex5 cpuid(SSE4= 1) zext0 p_66), + [0x15] =3D X86_OP_ENTRY3(PEXTRW, E,w, V,dq, I,b, vex5 cpuid(SSE4= 1) zext0 p_66), + [0x16] =3D X86_OP_ENTRY3(PEXTR, E,y, V,dq, I,b, vex5 cpuid(SSE4= 1) p_66), + [0x17] =3D X86_OP_ENTRY3(VEXTRACTPS, E,d, V,dq, I,b, vex5 cpuid(SSE4= 1) p_66), + + [0x20] =3D X86_OP_ENTRY4(PINSRB, V,dq, H,dq, E,b, vex5 cpuid(SSE4= 1) zext2 p_66), + [0x21] =3D X86_OP_GROUP0(VINSERTPS), + [0x22] =3D X86_OP_ENTRY4(PINSR, V,dq, H,dq, E,y, vex5 cpuid(SSE4= 1) p_66), + + [0x40] =3D X86_OP_ENTRY4(VDDPS, V,x, H,x, W,x, vex2 cpuid(SSE4= 1) p_66), + [0x41] =3D X86_OP_ENTRY4(VDDPD, V,dq, H,dq, W,dq, vex2 cpuid(SSE4= 1) p_66), + [0x42] =3D X86_OP_ENTRY4(VMPSADBW, V,x, H,x, W,x, vex2 cpuid(SSE4= 1) avx2_256 p_66), + [0x44] =3D X86_OP_ENTRY4(PCLMULQDQ, V,dq, H,dq, W,dq, vex4 cpuid(PCLM= ULQDQ) p_66), + [0x46] =3D X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 cpuid(AVX2= ) p_66), + + [0x60] =3D X86_OP_ENTRY4(PCMPESTRM, None,None, V,dq, W,dq, vex4_unal = cpuid(SSE42) p_66), + [0x61] =3D X86_OP_ENTRY4(PCMPESTRI, None,None, V,dq, W,dq, vex4_unal = cpuid(SSE42) p_66), + [0x62] =3D X86_OP_ENTRY4(PCMPISTRM, None,None, V,dq, W,dq, vex4_unal = cpuid(SSE42) p_66), + [0x63] =3D X86_OP_ENTRY4(PCMPISTRI, None,None, V,dq, W,dq, vex4_unal = cpuid(SSE42) p_66), + + [0x08] =3D X86_OP_ENTRY3(VROUNDPS, V,x, W,x, I,b, vex2 cpuid(SSE4= 1) 
p_66), + [0x09] =3D X86_OP_ENTRY3(VROUNDPD, V,x, W,x, I,b, vex2 cpuid(SSE4= 1) p_66), + /* + * Not listed as four operand in the manual. Also writes and reads 12= 8-bits + * from the first two operands due to the V operand picking higher ent= ries of + * the H operand; the "Vss,Hss,Wss" description from the manual is inc= orrect. + * For other unary operations such as VSQRTSx this is hidden by the "R= EPScalar" + * value of vex_special, because the table lists the operand types of = VSQRTPx. + */ + [0x0a] =3D X86_OP_ENTRY4(VROUNDSS, V,x, H,x, W,ss, vex3 cpuid(SSE41= ) p_66), + [0x0b] =3D X86_OP_ENTRY4(VROUNDSD, V,x, H,x, W,sd, vex3 cpuid(SSE41= ) p_66), + [0x0c] =3D X86_OP_ENTRY4(VBLENDPS, V,x, H,x, W,x, vex4 cpuid(SSE4= 1) p_66), + [0x0d] =3D X86_OP_ENTRY4(VBLENDPD, V,x, H,x, W,x, vex4 cpuid(SSE4= 1) p_66), + [0x0e] =3D X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE4= 1) p_66), + [0x0f] =3D X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx p_00_66), + + [0x18] =3D X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 cpuid(AV= X) p_66), + [0x19] =3D X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 cpuid(AV= X) p_66), + + [0x38] =3D X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 cpuid(AV= X2) p_66), + [0x39] =3D X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 cpuid(AV= X2) p_66), + + /* Listed incorrectly as type 4 */ + [0x4a] =3D X86_OP_ENTRY4(VBLENDVPS, V,x, H,x, W,x, vex6 cpuid(AVX)= p_66), + [0x4b] =3D X86_OP_ENTRY4(VBLENDVPD, V,x, H,x, W,x, vex6 cpuid(AVX)= p_66), + [0x4c] =3D X86_OP_ENTRY4(VPBLENDVB, V,x, H,x, W,x, vex6 cpuid(AVX)= p_66 avx2_256), + + [0xdf] =3D X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b, vex4 cpuid(AES)= p_66), + [0xF0] =3D X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2), }; =20 @@ -916,6 +987,10 @@ static bool decode_insn(DisasContext *s, CPUX86State *= env, X86DecodeFunc decode_ } } if (e->op3 !=3D X86_TYPE_None) { + /* + * A couple instructions actually use the extra immediate byte for= an Lx + * register 
operand; those are handled in the gen_* functions as o= ne off. + */ assert(e->op3 =3D=3D X86_TYPE_I && e->s3 =3D=3D X86_SIZE_b); s->rip_offset +=3D 1; } diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 0e0783416b..c5e90111a9 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -473,6 +473,55 @@ HORIZONTAL_FP_SSE(VHADD, hadd) HORIZONTAL_FP_SSE(VHSUB, hsub) HORIZONTAL_FP_SSE(VADDSUB, addsub) =20 +static inline void gen_ternary_sse(DisasContext *s, CPUX86State *env, X86D= ecodedInsn *decode, + int op3, SSEFunc_0_epppp xmm, SSEFunc_0= _epppp ymm) +{ + SSEFunc_0_epppp fn =3D s->vex_l ? ymm : xmm; + TCGv_ptr ptr3 =3D tcg_temp_new_ptr(); + + /* The format of the fourth input is Lx */ + tcg_gen_addi_ptr(ptr3, cpu_env, ZMM_OFFSET(op3)); + fn(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2, ptr3); + tcg_temp_free_ptr(ptr3); +} +#define TERNARY_SSE(uvname, lname) = \ +static void gen_##uvname(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) \ +{ = \ + gen_ternary_sse(s, env, decode, (uint8_t)decode->immediate >> 4, = \ + gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); = \ +} +TERNARY_SSE(VBLENDVPS, blendvps) +TERNARY_SSE(VBLENDVPD, blendvpd) +TERNARY_SSE(VPBLENDVB, pblendvb) + +static inline void gen_binary_imm_sse(DisasContext *s, CPUX86State *env, X= 86DecodedInsn *decode, + SSEFunc_0_epppi xmm, SSEFunc_0_epppi= ymm) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + if (!s->vex_l) { + xmm(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); + } else { + ymm(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); + } +} + +#define BINARY_IMM_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_binary_imm_sse(s, env, decode, = \ + gen_helper_##lname##_xmm, = \ + gen_helper_##lname##_ymm); = \ +} + +BINARY_IMM_SSE(VBLENDPD, blendpd) +BINARY_IMM_SSE(VBLENDPS, blendps) +BINARY_IMM_SSE(VPBLENDW, pblendw) +BINARY_IMM_SSE(VDDPS, dpps) +#define gen_helper_dppd_ymm NULL 
+BINARY_IMM_SSE(VDDPD, dppd) +BINARY_IMM_SSE(VMPSADBW, mpsadbw) +BINARY_IMM_SSE(PCLMULQDQ, pclmulqdq) + #define BINARY_INT_GVEC(uname, func, ...) = \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ { = \ @@ -638,6 +687,32 @@ static void gen_##uname(DisasContext *s, CPUX86State *= env, X86DecodedInsn *decod UNARY_IMM_SSE(PSHUFD, pshufd) UNARY_IMM_SSE(PSHUFHW, pshufhw) UNARY_IMM_SSE(PSHUFLW, pshuflw) +#define gen_helper_vpermq_xmm NULL +UNARY_IMM_SSE(VPERMQ, vpermq) +UNARY_IMM_SSE(VPERMILPS_i, vpermilps_imm) +UNARY_IMM_SSE(VPERMILPD_i, vpermilpd_imm) + +static inline void gen_unary_imm_fp_sse(DisasContext *s, CPUX86State *env,= X86DecodedInsn *decode, + SSEFunc_0_eppi xmm, SSEFunc_0_eppi= ymm) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + if (!s->vex_l) { + xmm(cpu_env, OP_PTR0, OP_PTR1, imm); + } else { + ymm(cpu_env, OP_PTR0, OP_PTR1, imm); + } +} + +#define UNARY_IMM_FP_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_unary_imm_fp_sse(s, env, decode, = \ + gen_helper_##lname##_xmm, = \ + gen_helper_##lname##_ymm); = \ +} + +UNARY_IMM_FP_SSE(VROUNDPS, roundps) +UNARY_IMM_FP_SSE(VROUNDPD, roundpd) =20 static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_= op) { @@ -958,6 +1033,18 @@ static void gen_MULX(DisasContext *s, CPUX86State *en= v, X86DecodedInsn *decode) =20 } =20 +static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + if (!(s->prefix & PREFIX_DATA)) { + gen_helper_palignr_mmx(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); + } else if (!s->vex_l) { + gen_helper_palignr_xmm(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); + } else { + gen_helper_palignr_ymm(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); + } +} + static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) { int vec_len =3D vector_len(s, decode); 
@@ -968,6 +1055,42 @@ static void gen_PANDN(DisasContext *s, CPUX86State *e= nv, X86DecodedInsn *decode) decode->op[1].offset, vec_len, vec_len); } =20 +static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + gen_helper_pcmpestri_xmm(cpu_env, OP_PTR1, OP_PTR2, imm); + set_cc_op(s, CC_OP_EFLAGS); +} + +static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + gen_helper_pcmpestrm_xmm(cpu_env, OP_PTR1, OP_PTR2, imm); + set_cc_op(s, CC_OP_EFLAGS); + if ((s->prefix & PREFIX_VEX) && !s->vex_l) { + tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_= X(1)), + 16, 16, 0); + } +} + +static void gen_PCMPISTRI(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + gen_helper_pcmpistri_xmm(cpu_env, OP_PTR1, OP_PTR2, imm); + set_cc_op(s, CC_OP_EFLAGS); +} + +static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + gen_helper_pcmpistrm_xmm(cpu_env, OP_PTR1, OP_PTR2, imm); + set_cc_op(s, CC_OP_EFLAGS); + if ((s->prefix & PREFIX_VEX) && !s->vex_l) { + tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_= X(1)), + 16, 16, 0); + } +} + static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) { MemOp ot =3D decode->op[1].ot; @@ -986,6 +1109,89 @@ static void gen_PEXT(DisasContext *s, CPUX86State *en= v, X86DecodedInsn *decode) gen_helper_pext(s->T0, s->T0, s->T1); } =20 +static inline void gen_pextr(DisasContext *s, CPUX86State *env, X86Decoded= Insn *decode, MemOp ot) +{ + int vec_len =3D vector_len(s, decode); + int mask =3D (vec_len >> ot) - 1; + int val =3D decode->immediate & mask; + + switch (ot) { + case MO_8: + tcg_gen_ld8u_tl(s->T0, cpu_env, vector_elem_offset(&decode->op[1],= 
ot, val)); + break; + case MO_16: + tcg_gen_ld16u_tl(s->T0, cpu_env, vector_elem_offset(&decode->op[1]= , ot, val)); + break; + case MO_32: +#ifdef TARGET_X86_64 + tcg_gen_ld32u_tl(s->T0, cpu_env, vector_elem_offset(&decode->op[1]= , ot, val)); + break; + case MO_64: +#endif + tcg_gen_ld_tl(s->T0, cpu_env, vector_elem_offset(&decode->op[1], o= t, val)); + break; + default: + abort(); + } +} + +static void gen_PEXTRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) +{ + gen_pextr(s, env, decode, MO_8); +} + +static void gen_PEXTRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) +{ + gen_pextr(s, env, decode, MO_16); +} + +static void gen_PEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) +{ + MemOp ot =3D decode->op[0].ot; + gen_pextr(s, env, decode, ot); +} + +static inline void gen_pinsr(DisasContext *s, CPUX86State *env, X86Decoded= Insn *decode, MemOp ot) +{ + int vec_len =3D vector_len(s, decode); + int mask =3D (vec_len >> ot) - 1; + int val =3D decode->immediate & mask; + + if (decode->op[1].offset !=3D decode->op[0].offset) { + assert(vec_len =3D=3D 16); + gen_store_sse(s, decode, decode->op[1].offset); + } + + switch (ot) { + case MO_8: + tcg_gen_st8_tl(s->T1, cpu_env, vector_elem_offset(&decode->op[0], = ot, val)); + break; + case MO_16: + tcg_gen_st16_tl(s->T1, cpu_env, vector_elem_offset(&decode->op[0],= ot, val)); + break; + case MO_32: +#ifdef TARGET_X86_64 + tcg_gen_st32_tl(s->T1, cpu_env, vector_elem_offset(&decode->op[0],= ot, val)); + break; + case MO_64: +#endif + tcg_gen_st_tl(s->T1, cpu_env, vector_elem_offset(&decode->op[0], o= t, val)); + break; + default: + abort(); + } +} + +static void gen_PINSRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) +{ + gen_pinsr(s, env, decode, MO_8); +} + +static void gen_PINSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) +{ + gen_pinsr(s, env, decode, decode->op[2].ot); +} + static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, 
X86DecodedInsn= *decode) { if (s->prefix & PREFIX_DATA) { @@ -1194,6 +1400,13 @@ static void gen_SHRX(DisasContext *s, CPUX86State *e= nv, X86DecodedInsn *decode) tcg_gen_shr_tl(s->T0, s->T0, s->T1); } =20 +static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + assert(!s->vex_l); + gen_helper_aeskeygenassist_xmm(cpu_env, OP_PTR0, OP_PTR1, imm); +} + static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) { gen_unary_fp_sse(s, env, decode, @@ -1202,6 +1415,102 @@ static void gen_VCVTfp2fp(DisasContext *s, CPUX86St= ate *env, X86DecodedInsn *dec gen_helper_cvtsd2ss, gen_helper_cvtss2sd); } =20 +static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86Decoded= Insn *decode) +{ + int mask =3D decode->immediate & 1; + int src_ofs =3D vector_elem_offset(&decode->op[1], MO_128, mask); + if (decode->op[0].has_ea) { + /* VEX-only instruction, no alignment requirements. 
*/ + gen_sto_env_A0(s, src_ofs, false); + } else { + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, src_ofs, 16, 16); + } +} + +static void gen_VEXTRACTPS(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode) +{ + gen_pextr(s, env, decode, MO_32); +} + +static void gen_vinsertps(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + int val =3D decode->immediate; + int dest_word =3D (val >> 4) & 3; + int new_mask =3D (val & 15) | (1 << dest_word); + int vec_len =3D 16; + + assert(!s->vex_l); + + if (new_mask =3D=3D 15) { + /* All zeroes except possibly for the inserted element */ + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len= , 0); + } else if (decode->op[1].offset !=3D decode->op[0].offset) { + gen_store_sse(s, decode, decode->op[1].offset); + } + + if (new_mask !=3D (val & 15)) { + tcg_gen_st_i32(s->tmp2_i32, cpu_env, + vector_elem_offset(&decode->op[0], MO_32, dest_word= )); + } + + if (new_mask !=3D 15) { + TCGv_i32 zero =3D tcg_constant_i32(0); /* float32_zero */ + int i; + for (i =3D 0; i < 4; i++) { + if ((val >> i) & 1) { + tcg_gen_st_i32(zero, cpu_env, + vector_elem_offset(&decode->op[0], MO_32, i= )); + } + } + } +} + +static void gen_VINSERTPS_r(DisasContext *s, CPUX86State *env, X86DecodedI= nsn *decode) +{ + int val =3D decode->immediate; + tcg_gen_ld_i32(s->tmp2_i32, cpu_env, + vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & = 3)); + gen_vinsertps(s, env, decode); +} + +static void gen_VINSERTPS_m(DisasContext *s, CPUX86State *env, X86DecodedI= nsn *decode) +{ + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + gen_vinsertps(s, env, decode); +} + +static void gen_VINSERTx128(DisasContext *s, CPUX86State *env, X86DecodedI= nsn *decode) +{ + int mask =3D decode->immediate & 1; + tcg_gen_gvec_mov(MO_64, + decode->op[0].offset + offsetof(YMMReg, YMM_X(mask)), + decode->op[2].offset + offsetof(YMMReg, YMM_X(0)), 16= , 16); + tcg_gen_gvec_mov(MO_64, + decode->op[0].offset + offsetof(YMMReg, 
YMM_X(!mask)), + decode->op[1].offset + offsetof(YMMReg, YMM_X(!mask))= , 16, 16); +} + +static void gen_VPERM2x128(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + assert(s->vex_l); + gen_helper_vpermdq_ymm(OP_PTR0, OP_PTR1, OP_PTR2, imm); +} + +static void gen_VROUNDSD(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + assert(!s->vex_l); + gen_helper_roundsd_xmm(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); +} + +static void gen_VROUNDSS(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) +{ + TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); + assert(!s->vex_l); + gen_helper_roundss_xmm(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); +} + static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) { TCGv_ptr ptr =3D tcg_temp_new_ptr(); diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 2366fd4211..302feeaaf6 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4695,7 +4695,8 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) use_new &=3D b <=3D limit; #endif if (use_new && - ((b >=3D 0x150 && b <=3D 0x17f) || + (b =3D=3D 0x13a || + (b >=3D 0x150 && b <=3D 0x17f) || (b >=3D 0x1d0 && b <=3D 0x1ff))) { disas_insn_new(s, cpu, b + 0x100); return s->pc; --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663710368; cv=none; d=zohomail.com; s=zohoarc; 
b=eNK//436lbZoy+B0Q21smd28xdDfyUXLrvhAZ91tuQtTAHLl70OWB0QG6XkkQ77FYR/toe8A+Udt8ACLMkMJVf9QQkiugqWz8eMKSBMFc21UC8V8NEwy07F5lgh6vyeQt2GthFee3D6CtNJKE3XPcSogGehLAFuESe1Hap6hYC0= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663710368; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=0cKuu8KwWrHFcXenB2KvhX7Mcmnc9QSZITTwlBYZzfU=; b=n28YGIGE1E76RpJkqpgUOO91M1RfqXiVSbfIwNeNeHu1cnSTPEPAxKFPcKID4J3L1GphV3Yb3pSqT1Jwsi1hkqNwA1PeDOBUbYmRBiOtudX/hJv/xj5XEFxOd4hVp6LGyKJq8JsMzGgHJtx8v1usI+9bHfnI/BF6IA3wHlNHKMg= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 166371036847150.15936457008047; Tue, 20 Sep 2022 14:46:08 -0700 (PDT) Received: from localhost ([::1]:41338 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oal4A-0005Yb-5C for importer@patchew.org; Tue, 20 Sep 2022 17:46:06 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:57430) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0N-0006T3-8W for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:59 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:21529) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0L-0002Eh-JK for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:25:55 -0400 Received: from mail-ed1-f70.google.com (mail-ed1-f70.google.com [209.85.208.70]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id 
us-mta-259-owTTpphMMIimfvPE4uVSDQ-1; Tue, 20 Sep 2022 13:25:51 -0400 Received: by mail-ed1-f70.google.com with SMTP id y14-20020a056402440e00b0044301c7ccd9so2348253eda.19 for ; Tue, 20 Sep 2022 10:25:51 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id ay21-20020a056402203500b0044e9601e53fsm236088edb.19.2022.09.20.10.25.49 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:49 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694752; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=0cKuu8KwWrHFcXenB2KvhX7Mcmnc9QSZITTwlBYZzfU=; b=XeQahWVQaDg2N2H41I7zgl4cIxKAu91fdn+JQsPZXOfL32LvD0J6y1p5HGatiYYfjF3r+p el1LC9mjT+D8Cr5CtMSDue0xUn4SGrW5r4qD0KLSuCvlJo1OmJbqdMQ5ToGKdxPoesHxtQ Xb1xxlRg+eDlD4Pe1TfX0iti5GQGt/0= X-MC-Unique: owTTpphMMIimfvPE4uVSDQ-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=0cKuu8KwWrHFcXenB2KvhX7Mcmnc9QSZITTwlBYZzfU=; b=LXV0wmCGHUaGRNXe5uEQ99qF6XmsVWig/Kweb+HL6/ZowekNGjFmfV238K/IbX31oX +22q5nZydoN5imNz5De/Gs7+f/+wpqkVHaMZRCl1ineM9baKan3BX5czneR03oOTGCHQ pvnbqX/CINIA5EX+XXrUsHI7nwQwJ8/HrrjcWChDAJRq8eRYDRpX4Rb5uXBeM3gxApwL p/1xaak26f/UtQ6r76uOmvfc/jbV8kUO8+x9GQTh2hOLcrzrwFX1xvPSbVw7Vj4EGx9w aqybbQRDf/73ivO+5z21ThELxSo0mLgROIxcd7og5q8NJCE60pWOYHY8jVNKb+Kwvc3U m8hg== X-Gm-Message-State: ACrzQf2RuBruFQ3T7rVj/c9g4CXVSMq4oE2nkVdD/oYuCKDsT8Zb5rm7 9/Y1eGPfpR0walrYew2KLuX7civoZNkPNZVI6JEwy6OSl4nv8qHjz8RX8mOtW/ec/EJhtEWsPI/ 9IN2CZslh1QuucbH6I321nqMSSakaick7CgpnPhJWyeS8zmk+9TwyeDh6Fxm9iLBz0Bw= X-Received: by 
2002:a17:906:eece:b0:76f:5c6:2340 with SMTP id wu14-20020a170906eece00b0076f05c62340mr17899141ejb.383.1663694750091; Tue, 20 Sep 2022 10:25:50 -0700 (PDT) X-Google-Smtp-Source: AMsMyM7xZWadhcssiqJvzy1EZ2flxO52iXnlRKc0BNvkNhJJG9QRCiTIUr2s/lQwxqcDeAA92Gnprg== X-Received: by 2002:a17:906:eece:b0:76f:5c6:2340 with SMTP id wu14-20020a170906eece00b0076f05c62340mr17899117ejb.383.1663694749815; Tue, 20 Sep 2022 10:25:49 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 27/37] target/i386: Use tcg gvec ops for pmovmskb Date: Tue, 20 Sep 2022 19:24:57 +0200 Message-Id: <20220920172507.95568-28-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663710369843100001 Content-Type: text/plain; charset="utf-8" From: Richard Henderson As pmovmskb is used by strlen et al, this is the third highest 
overhead sse operation at %0.8. Signed-off-by: Richard Henderson [Reorganize to generate code for any vector size. - Paolo] Signed-off-by: Paolo Bonzini --- target/i386/tcg/emit.c.inc | 90 +++++++++++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 5 deletions(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index c5e90111a9..5345e791b7 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1192,14 +1192,94 @@ static void gen_PINSR(DisasContext *s, CPUX86State = *env, X86DecodedInsn *decode) gen_pinsr(s, env, decode, decode->op[2].ot); } =20 +static void gen_pmovmskb_i64(TCGv_i64 d, TCGv_i64 s) +{ + TCGv_i64 t =3D tcg_temp_new_i64(); + + tcg_gen_andi_i64(d, s, 0x8080808080808080ull); + + /* + * After each shift+or pair: + * 0: a.......b.......c.......d.......e.......f.......g.......h....... + * 7: ab......bc......cd......de......ef......fg......gh......h....... + * 14: abcd....bcde....cdef....defg....efgh....fgh.....gh......h....... + * 28: abcdefghbcdefgh.cdefgh..defgh...efgh....fgh.....gh......h....... + * The result is left in the high bits of the word. 
+ */ + tcg_gen_shli_i64(t, d, 7); + tcg_gen_or_i64(d, d, t); + tcg_gen_shli_i64(t, d, 14); + tcg_gen_or_i64(d, d, t); + tcg_gen_shli_i64(t, d, 28); + tcg_gen_or_i64(d, d, t); +} + +static void gen_pmovmskb_vec(unsigned vece, TCGv_vec d, TCGv_vec s) +{ + TCGv_vec t =3D tcg_temp_new_vec_matching(d); + TCGv_vec m =3D tcg_constant_vec_matching(d, MO_8, 0x80); + + /* See above */ + tcg_gen_and_vec(vece, d, s, m); + tcg_gen_shli_vec(vece, t, d, 7); + tcg_gen_or_vec(vece, d, d, t); + tcg_gen_shli_vec(vece, t, d, 14); + tcg_gen_or_vec(vece, d, d, t); + if (vece =3D=3D MO_64) { + tcg_gen_shli_vec(vece, t, d, 28); + tcg_gen_or_vec(vece, d, d, t); + } +} + +#ifdef TARGET_I386 +#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32 +#else +#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64 +#endif + static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) { - if (s->prefix & PREFIX_DATA) { - gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, OP_PTR2); - } else { - gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, OP_PTR2); + static const TCGOpcode vecop_list[] =3D { INDEX_op_shli_vec, 0 }; + static const GVecGen2 g =3D { + .fni8 =3D gen_pmovmskb_i64, + .fniv =3D gen_pmovmskb_vec, + .opt_opc =3D vecop_list, + .vece =3D MO_64, + .prefer_i64 =3D TCG_TARGET_REG_BITS =3D=3D 64 + }; + MemOp ot =3D decode->op[2].ot; + int vec_len =3D vector_len(s, decode); + TCGv t =3D tcg_temp_new(); + + tcg_gen_gvec_2(offsetof(CPUX86State, xmm_t0) + xmm_offset(ot), decode-= >op[2].offset, + vec_len, vec_len, &g); + tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec= _len - 1))); + while (vec_len > 8) { + vec_len -=3D 8; + if (TCG_TARGET_HAS_extract2_tl) { + /* + * Load the next byte of the result into the high byte of T. + * TCG does a similar expansion of deposit to shl+extract2; by + * loading the whole word, the shift left is avoided. 
+ */ +#ifdef TARGET_X86_64 + tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, xmm_t0.ZMM_Q((= vec_len - 1) / 8))); +#else + tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, xmm_t0.ZMM_L((= vec_len - 1) / 4))); +#endif + + tcg_gen_extract2_tl(s->T0, t, s->T0, TARGET_LONG_BITS - 8); + } else { + /* + * The _previous_ value is deposited into bits 8 and higher of= t. Because + * those bits are known to be zero after ld8u, this becomes a = shift+or + * if deposit is not available. + */ + tcg_gen_ld8u_tl(t, cpu_env, offsetof(CPUX86State, xmm_t0.ZMM_B= (vec_len - 1))); + tcg_gen_deposit_tl(s->T0, t, s->T0, 8, TARGET_LONG_BITS - 8); + } } - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + tcg_temp_free(t); } =20 static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663714386; cv=none; d=zohomail.com; s=zohoarc; b=gHqZKJ5/GuMNppbZ5yRtLBrgnBKt+kV+tgP94VIxd8qKbtVuZ5L5mx1Fqi0EVeOdXKyOoRnkC37UK0XR/fdi8FmPwr0uaQ3gmDhuY32RBQCV2W/mZP8PS2JceR4BlwFLHE5c5qRiQSNTmgFEN/EIv5LcAEabgGsgt2ivJA9wH3Y= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663714386; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=dgnukL4MO9/CNxcTi5jdWQPdH4fcdgoR+hNF1iVXZNI=; b=YNIIWNcKEY79hOUsPy5orZn+aZAS8DHCsLwSYyjA9ghghHX4rBNy0/+rMih0PzOS2FYZlJpxXkfTYVPZS68fllqTnjezG7UxOP2BjKd9qG82ELB9kBoR9LGmB9dAzW9jWsA6qEhHk8Vk7Jn1lZn3Yp3MhvkoVPigiBdqok+8ODY= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of 
gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663714386878307.4543053519294; Tue, 20 Sep 2022 15:53:06 -0700 (PDT) Received: from localhost ([::1]:59850 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oam6y-0005jt-Eu for importer@patchew.org; Tue, 20 Sep 2022 18:53:04 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:39754) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0U-0006aO-Hl for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:02 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:32671) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0P-0002FN-QL for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:02 -0400 Received: from mail-ej1-f72.google.com (mail-ej1-f72.google.com [209.85.218.72]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-549-rsTnooL8NZSPRw6MRiqZIw-1; Tue, 20 Sep 2022 13:25:54 -0400 Received: by mail-ej1-f72.google.com with SMTP id qf40-20020a1709077f2800b0077b43f8b94cso1786520ejc.23 for ; Tue, 20 Sep 2022 10:25:54 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id b13-20020a170906708d00b00734bfab4d59sm139546ejk.170.2022.09.20.10.25.51 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:51 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694756; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: 
in-reply-to:in-reply-to:references:references; bh=dgnukL4MO9/CNxcTi5jdWQPdH4fcdgoR+hNF1iVXZNI=; b=DYkosfC60ERsxlWR3WfJfgWj92w15YTVYL3VZlt+jBHwrWhFvH3+sLwxvAXCGlW7EDXHka gk6NnaTZhdZJSFq/mFLx+SdfFYPTYDTLcF2mg3E68QnVSfZ6X81iQ5T7aeX+hiTPQFrrRx seBBUVAygnOHQYQIbfe5LubTHOm2eyc= X-MC-Unique: rsTnooL8NZSPRw6MRiqZIw-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=dgnukL4MO9/CNxcTi5jdWQPdH4fcdgoR+hNF1iVXZNI=; b=I4clTPL5jg0lHL3TBVFQnt5H4Oik4kL+IMdt4fc769Mx27/5IcIhWxBuNAWMK6i/aE hT2wIdJwEwra9Qwxn5bbX7HVRQsLlg3lcRP6Q8MDF+UPxs7HB7ED5vV4428YRsLeGwBX O3gKUQ4wz5bXAxFaeyjLJ4D4+3TO829eILb80j6VpS0SwEdsWuVLd0+a6IBqsrF13UZF s6cq9pNwLS58TUiwtLzh6ikcrNq5rZyKJeI5IVEyrBPM0dfLnci6BGGO/U8QeKn3DAsW O822t2OWZpaLVpgUqjrTZFt5R9dnuubx96Cz9jflFP0kq8JAm8mKrDc1orioXIsiPRIs tECg== X-Gm-Message-State: ACrzQf2RSh0JiAWkZLbU/xpXYR3INtLzukyE8i4Su6bdjDM1m0T8E5Cx 2RWnZ/4JQZWk6GlTxpeUO+QOaakp5YxuTnJ1Gx1HLjj7esSRifxCfHCDaG74u+ZROXTmqaC7A+h UrCHcr7PhJgQCrmD5yK77X39hLdVZvv1+hbeSgcPEzkIHWE2OQZv2Ykj3ZfZiqrllkfY= X-Received: by 2002:a05:6402:3552:b0:451:2037:639e with SMTP id f18-20020a056402355200b004512037639emr21330599edd.136.1663694752788; Tue, 20 Sep 2022 10:25:52 -0700 (PDT) X-Google-Smtp-Source: AMsMyM7Ea7PXh1KBoY1T1uPQVikksLObh3OB5bwG6btxRmm93nfzMpNtGDVrmoIZIEHOKWxQn52FdQ== X-Received: by 2002:a05:6402:3552:b0:451:2037:639e with SMTP id f18-20020a056402355200b004512037639emr21330553edd.136.1663694752076; Tue, 20 Sep 2022 10:25:52 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 28/37] target/i386: reimplement 0x0f 0x38, add AVX Date: Tue, 20 Sep 2022 19:24:58 +0200 Message-Id: <20220920172507.95568-29-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: 
<20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663714388563100001 Content-Type: text/plain; charset="utf-8" There are several special cases here: 1) extending moves have different widths for the helpers vs. for the memory loads, and the width for memory loads depends on VEX.L too. This is represented by X86_SPECIAL_AVXExtMov. 2) some instructions, such as variable-width shifts, select the vector elem= ent size via REX.W. 3) VSIB instructions (VGATHERxPy, VPGATHERxy) are also part of this group, and they have (among other things) two output operands. 3) the macros for 4-operand blends (which are under 0x0f 0x3a) have to be extended to support 2-operand blends. The 2-operand variant actually came a few years earlier, but it is clearer to implement them in the opposite order. X86_TYPE_WM, introduced earlier for unaligned loads, is reused for helpers that accept a Reg* but have a M argument. 
These three-byte opcodes also include AVX new instructions, for which the helpers were originally implemented by Paul Brook . Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/ops_sse.h | 188 ++++++++++++++++++++++++++- target/i386/ops_sse_header.h | 19 +++ target/i386/tcg/decode-new.c.inc | 112 ++++++++++++++++- target/i386/tcg/decode-new.h | 6 + target/i386/tcg/emit.c.inc | 210 ++++++++++++++++++++++++++++++- target/i386/tcg/translate.c | 2 +- 6 files changed, 529 insertions(+), 8 deletions(-) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index cb8909adcf..104a53fda0 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -2382,6 +2382,36 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86Stat= e *env, Reg *d, Reg *s, #endif =20 #if SHIFT >=3D 1 +void glue(helper_vpermilpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg = *s) +{ + uint64_t r0, r1; + int i; + + for (i =3D 0; i < 1 << SHIFT; i +=3D 2) { + r0 =3D v->Q(i + ((s->Q(i) >> 1) & 1)); + r1 =3D v->Q(i + ((s->Q(i+1) >> 1) & 1)); + d->Q(i) =3D r0; + d->Q(i+1) =3D r1; + } +} + +void glue(helper_vpermilps, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg = *s) +{ + uint32_t r0, r1, r2, r3; + int i; + + for (i =3D 0; i < 2 << SHIFT; i +=3D 4) { + r0 =3D v->L(i + (s->L(i) & 3)); + r1 =3D v->L(i + (s->L(i+1) & 3)); + r2 =3D v->L(i + (s->L(i+2) & 3)); + r3 =3D v->L(i + (s->L(i+3) & 3)); + d->L(i) =3D r0; + d->L(i+1) =3D r1; + d->L(i+2) =3D r2; + d->L(i+3) =3D r3; + } +} + void glue(helper_vpermilpd_imm, SUFFIX)(Reg *d, Reg *s, uint32_t order) { uint64_t r0, r1; @@ -2414,6 +2444,150 @@ void glue(helper_vpermilps_imm, SUFFIX)(Reg *d, Reg= *s, uint32_t order) } } =20 +#if SHIFT =3D=3D 1 +#define FPSRLVD(x, c) (c < 32 ? ((x) >> c) : 0) +#define FPSRLVQ(x, c) (c < 64 ? ((x) >> c) : 0) +#define FPSRAVD(x, c) ((int32_t)(x) >> (c < 64 ? c : 31)) +#define FPSRAVQ(x, c) ((int64_t)(x) >> (c < 64 ? c : 63)) +#define FPSLLVD(x, c) (c < 32 ? 
((x) << c) : 0) +#define FPSLLVQ(x, c) (c < 64 ? ((x) << c) : 0) +#endif + +SSE_HELPER_L(helper_vpsrlvd, FPSRLVD) +SSE_HELPER_L(helper_vpsravd, FPSRAVD) +SSE_HELPER_L(helper_vpsllvd, FPSLLVD) + +SSE_HELPER_Q(helper_vpsrlvq, FPSRLVQ) +SSE_HELPER_Q(helper_vpsravq, FPSRAVQ) +SSE_HELPER_Q(helper_vpsllvq, FPSLLVQ) + +void glue(helper_vtestps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + uint32_t zf =3D 0, cf =3D 0; + int i; + + for (i =3D 0; i < 2 << SHIFT; i++) { + zf |=3D (s->L(i) & d->L(i)); + cf |=3D (s->L(i) & ~d->L(i)); + } + CC_SRC =3D ((zf >> 31) ? 0 : CC_Z) | ((cf >> 31) ? 0 : CC_C); +} + +void glue(helper_vtestpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + uint64_t zf =3D 0, cf =3D 0; + int i; + + for (i =3D 0; i < 1 << SHIFT; i++) { + zf |=3D (s->Q(i) & d->Q(i)); + cf |=3D (s->Q(i) & ~d->Q(i)); + } + CC_SRC =3D ((zf >> 63) ? 0 : CC_Z) | ((cf >> 63) ? 0 : CC_C); +} + +void glue(helper_vpmaskmovd_st, SUFFIX)(CPUX86State *env, + Reg *v, Reg *s, target_ulong a0) +{ + int i; + + for (i =3D 0; i < (2 << SHIFT); i++) { + if (v->L(i) >> 31) { + cpu_stl_data_ra(env, a0 + i * 4, s->L(i), GETPC()); + } + } +} + +void glue(helper_vpmaskmovq_st, SUFFIX)(CPUX86State *env, + Reg *v, Reg *s, target_ulong a0) +{ + int i; + + for (i =3D 0; i < (1 << SHIFT); i++) { + if (v->Q(i) >> 63) { + cpu_stq_data_ra(env, a0 + i * 8, s->Q(i), GETPC()); + } + } +} + +void glue(helper_vpmaskmovd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg= *s) +{ + int i; + + for (i =3D 0; i < (2 << SHIFT); i++) { + d->L(i) =3D (v->L(i) >> 31) ? s->L(i) : 0; + } +} + +void glue(helper_vpmaskmovq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg= *s) +{ + int i; + + for (i =3D 0; i < (1 << SHIFT); i++) { + d->Q(i) =3D (v->Q(i) >> 63) ? 
s->Q(i) : 0; + } +} + +void glue(helper_vpgatherdd, SUFFIX)(CPUX86State *env, + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) +{ + int i; + for (i =3D 0; i < (2 << SHIFT); i++) { + if (v->L(i) >> 31) { + target_ulong addr =3D a0 + + ((target_ulong)(int32_t)s->L(i) << scale); + d->L(i) =3D cpu_ldl_data_ra(env, addr, GETPC()); + } + v->L(i) =3D 0; + } +} + +void glue(helper_vpgatherdq, SUFFIX)(CPUX86State *env, + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) +{ + int i; + for (i =3D 0; i < (1 << SHIFT); i++) { + if (v->Q(i) >> 63) { + target_ulong addr =3D a0 + + ((target_ulong)(int32_t)s->L(i) << scale); + d->Q(i) =3D cpu_ldq_data_ra(env, addr, GETPC()); + } + v->Q(i) =3D 0; + } +} + +void glue(helper_vpgatherqd, SUFFIX)(CPUX86State *env, + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) +{ + int i; + for (i =3D 0; i < (1 << SHIFT); i++) { + if (v->L(i) >> 31) { + target_ulong addr =3D a0 + + ((target_ulong)(int64_t)s->Q(i) << scale); + d->L(i) =3D cpu_ldl_data_ra(env, addr, GETPC()); + } + v->L(i) =3D 0; + } + for (i /=3D 2; i < 1 << SHIFT; i++) { + d->Q(i) =3D 0; + v->Q(i) =3D 0; + } +} + +void glue(helper_vpgatherqq, SUFFIX)(CPUX86State *env, + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) +{ + int i; + for (i =3D 0; i < (1 << SHIFT); i++) { + if (v->Q(i) >> 63) { + target_ulong addr =3D a0 + + ((target_ulong)(int64_t)s->Q(i) << scale); + d->Q(i) =3D cpu_ldq_data_ra(env, addr, GETPC()); + } + v->Q(i) =3D 0; + } +} +#endif + #if SHIFT >=3D 2 void helper_vpermdq_ymm(Reg *d, Reg *v, Reg *s, uint32_t order) { @@ -2473,7 +2647,19 @@ void helper_vpermq_ymm(Reg *d, Reg *s, uint32_t orde= r) d->Q(2) =3D r2; d->Q(3) =3D r3; } -#endif + +void helper_vpermd_ymm(Reg *d, Reg *v, Reg *s) +{ + uint32_t r[8]; + int i; + + for (i =3D 0; i < 8; i++) { + r[i] =3D s->L(v->L(i) & 7); + } + for (i =3D 0; i < 8; i++) { + d->L(i) =3D r[i]; + } +} #endif =20 #undef SSE_HELPER_S diff --git a/target/i386/ops_sse_header.h 
b/target/i386/ops_sse_header.h index 1afc4ff6a1..dd8dcebc23 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -413,9 +413,28 @@ DEF_HELPER_5(glue(pclmulqdq, SUFFIX), void, env, Reg, = Reg, Reg, i32) =20 /* AVX helpers */ #if SHIFT >=3D 1 +DEF_HELPER_4(glue(vpermilpd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpermilps, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_3(glue(vpermilpd_imm, SUFFIX), void, Reg, Reg, i32) DEF_HELPER_3(glue(vpermilps_imm, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_4(glue(vpsrlvd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpsravd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpsllvd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpsrlvq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpsravq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpsllvq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_3(glue(vtestps, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(vtestpd, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(vpmaskmovd_st, SUFFIX), void, env, Reg, Reg, tl) +DEF_HELPER_4(glue(vpmaskmovq_st, SUFFIX), void, env, Reg, Reg, tl) +DEF_HELPER_4(glue(vpmaskmovd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpmaskmovq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_6(glue(vpgatherdd, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) +DEF_HELPER_6(glue(vpgatherdq, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) +DEF_HELPER_6(glue(vpgatherqd, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) +DEF_HELPER_6(glue(vpgatherqq, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) #if SHIFT =3D=3D 2 +DEF_HELPER_3(vpermd_ymm, void, Reg, Reg, Reg) DEF_HELPER_4(vpermdq_ymm, void, Reg, Reg, Reg, i32) DEF_HELPER_3(vpermq_ymm, void, Reg, Reg, i32) #endif diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index a1f0d7a24c..798b423163 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -92,6 +92,7 @@ #define mmx .special =3D 
X86_SPECIAL_MMX, #define zext0 .special =3D X86_SPECIAL_ZExtOp0, #define zext2 .special =3D X86_SPECIAL_ZExtOp2, +#define avx_movx .special =3D X86_SPECIAL_AVXExtMov, =20 #define vex1 .vex_class =3D 1, #define vex1_rep3 .vex_class =3D 1, .vex_special =3D X86_VEX_REPScalar, @@ -302,6 +303,105 @@ static void decode_0FD6(DisasContext *s, CPUX86State = *env, X86OpEntry *entry, ui } =20 static const X86OpEntry opcodes_0F38_00toEF[240] =3D { + [0x00] =3D X86_OP_ENTRY3(PSHUFB, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx avx2_256 p_00_66), + [0x01] =3D X86_OP_ENTRY3(PHADDW, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx avx2_256 p_00_66), + [0x02] =3D X86_OP_ENTRY3(PHADDD, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx avx2_256 p_00_66), + [0x03] =3D X86_OP_ENTRY3(PHADDSW, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx avx2_256 p_00_66), + [0x04] =3D X86_OP_ENTRY3(PMADDUBSW, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx avx2_256 p_00_66), + [0x05] =3D X86_OP_ENTRY3(PHSUBW, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx avx2_256 p_00_66), + [0x06] =3D X86_OP_ENTRY3(PHSUBD, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx avx2_256 p_00_66), + [0x07] =3D X86_OP_ENTRY3(PHSUBSW, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx avx2_256 p_00_66), + + [0x10] =3D X86_OP_ENTRY2(PBLENDVB, V,x, W,x, vex4 cpuid(SSE4= 1) avx2_256 p_66), + [0x14] =3D X86_OP_ENTRY2(BLENDVPS, V,x, W,x, vex4 cpuid(SSE4= 1) p_66), + [0x15] =3D X86_OP_ENTRY2(BLENDVPD, V,x, W,x, vex4 cpuid(SSE4= 1) p_66), + /* Listed incorrectly as type 4 */ + [0x16] =3D X86_OP_ENTRY3(VPERMD, V,qq, H,qq, W,qq, vex6 cpuid= (AVX2) p_66), + [0x17] =3D X86_OP_ENTRY3(VPTEST, None,None, V,x, W,x, vex4 cpuid= (SSE41) p_66), + + /* + * Source operand listed as Mq/Ux and similar in the manual; incorrect= ly listed + * as 128-bit only in 2-17. 
+ */ + [0x20] =3D X86_OP_ENTRY3(VPMOVSXBW, V,x, None,None, W,q, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x21] =3D X86_OP_ENTRY3(VPMOVSXBD, V,x, None,None, W,d, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x22] =3D X86_OP_ENTRY3(VPMOVSXBQ, V,x, None,None, W,w, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x23] =3D X86_OP_ENTRY3(VPMOVSXWD, V,x, None,None, W,q, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x24] =3D X86_OP_ENTRY3(VPMOVSXWQ, V,x, None,None, W,d, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x25] =3D X86_OP_ENTRY3(VPMOVSXDQ, V,x, None,None, W,q, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + + /* Same as PMOVSX. */ + [0x30] =3D X86_OP_ENTRY3(VPMOVZXBW, V,x, None,None, W,q, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x31] =3D X86_OP_ENTRY3(VPMOVZXBD, V,x, None,None, W,d, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x32] =3D X86_OP_ENTRY3(VPMOVZXBQ, V,x, None,None, W,w, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x33] =3D X86_OP_ENTRY3(VPMOVZXWD, V,x, None,None, W,q, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x34] =3D X86_OP_ENTRY3(VPMOVZXWQ, V,x, None,None, W,d, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x35] =3D X86_OP_ENTRY3(VPMOVZXDQ, V,x, None,None, W,q, vex5 cpuid= (SSE41) avx_movx avx2_256 p_66), + [0x36] =3D X86_OP_ENTRY3(VPERMD, V,qq, H,qq, W,qq, vex6 cpuid= (AVX2) p_66), + [0x37] =3D X86_OP_ENTRY3(PCMPGTQ, V,x, H,x, W,x, vex4 cpuid= (SSE42) avx2_256 p_66), + + [0x40] =3D X86_OP_ENTRY3(PMULLD, V,x, H,x, W,x, vex4 cpui= d(SSE41) avx2_256 p_66), + [0x41] =3D X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpui= d(SSE41) p_66), + /* Listed incorrectly as type 4 */ + [0x45] =3D X86_OP_ENTRY3(VPSRLV, V,x, H,x, W,x, vex6 cpui= d(AVX2) p_66), + [0x46] =3D X86_OP_ENTRY3(VPSRAV, V,x, H,x, W,x, vex6 cpui= d(AVX2) p_66), + [0x47] =3D X86_OP_ENTRY3(VPSLLV, V,x, H,x, W,x, vex6 cpui= d(AVX2) p_66), + + [0x90] =3D X86_OP_ENTRY3(VPGATHERD, V,x, H,x, M,d, vex12 cpuid(AVX2= ) p_66), /* vpgatherdd/q */ + [0x91] 
=3D X86_OP_ENTRY3(VPGATHERQ, V,x, H,x, M,q, vex12 cpuid(AVX2= ) p_66), /* vpgatherqd/q */ + [0x92] =3D X86_OP_ENTRY3(VPGATHERD, V,x, H,x, M,d, vex12 cpuid(AVX2= ) p_66), /* vgatherdps/d */ + [0x93] =3D X86_OP_ENTRY3(VPGATHERQ, V,x, H,x, M,q, vex12 cpuid(AVX2= ) p_66), /* vgatherqps/d */ + + [0x08] =3D X86_OP_ENTRY3(PSIGNB, V,x, H,x, W,x, vex4 cpuid= (SSSE3) mmx avx2_256 p_00_66), + [0x09] =3D X86_OP_ENTRY3(PSIGNW, V,x, H,x, W,x, vex4 cpuid= (SSSE3) mmx avx2_256 p_00_66), + [0x0a] =3D X86_OP_ENTRY3(PSIGND, V,x, H,x, W,x, vex4 cpuid= (SSSE3) mmx avx2_256 p_00_66), + [0x0b] =3D X86_OP_ENTRY3(PMULHRSW, V,x, H,x, W,x, vex4 cpuid= (SSSE3) mmx avx2_256 p_00_66), + [0x0c] =3D X86_OP_ENTRY3(VPERMILPS, V,x, H,x, W,x, vex4 cpuid= (AVX) p_00_66), + [0x0d] =3D X86_OP_ENTRY3(VPERMILPD, V,x, H,x, W,x, vex4 cpuid= (AVX) p_66), + [0x0e] =3D X86_OP_ENTRY3(VTESTPS, None,None, V,x, W,x, vex4 cpuid= (AVX) p_66), + [0x0f] =3D X86_OP_ENTRY3(VTESTPD, None,None, V,x, W,x, vex4 cpuid= (AVX) p_66), + + [0x18] =3D X86_OP_ENTRY3(VPBROADCASTD, V,x, None,None, W,d, vex6 c= puid(AVX) p_66), /* vbroadcastss */ + [0x19] =3D X86_OP_ENTRY3(VPBROADCASTQ, V,qq, None,None, W,q, vex6 c= puid(AVX) p_66), /* vbroadcastsd */ + [0x1a] =3D X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 c= puid(AVX) p_66), + [0x1c] =3D X86_OP_ENTRY3(PABSB, V,x, None,None, W,x, vex4 c= puid(SSSE3) mmx avx2_256 p_00_66), + [0x1d] =3D X86_OP_ENTRY3(PABSW, V,x, None,None, W,x, vex4 c= puid(SSSE3) mmx avx2_256 p_00_66), + [0x1e] =3D X86_OP_ENTRY3(PABSD, V,x, None,None, W,x, vex4 c= puid(SSSE3) mmx avx2_256 p_00_66), + + [0x28] =3D X86_OP_ENTRY3(PMULDQ, V,x, H,x, W,x, vex4 cpu= id(SSE41) avx2_256 p_66), + [0x29] =3D X86_OP_ENTRY3(PCMPEQQ, V,x, H,x, W,x, vex4 cpu= id(SSE41) avx2_256 p_66), + [0x2a] =3D X86_OP_ENTRY3(MOVNTDQA, V,x, None,None, M,x, vex1 cpu= id(SSE41) avx2_256 p_66), + [0x2b] =3D X86_OP_ENTRY3(VPACKUSDW, V,x, H,x, W,x, vex4 cpu= id(SSE41) avx2_256 p_66), + [0x2c] =3D X86_OP_ENTRY3(VMASKMOVPS, V,x, H,x, WM,x, 
vex6 cpu= id(AVX) p_66), + [0x2d] =3D X86_OP_ENTRY3(VMASKMOVPD, V,x, H,x, WM,x, vex6 cpu= id(AVX) p_66), + /* Incorrectly listed as Mx,Hx,Vx in the manual */ + [0x2e] =3D X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x, H,x, vex6 cpu= id(AVX) p_66), + [0x2f] =3D X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x, H,x, vex6 cpu= id(AVX) p_66), + + [0x38] =3D X86_OP_ENTRY3(PMINSB, V,x, H,x, W,x, vex4 cpuid(SS= E41) avx2_256 p_66), + [0x39] =3D X86_OP_ENTRY3(PMINSD, V,x, H,x, W,x, vex4 cpuid(SS= E41) avx2_256 p_66), + [0x3a] =3D X86_OP_ENTRY3(PMINUW, V,x, H,x, W,x, vex4 cpuid(SS= E41) avx2_256 p_66), + [0x3b] =3D X86_OP_ENTRY3(PMINUD, V,x, H,x, W,x, vex4 cpuid(SS= E41) avx2_256 p_66), + [0x3c] =3D X86_OP_ENTRY3(PMAXSB, V,x, H,x, W,x, vex4 cpuid(SS= E41) avx2_256 p_66), + [0x3d] =3D X86_OP_ENTRY3(PMAXSD, V,x, H,x, W,x, vex4 cpuid(SS= E41) avx2_256 p_66), + [0x3e] =3D X86_OP_ENTRY3(PMAXUW, V,x, H,x, W,x, vex4 cpuid(SS= E41) avx2_256 p_66), + [0x3f] =3D X86_OP_ENTRY3(PMAXUD, V,x, H,x, W,x, vex4 cpuid(SS= E41) avx2_256 p_66), + + [0x58] =3D X86_OP_ENTRY3(VPBROADCASTD, V,x, None,None, W,d, vex6 c= puid(AVX2) p_66), + [0x59] =3D X86_OP_ENTRY3(VPBROADCASTQ, V,x, None,None, W,q, vex6 c= puid(AVX2) p_66), + [0x5a] =3D X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 c= puid(AVX2) p_66), + + [0x78] =3D X86_OP_ENTRY3(VPBROADCASTB, V,x, None,None, W,b, vex6 c= puid(AVX2) p_66), + [0x79] =3D X86_OP_ENTRY3(VPBROADCASTW, V,x, None,None, W,w, vex6 c= puid(AVX2) p_66), + + [0x8c] =3D X86_OP_ENTRY3(VPMASKMOV, V,x, H,x, WM,x, vex6 cpuid(AVX= 2) p_66), + [0x8e] =3D X86_OP_ENTRY3(VPMASKMOV_st, M,x, V,x, H,x, vex6 cpuid(AVX= 2) p_66), + + [0xdb] =3D X86_OP_ENTRY3(VAESIMC, V,dq, None,None, W,dq, vex4 cpui= d(AES) p_66), + [0xdc] =3D X86_OP_ENTRY3(VAESENC, V,x, H,x, W,x, vex4 cpui= d(AES) p_66), + [0xdd] =3D X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpui= d(AES) p_66), + [0xde] =3D X86_OP_ENTRY3(VAESDEC, V,x, H,x, W,x, vex4 cpui= d(AES) p_66), + [0xdf] =3D X86_OP_ENTRY3(VAESDECLAST, V,x, H,x, W,x, 
vex4 cpui= d(AES) p_66), }; =20 /* five rows for no prefix, 66, F3, F2, 66+F2 */ @@ -431,8 +531,8 @@ static const X86OpEntry opcodes_0F3A[256] =3D { [0x0b] =3D X86_OP_ENTRY4(VROUNDSD, V,x, H,x, W,sd, vex3 cpuid(SSE41= ) p_66), [0x0c] =3D X86_OP_ENTRY4(VBLENDPS, V,x, H,x, W,x, vex4 cpuid(SSE4= 1) p_66), [0x0d] =3D X86_OP_ENTRY4(VBLENDPD, V,x, H,x, W,x, vex4 cpuid(SSE4= 1) p_66), - [0x0e] =3D X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE4= 1) p_66), - [0x0f] =3D X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx p_00_66), + [0x0e] =3D X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE4= 1) avx2_256 p_66), + [0x0f] =3D X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 cpuid(SSSE= 3) mmx avx2_256 p_00_66), =20 [0x18] =3D X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 cpuid(AV= X) p_66), [0x19] =3D X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 cpuid(AV= X) p_66), @@ -1424,6 +1524,14 @@ static void disas_insn_new(DisasContext *s, CPUState= *cpu, int b) } break; =20 + case X86_SPECIAL_AVXExtMov: + if (!decode.op[2].has_ea) { + decode.op[2].ot =3D s->vex_l ? MO_256 : MO_128; + } else if (s->vex_l) { + decode.op[2].ot++; + } + break; + case X86_SPECIAL_MMX: if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) { gen_helper_enter_mmx(cpu_env); diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index c248b089b7..c74550e32b 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -142,6 +142,12 @@ typedef enum X86InsnSpecial { X86_SPECIAL_ZExtOp0, X86_SPECIAL_ZExtOp2, =20 + /* + * Register operand 2 is extended to full width, while a memory operand + * is doubled in size if VEX.L=3D1. + */ + X86_SPECIAL_AVXExtMov, + /* * MMX instruction exists with no prefix; if there is no prefix, V/H/W= /U operands * become P/P/Q/N, and size "x" becomes "q". 
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 5345e791b7..dd36a3544e 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,6 +19,9 @@ * License along with this library; if not, see . */ =20 +typedef void (*SSEFunc_0_epppti)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr re= g_b, + TCGv_ptr reg_c, TCGv a0, TCGv_i32 scale); + static inline TCGv_i32 tcg_constant8u_i32(uint8_t val) { return tcg_constant_i32(val); @@ -484,15 +487,20 @@ static inline void gen_ternary_sse(DisasContext *s, C= PUX86State *env, X86Decoded fn(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2, ptr3); tcg_temp_free_ptr(ptr3); } -#define TERNARY_SSE(uvname, lname) = \ +#define TERNARY_SSE(uname, uvname, lname) = \ static void gen_##uvname(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) \ { = \ gen_ternary_sse(s, env, decode, (uint8_t)decode->immediate >> 4, = \ gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); = \ +} = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_ternary_sse(s, env, decode, 0, = \ + gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); = \ } -TERNARY_SSE(VBLENDVPS, blendvps) -TERNARY_SSE(VBLENDVPD, blendvpd) -TERNARY_SSE(VPBLENDVB, pblendvb) +TERNARY_SSE(BLENDVPS, VBLENDVPS, blendvps) +TERNARY_SSE(BLENDVPD, VBLENDVPD, blendvpd) +TERNARY_SSE(PBLENDVB, VPBLENDVB, pblendvb) =20 static inline void gen_binary_imm_sse(DisasContext *s, CPUX86State *env, X= 86DecodedInsn *decode, SSEFunc_0_epppi xmm, SSEFunc_0_epppi= ymm) @@ -522,6 +530,25 @@ BINARY_IMM_SSE(VDDPD, dppd) BINARY_IMM_SSE(VMPSADBW, mpsadbw) BINARY_IMM_SSE(PCLMULQDQ, pclmulqdq) =20 + +#define UNARY_INT_GVEC(uname, func, ...) 
= \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + int vec_len =3D vector_len(s, decode); = \ + = \ + func(__VA_ARGS__, decode->op[0].offset, = \ + decode->op[2].offset, vec_len, vec_len); = \ +} +UNARY_INT_GVEC(PABSB, tcg_gen_gvec_abs, MO_8) +UNARY_INT_GVEC(PABSW, tcg_gen_gvec_abs, MO_16) +UNARY_INT_GVEC(PABSD, tcg_gen_gvec_abs, MO_32) +UNARY_INT_GVEC(VBROADCASTx128, tcg_gen_gvec_dup_mem, MO_128) +UNARY_INT_GVEC(VPBROADCASTB, tcg_gen_gvec_dup_mem, MO_8) +UNARY_INT_GVEC(VPBROADCASTW, tcg_gen_gvec_dup_mem, MO_16) +UNARY_INT_GVEC(VPBROADCASTD, tcg_gen_gvec_dup_mem, MO_32) +UNARY_INT_GVEC(VPBROADCASTQ, tcg_gen_gvec_dup_mem, MO_64) + + #define BINARY_INT_GVEC(uname, func, ...) = \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ { = \ @@ -544,14 +571,25 @@ BINARY_INT_GVEC(PAND, tcg_gen_gvec_and, MO_64) BINARY_INT_GVEC(PCMPEQB, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_8) BINARY_INT_GVEC(PCMPEQD, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_32) BINARY_INT_GVEC(PCMPEQW, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_16) +BINARY_INT_GVEC(PCMPEQQ, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_64) BINARY_INT_GVEC(PCMPGTB, tcg_gen_gvec_cmp, TCG_COND_GT, MO_8) BINARY_INT_GVEC(PCMPGTW, tcg_gen_gvec_cmp, TCG_COND_GT, MO_16) BINARY_INT_GVEC(PCMPGTD, tcg_gen_gvec_cmp, TCG_COND_GT, MO_32) +BINARY_INT_GVEC(PCMPGTQ, tcg_gen_gvec_cmp, TCG_COND_GT, MO_64) +BINARY_INT_GVEC(PMAXSB, tcg_gen_gvec_smax, MO_8) BINARY_INT_GVEC(PMAXSW, tcg_gen_gvec_smax, MO_16) +BINARY_INT_GVEC(PMAXSD, tcg_gen_gvec_smax, MO_32) BINARY_INT_GVEC(PMAXUB, tcg_gen_gvec_umax, MO_8) +BINARY_INT_GVEC(PMAXUW, tcg_gen_gvec_umax, MO_16) +BINARY_INT_GVEC(PMAXUD, tcg_gen_gvec_umax, MO_32) +BINARY_INT_GVEC(PMINSB, tcg_gen_gvec_smin, MO_8) BINARY_INT_GVEC(PMINSW, tcg_gen_gvec_smin, MO_16) +BINARY_INT_GVEC(PMINSD, tcg_gen_gvec_smin, MO_32) BINARY_INT_GVEC(PMINUB, tcg_gen_gvec_umin, MO_8) +BINARY_INT_GVEC(PMINUW, tcg_gen_gvec_umin, MO_16) +BINARY_INT_GVEC(PMINUD, tcg_gen_gvec_umin, 
MO_32) BINARY_INT_GVEC(PMULLW, tcg_gen_gvec_mul, MO_16) +BINARY_INT_GVEC(PMULLD, tcg_gen_gvec_mul, MO_32) BINARY_INT_GVEC(POR, tcg_gen_gvec_or, MO_64) BINARY_INT_GVEC(PSUBB, tcg_gen_gvec_sub, MO_8) BINARY_INT_GVEC(PSUBW, tcg_gen_gvec_sub, MO_16) @@ -626,6 +664,19 @@ BINARY_INT_MMX(PSRLQ_r, psrlq) BINARY_INT_MMX(PSRAW_r, psraw) BINARY_INT_MMX(PSRAD_r, psrad) =20 +BINARY_INT_MMX(PHADDW, phaddw) +BINARY_INT_MMX(PHADDSW, phaddsw) +BINARY_INT_MMX(PHADDD, phaddd) +BINARY_INT_MMX(PHSUBW, phsubw) +BINARY_INT_MMX(PHSUBSW, phsubsw) +BINARY_INT_MMX(PHSUBD, phsubd) +BINARY_INT_MMX(PMADDUBSW, pmaddubsw) +BINARY_INT_MMX(PSHUFB, pshufb) +BINARY_INT_MMX(PSIGNB, psignb) +BINARY_INT_MMX(PSIGNW, psignw) +BINARY_INT_MMX(PSIGND, psignd) +BINARY_INT_MMX(PMULHRSW, pmulhrsw) + /* Instructions with no MMX equivalent. */ #define BINARY_INT_SSE(uname, lname) = \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ @@ -636,8 +687,35 @@ static void gen_##uname(DisasContext *s, CPUX86State *= env, X86DecodedInsn *decod gen_helper_##lname##_ymm); = \ } =20 +/* Instructions with no MMX equivalent. 
*/ BINARY_INT_SSE(PUNPCKLQDQ, punpcklqdq) BINARY_INT_SSE(PUNPCKHQDQ, punpckhqdq) +BINARY_INT_SSE(VPACKUSDW, packusdw) +BINARY_INT_SSE(VPERMILPS, vpermilps) +BINARY_INT_SSE(VPERMILPD, vpermilpd) +BINARY_INT_SSE(VMASKMOVPS, vpmaskmovd) +BINARY_INT_SSE(VMASKMOVPD, vpmaskmovq) + +BINARY_INT_SSE(PMULDQ, pmuldq) + +BINARY_INT_SSE(VAESDEC, aesdec) +BINARY_INT_SSE(VAESDECLAST, aesdeclast) +BINARY_INT_SSE(VAESENC, aesenc) +BINARY_INT_SSE(VAESENCLAST, aesenclast) + +#define UNARY_CMP_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + if (!s->vex_l) { = \ + gen_helper_##lname##_xmm(cpu_env, OP_PTR1, OP_PTR2); = \ + } else { = \ + gen_helper_##lname##_ymm(cpu_env, OP_PTR1, OP_PTR2); = \ + } = \ + set_cc_op(s, CC_OP_EFLAGS); = \ +} +UNARY_CMP_SSE(VPTEST, ptest) +UNARY_CMP_SSE(VTESTPS, vtestps) +UNARY_CMP_SSE(VTESTPD, vtestpd) =20 static inline void gen_unary_int_sse(DisasContext *s, CPUX86State *env, X8= 6DecodedInsn *decode, SSEFunc_0_epp xmm, SSEFunc_0_epp ymm) @@ -657,6 +735,20 @@ static void gen_##uname(DisasContext *s, CPUX86State *= env, X86DecodedInsn *decod gen_helper_##lname##_ymm); = \ } =20 +UNARY_INT_SSE(VPMOVSXBW, pmovsxbw) +UNARY_INT_SSE(VPMOVSXBD, pmovsxbd) +UNARY_INT_SSE(VPMOVSXBQ, pmovsxbq) +UNARY_INT_SSE(VPMOVSXWD, pmovsxwd) +UNARY_INT_SSE(VPMOVSXWQ, pmovsxwq) +UNARY_INT_SSE(VPMOVSXDQ, pmovsxdq) + +UNARY_INT_SSE(VPMOVZXBW, pmovzxbw) +UNARY_INT_SSE(VPMOVZXBD, pmovzxbd) +UNARY_INT_SSE(VPMOVZXBQ, pmovzxbq) +UNARY_INT_SSE(VPMOVZXWD, pmovzxwd) +UNARY_INT_SSE(VPMOVZXWQ, pmovzxwq) +UNARY_INT_SSE(VPMOVZXDQ, pmovzxdq) + UNARY_INT_SSE(VCVTDQ2PD, cvtdq2pd) UNARY_INT_SSE(VCVTPD2DQ, cvtpd2dq) UNARY_INT_SSE(VCVTTPD2DQ, cvttpd2dq) @@ -714,6 +806,64 @@ static void gen_##uname(DisasContext *s, CPUX86State *= env, X86DecodedInsn *decod UNARY_IMM_FP_SSE(VROUNDPS, roundps) UNARY_IMM_FP_SSE(VROUNDPD, roundpd) =20 +static inline void gen_rexw_avx(DisasContext *s, CPUX86State *env, X86Deco= dedInsn *decode, + 
SSEFunc_0_eppp d_xmm, SSEFunc_0_eppp q_xmm, + SSEFunc_0_eppp d_ymm, SSEFunc_0_eppp q_ymm) +{ + SSEFunc_0_eppp d =3D s->vex_l ? d_ymm : d_xmm; + SSEFunc_0_eppp q =3D s->vex_l ? q_ymm : q_xmm; + SSEFunc_0_eppp fn =3D s->rex_w ? q : d; + fn(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); +} + +/* REX.W affects whether to operate on 32- or 64-bit elements. */ +#define REXW_AVX(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_rexw_avx(s, env, decode, = \ + gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, = \ + gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); = \ +} +REXW_AVX(VPSLLV, vpsllv) +REXW_AVX(VPSRLV, vpsrlv) +REXW_AVX(VPSRAV, vpsrav) +REXW_AVX(VPMASKMOV, vpmaskmov) + +/* Same as above, but with extra arguments to the helper. */ +static inline void gen_vsib_avx(DisasContext *s, CPUX86State *env, X86Deco= dedInsn *decode, + SSEFunc_0_epppti d_xmm, SSEFunc_0_epppti q= _xmm, + SSEFunc_0_epppti d_ymm, SSEFunc_0_epppti q= _ymm) +{ + SSEFunc_0_epppti d =3D s->vex_l ? d_ymm : d_xmm; + SSEFunc_0_epppti q =3D s->vex_l ? q_ymm : q_xmm; + SSEFunc_0_epppti fn =3D s->rex_w ? q : d; + TCGv_i32 scale =3D tcg_constant_i32(decode->mem.scale); + TCGv_ptr index =3D tcg_temp_new_ptr(); + + /* Pass third input as (index, base, scale) */ + tcg_gen_addi_ptr(index, cpu_env, ZMM_OFFSET(decode->mem.index)); + fn(cpu_env, OP_PTR0, OP_PTR1, index, s->A0, scale); + + /* + * There are two output operands, so zero OP1's high 128 bits + * in the VEX.128 case. 
+ */ + if (!s->vex_l) { + int ymmh_ofs =3D vector_elem_offset(&decode->op[1], MO_128, 1); + tcg_gen_gvec_dup_imm(MO_64, ymmh_ofs, 16, 16, 0); + } + tcg_temp_free_ptr(index); +} +#define VSIB_AVX(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ +{ = \ + gen_vsib_avx(s, env, decode, = \ + gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, = \ + gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); = \ +} +VSIB_AVX(VPGATHERD, vpgatherd) +VSIB_AVX(VPGATHERQ, vpgatherq) + static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_= op) { TCGv carry_in =3D NULL; @@ -981,6 +1131,11 @@ static void gen_MOVMSK(DisasContext *s, CPUX86State *= env, X86DecodedInsn *decode tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); } =20 +static void gen_MOVNTDQA(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) +{ + gen_load_sse(s, s->T0, decode->op[0].ot, decode->op[0].offset, true); +} + static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) { int vec_len =3D vector_len(s, decode); @@ -1487,6 +1642,12 @@ static void gen_VAESKEYGEN(DisasContext *s, CPUX86St= ate *env, X86DecodedInsn *de gen_helper_aeskeygenassist_xmm(cpu_env, OP_PTR0, OP_PTR1, imm); } =20 +static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + assert(!s->vex_l); + gen_helper_aesimc_xmm(cpu_env, OP_PTR0, OP_PTR2); +} + static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) { gen_unary_fp_sse(s, env, decode, @@ -1570,6 +1731,41 @@ static void gen_VINSERTx128(DisasContext *s, CPUX86S= tate *env, X86DecodedInsn *d decode->op[1].offset + offsetof(YMMReg, YMM_X(!mask))= , 16, 16); } =20 +static inline void gen_maskmov(DisasContext *s, CPUX86State *env, X86Decod= edInsn *decode, + SSEFunc_0_eppt xmm, SSEFunc_0_eppt ymm) +{ + if (!s->vex_l) { + xmm(cpu_env, OP_PTR2, OP_PTR1, s->A0); + } else { + ymm(cpu_env, OP_PTR2, OP_PTR1, s->A0); + } +} + +static void 
gen_VMASKMOVPD_st(DisasContext *s, CPUX86State *env, X86Decode= dInsn *decode) +{ + gen_maskmov(s, env, decode, gen_helper_vpmaskmovq_st_xmm, gen_helper_v= pmaskmovq_st_ymm); +} + +static void gen_VMASKMOVPS_st(DisasContext *s, CPUX86State *env, X86Decode= dInsn *decode) +{ + gen_maskmov(s, env, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_v= pmaskmovd_st_ymm); +} + +static void gen_VPMASKMOV_st(DisasContext *s, CPUX86State *env, X86Decoded= Insn *decode) +{ + if (s->rex_w) { + gen_VMASKMOVPD_st(s, env, decode); + } else { + gen_VMASKMOVPS_st(s, env, decode); + } +} + +static void gen_VPERMD(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) +{ + assert(s->vex_l); + gen_helper_vpermd_ymm(OP_PTR0, OP_PTR1, OP_PTR2); +} + static void gen_VPERM2x128(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode) { TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); @@ -1577,6 +1773,12 @@ static void gen_VPERM2x128(DisasContext *s, CPUX86St= ate *env, X86DecodedInsn *de gen_helper_vpermdq_ymm(OP_PTR0, OP_PTR1, OP_PTR2, imm); } =20 +static void gen_VPHMINPOSUW(DisasContext *s, CPUX86State *env, X86DecodedI= nsn *decode) +{ + assert(!s->vex_l); + gen_helper_phminposuw_xmm(cpu_env, OP_PTR0, OP_PTR2); +} + static void gen_VROUNDSD(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) { TCGv_i32 imm =3D tcg_constant8u_i32(decode->immediate); diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 302feeaaf6..32f937013f 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4695,7 +4695,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) use_new &=3D b <=3D limit; #endif if (use_new && - (b =3D=3D 0x13a || + (b =3D=3D 0x138 || b =3D=3D 0x13a || (b >=3D 0x150 && b <=3D 0x17f) || (b >=3D 0x1d0 && b <=3D 0x1ff))) { disas_insn_new(s, cpu, b + 0x100); --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass 
(zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663714645; cv=none; d=zohomail.com; s=zohoarc; b=FOyqfwRKLI4qM5ve9VDoHIVKQluKLRolOx5d1DO74bIHyK5zWNk3L19iuHFfRwa0DDjVhytH+eYtDL1mTKyUxfIxHoJ9AC3jab3vE0L0CDuH9wRhgiq/ynKavc0GJFfqIqX6AfE+LXbk1Xnrwa1KzaxTvZj1Ya+j0rZNj4tBNdA= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663714645; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=kL9TEJMgcV6CpIfB1X9GtUOh3TtmamVndhuDMBySJLo=; b=UoAc4NMNmG8yuOR57kvXO8GA64CxNFYk+yBpapjRiRlPEVmvt1Oa1u1/6A0nlKE+Hgiw+2o1rEcOLe8auOzXLxiIpnCFfnmODs6VfaK/uNckwpwiD2c9VpdR3yWCumYsStAjTWqtm0WW3CwHybBpk3YvidwbX+xtFqxBuaQ1UIg= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663714645347228.4671514368098; Tue, 20 Sep 2022 15:57:25 -0700 (PDT) Received: from localhost ([::1]:43296 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oamB8-0004uA-Ng for importer@patchew.org; Tue, 20 Sep 2022 18:57:22 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58726) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0a-0006li-Qc for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:09 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:31515) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) 
(envelope-from ) id 1oah0Y-0002HH-2B for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:08 -0400 Received: from mail-ej1-f69.google.com (mail-ej1-f69.google.com [209.85.218.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-231-uaTwZRhUPhG28-3Zg3Qkhw-1; Tue, 20 Sep 2022 13:25:55 -0400 Received: by mail-ej1-f69.google.com with SMTP id oz30-20020a1709077d9e00b0077239b6a915so1792414ejc.11 for ; Tue, 20 Sep 2022 10:25:55 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id 7-20020a170906308700b00781b589a1afsm140558ejv.159.2022.09.20.10.25.52 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:53 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694765; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=kL9TEJMgcV6CpIfB1X9GtUOh3TtmamVndhuDMBySJLo=; b=ErwIFGETsfMW7tHYaccBjriD24a/UIcv+cqtzqrWBjKfSe43WQt6ScimzP51LmOMutL5XA 1k9iOA1maxSPwAAcUX0C/a7Et6PYc4BxulgQjtXjNerqxjyrwIDf+56ri4LDXVoVHHiHah 82oqUKKOzxcza2EJngLKQ48Zu45hF6I= X-MC-Unique: uaTwZRhUPhG28-3Zg3Qkhw-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=kL9TEJMgcV6CpIfB1X9GtUOh3TtmamVndhuDMBySJLo=; b=ClrgOczDTEc8LlkC7F9JirD/DdqGJzIGS6Cwn/hbh41+sLbQv7ioDe6mwynnzpWRYo A+BEj28/PHtuSSt6aO2cH7//0wU7G4DKtDCbG1wAWWMUUGmVZsLChX9hJDatd2uhvxWp /OMGIVxlWqQYyZdaWjeRRknjr1TnyTiWkdaHEgYr9KxtciU53eZAAjf4P+7C2GdkbyFq Jb8jvHFcICZiKOGDAHv0dAXQslnmXCE2BI6suPigQLUVCp0qfNGP835eS1YMEdL6Qgeo QhDgwNqSAIT1AFvfYO8TErzmwXQBA2xqo8h7K2T8TznVQ9iQDd6ylesF0cRiKYYP2wfx 
8ijA== X-Gm-Message-State: ACrzQf0/TqUt0rdUbH2PnysfuB1ep2BklixoJsXxAmkGIDZ9e3wqP95R SFTwrMQwSL8un5jWt5YENd+GGtTS23p009WpEvpjRYPWbqj34tIZsgx2eb7C9K/BvPqRqpCGZIU 4C4vosqI9x81relNDcbfkYJfDetLggU5rbcKGJ9LdVdy8TTZzp2TGFJaWvw9FUcqjI54= X-Received: by 2002:a17:907:724e:b0:77a:63:4a24 with SMTP id ds14-20020a170907724e00b0077a00634a24mr17647581ejc.35.1663694753850; Tue, 20 Sep 2022 10:25:53 -0700 (PDT) X-Google-Smtp-Source: AMsMyM6kBhQhdaVctjAkUklhjboRYUAIqv7csIJEE0XPDrHZPMc+FbzVtnImDCfBpNpCpIg++rx+JQ== X-Received: by 2002:a17:907:724e:b0:77a:63:4a24 with SMTP id ds14-20020a170907724e00b0077a00634a24mr17647552ejc.35.1663694753532; Tue, 20 Sep 2022 10:25:53 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 29/37] target/i386: reimplement 0x0f 0xc2, 0xc4-0xc6, add AVX Date: Tue, 20 Sep 2022 19:24:59 +0200 Message-Id: <20220920172507.95568-30-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: 
qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663714645808100001 Content-Type: text/plain; charset="utf-8" Nothing special going on here, for once. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/decode-new.c.inc | 5 +++ target/i386/tcg/emit.c.inc | 75 ++++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 1 + 3 files changed, 81 insertions(+) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index 798b423163..461921a98d 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -648,6 +648,11 @@ static const X86OpEntry opcodes_0F[256] =3D { [0x7e] =3D X86_OP_GROUP0(0F7E), [0x7f] =3D X86_OP_GROUP0(0F7F), =20 + [0xc2] =3D X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_= 00_66_f3_f2), + [0xc4] =3D X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_0= 0_66), + [0xc5] =3D X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_0= 0_66), + [0xc6] =3D X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66= ), + [0xd0] =3D X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(S= SE3) p_66_f2), [0xd1] =3D X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), [0xd2] =3D X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx= 2_256 p_00_66), diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index dd36a3544e..71b8fcbe24 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1342,6 +1342,11 @@ static void gen_PINSRB(DisasContext *s, CPUX86State = *env, X86DecodedInsn *decode gen_pinsr(s, env, decode, MO_8); } =20 +static void gen_PINSRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) +{ + gen_pinsr(s, env, decode, MO_16); +} + static void gen_PINSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) { gen_pinsr(s, env, decode, decode->op[2].ot); @@ -1648,6 +1653,66 @@ static void gen_VAESIMC(DisasContext *s, CPUX86State= *env, 
X86DecodedInsn *decod gen_helper_aesimc_xmm(cpu_env, OP_PTR0, OP_PTR2); } =20 +/* + * 00 =3D v*ps Vps, Hps, Wpd + * 66 =3D v*pd Vpd, Hpd, Wps + * f3 =3D v*ss Vss, Hss, Wps + * f2 =3D v*sd Vsd, Hsd, Wps + */ +#define SSE_CMP(x) { \ + gen_helper_ ## x ## ps ## _xmm, gen_helper_ ## x ## pd ## _xmm, \ + gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, \ + gen_helper_ ## x ## ps ## _ymm, gen_helper_ ## x ## pd ## _ymm} +static const SSEFunc_0_eppp gen_helper_cmp_funcs[32][6] =3D { + SSE_CMP(cmpeq), + SSE_CMP(cmplt), + SSE_CMP(cmple), + SSE_CMP(cmpunord), + SSE_CMP(cmpneq), + SSE_CMP(cmpnlt), + SSE_CMP(cmpnle), + SSE_CMP(cmpord), + + SSE_CMP(cmpequ), + SSE_CMP(cmpnge), + SSE_CMP(cmpngt), + SSE_CMP(cmpfalse), + SSE_CMP(cmpnequ), + SSE_CMP(cmpge), + SSE_CMP(cmpgt), + SSE_CMP(cmptrue), + + SSE_CMP(cmpeqs), + SSE_CMP(cmpltq), + SSE_CMP(cmpleq), + SSE_CMP(cmpunords), + SSE_CMP(cmpneqq), + SSE_CMP(cmpnltq), + SSE_CMP(cmpnleq), + SSE_CMP(cmpords), + + SSE_CMP(cmpequs), + SSE_CMP(cmpngeq), + SSE_CMP(cmpngtq), + SSE_CMP(cmpfalses), + SSE_CMP(cmpnequs), + SSE_CMP(cmpgeq), + SSE_CMP(cmpgtq), + SSE_CMP(cmptrues), +}; +#undef SSE_CMP + +static void gen_VCMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) +{ + int index =3D decode->immediate & (s->prefix & PREFIX_VEX ? 31 : 7); + int b =3D + s->prefix & PREFIX_REPZ ? 2 /* ss */ : + s->prefix & PREFIX_REPNZ ? 
3 /* sd */ : + !!(s->prefix & PREFIX_DATA) /* pd */ + (s->vex_l << 2); + + gen_helper_cmp_funcs[index][b](cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); +} + static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) { gen_unary_fp_sse(s, env, decode, @@ -1793,6 +1858,16 @@ static void gen_VROUNDSS(DisasContext *s, CPUX86Stat= e *env, X86DecodedInsn *deco gen_helper_roundss_xmm(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); } =20 +static void gen_VSHUF(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) +{ + TCGv_i32 imm =3D tcg_constant_i32(decode->immediate); + SSEFunc_0_pppi ps, pd, fn; + ps =3D s->vex_l ? gen_helper_shufps_ymm : gen_helper_shufps_xmm; + pd =3D s->vex_l ? gen_helper_shufpd_ymm : gen_helper_shufpd_xmm; + fn =3D s->prefix & PREFIX_DATA ? pd : ps; + fn(OP_PTR0, OP_PTR1, OP_PTR2, imm); +} + static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) { TCGv_ptr ptr =3D tcg_temp_new_ptr(); diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 32f937013f..eb7a4d0e4d 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4697,6 +4697,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) if (use_new && (b =3D=3D 0x138 || b =3D=3D 0x13a || (b >=3D 0x150 && b <=3D 0x17f) || + b =3D=3D 0x1c2 || (b >=3D 0x1c4 && b <=3D 0x1c6) || (b >=3D 0x1d0 && b <=3D 0x1ff))) { disas_insn_new(s, cpu, b + 0x100); return s->pc; --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713428; cv=none; d=zohomail.com; s=zohoarc; 
b=dau8LeJ8FVY9io8EMmPZGn1KQIfIvatcdzxO3DkE2s51cbUjcUCcyz34jxtft0BvCF74x2Y7YvG9NKzcnizTYvPZZGZQfEaqKYCcgWBm05rDhKbU703pETOJMemOzAv2asFUoa/hbP8vIKiKmFQrz/VtfFNEhYejBIjOSGnWCoU= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713428; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=k/59JyXFay2hDfFk/6CL8NE8vmjr5IrSGvkbHsj6SXY=; b=mU+M4Y7O/NnxonBJjlBgwWtLALKIwGGrqRA8Pizp8EBLMRBUZGMw/iZUG6svLWBMBh5cCwF/eWZP4zzVe0w0I1GQB89Ktrf6cv7n9muI4NnfrsYxOSTHfUrO0rpNShTvEN6uNAJMz7c9LjZOxfw5mB6QMzzcq78nDo9veaumPa8= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 166371342831077.76991884176164; Tue, 20 Sep 2022 15:37:08 -0700 (PDT) Received: from localhost ([::1]:39256 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalrW-00040b-Tb for importer@patchew.org; Tue, 20 Sep 2022 18:37:07 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:39752) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0T-0006ZZ-UC for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:01 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:40029) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0R-0002Fc-B7 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:01 -0400 Received: from mail-ej1-f69.google.com (mail-ej1-f69.google.com [209.85.218.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id 
us-mta-349-_hROJYVON2S04bNMPTGbgA-1; Tue, 20 Sep 2022 13:25:57 -0400 Received: by mail-ej1-f69.google.com with SMTP id xj11-20020a170906db0b00b0077b6ecb23fcso1771548ejb.5 for ; Tue, 20 Sep 2022 10:25:57 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id 11-20020a170906328b00b007389c5a45f0sm151433ejw.148.2022.09.20.10.25.54 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:54 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694758; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=k/59JyXFay2hDfFk/6CL8NE8vmjr5IrSGvkbHsj6SXY=; b=MqPusRuP6LXyZcAUcEHs2YfRsJt+W0dHOjfpp7/gsrdTCrfGhoHb1aAz/ZgQZPhz4Wz+0Z htbuVc09UVPMq4u8kMHxFiGGFRIPCm+rTRZz4rM0u0sEYqmggg9GWYbHP7URSaDiAa73xT LuPeY2YW3pZGSR/bljer6zfo1AIZjd8= X-MC-Unique: _hROJYVON2S04bNMPTGbgA-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=k/59JyXFay2hDfFk/6CL8NE8vmjr5IrSGvkbHsj6SXY=; b=3cWWf+pvvL/P2O8J3nIWcG8eg1QT+zn5aienlib7q6b3FKfKlGh8v8TwT1W7rJZoE9 Qf6y4fzQW8NjLGOCrdjFT5Zqng7XhlXZrrzOnxn4pK8KbUGP/mrxtFydsRRrTtjSy9zS gedKPKVKOVJrqsw3TSdPERPbuZb3fOA2S/hXHj6Gtp2AjIVWImuOPO5G47uT+WWzfxxy L+dcMnDegGSW1JcYChuL9pgT0ihBwegLCqbPpg2noHYwPaa70CHO+r6qp28uBnQH3Qa6 VxjM3dPEQD4w6+ZcbywkRzyIGeVYqucmxtL/sn/qRxzuv+AphG/bg7/TVi+fqLxZw7Xh SDQQ== X-Gm-Message-State: ACrzQf1lBbyT9WtvAjmrOacJCJjlzc5O4UOX3gIan2vJqn2ilOSpFXnD dRYIevoj/p8zRyhXuu8l8RtXfg0lEK8cqggAyom1xqbdnf5TP2UxlzG/TYM3MtUXWhYzH4pmG+2 ji50eoZdCOK6pMJOEehuejkwMOGn5YVNqBIxL8xHNJTzzyBG9/1iRiZIQXn/Wda5rNB8= X-Received: by 
2002:a17:906:9bd4:b0:770:4efb:acbe with SMTP id de20-20020a1709069bd400b007704efbacbemr17959503ejc.436.1663694755503; Tue, 20 Sep 2022 10:25:55 -0700 (PDT) X-Google-Smtp-Source: AMsMyM6x1IBVn194IIxDXfwn2y5nMIMnlF2fD/n7EVUul9YLMmp3s7bdGLdtGs4pihWfDKHcZtjcsg== X-Received: by 2002:a17:906:9bd4:b0:770:4efb:acbe with SMTP id de20-20020a1709069bd400b007704efbacbemr17959468ejc.436.1663694755025; Tue, 20 Sep 2022 10:25:55 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 30/37] target/i386: reimplement 0x0f 0x10-0x17, add AVX Date: Tue, 20 Sep 2022 19:25:00 +0200 Message-Id: <20220920172507.95568-31-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713430061100001 Content-Type: text/plain; charset="utf-8" These are mostly moves, and yet are a total pain. 
The main issue is that: 1) some instructions are selected by mod=3D=3D11 (register operand) vs. mod=3D00/01/10 (memory operand) 2) stores to memory are two-operand operations, while the 3-register and load-from-memory versions operate on the entire contents of the destination; this makes it easier to separate the gen_* function for the store case 3) it's inefficient to load into xmm_T0 only to move the value out again, so the gen_* function for the load case is separated too The manual also has various mistakes in the operands here, for example the store case of MOVHPS operates on a 128-bit source (albeit discarding the bottom 64 bits) and therefore should be Mq,Vdq rather than Mq,Vq. Likewise for the destination and source of MOVHLPS. VUNPCK?PS and VUNPCK?PD are the same as VUNPCK?DQ and VUNPCK?QDQ, but encoded as prefixes rather than separate operands. The helpers can be reused however. For MOVSLDUP, MOVSHDUP and MOVDDUP I chose to reimplement them as helpers. I named the helper for MOVDDUP "movdldup" in preparation for possible future introduction of MOVDHDUP and to clarify the similarity with MOVSLDUP. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/ops_sse.h | 7 ++ target/i386/ops_sse_header.h | 3 + target/i386/tcg/decode-new.c.inc | 126 ++++++++++++++++++++++++++++++ target/i386/tcg/emit.c.inc | 127 +++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 1 + 5 files changed, 264 insertions(+) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 104a53fda0..43b32edbfc 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -1683,6 +1683,10 @@ void glue(helper_ptest, SUFFIX)(CPUX86State *env, Re= g *d, Reg *s) CC_SRC =3D (zf ? 0 : CC_Z) | (cf ? 
0 : CC_C); } =20 +#define FMOVSLDUP(i) s->L((i) & ~1) +#define FMOVSHDUP(i) s->L((i) | 1) +#define FMOVDLDUP(i) s->Q((i) & ~1) + #define SSE_HELPER_F(name, elem, num, F) \ void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ { \ @@ -1705,6 +1709,9 @@ SSE_HELPER_F(helper_pmovzxbq, Q, 1 << SHIFT, s->B) SSE_HELPER_F(helper_pmovzxwd, L, 2 << SHIFT, s->W) SSE_HELPER_F(helper_pmovzxwq, Q, 1 << SHIFT, s->W) SSE_HELPER_F(helper_pmovzxdq, Q, 1 << SHIFT, s->L) +SSE_HELPER_F(helper_pmovsldup, L, 2 << SHIFT, FMOVSLDUP) +SSE_HELPER_F(helper_pmovshdup, L, 2 << SHIFT, FMOVSHDUP) +SSE_HELPER_F(helper_pmovdldup, Q, 1 << SHIFT, FMOVDLDUP) #endif =20 void glue(helper_pmuldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index dd8dcebc23..00de6d69f1 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -355,6 +355,9 @@ DEF_HELPER_3(glue(pmovzxbq, SUFFIX), void, env, Reg, Re= g) DEF_HELPER_3(glue(pmovzxwd, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pmovzxwq, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pmovzxdq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovsldup, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovshdup, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovdldup, SUFFIX), void, env, Reg, Reg) DEF_HELPER_4(glue(pmuldq, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(pcmpeqq, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(packusdw, SUFFIX), void, env, Reg, Reg, Reg) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index 461921a98d..e0ddddcd9e 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -556,6 +556,122 @@ static void decode_0F3A(DisasContext *s, CPUX86State = *env, X86OpEntry *entry, ui *entry =3D opcodes_0F3A[*b]; } =20 +/* + * There are some mistakes in the operands in the manual, and the load/sto= re/register + * cases are easiest to keep separate, so the 
entries for 10-17 follow sim= plicity and + * efficiency of implementation rather than copying what the manual says. + * + * In particular: + * + * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != =3D 1111b, + * but this is not mentioned in the tables. + * + * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of on= e of their + * operands, which must therefore be dq; MOVLPD and MOVLPS also write the = high + * quadword of the V operand. + */ +static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F10_reg[4] =3D { + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPS= */ + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPD= */ + X86_OP_ENTRY3(VMOVSS, V,x, H,x, W,x, vex4), + X86_OP_ENTRY3(VMOVLPx, V,x, H,x, W,x, vex4), /* MOVSD */ + }; + + static const X86OpEntry opcodes_0F10_mem[4] =3D { + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MO= VUPS */ + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MO= VUPD */ + X86_OP_ENTRY3(VMOVSS_ld, V,x, H,x, M,ss, vex4), + X86_OP_ENTRY3(VMOVSD_ld, V,x, H,x, M,sd, vex4), + }; + + if ((get_modrm(s, env) >> 6) =3D=3D 3) { + *entry =3D *decode_by_prefix(s, opcodes_0F10_reg); + } else { + *entry =3D *decode_by_prefix(s, opcodes_0F10_mem); + } +} + +static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F11_reg[4] =3D { + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVPS */ + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVPD */ + X86_OP_ENTRY3(VMOVSS, W,x, H,x, V,x, vex4), + X86_OP_ENTRY3(VMOVLPx, W,x, H,x, V,q, vex4), /* MOVSD */ + }; + + static const X86OpEntry opcodes_0F11_mem[4] =3D { + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVPS */ + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVPD */ + X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex4), + 
X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4), /* MOVSD */ + }; + + if ((get_modrm(s, env) >> 6) =3D=3D 3) { + *entry =3D *decode_by_prefix(s, opcodes_0F11_reg); + } else { + *entry =3D *decode_by_prefix(s, opcodes_0F11_mem); + } +} + +static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F12_mem[4] =3D { + /* + * Use dq for operand for compatibility with gen_MOVSD and + * to allow VEX128 only. + */ + X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex4), /* MOVLPS */ + X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex4), /* MOVLPD */ + X86_OP_ENTRY3(VMOVSLDUP, V,x, None,None, W,x, vex4 cpuid(SSE3)), + X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, WM,q, vex4 cpuid(SSE3))= , /* qq if VEX.256 */ + }; + static const X86OpEntry opcodes_0F12_reg[4] =3D { + X86_OP_ENTRY3(VMOVHLPS, V,dq, H,dq, U,dq, vex4), + X86_OP_ENTRY3(VMOVLPx, W,x, H,x, U,q, vex4), /* MOVLPD = */ + X86_OP_ENTRY3(VMOVSLDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)), + X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)), + }; + + if ((get_modrm(s, env) >> 6) =3D=3D 3) { + *entry =3D *decode_by_prefix(s, opcodes_0F12_reg); + } else { + *entry =3D *decode_by_prefix(s, opcodes_0F12_mem); + if ((s->prefix & PREFIX_REPNZ) && s->vex_l) { + entry->s2 =3D X86_SIZE_qq; + } + } +} + +static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F16_mem[4] =3D { + /* + * Operand 1 technically only reads the low 64 bits, but uses dq s= o that + * it is easier to check for op0 =3D=3D op1 in an endianness-neutr= al manner. + */ + X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex4), /* MOVHPS */ + X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex4), /* MOVHPD */ + X86_OP_ENTRY3(VMOVSHDUP, V,x, None,None, W,x, vex4 cpuid(SSE3)), + {}, + }; + static const X86OpEntry opcodes_0F16_reg[4] =3D { + /* Same as above, operand 1 could be Hq if it wasn't for big-endia= n. 
*/ + X86_OP_ENTRY3(VMOVLHPS, V,dq, H,dq, U,q, vex4), + X86_OP_ENTRY3(VMOVHPx, V,x, H,x, U,x, vex4), /* MOVHPD */ + X86_OP_ENTRY3(VMOVSHDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)), + {}, + }; + + if ((get_modrm(s, env) >> 6) =3D=3D 3) { + *entry =3D *decode_by_prefix(s, opcodes_0F16_reg); + } else { + *entry =3D *decode_by_prefix(s, opcodes_0F16_mem); + } +} + static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry= *entry, uint8_t *b) { if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) { @@ -593,6 +709,16 @@ static void decode_0FE6(DisasContext *s, CPUX86State *= env, X86OpEntry *entry, ui } =20 static const X86OpEntry opcodes_0F[256] =3D { + [0x10] =3D X86_OP_GROUP0(0F10), + [0x11] =3D X86_OP_GROUP0(0F11), + [0x12] =3D X86_OP_GROUP0(0F12), + [0x13] =3D X86_OP_ENTRY3(VMOVLPx_st, M,q, None,None, V,q, vex4 p_00_= 66), + [0x14] =3D X86_OP_ENTRY3(VUNPCKLPx, V,x, H,x, W,x, vex4 p_00_= 66), + [0x15] =3D X86_OP_ENTRY3(VUNPCKHPx, V,x, H,x, W,x, vex4 p_00_= 66), + [0x16] =3D X86_OP_GROUP0(0F16), + /* Incorrectly listed as Mq,Vq in the manual */ + [0x17] =3D X86_OP_ENTRY3(VMOVHPx_st, M,q, None,None, V,dq, vex4 p_00_= 66), + [0x50] =3D X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66= ), [0x51] =3D X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_= f3_f2), [0x52] =3D X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 71b8fcbe24..381fdf0ae6 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -394,6 +394,7 @@ static inline void gen_fp_sse(DisasContext *s, CPUX86St= ate *env, X86DecodedInsn gen_illegal_opcode(s); } } + #define FP_SSE(uname, lname) = \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) \ { = \ @@ -412,6 +413,20 @@ FP_SSE(VMIN, min) FP_SSE(VDIV, div) FP_SSE(VMAX, max) =20 +#define FP_UNPACK_SSE(uname, lname) = \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) 
\ +{ = \ + /* PS maps to the DQ integer instruction, PD maps to QDQ. */ = \ + gen_fp_sse(s, env, decode, = \ + gen_helper_##lname##qdq_xmm, = \ + gen_helper_##lname##dq_xmm, = \ + gen_helper_##lname##qdq_ymm, = \ + gen_helper_##lname##dq_ymm, = \ + NULL, NULL); = \ +} +FP_UNPACK_SSE(VUNPCKLPx, punpckl) +FP_UNPACK_SSE(VUNPCKHPx, punpckh) + /* * 00 =3D v*ps Vps, Wpd * f3 =3D v*ss Vss, Wps @@ -749,6 +764,10 @@ UNARY_INT_SSE(VPMOVZXWD, pmovzxwd) UNARY_INT_SSE(VPMOVZXWQ, pmovzxwq) UNARY_INT_SSE(VPMOVZXDQ, pmovzxdq) =20 +UNARY_INT_SSE(VMOVSLDUP, pmovsldup) +UNARY_INT_SSE(VMOVSHDUP, pmovshdup) +UNARY_INT_SSE(VMOVDDUP, pmovdldup) + UNARY_INT_SSE(VCVTDQ2PD, cvtdq2pd) UNARY_INT_SSE(VCVTPD2DQ, cvtpd2dq) UNARY_INT_SSE(VCVTTPD2DQ, cvttpd2dq) @@ -1816,6 +1835,114 @@ static void gen_VMASKMOVPS_st(DisasContext *s, CPUX= 86State *env, X86DecodedInsn gen_maskmov(s, env, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_v= pmaskmovd_st_ymm); } =20 +static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode) +{ + gen_ldq_env_A0(s, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); + if (decode->op[0].offset !=3D decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset + offset= of(XMMReg, XMM_Q(0))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offset= of(XMMReg, XMM_Q(0))); + } +} + +static void gen_VMOVHPx_st(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode) +{ + gen_stq_env_A0(s, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1))); +} + +static void gen_VMOVHPx(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + if (decode->op[0].offset !=3D decode->op[2].offset) { + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset + offset= of(XMMReg, XMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offset= of(XMMReg, XMM_Q(1))); + } + if (decode->op[0].offset !=3D decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset + offset= of(XMMReg, 
XMM_Q(0))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offset= of(XMMReg, XMM_Q(0))); + } +} + +static void gen_VMOVHLPS(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) +{ + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset + offsetof(X= MMReg, XMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offsetof(X= MMReg, XMM_Q(0))); + if (decode->op[0].offset !=3D decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset + offset= of(XMMReg, XMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offset= of(XMMReg, XMM_Q(1))); + } +} + +static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) +{ + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offsetof(X= MMReg, XMM_Q(1))); + if (decode->op[0].offset !=3D decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset + offset= of(XMMReg, XMM_Q(0))); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offset= of(XMMReg, XMM_Q(0))); + } +} + +/* + * Note that MOVLPx supports 256-bit operation unlike MOVHLPx, MOVLHPx, MO= XHPx. + * Use a gvec move to move everything above the bottom 64 bits. 
+ */ + +static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + int vec_len =3D vector_len(s, decode); + + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset + offsetof(X= MMReg, XMM_Q(0))); + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, ve= c_len, vec_len); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + offsetof(X= MMReg, XMM_Q(0))); +} + +static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode) +{ + int vec_len =3D vector_len(s, decode); + + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, ve= c_len, vec_len); + tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0))); +} + +static void gen_VMOVLPx_st(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode) +{ + tcg_gen_ld_i64(s->tmp1_i64, OP_PTR2, offsetof(ZMMReg, ZMM_Q(0))); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); +} + +static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + TCGv zero =3D tcg_constant_i64(0); + + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); + tcg_gen_st_i64(zero, OP_PTR0, offsetof(ZMMReg, ZMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0))); +} + +static void gen_VMOVSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) +{ + int vec_len =3D vector_len(s, decode); + + tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, ve= c_len, vec_len); + tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); +} + +static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + int vec_len =3D vector_len(s, decode); + + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + 
tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); +} + +static void gen_VMOVSS_st(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); +} + static void gen_VPMASKMOV_st(DisasContext *s, CPUX86State *env, X86Decoded= Insn *decode) { if (s->rex_w) { diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index eb7a4d0e4d..f61be8f113 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4696,6 +4696,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) #endif if (use_new && (b =3D=3D 0x138 || b =3D=3D 0x13a || + (b >=3D 0x110 && b <=3D 0x117) || (b >=3D 0x150 && b <=3D 0x17f) || b =3D=3D 0x1c2 || (b >=3D 0x1c4 && b <=3D 0x1c6) || (b >=3D 0x1d0 && b <=3D 0x1ff))) { --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663714599; cv=none; d=zohomail.com; s=zohoarc; b=nAj24xsh90qwOJUjmX3u60iqYWqHj5nuWK1w5+dbHIw2iaPXq1KJP2CDt73OI87jrRjkEVvcNKHV7KUQ0uuBwFO7N8EKYVCPjRlNobFK51XNSwhRh2AxxsOL2i39900PkDmlcb9La8YJR9a0r4EQrKYlQ4Eu72oeMpMlKZjv9xs= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663714599; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=CHKtesGkXNXdb6UvRXh8n0fViR0wjlvGrZ8KfUqe+ZQ=; 
b=OAmrrVMR/wIhTe80gDOFIpzc0j1BxYzLOP+smYrywk8tjhpZeAukdiwnDbQSmeZ5eURqNsOqnmqrK91woRd8kI71FNFYSWHNE9PJ71kMUbFZcUvHA1nVbHUGMgJxwMM5rXPUQYnw6cAXv+Al+V0pG/Begu6mj7FzYD5uWW/VuGw= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663714599093384.3027847847636; Tue, 20 Sep 2022 15:56:39 -0700 (PDT) Received: from localhost ([::1]:36676 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oamAP-0003ab-S1 for importer@patchew.org; Tue, 20 Sep 2022 18:56:37 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58722) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0a-0006jk-5M for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:08 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:21560) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0R-0002Fg-Q8 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:06 -0400 Received: from mail-ej1-f71.google.com (mail-ej1-f71.google.com [209.85.218.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-593-J6ucxOILOh62azQREAEOIg-1; Tue, 20 Sep 2022 13:25:58 -0400 Received: by mail-ej1-f71.google.com with SMTP id ga33-20020a1709070c2100b00781bd47a18eso1513507ejc.12 for ; Tue, 20 Sep 2022 10:25:57 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id 1-20020a170906210100b0073dd8e5a39fsm134696ejt.156.2022.09.20.10.25.55 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:55 -0700 (PDT) DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694759; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=CHKtesGkXNXdb6UvRXh8n0fViR0wjlvGrZ8KfUqe+ZQ=; b=LpEoPw4jAJzwNqyozkcxoBhZuOWH/TexU5K0IMhUdZdRLAonvapc3IFZtMWt50UJFG8J8M tWOAZnZnjA9K0ikrkwFRucCnDFsPb3ZjmkmhOeROnRqhw/Tj2fPiz4mdYrmPpSsHm1N3Cm qERPWcbWy6QI3khuAMCKFvRxvzVA8D4= X-MC-Unique: J6ucxOILOh62azQREAEOIg-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=CHKtesGkXNXdb6UvRXh8n0fViR0wjlvGrZ8KfUqe+ZQ=; b=mWvH8Y6tYN5dt9lAeLHNdQedeatqQA+HIiyS76+zPAVP644KVCIXUeaMT6cDfVn4VK vNaKwRDg/OumE+xdUuh+ulu6h3joALHkGArALHYF5qdkyapw2xK65d22UcAyRUAuZPaR uaWXUm4kHILFEwByyUwOY7Gr3KXnYX+1U7RE+7hpILO33QjJm4YW0KQ7YQMsS873lxII 3hOvC4Vy9Z1DYEBFDtwGUnDfN+pcvt+dsGW/8UKT7zR+Mb5YcZK1P4akgHV2i1GjhBI/ KbNrBEkP75+AeCgBAkei9+PUCE6VfL8ziuVWotDsjdk99E9uJEehdYT9CvByl/3y6Kxy LJ7A== X-Gm-Message-State: ACrzQf0RhUM2dBzCxXSrrPJzj0U6/FFZV4dBxcmM8RsylKRfwbQRi5Pq MsSkMRkBLDr3Dq04gNzX2RVldOgHXDhI3/hNY+73NzOZm8PA9kuJ0xf9w91WdRYGs5lc2p5UQ8q 40AF2mSJWR6BoXMWqxezCKzjBPdXAy5jD0UmsTDgG2SbccJWA73M7YFqnY8jgS3AtrhM= X-Received: by 2002:a17:907:a079:b0:77d:a363:64b6 with SMTP id ia25-20020a170907a07900b0077da36364b6mr17611162ejc.451.1663694756436; Tue, 20 Sep 2022 10:25:56 -0700 (PDT) X-Google-Smtp-Source: AMsMyM4clpJN031WcBU8njcr5KFx7i0qXmvPwAFiedwJMC9zclSdYnADX1pUUK9X252FAdmp8nevcQ== X-Received: by 2002:a17:907:a079:b0:77d:a363:64b6 with SMTP id ia25-20020a170907a07900b0077da36364b6mr17611145ejc.451.1663694756064; Tue, 20 Sep 2022 10:25:56 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 
31/37] target/i386: reimplement 0x0f 0x28-0x2f, add AVX Date: Tue, 20 Sep 2022 19:25:01 +0200 Message-Id: <20220920172507.95568-32-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663714599728100001 Content-Type: text/plain; charset="utf-8" Here the code is a bit uglier due to the truncation and extension of registers to and from 32-bit. There is also a mistake in the manual with respect to the size of the memory operand of CVTPS2PI and CVTTPS2PI, reported by Ricky Zhou. 
Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 56 +++++++++++++++ target/i386/tcg/emit.c.inc | 120 +++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 1 + 3 files changed, 177 insertions(+) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index e0ddddcd9e..63eb66ccc4 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -672,6 +672,53 @@ static void decode_0F16(DisasContext *s, CPUX86State *= env, X86OpEntry *entry, ui } } =20 +static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F2A[4] =3D { + X86_OP_ENTRY3(CVTPI2Px, V,x, None,None, Q,q), + X86_OP_ENTRY3(CVTPI2Px, V,x, None,None, Q,q), + X86_OP_ENTRY3(VCVTSI2Sx, V,x, H,x, E,y, vex3), + X86_OP_ENTRY3(VCVTSI2Sx, V,x, H,x, E,y, vex3), + }; + *entry =3D *decode_by_prefix(s, opcodes_0F2A); +} + +static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F2B[4] =3D { + X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex4), /* MOVNTPS = */ + X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex4), /* MOVNTPD = */ + X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex4 cpuid(SSE4A))= , /* MOVNTSS */ + X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A))= , /* MOVNTSD */ + }; + + *entry =3D *decode_by_prefix(s, opcodes_0F2B); +} + +static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F2C[4] =3D { + /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.= */ + X86_OP_ENTRY3(CVTTPx2PI, P,q, None,None, W,q), + X86_OP_ENTRY3(CVTTPx2PI, P,q, None,None, W,dq), + X86_OP_ENTRY3(VCVTTSx2SI, G,y, None,None, W,ss, vex3), + X86_OP_ENTRY3(VCVTTSx2SI, G,y, None,None, W,sd, vex3), + }; + *entry =3D *decode_by_prefix(s, opcodes_0F2C); +} + +static void decode_0F2D(DisasContext *s, 
CPUX86State *env, X86OpEntry *ent= ry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F2D[4] =3D { + /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit. = */ + X86_OP_ENTRY3(CVTPx2PI, P,q, None,None, W,q), + X86_OP_ENTRY3(CVTPx2PI, P,q, None,None, W,dq), + X86_OP_ENTRY3(VCVTSx2SI, G,y, None,None, W,ss, vex3), + X86_OP_ENTRY3(VCVTSx2SI, G,y, None,None, W,sd, vex3), + }; + *entry =3D *decode_by_prefix(s, opcodes_0F2D); +} + static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry= *entry, uint8_t *b) { if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) { @@ -746,6 +793,15 @@ static const X86OpEntry opcodes_0F[256] =3D { [0x76] =3D X86_OP_ENTRY3(PCMPEQD, V,x, H,x, W,x, vex4 mmx avx2_256= p_00_66), [0x77] =3D X86_OP_GROUP0(0F77), =20 + [0x28] =3D X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1 p_00_6= 6), /* MOVAPS */ + [0x29] =3D X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 p_00_6= 6), /* MOVAPS */ + [0x2A] =3D X86_OP_GROUP0(0F2A), + [0x2B] =3D X86_OP_GROUP0(0F2B), + [0x2C] =3D X86_OP_GROUP0(0F2C), + [0x2D] =3D X86_OP_GROUP0(0F2D), + [0x2E] =3D X86_OP_ENTRY3(VUCOMI, None,None, V,x, W,x, vex4 p_00_6= 6), + [0x2F] =3D X86_OP_ENTRY3(VCOMI, None,None, V,x, W,x, vex4 p_00_6= 6), + [0x38] =3D X86_OP_GROUP0(0F38), [0x3a] =3D X86_OP_GROUP0(0F3A), =20 diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 381fdf0ae6..6e391e3598 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1038,6 +1038,36 @@ static void gen_CRC32(DisasContext *s, CPUX86State *= env, X86DecodedInsn *decode) gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot)); } =20 +static void gen_CVTPI2Px(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) +{ + gen_helper_enter_mmx(cpu_env); + if (s->prefix & PREFIX_DATA) { + gen_helper_cvtpi2pd(cpu_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvtpi2ps(cpu_env, OP_PTR0, OP_PTR2); + } +} + +static void gen_CVTPx2PI(DisasContext *s, CPUX86State *env, 
X86DecodedInsn= *decode) +{ + gen_helper_enter_mmx(cpu_env); + if (s->prefix & PREFIX_DATA) { + gen_helper_cvtpd2pi(cpu_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvtps2pi(cpu_env, OP_PTR0, OP_PTR2); + } +} + +static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + gen_helper_enter_mmx(cpu_env); + if (s->prefix & PREFIX_DATA) { + gen_helper_cvttpd2pi(cpu_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvttps2pi(cpu_env, OP_PTR0, OP_PTR2); + } +} + static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *de= code) { gen_helper_emms(cpu_env); @@ -1732,6 +1762,14 @@ static void gen_VCMP(DisasContext *s, CPUX86State *e= nv, X86DecodedInsn *decode) gen_helper_cmp_funcs[index][b](cpu_env, OP_PTR0, OP_PTR1, OP_PTR2); } =20 +static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) +{ + SSEFunc_0_epp fn; + fn =3D s->prefix & PREFIX_DATA ? gen_helper_comisd : gen_helper_comiss; + fn(cpu_env, OP_PTR1, OP_PTR2); + set_cc_op(s, CC_OP_EFLAGS); +} + static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) { gen_unary_fp_sse(s, env, decode, @@ -1740,6 +1778,80 @@ static void gen_VCVTfp2fp(DisasContext *s, CPUX86Sta= te *env, X86DecodedInsn *dec gen_helper_cvtsd2ss, gen_helper_cvtss2sd); } =20 +static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + int vec_len =3D vector_len(s, decode); + MemOp ot =3D decode->op[2].ot; + TCGv_i32 in; + + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, ve= c_len, vec_len); +#ifdef TARGET_X86_64 + if (ot =3D=3D MO_64) { + if (s->prefix & PREFIX_REPNZ) { + gen_helper_cvtsq2sd(cpu_env, OP_PTR0, s->T1); + } else { + gen_helper_cvtsq2ss(cpu_env, OP_PTR0, s->T1); + } + return; + } + in =3D s->tmp2_i32; + tcg_gen_trunc_tl_i32(in, s->T1); +#else + in =3D s->T1; +#endif + + if (s->prefix & PREFIX_REPNZ) { + gen_helper_cvtsi2sd(cpu_env, OP_PTR0, in); + } else { + gen_helper_cvtsi2ss(cpu_env, 
OP_PTR0, in); + } +} + +static inline void gen_VCVTtSx2SI(DisasContext *s, CPUX86State *env, X86De= codedInsn *decode, + SSEFunc_i_ep ss2si, SSEFunc_l_ep ss2sq, + SSEFunc_i_ep sd2si, SSEFunc_l_ep sd2sq) +{ + MemOp ot =3D decode->op[0].ot; + TCGv_i32 out; + +#ifdef TARGET_X86_64 + if (ot =3D=3D MO_64) { + if (s->prefix & PREFIX_REPNZ) { + sd2sq(s->T0, cpu_env, OP_PTR2); + } else { + ss2sq(s->T0, cpu_env, OP_PTR2); + } + return; + } + + out =3D s->tmp2_i32; +#else + out =3D s->T0; +#endif + if (s->prefix & PREFIX_REPNZ) { + sd2si(out, cpu_env, OP_PTR2); + } else { + ss2si(out, cpu_env, OP_PTR2); + } +#ifdef TARGET_X86_64 + tcg_gen_extu_i32_tl(s->T0, out); +#endif +} + +static void gen_VCVTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedIns= n *decode) +{ + gen_VCVTtSx2SI(s, env, decode, + gen_helper_cvtss2si, gen_helper_cvtss2sq, + gen_helper_cvtsd2si, gen_helper_cvtsd2sq); +} + +static void gen_VCVTTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedIn= sn *decode) +{ + gen_VCVTtSx2SI(s, env, decode, + gen_helper_cvttss2si, gen_helper_cvttss2sq, + gen_helper_cvttsd2si, gen_helper_cvttsd2sq); +} + static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86Decoded= Insn *decode) { int mask =3D decode->immediate & 1; @@ -1995,6 +2107,14 @@ static void gen_VSHUF(DisasContext *s, CPUX86State *= env, X86DecodedInsn *decode) fn(OP_PTR0, OP_PTR1, OP_PTR2, imm); } =20 +static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *= decode) +{ + SSEFunc_0_epp fn; + fn =3D s->prefix & PREFIX_DATA ? 
gen_helper_ucomisd : gen_helper_ucomi= ss; + fn(cpu_env, OP_PTR1, OP_PTR2); + set_cc_op(s, CC_OP_EFLAGS); +} + static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn= *decode) { TCGv_ptr ptr =3D tcg_temp_new_ptr(); diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index f61be8f113..2d93e52ae5 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4697,6 +4697,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) if (use_new && (b =3D=3D 0x138 || b =3D=3D 0x13a || (b >=3D 0x110 && b <=3D 0x117) || + (b >=3D 0x128 && b <=3D 0x12f) || (b >=3D 0x150 && b <=3D 0x17f) || b =3D=3D 0x1c2 || (b >=3D 0x1c4 && b <=3D 0x1c6) || (b >=3D 0x1d0 && b <=3D 0x1ff))) { --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663711047; cv=none; d=zohomail.com; s=zohoarc; b=bGtFcrRgLTLIZDYlSmQNswYrthFJ/avt6nNTkvd8oikQKUp+Sqm2lgWSCSDO+5RgtW9/z2U5mcnnHIjX9xEl2SEBBrPWZwEZ/dD90kEXXm6H81TqFhKIKr5tUPn6cdj2dmkf/saXxa5ex6l3rBOY7L2Ge+VAj4pCRvZcMIQBUkM= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663711047; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=lYlCi4izgJ/t/0gMaR1REf/PoK52S5TGaB1ccYyMtxI=; b=doUjPFHGdeqRxLm/oE618Qwz5zwEe60wy3NVHhrXu0Msipz72sMJVAU8gyuIKymfJG2CUs2zwia72d7a0wPvF1oyr+D667pemqA80AsDiYRWEq5sB0QMxkIAV7vqXNUg8dGJjxrk91H7X9QKJ1yWa8M3gSvaqeLBL9IzkJqxLAc= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted 
sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663711047805325.29433159970586; Tue, 20 Sep 2022 14:57:27 -0700 (PDT) Received: from localhost ([::1]:45684 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalF8-0003k4-P1 for importer@patchew.org; Tue, 20 Sep 2022 17:57:26 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:42598) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0i-0006z1-Ny for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:16 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:38659) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0S-0002GD-Ti for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:16 -0400 Received: from mail-ej1-f71.google.com (mail-ej1-f71.google.com [209.85.218.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-466-m8nkar4QNtGv6yeNI4nt4A-1; Tue, 20 Sep 2022 13:25:59 -0400 Received: by mail-ej1-f71.google.com with SMTP id jg32-20020a170907972000b0077ce313a8f0so1781959ejc.15 for ; Tue, 20 Sep 2022 10:25:58 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id v3-20020a1709061dc300b007336c3f05bdsm135812ejh.178.2022.09.20.10.25.56 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:56 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694760; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; 
bh=lYlCi4izgJ/t/0gMaR1REf/PoK52S5TGaB1ccYyMtxI=; b=V+RZb3BkS8S3Jd7ef/nwG1cFrrgxE2dE2xi05+oWvvGM+GfGL+rBO8W7YWM0lsHXqF1N20 Uouv3SvADsFfnBFTXdt/nrGbbjAlTEhaiIYnfFvSwr+HR71Y1cAI+tfcgGz4V0mXUe8Z6l +AQIAVn1ISiWarIADKPIlg1iiYhKzk8= X-MC-Unique: m8nkar4QNtGv6yeNI4nt4A-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=lYlCi4izgJ/t/0gMaR1REf/PoK52S5TGaB1ccYyMtxI=; b=5V57vGl53DxmS/xS6GL8biZcI74KqC/cqhlM6o2/kBE1r7Gu0QeSgWzP4F6dbCy8+g Fu03xp41Wn6/e4IcPNYsO7iKiW5oePOzynDa3NS/JUxecYNTgcYsjKXel975xwvRWW+w XK4WxwjqCxYCdVxtxs4L/lktgUJacSm5naja78dGc9aPZR7gdCPFuVzb0JdLVZ0UJ4TG ORCX9wcMazLLDo4t+3pdbgDWp6WZGzbK/Mht1rD55whFQY8vuq33UkX02Un1JqINq/5Z 99/wbl6dW2k3JuWPUIzbRdwTb4pgXrvjD/cQhpFF07R++QGkps73va9BwfwSzdjpEpoH oMPw== X-Gm-Message-State: ACrzQf15Ei1R+seXHbngWKvY0eL+KTX1iqHowEpInkWfEaXiRK+TplOL hkHBUZ4BfpV5H0l3R7o4fua1UOZQbg/sCVtR51yVnG/nwoFajgS3ird0NIInr7eQ4Daw94qNQPV 9Gj1W2VDt9yrK0m9W9O6ORBt0VGL6vgOvMDZ0I4P9H4zBa9eHn7geCEB62PT5c86ma0c= X-Received: by 2002:a17:906:5d04:b0:77f:ca9f:33d1 with SMTP id g4-20020a1709065d0400b0077fca9f33d1mr18304917ejt.526.1663694757694; Tue, 20 Sep 2022 10:25:57 -0700 (PDT) X-Google-Smtp-Source: AMsMyM4nIfs0H61xsoAPFs3W1BDsrzZfyYQJy5cH2gFMfc3Gx6a6HKEQ6rlSaJqEq0PHU7oXYSVCEQ== X-Received: by 2002:a17:906:5d04:b0:77f:ca9f:33d1 with SMTP id g4-20020a1709065d0400b0077fca9f33d1mr18304895ejt.526.1663694757376; Tue, 20 Sep 2022 10:25:57 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 32/37] target/i386: implement XSAVE and XRSTOR of AVX registers Date: Tue, 20 Sep 2022 19:25:02 +0200 Message-Id: <20220920172507.95568-33-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 
Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -20 X-Spam_score: -2.1 X-Spam_bar: -- X-Spam_report: (-2.1 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663711048577100001 Content-Type: text/plain; charset="utf-8" Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 78 ++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 819e920ec6..c1e3d74c84 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2571,6 +2571,22 @@ static void do_xsave_sse(CPUX86State *env, target_ul= ong ptr, uintptr_t ra) } } =20 +static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + int i, nb_xmm_regs; + + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs =3D 16; + } else { + nb_xmm_regs =3D 8; + } + + for (i =3D 0; i < nb_xmm_regs; i++, ptr +=3D 16) { + cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); + cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); + } +} + static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t= ra) { target_ulong addr =3D ptr 
+ offsetof(XSaveBNDREG, bnd_regs); @@ -2663,6 +2679,9 @@ static void do_xsave(CPUX86State *env, target_ulong p= tr, uint64_t rfbm, if (opt & XSTATE_SSE_MASK) { do_xsave_sse(env, ptr, ra); } + if (opt & XSTATE_YMM_MASK) { + do_xsave_ymmh(env, ptr + XO(avx_state), ra); + } if (opt & XSTATE_BNDREGS_MASK) { do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); } @@ -2737,6 +2756,54 @@ static void do_xrstor_sse(CPUX86State *env, target_u= long ptr, uintptr_t ra) } } =20 +static void do_clear_sse(CPUX86State *env) +{ + int i, nb_xmm_regs; + + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs =3D 16; + } else { + nb_xmm_regs =3D 8; + } + + for (i =3D 0; i < nb_xmm_regs; i++) { + env->xmm_regs[i].ZMM_Q(0) =3D 0; + env->xmm_regs[i].ZMM_Q(1) =3D 0; + } +} + +static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t r= a) +{ + int i, nb_xmm_regs; + + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs =3D 16; + } else { + nb_xmm_regs =3D 8; + } + + for (i =3D 0; i < nb_xmm_regs; i++, ptr +=3D 16) { + env->xmm_regs[i].ZMM_Q(2) =3D cpu_ldq_data_ra(env, ptr, ra); + env->xmm_regs[i].ZMM_Q(3) =3D cpu_ldq_data_ra(env, ptr + 8, ra); + } +} + +static void do_clear_ymmh(CPUX86State *env) +{ + int i, nb_xmm_regs; + + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs =3D 16; + } else { + nb_xmm_regs =3D 8; + } + + for (i =3D 0; i < nb_xmm_regs; i++) { + env->xmm_regs[i].ZMM_Q(2) =3D 0; + env->xmm_regs[i].ZMM_Q(3) =3D 0; + } +} + static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_= t ra) { target_ulong addr =3D ptr + offsetof(XSaveBNDREG, bnd_regs); @@ -2856,9 +2923,14 @@ void helper_xrstor(CPUX86State *env, target_ulong pt= r, uint64_t rfbm) if (xstate_bv & XSTATE_SSE_MASK) { do_xrstor_sse(env, ptr, ra); } else { - /* ??? When AVX is implemented, we may have to be more - selective in the clearing. 
*/ - memset(env->xmm_regs, 0, sizeof(env->xmm_regs)); + do_clear_sse(env); + } + } + if (rfbm & XSTATE_YMM_MASK) { + if (xstate_bv & XSTATE_YMM_MASK) { + do_xrstor_ymmh(env, ptr + XO(avx_state), ra); + } else { + do_clear_ymmh(env); } } if (rfbm & XSTATE_BNDREGS_MASK) { --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713597; cv=none; d=zohomail.com; s=zohoarc; b=KYAZOcpC3aeLwsPo2TqhzOG/Gs65UqU11GPtd4fqUWa8m3730UrVgpzPNSPz5QLQfTNyNMy7M5JDoUOK85ugxp7aKgPTx1o+b+HkLpoMJmfCzSrryhuXMTVTHxPB3PyMejS1P6dPIJh98eH0yKwCvJq8c3w1nlVsBgO0bw6EzoU= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713597; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=VNJTRN07rdUHRl+49t4uiASrF5Xy08Mj8BelZltQBd8=; b=WDg9aB++1ioLeRX7Oawx1WqKFFATDrF+SsssFtJxAoG4QU8RdEyO4xYRnUVtEMIkRlb0ybYi2wtnhZvS3d0rEZ0ALmL94JF1dwDgSCIOl83+lnDcOmevl6/dlQMUm5KbdqyAGGhAOHZMe6u4fkIZ1dkOBIf6FcdKacpSrQ00ZvM= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663713597020356.6980880886932; Tue, 20 Sep 2022 15:39:57 -0700 (PDT) Received: from localhost ([::1]:54668 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oaluF-0001Y9-IP for importer@patchew.org; Tue, 20 Sep 
2022 18:39:55 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58724) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0Z-0006kS-Nt for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:07 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:31706) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0X-0002Gx-2a for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:07 -0400 Received: from mail-ej1-f71.google.com (mail-ej1-f71.google.com [209.85.218.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-670-QX3jbAaPMVG2CnsCzbwAtg-1; Tue, 20 Sep 2022 13:26:01 -0400 Received: by mail-ej1-f71.google.com with SMTP id sd4-20020a1709076e0400b00781e6ba94e1so689260ejc.1 for ; Tue, 20 Sep 2022 10:26:00 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id f19-20020a17090631d300b0073022b796a7sm171601ejf.93.2022.09.20.10.25.57 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:25:58 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694764; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=VNJTRN07rdUHRl+49t4uiASrF5Xy08Mj8BelZltQBd8=; b=Xq8+u0h+jTJf6E4+dKEPpI/GOrl57Qj9dt9yFEzY71VdXsvrDnjqHxeAkR866bPoRgrVgg mOqavTCTczAZHOnQkvT3zkLmwfbraIVCrYyXHfE+VjBPhmn18tVEO59q6iDSbCG+ym8ySI 8gLUIb8j4ewZWM8cCJaduE2Tn2EEtgk= X-MC-Unique: QX3jbAaPMVG2CnsCzbwAtg-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to 
:message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=VNJTRN07rdUHRl+49t4uiASrF5Xy08Mj8BelZltQBd8=; b=0N3LjxqmvHWTdmPMlGWuxKNqV1osk9JITAOqEzya7+w0mpZ28L3fXYl2KN62qeiyzK Kmu7+e9sumgMKbIPScqH4hZhgdhgjyBttggmxO8BmjRWp9ey+UIwewwKQCdu95Ra6at3 yl7u/HvEP/m5kjIcs6NPvyI1UILSLjM4Zop95/RrFEDgdi5ssztU38vTNdy5TzPfV95f PFdF4XjDkNJ9H+M4Srk10meh01s9qolzI0nwFbHdRb1dbUV8hCQCVYMSRWg9ECBzMPaz hKJP3IS1BnicI40oEgJrjv4F2bcOt4nymV7JdZIuSYNvWLmYUVjhe726aMSzo1wXdMyA V68g== X-Gm-Message-State: ACrzQf2Bf4H5VAGI3+mjYtQePazks9j1/VR25m9ijhPrDPA8by294xC8 Ym+10DJsqqK6mtoI75QGT0z+MSCKh0s1FXwEdLAzUCaVHj1thM/UQK/Z1vWfxU/8/QzK9n6KVuo pyYr/+x6L0yywPrAOM0S07zxpYo6scEs9zTIiPrEsZU5VeAyvahZgnUkLh1KA7tELQyA= X-Received: by 2002:a17:907:60c9:b0:77f:be6d:d870 with SMTP id hv9-20020a17090760c900b0077fbe6dd870mr17842279ejc.348.1663694759481; Tue, 20 Sep 2022 10:25:59 -0700 (PDT) X-Google-Smtp-Source: AMsMyM7ApLrHAFhVK9qLIHsKB9hFtah+MBJM8dJoNXtibuQLCTgsWu7ICPASc9RONHidOARU9E5BpQ== X-Received: by 2002:a17:907:60c9:b0:77f:be6d:d870 with SMTP id hv9-20020a17090760c900b0077fbe6dd870mr17842259ejc.348.1663694759226; Tue, 20 Sep 2022 10:25:59 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 33/37] target/i386: implement VLDMXCSR/VSTMXCSR Date: Tue, 20 Sep 2022 19:25:03 +0200 Message-Id: <20220920172507.95568-34-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- 
X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713598996100001 Content-Type: text/plain; charset="utf-8" These are exactly the same as the non-VEX version, but one has to be careful that only VEX.L=3D0 is allowed. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 25 +++++++++++++++++++++++++ target/i386/tcg/emit.c.inc | 20 ++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index 63eb66ccc4..efe52504f2 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -82,6 +82,10 @@ =20 #define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...) \ X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__) +#define X86_OP_ENTRYw(op, op0, s0, ...) \ + X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__) +#define X86_OP_ENTRYr(op, op0, s0, ...) \ + X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__) #define X86_OP_ENTRY0(op, ...) \ X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__) =20 @@ -149,6 +153,25 @@ static inline const X86OpEntry *decode_by_prefix(Disas= Context *s, const X86OpEnt } } =20 +static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *= entry, uint8_t *b) +{ + /* only includes ldmxcsr and stmxcsr, because they have AVX variants. 
= */ + static const X86OpEntry group15_reg[8] =3D { + }; + + static const X86OpEntry group15_mem[8] =3D { + [2] =3D X86_OP_ENTRYr(LDMXCSR, E,d, vex5), + [3] =3D X86_OP_ENTRYw(STMXCSR, E,d, vex5), + }; + + uint8_t modrm =3D get_modrm(s, env); + if ((modrm >> 6) =3D=3D 3) { + *entry =3D group15_reg[(modrm >> 3) & 7]; + } else { + *entry =3D group15_mem[(modrm >> 3) & 7]; + } +} + static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *= entry, uint8_t *b) { static const X86GenFunc group17_gen[8] =3D { @@ -830,6 +853,8 @@ static const X86OpEntry opcodes_0F[256] =3D { [0x7e] =3D X86_OP_GROUP0(0F7E), [0x7f] =3D X86_OP_GROUP0(0F7F), =20 + [0xae] =3D X86_OP_GROUP0(group15), + [0xc2] =3D X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_= 00_66_f3_f2), [0xc4] =3D X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_0= 0_66), [0xc5] =3D X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_0= 0_66), diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 6e391e3598..be7e3797e8 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1099,6 +1099,16 @@ static void gen_INSERTQ_r(DisasContext *s, CPUX86Sta= te *env, X86DecodedInsn *dec gen_helper_insertq_r(cpu_env, OP_PTR0, OP_PTR2); } =20 +static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + if (s->vex_l) { + gen_illegal_opcode(s); + return; + } + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1); + gen_helper_ldmxcsr(cpu_env, s->tmp2_i32); +} + static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) { tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); @@ -1696,6 +1706,16 @@ static void gen_VAESKEYGEN(DisasContext *s, CPUX86St= ate *env, X86DecodedInsn *de gen_helper_aeskeygenassist_xmm(cpu_env, OP_PTR0, OP_PTR1, imm); } =20 +static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) +{ + if (s->vex_l) { + gen_illegal_opcode(s); + return; + } + gen_helper_update_mxcsr(cpu_env); + tcg_gen_ld32u_tl(s->T0, cpu_env, 
offsetof(CPUX86State, mxcsr)); +} + static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn = *decode) { assert(!s->vex_l); --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663714368; cv=none; d=zohomail.com; s=zohoarc; b=RdixQ5oJALAV9lgWL7fQjXObeY8y2zUGOb5QNufKHTM1EgdsFe6M+embkEhQ2AjiWPf1iJsfYqtaEde0NcVUorHwEjsVbpbLe1ctC5TbEgnteKcjiGvbZ/XD4TxCSuYUB4aUqbXIVL3c4dCszwpbNH9ZDTVkguZo0SOopWVjDYM= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663714368; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=e2P74FqVILbG/QWT2XViS/weRofyvDcC4h9ItmjBGdA=; b=JSDdYnodTstfMOYcL8t2zizAPuz8l4VLgzUIrqg5rRy7i4tLGw8WbBA2lPPOfCzsx5TlPWuytg3WAOlS/acebcKzjL9Z6soxmvHg4Sr9zaWr/kgTw7IMu0LCUr7hq1I/WVlV1tjrq9CZMHctigowGajm8MssyoOj40sP7A1/FGI= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 166371436877372.30617466410547; Tue, 20 Sep 2022 15:52:48 -0700 (PDT) Received: from localhost ([::1]:50150 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oam6g-0004px-Ku for importer@patchew.org; Tue, 20 Sep 2022 18:52:46 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58720) by lists.gnu.org with esmtps 
(TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0Y-0006iG-EK for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:06 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:24055) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0W-0002Gn-5R for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:06 -0400 Received: from mail-ed1-f69.google.com (mail-ed1-f69.google.com [209.85.208.69]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-281-M4rC6ToWOna-cePxLBFlag-1; Tue, 20 Sep 2022 13:26:02 -0400 Received: by mail-ed1-f69.google.com with SMTP id s17-20020a056402521100b004511c8d59e3so2392327edd.11 for ; Tue, 20 Sep 2022 10:26:02 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id r2-20020a170906a20200b0078020ae040csm118493ejy.219.2022.09.20.10.25.59 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:26:00 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694763; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=e2P74FqVILbG/QWT2XViS/weRofyvDcC4h9ItmjBGdA=; b=X4Gb/n0vF5cuLWd9Cwe5A478s80tMtI+VELQ4Sdr/WSze0T66WZoFHSwIPTghAjw49VtvR 3USFkG3N3MBC/HhoxD5ar8YjEmU18uByPLmJ6ZzzXIOIEQV9qs00Y0Yqtzv+tlXE5VyADL a8iGhR2WRUVw8enedSAnA3eNz5SEBcE= X-MC-Unique: M4rC6ToWOna-cePxLBFlag-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=e2P74FqVILbG/QWT2XViS/weRofyvDcC4h9ItmjBGdA=; 
b=AoGy+8DUVRRk+M8jwPIsK7fOz/AD8sXyn53u40NrCSw7Rkw7wF6rFF6KZYtSTAny88 QscW6pYqqn4t8vyTDqvbdERh10N494CHPIDx1yfJ2pUgnMAG56yVSppAPKbEn+y+uz3x /YFIENaFJZq/1cBU0PeBHabzAokZMxtQenAlf5IuOaNrazhSDbgwnYCGNqL3ILrK/IDx DvvswLUZWlw8CC1yWJTQOk6jnaC7Hz+B8VN9T/wHNtKmSrt4BxRrzQ/YhbBb3U+uyLMH 7ax2HQDEZDeFIu7eXhB5ICu4CpOHOF9X2bZILBrIUdQTjBemtlt7YMmw00ukMpe8ljxn NqZw== X-Gm-Message-State: ACrzQf1qtuohRSo/O1T6+AzVViaoVYSMo4hVJ6BT8I6xTTnaNHZZJOPx 5QUd/k0X4RTWVtaFo5LCyU8wh+OinSGqb49HFt4tN1r+ocwpFM3ILU9IQdKdc1/ocf+YpWkuRIP 9ZtrgAmG5Qb5cgYoXnIFtXrflDPX3VzgnKOkb7C+SR8Ya9vhGcshUtjKZmSFbWgPNx24= X-Received: by 2002:a05:6402:4411:b0:437:b723:72 with SMTP id y17-20020a056402441100b00437b7230072mr21212696eda.38.1663694760922; Tue, 20 Sep 2022 10:26:00 -0700 (PDT) X-Google-Smtp-Source: AMsMyM7Xx4JfL7Ur92YfeYaY4smyQeVSE2TlV8aGzV7AlHpde+H4hGqlpk80JITIkCyYNDmzadVy3Q== X-Received: by 2002:a05:6402:4411:b0:437:b723:72 with SMTP id y17-20020a056402441100b00437b7230072mr21212673eda.38.1663694760585; Tue, 20 Sep 2022 10:26:00 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 34/37] target/i386: Enable AVX cpuid bits when using TCG Date: Tue, 20 Sep 2022 19:25:04 +0200 Message-Id: <20220920172507.95568-35-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, 
DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, UPPERCASE_50_75=0.008 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663714370539100001 Content-Type: text/plain; charset="utf-8" From: Paul Brook Include AVX, AVX2 and VAES in the guest cpuid features supported by TCG. Signed-off-by: Paul Brook Message-Id: <20220424220204.2493824-40-paul@nowt.org> Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 1db1278a59..ec0817a61d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -625,12 +625,12 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t ven= dor1, CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \ CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */ \ CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR | \ - CPUID_EXT_RDRAND) + CPUID_EXT_RDRAND | CPUID_EXT_AVX) /* missing: CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA, - CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AVX, + CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_F16C */ =20 #ifdef TARGET_X86_64 @@ -653,14 +653,14 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t ven= dor1, CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX | \ CPUID_7_0_EBX_PCOMMIT | CPUID_7_0_EBX_CLFLUSHOPT | \ CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_FSGSBASE = | \ - CPUID_7_0_EBX_ERMS) + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_AVX2) /* missing: - 
CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2, + CPUID_7_0_EBX_HLE CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED */ #define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | \ /* CPUID_7_0_ECX_OSPKE is dynamic */ \ - CPUID_7_0_ECX_LA57 | CPUID_7_0_ECX_PKS) + CPUID_7_0_ECX_LA57 | CPUID_7_0_ECX_PKS | CPUID_7_0_ECX_VAES) #define TCG_7_0_EDX_FEATURES 0 #define TCG_7_1_EAX_FEATURES 0 #define TCG_APM_FEATURES 0 --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663714749; cv=none; d=zohomail.com; s=zohoarc; b=lMJaP1vA6tYKIk6jw7VISZEn2qPtinkEgR9MebgPvv2ccPq9pqZx/XBU9A/2k0eh4DzCGH2rfyXeVZbfw+SFJoejPRgMAXtai5Wbwok6lQxT5LmzuOWZ8Q75RAugNtMm7yeKM3dtoIjqnFrGl8SMV4cigYCeXg0LblrGLxNPAts= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663714749; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=m29uhLtFVgsc9LJfDw/3Q6y8RuZz33X46FxN5TMwCAs=; b=bmrH6Gv1Oy++oHztx3gzIfCdRKyInVGYvXYjWcGACx/xhXWOAhvMLLTRU6IUyGiv6zEoqshvPAJnPP57+XwcKGXON0SXfhpsoNY52C4Atrudm82ySoiaMN7auUVwjKbGKhwrPXkTMWJqEId4nTjp0JeAdkZtbtqSNkLpuS3jjXA= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663714749307226.01908679845678; Tue, 20 Sep 2022 15:59:09 -0700 (PDT) Received: from 
localhost ([::1]:54950 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oamCp-0001qR-4x for importer@patchew.org; Tue, 20 Sep 2022 18:59:07 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58730) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0d-0006ry-O4 for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:11 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.129.124]:57405) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0b-0002Hz-Ds for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:11 -0400 Received: from mail-ed1-f71.google.com (mail-ed1-f71.google.com [209.85.208.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-153-8Z62djXeNO2xYAx5lW768g-1; Tue, 20 Sep 2022 13:26:04 -0400 Received: by mail-ed1-f71.google.com with SMTP id y1-20020a056402358100b00451b144e23eso2353117edc.18 for ; Tue, 20 Sep 2022 10:26:03 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id lb13-20020a170907784d00b0077909095acasm150742ejc.143.2022.09.20.10.26.01 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:26:01 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694768; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=m29uhLtFVgsc9LJfDw/3Q6y8RuZz33X46FxN5TMwCAs=; b=H6LWwk8a8T7zlSJb8GVKxs+DVAX2/VNOYxnT7y1uT6Fva8xidDLXCSO1/EicyHVq7ZMR+f Zuo9GGGijGuvKjuVEDkCKnem1w5ITRFwbG1lpV9G52To/wtQ9S33qe4heTU4uYe1rM5Fpk VjQ014krqi4JWCrLOyuO+jgijTttN5Q= X-MC-Unique: 8Z62djXeNO2xYAx5lW768g-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; 
c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=m29uhLtFVgsc9LJfDw/3Q6y8RuZz33X46FxN5TMwCAs=; b=xcKYqgUnJ4w9MXVWv3v29u1vqhwBj6NcI2c+IqEQs+zesw/yIeMna0gNpCn9FBxl4r 8xshiOL5z2BDW8PpIGb9Wa/jKXoYlQKiQtf7ys1cMV8Pe5w4trIXA3OQ1XOqt4uhvZl9 NQDML88kx5hNvLllq0rD1g3TkCbeJzPPlQEdKEBRbrYwK1xxjPjDnb1Csk5QYA9uf/Sq 9lAur2sufatZ9XaCKK/iIpLvplowGl20yZQs9cVbIVPyLC5H9a/z0b/ublp1tsnuSRr3 6TivTXfn33LMvdWUcJB8D+XXUyQ15I138FFcD79lG//CrfbObxlGCA1XGoF+m1fXf5lQ yD2w== X-Gm-Message-State: ACrzQf2DOYdTUZXK2Kuj4CfzSbCKB0LawbLyLhGCWHqHr4X+TOQaNNm2 aRjH7uttsxROYyiqsti4A75DoWfvrSzh55H3EXlu0TIG2QanuHmZM6y0H1U48tGKP+aFYXGXYwu yFYP7R6jNLoE9A0FJh0XdKqTn62PBTRIjfRgJetuKUzQDP07mwwPLLzL4sMIeXxmX+xY= X-Received: by 2002:a17:907:6285:b0:781:ad26:7b53 with SMTP id nd5-20020a170907628500b00781ad267b53mr4605980ejc.273.1663694762412; Tue, 20 Sep 2022 10:26:02 -0700 (PDT) X-Google-Smtp-Source: AMsMyM6iqsRQJVJi1TF73n53kAqJhwPhO8dgbHyjDeK12RiP3VbvTPMcHvwcsm30QUrLuK6afJcTTQ== X-Received: by 2002:a17:907:6285:b0:781:ad26:7b53 with SMTP id nd5-20020a170907628500b00781ad267b53mr4605956ejc.273.1663694762019; Tue, 20 Sep 2022 10:26:02 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 35/37] tests/tcg: extend SSE tests to AVX Date: Tue, 20 Sep 2022 19:25:05 +0200 Message-Id: <20220920172507.95568-36-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.129.124; 
envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663714750533100001 Content-Type: text/plain; charset="utf-8" Extracted from a patch by Paul Brook . Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- tests/tcg/i386/Makefile.target | 2 +- tests/tcg/i386/test-avx.c | 201 ++++++++++++++++++--------------- tests/tcg/i386/test-avx.py | 5 +- 3 files changed, 113 insertions(+), 95 deletions(-) diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target index ae71e7f748..4139973255 100644 --- a/tests/tcg/i386/Makefile.target +++ b/tests/tcg/i386/Makefile.target @@ -98,5 +98,5 @@ test-3dnow: test-3dnow.h test-mmx: CFLAGS +=3D -masm=3Dintel -O -I. test-mmx: test-mmx.h =20 -test-avx: CFLAGS +=3D -masm=3Dintel -O -I. +test-avx: CFLAGS +=3D -mavx -masm=3Dintel -O -I. 
test-avx: test-avx.h diff --git a/tests/tcg/i386/test-avx.c b/tests/tcg/i386/test-avx.c index 23c170dd79..953e2906fe 100644 --- a/tests/tcg/i386/test-avx.c +++ b/tests/tcg/i386/test-avx.c @@ -6,18 +6,18 @@ typedef void (*testfn)(void); =20 typedef struct { - uint64_t q0, q1; -} __attribute__((aligned(16))) v2di; + uint64_t q0, q1, q2, q3; +} __attribute__((aligned(32))) v4di; =20 typedef struct { uint64_t mm[8]; - v2di xmm[16]; + v4di ymm[16]; uint64_t r[16]; uint64_t flags; uint32_t ff; uint64_t pad; - v2di mem[4]; - v2di mem0[4]; + v4di mem[4]; + v4di mem0[4]; } reg_state; =20 typedef struct { @@ -31,20 +31,20 @@ reg_state initI; reg_state initF32; reg_state initF64; =20 -static void dump_xmm(const char *name, int n, const v2di *r, int ff) +static void dump_ymm(const char *name, int n, const v4di *r, int ff) { - printf("%s%d =3D %016lx %016lx\n", - name, n, r->q1, r->q0); + printf("%s%d =3D %016lx %016lx %016lx %016lx\n", + name, n, r->q3, r->q2, r->q1, r->q0); if (ff =3D=3D 64) { - double v[2]; + double v[4]; memcpy(v, r, sizeof(v)); - printf(" %16g %16g\n", - v[1], v[0]); - } else if (ff =3D=3D 32) { - float v[4]; - memcpy(v, r, sizeof(v)); - printf(" %8g %8g %8g %8g\n", + printf(" %16g %16g %16g %16g\n", v[3], v[2], v[1], v[0]); + } else if (ff =3D=3D 32) { + float v[8]; + memcpy(v, r, sizeof(v)); + printf(" %8g %8g %8g %8g %8g %8g %8g %8g\n", + v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]); } } =20 @@ -53,10 +53,10 @@ static void dump_regs(reg_state *s) int i; =20 for (i =3D 0; i < 16; i++) { - dump_xmm("xmm", i, &s->xmm[i], 0); + dump_ymm("ymm", i, &s->ymm[i], 0); } for (i =3D 0; i < 4; i++) { - dump_xmm("mem", i, &s->mem0[i], 0); + dump_ymm("mem", i, &s->mem0[i], 0); } } =20 @@ -74,13 +74,13 @@ static void compare_state(const reg_state *a, const reg= _state *b) } } for (i =3D 0; i < 16; i++) { - if (memcmp(&a->xmm[i], &b->xmm[i], 16)) { - dump_xmm("xmm", i, &b->xmm[i], a->ff); + if (memcmp(&a->ymm[i], &b->ymm[i], 32)) { + dump_ymm("ymm", i, &b->ymm[i], 
a->ff); } } for (i =3D 0; i < 4; i++) { - if (memcmp(&a->mem0[i], &a->mem[i], 16)) { - dump_xmm("mem", i, &a->mem[i], a->ff); + if (memcmp(&a->mem0[i], &a->mem[i], 32)) { + dump_ymm("mem", i, &a->mem[i], a->ff); } } if (a->flags !=3D b->flags) { @@ -89,9 +89,9 @@ static void compare_state(const reg_state *a, const reg_s= tate *b) } =20 #define LOADMM(r, o) "movq " #r ", " #o "[%0]\n\t" -#define LOADXMM(r, o) "movdqa " #r ", " #o "[%0]\n\t" +#define LOADYMM(r, o) "vmovdqa " #r ", " #o "[%0]\n\t" #define STOREMM(r, o) "movq " #o "[%1], " #r "\n\t" -#define STOREXMM(r, o) "movdqa " #o "[%1], " #r "\n\t" +#define STOREYMM(r, o) "vmovdqa " #o "[%1], " #r "\n\t" #define MMREG(F) \ F(mm0, 0x00) \ F(mm1, 0x08) \ @@ -101,39 +101,39 @@ static void compare_state(const reg_state *a, const r= eg_state *b) F(mm5, 0x28) \ F(mm6, 0x30) \ F(mm7, 0x38) -#define XMMREG(F) \ - F(xmm0, 0x040) \ - F(xmm1, 0x050) \ - F(xmm2, 0x060) \ - F(xmm3, 0x070) \ - F(xmm4, 0x080) \ - F(xmm5, 0x090) \ - F(xmm6, 0x0a0) \ - F(xmm7, 0x0b0) \ - F(xmm8, 0x0c0) \ - F(xmm9, 0x0d0) \ - F(xmm10, 0x0e0) \ - F(xmm11, 0x0f0) \ - F(xmm12, 0x100) \ - F(xmm13, 0x110) \ - F(xmm14, 0x120) \ - F(xmm15, 0x130) +#define YMMREG(F) \ + F(ymm0, 0x040) \ + F(ymm1, 0x060) \ + F(ymm2, 0x080) \ + F(ymm3, 0x0a0) \ + F(ymm4, 0x0c0) \ + F(ymm5, 0x0e0) \ + F(ymm6, 0x100) \ + F(ymm7, 0x120) \ + F(ymm8, 0x140) \ + F(ymm9, 0x160) \ + F(ymm10, 0x180) \ + F(ymm11, 0x1a0) \ + F(ymm12, 0x1c0) \ + F(ymm13, 0x1e0) \ + F(ymm14, 0x200) \ + F(ymm15, 0x220) #define LOADREG(r, o) "mov " #r ", " #o "[rax]\n\t" #define STOREREG(r, o) "mov " #o "[rax], " #r "\n\t" #define REG(F) \ - F(rbx, 0x148) \ - F(rcx, 0x150) \ - F(rdx, 0x158) \ - F(rsi, 0x160) \ - F(rdi, 0x168) \ - F(r8, 0x180) \ - F(r9, 0x188) \ - F(r10, 0x190) \ - F(r11, 0x198) \ - F(r12, 0x1a0) \ - F(r13, 0x1a8) \ - F(r14, 0x1b0) \ - F(r15, 0x1b8) \ + F(rbx, 0x248) \ + F(rcx, 0x250) \ + F(rdx, 0x258) \ + F(rsi, 0x260) \ + F(rdi, 0x268) \ + F(r8, 0x280) \ + F(r9, 0x288) \ + F(r10, 0x290) 
\ + F(r11, 0x298) \ + F(r12, 0x2a0) \ + F(r13, 0x2a8) \ + F(r14, 0x2b0) \ + F(r15, 0x2b8) \ =20 static void run_test(const TestDef *t) { @@ -143,7 +143,7 @@ static void run_test(const TestDef *t) printf("%5d %s\n", t->n, t->s); asm volatile( MMREG(LOADMM) - XMMREG(LOADXMM) + YMMREG(LOADYMM) "sub rsp, 128\n\t" "push rax\n\t" "push rbx\n\t" @@ -156,26 +156,26 @@ static void run_test(const TestDef *t) "pop rbx\n\t" "shr rbx, 8\n\t" "shl rbx, 8\n\t" - "mov rcx, 0x1c0[rax]\n\t" + "mov rcx, 0x2c0[rax]\n\t" "and rcx, 0xff\n\t" "or rbx, rcx\n\t" "push rbx\n\t" "popf\n\t" REG(LOADREG) - "mov rax, 0x140[rax]\n\t" + "mov rax, 0x240[rax]\n\t" "call [rsp]\n\t" "mov [rsp], rax\n\t" "mov rax, 8[rsp]\n\t" REG(STOREREG) "mov rbx, [rsp]\n\t" - "mov 0x140[rax], rbx\n\t" + "mov 0x240[rax], rbx\n\t" "mov rbx, 0\n\t" - "mov 0x170[rax], rbx\n\t" - "mov 0x178[rax], rbx\n\t" + "mov 0x270[rax], rbx\n\t" + "mov 0x278[rax], rbx\n\t" "pushf\n\t" "pop rbx\n\t" "and rbx, 0xff\n\t" - "mov 0x1c0[rax], rbx\n\t" + "mov 0x2c0[rax], rbx\n\t" "add rsp, 16\n\t" "pop rdx\n\t" "pop rcx\n\t" @@ -183,15 +183,15 @@ static void run_test(const TestDef *t) "pop rax\n\t" "add rsp, 128\n\t" MMREG(STOREMM) - XMMREG(STOREXMM) + YMMREG(STOREYMM) : : "r"(init), "r"(&result), "r"(t->fn) : "memory", "cc", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", - "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", - "xmm12", "xmm13", "xmm14", "xmm15" + "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", + "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15" ); compare_state(init, &result); } @@ -223,22 +223,30 @@ static void run_all(void) =20 float val_f32[] =3D {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3}; double val_f64[] =3D {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5}; -v2di val_i64[] =3D { - {0x3d6b3b6a9e4118f2lu, 0x355ae76d2774d78clu}, - {0xd851c54a56bf1f29lu, 
0x4a84d1d50bf4c4fflu}, - {0x5826475e2c5fd799lu, 0xfd32edc01243f5e9lu}, +v4di val_i64[] =3D { + {0x3d6b3b6a9e4118f2lu, 0x355ae76d2774d78clu, + 0xac3ff76c4daa4b28lu, 0xe7fabd204cb54083lu}, + {0xd851c54a56bf1f29lu, 0x4a84d1d50bf4c4fflu, + 0x56621e553d52b56clu, 0xd0069553da8f584alu}, + {0x5826475e2c5fd799lu, 0xfd32edc01243f5e9lu, + 0x738ba2c66d3fe126lu, 0x5707219c6e6c26b4lu}, }; =20 -v2di deadbeef =3D {0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull}; -v2di indexq =3D {0x000000000000001full, 0x000000000000008full}; -v2di indexd =3D {0x00000002000000efull, 0xfffffff500000010ull}; +v4di deadbeef =3D {0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull, + 0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull}; +v4di indexq =3D {0x000000000000001full, 0x000000000000008full, + 0xffffffffffffffffull, 0xffffffffffffff5full}; +v4di indexd =3D {0x00000002000000efull, 0xfffffff500000010ull, + 0x0000000afffffff0ull, 0x000000000000000eull}; =20 -void init_f32reg(v2di *r) +v4di gather_mem[0x20]; + +void init_f32reg(v4di *r) { static int n; - float v[4]; + float v[8]; int i; - for (i =3D 0; i < 4; i++) { + for (i =3D 0; i < 8; i++) { v[i] =3D val_f32[n++]; if (n =3D=3D ARRAY_LEN(val_f32)) { n =3D 0; @@ -247,12 +255,12 @@ void init_f32reg(v2di *r) memcpy(r, v, sizeof(*r)); } =20 -void init_f64reg(v2di *r) +void init_f64reg(v4di *r) { static int n; - double v[2]; + double v[4]; int i; - for (i =3D 0; i < 2; i++) { + for (i =3D 0; i < 4; i++) { v[i] =3D val_f64[n++]; if (n =3D=3D ARRAY_LEN(val_f64)) { n =3D 0; @@ -261,13 +269,15 @@ void init_f64reg(v2di *r) memcpy(r, v, sizeof(*r)); } =20 -void init_intreg(v2di *r) +void init_intreg(v4di *r) { static uint64_t mask; static int n; =20 r->q0 =3D val_i64[n].q0 ^ mask; r->q1 =3D val_i64[n].q1 ^ mask; + r->q2 =3D val_i64[n].q2 ^ mask; + r->q3 =3D val_i64[n].q3 ^ mask; n++; if (n =3D=3D ARRAY_LEN(val_i64)) { n =3D 0; @@ -280,46 +290,53 @@ static void init_all(reg_state *s) int i; =20 s->r[3] =3D (uint64_t)&s->mem[0]; /* rdx */ + s->r[4] =3D 
(uint64_t)&gather_mem[ARRAY_LEN(gather_mem) / 2]; /* rsi */ s->r[5] =3D (uint64_t)&s->mem[2]; /* rdi */ s->flags =3D 2; - for (i =3D 0; i < 8; i++) { - s->xmm[i] =3D deadbeef; + for (i =3D 0; i < 16; i++) { + s->ymm[i] =3D deadbeef; } - s->xmm[13] =3D indexd; - s->xmm[14] =3D indexq; - for (i =3D 0; i < 2; i++) { + s->ymm[13] =3D indexd; + s->ymm[14] =3D indexq; + for (i =3D 0; i < 4; i++) { s->mem0[i] =3D deadbeef; } } =20 int main(int argc, char *argv[]) { + int i; + init_all(&initI); - init_intreg(&initI.xmm[10]); - init_intreg(&initI.xmm[11]); - init_intreg(&initI.xmm[12]); + init_intreg(&initI.ymm[10]); + init_intreg(&initI.ymm[11]); + init_intreg(&initI.ymm[12]); init_intreg(&initI.mem0[1]); printf("Int:\n"); dump_regs(&initI); =20 init_all(&initF32); - init_f32reg(&initF32.xmm[10]); - init_f32reg(&initF32.xmm[11]); - init_f32reg(&initF32.xmm[12]); + init_f32reg(&initF32.ymm[10]); + init_f32reg(&initF32.ymm[11]); + init_f32reg(&initF32.ymm[12]); init_f32reg(&initF32.mem0[1]); initF32.ff =3D 32; printf("F32:\n"); dump_regs(&initF32); =20 init_all(&initF64); - init_f64reg(&initF64.xmm[10]); - init_f64reg(&initF64.xmm[11]); - init_f64reg(&initF64.xmm[12]); + init_f64reg(&initF64.ymm[10]); + init_f64reg(&initF64.ymm[11]); + init_f64reg(&initF64.ymm[12]); init_f64reg(&initF64.mem0[1]); initF64.ff =3D 64; printf("F64:\n"); dump_regs(&initF64); =20 + for (i =3D 0; i < ARRAY_LEN(gather_mem); i++) { + init_intreg(&gather_mem[i]); + } + if (argc > 1) { int n =3D atoi(argv[1]); run_test(&test_table[n]); diff --git a/tests/tcg/i386/test-avx.py b/tests/tcg/i386/test-avx.py index e16a3d8bee..02982329f1 100755 --- a/tests/tcg/i386/test-avx.py +++ b/tests/tcg/i386/test-avx.py @@ -8,6 +8,7 @@ =20 archs =3D [ "SSE", "SSE2", "SSE3", "SSSE3", "SSE4_1", "SSE4_2", + "AES", "AVX", "AVX2", "AES+AVX", "VAES+AVX", ] =20 ignore =3D set(["FISTTP", @@ -42,7 +43,7 @@ 'vROUND[PS][SD]': 0x7, 'vSHUFPD': 0x0f, 'vSHUFPS': 0xff, - 'vAESKEYGENASSIST': 0, + 'vAESKEYGENASSIST': 0xff, 
'VEXTRACT[FI]128': 0x01, 'VINSERT[FI]128': 0x01, 'VPBLENDD': 0xff, @@ -85,7 +86,7 @@ def mem_w(w): else: raise Exception() =20 - return t + " PTR 16[rdx]" + return t + " PTR 32[rdx]" =20 class XMMArg(): isxmm =3D True --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663713756; cv=none; d=zohomail.com; s=zohoarc; b=VVXRMhc7TO6LWbd8oz2agqR47ErTCfwcsHqLZwwKH9mZNE1BQf7K33w5EVbuCKok3UgbfUltO1bOWDtQVfRTGhMhr6M2PtHOIRv/SiXuSE9FuVNEDOPNaifN40+mhE6d1S4i4rxxo1OYEdbJXAtMZQFoOyyH5o4gO80tDSXQo+4= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663713756; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=XaPGjNeXAEOZGIV333y8CDzhVi7EMBWFiSWeqsvy0T4=; b=RU9dNEQGpYxDVx8mNndBx2/4kdVR+p+DreC+LcOXi3VqLntJ0PdT4Tif46mcQnHCHso0GmxZE9FGqSUPB24BfITioNlOq9/U7wjwIDlv5JGMIwn2l6eicyroQpnC0NhXPdnAPPBwKZ6KBqt54+G+IwPwGcwMTw5o9+Dv31Hg6q4= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663713756398608.1154578057868; Tue, 20 Sep 2022 15:42:36 -0700 (PDT) Received: from localhost ([::1]:56778 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oalwo-00074e-9r for importer@patchew.org; Tue, 20 Sep 2022 18:42:34 -0400 Received: from eggs.gnu.org 
([2001:470:142:3::10]:58728) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0c-0006oP-0v for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:10 -0400 Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:55248) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0Z-0002HW-Ow for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:09 -0400 Received: from mail-ej1-f71.google.com (mail-ej1-f71.google.com [209.85.218.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-378-njHrXrFEP6iuUVh8wmDq4g-1; Tue, 20 Sep 2022 13:26:05 -0400 Received: by mail-ej1-f71.google.com with SMTP id hp18-20020a1709073e1200b0078054727517so1796761ejc.0 for ; Tue, 20 Sep 2022 10:26:05 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id k26-20020aa7c39a000000b00450f338b9c8sm176311edq.69.2022.09.20.10.26.02 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:26:02 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694766; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=XaPGjNeXAEOZGIV333y8CDzhVi7EMBWFiSWeqsvy0T4=; b=Dd7XCyAJyzC5r5ANDRFFpLiQCKlnYixY5RfES3l+dTYDL+DFKKC2mfL0TpRtUNymMZDStW kO3QQOoVeodaJLs9MBFS7aLouNL2K5czuZTj/NWm4WD6JeuTglrNUaGduPku16UIdMB2Y+ XExHfEeWE4HjKHl3GeA/QhPuzrE4OyY= X-MC-Unique: njHrXrFEP6iuUVh8wmDq4g-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; 
bh=XaPGjNeXAEOZGIV333y8CDzhVi7EMBWFiSWeqsvy0T4=; b=jCAijA7Jz8vELecYc3pONnKm13Ycz9h6kI+/sTqmvooaZ/zv4VhueY0vezrmVdBRhR MMYHyY5Q3dDR1GDCZBz/vd08liOdtIslF1psxEt7L9K8klVLnfz4xa+CfwaHubj/3iK6 ZVZeOcthWy4+jyP0bZDXuPl5Tbpl31mJwwCAnWrv488R7Vs+5xT0KdsMQKFp2taVF8Lf 8dOsavJ/CB6SPm/IrQ5GJSxQwql9x8D6vAXmRPCX+Kho92IZyk4Isro5x0mx1PbqxCqJ BicCm4c6CVUEt1p2vikELwKqEEH7rbJKkxc7U69mW3YfbznUzbCwIFQQfExKEz/LDR8X dJGA== X-Gm-Message-State: ACrzQf09/dl9cd/X31NRP1HAtKQg+bZIfhaTfo1wyESBH3cPlrRXPM6t aiKeccT3rg1u2iZtDvilUb+rFdQ7g9zEhxZc45Fg6Bqo42btiYGUxzBpnZBy/3eoW/zQ0qWLb+x dp2cfZdQXnUUvLtMorAJBdy0tScoMR3GhM0psNaEnOV2Hye9D2qYPloEiejrHgSQ1j/0= X-Received: by 2002:a05:6402:26d2:b0:451:5a8c:346b with SMTP id x18-20020a05640226d200b004515a8c346bmr20801063edd.424.1663694763953; Tue, 20 Sep 2022 10:26:03 -0700 (PDT) X-Google-Smtp-Source: AMsMyM5cHd6niiKvMciQWfFWzjvX0EaUOmeSKFvzTkpFlj1oolu02a76VeW8A7JlbLXGG0zWFB95+g== X-Received: by 2002:a05:6402:26d2:b0:451:5a8c:346b with SMTP id x18-20020a05640226d200b004515a8c346bmr20801030edd.424.1663694763591; Tue, 20 Sep 2022 10:26:03 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 36/37] target/i386: move 3DNow to the new decoder Date: Tue, 20 Sep 2022 19:25:06 +0200 Message-Id: <20220920172507.95568-37-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, 
DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663713757905100003 Content-Type: text/plain; charset="utf-8" This adds another kind of weirdness when you thought you had seen it all: an opcode byte that comes _after_ the address, not before. It's not worth adding a new X86_SPECIAL_* constant for it, but it's actually not unlike VCMP; so, forgive me for exploiting the similarity and just deciding to dispatch to the right gen_helper_* call in a single code generation function. In fact, the old decoder had a bug where s->rip_offset should have been set to 1 for 3DNow! instructions, and it's fixed now. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson --- target/i386/tcg/decode-new.c.inc | 10 +++++ target/i386/tcg/decode-new.h | 1 + target/i386/tcg/emit.c.inc | 61 +++++++++++++++++++++++++++ target/i386/tcg/translate.c | 71 +------------------------------- 4 files changed, 74 insertions(+), 69 deletions(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index efe52504f2..e4de3bac0a 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -779,6 +779,14 @@ static void decode_0FE6(DisasContext *s, CPUX86State *= env, X86OpEntry *entry, ui } =20 static const X86OpEntry opcodes_0F[256] =3D { + [0x0E] =3D X86_OP_ENTRY0(EMMS, cpuid(3DNO= W)), /* femms */ + /* + * 3DNow!'s opcode byte comes *after* modrm and displacements, making = it + * more like an Ib operand. Dispatch to the right helper in a single = gen_* + * function. 
+ */ + [0x0F] =3D X86_OP_ENTRY3(3dnow, P,q, Q,q, I,b, cpuid(3DNO= W)), + [0x10] =3D X86_OP_GROUP0(0F10), [0x11] =3D X86_OP_GROUP0(0F11), [0x12] =3D X86_OP_GROUP0(0F12), @@ -1364,6 +1372,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPU= IDFeature cpuid) case X86_FEAT_AVX: return (s->cpuid_ext_features & CPUID_EXT_AVX); =20 + case X86_FEAT_3DNOW: + return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW); case X86_FEAT_SSE4A: return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A); =20 diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index c74550e32b..789e1a3e1a 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -96,6 +96,7 @@ typedef enum X86OpSize { =20 typedef enum X86CPUIDFeature { X86_FEAT_None, + X86_FEAT_3DNOW, X86_FEAT_ADX, X86_FEAT_AES, X86_FEAT_AVX, diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index be7e3797e8..ef4e3478fa 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,6 +19,7 @@ * License along with this library; if not, see . 
*/ =20 +typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b= ); typedef void (*SSEFunc_0_epppti)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr re= g_b, TCGv_ptr reg_c, TCGv a0, TCGv_i32 scale); =20 @@ -326,6 +327,66 @@ static void gen_store_sse(DisasContext *s, X86DecodedI= nsn *decode, int src_ofs) } } =20 +static void gen_helper_pavgusb(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b) +{ + gen_helper_pavgb_mmx(env, reg_a, reg_a, reg_b); +} + +#define FN_3DNOW_MOVE ((SSEFunc_0_epp) (uintptr_t) 1) +static const SSEFunc_0_epp fns_3dnow[] =3D { + [0x0c] =3D gen_helper_pi2fw, + [0x0d] =3D gen_helper_pi2fd, + [0x1c] =3D gen_helper_pf2iw, + [0x1d] =3D gen_helper_pf2id, + [0x8a] =3D gen_helper_pfnacc, + [0x8e] =3D gen_helper_pfpnacc, + [0x90] =3D gen_helper_pfcmpge, + [0x94] =3D gen_helper_pfmin, + [0x96] =3D gen_helper_pfrcp, + [0x97] =3D gen_helper_pfrsqrt, + [0x9a] =3D gen_helper_pfsub, + [0x9e] =3D gen_helper_pfadd, + [0xa0] =3D gen_helper_pfcmpgt, + [0xa4] =3D gen_helper_pfmax, + [0xa6] =3D FN_3DNOW_MOVE, /* PFRCPIT1; no need to actually increase pr= ecision */ + [0xa7] =3D FN_3DNOW_MOVE, /* PFRSQIT1 */ + [0xb6] =3D FN_3DNOW_MOVE, /* PFRCPIT2 */ + [0xaa] =3D gen_helper_pfsubr, + [0xae] =3D gen_helper_pfacc, + [0xb0] =3D gen_helper_pfcmpeq, + [0xb4] =3D gen_helper_pfmul, + [0xb7] =3D gen_helper_pmulhrw_mmx, + [0xbb] =3D gen_helper_pswapd, + [0xbf] =3D gen_helper_pavgusb, +}; + +static void gen_3dnow(DisasContext *s, CPUX86State *env, X86DecodedInsn *d= ecode) +{ + uint8_t b =3D decode->immediate; + SSEFunc_0_epp fn =3D b < ARRAY_SIZE(fns_3dnow) ? 
fns_3dnow[b] : NULL; + + if (!fn) { + gen_illegal_opcode(s); + return; + } + if (s->flags & HF_TS_MASK) { + gen_NM_exception(s); + return; + } + if (s->flags & HF_EM_MASK) { + gen_illegal_opcode(s); + return; + } + + gen_helper_enter_mmx(cpu_env); + if (fn =3D=3D FN_3DNOW_MOVE) { + tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset); + } else { + fn(cpu_env, OP_PTR0, OP_PTR1); + } +} + /* * 00 =3D v*ps Vps, Hps, Wpd * 66 =3D v*pd Vpd, Hpd, Wps diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 2d93e52ae5..627d321f00 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2930,7 +2930,6 @@ static bool first =3D true; static unsigned long limi= t; #define SSE_OPF_CMP (1 << 1) /* does not write for first operand */ #define SSE_OPF_BLENDV (1 << 2) /* blendv* instruction */ #define SSE_OPF_SPECIAL (1 << 3) /* magic */ -#define SSE_OPF_3DNOW (1 << 4) /* 3DNow! instruction */ #define SSE_OPF_MMX (1 << 5) /* MMX/integer/AVX2 instruction */ #define SSE_OPF_SCALAR (1 << 6) /* Has SSE scalar variants */ #define SSE_OPF_SHUF (1 << 9) /* pshufx/shufpx */ @@ -2964,13 +2963,9 @@ struct SSEOpHelper_table1 { SSEFuncs fn[4]; }; =20 -#define SSE_3DNOW { SSE_OPF_3DNOW } #define SSE_SPECIAL { SSE_OPF_SPECIAL } =20 static const struct SSEOpHelper_table1 sse_op_table1[256] =3D { - /* 3DNow! extensions */ - [0x0e] =3D SSE_SPECIAL, /* femms */ - [0x0f] =3D SSE_3DNOW, /* pf... 
(sse_op_table5) */ /* pure SSE operations */ [0x10] =3D SSE_SPECIAL, /* movups, movupd, movss, movsd */ [0x11] =3D SSE_SPECIAL, /* movups, movupd, movss, movsd */ @@ -3179,38 +3174,6 @@ static const SSEFunc_0_eppp sse_op_table4[8][4] =3D { }; #undef SSE_CMP =20 -static void gen_helper_pavgusb(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b) -{ - gen_helper_pavgb_mmx(env, reg_a, reg_a, reg_b); -} - -static const SSEFunc_0_epp sse_op_table5[256] =3D { - [0x0c] =3D gen_helper_pi2fw, - [0x0d] =3D gen_helper_pi2fd, - [0x1c] =3D gen_helper_pf2iw, - [0x1d] =3D gen_helper_pf2id, - [0x8a] =3D gen_helper_pfnacc, - [0x8e] =3D gen_helper_pfpnacc, - [0x90] =3D gen_helper_pfcmpge, - [0x94] =3D gen_helper_pfmin, - [0x96] =3D gen_helper_pfrcp, - [0x97] =3D gen_helper_pfrsqrt, - [0x9a] =3D gen_helper_pfsub, - [0x9e] =3D gen_helper_pfadd, - [0xa0] =3D gen_helper_pfcmpgt, - [0xa4] =3D gen_helper_pfmax, - [0xa6] =3D gen_helper_movq, /* pfrcpit1; no need to actually increase = precision */ - [0xa7] =3D gen_helper_movq, /* pfrsqit1 */ - [0xaa] =3D gen_helper_pfsubr, - [0xae] =3D gen_helper_pfacc, - [0xb0] =3D gen_helper_pfcmpeq, - [0xb4] =3D gen_helper_pfmul, - [0xb6] =3D gen_helper_movq, /* pfrcpit2 */ - [0xb7] =3D gen_helper_pmulhrw_mmx, - [0xbb] =3D gen_helper_pswapd, - [0xbf] =3D gen_helper_pavgusb, -}; - struct SSEOpHelper_table6 { SSEFuncs fn[2]; uint32_t ext_mask; @@ -3363,7 +3326,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s= , int b, b1 =3D 0; sse_op_flags =3D sse_op_table1[b].flags; sse_op_fn =3D sse_op_table1[b].fn[b1]; - if ((sse_op_flags & (SSE_OPF_SPECIAL | SSE_OPF_3DNOW)) =3D=3D 0 + if ((sse_op_flags & SSE_OPF_SPECIAL) =3D=3D 0 && !sse_op_fn.op1) { goto unknown_op; } @@ -3377,11 +3340,6 @@ static void gen_sse(CPUX86State *env, DisasContext *= s, int b, is_xmm =3D 1; } } - if (sse_op_flags & SSE_OPF_3DNOW) { - if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { - goto illegal_op; - } - } /* simple MMX/SSE operation */ if (s->flags & HF_TS_MASK) { 
gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); @@ -3397,15 +3355,6 @@ static void gen_sse(CPUX86State *env, DisasContext *= s, int b, && (b !=3D 0x38 && b !=3D 0x3a)) { goto unknown_op; } - if (b =3D=3D 0x0e) { - if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { - /* If we were fully decoding this we might use illegal_op. */ - goto unknown_op; - } - /* femms */ - gen_helper_emms(cpu_env); - return; - } if (b =3D=3D 0x77) { /* emms */ gen_helper_emms(cpu_env); @@ -4563,18 +4512,6 @@ static void gen_sse(CPUX86State *env, DisasContext *= s, int b, rm =3D (modrm & 7); op2_offset =3D offsetof(CPUX86State,fpregs[rm].mmx); } - if (sse_op_flags & SSE_OPF_3DNOW) { - /* 3DNow! data insns */ - val =3D x86_ldub_code(env, s); - SSEFunc_0_epp op_3dnow =3D sse_op_table5[val]; - if (!op_3dnow) { - goto unknown_op; - } - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - op_3dnow(cpu_env, s->ptr0, s->ptr1); - return; - } } =20 =20 @@ -4696,7 +4633,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) #endif if (use_new && (b =3D=3D 0x138 || b =3D=3D 0x13a || - (b >=3D 0x110 && b <=3D 0x117) || + (b >=3D 0x10e && b <=3D 0x117) || (b >=3D 0x128 && b <=3D 0x12f) || (b >=3D 0x150 && b <=3D 0x17f) || b =3D=3D 0x1c2 || (b >=3D 0x1c4 && b <=3D 0x1c6) || @@ -8533,10 +8470,6 @@ static target_ulong disas_insn(DisasContext *s, CPUS= tate *cpu) =20 set_cc_op(s, CC_OP_POPCNT); break; - case 0x10e ... 0x10f: - /* 3DNow! instructions, ignore prefixes */ - s->prefix &=3D ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); - /* fall through */ case 0x110 ... 0x117: case 0x128 ... 0x12f: case 0x138 ... 
0x13a: --=20 2.37.2 From nobody Fri Apr 19 05:49:18 2024 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass(p=none dis=none) header.from=redhat.com ARC-Seal: i=1; a=rsa-sha256; t=1663715103; cv=none; d=zohomail.com; s=zohoarc; b=IMKutYTDFu6D+HZV34JpmXYsMoqBoBiCvrOsJlm8YrDrVNVKbLTWoHURir9T5xIHPwoDetounot0NlDhPTa0vaW0le5hzyqXnHwx+f2pum4hz5C/H+A8KaBb2iDb+i359KraZBJvVsgdlEibsznAQEZ1FvtUIwXrONRubvcYb0s= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1663715103; h=Content-Type:Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=JnRCq3cmQQ+iHKtSYTziLeQ4kbJFdrb03hpOwbRilYk=; b=X+E03GTFdQQB1iU2monjfROu9CQp+KcMEg842Mf5YrvmRZzu35AY2wdTGhAGiIIybgle+bUMrOwpFRodPSzOc/DdljaVOxp+Kg3O7lU8UoKA3q9Evx1VIIH/pkw2s4L599ai+efpMQELCruYiyoYVEwkcO9ONhJlnUeD7GufYGY= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=pass header.from= (p=none dis=none) Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1663715103951614.3493227595674; Tue, 20 Sep 2022 16:05:03 -0700 (PDT) Received: from localhost ([::1]:51876 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1oamIY-0001Ge-5d for importer@patchew.org; Tue, 20 Sep 2022 19:05:02 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58732) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0e-0006sy-5Y for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:12 -0400 
Received: from us-smtp-delivery-124.mimecast.com ([170.10.133.124]:47746) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1oah0b-0002Hx-Fd for qemu-devel@nongnu.org; Tue, 20 Sep 2022 13:26:11 -0400 Received: from mail-ej1-f71.google.com (mail-ej1-f71.google.com [209.85.218.71]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_128_GCM_SHA256) id us-mta-500-iYpxg0SsM2O_tSn7P_Nh8w-1; Tue, 20 Sep 2022 13:26:07 -0400 Received: by mail-ej1-f71.google.com with SMTP id he41-20020a1709073da900b00780bff831ebso1759533ejc.6 for ; Tue, 20 Sep 2022 10:26:07 -0700 (PDT) Received: from [192.168.10.118] ([2001:b07:6468:f312:9af8:e5f5:7516:fa89]) by smtp.gmail.com with ESMTPSA id g17-20020a1709062db100b0077f15e98256sm115149eji.203.2022.09.20.10.26.04 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 20 Sep 2022 10:26:04 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1663694768; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=JnRCq3cmQQ+iHKtSYTziLeQ4kbJFdrb03hpOwbRilYk=; b=QZhXwtaLmjH5GM4xXIpvGwXVp+0hzN5ea2gIIRXE12LW5fImr8Qsm9Nspi2Tg4bUZF+r+g AzM23a6YVeKtBbprVENtctTNOsqpigMhrXGMWCr8pSYIzCJ25CijB9Z7ZDJeSyQPpHyaGB EL/Ce3/xratje4fUtijJDPcL2p0Kv4E= X-MC-Unique: iYpxg0SsM2O_tSn7P_Nh8w-1 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date; bh=JnRCq3cmQQ+iHKtSYTziLeQ4kbJFdrb03hpOwbRilYk=; b=iRq9gSIB8cFO0LjMSgaV82253NI4YQwfycoFBhdMiN6soi6LlAA3GiOpZRNVtOh5fm WzRCUnYMEP/g0vnDgJiUDj3V/8i/oxaPhCyiReGZ/ZJ2SZkEArMGRHMv6JtFSOImNSfn 
vePKNhXcJgzueDZY8iRcy/lAgWvXS/gVQfdqtBO+/TngpK490pA7RWeTd59eUKJW7hx6 fCR9y8OJM9kNHq+VtgJa6R1sl2MZXpKeEAL/2oLh8Vo5BkKZbuF6QJraSd83cxq6iZVQ TXQkGRgvdJjgvoJToYS0G2BntviobC1/ONjcRBTgR4FeoIxZfkrPr/BOgvGacDg9pkJL hJkg== X-Gm-Message-State: ACrzQf2tlETOnDZz2pTQ7vmpvjLaTlu9FtWnRaa96e+QUC9CdNnI6srq +g+2z44+BwfT1zr/p1CWQCkPSv0jLKj17oNyAMIx6hli+umjyKY739W9Stv+1nWal7W68iQAyLQ 4sz/hlQJa8ddBB88ZIsBYQiC4l57K4AbI950X9jv4QqncS5PteyjDjESp3wP3mktrqUI= X-Received: by 2002:aa7:d556:0:b0:451:f7e6:5121 with SMTP id u22-20020aa7d556000000b00451f7e65121mr21215286edr.188.1663694765369; Tue, 20 Sep 2022 10:26:05 -0700 (PDT) X-Google-Smtp-Source: AMsMyM7w4BVoEu5H/AuMvXagEEi3WPTkw6GcBugnA/V8T7SVmgPDJ9tb2tH+28VgAJOmambnRz4+ug== X-Received: by 2002:aa7:d556:0:b0:451:f7e6:5121 with SMTP id u22-20020aa7d556000000b00451f7e65121mr21215246edr.188.1663694764917; Tue, 20 Sep 2022 10:26:04 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org, paul@nowt.org Subject: [PATCH v2 37/37] target/i386: remove old SSE decoder Date: Tue, 20 Sep 2022 19:25:07 +0200 Message-Id: <20220920172507.95568-38-pbonzini@redhat.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com> References: <20220920172507.95568-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=170.10.133.124; envelope-from=pbonzini@redhat.com; helo=us-smtp-delivery-124.mimecast.com X-Spam_score_int: -27 X-Spam_score: -2.8 X-Spam_bar: -- X-Spam_report: (-2.8 / 5.0 requ) BAYES_00=-1.9, DKIMWL_WL_HIGH=-0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, RCVD_IN_DNSWL_LOW=-0.7, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: 
qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: pass (identity @redhat.com) X-ZM-MESSAGEID: 1663715106213100001 Content-Type: text/plain; charset="utf-8" With all SSE (and AVX!) instructions now implemented in disas_insn_new, it's possible to remove gen_sse, as well as the helpers for instructions that now use gvec. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/ops_sse.h | 124 --- target/i386/ops_sse_header.h | 61 -- target/i386/tcg/decode-new.c.inc | 3 - target/i386/tcg/emit.c.inc | 17 + target/i386/tcg/translate.c | 1722 +----------------------------- 5 files changed, 19 insertions(+), 1908 deletions(-) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 43b32edbfc..76bf20b878 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -297,17 +297,6 @@ static inline int satsw(int x) #define FMAXUB(a, b) ((a) > (b)) ? (a) : (b) #define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b) =20 -#define FAND(a, b) ((a) & (b)) -#define FANDN(a, b) ((~(a)) & (b)) -#define FOR(a, b) ((a) | (b)) -#define FXOR(a, b) ((a) ^ (b)) - -#define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0) -#define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0) -#define FCMPGTL(a, b) ((int32_t)(a) > (int32_t)(b) ? -1 : 0) -#define FCMPEQ(a, b) ((a) =3D=3D (b) ? 
-1 : 0) - -#define FMULLW(a, b) ((a) * (b)) #define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16) #define FMULHUW(a, b) ((a) * (b) >> 16) #define FMULHW(a, b) ((int16_t)(a) * (int16_t)(b) >> 16) @@ -315,46 +304,6 @@ static inline int satsw(int x) #define FAVG(a, b) (((a) + (b) + 1) >> 1) #endif =20 -SSE_HELPER_B(helper_paddb, FADD) -SSE_HELPER_W(helper_paddw, FADD) -SSE_HELPER_L(helper_paddl, FADD) -SSE_HELPER_Q(helper_paddq, FADD) - -SSE_HELPER_B(helper_psubb, FSUB) -SSE_HELPER_W(helper_psubw, FSUB) -SSE_HELPER_L(helper_psubl, FSUB) -SSE_HELPER_Q(helper_psubq, FSUB) - -SSE_HELPER_B(helper_paddusb, FADDUB) -SSE_HELPER_B(helper_paddsb, FADDSB) -SSE_HELPER_B(helper_psubusb, FSUBUB) -SSE_HELPER_B(helper_psubsb, FSUBSB) - -SSE_HELPER_W(helper_paddusw, FADDUW) -SSE_HELPER_W(helper_paddsw, FADDSW) -SSE_HELPER_W(helper_psubusw, FSUBUW) -SSE_HELPER_W(helper_psubsw, FSUBSW) - -SSE_HELPER_B(helper_pminub, FMINUB) -SSE_HELPER_B(helper_pmaxub, FMAXUB) - -SSE_HELPER_W(helper_pminsw, FMINSW) -SSE_HELPER_W(helper_pmaxsw, FMAXSW) - -SSE_HELPER_Q(helper_pand, FAND) -SSE_HELPER_Q(helper_pandn, FANDN) -SSE_HELPER_Q(helper_por, FOR) -SSE_HELPER_Q(helper_pxor, FXOR) - -SSE_HELPER_B(helper_pcmpgtb, FCMPGTB) -SSE_HELPER_W(helper_pcmpgtw, FCMPGTW) -SSE_HELPER_L(helper_pcmpgtl, FCMPGTL) - -SSE_HELPER_B(helper_pcmpeqb, FCMPEQ) -SSE_HELPER_W(helper_pcmpeqw, FCMPEQ) -SSE_HELPER_L(helper_pcmpeql, FCMPEQ) - -SSE_HELPER_W(helper_pmullw, FMULLW) SSE_HELPER_W(helper_pmulhuw, FMULHUW) SSE_HELPER_W(helper_pmulhw, FMULHW) =20 @@ -432,29 +381,6 @@ void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Re= g *d, Reg *s, } #endif =20 -void glue(helper_movl_mm_T0, SUFFIX)(Reg *d, uint32_t val) -{ - int i; - - d->L(0) =3D val; - d->L(1) =3D 0; - for (i =3D 1; i < (1 << SHIFT); i++) { - d->Q(i) =3D 0; - } -} - -#ifdef TARGET_X86_64 -void glue(helper_movq_mm_T0, SUFFIX)(Reg *d, uint64_t val) -{ - int i; - - d->Q(0) =3D val; - for (i =3D 1; i < (1 << SHIFT); i++) { - d->Q(i) =3D 0; - } -} -#endif 
- #define SHUFFLE4(F, a, b, offset) do { \ r0 =3D a->F((order & 3) + offset); \ r1 =3D a->F(((order >> 2) & 3) + offset); \ @@ -1216,27 +1142,6 @@ uint32_t glue(helper_movmskpd, SUFFIX)(CPUX86State *= env, Reg *s) =20 #endif =20 -uint32_t glue(helper_pmovmskb, SUFFIX)(CPUX86State *env, Reg *s) -{ - uint32_t val; - int i; - - val =3D 0; - for (i =3D 0; i < (1 << SHIFT); i++) { - uint8_t byte =3D 0; - byte |=3D (s->B(8 * i + 0) >> 7); - byte |=3D (s->B(8 * i + 1) >> 6) & 0x02; - byte |=3D (s->B(8 * i + 2) >> 5) & 0x04; - byte |=3D (s->B(8 * i + 3) >> 4) & 0x08; - byte |=3D (s->B(8 * i + 4) >> 3) & 0x10; - byte |=3D (s->B(8 * i + 5) >> 2) & 0x20; - byte |=3D (s->B(8 * i + 6) >> 1) & 0x40; - byte |=3D (s->B(8 * i + 7)) & 0x80; - val |=3D byte << (8 * i); - } - return val; -} - #define PACK_HELPER_B(name, F) \ void glue(helper_pack ## name, SUFFIX)(CPUX86State *env, \ Reg *d, Reg *v, Reg *s) \ @@ -1587,13 +1492,6 @@ void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env= , Reg *d, Reg *v, Reg *s) } } =20 -#define FABSB(x) (x > INT8_MAX ? -(int8_t)x : x) -#define FABSW(x) (x > INT16_MAX ? -(int16_t)x : x) -#define FABSL(x) (x > INT32_MAX ? -(int32_t)x : x) -SSE_HELPER_1(helper_pabsb, B, 8 << SHIFT, FABSB) -SSE_HELPER_1(helper_pabsw, W, 4 << SHIFT, FABSW) -SSE_HELPER_1(helper_pabsd, L, 2 << SHIFT, FABSL) - #define FMULHRSW(d, s) (((int16_t) d * (int16_t)s + 0x4000) >> 15) SSE_HELPER_W(helper_pmulhrsw, FMULHRSW) =20 @@ -1723,9 +1621,6 @@ void glue(helper_pmuldq, SUFFIX)(CPUX86State *env, Re= g *d, Reg *v, Reg *s) } } =20 -#define FCMPEQQ(d, s) (d =3D=3D s ? 
-1 : 0) -SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ) - void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *= s) { uint16_t r[8]; @@ -1746,22 +1641,6 @@ void glue(helper_packusdw, SUFFIX)(CPUX86State *env,= Reg *d, Reg *v, Reg *s) } } =20 -#define FMINSB(d, s) MIN((int8_t)d, (int8_t)s) -#define FMINSD(d, s) MIN((int32_t)d, (int32_t)s) -#define FMAXSB(d, s) MAX((int8_t)d, (int8_t)s) -#define FMAXSD(d, s) MAX((int32_t)d, (int32_t)s) -SSE_HELPER_B(helper_pminsb, FMINSB) -SSE_HELPER_L(helper_pminsd, FMINSD) -SSE_HELPER_W(helper_pminuw, MIN) -SSE_HELPER_L(helper_pminud, MIN) -SSE_HELPER_B(helper_pmaxsb, FMAXSB) -SSE_HELPER_L(helper_pmaxsd, FMAXSD) -SSE_HELPER_W(helper_pmaxuw, MAX) -SSE_HELPER_L(helper_pmaxud, MAX) - -#define FMULLD(d, s) ((int32_t)d * (int32_t)s) -SSE_HELPER_L(helper_pmulld, FMULLD) - #if SHIFT =3D=3D 1 void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { @@ -2042,9 +1921,6 @@ void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, R= eg *d, Reg *v, Reg *s, } =20 /* SSE4.2 op helpers */ -#define FCMPGTQ(d, s) ((int64_t)d > (int64_t)s ? 
-1 : 0) -SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ) - #if SHIFT =3D=3D 1 static inline int pcmp_elen(CPUX86State *env, int reg, uint32_t ctrl) { diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 00de6d69f1..2f1f811f9f 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -64,46 +64,6 @@ DEF_HELPER_4(glue(pslldq, SUFFIX), void, env, Reg, Reg, = Reg) #define SSE_HELPER_Q(name, F)\ DEF_HELPER_4(glue(name, SUFFIX), void, env, Reg, Reg, Reg) =20 -SSE_HELPER_B(paddb, FADD) -SSE_HELPER_W(paddw, FADD) -SSE_HELPER_L(paddl, FADD) -SSE_HELPER_Q(paddq, FADD) - -SSE_HELPER_B(psubb, FSUB) -SSE_HELPER_W(psubw, FSUB) -SSE_HELPER_L(psubl, FSUB) -SSE_HELPER_Q(psubq, FSUB) - -SSE_HELPER_B(paddusb, FADDUB) -SSE_HELPER_B(paddsb, FADDSB) -SSE_HELPER_B(psubusb, FSUBUB) -SSE_HELPER_B(psubsb, FSUBSB) - -SSE_HELPER_W(paddusw, FADDUW) -SSE_HELPER_W(paddsw, FADDSW) -SSE_HELPER_W(psubusw, FSUBUW) -SSE_HELPER_W(psubsw, FSUBSW) - -SSE_HELPER_B(pminub, FMINUB) -SSE_HELPER_B(pmaxub, FMAXUB) - -SSE_HELPER_W(pminsw, FMINSW) -SSE_HELPER_W(pmaxsw, FMAXSW) - -SSE_HELPER_Q(pand, FAND) -SSE_HELPER_Q(pandn, FANDN) -SSE_HELPER_Q(por, FOR) -SSE_HELPER_Q(pxor, FXOR) - -SSE_HELPER_B(pcmpgtb, FCMPGTB) -SSE_HELPER_W(pcmpgtw, FCMPGTW) -SSE_HELPER_L(pcmpgtl, FCMPGTL) - -SSE_HELPER_B(pcmpeqb, FCMPEQ) -SSE_HELPER_W(pcmpeqw, FCMPEQ) -SSE_HELPER_L(pcmpeql, FCMPEQ) - -SSE_HELPER_W(pmullw, FMULLW) #if SHIFT =3D=3D 0 DEF_HELPER_3(glue(pmulhrw, SUFFIX), void, env, Reg, Reg) #endif @@ -120,10 +80,6 @@ DEF_HELPER_4(glue(psadbw, SUFFIX), void, env, Reg, Reg,= Reg) #if SHIFT < 2 DEF_HELPER_4(glue(maskmov, SUFFIX), void, env, Reg, Reg, tl) #endif -DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32) -#ifdef TARGET_X86_64 -DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64) -#endif =20 #if SHIFT =3D=3D 0 DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int) @@ -279,7 +235,6 @@ DEF_HELPER_2(glue(movmskps, SUFFIX), i32, env, Reg) DEF_HELPER_2(glue(movmskpd, SUFFIX), i32, 
env, Reg) #endif =20 -DEF_HELPER_2(glue(pmovmskb, SUFFIX), i32, env, Reg) DEF_HELPER_4(glue(packsswb, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(packuswb, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(packssdw, SUFFIX), void, env, Reg, Reg, Reg) @@ -326,9 +281,6 @@ DEF_HELPER_4(glue(phaddsw, SUFFIX), void, env, Reg, Reg= , Reg) DEF_HELPER_4(glue(phsubw, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(phsubd, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(phsubsw, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_3(glue(pabsb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pabsw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pabsd, SUFFIX), void, env, Reg, Reg) DEF_HELPER_4(glue(pmaddubsw, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(pmulhrsw, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(pshufb, SUFFIX), void, env, Reg, Reg, Reg) @@ -359,17 +311,7 @@ DEF_HELPER_3(glue(pmovsldup, SUFFIX), void, env, Reg, = Reg) DEF_HELPER_3(glue(pmovshdup, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pmovdldup, SUFFIX), void, env, Reg, Reg) DEF_HELPER_4(glue(pmuldq, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_4(glue(pcmpeqq, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(packusdw, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_4(glue(pminsb, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_4(glue(pminsd, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_4(glue(pminuw, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_4(glue(pminud, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_4(glue(pmaxsb, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_4(glue(pmaxsd, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_4(glue(pmaxuw, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_4(glue(pmaxud, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_4(glue(pmulld, SUFFIX), void, env, Reg, Reg, Reg) #if SHIFT =3D=3D 1 DEF_HELPER_3(glue(phminposuw, SUFFIX), void, env, Reg, Reg) #endif @@ -390,9 +332,6 @@ DEF_HELPER_5(glue(mpsadbw, SUFFIX), void, env, Reg, Reg= , 
Reg, i32) #endif =20 /* SSE4.2 op helpers */ -#if SHIFT >=3D 1 -DEF_HELPER_4(glue(pcmpgtq, SUFFIX), void, env, Reg, Reg, Reg) -#endif #if SHIFT =3D=3D 1 DEF_HELPER_4(glue(pcmpestri, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_4(glue(pcmpestrm, SUFFIX), void, env, Reg, Reg, i32) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.= c.inc index e4de3bac0a..64876696f5 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -1538,9 +1538,6 @@ static void disas_insn_new(DisasContext *s, CPUState = *cpu, int b) X86DecodedInsn decode; X86DecodeFunc decode_func =3D decode_root; =20 -#ifdef CONFIG_USER_ONLY - if (limit) { --limit; } -#endif s->has_modrm =3D false; =20 next_byte: diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index ef4e3478fa..31186545c8 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,7 +19,24 @@ * License along with this library; if not, see . */ =20 +#define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg]) + +typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg); +typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg); typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b= ); +typedef void (*SSEFunc_0_eppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b, + TCGv_ptr reg_c); +typedef void (*SSEFunc_0_epppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg= _b, + TCGv_ptr reg_c, TCGv_ptr reg_d); +typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b, + TCGv_i32 val); +typedef void (*SSEFunc_0_epppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg= _b, + TCGv_ptr reg_c, TCGv_i32 val); +typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val= ); +typedef void (*SSEFunc_0_pppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr re= g_c, + TCGv_i32 val); +typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b, + TCGv val); typedef void 
(*SSEFunc_0_epppti)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr re= g_b, TCGv_ptr reg_c, TCGv a0, TCGv_i32 scale); =20 diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 627d321f00..c00cf0c682 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -128,9 +128,6 @@ typedef struct DisasContext { /* TCG local register indexes (only used inside old micro ops) */ TCGv tmp0; TCGv tmp4; - TCGv_ptr ptr0; - TCGv_ptr ptr1; - TCGv_ptr ptr2; TCGv_i32 tmp2_i32; TCGv_i32 tmp3_i32; TCGv_i64 tmp1_i64; @@ -2871,57 +2868,6 @@ static void gen_sty_env_A0(DisasContext *s, int offs= et, bool align) tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ); } =20 -static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset) -{ - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q= (0))); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q= (0))); - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q= (1))); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q= (1))); -} - -static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset) -{ - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset); -} - -static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset) -{ - tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset); - tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset); -} - -static inline void gen_op_movq_env_0(DisasContext *s, int d_offset) -{ - tcg_gen_movi_i64(s->tmp1_i64, 0); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset); -} - -#define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg]) -#define XMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg].ZMM_X(0)) - -typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg); -typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg); -typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val); 
-typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val); -typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b= ); -typedef void (*SSEFunc_0_eppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b, - TCGv_ptr reg_c); -typedef void (*SSEFunc_0_epppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg= _b, - TCGv_ptr reg_c, TCGv_ptr reg_d); -typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b, - TCGv_i32 val); -typedef void (*SSEFunc_0_epppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg= _b, - TCGv_ptr reg_c, TCGv_i32 val); -typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val= ); -typedef void (*SSEFunc_0_pppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr re= g_c, - TCGv_i32 val); -typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_= b, - TCGv val); -typedef void (*SSEFunc_0_epppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg= _b, - TCGv_ptr reg_c, TCGv val); - -static bool first =3D true; static unsigned long limit; #include "decode-new.h" #include "emit.c.inc" #include "decode-new.c.inc" @@ -4607,40 +2917,15 @@ static target_ulong disas_insn(DisasContext *s, CPU= State *cpu) =20 prefixes =3D 0; =20 - if (first) first =3D false, limit =3D getenv("LIMIT") ? atol(getenv("L= IMIT")) : -1; - bool use_new =3D true; -#ifdef CONFIG_USER_ONLY - use_new &=3D limit > 0; -#endif next_byte: s->prefix =3D prefixes; b =3D x86_ldub_code(env, s); /* Collect prefixes. 
*/ switch (b) { default: -#ifndef CONFIG_USER_ONLY - use_new &=3D b <=3D limit; -#endif - if (use_new && 0) { - disas_insn_new(s, cpu, b); - return s->pc; - } break; case 0x0f: b =3D x86_ldub_code(env, s) + 0x100; -#ifndef CONFIG_USER_ONLY - use_new &=3D b <=3D limit; -#endif - if (use_new && - (b =3D=3D 0x138 || b =3D=3D 0x13a || - (b >=3D 0x10e && b <=3D 0x117) || - (b >=3D 0x128 && b <=3D 0x12f) || - (b >=3D 0x150 && b <=3D 0x17f) || - b =3D=3D 0x1c2 || (b >=3D 0x1c4 && b <=3D 0x1c6) || - (b >=3D 0x1d0 && b <=3D 0x1ff))) { - disas_insn_new(s, cpu, b + 0x100); - return s->pc; - } break; case 0xf3: prefixes |=3D PREFIX_REPZ; @@ -8470,7 +6755,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) =20 set_cc_op(s, CC_OP_POPCNT); break; - case 0x110 ... 0x117: + case 0x10e ... 0x117: case 0x128 ... 0x12f: case 0x138 ... 0x13a: case 0x150 ... 0x179: @@ -8478,7 +6763,7 @@ static target_ulong disas_insn(DisasContext *s, CPUSt= ate *cpu) case 0x1c2: case 0x1c4 ... 0x1c6: case 0x1d0 ... 0x1fe: - gen_sse(env, s, b, pc_start); + disas_insn_new(s, cpu, b); break; default: goto unknown_op; @@ -8635,9 +6920,6 @@ static void i386_tr_init_disas_context(DisasContextBa= se *dcbase, CPUState *cpu) dc->tmp2_i32 =3D tcg_temp_new_i32(); dc->tmp3_i32 =3D tcg_temp_new_i32(); dc->tmp4 =3D tcg_temp_new(); - dc->ptr0 =3D tcg_temp_new_ptr(); - dc->ptr1 =3D tcg_temp_new_ptr(); - dc->ptr2 =3D tcg_temp_new_ptr(); dc->cc_srcT =3D tcg_temp_local_new(); } =20 --=20 2.37.2