[PULL 130/153] target/i386/tcg: add a CCOp for SBB x,x

Paolo Bonzini posted 153 patches 1 week, 4 days ago
Maintainers: "Alex Bennée" <alex.bennee@linaro.org>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Thomas Huth <thuth@redhat.com>, Yonggang Luo <luoyonggang@gmail.com>, Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>, Cameron Esfahani <dirty@apple.com>, Roman Bolshakov <rbolshakov@ddn.com>, Phil Dennis-Jordan <phil@philjordan.eu>, Mads Ynddal <mads@ynddal.dk>, Magnus Kulke <magnus.kulke@linux.microsoft.com>, Wei Liu <wei.liu@kernel.org>, Stefano Stabellini <sstabellini@kernel.org>, Anthony PERARD <anthony@xenproject.org>, Paul Durrant <paul@xen.org>, "Edgar E. Iglesias" <edgar.iglesias@gmail.com>, David Hildenbrand <david@kernel.org>, Igor Mammedov <imammedo@redhat.com>, Alistair Francis <alistair.francis@wdc.com>, Stefan Berger <stefanb@linux.vnet.ibm.com>, John Snow <jsnow@redhat.com>, Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, Fam Zheng <fam@euphon.net>, Aarushi Mehta <mehta.aaru20@gmail.com>, Julia Suvorova <jusual@redhat.com>, Stefano Garzarella <sgarzare@redhat.com>, Stefan Weil <sw@weilnetz.de>, Samuel Thibault <samuel.thibault@ens-lyon.org>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, Zhao Liu <zhao1.liu@intel.com>, Peter Maydell <peter.maydell@linaro.org>, Mauro Carvalho Chehab <mchehab+huawei@kernel.org>, "Daniel P. Berrangé" <berrange@redhat.com>, Luc Michel <luc@lmichel.fr>, Damien Hedde <damien.hedde@dahe.fr>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Markus Armbruster <armbru@redhat.com>, Eduardo Habkost <eduardo@habkost.net>, Ani Sinha <anisinha@redhat.com>, Christian Schoenebeck <qemu_oss@crudebyte.com>, Greg Kurz <groug@kaod.org>, "Michael S. Tsirkin" <mst@redhat.com>, Dongjiu Geng <gengdongjiu1@gmail.com>, Aurelien Jarno <aurelien@aurel32.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Cédric Le Goater" <clg@kaod.org>, Steven Lee <steven_lee@aspeedtech.com>, Troy Lee <leetroy@gmail.com>, Jamin Lin <jamin_lin@aspeedtech.com>, Andrew Jeffery <andrew@codeconstruct.com.au>, Joel Stanley <joel@jms.id.au>, Tyrone Ting <kfting@nuvoton.com>, Hao Wu <wuhaotsh@google.com>, Beniamino Galvani <b.galvani@gmail.com>, Strahinja Jankovic <strahinja.p.jankovic@gmail.com>, Niek Linnenbank <nieklinnenbank@gmail.com>, Samuel Tardieu <sam@rfc1149.net>, Antony Pavlov <antonynpavlov@gmail.com>, Igor Mitsyanko <i.mitsyanko@gmail.com>, Jean-Christophe Dubois <jcd@tribudubois.net>, Andrey Smirnov <andrew.smirnov@gmail.com>, Bernhard Beschow <shentey@gmail.com>, Rob Herring <robh@kernel.org>, Subbaraya Sundeep <sundeep.lkml@gmail.com>, Jan Kiszka <jan.kiszka@web.de>, Felipe Balbi <balbi@kernel.org>, Radoslaw Biernacki <rad@semihalf.com>, Leif Lindholm <leif.lindholm@oss.qualcomm.com>, Eric Auger <eric.auger@redhat.com>, Alexandre Iooss <erdnaxe@crans.org>, Shannon Zhao <shannon.zhaosl@gmail.com>, Gerd Hoffmann <kraxel@redhat.com>, Laurent Vivier <laurent@vivier.eu>, Michael Rolnik <mrolnik@gmail.com>, Raphael Norwitz <raphael@enfabrica.net>, Helge Deller <deller@gmx.de>, "Clément Chigot" <chigot@adacore.com>, Frederic Konrad <konrad.frederic@yahoo.fr>, Alberto Garcia <berto@igalia.com>, Yoshinori Sato <yoshinori.sato@nifty.com>, Magnus Damm <magnus.damm@gmail.com>, Palmer Dabbelt <palmer@dabbelt.com>, Weiwei Li <liwei1518@gmail.com>, Daniel Henrique Barboza <dbarboza@ventanamicro.com>, Liu Zhiwei <zhiwei_liu@linux.alibaba.com>, Halil Pasic <pasic@linux.ibm.com>, Christian Borntraeger <borntraeger@linux.ibm.com>, Jason Herne <jjherne@linux.ibm.com>, Vijai Kumar K <vijai@behindbytes.com>, Nicholas Piggin <npiggin@gmail.com>, Harsh Prateek Bora <harshpb@linux.ibm.com>, "Collin L. Walling" <walling@linux.ibm.com>, Amit Shah <amit@kernel.org>, Yanan Wang <wangyanan55@huawei.com>, Riku Voipio <riku.voipio@iki.fi>, Paul Burton <paulburton@kernel.org>, Aleksandar Rikalo <arikalo@gmail.com>, Jonathan Cameron <jonathan.cameron@huawei.com>, Fan Ni <fan.ni@samsung.com>, Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>, "Hervé Poussineau" <hpoussin@reactos.org>, BALATON Zoltan <balaton@eik.bme.hu>, Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>, Dmitry Osipenko <dmitry.osipenko@collabora.com>, Dmitry Fleytman <dmitry.fleytman@gmail.com>, Ninad Palsule <ninad@linux.ibm.com>, Glenn Miles <milesg@linux.ibm.com>, "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>, Corey Minyard <cminyard@mvista.com>, Patrick Leis <venture@google.com>, Alejandro Jimenez <alejandro.j.jimenez@oracle.com>, Sairaj Kodilkar <sarunkod@amd.com>, Jason Wang <jasowang@redhat.com>, Yi Liu <yi.l.liu@intel.com>, "Clément Mathieu--Drif" <clement.mathieu--drif@eviden.com>, David Woodhouse <dwmw2@infradead.org>, Sergio Lopez <slp@redhat.com>, Alexander Graf <graf@amazon.com>, Dorjoy Chowdhury <dorjoychy111@gmail.com>, Song Gao <gaosong@loongson.cn>, Bibo Mao <maobibo@loongson.cn>, Jiaxun Yang <jiaxun.yang@flygoat.com>, Huacai Chen <chenhuacai@kernel.org>, Jia Liu <proljc@gmail.com>, Gautam Menghani <gautam@linux.ibm.com>, Aditya Gupta <adityag@linux.ibm.com>, Xiao Guangrong <xiaoguangrong.eric@gmail.com>, Alexander Bulekov <alxndr@bu.edu>, Bandan Das <bsd@redhat.com>, Darren Kenny <darren.kenny@oracle.com>, Qiuhao Li <Qiuhao.Li@outlook.com>, Artyom Tarasenko <atar4qemu@gmail.com>, Gustavo Romero <gustavo.romero@linaro.org>, Francisco Iglesias <francisco.iglesias@amd.com>, Pavel Pisa <pisa@cmp.felk.cvut.cz>, Vikram Garhwal <vikram.garhwal@bytedance.com>, Sriram Yagnaraman <sriram.yagnaraman@ericsson.com>, Max Filippov <jcmvbkbc@gmail.com>, Jiri Pirko <jiri@resnulli.us>, Sven Schnelle <svens@stackframe.org>, Stafford Horne <shorne@gmail.com>, Elena Ufimtseva <elena.ufimtseva@oracle.com>, Jagannathan Raman <jag.raman@oracle.com>, Chinmay Rath <rathc@linux.ibm.com>, Sai Pavan Boddu <sai.pavan.boddu@amd.com>, Ran Wang <wangran@bosc.ac.cn>, Anup Patel <anup.patel@wdc.com>, Eric Farman <farman@linux.ibm.com>, Matthew Rosato <mjrosato@linux.ibm.com>, Ilya Leoshkevich <iii@linux.ibm.com>, Tony Krowiak <akrowiak@linux.ibm.com>, Farhan Ali <alifm@linux.ibm.com>, Nina Schoetterl-Glausch <nsg@linux.ibm.com>, Jared Rossi <jrossi@linux.ibm.com>, Zhuoying Cai <zycai@linux.ibm.com>, Cornelia Huck <cohuck@redhat.com>, Hannes Reinecke <hare@suse.com>, Bin Meng <bmeng.cn@gmail.com>, Titus Rwantare <titusr@google.com>, Bastian Koppelmann <kbastian@mail.uni-paderborn.de>, Jeuk Kim <jeuk20.kim@samsung.com>, "Hongren (Zenithal) Zheng" <i@zenithal.me>, "Canokeys.org" <contact@canokeys.org>, John Levon <john.levon@nutanix.com>, Thanos Makatos <thanos.makatos@nutanix.com>, Alex Williamson <alex@shazbot.org>, Tomita Moeko <tomitamoeko@gmail.com>, Viresh Kumar <viresh.kumar@linaro.org>, Mathieu Poirier <mathieu.poirier@linaro.org>, Manos Pitsidianakis <manos.pitsidianakis@linaro.org>, "Gonglei (Arei)" <arei.gonglei@huawei.com>, Eric Blake <eblake@redhat.com>, Keith Busch <kbusch@kernel.org>, Klaus Jensen <its@irrelevant.dk>, Jesper Devantier <foss@defmacro.it>, Huai-Cheng Kuo <hchkuo@avery-design.com.tw>, Chris Browy <cbrowy@avery-design.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, "Dr. David Alan Gilbert" <dave@treblig.org>, Zhang Chen <zhangckid@gmail.com>, Li Zhijian <lizhijian@fujitsu.com>, Mahmoud Mandour <ma.mandourr@gmail.com>, Pierrick Bouvier <pierrick.bouvier@linaro.org>, Cleber Rosa <crosa@redhat.com>, Michael Roth <michael.roth@amd.com>, Maksim Davydov <davydov-max@yandex-team.ru>, David Gibson <david@gibson.dropbear.id.au>, Hyman Huang <yong.huang@smartx.com>, Brian Cain <brian.cain@oss.qualcomm.com>, Marcelo Tosatti <mtosatti@redhat.com>, Pedro Barbuda <pbarbuda@microsoft.com>, Mohamed Mediouni <mohamed@unpredictable.fr>, Coiby Xu <Coiby.Xu@gmail.com>
[PULL 130/153] target/i386/tcg: add a CCOp for SBB x,x
Posted by Paolo Bonzini 1 week, 3 days ago
This is more efficient both when generating code and when testing
flags.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.h           | 13 ++++++++++++-
 target/i386/cpu-dump.c      |  2 ++
 target/i386/tcg/cc_helper.c |  6 ++++++
 target/i386/tcg/translate.c | 13 +++++++++++++
 target/i386/tcg/emit.c.inc  | 33 ++++++---------------------------
 5 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index fc634883649..697dde375c6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1565,7 +1565,18 @@ typedef enum {
     CC_OP_POPCNTL__,
     CC_OP_POPCNTQ__,
     CC_OP_POPCNT = sizeof(target_ulong) == 8 ? CC_OP_POPCNTQ__ : CC_OP_POPCNTL__,
-#define CC_OP_LAST_BWLQ CC_OP_POPCNTQ__
+
+    /*
+     * Note that only CC_OP_SBB_SELF (i.e. the one with MO_TL size)
+     * is used or implemented, because the translator sign-extends
+     * the -1 or 0 value that is written in CC_DST.
+     */
+    CC_OP_SBB_SELFB__, /* S/Z/C/A via CC_DST, O clear, P set.  */
+    CC_OP_SBB_SELFW__,
+    CC_OP_SBB_SELFL__,
+    CC_OP_SBB_SELFQ__,
+    CC_OP_SBB_SELF = sizeof(target_ulong) == 8 ? CC_OP_SBB_SELFQ__ : CC_OP_SBB_SELFL__,
+#define CC_OP_LAST_BWLQ CC_OP_SBB_SELFQ__
 
     CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
 } CCOp;
diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c
index ed8fd363c6b..9e290c0b9b4 100644
--- a/target/i386/cpu-dump.c
+++ b/target/i386/cpu-dump.c
@@ -91,6 +91,8 @@ static const char * const cc_op_str[] = {
     [CC_OP_BMILGQ] = "BMILGQ",
 
     [CC_OP_POPCNT] = "POPCNT",
+
+    [CC_OP_SBB_SELF] = "SBBx,x",
 };
 
 static void
diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c
index f1940b40927..1e022da7b02 100644
--- a/target/i386/tcg/cc_helper.c
+++ b/target/i386/tcg/cc_helper.c
@@ -84,6 +84,9 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
         return src1;
     case CC_OP_POPCNT:
         return dst ? 0 : CC_Z;
+    case CC_OP_SBB_SELF:
+	/* dst is either all zeros (--Z-P-) or all ones (-S-APC) */
+        return (dst & (CC_Z|CC_A|CC_C|CC_S)) ^ (CC_P | CC_Z);
 
     case CC_OP_MULB:
         return compute_all_mulb(dst, src1);
@@ -246,6 +249,9 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_MULQ:
         return src1 != 0;
 
+    case CC_OP_SBB_SELF:
+        return dst & 1;
+
     case CC_OP_ADCX:
     case CC_OP_ADCOX:
         return dst;
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index d3e5e4d8edb..5aa38a912bd 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -304,6 +304,7 @@ static const uint8_t cc_op_live_[] = {
     [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_POPCNT] = USES_CC_DST,
+    [CC_OP_SBB_SELF] = USES_CC_DST,
 };
 
 static uint8_t cc_op_live(CCOp op)
@@ -938,6 +939,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         size = cc_op_size(s->cc_op);
         return gen_prepare_val_nz(cpu_cc_src, size, false);
 
+    case CC_OP_SBB_SELF:
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst };
+
     case CC_OP_ADCX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
@@ -999,6 +1003,7 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                              .no_setcond = true };
+    case CC_OP_SBB_SELF:
     case CC_OP_LOGICB ... CC_OP_LOGICQ:
     case CC_OP_POPCNT:
         return (CCPrepare) { .cond = TCG_COND_NEVER };
@@ -1078,6 +1083,14 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         }
         break;
 
+    case CC_OP_SBB_SELF:
+        /* checking for nonzero is usually the most efficient */
+        if (jcc_op == JCC_L || jcc_op == JCC_B || jcc_op == JCC_S) {
+            jcc_op = JCC_Z;
+            inv = !inv;
+        }
+        goto slow_jcc;
+
     case CC_OP_LOGICB ... CC_OP_LOGICQ:
         /* Mostly used for test+jump */
         size = s->cc_op - CC_OP_LOGICB;
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 8dac4d09da1..0fde3d669d9 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -3876,37 +3876,16 @@ static void gen_SBB(DisasContext *s, X86DecodedInsn *decode)
         return;
     }
 
-    c_in = tcg_temp_new();
-    gen_compute_eflags_c(s, c_in);
-
-    /*
-     * Here the change is as follows:
-     * CC_SBB: src1 = T0, src2 = T0, src3 = c_in
-     * CC_SUB: src1 = 0, src2 = c_in (no src3)
-     *
-     * The difference also does not matter:
-     * - AF is bit 4 of dst^src1^src2, but bit 4 of src1^src2 is zero in both cases
-     *   therefore AF comes straight from dst (in fact it is c_in)
-     * - for OF, src1 and src2 have the same sign in both cases, meaning there
-     *   can be no overflow
-     */
+    /* SBB x,x has its own CCOp so that's even easier.  */
     if (decode->e.op2 != X86_TYPE_I && !decode->op[0].has_ea && decode->op[0].n == decode->op[2].n) {
-        if (s->cc_op == CC_OP_DYNAMIC) {
-            tcg_gen_neg_tl(s->T0, c_in);
-        } else {
-            /*
-             * Do not negate c_in because it will often be dead and only the
-             * instruction generated by negsetcond will survive.
-             */
-            gen_neg_setcc(s, JCC_B << 1, s->T0);
-        }
-        tcg_gen_movi_tl(s->cc_srcT, 0);
-        decode->cc_src = c_in;
-        decode->cc_dst = s->T0;
-        decode->cc_op = CC_OP_SUBB + ot;
+        gen_neg_setcc(s, JCC_B << 1, s->T0);
+        prepare_update1_cc(decode, s, CC_OP_SBB_SELF);
         return;
     }
 
+    c_in = tcg_temp_new();
+    gen_compute_eflags_c(s, c_in);
+
     if (s->prefix & PREFIX_LOCK) {
         tcg_gen_add_tl(s->T0, s->T1, c_in);
         tcg_gen_neg_tl(s->T0, s->T0);
-- 
2.52.0