From nobody Tue Feb 10 22:56:37 2026
Delivered-To: importer@patchew.org
Authentication-Results: mx.zohomail.com;
	spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as
 permitted sender)
  smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org;
	dmarc=fail(p=none dis=none)  header.from=univ-grenoble-alpes.fr
Return-Path: <qemu-devel-bounces+importer=patchew.org@nongnu.org>
Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by
 mx.zohomail.com
	with SMTPS id 1634638043992467.681927224276;
 Tue, 19 Oct 2021 03:07:23 -0700 (PDT)
Received: from localhost ([::1]:60752 helo=lists1p.gnu.org)
	by lists.gnu.org with esmtp (Exim 4.90_1)
	(envelope-from <qemu-devel-bounces+importer=patchew.org@nongnu.org>)
	id 1mcm1i-00019i-Qe
	for importer@patchew.org; Tue, 19 Oct 2021 06:07:22 -0400
Received: from eggs.gnu.org ([2001:470:142:3::10]:58846)
 by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)
 (Exim 4.90_1)
 (envelope-from <frederic.petrot@univ-grenoble-alpes.fr>)
 id 1mcljv-0001A8-4w; Tue, 19 Oct 2021 05:48:59 -0400
Received: from zm-mta-out-3.u-ga.fr ([152.77.200.56]:54918)
 by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)
 (Exim 4.90_1)
 (envelope-from <frederic.petrot@univ-grenoble-alpes.fr>)
 id 1mcljr-0006co-QK; Tue, 19 Oct 2021 05:48:58 -0400
Received: from mailhost.u-ga.fr (mailhost2.u-ga.fr [129.88.177.242])
 by zm-mta-out-3.u-ga.fr (Postfix) with ESMTP id 31F0F41FF1;
 Tue, 19 Oct 2021 11:48:34 +0200 (CEST)
Received: from smtps.univ-grenoble-alpes.fr (smtps2.u-ga.fr [152.77.18.2])
 by mailhost.u-ga.fr (Postfix) with ESMTP id 15C2F601E2;
 Tue, 19 Oct 2021 11:48:34 +0200 (CEST)
Received: from palmier.u-ga.fr (palmier.tima.u-ga.fr [147.171.132.208])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest
 SHA256)
 (No client certificate requested)
 (Authenticated sender: petrotf@univ-grenoble-alpes.fr)
 by smtps.univ-grenoble-alpes.fr (Postfix) with ESMTPSA id E37D814005D;
 Tue, 19 Oct 2021 11:48:33 +0200 (CEST)
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9trot?=
 <frederic.petrot@univ-grenoble-alpes.fr>
To: qemu-devel@nongnu.org,
	qemu-riscv@nongnu.org
Subject: [PATCH v3 14/21] target/riscv: support for 128-bit arithmetic
 instructions
Date: Tue, 19 Oct 2021 11:48:05 +0200
Message-Id: <20211019094812.614056-15-frederic.petrot@univ-grenoble-alpes.fr>
X-Mailer: git-send-email 2.33.0
In-Reply-To: <20211019094812.614056-1-frederic.petrot@univ-grenoble-alpes.fr>
References: <20211019094812.614056-1-frederic.petrot@univ-grenoble-alpes.fr>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
X-Greylist: Whitelist-UGA SMTP Authentifie (petrotf@univ-grenoble-alpes.fr)
 via submission-587 ACL (41)
X-Greylist: Whitelist-UGA MAILHOST (SMTP non authentifie) depuis 152.77.18.2
Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17
 as permitted sender) client-ip=209.51.188.17;
 envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org;
 helo=lists.gnu.org;
Received-SPF: pass client-ip=152.77.200.56;
 envelope-from=frederic.petrot@univ-grenoble-alpes.fr;
 helo=zm-mta-out-3.u-ga.fr
X-Spam_score_int: -18
X-Spam_score: -1.9
X-Spam_bar: -
X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, RCVD_IN_MSPIKE_H2=-0.001,
 SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no
X-Spam_action: no action
X-BeenThere: qemu-devel@nongnu.org
X-Mailman-Version: 2.1.23
Precedence: list
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
 <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <https://lists.nongnu.org/archive/html/qemu-devel>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
 <mailto:qemu-devel-request@nongnu.org?subject=subscribe>
Cc: bin.meng@windriver.com, richard.henderson@linaro.org,
 alistair.francis@wdc.com, fabien.portas@grenoble-inp.org, palmer@dabbelt.com,
 =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20P=C3=A9trot?=
 <frederic.petrot@univ-grenoble-alpes.fr>, philmd@redhat.com
Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org
Sender: "Qemu-devel" <qemu-devel-bounces+importer=patchew.org@nongnu.org>
X-ZM-MESSAGEID: 1634638045908100001

Addition of 128-bit adds and subs in their various sizes.

Signed-off-by: Fr=C3=A9d=C3=A9ric P=C3=A9trot <frederic.petrot@univ-grenobl=
e-alpes.fr>
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
---
 target/riscv/insn32.decode              |   3 +
 target/riscv/translate.c                | 105 ++++++++++++---
 target/riscv/insn_trans/trans_rvb.c.inc |  20 +--
 target/riscv/insn_trans/trans_rvi.c.inc | 169 ++++++++++++++++++++++--
 target/riscv/insn_trans/trans_rvm.c.inc |  26 ++--
 5 files changed, 266 insertions(+), 57 deletions(-)

diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index c642f6d09d..3556bf49cc 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -171,9 +171,12 @@ sraw     0100000 .....  ..... 101 ..... 0111011 @r
 ldu      ............   ..... 111 ..... 0000011 @i
 lq       ............   ..... 010 ..... 0001111 @i
 sq       ............   ..... 100 ..... 0100011 @s
+addid    ............  .....  000 ..... 1011011 @i
 sllid    000000 ......  ..... 001 ..... 1011011 @sh6
 srlid    000000 ......  ..... 101 ..... 1011011 @sh6
 sraid    010000 ......  ..... 101 ..... 1011011 @sh6
+addd     0000000 ..... .....  000 ..... 1111011 @r
+subd     0100000 ..... .....  000 ..... 1111011 @r
 slld     0000000 ..... .....  001 ..... 1111011 @r
 srld     0000000 ..... .....  101 ..... 1111011 @r
 srad     0100000 ..... .....  101 ..... 1111011 @r
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 67a82a0855..332a5d0384 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -485,57 +485,122 @@ static bool gen_logic(DisasContext *ctx, arg_r *a, D=
isasExtend ext,
 }
=20
 static bool gen_arith_imm_fn(DisasContext *ctx, arg_i *a, DisasExtend ext,
-                             void (*func)(TCGv, TCGv, target_long))
+                             void (*func)(TCGv, TCGv, target_long),
+                             void (*f128)(TCGv, TCGv, TCGv, TCGv, target_l=
ong))
 {
-    TCGv dest =3D dest_gpr(ctx, a->rd);
-    TCGv src1 =3D get_gpr(ctx, a->rs1, ext);
+    if (get_xl_max(ctx) < MXL_RV128) {
+        TCGv dest =3D dest_gpr(ctx, a->rd);
+        TCGv src1 =3D get_gpr(ctx, a->rs1, ext);
=20
-    func(dest, src1, a->imm);
+        func(dest, src1, a->imm);
=20
-    gen_set_gpr(ctx, a->rd, dest);
+        gen_set_gpr(ctx, a->rd, dest);
+    } else {
+        TCGv src1l =3D get_gpr(ctx, a->rs1, ext),
+             src1h =3D get_gprh(ctx, a->rs1),
+             destl =3D tcg_temp_new(),
+             desth =3D tcg_temp_new();
+
+        if (get_ol(ctx) < MXL_RV128) {
+            func(destl, src1l, a->imm);
+            gen_set_gpr(ctx, a->rd, destl);
+            gen_set_gprh(ctx, a->rd, desth);
+        } else {
+            assert(f128 !=3D NULL);
+            f128(destl, desth, src1l, src1h, a->imm);
+            gen_set_gpr(ctx, a->rd, destl);
+            gen_set_gprh(ctx, a->rd, desth);
+        }
+
+        tcg_temp_free(destl);
+        tcg_temp_free(desth);
+    }
     return true;
 }
=20
 static bool gen_arith_imm_tl(DisasContext *ctx, arg_i *a, DisasExtend ext,
-                             void (*func)(TCGv, TCGv, TCGv))
+                             void (*func)(TCGv, TCGv, TCGv),
+                             void (*f128)(TCGv, TCGv, TCGv, TCGv, TCGv, TC=
Gv))
 {
-    TCGv dest =3D dest_gpr(ctx, a->rd);
-    TCGv src1 =3D get_gpr(ctx, a->rs1, ext);
-    TCGv src2 =3D tcg_constant_tl(a->imm);
=20
-    func(dest, src1, src2);
+    if (get_xl_max(ctx) < MXL_RV128) {
+        TCGv dest =3D dest_gpr(ctx, a->rd);
+        TCGv src1 =3D get_gpr(ctx, a->rs1, ext);
+        TCGv src2 =3D tcg_constant_tl(a->imm);
=20
-    gen_set_gpr(ctx, a->rd, dest);
+        func(dest, src1, src2);
+
+        gen_set_gpr(ctx, a->rd, dest);
+    } else {
+        TCGv src1l =3D get_gpr(ctx, a->rs1, ext),
+             src1h =3D get_gprh(ctx, a->rs1),
+             src2l =3D tcg_constant_tl(a->imm),
+             src2h =3D tcg_constant_tl(-(a->imm < 0)),
+             destl =3D tcg_temp_new(),
+             desth =3D tcg_temp_new();
+
+        assert(f128 !=3D NULL);
+        f128(destl, desth, src1l, src1h, src2l, src2h);
+        gen_set_gpr(ctx, a->rd, destl);
+        gen_set_gprh(ctx, a->rd, desth);
+        tcg_temp_free(destl);
+        tcg_temp_free(desth);
+    }
     return true;
 }
=20
 static bool gen_arith(DisasContext *ctx, arg_r *a, DisasExtend ext,
-                      void (*func)(TCGv, TCGv, TCGv))
+                      void (*func)(TCGv, TCGv, TCGv),
+                      void (*f128)(TCGv, TCGv, TCGv, TCGv, TCGv, TCGv))
 {
-    TCGv dest =3D dest_gpr(ctx, a->rd);
-    TCGv src1 =3D get_gpr(ctx, a->rs1, ext);
-    TCGv src2 =3D get_gpr(ctx, a->rs2, ext);
+    if (get_xl_max(ctx) < MXL_RV128) {
+        TCGv dest =3D dest_gpr(ctx, a->rd);
+        TCGv src1 =3D get_gpr(ctx, a->rs1, ext);
+        TCGv src2 =3D get_gpr(ctx, a->rs2, ext);
=20
-    func(dest, src1, src2);
+        func(dest, src1, src2);
=20
-    gen_set_gpr(ctx, a->rd, dest);
+        gen_set_gpr(ctx, a->rd, dest);
+    } else {
+        TCGv src1l =3D get_gpr(ctx, a->rs1, ext),
+             src1h =3D get_gprh(ctx, a->rs1),
+             src2l =3D get_gpr(ctx, a->rs2, ext),
+             src2h =3D get_gprh(ctx, a->rs2),
+             destl =3D tcg_temp_new(),
+             desth =3D tcg_temp_new();
+
+        if (get_ol(ctx) < MXL_RV128) {
+            func(destl, src1l, src2l);
+            gen_set_gpr(ctx, a->rd, destl);
+            gen_set_gprh(ctx, a->rd, desth);
+        } else {
+            assert(f128 !=3D NULL);
+            f128(destl, desth, src1l, src1h, src2l, src2h);
+            gen_set_gpr(ctx, a->rd, destl);
+            gen_set_gprh(ctx, a->rd, desth);
+        }
+
+        tcg_temp_free(destl);
+        tcg_temp_free(desth);
+    }
     return true;
 }
=20
 static bool gen_arith_per_ol(DisasContext *ctx, arg_r *a, DisasExtend ext,
                              void (*f_tl)(TCGv, TCGv, TCGv),
-                             void (*f_32)(TCGv, TCGv, TCGv))
+                             void (*f_32)(TCGv, TCGv, TCGv),
+                             void (*f_128)(TCGv, TCGv, TCGv, TCGv, TCGv, T=
CGv))
 {
     int olen =3D get_olen(ctx);
=20
     if (olen !=3D TARGET_LONG_BITS) {
         if (olen =3D=3D 32) {
             f_tl =3D f_32;
-        } else {
+        } else if (olen !=3D 128) {
             g_assert_not_reached();
         }
     }
-    return gen_arith(ctx, a, ext, f_tl);
+    return gen_arith(ctx, a, ext, f_tl, f_128);
 }
=20
 static bool gen_shift_imm_fn(DisasContext *ctx, arg_shift *a, DisasExtend =
ext,
diff --git a/target/riscv/insn_trans/trans_rvb.c.inc b/target/riscv/insn_tr=
ans/trans_rvb.c.inc
index cae97ed842..764c0b7122 100644
--- a/target/riscv/insn_trans/trans_rvb.c.inc
+++ b/target/riscv/insn_trans/trans_rvb.c.inc
@@ -104,25 +104,25 @@ static bool trans_xnor(DisasContext *ctx, arg_xnor *a)
 static bool trans_min(DisasContext *ctx, arg_min *a)
 {
     REQUIRE_ZBB(ctx);
-    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_smin_tl);
+    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_smin_tl, NULL);
 }
=20
 static bool trans_max(DisasContext *ctx, arg_max *a)
 {
     REQUIRE_ZBB(ctx);
-    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_smax_tl);
+    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_smax_tl, NULL);
 }
=20
 static bool trans_minu(DisasContext *ctx, arg_minu *a)
 {
     REQUIRE_ZBB(ctx);
-    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_umin_tl);
+    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_umin_tl, NULL);
 }
=20
 static bool trans_maxu(DisasContext *ctx, arg_maxu *a)
 {
     REQUIRE_ZBB(ctx);
-    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_umax_tl);
+    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_umax_tl, NULL);
 }
=20
 static bool trans_sext_b(DisasContext *ctx, arg_sext_b *a)
@@ -354,7 +354,7 @@ GEN_SHADD(3)
 static bool trans_sh##SHAMT##add(DisasContext *ctx, arg_sh##SHAMT##add *a)=
 \
 {                                                                         =
 \
     REQUIRE_ZBA(ctx);                                                     =
 \
-    return gen_arith(ctx, a, EXT_NONE, gen_sh##SHAMT##add);               =
 \
+    return gen_arith(ctx, a, EXT_NONE, gen_sh##SHAMT##add, NULL);         =
 \
 }
=20
 GEN_TRANS_SHADD(1)
@@ -444,7 +444,7 @@ static bool trans_sh##SHAMT##add_uw(DisasContext *ctx, =
       \
 {                                                             \
     REQUIRE_64BIT(ctx);                                       \
     REQUIRE_ZBA(ctx);                                         \
-    return gen_arith(ctx, a, EXT_NONE, gen_sh##SHAMT##add_uw);  \
+    return gen_arith(ctx, a, EXT_NONE, gen_sh##SHAMT##add_uw, NULL); \
 }
=20
 GEN_TRANS_SHADD_UW(1)
@@ -463,7 +463,7 @@ static bool trans_add_uw(DisasContext *ctx, arg_add_uw =
*a)
 {
     REQUIRE_64BIT(ctx);
     REQUIRE_ZBA(ctx);
-    return gen_arith(ctx, a, EXT_NONE, gen_add_uw);
+    return gen_arith(ctx, a, EXT_NONE, gen_add_uw, NULL);
 }
=20
 static void gen_slli_uw(TCGv dest, TCGv src, target_long shamt)
@@ -481,7 +481,7 @@ static bool trans_slli_uw(DisasContext *ctx, arg_slli_u=
w *a)
 static bool trans_clmul(DisasContext *ctx, arg_clmul *a)
 {
     REQUIRE_ZBC(ctx);
-    return gen_arith(ctx, a, EXT_NONE, gen_helper_clmul);
+    return gen_arith(ctx, a, EXT_NONE, gen_helper_clmul, NULL);
 }
=20
 static void gen_clmulh(TCGv dst, TCGv src1, TCGv src2)
@@ -493,11 +493,11 @@ static void gen_clmulh(TCGv dst, TCGv src1, TCGv src2)
 static bool trans_clmulh(DisasContext *ctx, arg_clmulr *a)
 {
     REQUIRE_ZBC(ctx);
-    return gen_arith(ctx, a, EXT_NONE, gen_clmulh);
+    return gen_arith(ctx, a, EXT_NONE, gen_clmulh, NULL);
 }
=20
 static bool trans_clmulr(DisasContext *ctx, arg_clmulh *a)
 {
     REQUIRE_ZBC(ctx);
-    return gen_arith(ctx, a, EXT_NONE, gen_helper_clmulr);
+    return gen_arith(ctx, a, EXT_NONE, gen_helper_clmulr, NULL);
 }
diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_tr=
ans/trans_rvi.c.inc
index 6e2c89cd5e..6497338842 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -97,13 +97,121 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
     return true;
 }
=20
+/*
+ * Comparison predicates using bitwise logic taken from Hacker's Delight, =
2.12
+ * We are just interested in the sign bit, so rl is not used but for subtr=
acting
+ */
+static bool gen_setcond_i128(TCGv rl, TCGv rh,
+                             TCGv al, TCGv ah,
+                             TCGv bl, TCGv bh,
+                             TCGCond cond)
+{
+    switch (cond) {
+    case TCG_COND_EQ:
+        tcg_gen_setcond_tl(TCG_COND_EQ, rl, al, bl);
+        tcg_gen_setcond_tl(TCG_COND_EQ, rh, ah, bh);
+        tcg_gen_and_tl(rl, rl, rh);
+        break;
+
+    case TCG_COND_NE:
+        tcg_gen_setcond_tl(TCG_COND_NE, rl, al, bl);
+        tcg_gen_setcond_tl(TCG_COND_NE, rh, ah, bh);
+        tcg_gen_or_tl(rl, rl, rh);
+        break;
+
+    case TCG_COND_LT:
+    {
+        TCGv tmp1 =3D tcg_temp_new(),
+             tmp2 =3D tcg_temp_new();
+
+        tcg_gen_sub2_tl(rl, rh, al, ah, bl, bh);
+        tcg_gen_xor_tl(tmp1, rh, ah);
+        tcg_gen_xor_tl(tmp2, ah, bh);
+        tcg_gen_and_tl(tmp1, tmp1, tmp2);
+        tcg_gen_xor_tl(tmp1, rh, tmp1);
+        tcg_gen_shri_tl(rl, tmp1, 63);
+
+        tcg_temp_free(tmp1);
+        tcg_temp_free(tmp2);
+        break;
+    }
+
+    case TCG_COND_GE:
+        /* Invert result of TCG_COND_LT */
+        gen_setcond_i128(rl, rh, al, ah, bl, bh, TCG_COND_LT);
+        tcg_gen_xori_tl(rl, rl, 1);
+        break;
+
+    case TCG_COND_LTU:
+    {
+        TCGv tmp1 =3D tcg_temp_new(),
+             tmp2 =3D tcg_temp_new();
+
+        tcg_gen_sub2_tl(rl, rh, al, ah, bl, bh);
+        tcg_gen_eqv_tl(tmp1, ah, bh);
+        tcg_gen_and_tl(tmp1, tmp1, rh);
+        tcg_gen_andc_tl(tmp2, bh, ah);
+        tcg_gen_or_tl(tmp1, tmp1, tmp2);
+        tcg_gen_shri_tl(rl, tmp1, 63);
+
+        tcg_temp_free(tmp1);
+        tcg_temp_free(tmp2);
+        break;
+    }
+
+    case TCG_COND_GEU:
+        /* Invert result of TCG_COND_LTU */
+        gen_setcond_i128(rl, rh, al, ah, bl, bh, TCG_COND_LTU);
+        tcg_gen_xori_tl(rl, rl, 1);
+        break;
+
+    default:
+        return false;
+    }
+    tcg_gen_movi_tl(rh, 0);
+    return true;
+}
+
 static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
 {
     TCGLabel *l =3D gen_new_label();
     TCGv src1 =3D get_gpr(ctx, a->rs1, EXT_SIGN);
     TCGv src2 =3D get_gpr(ctx, a->rs2, EXT_SIGN);
=20
-    tcg_gen_brcond_tl(cond, src1, src2, l);
+    if (get_xl(ctx) =3D=3D MXL_RV128) {
+        TCGv src1h =3D get_gprh(ctx, a->rs1),
+             src2h =3D get_gprh(ctx, a->rs2),
+             tmpl =3D tcg_temp_new(),
+             tmph =3D tcg_temp_new();
+
+        /*
+         * Do not use gen_setcond_i128 for EQ and NE as these conditions a=
re
+         * often met and can be more efficiently implemented.
+         */
+        if (cond =3D=3D TCG_COND_EQ || cond =3D=3D TCG_COND_NE) {
+            /*
+             * bnez and beqz being used quite often too, lets optimize the=
m,
+             * although QEMU's tcg optimizer handles these cases nicely
+             */
+            if (a->rs2 =3D=3D 0) {
+                tcg_gen_or_tl(tmpl, src1, src1h);
+                tcg_gen_brcondi_tl(cond, tmpl, 0, l);
+            } else {
+                tcg_gen_xor_tl(tmpl, src1, src2);
+                tcg_gen_xor_tl(tmph, src1h, src2h);
+                tcg_gen_or_tl(tmpl, tmpl, tmph);
+                tcg_gen_brcondi_tl(cond, tmpl, 0, l);
+            }
+        } else {
+            gen_setcond_i128(tmpl, tmph, src1, src1h, src2, src2h, cond);
+            tcg_gen_brcondi_tl(TCG_COND_NE, tmpl, 0, l);
+        }
+
+        tcg_temp_free(tmph);
+        tcg_temp_free(tmpl);
+    } else {
+        tcg_gen_brcond_tl(cond, src1, src2, l);
+    }
     gen_goto_tb(ctx, 1, ctx->pc_succ_insn);
=20
     gen_set_label(l); /* branch taken */
@@ -370,9 +478,30 @@ static bool trans_sq(DisasContext *ctx, arg_sq *a)
     return gen_store(ctx, a, MO_TEO);
 }
=20
+static bool trans_addd(DisasContext *ctx, arg_addd *a)
+{
+    REQUIRE_128BIT(ctx);
+    ctx->ol =3D MXL_RV64;
+    return gen_arith(ctx, a, EXT_NONE, tcg_gen_add_tl, NULL);
+}
+
+static bool trans_addid(DisasContext *ctx, arg_addid *a)
+{
+    REQUIRE_128BIT(ctx);
+    ctx->ol =3D MXL_RV64;
+    return gen_arith_imm_fn(ctx, a, EXT_NONE, tcg_gen_addi_tl, NULL);
+}
+
+static bool trans_subd(DisasContext *ctx, arg_subd *a)
+{
+    REQUIRE_128BIT(ctx);
+    ctx->ol =3D MXL_RV64;
+    return gen_arith(ctx, a, EXT_NONE, tcg_gen_sub_tl, NULL);
+}
+
 static bool trans_addi(DisasContext *ctx, arg_addi *a)
 {
-    return gen_arith_imm_fn(ctx, a, EXT_NONE, tcg_gen_addi_tl);
+    return gen_arith_imm_fn(ctx, a, EXT_NONE, tcg_gen_addi_tl, gen_addi2_i=
128);
 }
=20
 static void gen_slt(TCGv ret, TCGv s1, TCGv s2)
@@ -380,19 +509,31 @@ static void gen_slt(TCGv ret, TCGv s1, TCGv s2)
     tcg_gen_setcond_tl(TCG_COND_LT, ret, s1, s2);
 }
=20
+static void gen_slt_i128(TCGv retl, TCGv reth,
+                         TCGv s1l, TCGv s1h, TCGv s2l, TCGv s2h)
+{
+    gen_setcond_i128(retl, reth, s1l, s1h, s2l, s2h, TCG_COND_LT);
+}
+
 static void gen_sltu(TCGv ret, TCGv s1, TCGv s2)
 {
     tcg_gen_setcond_tl(TCG_COND_LTU, ret, s1, s2);
 }
=20
+static void gen_sltu_i128(TCGv retl, TCGv reth,
+                         TCGv s1l, TCGv s1h, TCGv s2l, TCGv s2h)
+{
+    gen_setcond_i128(retl, reth, s1l, s1h, s2l, s2h, TCG_COND_LTU);
+}
+
 static bool trans_slti(DisasContext *ctx, arg_slti *a)
 {
-    return gen_arith_imm_tl(ctx, a, EXT_SIGN, gen_slt);
+    return gen_arith_imm_tl(ctx, a, EXT_SIGN, gen_slt, gen_slt_i128);
 }
=20
 static bool trans_sltiu(DisasContext *ctx, arg_sltiu *a)
 {
-    return gen_arith_imm_tl(ctx, a, EXT_SIGN, gen_sltu);
+    return gen_arith_imm_tl(ctx, a, EXT_SIGN, gen_sltu, gen_sltu_i128);
 }
=20
 static bool trans_xori(DisasContext *ctx, arg_xori *a)
@@ -478,12 +619,12 @@ static bool trans_srai(DisasContext *ctx, arg_srai *a)
=20
 static bool trans_add(DisasContext *ctx, arg_add *a)
 {
-    return gen_arith(ctx, a, EXT_NONE, tcg_gen_add_tl);
+    return gen_arith(ctx, a, EXT_NONE, tcg_gen_add_tl, tcg_gen_add2_tl);
 }
=20
 static bool trans_sub(DisasContext *ctx, arg_sub *a)
 {
-    return gen_arith(ctx, a, EXT_NONE, tcg_gen_sub_tl);
+    return gen_arith(ctx, a, EXT_NONE, tcg_gen_sub_tl, tcg_gen_sub2_tl);
 }
=20
 enum M128_DIR {
@@ -559,12 +700,12 @@ static bool trans_sll(DisasContext *ctx, arg_sll *a)
=20
 static bool trans_slt(DisasContext *ctx, arg_slt *a)
 {
-    return gen_arith(ctx, a, EXT_SIGN, gen_slt);
+    return gen_arith(ctx, a, EXT_SIGN, gen_slt, gen_slt_i128);
 }
=20
 static bool trans_sltu(DisasContext *ctx, arg_sltu *a)
 {
-    return gen_arith(ctx, a, EXT_SIGN, gen_sltu);
+    return gen_arith(ctx, a, EXT_SIGN, gen_sltu, gen_sltu_i128);
 }
=20
 static void gen_srl_i128(TCGv destl, TCGv desth,
@@ -647,9 +788,9 @@ static bool trans_and(DisasContext *ctx, arg_and *a)
=20
 static bool trans_addiw(DisasContext *ctx, arg_addiw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     ctx->ol =3D MXL_RV32;
-    return gen_arith_imm_fn(ctx, a, EXT_NONE, tcg_gen_addi_tl);
+    return gen_arith_imm_fn(ctx, a, EXT_NONE, tcg_gen_addi_tl, NULL);
 }
=20
 static bool trans_slliw(DisasContext *ctx, arg_slliw *a)
@@ -697,16 +838,16 @@ static bool trans_sraid(DisasContext *ctx, arg_sraid =
*a)
=20
 static bool trans_addw(DisasContext *ctx, arg_addw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     ctx->ol =3D MXL_RV32;
-    return gen_arith(ctx, a, EXT_NONE, tcg_gen_add_tl);
+    return gen_arith(ctx, a, EXT_NONE, tcg_gen_add_tl, NULL);
 }
=20
 static bool trans_subw(DisasContext *ctx, arg_subw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     ctx->ol =3D MXL_RV32;
-    return gen_arith(ctx, a, EXT_NONE, tcg_gen_sub_tl);
+    return gen_arith(ctx, a, EXT_NONE, tcg_gen_sub_tl, NULL);
 }
=20
 static bool trans_sllw(DisasContext *ctx, arg_sllw *a)
diff --git a/target/riscv/insn_trans/trans_rvm.c.inc b/target/riscv/insn_tr=
ans/trans_rvm.c.inc
index 2af0e5c139..efe25dfc11 100644
--- a/target/riscv/insn_trans/trans_rvm.c.inc
+++ b/target/riscv/insn_trans/trans_rvm.c.inc
@@ -22,7 +22,7 @@
 static bool trans_mul(DisasContext *ctx, arg_mul *a)
 {
     REQUIRE_EXT(ctx, RVM);
-    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl);
+    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
 }
=20
 static void gen_mulh(TCGv ret, TCGv s1, TCGv s2)
@@ -42,7 +42,7 @@ static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
 static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
 {
     REQUIRE_EXT(ctx, RVM);
-    return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w);
+    return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w, NULL);
 }
=20
 static void gen_mulhsu(TCGv ret, TCGv arg1, TCGv arg2)
@@ -76,7 +76,7 @@ static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)
 static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
 {
     REQUIRE_EXT(ctx, RVM);
-    return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w);
+    return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w, NU=
LL);
 }
=20
 static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
@@ -91,7 +91,7 @@ static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
 {
     REQUIRE_EXT(ctx, RVM);
     /* gen_mulh_w works for either sign as input. */
-    return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w);
+    return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w, NULL);
 }
=20
 static void gen_div(TCGv ret, TCGv source1, TCGv source2)
@@ -130,7 +130,7 @@ static void gen_div(TCGv ret, TCGv source1, TCGv source=
2)
 static bool trans_div(DisasContext *ctx, arg_div *a)
 {
     REQUIRE_EXT(ctx, RVM);
-    return gen_arith(ctx, a, EXT_SIGN, gen_div);
+    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
 }
=20
 static void gen_divu(TCGv ret, TCGv source1, TCGv source2)
@@ -158,7 +158,7 @@ static void gen_divu(TCGv ret, TCGv source1, TCGv sourc=
e2)
 static bool trans_divu(DisasContext *ctx, arg_divu *a)
 {
     REQUIRE_EXT(ctx, RVM);
-    return gen_arith(ctx, a, EXT_ZERO, gen_divu);
+    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
 }
=20
 static void gen_rem(TCGv ret, TCGv source1, TCGv source2)
@@ -199,7 +199,7 @@ static void gen_rem(TCGv ret, TCGv source1, TCGv source=
2)
 static bool trans_rem(DisasContext *ctx, arg_rem *a)
 {
     REQUIRE_EXT(ctx, RVM);
-    return gen_arith(ctx, a, EXT_SIGN, gen_rem);
+    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
 }
=20
 static void gen_remu(TCGv ret, TCGv source1, TCGv source2)
@@ -227,7 +227,7 @@ static void gen_remu(TCGv ret, TCGv source1, TCGv sourc=
e2)
 static bool trans_remu(DisasContext *ctx, arg_remu *a)
 {
     REQUIRE_EXT(ctx, RVM);
-    return gen_arith(ctx, a, EXT_ZERO, gen_remu);
+    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
 }
=20
 static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
@@ -235,7 +235,7 @@ static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
     REQUIRE_64BIT(ctx);
     REQUIRE_EXT(ctx, RVM);
     ctx->ol =3D MXL_RV32;
-    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl);
+    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
 }
=20
 static bool trans_divw(DisasContext *ctx, arg_divw *a)
@@ -243,7 +243,7 @@ static bool trans_divw(DisasContext *ctx, arg_divw *a)
     REQUIRE_64BIT(ctx);
     REQUIRE_EXT(ctx, RVM);
     ctx->ol =3D MXL_RV32;
-    return gen_arith(ctx, a, EXT_SIGN, gen_div);
+    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
 }
=20
 static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
@@ -251,7 +251,7 @@ static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
     REQUIRE_64BIT(ctx);
     REQUIRE_EXT(ctx, RVM);
     ctx->ol =3D MXL_RV32;
-    return gen_arith(ctx, a, EXT_ZERO, gen_divu);
+    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
 }
=20
 static bool trans_remw(DisasContext *ctx, arg_remw *a)
@@ -259,7 +259,7 @@ static bool trans_remw(DisasContext *ctx, arg_remw *a)
     REQUIRE_64BIT(ctx);
     REQUIRE_EXT(ctx, RVM);
     ctx->ol =3D MXL_RV32;
-    return gen_arith(ctx, a, EXT_SIGN, gen_rem);
+    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
 }
=20
 static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
@@ -267,5 +267,5 @@ static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
     REQUIRE_64BIT(ctx);
     REQUIRE_EXT(ctx, RVM);
     ctx->ol =3D MXL_RV32;
-    return gen_arith(ctx, a, EXT_ZERO, gen_remu);
+    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
 }
--=20
2.33.0