From nobody Mon Feb  9 12:46:50 2026
Delivered-To: importer@patchew.org
Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as
 permitted sender) client-ip=208.118.235.17;
 envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org;
 helo=lists.gnu.org;
Authentication-Results: mx.zohomail.com;
	dkim=fail;
	spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted
 sender)  smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org
Return-Path: <qemu-devel-bounces+importer=patchew.org@nongnu.org>
Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by
 mx.zohomail.com
	with SMTPS id 149930092422548.016328831721125;
 Wed, 5 Jul 2017 17:28:44 -0700 (PDT)
Received: from localhost ([::1]:48642 helo=lists.gnu.org)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <qemu-devel-bounces+importer=patchew.org@nongnu.org>)
	id 1dSuex-0002At-1u
	for importer@patchew.org; Wed, 05 Jul 2017 20:28:43 -0400
Received: from eggs.gnu.org ([2001:4830:134:3::10]:59698)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <rth7680@gmail.com>) id 1dSuay-0007V5-Vd
	for qemu-devel@nongnu.org; Wed, 05 Jul 2017 20:24:38 -0400
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from <rth7680@gmail.com>) id 1dSuax-0002PZ-7V
	for qemu-devel@nongnu.org; Wed, 05 Jul 2017 20:24:36 -0400
Received: from mail-qk0-x243.google.com ([2607:f8b0:400d:c09::243]:35760)
	by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16)
	(Exim 4.71) (envelope-from <rth7680@gmail.com>) id 1dSuax-0002PO-1q
	for qemu-devel@nongnu.org; Wed, 05 Jul 2017 20:24:35 -0400
Received: by mail-qk0-x243.google.com with SMTP id 16so647348qkg.2
	for <qemu-devel@nongnu.org>; Wed, 05 Jul 2017 17:24:35 -0700 (PDT)
Received: from bigtime.twiddle.net.com (rrcs-66-91-136-156.west.biz.rr.com.
	[66.91.136.156])
	by smtp.gmail.com with ESMTPSA id u85sm371825qku.42.2017.07.05.17.24.30
	(version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);
	Wed, 05 Jul 2017 17:24:33 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;
	h=sender:from:to:cc:subject:date:message-id:in-reply-to:references;
	bh=WXdwifL93X0z4n54qawmHFEVY2bx1xK1JAIRcet8xwc=;
	b=caDFbCUXF5vVqHJgjOzKD3mfdAaIqVctepcT50MD7CSEZgByURzYaE1JtzKskcYiwL
	UpiuNG5kjPDYXzhplf8i7vv7KuhLJKIGS++PYAwmw3eRtexFQmVcRgidE8P1hTovcaNw
	IEKQ+taTSxnegT+U+XipRssJ2bD5woIAgyFxrTFloLw+R417bJpjj+qXoIhZPI3HzYzF
	OEhcEi1LmpJgt1yCq1/UeS+VK+2YTdKyLxh5jf6uKbIQ4VFQKdJRMWTLFh0nnmaLWJZN
	UrQjcTDujxzOKswPoKTqDQ8CVHV88MYhU1Lp7ItXMU3l8IP/u0mMejjgWr6M4V2pS4fB
	eAKA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
	d=1e100.net; s=20161025;
	h=x-gm-message-state:sender:from:to:cc:subject:date:message-id
	:in-reply-to:references;
	bh=WXdwifL93X0z4n54qawmHFEVY2bx1xK1JAIRcet8xwc=;
	b=W02FQ2ol+fWpOE9t0jCbDhDk1uEG6ypBhS/kMh2LvNEZatVqe45cAvD6IFrNeVdpcN
	tVisj4xyZ3IdB/1PP7gxomb5zqQDXD5bM7esGwy579xZT0sjE/tMTTlEmCavYwu5P9qa
	i8+Cza9ea/pDjhvpgBEWj2jj8MQvZeuJ/Z5GMTrX5Rz6dIMGbmrNYr2CGKXcC/byiOVC
	wE9uptFJhVLea3a4QKw7VsAsW3uxzSmC1jlklG70cwsyYMLVIfaq24rrKI7QuD3O7y1c
	jhJHOsG+t2a1SBw2Q+7maBY66+lRePev1ChNzA+gn3Z/ms1h44d6JWdDc06g1Y1kSgTW
	lrTw==
X-Gm-Message-State: AKS2vOw4m8/+GNqVcKVReSLtEQxlN3EKKMSUh0EbbjTKrR2OX6athSFr
	w14qWtYYKOmb2I0Q3Ys=
X-Received: by 10.55.22.71 with SMTP id g68mr59120979qkh.218.1499300674033;
	Wed, 05 Jul 2017 17:24:34 -0700 (PDT)
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Date: Wed,  5 Jul 2017 14:23:54 -1000
Message-Id: <20170706002401.10507-5-rth@twiddle.net>
X-Mailer: git-send-email 2.9.4
In-Reply-To: <20170706002401.10507-1-rth@twiddle.net>
References: <20170706002401.10507-1-rth@twiddle.net>
X-detected-operating-system: by eggs.gnu.org: Genre and OS details not
	recognized.
X-Received-From: 2607:f8b0:400d:c09::243
Subject: [Qemu-devel] [PATCH 04/11] target/sh4: Recognize common gUSA
 sequences
X-BeenThere: qemu-devel@nongnu.org
X-Mailman-Version: 2.1.21
Precedence: list
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel/>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
Cc: bruno@clisp.org, aurelien@aurel32.net
Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org
Sender: "Qemu-devel" <qemu-devel-bounces+importer=patchew.org@nongnu.org>
X-ZohoMail-DKIM: fail (Header signature does not verify)
X-ZohoMail: RDKM_2  RSF_0  Z_629925259 SPT_0
Content-Transfer-Encoding: quoted-printable
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

For many of the sequences produced by gcc or glibc,
we can translate these as host atomic operations,
which saves the need to acquire the exclusive lock.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/sh4/translate.c | 300 +++++++++++++++++++++++++++++++++++++++++++++=
++--
 1 file changed, 290 insertions(+), 10 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 02c6efc..9ab7d6e 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1896,11 +1896,296 @@ static void decode_opc(DisasContext * ctx)
 }
=20
 #ifdef CONFIG_USER_ONLY
-static int decode_gusa(DisasContext *ctx)
+/* For uniprocessors, SH4 uses optimistic restartable atomic sequences.
+   Upon an interrupt, a real kernel would simply notice magic values in
+   the registers and reset the PC to the start of the sequence.
+
+   For QEMU, we cannot do this in quite the same way.  Instead, we notice
+   the normal start of such a sequence (mov #-x,r15).  While we can handle
+   any sequence via cpu_exec_step_atomic, we can recognize the "normal"
+   sequences and transform them into atomic operations as seen by the host.
+*/
+static int decode_gusa(DisasContext *ctx, CPUSH4State *env, int *pmax_insn=
s)
 {
+    uint16_t insns[5];
+    int ld_adr, ld_reg, ld_mop;
+    int op_reg, op_arg, op_opc;
+    int mt_reg, st_reg, st_mop;
+
     uint32_t pc =3D ctx->pc;
     uint32_t pc_end =3D ctx->tb->cs_base;
+    int backup =3D sextract32(ctx->tbflags, GUSA_SHIFT, 8);
+    int max_insns =3D (pc_end - pc) / 2;
+    int i;
+
+    if (pc !=3D pc_end + backup || max_insns < 2) {
+        /* This is a malformed gUSA region.  Don't do anything special,
+           since the interpreter is likely to get confused.  */
+        ctx->envflags &=3D ~GUSA_MASK;
+        return 0;
+    }
+
+    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+        /* Regardless of single-stepping or the end of the page,
+           we must complete execution of the gUSA region while
+           holding the exclusive lock.  */
+        *pmax_insns =3D max_insns;
+        return 0;
+    }
+
+    /* The state machine below will consume only a few insns.
+       If there are more than that in a region, fail now.  */
+    if (max_insns > ARRAY_SIZE(insns)) {
+        goto fail;
+    }
+
+    /* Read all of the insns for the region.  */
+    for (i =3D 0; i < max_insns; ++i) {
+        insns[i] =3D cpu_lduw_code(env, pc + i * 2);
+    }
+
+    ld_adr =3D ld_reg =3D ld_mop =3D -1;
+    op_reg =3D op_arg =3D op_opc =3D -1;
+    mt_reg =3D -1;
+    st_reg =3D st_mop =3D -1;
+    i =3D 0;
+
+#define NEXT_INSN \
+    do { if (i >=3D max_insns) goto fail; ctx->opcode =3D insns[i++]; } wh=
ile (0)
+
+    /*
+     * Expect a load to begin the region.
+     */
+    NEXT_INSN;
+    switch (ctx->opcode & 0xf00f) {
+    case 0x6000: /* mov.b @Rm,Rn */
+        ld_mop =3D MO_SB;
+        break;
+    case 0x6001: /* mov.w @Rm,Rn */
+        ld_mop =3D MO_TESW;
+        break;
+    case 0x6002: /* mov.l @Rm,Rn */
+        ld_mop =3D MO_TESL;
+        break;
+    default:
+        goto fail;
+    }
+    ld_adr =3D B7_4;
+    op_reg =3D ld_reg =3D B11_8;
+    if (ld_adr =3D=3D ld_reg) {
+        goto fail;
+    }
+
+    /*
+     * Expect an optional register move.
+     */
+    NEXT_INSN;
+    switch (ctx->opcode & 0xf00f) {
+    case 0x6003: /* mov Rm,Rn */
+        /* Here we want to recognize the ld output being
+           saved for later consumption (e.g. atomic_fetch_op).  */
+        if (ld_reg !=3D B7_4) {
+            goto fail;
+        }
+        op_reg =3D B11_8;
+        break;
+
+    default:
+        /* Put back and re-examine as operation.  */
+        --i;
+    }
+
+    /*
+     * Expect the operation.
+     */
+    NEXT_INSN;
+    switch (ctx->opcode & 0xf00f) {
+    case 0x300c: /* add Rm,Rn */
+        op_opc =3D INDEX_op_add_i32;
+        goto do_reg_op;
+    case 0x2009: /* and Rm,Rn */
+        op_opc =3D INDEX_op_and_i32;
+        goto do_reg_op;
+    case 0x200a: /* xor Rm,Rn */
+        op_opc =3D INDEX_op_xor_i32;
+        goto do_reg_op;
+    case 0x200b: /* or Rm,Rn */
+        op_opc =3D INDEX_op_or_i32;
+    do_reg_op:
+        /* The operation register should be as expected, and the
+           other input cannot depend on the load.  */
+        op_arg =3D B7_4;
+        if (op_reg !=3D B11_8 || op_arg =3D=3D op_reg || op_arg =3D=3D ld_=
reg) {
+            goto fail;
+        }
+        break;
+
+    case 0x3000: /* cmp/eq Rm,Rn */
+        /* Looking for the middle of a compare-and-swap sequence,
+           beginning with the compare.  Operands can be in either order,
+           but with only one overlapping the load.  */
+        if ((op_reg =3D=3D B11_8) + (op_reg =3D=3D B7_4) !=3D 1) {
+            goto fail;
+        }
+        op_opc =3D INDEX_op_setcond_i32;  /* placeholder */
+        op_arg =3D (op_reg =3D=3D B11_8 ? B7_4 : B11_8);
+
+        NEXT_INSN;
+        switch (ctx->opcode & 0xff00) {
+        case 0x8b00: /* bf label */
+        case 0x8f00: /* bf/s label */
+            if (pc + (i + 1 + B7_0s) * 2 !=3D pc_end) {
+                goto fail;
+            }
+            if ((ctx->opcode & 0xff00) =3D=3D 0x8b00) { /* bf label */
+                break;
+            }
+            /* We're looking to unconditionally modify Rn with the
+               result of the comparison, within the delay slot of
+               the branch.  This is used by older gcc.  */
+            NEXT_INSN;
+            if ((ctx->opcode & 0xf0ff) =3D=3D 0x0029) { /* movt Rn */
+                mt_reg =3D B11_8;
+            } else {
+                goto fail;
+            }
+            break;
+
+        default:
+            goto fail;
+        }
+        break;
+
+    default:
+        /* Put back and re-examine as store.  */
+        --i;
+    }
+
+    /*
+     * Expect the store.
+     */
+    /* The store must be the last insn.  */
+    if (i !=3D max_insns - 1) {
+        goto fail;
+    }
+    NEXT_INSN;
+    switch (ctx->opcode & 0xf00f) {
+    case 0x2000: /* mov.b Rm,@Rn */
+        st_mop =3D MO_UB;
+        break;
+    case 0x2001: /* mov.w Rm,@Rn */
+        st_mop =3D MO_UW;
+        break;
+    case 0x2002: /* mov.l Rm,@Rn */
+        st_mop =3D MO_UL;
+        break;
+    default:
+        goto fail;
+    }
+    /* The store must match the load.  */
+    if (ld_adr !=3D B11_8 || st_mop !=3D (ld_mop & MO_SIZE)) {
+        goto fail;
+    }
+    st_reg =3D B7_4;
+
+#undef NEXT_INSN
+
+    /*
+     * Emit the operation.
+     */
+    tcg_gen_insn_start(pc, ctx->envflags);
+    switch (op_opc) {
+    case -1:
+        /* No operation found.  Look for exchange pattern.  */
+        if (st_reg =3D=3D ld_reg || st_reg =3D=3D op_reg) {
+            goto fail;
+        }
+        tcg_gen_atomic_xchg_i32(REG(ld_reg), REG(ld_adr), REG(st_reg),
+                                ctx->memidx, ld_mop);
+        break;
+
+    case INDEX_op_add_i32:
+        if (op_reg !=3D st_reg) {
+            goto fail;
+        }
+        if (op_reg =3D=3D ld_reg && st_mop =3D=3D MO_UL) {
+            tcg_gen_atomic_add_fetch_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+        } else {
+            tcg_gen_atomic_fetch_add_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+            if (op_reg !=3D ld_reg) {
+                /* Note that mop sizes < 4 cannot use add_fetch
+                   because it won't carry into the higher bits.  */
+                tcg_gen_add_i32(REG(op_reg), REG(ld_reg), REG(op_arg));
+            }
+        }
+        break;
+
+    case INDEX_op_and_i32:
+        if (op_reg !=3D st_reg) {
+            goto fail;
+        }
+        if (op_reg =3D=3D ld_reg) {
+            tcg_gen_atomic_and_fetch_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+        } else {
+            tcg_gen_atomic_fetch_and_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+            tcg_gen_and_i32(REG(op_reg), REG(ld_reg), REG(op_arg));
+        }
+        break;
+
+    case INDEX_op_or_i32:
+        if (op_reg !=3D st_reg) {
+            goto fail;
+        }
+        if (op_reg =3D=3D ld_reg) {
+            tcg_gen_atomic_or_fetch_i32(REG(ld_reg), REG(ld_adr),
+                                        REG(op_arg), ctx->memidx, ld_mop);
+        } else {
+            tcg_gen_atomic_fetch_or_i32(REG(ld_reg), REG(ld_adr),
+                                        REG(op_arg), ctx->memidx, ld_mop);
+            tcg_gen_or_i32(REG(op_reg), REG(ld_reg), REG(op_arg));
+        }
+        break;
+
+    case INDEX_op_xor_i32:
+        if (op_reg !=3D st_reg) {
+            goto fail;
+        }
+        if (op_reg =3D=3D ld_reg) {
+            tcg_gen_atomic_xor_fetch_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+        } else {
+            tcg_gen_atomic_fetch_xor_i32(REG(ld_reg), REG(ld_adr),
+                                         REG(op_arg), ctx->memidx, ld_mop);
+            tcg_gen_xor_i32(REG(op_reg), REG(ld_reg), REG(op_arg));
+        }
+        break;
=20
+    case INDEX_op_setcond_i32:
+        if (st_reg =3D=3D ld_reg) {
+            goto fail;
+        }
+        tcg_gen_atomic_cmpxchg_i32(REG(ld_reg), REG(ld_adr), REG(op_arg),
+                                   REG(st_reg), ctx->memidx, ld_mop);
+        tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(ld_reg), REG(op_arg=
));
+        if (mt_reg >=3D 0) {
+            tcg_gen_mov_i32(REG(mt_reg), cpu_sr_t);
+        }
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    /* The entire region has been translated.  */
+    ctx->envflags &=3D ~GUSA_MASK;
+    ctx->pc =3D pc_end;
+    return max_insns;
+
+ fail:
     qemu_log_mask(LOG_UNIMP, "Unrecognized gUSA sequence %08x-%08x\n",
                   pc, pc_end);
=20
@@ -1913,8 +2198,8 @@ static int decode_gusa(DisasContext *ctx)
     ctx->bstate =3D BS_EXCP;
=20
     /* We're not executing an instruction, but we must report one for the
-       purposes of accounting within the TB.  At which point we might as
-       well report the entire region so that it's immediately available
+       purposes of accounting within the TB.  We might as well report the
+       entire region consumed via ctx->pc so that it's immediately availab=
le
        in the disassembly dump.  */
     ctx->pc =3D pc_end;
     return 1;
@@ -1966,13 +2251,8 @@ void gen_intermediate_code(CPUSH4State * env, struct=
 TranslationBlock *tb)
     num_insns =3D 0;
=20
 #ifdef CONFIG_USER_ONLY
-    if (ctx.tbflags & GUSA_EXCLUSIVE) {
-        /* Regardless of single-stepping or the end of the page,
-           we must complete execution of the gUSA region while
-           holding the exclusive lock.  */
-        max_insns =3D (tb->cs_base - ctx.pc) / 2;
-    } else if (ctx.tbflags & GUSA_MASK) {
-        num_insns =3D decode_gusa(&ctx);
+    if (ctx.tbflags & GUSA_MASK) {
+        num_insns =3D decode_gusa(&ctx, env, &max_insns);
     }
 #endif
=20
--=20
2.9.4