From nobody Mon Feb  9 13:11:42 2026
Delivered-To: importer@patchew.org
Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as
 permitted sender) client-ip=208.118.235.17;
 envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org;
 helo=lists.gnu.org;
Authentication-Results: mx.zohomail.com;
	dkim=fail;
	spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted
 sender)  smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org;
	dmarc=fail(p=none dis=none)  header.from=linaro.org
Return-Path: <qemu-devel-bounces+importer=patchew.org@nongnu.org>
Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by
 mx.zohomail.com
	with SMTPS id 1539713263350542.6282057519788;
 Tue, 16 Oct 2018 11:07:43 -0700 (PDT)
Received: from localhost ([::1]:59484 helo=lists.gnu.org)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <qemu-devel-bounces+importer=patchew.org@nongnu.org>)
	id 1gCTkr-0002Ou-VB
	for importer@patchew.org; Tue, 16 Oct 2018 14:07:42 -0400
Received: from eggs.gnu.org ([2001:4830:134:3::10]:59725)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <richard.henderson@linaro.org>) id 1gCTTP-00046x-M0
	for qemu-devel@nongnu.org; Tue, 16 Oct 2018 13:49:42 -0400
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from <richard.henderson@linaro.org>) id 1gCTTL-00010d-2e
	for qemu-devel@nongnu.org; Tue, 16 Oct 2018 13:49:39 -0400
Received: from mail-pg1-x532.google.com ([2607:f8b0:4864:20::532]:38067)
	by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16)
	(Exim 4.71) (envelope-from <richard.henderson@linaro.org>)
	id 1gCTTK-0000wY-MJ
	for qemu-devel@nongnu.org; Tue, 16 Oct 2018 13:49:34 -0400
Received: by mail-pg1-x532.google.com with SMTP id f8-v6so11197880pgq.5
	for <qemu-devel@nongnu.org>; Tue, 16 Oct 2018 10:49:33 -0700 (PDT)
Received: from cloudburst.twiddle.net (174-21-9-133.tukw.qwest.net.
	[174.21.9.133]) by smtp.gmail.com with ESMTPSA id
	6-v6sm17441210pgl.6.2018.10.16.10.49.30
	(version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256);
	Tue, 16 Oct 2018 10:49:31 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google;
	h=from:to:cc:subject:date:message-id:in-reply-to:references;
	bh=PCmtFGzAQFVJC59+lCU2eXtnA4WHfu0lUi+CVh16Ypc=;
	b=WpLJlpsojkBkNSvay18eMD8izL/tPtIJt1pIYwkh1JwuO19Ju0wUjBEvqiets3CyG5
	4/TyCjyDY6TaYQTVgqOYGtIywS6hjZtnS+is0NHsF9H4gdxIX2UxNIuzPeo3fEJIOY2c
	I4elLMLKD3yP29X8ZGEVacMawajeOWGbLf8DM=
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
	d=1e100.net; s=20161025;
	h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to
	:references;
	bh=PCmtFGzAQFVJC59+lCU2eXtnA4WHfu0lUi+CVh16Ypc=;
	b=lwmdQRJmdv0NkmfI4m2lSe8i/xic9xZ2P0bCLS072dhe3jR0CaWZqRWy6vpsbBDstR
	qagYoEgHgeGvUkGt9A9AuKcjpJJUvRcDaPGfDuhXlQlFiP8JsatBP2MlsEbGS+KzPuEW
	EhLEVeLpkFzbrJU17KAXhNkmK6ak8ZHrLfc4WkQZgtPMH1iSTBZTzdaoB2CDz6cof3aE
	oTNwszpGkg+1Cin+AwKUCpWhdTA5/WhgcAUEftRHpDx2TI1r+VXn98wBiA6zntFghYiD
	5ChMwGFPu02KHH9jdDyKtv5ngROpMe0u99f/mDEKavGt19zglHz0Qofp/FJWlbf4jNMG
	pf9Q==
X-Gm-Message-State: ABuFfoiBxlne4iEIMxQn6XQ1D4sfAcWfYEN9KqBw8HIYobSMdWr7CXkD
	ppe/ISQrgFVsZzu2MrHz2v+2j+X7YK4=
X-Google-Smtp-Source: 
 ACcGV61TemvBOIzVsjvzRlNuX5/UPUIglxZnxdB2zRXDuECTAyPl7ZU3euf3WizbHQqOa/yysi7qOQ==
X-Received: by 2002:a63:a902:: with SMTP id
	u2-v6mr21177130pge.207.1539712172133;
	Tue, 16 Oct 2018 10:49:32 -0700 (PDT)
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Date: Tue, 16 Oct 2018 10:49:02 -0700
Message-Id: <20181016174911.9052-13-richard.henderson@linaro.org>
X-Mailer: git-send-email 2.17.2
In-Reply-To: <20181016174911.9052-1-richard.henderson@linaro.org>
References: <20181016174911.9052-1-richard.henderson@linaro.org>
X-detected-operating-system: by eggs.gnu.org: Genre and OS details not
	recognized.
X-Received-From: 2607:f8b0:4864:20::532
Subject: [Qemu-devel] [PULL 12/21] tcg: Split CONFIG_ATOMIC128
X-BeenThere: qemu-devel@nongnu.org
X-Mailman-Version: 2.1.21
Precedence: list
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel/>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
Cc: peter.maydell@linaro.org
Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org
Sender: "Qemu-devel" <qemu-devel-bounces+importer=patchew.org@nongnu.org>
X-ZohoMail-DKIM: fail (Header signature does not verify)
X-ZohoMail: RDMRC_1  RDKM_2  RSF_0  Z_629925259 SPT_0
Content-Transfer-Encoding: quoted-printable
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

GCC7+ will no longer advertise support for 16-byte __atomic operations
if only cmpxchg is supported, as for x86_64.  Fortunately, x86_64 still
has support for __sync_compare_and_swap_16 and we can make use of that.
AArch64 does not have, nor has it ever had, such support, so open-code it.

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/atomic_template.h |  20 ++++-
 include/qemu/atomic128.h    | 155 ++++++++++++++++++++++++++++++++++++
 tcg/tcg.h                   |  16 ++--
 accel/tcg/cputlb.c          |   3 +-
 accel/tcg/user-exec.c       |   5 +-
 configure                   |  19 +++++
 6 files changed, 204 insertions(+), 14 deletions(-)
 create mode 100644 include/qemu/atomic128.h

diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index d751bcba48..efde12fdb2 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -100,19 +100,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, targ=
et_ulong addr,
     DATA_TYPE ret;
=20
     ATOMIC_TRACE_RMW;
+#if DATA_SIZE =3D=3D 16
+    ret =3D atomic16_cmpxchg(haddr, cmpv, newv);
+#else
     ret =3D atomic_cmpxchg__nocheck(haddr, cmpv, newv);
+#endif
     ATOMIC_MMU_CLEANUP;
     return ret;
 }
=20
 #if DATA_SIZE >=3D 16
+#if HAVE_ATOMIC128
 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
 {
     ATOMIC_MMU_DECLS;
     DATA_TYPE val, *haddr =3D ATOMIC_MMU_LOOKUP;
=20
     ATOMIC_TRACE_LD;
-    __atomic_load(haddr, &val, __ATOMIC_RELAXED);
+    val =3D atomic16_read(haddr);
     ATOMIC_MMU_CLEANUP;
     return val;
 }
@@ -124,9 +129,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong a=
ddr,
     DATA_TYPE *haddr =3D ATOMIC_MMU_LOOKUP;
=20
     ATOMIC_TRACE_ST;
-    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+    atomic16_set(haddr, val);
     ATOMIC_MMU_CLEANUP;
 }
+#endif
 #else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
                            ABI_TYPE val EXTRA_ARGS)
@@ -228,19 +234,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, targ=
et_ulong addr,
     DATA_TYPE ret;
=20
     ATOMIC_TRACE_RMW;
+#if DATA_SIZE =3D=3D 16
+    ret =3D atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv));
+#else
     ret =3D atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
+#endif
     ATOMIC_MMU_CLEANUP;
     return BSWAP(ret);
 }
=20
 #if DATA_SIZE >=3D 16
+#if HAVE_ATOMIC128
 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
 {
     ATOMIC_MMU_DECLS;
     DATA_TYPE val, *haddr =3D ATOMIC_MMU_LOOKUP;
=20
     ATOMIC_TRACE_LD;
-    __atomic_load(haddr, &val, __ATOMIC_RELAXED);
+    val =3D atomic16_read(haddr);
     ATOMIC_MMU_CLEANUP;
     return BSWAP(val);
 }
@@ -253,9 +264,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong a=
ddr,
=20
     ATOMIC_TRACE_ST;
     val =3D BSWAP(val);
-    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+    atomic16_set(haddr, val);
     ATOMIC_MMU_CLEANUP;
 }
+#endif
 #else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
                            ABI_TYPE val EXTRA_ARGS)
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
new file mode 100644
index 0000000000..fdea225132
--- /dev/null
+++ b/include/qemu/atomic128.h
@@ -0,0 +1,155 @@
+/*
+ * Simple interface for 128-bit atomic operations.
+ *
+ * Copyright (C) 2018 Linaro, Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or late=
r.
+ * See the COPYING file in the top-level directory.
+ *
+ * See docs/devel/atomics.txt for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef QEMU_ATOMIC128_H
+#define QEMU_ATOMIC128_H
+
+/*
+ * GCC is a house divided about supporting large atomic operations.
+ *
+ * For hosts that only have large compare-and-swap, a legalistic reading
+ * of the C++ standard means that one cannot implement __atomic_read on
+ * read-only memory, and thus all atomic operations must synchronize
+ * through libatomic.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D80878
+ *
+ * This interpretation is not especially helpful for QEMU.
+ * For softmmu, all RAM is always read/write from the hypervisor.
+ * For user-only, if the guest doesn't implement such an __atomic_read
+ * then the host need not worry about it either.
+ *
+ * Moreover, using libatomic is not an option, because its interface is
+ * built for std::atomic<T>, and requires that *all* accesses to such an
+ * object go through the library.  In our case we do not have an object
+ * in the C/C++ sense, but a view of memory as seen by the guest.
+ * The guest may issue a large atomic operation and then access those
+ * pieces using word-sized accesses.  From the hypervisor, we have no
+ * way to connect those two actions.
+ *
+ * Therefore, special case each platform.
+ */
+
+#if defined(CONFIG_ATOMIC128)
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    return atomic_cmpxchg__nocheck(ptr, cmp, new);
+}
+# define HAVE_CMPXCHG128 1
+#elif defined(CONFIG_CMPXCHG128)
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    return __sync_val_compare_and_swap_16(ptr, cmp, new);
+}
+# define HAVE_CMPXCHG128 1
+#elif defined(__aarch64__)
+/* Through gcc 8, aarch64 has no support for 128-bit atomics at all.  */
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    uint64_t cmpl =3D int128_getlo(cmp), cmph =3D int128_gethi(cmp);
+    uint64_t newl =3D int128_getlo(new), newh =3D int128_gethi(new);
+    uint64_t oldl, oldh;
+    uint32_t tmp;
+
+    /* Early-clobber outputs: ldaxp writes them before inputs are read.  */
+    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
+        "cmp %[oldl], %[cmpl]\n\t"
+        "ccmp %[oldh], %[cmph], #0, eq\n\t"
+        "b.ne 1f\n\t"
+        "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
+        "cbnz %w[tmp], 0b\n"
+        "1:"
+        : [mem] "+m"(*ptr), [tmp] "=3D&r"(tmp),
+          [oldl] "=3D&r"(oldl), [oldh] "=3D&r"(oldh)
+        : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
+          [newl] "r"(newl), [newh] "r"(newh)
+        : "memory", "cc");
+    return int128_make128(oldl, oldh);
+}
+# define HAVE_CMPXCHG128 1
+#else
+/* Fallback definition that must be optimized away, or error.  */
+Int128 __attribute__((error("unsupported atomic")))
+    atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
+# define HAVE_CMPXCHG128 0
+#endif /* Some definition for HAVE_CMPXCHG128 */
+
+
+#if defined(CONFIG_ATOMIC128)
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    return atomic_read__nocheck(ptr);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    atomic_set__nocheck(ptr, val);
+}
+
+# define HAVE_ATOMIC128 1
+#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__)
+/* We can do better than cmpxchg for AArch64.  */
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    uint64_t l, h;
+    uint32_t tmp;
+
+    /* The load must be paired with the store to guarantee not tearing.  */
+    /* Outputs are early-clobber: none may share the address register.  */
+    asm("0: ldxp %[l], %[h], %[mem]\n\t"
+        "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
+        "cbnz %w[tmp], 0b"
+        : [mem] "+m"(*ptr), [tmp] "=3D&r"(tmp), [l] "=3D&r"(l), [h] =
"=3D&r"(h));
+    return int128_make128(l, h);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    uint64_t l =3D int128_getlo(val), h =3D int128_gethi(val);
+    uint64_t t1, t2;
+
+    /* Load into temporaries to acquire the exclusive access lock.  */
+    asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
+        "stxp %w[t1], %[l], %[h], %[mem]\n\t"
+        "cbnz %w[t1], 0b"
+        : [mem] "+m"(*ptr), [t1] "=3D&r"(t1), [t2] "=3D&r"(t2)
+        : [l] "r"(l), [h] "r"(h));
+}
+
+# define HAVE_ATOMIC128 1
+#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    /* Maybe replace 0 with 0, returning the old value.  */
+    return atomic16_cmpxchg(ptr, 0, 0);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    Int128 old =3D *ptr, cmp;
+    do {
+        cmp =3D old;
+        old =3D atomic16_cmpxchg(ptr, cmp, val);
+    } while (old !=3D cmp);
+}
+
+# define HAVE_ATOMIC128 1
+#else
+/* Fallback definitions that must be optimized away, or error.  */
+Int128 __attribute__((error("unsupported atomic")))
+    atomic16_read(Int128 *ptr);
+void __attribute__((error("unsupported atomic")))
+    atomic16_set(Int128 *ptr, Int128 val);
+# define HAVE_ATOMIC128 0
+#endif /* Some definition for HAVE_ATOMIC128 */
+
+#endif /* QEMU_ATOMIC128_H */
diff --git a/tcg/tcg.h b/tcg/tcg.h
index c59f254e27..f4efbaa680 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -32,6 +32,7 @@
 #include "qemu/queue.h"
 #include "tcg-mo.h"
 #include "tcg-target.h"
+#include "qemu/int128.h"
=20
 /* XXX: make safe guess about sizes */
 #define MAX_OP_PER_INSTR 266
@@ -1456,11 +1457,14 @@ GEN_ATOMIC_HELPER_ALL(xchg)
 #undef GEN_ATOMIC_HELPER
 #endif /* CONFIG_SOFTMMU */
=20
-#ifdef CONFIG_ATOMIC128
-#include "qemu/int128.h"
-
-/* These aren't really a "proper" helpers because TCG cannot manage Int128.
-   However, use the same format as the others, for use by the backends. */
+/*
+ * These aren't really "proper" helpers because TCG cannot manage Int128.
+ * However, use the same format as the others, for use by the backends.
+ *
+ * The cmpxchg functions are only defined if HAVE_CMPXCHG128;
+ * the ld/st functions are only defined if HAVE_ATOMIC128,
+ * as defined by <qemu/atomic128.h>.
+ */
 Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
                                      Int128 cmpv, Int128 newv,
                                      TCGMemOpIdx oi, uintptr_t retaddr);
@@ -1477,6 +1481,4 @@ void helper_atomic_sto_le_mmu(CPUArchState *env, targ=
et_ulong addr, Int128 val,
 void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128=
 val,
                               TCGMemOpIdx oi, uintptr_t retaddr);
=20
-#endif /* CONFIG_ATOMIC128 */
-
 #endif /* TCG_H */
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index e4993d72fb..28b770a404 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -32,6 +32,7 @@
 #include "exec/log.h"
 #include "exec/helper-proto.h"
 #include "qemu/atomic.h"
+#include "qemu/atomic128.h"
=20
 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
 /* #define DEBUG_TLB */
@@ -1112,7 +1113,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, tar=
get_ulong addr,
 #include "atomic_template.h"
 #endif
=20
-#ifdef CONFIG_ATOMIC128
+#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
 #define DATA_SIZE 16
 #include "atomic_template.h"
 #endif
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 26a3ffbba1..cd75829cf2 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -25,6 +25,7 @@
 #include "exec/cpu_ldst.h"
 #include "translate-all.h"
 #include "exec/helper-proto.h"
+#include "qemu/atomic128.h"
=20
 #undef EAX
 #undef ECX
@@ -615,7 +616,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, targe=
t_ulong addr,
 /* The following is only callable from other helpers, and matches up
    with the softmmu version.  */
=20
-#ifdef CONFIG_ATOMIC128
+#if HAVE_ATOMIC128 || HAVE_CMPXCHG128
=20
 #undef EXTRA_ARGS
 #undef ATOMIC_NAME
@@ -628,4 +629,4 @@ static void *atomic_mmu_lookup(CPUArchState *env, targe=
t_ulong addr,
=20
 #define DATA_SIZE 16
 #include "atomic_template.h"
-#endif /* CONFIG_ATOMIC128 */
+#endif
diff --git a/configure b/configure
index 8af2be959f..03bf719ca7 100755
--- a/configure
+++ b/configure
@@ -5160,6 +5160,21 @@ EOF
   fi
 fi
=20
+cmpxchg128=3Dno
+if test "$int128" =3D yes -a "$atomic128" =3D no; then
+  cat > $TMPC << EOF
+int main(void)
+{
+  unsigned __int128 x =3D 0, y =3D 0;
+  __sync_val_compare_and_swap_16(&x, y, x);
+  return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+    cmpxchg128=3Dyes
+  fi
+fi
+
 #########################################
 # See if 64-bit atomic operations are supported.
 # Note that without __atomic builtins, we can only
@@ -6669,6 +6684,10 @@ if test "$atomic128" =3D "yes" ; then
   echo "CONFIG_ATOMIC128=3Dy" >> $config_host_mak
 fi
=20
+if test "$cmpxchg128" =3D "yes" ; then
+  echo "CONFIG_CMPXCHG128=3Dy" >> $config_host_mak
+fi
+
 if test "$atomic64" =3D "yes" ; then
   echo "CONFIG_ATOMIC64=3Dy" >> $config_host_mak
 fi
--=20
2.17.2