From nobody Tue Apr 8 22:25:07 2025 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; dkim=fail; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=linaro.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1539713263350542.6282057519788; Tue, 16 Oct 2018 11:07:43 -0700 (PDT) Received: from localhost ([::1]:59484 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gCTkr-0002Ou-VB for importer@patchew.org; Tue, 16 Oct 2018 14:07:42 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:59725) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gCTTP-00046x-M0 for qemu-devel@nongnu.org; Tue, 16 Oct 2018 13:49:42 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1gCTTL-00010d-2e for qemu-devel@nongnu.org; Tue, 16 Oct 2018 13:49:39 -0400 Received: from mail-pg1-x532.google.com ([2607:f8b0:4864:20::532]:38067) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16) (Exim 4.71) (envelope-from ) id 1gCTTK-0000wY-MJ for qemu-devel@nongnu.org; Tue, 16 Oct 2018 13:49:34 -0400 Received: by mail-pg1-x532.google.com with SMTP id f8-v6so11197880pgq.5 for ; Tue, 16 Oct 2018 10:49:33 -0700 (PDT) Received: from cloudburst.twiddle.net (174-21-9-133.tukw.qwest.net. [174.21.9.133]) by smtp.gmail.com with ESMTPSA id 6-v6sm17441210pgl.6.2018.10.16.10.49.30 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 16 Oct 2018 10:49:31 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=PCmtFGzAQFVJC59+lCU2eXtnA4WHfu0lUi+CVh16Ypc=; b=WpLJlpsojkBkNSvay18eMD8izL/tPtIJt1pIYwkh1JwuO19Ju0wUjBEvqiets3CyG5 4/TyCjyDY6TaYQTVgqOYGtIywS6hjZtnS+is0NHsF9H4gdxIX2UxNIuzPeo3fEJIOY2c I4elLMLKD3yP29X8ZGEVacMawajeOWGbLf8DM= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=PCmtFGzAQFVJC59+lCU2eXtnA4WHfu0lUi+CVh16Ypc=; b=lwmdQRJmdv0NkmfI4m2lSe8i/xic9xZ2P0bCLS072dhe3jR0CaWZqRWy6vpsbBDstR qagYoEgHgeGvUkGt9A9AuKcjpJJUvRcDaPGfDuhXlQlFiP8JsatBP2MlsEbGS+KzPuEW EhLEVeLpkFzbrJU17KAXhNkmK6ak8ZHrLfc4WkQZgtPMH1iSTBZTzdaoB2CDz6cof3aE oTNwszpGkg+1Cin+AwKUCpWhdTA5/WhgcAUEftRHpDx2TI1r+VXn98wBiA6zntFghYiD 5ChMwGFPu02KHH9jdDyKtv5ngROpMe0u99f/mDEKavGt19zglHz0Qofp/FJWlbf4jNMG pf9Q== X-Gm-Message-State: ABuFfoiBxlne4iEIMxQn6XQ1D4sfAcWfYEN9KqBw8HIYobSMdWr7CXkD ppe/ISQrgFVsZzu2MrHz2v+2j+X7YK4= X-Google-Smtp-Source: ACcGV61TemvBOIzVsjvzRlNuX5/UPUIglxZnxdB2zRXDuECTAyPl7ZU3euf3WizbHQqOa/yysi7qOQ== X-Received: by 2002:a63:a902:: with SMTP id u2-v6mr21177130pge.207.1539712172133; Tue, 16 Oct 2018 10:49:32 -0700 (PDT) From: Richard Henderson To: qemu-devel@nongnu.org Date: Tue, 16 Oct 2018 10:49:02 -0700 Message-Id: <20181016174911.9052-13-richard.henderson@linaro.org> X-Mailer: git-send-email 2.17.2 In-Reply-To: <20181016174911.9052-1-richard.henderson@linaro.org> References: <20181016174911.9052-1-richard.henderson@linaro.org> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 2607:f8b0:4864:20::532 Subject: [Qemu-devel] [PULL 12/21] tcg: Split CONFIG_ATOMIC128 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: peter.maydell@linaro.org Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: fail (Header signature does not verify) X-ZohoMail: RDMRC_1 RDKM_2 RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" GCC7+ will no longer advertise support for 16-byte __atomic operations if only cmpxchg is supported, as for x86_64. Fortunately, x86_64 still has support for __sync_compare_and_swap_16 and we can make use of that. AArch64 does not have, nor ever has had such support, so open-code it. Reviewed-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/atomic_template.h | 20 ++++- include/qemu/atomic128.h | 155 ++++++++++++++++++++++++++++++++++++ tcg/tcg.h | 16 ++-- accel/tcg/cputlb.c | 3 +- accel/tcg/user-exec.c | 5 +- configure | 19 +++++ 6 files changed, 204 insertions(+), 14 deletions(-) create mode 100644 include/qemu/atomic128.h diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index d751bcba48..efde12fdb2 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -100,19 +100,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, targ= et_ulong addr, DATA_TYPE ret; =20 ATOMIC_TRACE_RMW; +#if DATA_SIZE =3D=3D 16 + ret =3D atomic16_cmpxchg(haddr, cmpv, newv); +#else ret =3D atomic_cmpxchg__nocheck(haddr, cmpv, newv); +#endif ATOMIC_MMU_CLEANUP; return ret; } =20 #if DATA_SIZE >=3D 16 +#if HAVE_ATOMIC128 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr =3D ATOMIC_MMU_LOOKUP; =20 ATOMIC_TRACE_LD; - __atomic_load(haddr, &val, __ATOMIC_RELAXED); + val =3D atomic16_read(haddr); ATOMIC_MMU_CLEANUP; return val; } @@ -124,9 +129,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong a= ddr, DATA_TYPE *haddr =3D ATOMIC_MMU_LOOKUP; =20 ATOMIC_TRACE_ST; - __atomic_store(haddr, &val, __ATOMIC_RELAXED); + atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; } +#endif #else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) @@ -228,19 +234,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, targ= et_ulong addr, DATA_TYPE ret; =20 ATOMIC_TRACE_RMW; +#if DATA_SIZE =3D=3D 16 + ret =3D atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)); +#else ret =3D atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); +#endif ATOMIC_MMU_CLEANUP; return BSWAP(ret); } =20 #if DATA_SIZE >=3D 16 +#if HAVE_ATOMIC128 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr =3D ATOMIC_MMU_LOOKUP; =20 ATOMIC_TRACE_LD; - __atomic_load(haddr, &val, __ATOMIC_RELAXED); + val =3D atomic16_read(haddr); ATOMIC_MMU_CLEANUP; return BSWAP(val); } @@ -253,9 +264,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong a= ddr, =20 ATOMIC_TRACE_ST; val =3D BSWAP(val); - __atomic_store(haddr, &val, __ATOMIC_RELAXED); + atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; } +#endif #else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h new file mode 100644 index 0000000000..fdea225132 --- /dev/null +++ b/include/qemu/atomic128.h @@ -0,0 +1,155 @@ +/* + * Simple interface for 128-bit atomic operations. + * + * Copyright (C) 2018 Linaro, Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2 or late= r. + * See the COPYING file in the top-level directory. + * + * See docs/devel/atomics.txt for discussion about the guarantees each + * atomic primitive is meant to provide. + */ + +#ifndef QEMU_ATOMIC128_H +#define QEMU_ATOMIC128_H + +/* + * GCC is a house divided about supporting large atomic operations. + * + * For hosts that only have large compare-and-swap, a legalistic reading + * of the C++ standard means that one cannot implement __atomic_read on + * read-only memory, and thus all atomic operations must synchronize + * through libatomic. + * + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D80878 + * + * This interpretation is not especially helpful for QEMU. + * For softmmu, all RAM is always read/write from the hypervisor. + * For user-only, if the guest doesn't implement such an __atomic_read + * then the host need not worry about it either. + * + * Moreover, using libatomic is not an option, because its interface is + * built for std::atomic, and requires that *all* accesses to such an + * object go through the library. In our case we do not have an object + * in the C/C++ sense, but a view of memory as seen by the guest. + * The guest may issue a large atomic operation and then access those + * pieces using word-sized accesses. From the hypervisor, we have no + * way to connect those two actions. + * + * Therefore, special case each platform. + */ + +#if defined(CONFIG_ATOMIC128) +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + return atomic_cmpxchg__nocheck(ptr, cmp, new); +} +# define HAVE_CMPXCHG128 1 +#elif defined(CONFIG_CMPXCHG128) +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + return __sync_val_compare_and_swap_16(ptr, cmp, new); +} +# define HAVE_CMPXCHG128 1 +#elif defined(__aarch64__) +/* Through gcc 8, aarch64 has no support for 128-bit at all. */ +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) +{ + uint64_t cmpl =3D int128_getlo(cmp), cmph =3D int128_gethi(cmp); + uint64_t newl =3D int128_getlo(new), newh =3D int128_gethi(new); + uint64_t oldl, oldh; + uint32_t tmp; + + asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t" + "cmp %[oldl], %[cmpl]\n\t" + "ccmp %[oldh], %[cmph], #0, eq\n\t" + "b.ne 1f\n\t" + "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t" + "cbnz %w[tmp], 0b\n" + "1:" + : [mem] "+m"(*ptr), [tmp] "=3D&r"(tmp), + [oldl] "=3D&r"(oldl), [oldh] "=3Dr"(oldh) + : [cmpl] "r"(cmpl), [cmph] "r"(cmph), + [newl] "r"(newl), [newh] "r"(newh) + : "memory", "cc"); + + return int128_make128(oldl, oldh); +} +# define HAVE_CMPXCHG128 1 +#else +/* Fallback definition that must be optimized away, or error. */ +Int128 __attribute__((error("unsupported atomic"))) + atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new); +# define HAVE_CMPXCHG128 0 +#endif /* Some definition for HAVE_CMPXCHG128 */ + + +#if defined(CONFIG_ATOMIC128) +static inline Int128 atomic16_read(Int128 *ptr) +{ + return atomic_read__nocheck(ptr); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + atomic_set__nocheck(ptr, val); +} + +# define HAVE_ATOMIC128 1 +#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__) +/* We can do better than cmpxchg for AArch64. */ +static inline Int128 atomic16_read(Int128 *ptr) +{ + uint64_t l, h; + uint32_t tmp; + + /* The load must be paired with the store to guarantee not tearing. */ + asm("0: ldxp %[l], %[h], %[mem]\n\t" + "stxp %w[tmp], %[l], %[h], %[mem]\n\t" + "cbnz %w[tmp], 0b" + : [mem] "+m"(*ptr), [tmp] "=3Dr"(tmp), [l] "=3Dr"(l), [h] "=3Dr"(h= )); + + return int128_make128(l, h); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + uint64_t l =3D int128_getlo(val), h =3D int128_gethi(val); + uint64_t t1, t2; + + /* Load into temporaries to acquire the exclusive access lock. */ + asm("0: ldxp %[t1], %[t2], %[mem]\n\t" + "stxp %w[t1], %[l], %[h], %[mem]\n\t" + "cbnz %w[t1], 0b" + : [mem] "+m"(*ptr), [t1] "=3D&r"(t1), [t2] "=3D&r"(t2) + : [l] "r"(l), [h] "r"(h)); +} + +# define HAVE_ATOMIC128 1 +#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128 +static inline Int128 atomic16_read(Int128 *ptr) +{ + /* Maybe replace 0 with 0, returning the old value. */ + return atomic16_cmpxchg(ptr, 0, 0); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + Int128 old =3D *ptr, cmp; + do { + cmp =3D old; + old =3D atomic16_cmpxchg(ptr, cmp, val); + } while (old !=3D cmp); +} + +# define HAVE_ATOMIC128 1 +#else +/* Fallback definitions that must be optimized away, or error. */ +Int128 __attribute__((error("unsupported atomic"))) + atomic16_read(Int128 *ptr); +void __attribute__((error("unsupported atomic"))) + atomic16_set(Int128 *ptr, Int128 val); +# define HAVE_ATOMIC128 0 +#endif /* Some definition for HAVE_ATOMIC128 */ + +#endif /* QEMU_ATOMIC128_H */ diff --git a/tcg/tcg.h b/tcg/tcg.h index c59f254e27..f4efbaa680 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -32,6 +32,7 @@ #include "qemu/queue.h" #include "tcg-mo.h" #include "tcg-target.h" +#include "qemu/int128.h" =20 /* XXX: make safe guess about sizes */ #define MAX_OP_PER_INSTR 266 @@ -1456,11 +1457,14 @@ GEN_ATOMIC_HELPER_ALL(xchg) #undef GEN_ATOMIC_HELPER #endif /* CONFIG_SOFTMMU */ =20 -#ifdef CONFIG_ATOMIC128 -#include "qemu/int128.h" - -/* These aren't really a "proper" helpers because TCG cannot manage Int128. - However, use the same format as the others, for use by the backends. */ +/* + * These aren't really a "proper" helpers because TCG cannot manage Int128. + * However, use the same format as the others, for use by the backends. + * + * The cmpxchg functions are only defined if HAVE_CMPXCHG128; + * the ld/st functions are only defined if HAVE_ATOMIC128, + * as defined by . + */ Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, Int128 cmpv, Int128 newv, TCGMemOpIdx oi, uintptr_t retaddr); @@ -1477,6 +1481,4 @@ void helper_atomic_sto_le_mmu(CPUArchState *env, targ= et_ulong addr, Int128 val, void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128= val, TCGMemOpIdx oi, uintptr_t retaddr); =20 -#endif /* CONFIG_ATOMIC128 */ - #endif /* TCG_H */ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index e4993d72fb..28b770a404 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -32,6 +32,7 @@ #include "exec/log.h" #include "exec/helper-proto.h" #include "qemu/atomic.h" +#include "qemu/atomic128.h" =20 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -1112,7 +1113,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, tar= get_ulong addr, #include "atomic_template.h" #endif =20 -#ifdef CONFIG_ATOMIC128 +#if HAVE_CMPXCHG128 || HAVE_ATOMIC128 #define DATA_SIZE 16 #include "atomic_template.h" #endif diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 26a3ffbba1..cd75829cf2 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -25,6 +25,7 @@ #include "exec/cpu_ldst.h" #include "translate-all.h" #include "exec/helper-proto.h" +#include "qemu/atomic128.h" =20 #undef EAX #undef ECX @@ -615,7 +616,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, targe= t_ulong addr, /* The following is only callable from other helpers, and matches up with the softmmu version. */ =20 -#ifdef CONFIG_ATOMIC128 +#if HAVE_ATOMIC128 || HAVE_CMPXCHG128 =20 #undef EXTRA_ARGS #undef ATOMIC_NAME @@ -628,4 +629,4 @@ static void *atomic_mmu_lookup(CPUArchState *env, targe= t_ulong addr, =20 #define DATA_SIZE 16 #include "atomic_template.h" -#endif /* CONFIG_ATOMIC128 */ +#endif diff --git a/configure b/configure index 8af2be959f..03bf719ca7 100755 --- a/configure +++ b/configure @@ -5160,6 +5160,21 @@ EOF fi fi =20 +cmpxchg128=3Dno +if test "$int128" =3D yes -a "$atomic128" =3D no; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x =3D 0, y =3D 0; + __sync_val_compare_and_swap_16(&x, y, x); + return 0; +} +EOF + if compile_prog "" "" ; then + cmpxchg128=3Dyes + fi +fi + ######################################### # See if 64-bit atomic operations are supported. # Note that without __atomic builtins, we can only @@ -6669,6 +6684,10 @@ if test "$atomic128" =3D "yes" ; then echo "CONFIG_ATOMIC128=3Dy" >> $config_host_mak fi =20 +if test "$cmpxchg128" =3D "yes" ; then + echo "CONFIG_CMPXCHG128=3Dy" >> $config_host_mak +fi + if test "$atomic64" =3D "yes" ; then echo "CONFIG_ATOMIC64=3Dy" >> $config_host_mak fi --=20 2.17.2