[PATCH 37/50] accel/tcg: Drop CONFIG_ATOMIC64 checks from ldst_atomicity.c.inc

Posted by Richard Henderson 2 days, 5 hours ago
CONFIG_ATOMIC64 is a configuration knob for 32-bit hosts; with 32-bit
hosts no longer supported, it is always enabled.

This allows removal of functions like load_atomic8_or_exit
and simplification of load_atom_extract_al8_or_exit to
load_atom_extract_al8.
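
With the knob gone, the 8-byte primitives collapse to plain atomic
accesses.  A minimal sketch of the post-patch shape (mirroring the
hunks below; every supported host is 64-bit, so the build-time size
check in qatomic_read/qatomic_set now passes for 8-byte types):

    static inline uint64_t load_atomic8(void *pv)
    {
        /* An aligned 8-byte access is a single atomic operation. */
        uint64_t *p = __builtin_assume_aligned(pv, 8);
        return qatomic_read(p);
    }

    static inline void store_atomic8(void *pv, uint64_t val)
    {
        uint64_t *p = __builtin_assume_aligned(pv, 8);
        qatomic_set(p, val);
    }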

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c             |  35 +-------
 accel/tcg/ldst_atomicity.c.inc | 149 +++++----------------------------
 2 files changed, 24 insertions(+), 160 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index a6774083b0..6900a12682 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2080,25 +2080,6 @@ static uint64_t do_ld_parts_beN(MMULookupPageData *p, uint64_t ret_be)
     return ret_be;
 }
 
-/**
- * do_ld_parts_be4
- * @p: translation parameters
- * @ret_be: accumulated data
- *
- * As do_ld_bytes_beN, but with one atomic load.
- * Four aligned bytes are guaranteed to cover the load.
- */
-static uint64_t do_ld_whole_be4(MMULookupPageData *p, uint64_t ret_be)
-{
-    int o = p->addr & 3;
-    uint32_t x = load_atomic4(p->haddr - o);
-
-    x = cpu_to_be32(x);
-    x <<= o * 8;
-    x >>= (4 - p->size) * 8;
-    return (ret_be << (p->size * 8)) | x;
-}
-
 /**
  * do_ld_parts_be8
  * @p: translation parameters
@@ -2111,7 +2092,7 @@ static uint64_t do_ld_whole_be8(CPUState *cpu, uintptr_t ra,
                                 MMULookupPageData *p, uint64_t ret_be)
 {
     int o = p->addr & 7;
-    uint64_t x = load_atomic8_or_exit(cpu, ra, p->haddr - o);
+    uint64_t x = load_atomic8(p->haddr - o);
 
     x = cpu_to_be64(x);
     x <<= o * 8;
@@ -2176,11 +2157,7 @@ static uint64_t do_ld_beN(CPUState *cpu, MMULookupPageData *p,
         if (atom == MO_ATOM_IFALIGN_PAIR
             ? p->size == half_size
             : p->size >= half_size) {
-            if (!HAVE_al8_fast && p->size < 4) {
-                return do_ld_whole_be4(p, ret_be);
-            } else {
-                return do_ld_whole_be8(cpu, ra, p, ret_be);
-            }
+            return do_ld_whole_be8(cpu, ra, p, ret_be);
         }
         /* fall through */
 
@@ -2586,13 +2563,7 @@ static uint64_t do_st_leN(CPUState *cpu, MMULookupPageData *p,
         if (atom == MO_ATOM_IFALIGN_PAIR
             ? p->size == half_size
             : p->size >= half_size) {
-            if (!HAVE_al8_fast && p->size <= 4) {
-                return store_whole_le4(p->haddr, p->size, val_le);
-            } else if (HAVE_al8) {
-                return store_whole_le8(p->haddr, p->size, val_le);
-            } else {
-                cpu_loop_exit_atomic(cpu, ra);
-            }
+            return store_whole_le8(p->haddr, p->size, val_le);
         }
         /* fall through */
 
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
index c735add261..f5b8289009 100644
--- a/accel/tcg/ldst_atomicity.c.inc
+++ b/accel/tcg/ldst_atomicity.c.inc
@@ -12,13 +12,6 @@
 #include "host/load-extract-al16-al8.h.inc"
 #include "host/store-insert-al16.h.inc"
 
-#ifdef CONFIG_ATOMIC64
-# define HAVE_al8          true
-#else
-# define HAVE_al8          false
-#endif
-#define HAVE_al8_fast      (ATOMIC_REG_SIZE >= 8)
-
 /**
  * required_atomicity:
  *
@@ -132,44 +125,7 @@ static inline uint32_t load_atomic4(void *pv)
 static inline uint64_t load_atomic8(void *pv)
 {
     uint64_t *p = __builtin_assume_aligned(pv, 8);
-
-    qemu_build_assert(HAVE_al8);
-    return qatomic_read__nocheck(p);
-}
-
-/**
- * load_atomic8_or_exit:
- * @cpu: generic cpu state
- * @ra: host unwind address
- * @pv: host address
- *
- * Atomically load 8 aligned bytes from @pv.
- * If this is not possible, longjmp out to restart serially.
- */
-static uint64_t load_atomic8_or_exit(CPUState *cpu, uintptr_t ra, void *pv)
-{
-    if (HAVE_al8) {
-        return load_atomic8(pv);
-    }
-
-#ifdef CONFIG_USER_ONLY
-    /*
-     * If the page is not writable, then assume the value is immutable
-     * and requires no locking.  This ignores the case of MAP_SHARED with
-     * another process, because the fallback start_exclusive solution
-     * provides no protection across processes.
-     */
-    WITH_MMAP_LOCK_GUARD() {
-        if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
-            uint64_t *p = __builtin_assume_aligned(pv, 8);
-            return *p;
-        }
-    }
-#endif
-
-    /* Ultimate fallback: re-execute in serial context. */
-    trace_load_atom8_or_exit_fallback(ra);
-    cpu_loop_exit_atomic(cpu, ra);
+    return qatomic_read(p);
 }
 
 /**
@@ -264,9 +220,7 @@ static uint64_t load_atom_extract_al8x2(void *pv)
 }
 
 /**
- * load_atom_extract_al8_or_exit:
- * @cpu: generic cpu state
- * @ra: host unwind address
+ * load_atom_extract_al8:
  * @pv: host address
  * @s: object size in bytes, @s <= 4.
  *
@@ -275,15 +229,14 @@ static uint64_t load_atom_extract_al8x2(void *pv)
  * 8-byte load and extract.
  * The value is returned in the low bits of a uint32_t.
  */
-static uint32_t load_atom_extract_al8_or_exit(CPUState *cpu, uintptr_t ra,
-                                              void *pv, int s)
+static uint32_t load_atom_extract_al8(void *pv, int s)
 {
     uintptr_t pi = (uintptr_t)pv;
     int o = pi & 7;
     int shr = (HOST_BIG_ENDIAN ? 8 - s - o : o) * 8;
 
     pv = (void *)(pi & ~7);
-    return load_atomic8_or_exit(cpu, ra, pv) >> shr;
+    return load_atomic8(pv) >> shr;
 }
 
 /**
@@ -297,7 +250,7 @@ static uint32_t load_atom_extract_al8_or_exit(CPUState *cpu, uintptr_t ra,
  * and p % 16 + s > 8.  I.e. does not cross a 16-byte
  * boundary, but *does* cross an 8-byte boundary.
  * This is the slow version, so we must have eliminated
- * any faster load_atom_extract_al8_or_exit case.
+ * any faster load_atom_extract_al8 case.
  *
  * If this is not possible, longjmp out to restart serially.
  */
@@ -374,21 +327,6 @@ static inline uint64_t load_atom_8_by_4(void *pv)
     }
 }
 
-/**
- * load_atom_8_by_8_or_4:
- * @pv: host address
- *
- * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
- */
-static inline uint64_t load_atom_8_by_8_or_4(void *pv)
-{
-    if (HAVE_al8_fast) {
-        return load_atomic8(pv);
-    } else {
-        return load_atom_8_by_4(pv);
-    }
-}
-
 /**
  * load_atom_2:
  * @p: host address
@@ -418,12 +356,8 @@ static uint16_t load_atom_2(CPUState *cpu, uintptr_t ra,
         return lduw_he_p(pv);
     case MO_16:
         /* The only case remaining is MO_ATOM_WITHIN16. */
-        if (!HAVE_al8_fast && (pi & 3) == 1) {
-            /* Big or little endian, we want the middle two bytes. */
-            return load_atomic4(pv - 1) >> 8;
-        }
         if ((pi & 15) != 7) {
-            return load_atom_extract_al8_or_exit(cpu, ra, pv, 2);
+            return load_atom_extract_al8(pv, 2);
         }
         return load_atom_extract_al16_or_exit(cpu, ra, pv, 2);
     default:
@@ -468,7 +402,7 @@ static uint32_t load_atom_4(CPUState *cpu, uintptr_t ra,
         return load_atom_extract_al4x2(pv);
     case MO_32:
         if (!(pi & 4)) {
-            return load_atom_extract_al8_or_exit(cpu, ra, pv, 4);
+            return load_atom_extract_al8(pv, 4);
         }
         return load_atom_extract_al16_or_exit(cpu, ra, pv, 4);
     default:
@@ -493,7 +427,7 @@ static uint64_t load_atom_8(CPUState *cpu, uintptr_t ra,
      * If the host does not support 8-byte atomics, wait until we have
      * examined the atomicity parameters below.
      */
-    if (HAVE_al8 && likely((pi & 7) == 0)) {
+    if (likely((pi & 7) == 0)) {
         return load_atomic8(pv);
     }
     if (HAVE_ATOMIC128_RO) {
@@ -502,30 +436,9 @@ static uint64_t load_atom_8(CPUState *cpu, uintptr_t ra,
 
     atmax = required_atomicity(cpu, pi, memop);
     if (atmax == MO_64) {
-        if (!HAVE_al8 && (pi & 7) == 0) {
-            load_atomic8_or_exit(cpu, ra, pv);
-        }
         return load_atom_extract_al16_or_exit(cpu, ra, pv, 8);
     }
-    if (HAVE_al8_fast) {
-        return load_atom_extract_al8x2(pv);
-    }
-    switch (atmax) {
-    case MO_8:
-        return ldq_he_p(pv);
-    case MO_16:
-        return load_atom_8_by_2(pv);
-    case MO_32:
-        return load_atom_8_by_4(pv);
-    case -MO_32:
-        if (HAVE_al8) {
-            return load_atom_extract_al8x2(pv);
-        }
-        trace_load_atom8_fallback(memop, ra);
-        cpu_loop_exit_atomic(cpu, ra);
-    default:
-        g_assert_not_reached();
-    }
+    return load_atom_extract_al8x2(pv);
 }
 
 /**
@@ -565,18 +478,10 @@ static Int128 load_atom_16(CPUState *cpu, uintptr_t ra,
         b = load_atom_8_by_4(pv + 8);
         break;
     case MO_64:
-        if (!HAVE_al8) {
-            trace_load_atom16_fallback(memop, ra);
-            cpu_loop_exit_atomic(cpu, ra);
-        }
         a = load_atomic8(pv);
         b = load_atomic8(pv + 8);
         break;
     case -MO_64:
-        if (!HAVE_al8) {
-            trace_load_atom16_fallback(memop, ra);
-            cpu_loop_exit_atomic(cpu, ra);
-        }
         a = load_atom_extract_al8x2(pv);
         b = load_atom_extract_al8x2(pv + 8);
         break;
@@ -624,9 +529,7 @@ static inline void store_atomic4(void *pv, uint32_t val)
 static inline void store_atomic8(void *pv, uint64_t val)
 {
     uint64_t *p = __builtin_assume_aligned(pv, 8);
-
-    qemu_build_assert(HAVE_al8);
-    qatomic_set__nocheck(p, val);
+    qatomic_set(p, val);
 }
 
 /**
@@ -688,9 +591,8 @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
 {
     uint64_t old, new;
 
-    qemu_build_assert(HAVE_al8);
     p = __builtin_assume_aligned(p, 8);
-    old = qatomic_read__nocheck(p);
+    old = qatomic_read(p);
     do {
         new = (old & ~msk) | val;
     } while (!__atomic_compare_exchange_n(p, &old, new, true,
@@ -802,7 +704,6 @@ static uint64_t store_whole_le8(void *pv, int size, uint64_t val_le)
     uint64_t m = MAKE_64BIT_MASK(0, sz);
     uint64_t v;
 
-    qemu_build_assert(HAVE_al8);
     if (HOST_BIG_ENDIAN) {
         v = bswap64(val_le) >> sh;
         m = bswap64(m) >> sh;
@@ -887,10 +788,8 @@ static void store_atom_2(CPUState *cpu, uintptr_t ra,
         store_atom_insert_al4(pv - 1, (uint32_t)val << 8, MAKE_64BIT_MASK(8, 16));
         return;
     } else if ((pi & 7) == 3) {
-        if (HAVE_al8) {
-            store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
-            return;
-        }
+        store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
+        return;
     } else if ((pi & 15) == 7) {
         if (HAVE_CMPXCHG128) {
             Int128 v = int128_lshift(int128_make64(val), 56);
@@ -957,10 +856,8 @@ static void store_atom_4(CPUState *cpu, uintptr_t ra,
         return;
     case MO_32:
         if ((pi & 7) < 4) {
-            if (HAVE_al8) {
-                store_whole_le8(pv, 4, cpu_to_le32(val));
-                return;
-            }
+            store_whole_le8(pv, 4, cpu_to_le32(val));
+            return;
         } else {
             if (HAVE_CMPXCHG128) {
                 store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
@@ -988,7 +885,7 @@ static void store_atom_8(CPUState *cpu, uintptr_t ra,
     uintptr_t pi = (uintptr_t)pv;
     int atmax;
 
-    if (HAVE_al8 && likely((pi & 7) == 0)) {
+    if (likely((pi & 7) == 0)) {
         store_atomic8(pv, val);
         return;
     }
@@ -1005,7 +902,7 @@ static void store_atom_8(CPUState *cpu, uintptr_t ra,
         store_atom_8_by_4(pv, val);
         return;
     case -MO_32:
-        if (HAVE_al8) {
+        {
             uint64_t val_le = cpu_to_le64(val);
             int s2 = pi & 7;
             int s1 = 8 - s2;
@@ -1024,9 +921,8 @@ static void store_atom_8(CPUState *cpu, uintptr_t ra,
             default:
                 g_assert_not_reached();
             }
-            return;
         }
-        break;
+        return;
     case MO_64:
         if (HAVE_CMPXCHG128) {
             store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
@@ -1077,12 +973,9 @@ static void store_atom_16(CPUState *cpu, uintptr_t ra,
         store_atom_8_by_4(pv + 8, b);
         return;
     case MO_64:
-        if (HAVE_al8) {
-            store_atomic8(pv, a);
-            store_atomic8(pv + 8, b);
-            return;
-        }
-        break;
+        store_atomic8(pv, a);
+        store_atomic8(pv + 8, b);
+        return;
     case -MO_64:
         if (HAVE_CMPXCHG128) {
             uint64_t val_le;
-- 
2.43.0
Re: [PATCH 37/50] accel/tcg: Drop CONFIG_ATOMIC64 checks from ldst_atomicity.c.inc
Posted by Pierrick Bouvier 1 day, 13 hours ago
On 1/7/26 9:30 PM, Richard Henderson wrote:
> CONFIG_ATOMIC64 is a configuration knob for 32-bit hosts; with 32-bit
> hosts no longer supported, it is always enabled.
> 
> This allows removal of functions like load_atomic8_or_exit
> and simplification of load_atom_extract_al8_or_exit to
> load_atom_extract_al8.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   accel/tcg/cputlb.c             |  35 +-------
>   accel/tcg/ldst_atomicity.c.inc | 149 +++++----------------------------
>   2 files changed, 24 insertions(+), 160 deletions(-)
> 

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>