CONFIG_ATOMIC64 is a configuration knob for 32-bit hosts; now that
32-bit hosts are no longer supported, 8-byte host atomics are always
available.  This allows the removal of fallback paths such as
load_atomic8_or_exit, and the simplification of
load_atom_extract_al8_or_exit into load_atom_extract_al8, which can
no longer fail.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
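Notes (illustration only, not part of the patch): a standalone model of
the extraction arithmetic that load_atom_extract_al8 now performs
unconditionally. The aligned 8-byte unit containing the value is loaded
atomically, then the wanted bytes are shifted down. The explicit mask
here is for clarity; the real helper instead relies on the caller
truncating the returned value. Assumes a little-endian host.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Aligned 8-byte unit: bytes 11 22 33 44 55 66 77 88 in memory. */
    uint64_t unit = 0x8877665544332211ull;
    int s = 2, o = 3;        /* 2-byte load at byte offset 3 within the unit */
    int shr = o * 8;         /* little-endian: shift out the bytes below o */
    uint32_t val = (unit >> shr) & ((1u << (s * 8)) - 1);

    printf("%04" PRIx32 "\n", val);   /* 5544: bytes at offsets 3 and 4 */
    return 0;
}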
accel/tcg/cputlb.c | 35 +-------
accel/tcg/ldst_atomicity.c.inc | 153 +++++----------------------------
2 files changed, 26 insertions(+), 162 deletions(-)
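Also for reference, a C11 sketch of the read-modify-write loop in
store_atom_insert_al8, which the patch now reaches unconditionally.
This uses <stdatomic.h> rather than QEMU's qatomic macros, and
insert_al8 is a hypothetical stand-in for the real helper: the bytes
selected by msk are replaced atomically while neighbouring bytes are
preserved.

#include <stdatomic.h>
#include <stdint.h>

/* Replace the bits of *p selected by msk with val, atomically.
 * As in the real helper, val is already masked and positioned
 * by the caller.
 */
static void insert_al8(_Atomic uint64_t *p, uint64_t val, uint64_t msk)
{
    uint64_t old = atomic_load_explicit(p, memory_order_relaxed);
    uint64_t new;

    do {
        new = (old & ~msk) | val;
    } while (!atomic_compare_exchange_weak_explicit(
                 p, &old, new, memory_order_relaxed, memory_order_relaxed));
}

int main(void)
{
    _Atomic uint64_t word = 0x8877665544332211ull;

    /* Insert 0xbeef at byte offset 3, as store_atom_2 does for
     * (pi & 7) == 3: value shifted left 24, 16-bit mask at bit 24. */
    insert_al8(&word, (uint64_t)0xbeef << 24, 0xffffull << 24);
    return 0;
}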
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index a6774083b0..6900a12682 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2080,25 +2080,6 @@ static uint64_t do_ld_parts_beN(MMULookupPageData *p, uint64_t ret_be)
return ret_be;
}
-/**
- * do_ld_parts_be4
- * @p: translation parameters
- * @ret_be: accumulated data
- *
- * As do_ld_bytes_beN, but with one atomic load.
- * Four aligned bytes are guaranteed to cover the load.
- */
-static uint64_t do_ld_whole_be4(MMULookupPageData *p, uint64_t ret_be)
-{
- int o = p->addr & 3;
- uint32_t x = load_atomic4(p->haddr - o);
-
- x = cpu_to_be32(x);
- x <<= o * 8;
- x >>= (4 - p->size) * 8;
- return (ret_be << (p->size * 8)) | x;
-}
-
/**
* do_ld_parts_be8
* @p: translation parameters
@@ -2111,7 +2092,7 @@ static uint64_t do_ld_whole_be8(CPUState *cpu, uintptr_t ra,
MMULookupPageData *p, uint64_t ret_be)
{
int o = p->addr & 7;
- uint64_t x = load_atomic8_or_exit(cpu, ra, p->haddr - o);
+ uint64_t x = load_atomic8(p->haddr - o);
x = cpu_to_be64(x);
x <<= o * 8;
@@ -2176,11 +2157,7 @@ static uint64_t do_ld_beN(CPUState *cpu, MMULookupPageData *p,
if (atom == MO_ATOM_IFALIGN_PAIR
? p->size == half_size
: p->size >= half_size) {
- if (!HAVE_al8_fast && p->size < 4) {
- return do_ld_whole_be4(p, ret_be);
- } else {
- return do_ld_whole_be8(cpu, ra, p, ret_be);
- }
+ return do_ld_whole_be8(cpu, ra, p, ret_be);
}
/* fall through */
@@ -2586,13 +2563,7 @@ static uint64_t do_st_leN(CPUState *cpu, MMULookupPageData *p,
if (atom == MO_ATOM_IFALIGN_PAIR
? p->size == half_size
: p->size >= half_size) {
- if (!HAVE_al8_fast && p->size <= 4) {
- return store_whole_le4(p->haddr, p->size, val_le);
- } else if (HAVE_al8) {
- return store_whole_le8(p->haddr, p->size, val_le);
- } else {
- cpu_loop_exit_atomic(cpu, ra);
- }
+ return store_whole_le8(p->haddr, p->size, val_le);
}
/* fall through */
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
index c735add261..f5b8289009 100644
--- a/accel/tcg/ldst_atomicity.c.inc
+++ b/accel/tcg/ldst_atomicity.c.inc
@@ -12,13 +12,6 @@
#include "host/load-extract-al16-al8.h.inc"
#include "host/store-insert-al16.h.inc"
-#ifdef CONFIG_ATOMIC64
-# define HAVE_al8 true
-#else
-# define HAVE_al8 false
-#endif
-#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
-
/**
* required_atomicity:
*
@@ -132,44 +125,7 @@ static inline uint32_t load_atomic4(void *pv)
static inline uint64_t load_atomic8(void *pv)
{
uint64_t *p = __builtin_assume_aligned(pv, 8);
-
- qemu_build_assert(HAVE_al8);
- return qatomic_read__nocheck(p);
-}
-
-/**
- * load_atomic8_or_exit:
- * @cpu: generic cpu state
- * @ra: host unwind address
- * @pv: host address
- *
- * Atomically load 8 aligned bytes from @pv.
- * If this is not possible, longjmp out to restart serially.
- */
-static uint64_t load_atomic8_or_exit(CPUState *cpu, uintptr_t ra, void *pv)
-{
- if (HAVE_al8) {
- return load_atomic8(pv);
- }
-
-#ifdef CONFIG_USER_ONLY
- /*
- * If the page is not writable, then assume the value is immutable
- * and requires no locking. This ignores the case of MAP_SHARED with
- * another process, because the fallback start_exclusive solution
- * provides no protection across processes.
- */
- WITH_MMAP_LOCK_GUARD() {
- if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
- uint64_t *p = __builtin_assume_aligned(pv, 8);
- return *p;
- }
- }
-#endif
-
- /* Ultimate fallback: re-execute in serial context. */
- trace_load_atom8_or_exit_fallback(ra);
- cpu_loop_exit_atomic(cpu, ra);
+ return qatomic_read(p);
}
/**
@@ -264,9 +220,7 @@ static uint64_t load_atom_extract_al8x2(void *pv)
}
/**
- * load_atom_extract_al8_or_exit:
- * @cpu: generic cpu state
- * @ra: host unwind address
+ * load_atom_extract_al8:
* @pv: host address
* @s: object size in bytes, @s <= 4.
*
@@ -275,15 +229,14 @@ static uint64_t load_atom_extract_al8x2(void *pv)
* 8-byte load and extract.
* The value is returned in the low bits of a uint32_t.
*/
-static uint32_t load_atom_extract_al8_or_exit(CPUState *cpu, uintptr_t ra,
- void *pv, int s)
+static uint32_t load_atom_extract_al8(void *pv, int s)
{
uintptr_t pi = (uintptr_t)pv;
int o = pi & 7;
int shr = (HOST_BIG_ENDIAN ? 8 - s - o : o) * 8;
pv = (void *)(pi & ~7);
- return load_atomic8_or_exit(cpu, ra, pv) >> shr;
+ return load_atomic8(pv) >> shr;
}
/**
@@ -297,7 +250,7 @@ static uint32_t load_atom_extract_al8_or_exit(CPUState *cpu, uintptr_t ra,
* and p % 16 + s > 8. I.e. does not cross a 16-byte
* boundary, but *does* cross an 8-byte boundary.
* This is the slow version, so we must have eliminated
- * any faster load_atom_extract_al8_or_exit case.
+ * any faster load_atom_extract_al8 case.
*
* If this is not possible, longjmp out to restart serially.
*/
@@ -374,21 +327,6 @@ static inline uint64_t load_atom_8_by_4(void *pv)
}
}
-/**
- * load_atom_8_by_8_or_4:
- * @pv: host address
- *
- * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
- */
-static inline uint64_t load_atom_8_by_8_or_4(void *pv)
-{
- if (HAVE_al8_fast) {
- return load_atomic8(pv);
- } else {
- return load_atom_8_by_4(pv);
- }
-}
-
/**
* load_atom_2:
* @p: host address
@@ -418,12 +356,8 @@ static uint16_t load_atom_2(CPUState *cpu, uintptr_t ra,
return lduw_he_p(pv);
case MO_16:
/* The only case remaining is MO_ATOM_WITHIN16. */
- if (!HAVE_al8_fast && (pi & 3) == 1) {
- /* Big or little endian, we want the middle two bytes. */
- return load_atomic4(pv - 1) >> 8;
- }
if ((pi & 15) != 7) {
- return load_atom_extract_al8_or_exit(cpu, ra, pv, 2);
+ return load_atom_extract_al8(pv, 2);
}
return load_atom_extract_al16_or_exit(cpu, ra, pv, 2);
default:
@@ -468,7 +402,7 @@ static uint32_t load_atom_4(CPUState *cpu, uintptr_t ra,
return load_atom_extract_al4x2(pv);
case MO_32:
if (!(pi & 4)) {
- return load_atom_extract_al8_or_exit(cpu, ra, pv, 4);
+ return load_atom_extract_al8(pv, 4);
}
return load_atom_extract_al16_or_exit(cpu, ra, pv, 4);
default:
@@ -493,7 +427,7 @@ static uint64_t load_atom_8(CPUState *cpu, uintptr_t ra,
- * If the host does not support 8-byte atomics, wait until we have
- * examined the atomicity parameters below.
+ * If the load is unaligned, wait until we have examined the
+ * atomicity parameters below.
*/
- if (HAVE_al8 && likely((pi & 7) == 0)) {
+ if (likely((pi & 7) == 0)) {
return load_atomic8(pv);
}
if (HAVE_ATOMIC128_RO) {
@@ -502,30 +436,9 @@ static uint64_t load_atom_8(CPUState *cpu, uintptr_t ra,
atmax = required_atomicity(cpu, pi, memop);
if (atmax == MO_64) {
- if (!HAVE_al8 && (pi & 7) == 0) {
- load_atomic8_or_exit(cpu, ra, pv);
- }
return load_atom_extract_al16_or_exit(cpu, ra, pv, 8);
}
- if (HAVE_al8_fast) {
- return load_atom_extract_al8x2(pv);
- }
- switch (atmax) {
- case MO_8:
- return ldq_he_p(pv);
- case MO_16:
- return load_atom_8_by_2(pv);
- case MO_32:
- return load_atom_8_by_4(pv);
- case -MO_32:
- if (HAVE_al8) {
- return load_atom_extract_al8x2(pv);
- }
- trace_load_atom8_fallback(memop, ra);
- cpu_loop_exit_atomic(cpu, ra);
- default:
- g_assert_not_reached();
- }
+ return load_atom_extract_al8x2(pv);
}
/**
@@ -565,18 +478,10 @@ static Int128 load_atom_16(CPUState *cpu, uintptr_t ra,
b = load_atom_8_by_4(pv + 8);
break;
case MO_64:
- if (!HAVE_al8) {
- trace_load_atom16_fallback(memop, ra);
- cpu_loop_exit_atomic(cpu, ra);
- }
a = load_atomic8(pv);
b = load_atomic8(pv + 8);
break;
case -MO_64:
- if (!HAVE_al8) {
- trace_load_atom16_fallback(memop, ra);
- cpu_loop_exit_atomic(cpu, ra);
- }
a = load_atom_extract_al8x2(pv);
b = load_atom_extract_al8x2(pv + 8);
break;
@@ -624,9 +529,7 @@ static inline void store_atomic4(void *pv, uint32_t val)
static inline void store_atomic8(void *pv, uint64_t val)
{
uint64_t *p = __builtin_assume_aligned(pv, 8);
-
- qemu_build_assert(HAVE_al8);
- qatomic_set__nocheck(p, val);
+ qatomic_set(p, val);
}
/**
@@ -688,9 +591,8 @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
{
uint64_t old, new;
- qemu_build_assert(HAVE_al8);
p = __builtin_assume_aligned(p, 8);
- old = qatomic_read__nocheck(p);
+ old = qatomic_read(p);
do {
new = (old & ~msk) | val;
} while (!__atomic_compare_exchange_n(p, &old, new, true,
@@ -802,7 +704,6 @@ static uint64_t store_whole_le8(void *pv, int size, uint64_t val_le)
uint64_t m = MAKE_64BIT_MASK(0, sz);
uint64_t v;
- qemu_build_assert(HAVE_al8);
if (HOST_BIG_ENDIAN) {
v = bswap64(val_le) >> sh;
m = bswap64(m) >> sh;
@@ -887,10 +788,8 @@ static void store_atom_2(CPUState *cpu, uintptr_t ra,
store_atom_insert_al4(pv - 1, (uint32_t)val << 8, MAKE_64BIT_MASK(8, 16));
return;
} else if ((pi & 7) == 3) {
- if (HAVE_al8) {
- store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
- return;
- }
+ store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
+ return;
} else if ((pi & 15) == 7) {
if (HAVE_CMPXCHG128) {
Int128 v = int128_lshift(int128_make64(val), 56);
@@ -957,10 +856,8 @@ static void store_atom_4(CPUState *cpu, uintptr_t ra,
return;
case MO_32:
if ((pi & 7) < 4) {
- if (HAVE_al8) {
- store_whole_le8(pv, 4, cpu_to_le32(val));
- return;
- }
+ store_whole_le8(pv, 4, cpu_to_le32(val));
+ return;
} else {
if (HAVE_CMPXCHG128) {
store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
@@ -988,7 +885,7 @@ static void store_atom_8(CPUState *cpu, uintptr_t ra,
uintptr_t pi = (uintptr_t)pv;
int atmax;
- if (HAVE_al8 && likely((pi & 7) == 0)) {
+ if (likely((pi & 7) == 0)) {
store_atomic8(pv, val);
return;
}
@@ -1005,7 +902,7 @@ static void store_atom_8(CPUState *cpu, uintptr_t ra,
store_atom_8_by_4(pv, val);
return;
case -MO_32:
- if (HAVE_al8) {
+ {
uint64_t val_le = cpu_to_le64(val);
int s2 = pi & 7;
int s1 = 8 - s2;
@@ -1024,9 +921,8 @@ static void store_atom_8(CPUState *cpu, uintptr_t ra,
default:
g_assert_not_reached();
}
- return;
}
- break;
+ return;
case MO_64:
if (HAVE_CMPXCHG128) {
store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
@@ -1077,12 +973,9 @@ static void store_atom_16(CPUState *cpu, uintptr_t ra,
store_atom_8_by_4(pv + 8, b);
return;
case MO_64:
- if (HAVE_al8) {
- store_atomic8(pv, a);
- store_atomic8(pv + 8, b);
- return;
- }
- break;
+ store_atomic8(pv, a);
+ store_atomic8(pv + 8, b);
+ return;
case -MO_64:
if (HAVE_CMPXCHG128) {
uint64_t val_le;
--
2.43.0