[v1] tcg patch queue

[PATCH 06/35] host/include/loongarch64: Add atomic16 load and store
Posted by Richard Henderson 1 year ago
While loongarch64 does not have a 128-bit cmpxchg, it does
have 128-bit atomic load and store via the vector unit.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230916220151.526140-6-richard.henderson@linaro.org>
---
 .../include/loongarch64/host/atomic128-ldst.h | 52 +++++++++++++++++++
 .../loongarch64/host/load-extract-al16-al8.h  | 39 ++++++++++++++
 .../loongarch64/host/store-insert-al16.h      | 12 +++++
 3 files changed, 103 insertions(+)
 create mode 100644 host/include/loongarch64/host/atomic128-ldst.h
 create mode 100644 host/include/loongarch64/host/load-extract-al16-al8.h
 create mode 100644 host/include/loongarch64/host/store-insert-al16.h

diff --git a/host/include/loongarch64/host/atomic128-ldst.h b/host/include/loongarch64/host/atomic128-ldst.h
new file mode 100644
index 0000000000..9a4a8f8b9e
--- /dev/null
+++ b/host/include/loongarch64/host/atomic128-ldst.h
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Load/store for 128-bit atomic operations, LoongArch version.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef LOONGARCH_ATOMIC128_LDST_H
+#define LOONGARCH_ATOMIC128_LDST_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+#define HAVE_ATOMIC128_RO  likely(cpuinfo & CPUINFO_LSX)
+#define HAVE_ATOMIC128_RW  HAVE_ATOMIC128_RO
+
+/*
+ * As of gcc 13 and clang 16, there is no compiler support for LSX at all.
+ * Use inline assembly throughout.
+ */
+
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
+{
+    uint64_t l, h;
+
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("vld $vr0, %2, 0\n\t"
+        "vpickve2gr.d %0, $vr0, 0\n\t"
+        "vpickve2gr.d %1, $vr0, 1"
+	: "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "f0");
+
+    return int128_make128(l, h);
+}
+
+static inline Int128 atomic16_read_rw(Int128 *ptr)
+{
+    return atomic16_read_ro(ptr);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    uint64_t l = int128_getlo(val), h = int128_gethi(val);
+
+    tcg_debug_assert(HAVE_ATOMIC128_RW);
+    asm("vinsgr2vr.d $vr0, %1, 0\n\t"
+        "vinsgr2vr.d $vr0, %2, 1\n\t"
+        "vst $vr0, %3, 0"
+	: "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "f0");
+}
+
+#endif /* LOONGARCH_ATOMIC128_LDST_H */
diff --git a/host/include/loongarch64/host/load-extract-al16-al8.h b/host/include/loongarch64/host/load-extract-al16-al8.h
new file mode 100644
index 0000000000..d1fb59d8af
--- /dev/null
+++ b/host/include/loongarch64/host/load-extract-al16-al8.h
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, LoongArch version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
+#define LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0.  If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    Int128 *ptr_align = (Int128 *)(pi & ~7);
+    int shr = (pi & 7) * 8;
+    uint64_t l, h;
+
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("vld $vr0, %2, 0\n\t"
+        "vpickve2gr.d %0, $vr0, 0\n\t"
+        "vpickve2gr.d %1, $vr0, 1"
+	: "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "f0");
+
+    return (l >> shr) | (h << (-shr & 63));
+}
+
+#endif /* LOONGARCH_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/loongarch64/host/store-insert-al16.h b/host/include/loongarch64/host/store-insert-al16.h
new file mode 100644
index 0000000000..919fd8d744
--- /dev/null
+++ b/host/include/loongarch64/host/store-insert-al16.h
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic store insert into 128-bit, LoongArch version.
+ */
+
+#ifndef LOONGARCH_STORE_INSERT_AL16_H
+#define LOONGARCH_STORE_INSERT_AL16_H
+
+void store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
+    QEMU_ERROR("unsupported atomic");
+
+#endif /* LOONGARCH_STORE_INSERT_AL16_H */
-- 
2.34.1