The following changes since commit 9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb:

  Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging (2019-10-04 18:32:34 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20191013

for you to fetch changes up to d2f86bba6931388e275e8eb4ccd1dbcc7cae6328:

  cpus: kick all vCPUs when running thread=single (2019-10-07 14:08:58 -0400)

----------------------------------------------------------------
Host vector support for tcg/ppc.
Fix thread=single cpu kicking.

----------------------------------------------------------------
Alex Bennée (1):
      cpus: kick all vCPUs when running thread=single

Richard Henderson (22):
      tcg/ppc: Introduce Altivec registers
      tcg/ppc: Introduce macro VX4()
      tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC()
      tcg/ppc: Create TCGPowerISA and have_isa
      tcg/ppc: Replace HAVE_ISA_2_06
      tcg/ppc: Replace HAVE_ISEL macro with a variable
      tcg/ppc: Enable tcg backend vector compilation
      tcg/ppc: Add support for load/store/logic/comparison
      tcg/ppc: Add support for vector maximum/minimum
      tcg/ppc: Add support for vector add/subtract
      tcg/ppc: Add support for vector saturated add/subtract
      tcg/ppc: Support vector shift by immediate
      tcg/ppc: Support vector multiply
      tcg/ppc: Support vector dup2
      tcg/ppc: Enable Altivec detection
      tcg/ppc: Update vector support for VSX
      tcg/ppc: Update vector support for v2.07 Altivec
      tcg/ppc: Update vector support for v2.07 VSX
      tcg/ppc: Update vector support for v2.07 FP
      tcg/ppc: Update vector support for v3.00 Altivec
      tcg/ppc: Update vector support for v3.00 load/store
      tcg/ppc: Update vector support for v3.00 dup/dupi

 tcg/ppc/tcg-target.h | 51 ++-
 tcg/ppc/tcg-target.opc.h | 13 +
 cpus.c | 24 +-
 tcg/ppc/tcg-target.inc.c | 1118 ++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 1119 insertions(+), 87 deletions(-)
 create mode 100644 tcg/ppc/tcg-target.opc.h


The following changes since commit 7fe6cb68117ac856e03c93d18aca09de015392b0:

  Merge tag 'pull-target-arm-20230530-1' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-05-30 08:02:05 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230530

for you to fetch changes up to 276d77de503e8f5f5cbd3f7d94302ca12d1d982e:

  tests/decode: Add tests for various named-field cases (2023-05-30 10:55:39 -0700)

----------------------------------------------------------------
Improvements to 128-bit atomics:
  - Separate __int128_t type and arithmetic detection
  - Support 128-bit load/store in backend for i386, aarch64, ppc64, s390x
  - Accelerate atomics via host/include/
Decodetree:
  - Add named field syntax
  - Move tests to meson

----------------------------------------------------------------
Peter Maydell (5):
      docs: Document decodetree named field syntax
      scripts/decodetree: Pass lvalue-formatter function to str_extract()
      scripts/decodetree: Implement a topological sort
      scripts/decodetree: Implement named field support
      tests/decode: Add tests for various named-field cases

Richard Henderson (22):
      tcg: Fix register move type in tcg_out_ld_helper_ret
      accel/tcg: Fix check for page writeability in load_atomic16_or_exit
      meson: Split test for __int128_t type from __int128_t arithmetic
      qemu/atomic128: Add x86_64 atomic128-ldst.h
      tcg/i386: Support 128-bit load/store
      tcg/aarch64: Rename temporaries
      tcg/aarch64: Reserve TCG_REG_TMP1, TCG_REG_TMP2
      tcg/aarch64: Simplify constraints on qemu_ld/st
      tcg/aarch64: Support 128-bit load/store
      tcg/ppc: Support 128-bit load/store
      tcg/s390x: Support 128-bit load/store
      accel/tcg: Extract load_atom_extract_al16_or_al8 to host header
      accel/tcg: Extract store_atom_insert_al16 to host header
      accel/tcg: Add x86_64 load_atom_extract_al16_or_al8
      accel/tcg: Add aarch64 lse2 load_atom_extract_al16_or_al8
      accel/tcg: Add aarch64 store_atom_insert_al16
      tcg: Remove TCG_TARGET_TLB_DISPLACEMENT_BITS
      decodetree: Add --test-for-error
      decodetree: Fix recursion in prop_format and build_tree
      decodetree: Diagnose empty pattern group
      decodetree: Do not remove output_file from /dev
      tests/decode: Convert tests to meson

 docs/devel/decodetree.rst | 33 ++-
 meson.build | 15 +-
 host/include/aarch64/host/load-extract-al16-al8.h | 40 ++++
 host/include/aarch64/host/store-insert-al16.h | 47 ++++
 host/include/generic/host/load-extract-al16-al8.h | 45 ++++
 host/include/generic/host/store-insert-al16.h | 50 ++++
 host/include/x86_64/host/atomic128-ldst.h | 68 ++++++
 host/include/x86_64/host/load-extract-al16-al8.h | 50 ++++
 include/qemu/int128.h | 4 +-
 tcg/aarch64/tcg-target-con-set.h | 4 +-
 tcg/aarch64/tcg-target-con-str.h | 1 -
 tcg/aarch64/tcg-target.h | 12 +-
 tcg/arm/tcg-target.h | 1 -
 tcg/i386/tcg-target.h | 5 +-
 tcg/mips/tcg-target.h | 1 -
 tcg/ppc/tcg-target-con-set.h | 2 +
 tcg/ppc/tcg-target-con-str.h | 1 +
 tcg/ppc/tcg-target.h | 4 +-
 tcg/riscv/tcg-target.h | 1 -
 tcg/s390x/tcg-target-con-set.h | 2 +
 tcg/s390x/tcg-target.h | 3 +-
 tcg/sparc64/tcg-target.h | 1 -
 tcg/tci/tcg-target.h | 1 -
 tests/decode/err_field10.decode | 7 +
 tests/decode/err_field7.decode | 7 +
 tests/decode/err_field8.decode | 8 +
 tests/decode/err_field9.decode | 14 ++
 tests/decode/succ_named_field.decode | 19 ++
 tcg/tcg.c | 4 +-
 accel/tcg/ldst_atomicity.c.inc | 80 +------
 tcg/aarch64/tcg-target.c.inc | 243 +++++++++++++++-----
 tcg/i386/tcg-target.c.inc | 191 +++++++++++++++-
 tcg/ppc/tcg-target.c.inc | 108 ++++++++-
 tcg/s390x/tcg-target.c.inc | 107 ++++++++-
 scripts/decodetree.py | 265 ++++++++++++++++++--
 tests/decode/check.sh | 24 --
 tests/decode/meson.build | 64 ++++++
 tests/meson.build | 5 +-
 38 files changed, 1312 insertions(+), 225 deletions(-)
 create mode 100644 host/include/aarch64/host/load-extract-al16-al8.h
 create mode 100644 host/include/aarch64/host/store-insert-al16.h
 create mode 100644 host/include/generic/host/load-extract-al16-al8.h
 create mode 100644 host/include/generic/host/store-insert-al16.h
 create mode 100644 host/include/x86_64/host/atomic128-ldst.h
 create mode 100644 host/include/x86_64/host/load-extract-al16-al8.h
 create mode 100644 tests/decode/err_field10.decode
 create mode 100644 tests/decode/err_field7.decode
 create mode 100644 tests/decode/err_field8.decode
 create mode 100644 tests/decode/err_field9.decode
 create mode 100644 tests/decode/succ_named_field.decode
 delete mode 100755 tests/decode/check.sh
 create mode 100644 tests/decode/meson.build
The first move was incorrectly using TCG_TYPE_I32 while the second
move was correctly using TCG_TYPE_REG. This prevents a 64-bit host
from moving all 128-bits of the return value.

Fixes: ebebea53ef8 ("tcg: Support TCG_TYPE_I128 in tcg_out_{ld,st}_helper_{args,ret}")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
---
 tcg/tcg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

This is only used for 32-bit hosts.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.inc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
12
diff --git a/tcg/tcg.c b/tcg/tcg.c
10
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.inc.c
14
--- a/tcg/tcg.c
12
+++ b/tcg/ppc/tcg-target.inc.c
15
+++ b/tcg/tcg.c
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
16
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
14
}
17
mov[0].dst = ldst->datalo_reg;
15
break;
18
mov[0].src =
16
19
tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
17
+ case INDEX_op_dup2_vec:
20
- mov[0].dst_type = TCG_TYPE_I32;
18
+ assert(TCG_TARGET_REG_BITS == 32);
21
- mov[0].src_type = TCG_TYPE_I32;
19
+ /* With inputs a1 = xLxx, a2 = xHxx */
22
+ mov[0].dst_type = TCG_TYPE_REG;
20
+ tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */
23
+ mov[0].src_type = TCG_TYPE_REG;
21
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */
24
mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
22
+ tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */
25
23
+ return;
26
mov[1].dst = ldst->datahi_reg;
24
+
25
case INDEX_op_ppc_mrgh_vec:
26
insn = mrgh_op[vece];
27
break;
28
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
29
case INDEX_op_ppc_mulou_vec:
30
case INDEX_op_ppc_pkum_vec:
31
case INDEX_op_ppc_rotl_vec:
32
+ case INDEX_op_dup2_vec:
33
return &v_v_v;
34
case INDEX_op_not_vec:
35
case INDEX_op_dup_vec:
36
--
27
--
37
2.17.1
28
2.34.1
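
To see why the tcg_out_ld_helper_ret fix above matters, consider a minimal standalone C sketch (not QEMU code; the two helpers below are invented for illustration): a move typed as TCG_TYPE_I32 only preserves the low 32 bits of a 64-bit host register, so copying the two halves of an Int128 return value with that type silently drops the upper halves, while TCG_TYPE_REG moves the full register.

    #include <assert.h>
    #include <stdint.h>

    /* Illustration only: model a register move typed as TCG_TYPE_I32
     * (zero-extends from 32 bits) versus TCG_TYPE_REG (full width). */
    static uint64_t mov_typed_i32(uint64_t src) { return (uint32_t)src; }
    static uint64_t mov_typed_reg(uint64_t src) { return src; }

    int main(void)
    {
        uint64_t half = 0x123456789abcdef0ull;  /* one half of an Int128 return value */

        assert(mov_typed_reg(half) == half);            /* all 64 bits survive */
        assert(mov_typed_i32(half) == 0x9abcdef0ull);   /* high 32 bits lost */
        return 0;
    }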
38
39
PAGE_WRITE is current writability, as modified by TB protection;
PAGE_WRITE_ORG is the original page writability.

Fixes: cdfac37be0d ("accel/tcg: Honor atomicity of loads")
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/ldst_atomicity.c.inc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Introduce an enum to hold base < 2.06 < 3.00. Use macros to
preserve the existing have_isa_2_06 and have_isa_3_00 predicates.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.h | 12 ++++++++++--
 tcg/ppc/tcg-target.inc.c | 8 ++++----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
11
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
13
--- a/accel/tcg/ldst_atomicity.c.inc
14
+++ b/tcg/ppc/tcg-target.h
14
+++ b/accel/tcg/ldst_atomicity.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef enum {
15
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
16
TCG_AREG0 = TCG_REG_R27
16
* another process, because the fallback start_exclusive solution
17
} TCGReg;
17
* provides no protection across processes.
18
18
*/
19
-extern bool have_isa_2_06;
19
- if (!page_check_range(h2g(pv), 8, PAGE_WRITE)) {
20
-extern bool have_isa_3_00;
20
+ if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
21
+typedef enum {
21
uint64_t *p = __builtin_assume_aligned(pv, 8);
22
+ tcg_isa_base,
22
return *p;
23
+ tcg_isa_2_06,
24
+ tcg_isa_3_00,
25
+} TCGPowerISA;
26
+
27
+extern TCGPowerISA have_isa;
28
+
29
+#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
30
+#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
31
32
/* optional instructions automatically implemented */
33
#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
34
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/tcg/ppc/tcg-target.inc.c
37
+++ b/tcg/ppc/tcg-target.inc.c
38
@@ -XXX,XX +XXX,XX @@
39
40
static tcg_insn_unit *tb_ret_addr;
41
42
-bool have_isa_2_06;
43
-bool have_isa_3_00;
44
+TCGPowerISA have_isa;
45
46
#define HAVE_ISA_2_06 have_isa_2_06
47
#define HAVE_ISEL have_isa_2_06
48
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
49
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
50
unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
51
52
+ have_isa = tcg_isa_base;
53
if (hwcap & PPC_FEATURE_ARCH_2_06) {
54
- have_isa_2_06 = true;
55
+ have_isa = tcg_isa_2_06;
56
}
23
}
57
#ifdef PPC_FEATURE2_ARCH_3_00
24
@@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
58
if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
25
* another process, because the fallback start_exclusive solution
59
- have_isa_3_00 = true;
26
* provides no protection across processes.
60
+ have_isa = tcg_isa_3_00;
27
*/
28
- if (!page_check_range(h2g(p), 16, PAGE_WRITE)) {
29
+ if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
30
return *p;
61
}
31
}
62
#endif
32
#endif
63
64
--
33
--
65
2.17.1
34
2.34.1
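
The PAGE_WRITE vs. PAGE_WRITE_ORG distinction above can be pictured with a standalone analogy (plain POSIX code, not QEMU internals): a page that is write-protected only temporarily, the way QEMU protects pages holding translated code, is still "originally" writable, and that original permission is what decides whether a cmpxchg-based atomic read is safe.

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        long pagesz = sysconf(_SC_PAGESIZE);
        char *p = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return 1;
        }
        strcpy(p, "mapped writable");

        /* Analogue of TB protection: current protection drops to read-only,
         * but the page was created writable (PAGE_WRITE_ORG, in QEMU terms). */
        mprotect(p, pagesz, PROT_READ);
        printf("read-only now, contents: %s\n", p);

        /* Restoring the original protection makes read-modify-write legal again. */
        mprotect(p, pagesz, PROT_READ | PROT_WRITE);
        strcpy(p, "writable again");
        printf("%s\n", p);

        munmap(p, pagesz);
        return 0;
    }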
66
67
Older versions of clang have missing runtime functions for arithmetic
with -fsanitize=undefined (see 464e3671f9d5c), so we cannot use
__int128_t for implementing Int128. But __int128_t is present,
data movement works, and it can be used for atomic128.

Probe for both CONFIG_INT128_TYPE and CONFIG_INT128, adjust
qemu/int128.h to define Int128Alias if CONFIG_INT128_TYPE,
and adjust the meson probe for atomics to use has_int128_type.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 meson.build | 15 ++++++++++-----
 include/qemu/int128.h | 4 ++--
 2 files changed, 12 insertions(+), 7 deletions(-)

These new instructions are conditional on MSR.VEC for TX=1,
so we can consider these Altivec instructions.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 28 ++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
17
diff --git a/meson.build b/meson.build
11
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.inc.c
19
--- a/meson.build
13
+++ b/tcg/ppc/tcg-target.inc.c
20
+++ b/meson.build
14
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
21
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_ATOMIC64', cc.links('''
15
22
return 0;
16
#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
23
}'''))
17
#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
24
18
+#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
25
-has_int128 = cc.links('''
19
26
+has_int128_type = cc.compiles('''
20
#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */
27
+ __int128_t a;
21
#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */
28
+ __uint128_t b;
22
#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */
29
+ int main(void) { b = a; }''')
23
#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */
30
+config_host_data.set('CONFIG_INT128_TYPE', has_int128_type)
24
+#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */
31
+
25
+#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */
32
+has_int128 = has_int128_type and cc.links('''
26
33
__int128_t a;
27
#define RT(r) ((r)<<21)
34
__uint128_t b;
28
#define RS(r) ((r)<<21)
35
int main (void) {
29
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
36
@@ -XXX,XX +XXX,XX @@ has_int128 = cc.links('''
30
return;
37
a = a * a;
38
return 0;
39
}''')
40
-
41
config_host_data.set('CONFIG_INT128', has_int128)
42
43
-if has_int128
44
+if has_int128_type
45
# "do we have 128-bit atomics which are handled inline and specifically not
46
# via libatomic". The reason we can't use libatomic is documented in the
47
# comment starting "GCC is a house divided" in include/qemu/atomic128.h.
48
@@ -XXX,XX +XXX,XX @@ if has_int128
49
# __alignof(unsigned __int128) for the host.
50
atomic_test_128 = '''
51
int main(int ac, char **av) {
52
- unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
53
+ __uint128_t *p = __builtin_assume_aligned(av[ac - 1], 16);
54
p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
55
__atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
56
__atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
57
@@ -XXX,XX +XXX,XX @@ if has_int128
58
config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
59
int main(void)
60
{
61
- unsigned __int128 x = 0, y = 0;
62
+ __uint128_t x = 0, y = 0;
63
__sync_val_compare_and_swap_16(&x, y, x);
64
return 0;
31
}
65
}
32
}
66
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
33
+ if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
67
index XXXXXXX..XXXXXXX 100644
34
+ tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
68
--- a/include/qemu/int128.h
35
+ return;
69
+++ b/include/qemu/int128.h
36
+ }
70
@@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s)
37
71
* a possible structure and the native types. Ease parameter passing
38
/*
72
* via use of the transparent union extension.
39
* Otherwise we must load the value from the constant pool.
73
*/
40
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
74
-#ifdef CONFIG_INT128
41
TCGReg dst, TCGReg src)
75
+#ifdef CONFIG_INT128_TYPE
42
{
76
typedef union {
43
tcg_debug_assert(dst >= TCG_REG_V0);
77
__uint128_t u;
44
- tcg_debug_assert(src >= TCG_REG_V0);
78
__int128_t i;
45
+
79
@@ -XXX,XX +XXX,XX @@ typedef union {
46
+ /* Splat from integer reg allowed via constraints for v3.00. */
80
} Int128Alias __attribute__((transparent_union));
47
+ if (src < TCG_REG_V0) {
81
#else
48
+ tcg_debug_assert(have_isa_3_00);
82
typedef Int128 Int128Alias;
49
+ switch (vece) {
83
-#endif /* CONFIG_INT128 */
50
+ case MO_64:
84
+#endif /* CONFIG_INT128_TYPE */
51
+ tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
85
52
+ return true;
86
#endif /* INT128_H */
53
+ case MO_32:
54
+ tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
55
+ return true;
56
+ default:
57
+ /* Fail, so that we fall back on either dupm or mov+dup. */
58
+ return false;
59
+ }
60
+ }
61
62
/*
63
* Recall we use (or emulate) VSX integer loads, so the integer is
64
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
65
static const TCGTargetOpDef sub2
66
= { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
67
static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
68
+ static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
69
static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
70
static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
71
static const TCGTargetOpDef v_v_v_v
72
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
73
return &v_v_v;
74
case INDEX_op_not_vec:
75
case INDEX_op_neg_vec:
76
- case INDEX_op_dup_vec:
77
return &v_v;
78
+ case INDEX_op_dup_vec:
79
+ return have_isa_3_00 ? &v_vr : &v_v;
80
case INDEX_op_ld_vec:
81
case INDEX_op_st_vec:
82
case INDEX_op_dupm_vec:
83
--
87
--
84
2.17.1
88
2.34.1
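
The split described above boils down to two separate probes, which the meson hunk expresses as compile and link tests. Here is a standalone C sketch of the same distinction (illustrative only; the authoritative checks are the meson.build fragments in the patch): the first function needs only the __int128_t type and data movement, while the second performs arithmetic, which under -fsanitize=undefined can require compiler runtime helpers that older clang versions do not provide.

    /* Probe 1 (CONFIG_INT128_TYPE): the type exists and can be copied. */
    static __int128_t a;
    static __uint128_t b;

    int probe_int128_type(void)
    {
        b = a;              /* data movement only */
        return 0;
    }

    /* Probe 2 (CONFIG_INT128): arithmetic also works; with
     * -fsanitize=undefined this may need compiler runtime helpers
     * that older clang versions lack. */
    int probe_int128_arith(void)
    {
        a = a * a;
        return 0;
    }

    int main(void)
    {
        return probe_int128_type() | probe_int128_arith();
    }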
85
86
With CPUINFO_ATOMIC_VMOVDQA, we can perform proper atomic
load/store without cmpxchg16b.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 host/include/x86_64/host/atomic128-ldst.h | 68 +++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 host/include/x86_64/host/atomic128-ldst.h

These new instructions are a mix of those like LXSD that are
conditional only on MSR.VEC and those like LXV that are
conditional on MSR.VEC for TX=1. Thus, in the end, we can
consider all of these as Altivec instructions.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 47 ++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
11
diff --git a/host/include/x86_64/host/atomic128-ldst.h b/host/include/x86_64/host/atomic128-ldst.h
13
index XXXXXXX..XXXXXXX 100644
12
new file mode 100644
14
--- a/tcg/ppc/tcg-target.inc.c
13
index XXXXXXX..XXXXXXX
15
+++ b/tcg/ppc/tcg-target.inc.c
14
--- /dev/null
16
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
15
+++ b/host/include/x86_64/host/atomic128-ldst.h
17
#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
16
@@ -XXX,XX +XXX,XX @@
18
#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
17
+/*
19
#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */
18
+ * SPDX-License-Identifier: GPL-2.0-or-later
20
+#define LXV (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */
19
+ * Load/store for 128-bit atomic operations, x86_64 version.
21
+#define LXSD (OPCD(57) | 2) /* v3.00 */
20
+ *
22
+#define LXVWSX (XO31(364) | 1) /* v3.00, force tx=1 */
21
+ * Copyright (C) 2023 Linaro, Ltd.
23
22
+ *
24
#define STVX XO31(231)
23
+ * See docs/devel/atomics.rst for discussion about the guarantees each
25
#define STVEWX XO31(199)
24
+ * atomic primitive is meant to provide.
26
#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
25
+ */
27
#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */
26
+
28
+#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
27
+#ifndef AARCH64_ATOMIC128_LDST_H
29
+#define STXSD (OPCD(61) | 2) /* v3.00 */
28
+#define AARCH64_ATOMIC128_LDST_H
30
29
+
31
#define VADDSBS VX4(768)
30
+#ifdef CONFIG_INT128_TYPE
32
#define VADDUBS VX4(512)
31
+#include "host/cpuinfo.h"
33
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
32
+#include "tcg/debug-assert.h"
34
TCGReg base, tcg_target_long offset)
33
+
35
{
34
+/*
36
tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
35
+ * Through clang 16, with -mcx16, __atomic_load_n is incorrectly
37
- bool is_store = false;
36
+ * expanded to a read-write operation: lock cmpxchg16b.
38
+ bool is_int_store = false;
37
+ */
39
TCGReg rs = TCG_REG_TMP1;
38
+
40
39
+#define HAVE_ATOMIC128_RO likely(cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
41
switch (opi) {
40
+#define HAVE_ATOMIC128_RW 1
42
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
41
+
43
break;
42
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
44
}
43
+{
45
break;
44
+ Int128Alias r;
46
+ case LXSD:
45
+
47
+ case STXSD:
46
+ tcg_debug_assert(HAVE_ATOMIC128_RO);
48
+ align = 3;
47
+ asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr));
49
+ break;
48
+
50
+ case LXV:
49
+ return r.s;
51
+ case STXV:
50
+}
52
+ align = 15;
51
+
53
+ break;
52
+static inline Int128 atomic16_read_rw(Int128 *ptr)
54
case STD:
53
+{
55
align = 3;
54
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
56
/* FALLTHRU */
55
+ Int128Alias r;
57
case STB: case STH: case STW:
56
+
58
- is_store = true;
57
+ if (HAVE_ATOMIC128_RO) {
59
+ is_int_store = true;
58
+ asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align));
60
break;
59
+ } else {
61
}
60
+ r.i = __sync_val_compare_and_swap_16(ptr_align, 0, 0);
62
61
+ }
63
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
62
+ return r.s;
64
if (rs == base) {
63
+}
65
rs = TCG_REG_R0;
64
+
66
}
65
+static inline void atomic16_set(Int128 *ptr, Int128 val)
67
- tcg_debug_assert(!is_store || rs != rt);
66
+{
68
+ tcg_debug_assert(!is_int_store || rs != rt);
67
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
69
tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
68
+ Int128Alias new = { .s = val };
70
tcg_out32(s, opx | TAB(rt & 31, base, rs));
69
+
71
return;
70
+ if (HAVE_ATOMIC128_RO) {
72
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
71
+ asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.i));
73
case TCG_TYPE_V64:
72
+ } else {
74
tcg_debug_assert(ret >= TCG_REG_V0);
73
+ __int128_t old;
75
if (have_vsx) {
74
+ do {
76
- tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
75
+ old = *ptr_align;
77
+ tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
76
+ } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
78
+ ret, base, offset);
77
+ }
79
break;
78
+}
80
}
79
+#else
81
tcg_debug_assert((offset & 7) == 0);
80
+/* Provide QEMU_ERROR stubs. */
82
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
81
+#include "host/include/generic/host/atomic128-ldst.h"
83
case TCG_TYPE_V128:
82
+#endif
84
tcg_debug_assert(ret >= TCG_REG_V0);
83
+
85
tcg_debug_assert((offset & 15) == 0);
84
+#endif /* AARCH64_ATOMIC128_LDST_H */
86
- tcg_out_mem_long(s, 0, LVX, ret, base, offset);
87
+ tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
88
+ LVX, ret, base, offset);
89
break;
90
default:
91
g_assert_not_reached();
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
93
case TCG_TYPE_V64:
94
tcg_debug_assert(arg >= TCG_REG_V0);
95
if (have_vsx) {
96
- tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
97
+ tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
98
+ STXSDX, arg, base, offset);
99
break;
100
}
101
tcg_debug_assert((offset & 7) == 0);
102
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
103
break;
104
case TCG_TYPE_V128:
105
tcg_debug_assert(arg >= TCG_REG_V0);
106
- tcg_out_mem_long(s, 0, STVX, arg, base, offset);
107
+ tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
108
+ STVX, arg, base, offset);
109
break;
110
default:
111
g_assert_not_reached();
112
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
113
tcg_debug_assert(out >= TCG_REG_V0);
114
switch (vece) {
115
case MO_8:
116
- tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
117
+ if (have_isa_3_00) {
118
+ tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
119
+ } else {
120
+ tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
121
+ }
122
elt = extract32(offset, 0, 4);
123
#ifndef HOST_WORDS_BIGENDIAN
124
elt ^= 15;
125
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
126
break;
127
case MO_16:
128
tcg_debug_assert((offset & 1) == 0);
129
- tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
130
+ if (have_isa_3_00) {
131
+ tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
132
+ } else {
133
+ tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
134
+ }
135
elt = extract32(offset, 1, 3);
136
#ifndef HOST_WORDS_BIGENDIAN
137
elt ^= 7;
138
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
139
tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
140
break;
141
case MO_32:
142
+ if (have_isa_3_00) {
143
+ tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
144
+ break;
145
+ }
146
tcg_debug_assert((offset & 3) == 0);
147
tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
148
elt = extract32(offset, 2, 2);
149
--
85
--
150
2.17.1
86
2.34.1
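
A minimal standalone sketch of the x86_64 technique introduced in atomic128-ldst.h above, assuming a CPU on which aligned 16-byte VMOVDQA is atomic and a compiler invoked with -mcx16 (the flag and helper names below are illustrative, not the QEMU identifiers): prefer the vector move when the CPU guarantees atomicity, and otherwise fall back to cmpxchg16b, which is a locked read-modify-write and therefore needs the page to be writable even for a pure read.

    /* Build sketch: gcc -O2 -mcx16 -c atomic16_sketch.c */
    #include <stdbool.h>

    typedef __int128_t i128;

    /* Assume this flag is filled in from CPUID at startup, in the same
     * spirit as CPUINFO_ATOMIC_VMOVDQA in the patch. */
    extern bool have_atomic_vmovdqa;

    static inline i128 atomic16_read(i128 *ptr)
    {
        i128 *p = __builtin_assume_aligned(ptr, 16);
        i128 r;

        if (have_atomic_vmovdqa) {
            /* Aligned 16-byte vector load; atomic when the CPU says so. */
            asm("vmovdqa %1, %0" : "=x"(r) : "m"(*p));
        } else {
            /* cmpxchg16b with equal old/new values reads atomically,
             * but it is a locked RMW, so *ptr must be writable. */
            r = __sync_val_compare_and_swap_16(p, 0, 0);
        }
        return r;
    }

    static inline void atomic16_write(i128 *ptr, i128 val)
    {
        i128 *p = __builtin_assume_aligned(ptr, 16);

        if (have_atomic_vmovdqa) {
            asm("vmovdqa %1, %0" : "=m"(*p) : "x"(val));
        } else {
            i128 old;
            do {
                old = *p;
            } while (!__sync_bool_compare_and_swap_16(p, old, val));
        }
    }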
151
87
152
88
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.h | 4 +-
 tcg/i386/tcg-target.c.inc | 191 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 190 insertions(+), 5 deletions(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644

The VSX instruction set instructions include double-word loads and
stores, double-word load and splat, double-word permute, and bit
select. All of which require multiple operations in the Altivec
instruction set.

Because the VSX registers map %vsr32 to %vr0, and we have no current
intention or need to use vector registers outside %vr0-%vr19, force
on the {ax,bx,cx,tx} bits within the added VSX insns so that we don't
have to otherwise modify the VR[TABC] macros.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.h | 5 ++--
 tcg/ppc/tcg-target.inc.c | 52 ++++++++++++++++++++++++++++++++----
 2 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
10
--- a/tcg/i386/tcg-target.h
21
+++ b/tcg/ppc/tcg-target.h
11
+++ b/tcg/i386/tcg-target.h
22
@@ -XXX,XX +XXX,XX @@ typedef enum {
12
@@ -XXX,XX +XXX,XX @@ typedef enum {
23
13
#define have_avx1 (cpuinfo & CPUINFO_AVX1)
24
extern TCGPowerISA have_isa;
14
#define have_avx2 (cpuinfo & CPUINFO_AVX2)
25
extern bool have_altivec;
15
#define have_movbe (cpuinfo & CPUINFO_MOVBE)
26
+extern bool have_vsx;
16
-#define have_atomic16 (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
27
17
28
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
18
/*
29
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
19
* There are interesting instructions in AVX512, so long as we have AVX512VL,
30
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
20
@@ -XXX,XX +XXX,XX @@ typedef enum {
31
* instruction and substituting two 32-bit stores makes the generated
21
#define TCG_TARGET_HAS_qemu_st8_i32 1
32
* code quite large.
22
#endif
23
24
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
25
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
26
+ (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA))
27
28
/* We do not support older SSE systems, only beginning with AVX1. */
29
#define TCG_TARGET_HAS_v64 have_avx1
30
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
31
index XXXXXXX..XXXXXXX 100644
32
--- a/tcg/i386/tcg-target.c.inc
33
+++ b/tcg/i386/tcg-target.c.inc
34
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
35
#endif
36
};
37
38
+#define TCG_TMP_VEC TCG_REG_XMM5
39
+
40
static const int tcg_target_call_iarg_regs[] = {
41
#if TCG_TARGET_REG_BITS == 64
42
#if defined(_WIN64)
43
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
44
#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
45
#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
46
#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
47
+#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16)
48
+#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16)
49
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
50
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
51
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
52
@@ -XXX,XX +XXX,XX @@ typedef struct {
53
54
bool tcg_target_has_memory_bswap(MemOp memop)
55
{
56
- return have_movbe;
57
+ TCGAtomAlign aa;
58
+
59
+ if (!have_movbe) {
60
+ return false;
61
+ }
62
+ if ((memop & MO_SIZE) < MO_128) {
63
+ return true;
64
+ }
65
+
66
+ /*
67
+ * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
68
+ * but do allow a pair of 64-bit operations, i.e. MOVBEQ.
69
+ */
70
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
71
+ return aa.atom < MO_128;
72
}
73
74
/*
75
@@ -XXX,XX +XXX,XX @@ static const TCGLdstHelperParam ldst_helper_param = {
76
static const TCGLdstHelperParam ldst_helper_param = { };
77
#endif
78
79
+static void tcg_out_vec_to_pair(TCGContext *s, TCGType type,
80
+ TCGReg l, TCGReg h, TCGReg v)
81
+{
82
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
83
+
84
+ /* vpmov{d,q} %v, %l */
85
+ tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l);
86
+ /* vpextr{d,q} $1, %v, %h */
87
+ tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h);
88
+ tcg_out8(s, 1);
89
+}
90
+
91
+static void tcg_out_pair_to_vec(TCGContext *s, TCGType type,
92
+ TCGReg v, TCGReg l, TCGReg h)
93
+{
94
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
95
+
96
+ /* vmov{d,q} %l, %v */
97
+ tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l);
98
+ /* vpinsr{d,q} $1, %h, %v, %v */
99
+ tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h);
100
+ tcg_out8(s, 1);
101
+}
102
+
103
/*
104
* Generate code for the slow path for a load at the end of block
33
*/
105
*/
34
-#define TCG_TARGET_HAS_v64 0
106
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
35
+#define TCG_TARGET_HAS_v64 have_vsx
107
{
36
#define TCG_TARGET_HAS_v128 have_altivec
108
TCGLabelQemuLdst *ldst = NULL;
37
#define TCG_TARGET_HAS_v256 0
109
MemOp opc = get_memop(oi);
38
110
+ MemOp s_bits = opc & MO_SIZE;
39
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
111
unsigned a_mask;
40
#define TCG_TARGET_HAS_mul_vec 1
112
41
#define TCG_TARGET_HAS_sat_vec 1
113
#ifdef CONFIG_SOFTMMU
42
#define TCG_TARGET_HAS_minmax_vec 1
114
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
43
-#define TCG_TARGET_HAS_bitsel_vec 0
115
*h = x86_guest_base;
44
+#define TCG_TARGET_HAS_bitsel_vec have_vsx
116
#endif
45
#define TCG_TARGET_HAS_cmpsel_vec 0
117
h->base = addrlo;
46
118
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
47
void flush_icache_range(uintptr_t start, uintptr_t stop);
119
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
48
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
120
a_mask = (1 << h->aa.align) - 1;
49
index XXXXXXX..XXXXXXX 100644
121
50
--- a/tcg/ppc/tcg-target.inc.c
122
#ifdef CONFIG_SOFTMMU
51
+++ b/tcg/ppc/tcg-target.inc.c
123
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
52
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
124
TCGType tlbtype = TCG_TYPE_I32;
53
TCGPowerISA have_isa;
125
int trexw = 0, hrexw = 0, tlbrexw = 0;
54
static bool have_isel;
126
unsigned mem_index = get_mmuidx(oi);
55
bool have_altivec;
127
- unsigned s_bits = opc & MO_SIZE;
56
+bool have_vsx;
128
unsigned s_mask = (1 << s_bits) - 1;
57
129
int tlb_mask;
58
#ifndef CONFIG_SOFTMMU
130
59
#define TCG_GUEST_BASE_REG 30
131
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
60
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
132
h.base, h.index, 0, h.ofs + 4);
61
#define LVEBX XO31(7)
133
}
62
#define LVEHX XO31(39)
134
break;
63
#define LVEWX XO31(71)
135
+
64
+#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
136
+ case MO_128:
65
+#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
137
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
66
138
+
67
#define STVX XO31(231)
139
+ /*
68
#define STVEWX XO31(199)
140
+ * Without 16-byte atomicity, use integer regs.
69
+#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
141
+ * That is where we want the data, and it allows bswaps.
70
142
+ */
71
#define VADDSBS VX4(768)
143
+ if (h.aa.atom < MO_128) {
72
#define VADDUBS VX4(512)
144
+ if (use_movbe) {
73
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
145
+ TCGReg t = datalo;
74
146
+ datalo = datahi;
75
#define VSLDOI VX4(44)
147
+ datahi = t;
76
148
+ }
77
+#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
149
+ if (h.base == datalo || h.index == datalo) {
78
+#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
150
+ tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi,
79
+
151
+ h.base, h.index, 0, h.ofs);
80
#define RT(r) ((r)<<21)
152
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
81
#define RS(r) ((r)<<21)
153
+ datalo, datahi, 0);
82
#define RA(r) ((r)<<16)
154
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
83
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
155
+ datahi, datahi, 8);
84
add = 0;
156
+ } else {
85
}
157
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
86
158
+ h.base, h.index, 0, h.ofs);
87
- load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
159
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
88
- if (TCG_TARGET_REG_BITS == 64) {
160
+ h.base, h.index, 0, h.ofs + 8);
89
- new_pool_l2(s, rel, s->code_ptr, add, val, val);
161
+ }
90
+ if (have_vsx) {
91
+ load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
92
+ load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
93
+ if (TCG_TARGET_REG_BITS == 64) {
94
+ new_pool_label(s, val, rel, s->code_ptr, add);
95
+ } else {
96
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
97
+ }
98
} else {
99
- new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
100
+ load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
101
+ if (TCG_TARGET_REG_BITS == 64) {
102
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
103
+ } else {
104
+ new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
105
+ }
106
}
107
108
if (USE_REG_TB) {
109
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
110
/* fallthru */
111
case TCG_TYPE_V64:
112
tcg_debug_assert(ret >= TCG_REG_V0);
113
+ if (have_vsx) {
114
+ tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
115
+ break;
162
+ break;
116
+ }
163
+ }
117
tcg_debug_assert((offset & 7) == 0);
164
+
118
tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
165
+ /*
119
if (offset & 8) {
166
+ * With 16-byte atomicity, a vector load is required.
120
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
167
+ * If we already have 16-byte alignment, then VMOVDQA always works.
121
/* fallthru */
168
+ * Else if VMOVDQU has atomicity with dynamic alignment, use that.
122
case TCG_TYPE_V64:
169
+ * Else use we require a runtime test for alignment for VMOVDQA;
123
tcg_debug_assert(arg >= TCG_REG_V0);
170
+ * use VMOVDQU on the unaligned nonatomic path for simplicity.
124
+ if (have_vsx) {
171
+ */
125
+ tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
172
+ if (h.aa.align >= MO_128) {
173
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
174
+ TCG_TMP_VEC, 0,
175
+ h.base, h.index, 0, h.ofs);
176
+ } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
177
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg,
178
+ TCG_TMP_VEC, 0,
179
+ h.base, h.index, 0, h.ofs);
180
+ } else {
181
+ TCGLabel *l1 = gen_new_label();
182
+ TCGLabel *l2 = gen_new_label();
183
+
184
+ tcg_out_testi(s, h.base, 15);
185
+ tcg_out_jxx(s, JCC_JNE, l1, true);
186
+
187
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
188
+ TCG_TMP_VEC, 0,
189
+ h.base, h.index, 0, h.ofs);
190
+ tcg_out_jxx(s, JCC_JMP, l2, true);
191
+
192
+ tcg_out_label(s, l1);
193
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg,
194
+ TCG_TMP_VEC, 0,
195
+ h.base, h.index, 0, h.ofs);
196
+ tcg_out_label(s, l2);
197
+ }
198
+ tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo, datahi, TCG_TMP_VEC);
199
+ break;
200
+
201
default:
202
g_assert_not_reached();
203
}
204
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
205
h.base, h.index, 0, h.ofs + 4);
206
}
207
break;
208
+
209
+ case MO_128:
210
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
211
+
212
+ /*
213
+ * Without 16-byte atomicity, use integer regs.
214
+ * That is where we have the data, and it allows bswaps.
215
+ */
216
+ if (h.aa.atom < MO_128) {
217
+ if (use_movbe) {
218
+ TCGReg t = datalo;
219
+ datalo = datahi;
220
+ datahi = t;
221
+ }
222
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
223
+ h.base, h.index, 0, h.ofs);
224
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
225
+ h.base, h.index, 0, h.ofs + 8);
126
+ break;
226
+ break;
127
+ }
227
+ }
128
tcg_debug_assert((offset & 7) == 0);
228
+
129
if (offset & 8) {
229
+ /*
130
tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
230
+ * With 16-byte atomicity, a vector store is required.
131
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
231
+ * If we already have 16-byte alignment, then VMOVDQA always works.
132
case INDEX_op_shri_vec:
232
+ * Else if VMOVDQU has atomicity with dynamic alignment, use that.
133
case INDEX_op_sari_vec:
233
+ * Else use we require a runtime test for alignment for VMOVDQA;
134
return vece <= MO_32 ? -1 : 0;
234
+ * use VMOVDQU on the unaligned nonatomic path for simplicity.
135
+ case INDEX_op_bitsel_vec:
235
+ */
136
+ return have_vsx;
236
+ tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC, datalo, datahi);
237
+ if (h.aa.align >= MO_128) {
238
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
239
+ TCG_TMP_VEC, 0,
240
+ h.base, h.index, 0, h.ofs);
241
+ } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
242
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
243
+ TCG_TMP_VEC, 0,
244
+ h.base, h.index, 0, h.ofs);
245
+ } else {
246
+ TCGLabel *l1 = gen_new_label();
247
+ TCGLabel *l2 = gen_new_label();
248
+
249
+ tcg_out_testi(s, h.base, 15);
250
+ tcg_out_jxx(s, JCC_JNE, l1, true);
251
+
252
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
253
+ TCG_TMP_VEC, 0,
254
+ h.base, h.index, 0, h.ofs);
255
+ tcg_out_jxx(s, JCC_JMP, l2, true);
256
+
257
+ tcg_out_label(s, l1);
258
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
259
+ TCG_TMP_VEC, 0,
260
+ h.base, h.index, 0, h.ofs);
261
+ tcg_out_label(s, l2);
262
+ }
263
+ break;
264
+
137
default:
265
default:
138
return 0;
266
g_assert_not_reached();
139
}
267
}
140
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
268
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
141
tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
269
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
142
break;
143
case MO_64:
144
+ if (have_vsx) {
145
+ tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
146
+ break;
147
+ }
148
tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
149
tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
150
break;
151
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
152
tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
153
break;
154
case MO_64:
155
+ if (have_vsx) {
156
+ tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
157
+ break;
158
+ }
159
tcg_debug_assert((offset & 7) == 0);
160
tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
161
tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
162
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
163
}
270
}
164
break;
271
break;
165
272
+ case INDEX_op_qemu_ld_a32_i128:
166
+ case INDEX_op_bitsel_vec:
273
+ case INDEX_op_qemu_ld_a64_i128:
167
+ tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
274
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
168
+ return;
275
+ tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
169
+
276
+ break;
170
case INDEX_op_dup2_vec:
277
171
assert(TCG_TARGET_REG_BITS == 32);
278
case INDEX_op_qemu_st_a64_i32:
172
/* With inputs a1 = xLxx, a2 = xHxx */
279
case INDEX_op_qemu_st8_a64_i32:
173
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
280
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
174
case INDEX_op_st_vec:
281
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
175
case INDEX_op_dupm_vec:
282
}
176
return &v_r;
283
break;
177
+ case INDEX_op_bitsel_vec:
284
+ case INDEX_op_qemu_st_a32_i128:
178
case INDEX_op_ppc_msum_vec:
285
+ case INDEX_op_qemu_st_a64_i128:
179
return &v_v_v_v;
286
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
287
+ tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
288
+ break;
289
290
OP_32_64(mulu2):
291
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
292
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
293
case INDEX_op_qemu_st_a64_i64:
294
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);
295
296
+ case INDEX_op_qemu_ld_a32_i128:
297
+ case INDEX_op_qemu_ld_a64_i128:
298
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
299
+ return C_O2_I1(r, r, L);
300
+ case INDEX_op_qemu_st_a32_i128:
301
+ case INDEX_op_qemu_st_a64_i128:
302
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
303
+ return C_O0_I3(L, L, L);
304
+
305
case INDEX_op_brcond2_i32:
306
return C_O0_I4(r, r, ri, ri);
180
307
181
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
308
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
182
309
183
if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
310
s->reserved_regs = 0;
184
have_altivec = true;
311
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
185
+ /* We only care about the portion of VSX that overlaps Altivec. */
312
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
186
+ if (hwcap & PPC_FEATURE_HAS_VSX) {
313
#ifdef _WIN64
187
+ have_vsx = true;
314
/* These are call saved, and we don't save them, so don't use them. */
188
+ }
315
tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);
189
}
190
191
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
192
--
316
--
193
2.17.1
317
2.34.1
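
To make the load/store dispatch in the i386 hunks above concrete, here is a hedged C-level illustration using SSE intrinsics instead of the VMOVDQA/VMOVDQU instructions the backend actually emits: when 16-byte alignment is not known statically and unaligned vector accesses are not known to be atomic, the generated code tests the address at run time and takes the aligned (potentially atomic) or unaligned (non-atomic) path.

    #include <emmintrin.h>   /* SSE2 intrinsics */
    #include <stdint.h>

    /* Illustrative stand-in for the runtime-alignment test emitted by
     * tcg_out_qemu_ld_direct/tcg_out_qemu_st_direct for MO_128 accesses. */
    static inline __m128i load16_dispatch(const void *addr)
    {
        if (((uintptr_t)addr & 15) == 0) {
            /* Aligned: MOVDQA/VMOVDQA, the path that can be atomic. */
            return _mm_load_si128((const __m128i *)addr);
        }
        /* Unaligned: MOVDQU/VMOVDQU, correct but with no atomicity claim. */
        return _mm_loadu_si128((const __m128i *)addr);
    }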
194
195
We will need to allocate a second general-purpose temporary.
Rename the existing temps to add a distinguishing number.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.c.inc | 50 ++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

Add various bits and pieces related mostly to load and store
operations. In that context, logic, compare, and splat Altivec
instructions are used, and, therefore, the support for emitting
them is included in this patch too.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.h | 6 +-
 tcg/ppc/tcg-target.inc.c | 472 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 442 insertions(+), 36 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
14
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target.h
12
--- a/tcg/aarch64/tcg-target.c.inc
16
+++ b/tcg/ppc/tcg-target.h
13
+++ b/tcg/aarch64/tcg-target.c.inc
17
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
14
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
18
#define TCG_TARGET_HAS_v128 have_altivec
15
return TCG_REG_X0 + slot;
19
#define TCG_TARGET_HAS_v256 0
16
}
20
17
21
-#define TCG_TARGET_HAS_andc_vec 0
18
-#define TCG_REG_TMP TCG_REG_X30
22
+#define TCG_TARGET_HAS_andc_vec 1
19
-#define TCG_VEC_TMP TCG_REG_V31
23
#define TCG_TARGET_HAS_orc_vec 0
20
+#define TCG_REG_TMP0 TCG_REG_X30
24
-#define TCG_TARGET_HAS_not_vec 0
21
+#define TCG_VEC_TMP0 TCG_REG_V31
25
+#define TCG_TARGET_HAS_not_vec 1
22
26
#define TCG_TARGET_HAS_neg_vec 0
23
#ifndef CONFIG_SOFTMMU
27
#define TCG_TARGET_HAS_abs_vec 0
24
#define TCG_REG_GUEST_BASE TCG_REG_X28
28
#define TCG_TARGET_HAS_shi_vec 0
25
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
29
#define TCG_TARGET_HAS_shs_vec 0
26
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
30
#define TCG_TARGET_HAS_shv_vec 0
27
TCGReg r, TCGReg base, intptr_t offset)
31
-#define TCG_TARGET_HAS_cmp_vec 0
32
+#define TCG_TARGET_HAS_cmp_vec 1
33
#define TCG_TARGET_HAS_mul_vec 0
34
#define TCG_TARGET_HAS_sat_vec 0
35
#define TCG_TARGET_HAS_minmax_vec 0
36
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tcg/ppc/tcg-target.inc.c
39
+++ b/tcg/ppc/tcg-target.inc.c
40
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
41
ct->ct |= TCG_CT_REG;
42
ct->u.regs = 0xffffffff;
43
break;
44
+ case 'v':
45
+ ct->ct |= TCG_CT_REG;
46
+ ct->u.regs = 0xffffffff00000000ull;
47
+ break;
48
case 'L': /* qemu_ld constraint */
49
ct->ct |= TCG_CT_REG;
50
ct->u.regs = 0xffffffff;
51
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
52
53
#define NOP ORI /* ori 0,0,0 */
54
55
+#define LVX XO31(103)
56
+#define LVEBX XO31(7)
57
+#define LVEHX XO31(39)
58
+#define LVEWX XO31(71)
59
+
60
+#define STVX XO31(231)
61
+#define STVEWX XO31(199)
62
+
63
+#define VCMPEQUB VX4(6)
64
+#define VCMPEQUH VX4(70)
65
+#define VCMPEQUW VX4(134)
66
+#define VCMPGTSB VX4(774)
67
+#define VCMPGTSH VX4(838)
68
+#define VCMPGTSW VX4(902)
69
+#define VCMPGTUB VX4(518)
70
+#define VCMPGTUH VX4(582)
71
+#define VCMPGTUW VX4(646)
72
+
73
+#define VAND VX4(1028)
74
+#define VANDC VX4(1092)
75
+#define VNOR VX4(1284)
76
+#define VOR VX4(1156)
77
+#define VXOR VX4(1220)
78
+
79
+#define VSPLTB VX4(524)
80
+#define VSPLTH VX4(588)
81
+#define VSPLTW VX4(652)
82
+#define VSPLTISB VX4(780)
83
+#define VSPLTISH VX4(844)
84
+#define VSPLTISW VX4(908)
85
+
86
+#define VSLDOI VX4(44)
87
+
88
#define RT(r) ((r)<<21)
89
#define RS(r) ((r)<<21)
90
#define RA(r) ((r)<<16)
91
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
92
intptr_t value, intptr_t addend)
93
{
28
{
94
tcg_insn_unit *target;
29
- TCGReg temp = TCG_REG_TMP;
95
+ int16_t lo;
30
+ TCGReg temp = TCG_REG_TMP0;
96
+ int32_t hi;
31
97
32
if (offset < -0xffffff || offset > 0xffffff) {
98
value += addend;
33
tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
99
target = (tcg_insn_unit *)value;
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
100
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
101
}
102
*code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
103
break;
104
+ case R_PPC_ADDR32:
105
+ /*
106
+ * We are abusing this relocation type. Again, this points to
107
+ * a pair of insns, lis + load. This is an absolute address
108
+ * relocation for PPC32 so the lis cannot be removed.
109
+ */
110
+ lo = value;
111
+ hi = value - lo;
112
+ if (hi + lo != value) {
113
+ return false;
114
+ }
115
+ code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
116
+ code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
117
+ break;
118
default:
119
g_assert_not_reached();
120
}
35
}
121
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
36
37
/* Worst-case scenario, move offset to temp register, use reg offset. */
38
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
39
- tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
40
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
41
+ tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
42
}
122
43
123
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
44
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
124
{
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
125
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
46
if (offset == sextract64(offset, 0, 26)) {
126
- if (ret != arg) {
47
tcg_out_insn(s, 3206, BL, offset);
127
- tcg_out32(s, OR | SAB(arg, ret, arg));
48
} else {
128
+ if (ret == arg) {
49
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
129
+ return true;
50
- tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
130
+ }
51
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
131
+ switch (type) {
52
+ tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
132
+ case TCG_TYPE_I64:
133
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
134
+ /* fallthru */
135
+ case TCG_TYPE_I32:
136
+ if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
137
+ tcg_out32(s, OR | SAB(arg, ret, arg));
138
+ break;
139
+ } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
140
+ /* Altivec does not support vector/integer moves. */
141
+ return false;
142
+ }
143
+ /* fallthru */
144
+ case TCG_TYPE_V64:
145
+ case TCG_TYPE_V128:
146
+ tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
147
+ tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
148
+ break;
149
+ default:
150
+ g_assert_not_reached();
151
}
152
return true;
153
}
154
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
155
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
156
tcg_target_long val)
157
{
158
- g_assert_not_reached();
159
+ uint32_t load_insn;
160
+ int rel, low;
161
+ intptr_t add;
162
+
163
+ low = (int8_t)val;
164
+ if (low >= -16 && low < 16) {
165
+ if (val == (tcg_target_long)dup_const(MO_8, low)) {
166
+ tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
167
+ return;
168
+ }
169
+ if (val == (tcg_target_long)dup_const(MO_16, low)) {
170
+ tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
171
+ return;
172
+ }
173
+ if (val == (tcg_target_long)dup_const(MO_32, low)) {
174
+ tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
175
+ return;
176
+ }
177
+ }
178
+
179
+ /*
180
+ * Otherwise we must load the value from the constant pool.
181
+ */
182
+ if (USE_REG_TB) {
183
+ rel = R_PPC_ADDR16;
184
+ add = -(intptr_t)s->code_gen_ptr;
185
+ } else {
186
+ rel = R_PPC_ADDR32;
187
+ add = 0;
188
+ }
189
+
190
+ load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
191
+ if (TCG_TARGET_REG_BITS == 64) {
192
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
193
+ } else {
194
+ new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
195
+ }
196
+
197
+ if (USE_REG_TB) {
198
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
199
+ load_insn |= RA(TCG_REG_TB);
200
+ } else {
201
+ tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
202
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
203
+ }
204
+ tcg_out32(s, load_insn);
205
}
206
207
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
208
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
209
align = 3;
210
/* FALLTHRU */
211
default:
212
- if (rt != TCG_REG_R0) {
213
+ if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
214
rs = rt;
215
break;
216
}
217
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
218
}
219
220
/* For unaligned, or very large offsets, use the indexed form. */
221
- if (offset & align || offset != (int32_t)offset) {
222
+ if (offset & align || offset != (int32_t)offset || opi == 0) {
223
if (rs == base) {
224
rs = TCG_REG_R0;
225
}
226
tcg_debug_assert(!is_store || rs != rt);
227
tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
228
- tcg_out32(s, opx | TAB(rt, base, rs));
229
+ tcg_out32(s, opx | TAB(rt & 31, base, rs));
230
return;
231
}
232
233
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
234
base = rs;
235
}
236
if (opi != ADDI || base != rt || l0 != 0) {
237
- tcg_out32(s, opi | TAI(rt, base, l0));
238
+ tcg_out32(s, opi | TAI(rt & 31, base, l0));
239
}
53
}
240
}
54
}
241
55
242
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
56
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
243
- TCGReg arg1, intptr_t arg2)
57
AArch64Insn insn;
244
+static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
58
245
+ TCGReg va, TCGReg vb, int shb)
59
if (rl == ah || (!const_bh && rl == bh)) {
60
- rl = TCG_REG_TMP;
61
+ rl = TCG_REG_TMP0;
62
}
63
64
if (const_bl) {
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
66
possibility of adding 0+const in the low part, and the
67
immediate add instructions encode XSP not XZR. Don't try
68
anything more elaborate here than loading another zero. */
69
- al = TCG_REG_TMP;
70
+ al = TCG_REG_TMP0;
71
tcg_out_movi(s, ext, al, 0);
72
}
73
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
74
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
246
{
75
{
247
- int opi, opx;
76
TCGReg a1 = a0;
248
-
77
if (is_ctz) {
249
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
78
- a1 = TCG_REG_TMP;
250
- if (type == TCG_TYPE_I32) {
79
+ a1 = TCG_REG_TMP0;
251
- opi = LWZ, opx = LWZX;
80
tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
252
- } else {
81
}
253
- opi = LD, opx = LDX;
82
if (const_b && b == (ext ? 64 : 32)) {
254
- }
83
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
255
- tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
84
AArch64Insn sel = I3506_CSEL;
256
+ tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
85
86
tcg_out_cmp(s, ext, a0, 0, 1);
87
- tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
88
+ tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
89
90
if (const_b) {
91
if (b == -1) {
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
93
b = d;
94
}
95
}
96
- tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
97
+ tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
98
}
257
}
99
}
258
100
259
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
101
@@ -XXX,XX +XXX,XX @@ bool tcg_target_has_memory_bswap(MemOp memop)
260
- TCGReg arg1, intptr_t arg2)
102
}
261
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
103
262
+ TCGReg base, intptr_t offset)
104
static const TCGLdstHelperParam ldst_helper_param = {
263
{
105
- .ntmp = 1, .tmp = { TCG_REG_TMP }
264
- int opi, opx;
106
+ .ntmp = 1, .tmp = { TCG_REG_TMP0 }
265
+ int shift;
107
};
266
108
267
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
109
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
268
- if (type == TCG_TYPE_I32) {
110
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
269
- opi = STW, opx = STWX;
111
270
- } else {
112
set_jmp_insn_offset(s, which);
271
- opi = STD, opx = STDX;
113
tcg_out32(s, I3206_B);
272
+ switch (type) {
114
- tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
273
+ case TCG_TYPE_I32:
115
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
274
+ if (ret < TCG_REG_V0) {
116
set_jmp_reset_offset(s, which);
275
+ tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
117
}
276
+ break;
118
277
+ }
119
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
278
+ tcg_debug_assert((offset & 3) == 0);
120
ptrdiff_t i_offset = i_addr - jmp_rx;
279
+ tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
121
280
+ shift = (offset - 4) & 0xc;
122
/* Note that we asserted this in range in tcg_out_goto_tb. */
281
+ if (shift) {
123
- insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
282
+ tcg_out_vsldoi(s, ret, ret, ret, shift);
124
+ insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
283
+ }
284
+ break;
285
+ case TCG_TYPE_I64:
286
+ if (ret < TCG_REG_V0) {
287
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
288
+ tcg_out_mem_long(s, LD, LDX, ret, base, offset);
289
+ break;
290
+ }
291
+ /* fallthru */
292
+ case TCG_TYPE_V64:
293
+ tcg_debug_assert(ret >= TCG_REG_V0);
294
+ tcg_debug_assert((offset & 7) == 0);
295
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
296
+ if (offset & 8) {
297
+ tcg_out_vsldoi(s, ret, ret, ret, 8);
298
+ }
299
+ break;
300
+ case TCG_TYPE_V128:
301
+ tcg_debug_assert(ret >= TCG_REG_V0);
302
+ tcg_debug_assert((offset & 15) == 0);
303
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset);
304
+ break;
305
+ default:
306
+ g_assert_not_reached();
307
+ }
308
+}
309
+
310
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
311
+ TCGReg base, intptr_t offset)
312
+{
313
+ int shift;
314
+
315
+ switch (type) {
316
+ case TCG_TYPE_I32:
317
+ if (arg < TCG_REG_V0) {
318
+ tcg_out_mem_long(s, STW, STWX, arg, base, offset);
319
+ break;
320
+ }
321
+ tcg_debug_assert((offset & 3) == 0);
322
+ shift = (offset - 4) & 0xc;
323
+ if (shift) {
324
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
325
+ arg = TCG_VEC_TMP1;
326
+ }
327
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
328
+ break;
329
+ case TCG_TYPE_I64:
330
+ if (arg < TCG_REG_V0) {
331
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
332
+ tcg_out_mem_long(s, STD, STDX, arg, base, offset);
333
+ break;
334
+ }
335
+ /* fallthru */
336
+ case TCG_TYPE_V64:
337
+ tcg_debug_assert(arg >= TCG_REG_V0);
338
+ tcg_debug_assert((offset & 7) == 0);
339
+ if (offset & 8) {
340
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
341
+ arg = TCG_VEC_TMP1;
342
+ }
343
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
344
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
345
+ break;
346
+ case TCG_TYPE_V128:
347
+ tcg_debug_assert(arg >= TCG_REG_V0);
348
+ tcg_out_mem_long(s, 0, STVX, arg, base, offset);
349
+ break;
350
+ default:
351
+ g_assert_not_reached();
352
}
125
}
353
- tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
126
qatomic_set((uint32_t *)jmp_rw, insn);
127
flush_idcache_range(jmp_rx, jmp_rw, 4);
128
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
129
130
case INDEX_op_rem_i64:
131
case INDEX_op_rem_i32:
132
- tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
133
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
134
+ tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
135
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
136
break;
137
case INDEX_op_remu_i64:
138
case INDEX_op_remu_i32:
139
- tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
140
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
141
+ tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
142
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
143
break;
144
145
case INDEX_op_shl_i64:
146
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
147
if (c2) {
148
tcg_out_rotl(s, ext, a0, a1, a2);
149
} else {
150
- tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
151
- tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
152
+ tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
153
+ tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
154
}
155
break;
156
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
158
break;
159
}
160
}
161
- tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
162
- a2 = TCG_VEC_TMP;
163
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
164
+ a2 = TCG_VEC_TMP0;
165
}
166
if (is_scalar) {
167
insn = cmp_scalar_insn[cond];
168
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
169
s->reserved_regs = 0;
170
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
171
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
172
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
173
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
174
- tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
175
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
176
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
354
}
177
}
355
178
356
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
179
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
357
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
358
359
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
360
{
361
- g_assert_not_reached();
362
+ switch (opc) {
363
+ case INDEX_op_and_vec:
364
+ case INDEX_op_or_vec:
365
+ case INDEX_op_xor_vec:
366
+ case INDEX_op_andc_vec:
367
+ case INDEX_op_not_vec:
368
+ return 1;
369
+ case INDEX_op_cmp_vec:
370
+ return vece <= MO_32 ? -1 : 0;
371
+ default:
372
+ return 0;
373
+ }
374
}
375
376
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
377
TCGReg dst, TCGReg src)
378
{
379
- g_assert_not_reached();
380
+ tcg_debug_assert(dst >= TCG_REG_V0);
381
+ tcg_debug_assert(src >= TCG_REG_V0);
382
+
383
+ /*
384
+ * Recall we use (or emulate) VSX integer loads, so the integer is
385
+ * right justified within the left (zero-index) double-word.
386
+ */
387
+ switch (vece) {
388
+ case MO_8:
389
+ tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
390
+ break;
391
+ case MO_16:
392
+ tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
393
+ break;
394
+ case MO_32:
395
+ tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
396
+ break;
397
+ case MO_64:
398
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
399
+ tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
400
+ break;
401
+ default:
402
+ g_assert_not_reached();
403
+ }
404
+ return true;
405
}
406
407
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
408
TCGReg out, TCGReg base, intptr_t offset)
409
{
410
- g_assert_not_reached();
411
+ int elt;
412
+
413
+ tcg_debug_assert(out >= TCG_REG_V0);
414
+ switch (vece) {
415
+ case MO_8:
416
+ tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
417
+ elt = extract32(offset, 0, 4);
418
+#ifndef HOST_WORDS_BIGENDIAN
419
+ elt ^= 15;
420
+#endif
421
+ tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
422
+ break;
423
+ case MO_16:
424
+ tcg_debug_assert((offset & 1) == 0);
425
+ tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
426
+ elt = extract32(offset, 1, 3);
427
+#ifndef HOST_WORDS_BIGENDIAN
428
+ elt ^= 7;
429
+#endif
430
+ tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
431
+ break;
432
+ case MO_32:
433
+ tcg_debug_assert((offset & 3) == 0);
434
+ tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
435
+ elt = extract32(offset, 2, 2);
436
+#ifndef HOST_WORDS_BIGENDIAN
437
+ elt ^= 3;
438
+#endif
439
+ tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
440
+ break;
441
+ case MO_64:
442
+ tcg_debug_assert((offset & 7) == 0);
443
+ tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
444
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
445
+ elt = extract32(offset, 3, 1);
446
+#ifndef HOST_WORDS_BIGENDIAN
447
+ elt = !elt;
448
+#endif
449
+ if (elt) {
450
+ tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
451
+ } else {
452
+ tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
453
+ }
454
+ break;
455
+ default:
456
+ g_assert_not_reached();
457
+ }
458
+ return true;
459
}
460
461
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
462
unsigned vecl, unsigned vece,
463
const TCGArg *args, const int *const_args)
464
{
465
- g_assert_not_reached();
466
+ static const uint32_t
467
+ eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
468
+ gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
469
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 };
470
+
471
+ TCGType type = vecl + TCG_TYPE_V64;
472
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
473
+ uint32_t insn;
474
+
475
+ switch (opc) {
476
+ case INDEX_op_ld_vec:
477
+ tcg_out_ld(s, type, a0, a1, a2);
478
+ return;
479
+ case INDEX_op_st_vec:
480
+ tcg_out_st(s, type, a0, a1, a2);
481
+ return;
482
+ case INDEX_op_dupm_vec:
483
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
484
+ return;
485
+
486
+ case INDEX_op_and_vec:
487
+ insn = VAND;
488
+ break;
489
+ case INDEX_op_or_vec:
490
+ insn = VOR;
491
+ break;
492
+ case INDEX_op_xor_vec:
493
+ insn = VXOR;
494
+ break;
495
+ case INDEX_op_andc_vec:
496
+ insn = VANDC;
497
+ break;
498
+ case INDEX_op_not_vec:
499
+ insn = VNOR;
500
+ a2 = a1;
501
+ break;
502
+
503
+ case INDEX_op_cmp_vec:
504
+ switch (args[3]) {
505
+ case TCG_COND_EQ:
506
+ insn = eq_op[vece];
507
+ break;
508
+ case TCG_COND_GT:
509
+ insn = gts_op[vece];
510
+ break;
511
+ case TCG_COND_GTU:
512
+ insn = gtu_op[vece];
513
+ break;
514
+ default:
515
+ g_assert_not_reached();
516
+ }
517
+ break;
518
+
519
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
520
+ case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
521
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
522
+ default:
523
+ g_assert_not_reached();
524
+ }
525
+
526
+ tcg_debug_assert(insn != 0);
527
+ tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
528
+}
529
+
530
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
531
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
532
+{
533
+ bool need_swap = false, need_inv = false;
534
+
535
+ tcg_debug_assert(vece <= MO_32);
536
+
537
+ switch (cond) {
538
+ case TCG_COND_EQ:
539
+ case TCG_COND_GT:
540
+ case TCG_COND_GTU:
541
+ break;
542
+ case TCG_COND_NE:
543
+ case TCG_COND_LE:
544
+ case TCG_COND_LEU:
545
+ need_inv = true;
546
+ break;
547
+ case TCG_COND_LT:
548
+ case TCG_COND_LTU:
549
+ need_swap = true;
550
+ break;
551
+ case TCG_COND_GE:
552
+ case TCG_COND_GEU:
553
+ need_swap = need_inv = true;
554
+ break;
555
+ default:
556
+ g_assert_not_reached();
557
+ }
558
+
559
+ if (need_inv) {
560
+ cond = tcg_invert_cond(cond);
561
+ }
562
+ if (need_swap) {
563
+ TCGv_vec t1;
564
+ t1 = v1, v1 = v2, v2 = t1;
565
+ cond = tcg_swap_cond(cond);
566
+ }
567
+
568
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
569
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
570
+
571
+ if (need_inv) {
572
+ tcg_gen_not_vec(vece, v0, v0);
573
+ }
574
}
575
576
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
577
TCGArg a0, ...)
578
{
579
- g_assert_not_reached();
580
+ va_list va;
581
+ TCGv_vec v0, v1, v2;
582
+
583
+ va_start(va, a0);
584
+ v0 = temp_tcgv_vec(arg_temp(a0));
585
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
586
+ v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
587
+
588
+ switch (opc) {
589
+ case INDEX_op_cmp_vec:
590
+ expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
591
+ break;
592
+ default:
593
+ g_assert_not_reached();
594
+ }
595
+ va_end(va);
596
}
597
598
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
599
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
600
= { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
601
static const TCGTargetOpDef sub2
602
= { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
603
+ static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
604
+ static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
605
+ static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
606
607
switch (op) {
608
case INDEX_op_goto_ptr:
609
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
610
return (TCG_TARGET_REG_BITS == 64 ? &S_S
611
: TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
612
613
+ case INDEX_op_and_vec:
614
+ case INDEX_op_or_vec:
615
+ case INDEX_op_xor_vec:
616
+ case INDEX_op_andc_vec:
617
+ case INDEX_op_orc_vec:
618
+ case INDEX_op_cmp_vec:
619
+ return &v_v_v;
620
+ case INDEX_op_not_vec:
621
+ case INDEX_op_dup_vec:
622
+ return &v_v;
623
+ case INDEX_op_ld_vec:
624
+ case INDEX_op_st_vec:
625
+ case INDEX_op_dupm_vec:
626
+ return &v_r;
627
+
628
default:
629
return NULL;
630
}
631
--
180
--
632
2.17.1
181
2.34.1
633
634
1
Previously we've been hard-coding knowledge that Power7 has ISEL, but
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
it was an optional instruction before that. Use the AT_HWCAP2 bit,
3
when present, to properly determine support.
4
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
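A rough sketch of the AT_HWCAP2 detection described in the ppc commit message
above (illustration only, not part of the patch). It assumes a Linux host
where glibc's <sys/auxv.h> provides getauxval()/AT_HWCAP2 and, on powerpc,
defines the PPC_FEATURE2_HAS_ISEL bit; the backend itself caches the result
in 'have_isel' during tcg_target_init() rather than querying repeatedly.

    #include <stdbool.h>
    #include <sys/auxv.h>   /* getauxval(), AT_HWCAP2 */

    /* Hypothetical standalone helper mirroring the patch's logic. */
    static bool detect_isel(bool have_isa_2_06)
    {
    #ifdef PPC_FEATURE2_HAS_ISEL
        /* Prefer the explicit bit reported by the kernel. */
        return (getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_ISEL) != 0;
    #else
        /* Fall back to knowing that Power7 (ISA 2.06) has ISEL. */
        return have_isa_2_06;
    #endif
    }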
8
tcg/ppc/tcg-target.inc.c | 17 ++++++++++++-----
4
tcg/aarch64/tcg-target.c.inc | 9 +++++++--
9
1 file changed, 12 insertions(+), 5 deletions(-)
5
1 file changed, 7 insertions(+), 2 deletions(-)
10
6
11
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
7
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.inc.c
9
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/ppc/tcg-target.inc.c
10
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@
11
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
16
static tcg_insn_unit *tb_ret_addr;
12
17
13
TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
18
TCGPowerISA have_isa;
14
TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
19
-
15
- TCG_REG_X16, TCG_REG_X17,
20
-#define HAVE_ISEL have_isa_2_06
16
21
+static bool have_isel;
17
TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
18
TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
19
20
+ /* X16 reserved as temporary */
21
+ /* X17 reserved as temporary */
22
/* X18 reserved by system */
23
/* X19 reserved for AREG0 */
24
/* X29 reserved as fp */
25
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
26
return TCG_REG_X0 + slot;
27
}
28
29
-#define TCG_REG_TMP0 TCG_REG_X30
30
+#define TCG_REG_TMP0 TCG_REG_X16
31
+#define TCG_REG_TMP1 TCG_REG_X17
32
+#define TCG_REG_TMP2 TCG_REG_X30
33
#define TCG_VEC_TMP0 TCG_REG_V31
22
34
23
#ifndef CONFIG_SOFTMMU
35
#ifndef CONFIG_SOFTMMU
24
#define TCG_GUEST_BASE_REG 30
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
26
/* If we have ISEL, we can implement everything with 3 or 4 insns.
27
All other cases below are also at least 3 insns, so speed up the
28
code generator by not considering them and always using ISEL. */
29
- if (HAVE_ISEL) {
30
+ if (have_isel) {
31
int isel, tab;
32
33
tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
35
36
tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
37
38
- if (HAVE_ISEL) {
39
+ if (have_isel) {
40
int isel = tcg_to_isel[cond];
41
42
/* Swap the V operands if the operation indicates inversion. */
43
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
44
} else {
45
tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
46
/* Note that the only other valid constant for a2 is 0. */
47
- if (HAVE_ISEL) {
48
+ if (have_isel) {
49
tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
50
tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
51
} else if (!const_a2 && a0 == a2) {
52
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
36
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
53
}
37
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
54
#endif
38
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
55
39
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
56
+#ifdef PPC_FEATURE2_HAS_ISEL
40
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
57
+ /* Prefer explicit instruction from the kernel. */
41
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
58
+ have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
42
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
59
+#else
43
}
60
+ /* Fall back to knowing Power7 (2.06) has ISEL. */
61
+ have_isel = have_isa_2_06;
62
+#endif
63
+
64
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
65
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
66
44
67
--
45
--
68
2.17.1
46
2.34.1
69
70
1
These new instructions are conditional only on MSR.VEC and
1
Adjust the softmmu tlb to use TMP[0-2], not any of the normally available
2
are thus part of the Altivec instruction set, and not VSX.
2
registers. Since we handle overlap between inputs and helper arguments,
3
This includes negation and compare not equal.
3
we can allow any allocatable reg.
4
4
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/ppc/tcg-target.h | 2 +-
8
tcg/aarch64/tcg-target-con-set.h | 2 --
9
tcg/ppc/tcg-target.inc.c | 23 +++++++++++++++++++++++
9
tcg/aarch64/tcg-target-con-str.h | 1 -
10
2 files changed, 24 insertions(+), 1 deletion(-)
10
tcg/aarch64/tcg-target.c.inc | 45 ++++++++++++++------------------
11
3 files changed, 19 insertions(+), 29 deletions(-)
11
12
12
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
13
diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/ppc/tcg-target.h
15
--- a/tcg/aarch64/tcg-target-con-set.h
15
+++ b/tcg/ppc/tcg-target.h
16
+++ b/tcg/aarch64/tcg-target-con-set.h
16
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
17
@@ -XXX,XX +XXX,XX @@
17
#define TCG_TARGET_HAS_andc_vec 1
18
* tcg-target-con-str.h; the constraint combination is inclusive or.
18
#define TCG_TARGET_HAS_orc_vec have_isa_2_07
19
*/
19
#define TCG_TARGET_HAS_not_vec 1
20
C_O0_I1(r)
20
-#define TCG_TARGET_HAS_neg_vec 0
21
-C_O0_I2(lZ, l)
21
+#define TCG_TARGET_HAS_neg_vec have_isa_3_00
22
C_O0_I2(r, rA)
22
#define TCG_TARGET_HAS_abs_vec 0
23
C_O0_I2(rZ, r)
23
#define TCG_TARGET_HAS_shi_vec 0
24
C_O0_I2(w, r)
24
#define TCG_TARGET_HAS_shs_vec 0
25
-C_O1_I1(r, l)
25
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
26
C_O1_I1(r, r)
27
C_O1_I1(w, r)
28
C_O1_I1(w, w)
29
diff --git a/tcg/aarch64/tcg-target-con-str.h b/tcg/aarch64/tcg-target-con-str.h
26
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
27
--- a/tcg/ppc/tcg-target.inc.c
31
--- a/tcg/aarch64/tcg-target-con-str.h
28
+++ b/tcg/ppc/tcg-target.inc.c
32
+++ b/tcg/aarch64/tcg-target-con-str.h
29
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
33
@@ -XXX,XX +XXX,XX @@
30
#define VSUBUWM VX4(1152)
34
* REGS(letter, register_mask)
31
#define VSUBUDM VX4(1216) /* v2.07 */
35
*/
32
36
REGS('r', ALL_GENERAL_REGS)
33
+#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */
37
-REGS('l', ALL_QLDST_REGS)
34
+#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */
38
REGS('w', ALL_VECTOR_REGS)
35
+
39
36
#define VMAXSB VX4(258)
40
/*
37
#define VMAXSH VX4(322)
41
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
38
#define VMAXSW VX4(386)
42
index XXXXXXX..XXXXXXX 100644
39
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
43
--- a/tcg/aarch64/tcg-target.c.inc
40
#define VCMPGTUH VX4(582)
44
+++ b/tcg/aarch64/tcg-target.c.inc
41
#define VCMPGTUW VX4(646)
45
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
42
#define VCMPGTUD VX4(711) /* v2.07 */
46
#define ALL_GENERAL_REGS 0xffffffffu
43
+#define VCMPNEB VX4(7) /* v3.00 */
47
#define ALL_VECTOR_REGS 0xffffffff00000000ull
44
+#define VCMPNEH VX4(71) /* v3.00 */
48
45
+#define VCMPNEW VX4(135) /* v3.00 */
49
-#ifdef CONFIG_SOFTMMU
46
50
-#define ALL_QLDST_REGS \
47
#define VSLB VX4(260)
51
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
48
#define VSLH VX4(324)
52
- (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
49
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
53
-#else
50
case INDEX_op_shri_vec:
54
-#define ALL_QLDST_REGS ALL_GENERAL_REGS
51
case INDEX_op_sari_vec:
55
-#endif
52
return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
56
-
53
+ case INDEX_op_neg_vec:
57
/* Match a constant valid for addition (12-bit, optionally shifted). */
54
+ return vece >= MO_32 && have_isa_3_00;
58
static inline bool is_aimm(uint64_t val)
55
case INDEX_op_mul_vec:
59
{
56
switch (vece) {
60
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
57
case MO_8:
61
unsigned s_bits = opc & MO_SIZE;
58
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
62
unsigned s_mask = (1u << s_bits) - 1;
59
static const uint32_t
63
unsigned mem_index = get_mmuidx(oi);
60
add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
64
- TCGReg x3;
61
sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
65
+ TCGReg addr_adj;
62
+ neg_op[4] = { 0, 0, VNEGW, VNEGD },
66
TCGType mask_type;
63
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
67
uint64_t compare_mask;
64
+ ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
68
65
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
69
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
66
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
70
mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
67
ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
71
? TCG_TYPE_I64 : TCG_TYPE_I32);
68
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
72
69
case INDEX_op_sub_vec:
73
- /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
70
insn = sub_op[vece];
74
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
71
break;
75
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
72
+ case INDEX_op_neg_vec:
76
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
73
+ insn = neg_op[vece];
77
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
74
+ a2 = a1;
78
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
75
+ a1 = 0;
79
- tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
76
+ break;
80
+ tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
77
case INDEX_op_mul_vec:
81
TLB_MASK_TABLE_OFS(mem_index), 1, 0);
78
tcg_debug_assert(vece == MO_32 && have_isa_2_07);
82
79
insn = VMULUWM;
83
/* Extract the TLB index from the address into X0. */
80
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
84
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
81
case TCG_COND_EQ:
85
- TCG_REG_X0, TCG_REG_X0, addr_reg,
82
insn = eq_op[vece];
86
+ TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
83
break;
87
s->page_bits - CPU_TLB_ENTRY_BITS);
84
+ case TCG_COND_NE:
88
85
+ insn = ne_op[vece];
89
- /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
86
+ break;
90
- tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
87
case TCG_COND_GT:
91
+ /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
88
insn = gts_op[vece];
92
+ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
89
break;
93
90
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
94
- /* Load the tlb comparator into X0, and the fast path addend into X1. */
91
case TCG_COND_GTU:
95
- tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
92
break;
96
+ /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
93
case TCG_COND_NE:
97
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
94
+ if (have_isa_3_00 && vece <= MO_32) {
98
is_ld ? offsetof(CPUTLBEntry, addr_read)
95
+ break;
99
: offsetof(CPUTLBEntry, addr_write));
96
+ }
100
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
97
+ /* fall through */
101
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
98
case TCG_COND_LE:
102
offsetof(CPUTLBEntry, addend));
99
case TCG_COND_LEU:
103
100
need_inv = true;
104
/*
101
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
105
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
102
case INDEX_op_dup2_vec:
106
* cross pages using the address of the last byte of the access.
103
return &v_v_v;
107
*/
104
case INDEX_op_not_vec:
108
if (a_mask >= s_mask) {
105
+ case INDEX_op_neg_vec:
109
- x3 = addr_reg;
106
case INDEX_op_dup_vec:
110
+ addr_adj = addr_reg;
107
return &v_v;
111
} else {
108
case INDEX_op_ld_vec:
112
+ addr_adj = TCG_REG_TMP2;
113
tcg_out_insn(s, 3401, ADDI, addr_type,
114
- TCG_REG_X3, addr_reg, s_mask - a_mask);
115
- x3 = TCG_REG_X3;
116
+ addr_adj, addr_reg, s_mask - a_mask);
117
}
118
compare_mask = (uint64_t)s->page_mask | a_mask;
119
120
- /* Store the page mask part of the address into X3. */
121
- tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
122
+ /* Store the page mask part of the address into TMP2. */
123
+ tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
124
+ addr_adj, compare_mask);
125
126
/* Perform the address comparison. */
127
- tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
128
+ tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
129
130
/* If not equal, we jump to the slow path. */
131
ldst->label_ptr[0] = s->code_ptr;
132
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
133
134
- h->base = TCG_REG_X1,
135
+ h->base = TCG_REG_TMP1;
136
h->index = addr_reg;
137
h->index_ext = addr_type;
138
#else
139
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
140
case INDEX_op_qemu_ld_a64_i32:
141
case INDEX_op_qemu_ld_a32_i64:
142
case INDEX_op_qemu_ld_a64_i64:
143
- return C_O1_I1(r, l);
144
+ return C_O1_I1(r, r);
145
case INDEX_op_qemu_st_a32_i32:
146
case INDEX_op_qemu_st_a64_i32:
147
case INDEX_op_qemu_st_a32_i64:
148
case INDEX_op_qemu_st_a64_i64:
149
- return C_O0_I2(lZ, l);
150
+ return C_O0_I2(rZ, r);
151
152
case INDEX_op_deposit_i32:
153
case INDEX_op_deposit_i64:
109
--
154
--
110
2.17.1
155
2.34.1
111
112
1
For Altivec, this is always an expansion.
1
With FEAT_LSE2, LDP/STP suffices. Without FEAT_LSE2, use LDXP+STXP if
2
16-byte atomicity is required and LDP/STP otherwise.
2
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
5
---
6
---
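For reference only (not part of the patch): without FEAT_LSE2, the 16-byte
atomic load described in the aarch64 commit message above boils down to the
ldxp/stxp/cbnz retry loop this patch emits. A minimal C sketch of the same
idea, assuming an aarch64 little-endian host and GCC/Clang extended asm:

    #include <stdint.h>

    /* Illustrative helper; the memory must be writable even for a load,
       because stxp writes the value back to complete the exclusive pair. */
    static inline __uint128_t atomic16_load_ldxp(__uint128_t *ptr)
    {
        uint64_t lo, hi;
        uint32_t fail;

        do {
            asm volatile("ldxp %0, %1, [%3]\n\t"
                         "stxp %w2, %0, %1, [%3]"
                         : "=&r"(lo), "=&r"(hi), "=&r"(fail)
                         : "r"(ptr)
                         : "memory");
        } while (fail);

        /* Little-endian host: the lower-addressed doubleword is the
           least-significant half. */
        return ((__uint128_t)hi << 64) | lo;
    }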
6
tcg/ppc/tcg-target.h | 2 +-
7
tcg/aarch64/tcg-target-con-set.h | 2 +
7
tcg/ppc/tcg-target.opc.h | 8 +++
8
tcg/aarch64/tcg-target.h | 11 ++-
8
tcg/ppc/tcg-target.inc.c | 113 ++++++++++++++++++++++++++++++++++++++-
9
tcg/aarch64/tcg-target.c.inc | 141 ++++++++++++++++++++++++++++++-
9
3 files changed, 121 insertions(+), 2 deletions(-)
10
3 files changed, 151 insertions(+), 3 deletions(-)
10
11
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
12
diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
14
--- a/tcg/aarch64/tcg-target-con-set.h
14
+++ b/tcg/ppc/tcg-target.h
15
+++ b/tcg/aarch64/tcg-target-con-set.h
15
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
16
@@ -XXX,XX +XXX,XX @@ C_O0_I1(r)
16
#define TCG_TARGET_HAS_shs_vec 0
17
C_O0_I2(r, rA)
17
#define TCG_TARGET_HAS_shv_vec 1
18
C_O0_I2(rZ, r)
18
#define TCG_TARGET_HAS_cmp_vec 1
19
C_O0_I2(w, r)
19
-#define TCG_TARGET_HAS_mul_vec 0
20
+C_O0_I3(rZ, rZ, r)
20
+#define TCG_TARGET_HAS_mul_vec 1
21
C_O1_I1(r, r)
21
#define TCG_TARGET_HAS_sat_vec 1
22
C_O1_I1(w, r)
22
#define TCG_TARGET_HAS_minmax_vec 1
23
C_O1_I1(w, w)
23
#define TCG_TARGET_HAS_bitsel_vec 0
24
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wO)
24
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
25
C_O1_I2(w, w, wZ)
26
C_O1_I3(w, w, w, w)
27
C_O1_I4(r, r, rA, rZ, rZ)
28
+C_O2_I1(r, r, r)
29
C_O2_I4(r, r, rZ, rZ, rA, rMZ)
30
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
25
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/ppc/tcg-target.opc.h
32
--- a/tcg/aarch64/tcg-target.h
27
+++ b/tcg/ppc/tcg-target.opc.h
33
+++ b/tcg/aarch64/tcg-target.h
28
@@ -XXX,XX +XXX,XX @@
34
@@ -XXX,XX +XXX,XX @@ typedef enum {
29
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
35
#define TCG_TARGET_HAS_muluh_i64 1
30
* consider these to be UNSPEC with names.
36
#define TCG_TARGET_HAS_mulsh_i64 1
31
*/
37
32
+
38
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
33
+DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC)
39
+/*
34
+DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC)
40
+ * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
35
+DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
41
+ * which requires writable pages. We must defer to the helper for user-only,
36
+DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
42
+ * but in system mode all ram is writable for the host.
37
+DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
43
+ */
38
+DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
44
+#ifdef CONFIG_USER_ONLY
39
+DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC)
45
+#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
40
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
46
+#else
47
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
48
+#endif
49
50
#define TCG_TARGET_HAS_v64 1
51
#define TCG_TARGET_HAS_v128 1
52
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
41
index XXXXXXX..XXXXXXX 100644
53
index XXXXXXX..XXXXXXX 100644
42
--- a/tcg/ppc/tcg-target.inc.c
54
--- a/tcg/aarch64/tcg-target.c.inc
43
+++ b/tcg/ppc/tcg-target.inc.c
55
+++ b/tcg/aarch64/tcg-target.c.inc
44
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
56
@@ -XXX,XX +XXX,XX @@ typedef enum {
45
#define VSRAB VX4(772)
57
I3305_LDR_v64 = 0x5c000000,
46
#define VSRAH VX4(836)
58
I3305_LDR_v128 = 0x9c000000,
47
#define VSRAW VX4(900)
59
48
+#define VRLB VX4(4)
60
+ /* Load/store exclusive. */
49
+#define VRLH VX4(68)
61
+ I3306_LDXP = 0xc8600000,
50
+#define VRLW VX4(132)
62
+ I3306_STXP = 0xc8200000,
51
+
63
+
52
+#define VMULEUB VX4(520)
64
/* Load/store register. Described here as 3.3.12, but the helper
53
+#define VMULEUH VX4(584)
65
that emits them can transform to 3.3.10 or 3.3.13. */
54
+#define VMULOUB VX4(8)
66
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
55
+#define VMULOUH VX4(72)
67
@@ -XXX,XX +XXX,XX @@ typedef enum {
56
+#define VMSUMUHM VX4(38)
68
I3406_ADR = 0x10000000,
57
+
69
I3406_ADRP = 0x90000000,
58
+#define VMRGHB VX4(12)
70
59
+#define VMRGHH VX4(76)
71
+ /* Add/subtract extended register instructions. */
60
+#define VMRGHW VX4(140)
72
+ I3501_ADD = 0x0b200000,
61
+#define VMRGLB VX4(268)
73
+
62
+#define VMRGLH VX4(332)
74
/* Add/subtract shifted register instructions (without a shift). */
63
+#define VMRGLW VX4(396)
75
I3502_ADD = 0x0b000000,
64
+
76
I3502_ADDS = 0x2b000000,
65
+#define VPKUHUM VX4(14)
77
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
66
+#define VPKUWUM VX4(78)
78
tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
67
79
}
68
#define VAND VX4(1028)
80
69
#define VANDC VX4(1092)
81
+static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
70
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
82
+ TCGReg rt, TCGReg rt2, TCGReg rn)
71
case INDEX_op_sarv_vec:
83
+{
72
return vece <= MO_32;
84
+ tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
73
case INDEX_op_cmp_vec:
85
+}
74
+ case INDEX_op_mul_vec:
86
+
75
case INDEX_op_shli_vec:
87
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
76
case INDEX_op_shri_vec:
88
TCGReg rt, int imm19)
77
case INDEX_op_sari_vec:
89
{
78
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
90
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
79
smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
91
tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
80
shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
92
}
81
shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
93
82
- sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 };
94
+static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
83
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
95
+ TCGType sf, TCGReg rd, TCGReg rn,
84
+ mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
96
+ TCGReg rm, int opt, int imm3)
85
+ mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
97
+{
86
+ muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
98
+ tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
87
+ mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
99
+ imm3 << 10 | rn << 5 | rd);
88
+ pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
100
+}
89
+ rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
101
+
90
102
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
91
TCGType type = vecl + TCG_TYPE_V64;
103
the rare occasion when we actually want to supply a shift amount. */
92
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
104
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
93
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
105
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
94
}
106
TCGType addr_type = s->addr_type;
95
break;
107
TCGLabelQemuLdst *ldst = NULL;
96
108
MemOp opc = get_memop(oi);
97
+ case INDEX_op_ppc_mrgh_vec:
109
+ MemOp s_bits = opc & MO_SIZE;
98
+ insn = mrgh_op[vece];
110
unsigned a_mask;
99
+ break;
111
100
+ case INDEX_op_ppc_mrgl_vec:
112
h->aa = atom_and_align_for_opc(s, opc,
101
+ insn = mrgl_op[vece];
113
have_lse2 ? MO_ATOM_WITHIN16
102
+ break;
114
: MO_ATOM_IFALIGN,
103
+ case INDEX_op_ppc_muleu_vec:
115
- false);
104
+ insn = muleu_op[vece];
116
+ s_bits == MO_128);
105
+ break;
117
a_mask = (1 << h->aa.align) - 1;
106
+ case INDEX_op_ppc_mulou_vec:
118
107
+ insn = mulou_op[vece];
119
#ifdef CONFIG_SOFTMMU
108
+ break;
120
- unsigned s_bits = opc & MO_SIZE;
109
+ case INDEX_op_ppc_pkum_vec:
121
unsigned s_mask = (1u << s_bits) - 1;
110
+ insn = pkum_op[vece];
122
unsigned mem_index = get_mmuidx(oi);
111
+ break;
123
TCGReg addr_adj;
112
+ case INDEX_op_ppc_rotl_vec:
124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
113
+ insn = rotl_op[vece];
114
+ break;
115
+ case INDEX_op_ppc_msum_vec:
116
+ tcg_debug_assert(vece == MO_16);
117
+ tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
118
+ return;
119
+
120
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
121
case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
122
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
123
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
124
}
125
}
125
}
126
}
126
127
127
+static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
128
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
128
+ TCGv_vec v1, TCGv_vec v2)
129
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
129
+{
130
+{
130
+ TCGv_vec t1 = tcg_temp_new_vec(type);
131
+ TCGLabelQemuLdst *ldst;
131
+ TCGv_vec t2 = tcg_temp_new_vec(type);
132
+ HostAddress h;
132
+ TCGv_vec t3, t4;
133
+ TCGReg base;
133
+
134
+ bool use_pair;
134
+ switch (vece) {
135
+
135
+ case MO_8:
136
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
136
+ case MO_16:
137
+
137
+ vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
138
+ /* Compose the final address, as LDP/STP have no indexing. */
138
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
139
+ if (h.index == TCG_REG_XZR) {
139
+ vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
140
+ base = h.base;
140
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
141
+ } else {
141
+ vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
142
+ base = TCG_REG_TMP2;
142
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2));
143
+ if (h.index_ext == TCG_TYPE_I32) {
143
+ vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
144
+ /* add base, base, index, uxtw */
144
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2));
145
+ tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
145
+ vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
146
+ h.base, h.index, MO_32, 0);
146
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1));
147
+ } else {
147
+    break;
148
+ /* add base, base, index */
148
+
149
+ tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
149
+ case MO_32:
150
+ }
150
+ t3 = tcg_temp_new_vec(type);
151
+ }
151
+ t4 = tcg_temp_new_vec(type);
152
+
152
+ tcg_gen_dupi_vec(MO_8, t4, -16);
153
+ use_pair = h.aa.atom < MO_128 || have_lse2;
153
+ vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1),
154
+
154
+ tcgv_vec_arg(v2), tcgv_vec_arg(t4));
155
+ if (!use_pair) {
155
+ vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
156
+ tcg_insn_unit *branch = NULL;
156
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
157
+ TCGReg ll, lh, sl, sh;
157
+ tcg_gen_dupi_vec(MO_8, t3, 0);
158
+
158
+ vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3),
159
+ /*
159
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
160
+ * If we have already checked for 16-byte alignment, that's all
160
+ vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3),
161
+ * we need. Otherwise we have determined that misaligned atomicity
161
+ tcgv_vec_arg(t3), tcgv_vec_arg(t4));
162
+ * may be handled with two 8-byte loads.
162
+ tcg_gen_add_vec(MO_32, v0, t2, t3);
163
+ */
163
+ tcg_temp_free_vec(t3);
164
+ if (h.aa.align < MO_128) {
164
+ tcg_temp_free_vec(t4);
165
+ /*
166
+ * TODO: align should be MO_64, so we only need test bit 3,
167
+ * which means we could use TBNZ instead of ANDS+B_C.
168
+ */
169
+ tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
170
+ branch = s->code_ptr;
171
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
172
+ use_pair = true;
173
+ }
174
+
175
+ if (is_ld) {
176
+ /*
177
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
178
+ * ldxp lo, hi, [base]
179
+ * stxp t0, lo, hi, [base]
180
+ * cbnz t0, .-8
181
+ * Require no overlap between data{lo,hi} and base.
182
+ */
183
+ if (datalo == base || datahi == base) {
184
+ tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
185
+ base = TCG_REG_TMP2;
186
+ }
187
+ ll = sl = datalo;
188
+ lh = sh = datahi;
189
+ } else {
190
+ /*
191
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
192
+ * 1: ldxp t0, t1, [base]
193
+ * stxp t0, lo, hi, [base]
194
+ * cbnz t0, 1b
195
+ */
196
+ tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
197
+ ll = TCG_REG_TMP0;
198
+ lh = TCG_REG_TMP1;
199
+ sl = datalo;
200
+ sh = datahi;
201
+ }
202
+
203
+ tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
204
+ tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
205
+ tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
206
+
207
+ if (use_pair) {
208
+ /* "b .+8", branching across the one insn of use_pair. */
209
+ tcg_out_insn(s, 3206, B, 2);
210
+ reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
211
+ }
212
+ }
213
+
214
+ if (use_pair) {
215
+ if (is_ld) {
216
+ tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
217
+ } else {
218
+ tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
219
+ }
220
+ }
221
+
222
+ if (ldst) {
223
+ ldst->type = TCG_TYPE_I128;
224
+ ldst->datalo_reg = datalo;
225
+ ldst->datahi_reg = datahi;
226
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
227
+ }
228
+}
229
+
230
static const tcg_insn_unit *tb_ret_addr;
231
232
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
233
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
234
case INDEX_op_qemu_st_a64_i64:
235
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
236
break;
237
+ case INDEX_op_qemu_ld_a32_i128:
238
+ case INDEX_op_qemu_ld_a64_i128:
239
+ tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
165
+ break;
240
+ break;
166
+
241
+ case INDEX_op_qemu_st_a32_i128:
167
+ default:
242
+ case INDEX_op_qemu_st_a64_i128:
168
+ g_assert_not_reached();
243
+ tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
169
+ }
170
+ tcg_temp_free_vec(t1);
171
+ tcg_temp_free_vec(t2);
172
+}
173
+
174
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
175
TCGArg a0, ...)
176
{
177
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
178
v2 = temp_tcgv_vec(arg_temp(a2));
179
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
180
break;
181
+ case INDEX_op_mul_vec:
182
+ v2 = temp_tcgv_vec(arg_temp(a2));
183
+ expand_vec_mul(type, vece, v0, v1, v2);
184
+ break;
244
+ break;
185
default:
245
186
g_assert_not_reached();
246
case INDEX_op_bswap64_i64:
187
}
247
tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
188
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
248
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
189
static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
249
case INDEX_op_qemu_ld_a32_i64:
190
static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
250
case INDEX_op_qemu_ld_a64_i64:
191
static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
251
return C_O1_I1(r, r);
192
+ static const TCGTargetOpDef v_v_v_v
252
+ case INDEX_op_qemu_ld_a32_i128:
193
+ = { .args_ct_str = { "v", "v", "v", "v" } };
253
+ case INDEX_op_qemu_ld_a64_i128:
194
254
+ return C_O2_I1(r, r, r);
195
switch (op) {
255
case INDEX_op_qemu_st_a32_i32:
196
case INDEX_op_goto_ptr:
256
case INDEX_op_qemu_st_a64_i32:
197
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
257
case INDEX_op_qemu_st_a32_i64:
198
258
case INDEX_op_qemu_st_a64_i64:
199
case INDEX_op_add_vec:
259
return C_O0_I2(rZ, r);
200
case INDEX_op_sub_vec:
260
+ case INDEX_op_qemu_st_a32_i128:
201
+ case INDEX_op_mul_vec:
261
+ case INDEX_op_qemu_st_a64_i128:
202
case INDEX_op_and_vec:
262
+ return C_O0_I3(rZ, rZ, r);
203
case INDEX_op_or_vec:
263
204
case INDEX_op_xor_vec:
264
case INDEX_op_deposit_i32:
205
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
265
case INDEX_op_deposit_i64:
206
case INDEX_op_shlv_vec:
207
case INDEX_op_shrv_vec:
208
case INDEX_op_sarv_vec:
209
+ case INDEX_op_ppc_mrgh_vec:
210
+ case INDEX_op_ppc_mrgl_vec:
211
+ case INDEX_op_ppc_muleu_vec:
212
+ case INDEX_op_ppc_mulou_vec:
213
+ case INDEX_op_ppc_pkum_vec:
214
+ case INDEX_op_ppc_rotl_vec:
215
return &v_v_v;
216
case INDEX_op_not_vec:
217
case INDEX_op_dup_vec:
218
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
219
case INDEX_op_st_vec:
220
case INDEX_op_dupm_vec:
221
return &v_r;
222
+ case INDEX_op_ppc_msum_vec:
223
+ return &v_v_v_v;
224
225
default:
226
return NULL;
227
--
266
--
228
2.17.1
267
2.34.1
229
230
1
These new instructions are conditional only on MSR.VEC and
1
Use LQ/STQ with ISA v2.07 when 16-byte atomicity is required.
2
are thus part of the Altivec instruction set, and not VSX.
2
Note that these instructions do not require 16-byte alignment.
3
This includes lots of double-word arithmetic and a few extra
4
logical operations.
5
3
6
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
4
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
tcg/ppc/tcg-target.h | 4 +-
7
tcg/ppc/tcg-target-con-set.h | 2 +
10
tcg/ppc/tcg-target.inc.c | 85 ++++++++++++++++++++++++++++++----------
8
tcg/ppc/tcg-target-con-str.h | 1 +
11
2 files changed, 67 insertions(+), 22 deletions(-)
9
tcg/ppc/tcg-target.h | 3 +-
10
tcg/ppc/tcg-target.c.inc | 108 +++++++++++++++++++++++++++++++----
11
4 files changed, 101 insertions(+), 13 deletions(-)
12
12
13
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target-con-set.h
16
+++ b/tcg/ppc/tcg-target-con-set.h
17
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
18
C_O0_I2(r, ri)
19
C_O0_I2(v, r)
20
C_O0_I3(r, r, r)
21
+C_O0_I3(o, m, r)
22
C_O0_I4(r, r, ri, ri)
23
C_O0_I4(r, r, r, r)
24
C_O1_I1(r, r)
25
@@ -XXX,XX +XXX,XX @@ C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rZ, rZ)
27
C_O1_I4(r, r, r, ri, ri)
28
C_O2_I1(r, r, r)
29
+C_O2_I1(o, m, r)
30
C_O2_I2(r, r, r, r)
31
C_O2_I4(r, r, rI, rZM, r, r)
32
C_O2_I4(r, r, r, r, rI, rZM)
33
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/tcg/ppc/tcg-target-con-str.h
36
+++ b/tcg/ppc/tcg-target-con-str.h
37
@@ -XXX,XX +XXX,XX @@
38
* REGS(letter, register_mask)
39
*/
40
REGS('r', ALL_GENERAL_REGS)
41
+REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */
42
REGS('v', ALL_VECTOR_REGS)
43
44
/*
13
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
45
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
14
index XXXXXXX..XXXXXXX 100644
46
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target.h
47
--- a/tcg/ppc/tcg-target.h
16
+++ b/tcg/ppc/tcg-target.h
48
+++ b/tcg/ppc/tcg-target.h
17
@@ -XXX,XX +XXX,XX @@ typedef enum {
18
typedef enum {
19
tcg_isa_base,
20
tcg_isa_2_06,
21
+ tcg_isa_2_07,
22
tcg_isa_3_00,
23
} TCGPowerISA;
24
25
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
26
extern bool have_vsx;
27
28
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
29
+#define have_isa_2_07 (have_isa >= tcg_isa_2_07)
30
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
31
32
/* optional instructions automatically implemented */
33
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
49
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
34
#define TCG_TARGET_HAS_v256 0
50
#define TCG_TARGET_HAS_mulsh_i64 1
35
51
#endif
36
#define TCG_TARGET_HAS_andc_vec 1
52
37
-#define TCG_TARGET_HAS_orc_vec 0
53
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
38
+#define TCG_TARGET_HAS_orc_vec have_isa_2_07
54
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
39
#define TCG_TARGET_HAS_not_vec 1
55
+ (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
40
#define TCG_TARGET_HAS_neg_vec 0
56
41
#define TCG_TARGET_HAS_abs_vec 0
57
/*
42
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
58
* While technically Altivec could support V64, it has no 64-bit store
43
index XXXXXXX..XXXXXXX 100644
59
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
44
--- a/tcg/ppc/tcg-target.inc.c
60
index XXXXXXX..XXXXXXX 100644
45
+++ b/tcg/ppc/tcg-target.inc.c
61
--- a/tcg/ppc/tcg-target.c.inc
46
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
62
+++ b/tcg/ppc/tcg-target.c.inc
47
#define VADDSWS VX4(896)
63
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
48
#define VADDUWS VX4(640)
64
49
#define VADDUWM VX4(128)
65
#define B OPCD( 18)
50
+#define VADDUDM VX4(192) /* v2.07 */
66
#define BC OPCD( 16)
51
67
+
52
#define VSUBSBS VX4(1792)
68
#define LBZ OPCD( 34)
53
#define VSUBUBS VX4(1536)
69
#define LHZ OPCD( 40)
54
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
70
#define LHA OPCD( 42)
55
#define VSUBSWS VX4(1920)
71
#define LWZ OPCD( 32)
56
#define VSUBUWS VX4(1664)
72
#define LWZUX XO31( 55)
57
#define VSUBUWM VX4(1152)
73
-#define STB OPCD( 38)
58
+#define VSUBUDM VX4(1216) /* v2.07 */
74
-#define STH OPCD( 44)
59
75
-#define STW OPCD( 36)
60
#define VMAXSB VX4(258)
76
-
61
#define VMAXSH VX4(322)
77
-#define STD XO62( 0)
62
#define VMAXSW VX4(386)
78
-#define STDU XO62( 1)
63
+#define VMAXSD VX4(450) /* v2.07 */
79
-#define STDX XO31(149)
64
#define VMAXUB VX4(2)
80
-
65
#define VMAXUH VX4(66)
81
#define LD XO58( 0)
66
#define VMAXUW VX4(130)
82
#define LDX XO31( 21)
67
+#define VMAXUD VX4(194) /* v2.07 */
83
#define LDU XO58( 1)
68
#define VMINSB VX4(770)
84
#define LDUX XO31( 53)
69
#define VMINSH VX4(834)
85
#define LWA XO58( 2)
70
#define VMINSW VX4(898)
86
#define LWAX XO31(341)
71
+#define VMINSD VX4(962) /* v2.07 */
87
+#define LQ OPCD( 56)
72
#define VMINUB VX4(514)
88
+
73
#define VMINUH VX4(578)
89
+#define STB OPCD( 38)
74
#define VMINUW VX4(642)
90
+#define STH OPCD( 44)
75
+#define VMINUD VX4(706) /* v2.07 */
91
+#define STW OPCD( 36)
76
92
+#define STD XO62( 0)
77
#define VCMPEQUB VX4(6)
93
+#define STDU XO62( 1)
78
#define VCMPEQUH VX4(70)
94
+#define STDX XO31(149)
79
#define VCMPEQUW VX4(134)
95
+#define STQ XO62( 2)
80
+#define VCMPEQUD VX4(199) /* v2.07 */
96
81
#define VCMPGTSB VX4(774)
97
#define ADDIC OPCD( 12)
82
#define VCMPGTSH VX4(838)
98
#define ADDI OPCD( 14)
83
#define VCMPGTSW VX4(902)
99
@@ -XXX,XX +XXX,XX @@ typedef struct {
84
+#define VCMPGTSD VX4(967) /* v2.07 */
100
85
#define VCMPGTUB VX4(518)
101
bool tcg_target_has_memory_bswap(MemOp memop)
86
#define VCMPGTUH VX4(582)
102
{
87
#define VCMPGTUW VX4(646)
103
- return true;
88
+#define VCMPGTUD VX4(711) /* v2.07 */
104
+ TCGAtomAlign aa;
89
105
+
90
#define VSLB VX4(260)
106
+ if ((memop & MO_SIZE) <= MO_64) {
91
#define VSLH VX4(324)
107
+ return true;
92
#define VSLW VX4(388)
108
+ }
93
+#define VSLD VX4(1476) /* v2.07 */
109
+
94
#define VSRB VX4(516)
110
+ /*
95
#define VSRH VX4(580)
111
+ * Reject 16-byte memop with 16-byte atomicity,
96
#define VSRW VX4(644)
112
+ * but do allow a pair of 64-bit operations.
97
+#define VSRD VX4(1732) /* v2.07 */
113
+ */
98
#define VSRAB VX4(772)
114
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
99
#define VSRAH VX4(836)
115
+ return aa.atom <= MO_64;
100
#define VSRAW VX4(900)
116
}
101
+#define VSRAD VX4(964) /* v2.07 */
117
102
#define VRLB VX4(4)
118
/*
103
#define VRLH VX4(68)
119
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
104
#define VRLW VX4(132)
120
{
105
+#define VRLD VX4(196) /* v2.07 */
121
TCGLabelQemuLdst *ldst = NULL;
106
122
MemOp opc = get_memop(oi);
107
#define VMULEUB VX4(520)
123
- MemOp a_bits;
108
#define VMULEUH VX4(584)
124
+ MemOp a_bits, s_bits;
109
+#define VMULEUW VX4(648) /* v2.07 */
125
110
#define VMULOUB VX4(8)
126
/*
111
#define VMULOUH VX4(72)
127
* Book II, Section 1.4, Single-Copy Atomicity, specifies:
112
+#define VMULOUW VX4(136) /* v2.07 */
128
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
113
+#define VMULUWM VX4(137) /* v2.07 */
129
* As of 3.0, "the non-atomic access is performed as described in
114
#define VMSUMUHM VX4(38)
130
* the corresponding list", which matches MO_ATOM_SUBALIGN.
115
131
*/
116
#define VMRGHB VX4(12)
132
+ s_bits = opc & MO_SIZE;
117
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
133
h->aa = atom_and_align_for_opc(s, opc,
118
#define VNOR VX4(1284)
134
have_isa_3_00 ? MO_ATOM_SUBALIGN
119
#define VOR VX4(1156)
135
: MO_ATOM_IFALIGN,
120
#define VXOR VX4(1220)
136
- false);
121
+#define VEQV VX4(1668) /* v2.07 */
137
+ s_bits == MO_128);
122
+#define VNAND VX4(1412) /* v2.07 */
138
a_bits = h->aa.align;
123
+#define VORC VX4(1348) /* v2.07 */
139
124
140
#ifdef CONFIG_SOFTMMU
125
#define VSPLTB VX4(524)
141
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
126
#define VSPLTH VX4(588)
142
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
127
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
143
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
128
case INDEX_op_andc_vec:
144
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
129
case INDEX_op_not_vec:
145
- unsigned s_bits = opc & MO_SIZE;
130
return 1;
146
131
+ case INDEX_op_orc_vec:
147
ldst = new_ldst_label(s);
132
+ return have_isa_2_07;
148
ldst->is_ld = is_ld;
149
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
150
}
151
}
152
153
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
154
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
155
+{
156
+ TCGLabelQemuLdst *ldst;
157
+ HostAddress h;
158
+ bool need_bswap;
159
+ uint32_t insn;
160
+ TCGReg index;
161
+
162
+ ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);
163
+
164
+ /* Compose the final address, as LQ/STQ have no indexing. */
165
+ index = h.index;
166
+ if (h.base != 0) {
167
+ index = TCG_REG_TMP1;
168
+ tcg_out32(s, ADD | TAB(index, h.base, h.index));
169
+ }
170
+ need_bswap = get_memop(oi) & MO_BSWAP;
171
+
172
+ if (h.aa.atom == MO_128) {
173
+ tcg_debug_assert(!need_bswap);
174
+ tcg_debug_assert(datalo & 1);
175
+ tcg_debug_assert(datahi == datalo - 1);
176
+ insn = is_ld ? LQ : STQ;
177
+ tcg_out32(s, insn | TAI(datahi, index, 0));
178
+ } else {
179
+ TCGReg d1, d2;
180
+
181
+ if (HOST_BIG_ENDIAN ^ need_bswap) {
182
+ d1 = datahi, d2 = datalo;
183
+ } else {
184
+ d1 = datalo, d2 = datahi;
185
+ }
186
+
187
+ if (need_bswap) {
188
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
189
+ insn = is_ld ? LDBRX : STDBRX;
190
+ tcg_out32(s, insn | TAB(d1, 0, index));
191
+ tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
192
+ } else {
193
+ insn = is_ld ? LD : STD;
194
+ tcg_out32(s, insn | TAI(d1, index, 0));
195
+ tcg_out32(s, insn | TAI(d2, index, 8));
196
+ }
197
+ }
198
+
199
+ if (ldst) {
200
+ ldst->type = TCG_TYPE_I128;
201
+ ldst->datalo_reg = datalo;
202
+ ldst->datahi_reg = datahi;
203
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
204
+ }
205
+}
206
+
207
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
208
{
209
int i;
210
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
211
args[4], TCG_TYPE_I64);
212
}
213
break;
214
+ case INDEX_op_qemu_ld_a32_i128:
215
+ case INDEX_op_qemu_ld_a64_i128:
216
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
217
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
218
+ break;
219
220
case INDEX_op_qemu_st_a64_i32:
221
if (TCG_TARGET_REG_BITS == 32) {
222
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
223
args[4], TCG_TYPE_I64);
224
}
225
break;
226
+ case INDEX_op_qemu_st_a32_i128:
227
+ case INDEX_op_qemu_st_a64_i128:
228
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
229
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
230
+ break;
231
232
case INDEX_op_setcond_i32:
233
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
234
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
235
case INDEX_op_qemu_st_a64_i64:
236
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
237
238
+ case INDEX_op_qemu_ld_a32_i128:
239
+ case INDEX_op_qemu_ld_a64_i128:
240
+ return C_O2_I1(o, m, r);
241
+ case INDEX_op_qemu_st_a32_i128:
242
+ case INDEX_op_qemu_st_a64_i128:
243
+ return C_O0_I3(o, m, r);
244
+
133
case INDEX_op_add_vec:
245
case INDEX_op_add_vec:
134
case INDEX_op_sub_vec:
246
case INDEX_op_sub_vec:
135
case INDEX_op_smax_vec:
247
case INDEX_op_mul_vec:
136
case INDEX_op_smin_vec:
137
case INDEX_op_umax_vec:
138
case INDEX_op_umin_vec:
139
+ case INDEX_op_shlv_vec:
140
+ case INDEX_op_shrv_vec:
141
+ case INDEX_op_sarv_vec:
142
+ return vece <= MO_32 || have_isa_2_07;
143
case INDEX_op_ssadd_vec:
144
case INDEX_op_sssub_vec:
145
case INDEX_op_usadd_vec:
146
case INDEX_op_ussub_vec:
147
- case INDEX_op_shlv_vec:
148
- case INDEX_op_shrv_vec:
149
- case INDEX_op_sarv_vec:
150
return vece <= MO_32;
151
case INDEX_op_cmp_vec:
152
- case INDEX_op_mul_vec:
153
case INDEX_op_shli_vec:
154
case INDEX_op_shri_vec:
155
case INDEX_op_sari_vec:
156
- return vece <= MO_32 ? -1 : 0;
157
+ return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
158
+ case INDEX_op_mul_vec:
159
+ switch (vece) {
160
+ case MO_8:
161
+ case MO_16:
162
+ return -1;
163
+ case MO_32:
164
+ return have_isa_2_07 ? 1 : -1;
165
+ }
166
+ return 0;
167
case INDEX_op_bitsel_vec:
168
return have_vsx;
169
default:
170
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
171
const TCGArg *args, const int *const_args)
172
{
173
static const uint32_t
174
- add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
175
- sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
176
- eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
177
- gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
178
- gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
179
+ add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
180
+ sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
181
+ eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
182
+ gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
183
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
184
ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
185
usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
186
sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
187
ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
188
- umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
189
- smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
190
- umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
191
- smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
192
- shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
193
- shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
194
- sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
195
+ umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
196
+ smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
197
+ umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
198
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
199
+ shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
200
+ shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
201
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
202
mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
203
mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
204
- muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
205
- mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
206
+ muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
207
+ mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
208
pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
209
- rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
210
+ rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
211
212
TCGType type = vecl + TCG_TYPE_V64;
213
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
214
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
215
case INDEX_op_sub_vec:
216
insn = sub_op[vece];
217
break;
218
+ case INDEX_op_mul_vec:
219
+ tcg_debug_assert(vece == MO_32 && have_isa_2_07);
220
+ insn = VMULUWM;
221
+ break;
222
case INDEX_op_ssadd_vec:
223
insn = ssadd_op[vece];
224
break;
225
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
226
insn = VNOR;
227
a2 = a1;
228
break;
229
+ case INDEX_op_orc_vec:
230
+ insn = VORC;
231
+ break;
232
233
case INDEX_op_cmp_vec:
234
switch (args[3]) {
235
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
236
{
237
bool need_swap = false, need_inv = false;
238
239
- tcg_debug_assert(vece <= MO_32);
240
+ tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
241
242
switch (cond) {
243
case TCG_COND_EQ:
244
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
245
    break;
246
247
case MO_32:
248
+ tcg_debug_assert(!have_isa_2_07);
249
t3 = tcg_temp_new_vec(type);
250
t4 = tcg_temp_new_vec(type);
251
tcg_gen_dupi_vec(MO_8, t4, -16);
252
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
253
if (hwcap & PPC_FEATURE_ARCH_2_06) {
254
have_isa = tcg_isa_2_06;
255
}
256
+#ifdef PPC_FEATURE2_ARCH_2_07
257
+ if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
258
+ have_isa = tcg_isa_2_07;
259
+ }
260
+#endif
261
#ifdef PPC_FEATURE2_ARCH_3_00
262
if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
263
have_isa = tcg_isa_3_00;
264
--
248
--
265
2.17.1
249
2.34.1
266
267
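The LQ/STQ path above relies on two small invariants that are easy to restate outside of TCG. The following plain-C sketch is illustrative only, and the helper names (valid_lq_pair, split_halves) are made up rather than QEMU API: LQ/STQ operate on an even/odd register pair with the even register holding the high doubleword, and the two-instruction fallback picks which half lands at the lower address from HOST_BIG_ENDIAN ^ need_bswap.

#include <stdbool.h>

/* Mirror of the tcg_debug_assert()s before emitting LQ/STQ:
 * datalo must be the odd register, datahi the even one just below it. */
static bool valid_lq_pair(int datahi, int datalo)
{
    return (datalo & 1) != 0 && datahi == datalo - 1;
}

/* For the LD/STD (or LDBRX/STDBRX) fallback, choose which register is
 * accessed at offset 0 and which at offset 8, as in the hunk above. */
static void split_halves(bool host_big_endian, bool need_bswap,
                         int datalo, int datahi, int *d1, int *d2)
{
    if (host_big_endian ^ need_bswap) {
        *d1 = datahi;
        *d2 = datalo;
    } else {
        *d1 = datalo;
        *d2 = datahi;
    }
}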
1
From: Alex Bennée <alex.bennee@linaro.org>
1
Use LPQ/STPQ when 16-byte atomicity is required.
2
Note that these instructions require 16-byte alignment.
2
3
3
qemu_cpu_kick is used for a number of reasons including to indicate
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
there is work to be done. However, when thread=single the old
5
qemu_cpu_kick_rr_cpu only advanced the vCPU to the next executing one
6
which can lead to a hang in the case that:
7
8
a) the kick is from outside the vCPUs (e.g. iothread)
9
b) the timers are paused (i.e. iothread calling run_on_cpu)
10
11
To avoid this, let's split qemu_cpu_kick_rr into two functions. One for
12
the timer which continues to advance to the next timeslice and another
13
for all other kicks.
14
15
Message-Id: <20191001160426.26644-1-alex.bennee@linaro.org>
16
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
19
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
20
---
6
---
21
cpus.c | 24 ++++++++++++++++++------
7
tcg/s390x/tcg-target-con-set.h | 2 +
22
1 file changed, 18 insertions(+), 6 deletions(-)
8
tcg/s390x/tcg-target.h | 2 +-
9
tcg/s390x/tcg-target.c.inc | 107 ++++++++++++++++++++++++++++++++-
10
3 files changed, 107 insertions(+), 4 deletions(-)
23
11
24
diff --git a/cpus.c b/cpus.c
12
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
25
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
26
--- a/cpus.c
14
--- a/tcg/s390x/tcg-target-con-set.h
27
+++ b/cpus.c
15
+++ b/tcg/s390x/tcg-target-con-set.h
28
@@ -XXX,XX +XXX,XX @@ static inline int64_t qemu_tcg_next_kick(void)
16
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
29
return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
17
C_O0_I2(r, ri)
18
C_O0_I2(r, rA)
19
C_O0_I2(v, r)
20
+C_O0_I3(o, m, r)
21
C_O1_I1(r, r)
22
C_O1_I1(v, r)
23
C_O1_I1(v, v)
24
@@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v)
25
C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rI, r)
27
C_O1_I4(r, r, rA, rI, r)
28
+C_O2_I1(o, m, r)
29
C_O2_I2(o, m, 0, r)
30
C_O2_I2(o, m, r, r)
31
C_O2_I3(o, m, 0, 1, r)
32
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/s390x/tcg-target.h
35
+++ b/tcg/s390x/tcg-target.h
36
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
37
#define TCG_TARGET_HAS_muluh_i64 0
38
#define TCG_TARGET_HAS_mulsh_i64 0
39
40
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
41
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
42
43
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
44
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
45
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/s390x/tcg-target.c.inc
48
+++ b/tcg/s390x/tcg-target.c.inc
49
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
50
RXY_LLGF = 0xe316,
51
RXY_LLGH = 0xe391,
52
RXY_LMG = 0xeb04,
53
+ RXY_LPQ = 0xe38f,
54
RXY_LRV = 0xe31e,
55
RXY_LRVG = 0xe30f,
56
RXY_LRVH = 0xe31f,
57
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
58
RXY_STG = 0xe324,
59
RXY_STHY = 0xe370,
60
RXY_STMG = 0xeb24,
61
+ RXY_STPQ = 0xe38e,
62
RXY_STRV = 0xe33e,
63
RXY_STRVG = 0xe32f,
64
RXY_STRVH = 0xe33f,
65
@@ -XXX,XX +XXX,XX @@ typedef struct {
66
67
bool tcg_target_has_memory_bswap(MemOp memop)
68
{
69
- return true;
70
+ TCGAtomAlign aa;
71
+
72
+ if ((memop & MO_SIZE) <= MO_64) {
73
+ return true;
74
+ }
75
+
76
+ /*
77
+ * Reject 16-byte memop with 16-byte atomicity,
78
+ * but do allow a pair of 64-bit operations.
79
+ */
80
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
81
+ return aa.atom <= MO_64;
30
}
82
}
31
83
32
-/* Kick the currently round-robin scheduled vCPU */
84
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
33
-static void qemu_cpu_kick_rr_cpu(void)
85
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
34
+/* Kick the currently round-robin scheduled vCPU to next */
35
+static void qemu_cpu_kick_rr_next_cpu(void)
36
{
86
{
37
CPUState *cpu;
87
TCGLabelQemuLdst *ldst = NULL;
38
do {
88
MemOp opc = get_memop(oi);
39
@@ -XXX,XX +XXX,XX @@ static void qemu_cpu_kick_rr_cpu(void)
89
+ MemOp s_bits = opc & MO_SIZE;
40
} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
90
unsigned a_mask;
91
92
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
93
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
94
a_mask = (1 << h->aa.align) - 1;
95
96
#ifdef CONFIG_SOFTMMU
97
- unsigned s_bits = opc & MO_SIZE;
98
unsigned s_mask = (1 << s_bits) - 1;
99
int mem_index = get_mmuidx(oi);
100
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
101
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
102
}
41
}
103
}
42
104
43
+/* Kick all RR vCPUs */
105
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
44
+static void qemu_cpu_kick_rr_cpus(void)
106
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
45
+{
107
+{
46
+ CPUState *cpu;
108
+ TCGLabel *l1 = NULL, *l2 = NULL;
47
+
109
+ TCGLabelQemuLdst *ldst;
48
+ CPU_FOREACH(cpu) {
110
+ HostAddress h;
49
+ cpu_exit(cpu);
111
+ bool need_bswap;
50
+ };
112
+ bool use_pair;
113
+ S390Opcode insn;
114
+
115
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
116
+
117
+ use_pair = h.aa.atom < MO_128;
118
+ need_bswap = get_memop(oi) & MO_BSWAP;
119
+
120
+ if (!use_pair) {
121
+ /*
122
+ * Atomicity requires we use LPQ. If we've already checked for
123
+ * 16-byte alignment, that's all we need. If we arrive with
124
+ * lesser alignment, we have determined that less than 16-byte
125
+ * alignment can be satisfied with two 8-byte loads.
126
+ */
127
+ if (h.aa.align < MO_128) {
128
+ use_pair = true;
129
+ l1 = gen_new_label();
130
+ l2 = gen_new_label();
131
+
132
+ tcg_out_insn(s, RI, TMLL, addr_reg, 15);
133
+ tgen_branch(s, 7, l1); /* CC in {1,2,3} */
134
+ }
135
+
136
+ tcg_debug_assert(!need_bswap);
137
+ tcg_debug_assert(datalo & 1);
138
+ tcg_debug_assert(datahi == datalo - 1);
139
+ insn = is_ld ? RXY_LPQ : RXY_STPQ;
140
+ tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
141
+
142
+ if (use_pair) {
143
+ tgen_branch(s, S390_CC_ALWAYS, l2);
144
+ tcg_out_label(s, l1);
145
+ }
146
+ }
147
+ if (use_pair) {
148
+ TCGReg d1, d2;
149
+
150
+ if (need_bswap) {
151
+ d1 = datalo, d2 = datahi;
152
+ insn = is_ld ? RXY_LRVG : RXY_STRVG;
153
+ } else {
154
+ d1 = datahi, d2 = datalo;
155
+ insn = is_ld ? RXY_LG : RXY_STG;
156
+ }
157
+
158
+ if (h.base == d1 || h.index == d1) {
159
+ tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
160
+ h.base = TCG_TMP0;
161
+ h.index = TCG_REG_NONE;
162
+ h.disp = 0;
163
+ }
164
+ tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
165
+ tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
166
+ }
167
+ if (l2) {
168
+ tcg_out_label(s, l2);
169
+ }
170
+
171
+ if (ldst) {
172
+ ldst->type = TCG_TYPE_I128;
173
+ ldst->datalo_reg = datalo;
174
+ ldst->datahi_reg = datahi;
175
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
176
+ }
51
+}
177
+}
52
+
178
+
53
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
179
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
54
{
180
{
55
}
181
/* Reuse the zeroing that exists for goto_ptr. */
56
@@ -XXX,XX +XXX,XX @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
182
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
57
static void kick_tcg_thread(void *opaque)
183
case INDEX_op_qemu_st_a64_i64:
58
{
184
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
59
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
185
break;
60
- qemu_cpu_kick_rr_cpu();
186
+ case INDEX_op_qemu_ld_a32_i128:
61
+ qemu_cpu_kick_rr_next_cpu();
187
+ case INDEX_op_qemu_ld_a64_i128:
62
}
188
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
63
189
+ break;
64
static void start_tcg_kick_timer(void)
190
+ case INDEX_op_qemu_st_a32_i128:
65
@@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick(CPUState *cpu)
191
+ case INDEX_op_qemu_st_a64_i128:
66
{
192
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
67
qemu_cond_broadcast(cpu->halt_cond);
193
+ break;
68
if (tcg_enabled()) {
194
69
- cpu_exit(cpu);
195
case INDEX_op_ld16s_i64:
70
- /* NOP unless doing single-thread RR */
196
tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
71
- qemu_cpu_kick_rr_cpu();
197
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
72
+ if (qemu_tcg_mttcg_enabled()) {
198
case INDEX_op_qemu_st_a32_i32:
73
+ cpu_exit(cpu);
199
case INDEX_op_qemu_st_a64_i32:
74
+ } else {
200
return C_O0_I2(r, r);
75
+ qemu_cpu_kick_rr_cpus();
201
+ case INDEX_op_qemu_ld_a32_i128:
76
+ }
202
+ case INDEX_op_qemu_ld_a64_i128:
77
} else {
203
+ return C_O2_I1(o, m, r);
78
if (hax_enabled()) {
204
+ case INDEX_op_qemu_st_a32_i128:
79
/*
205
+ case INDEX_op_qemu_st_a64_i128:
206
+ return C_O0_I3(o, m, r);
207
208
case INDEX_op_deposit_i32:
209
case INDEX_op_deposit_i64:
80
--
210
--
81
2.17.1
211
2.34.1
82
83
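At runtime, the s390x sequence emitted above boils down to a simple decision. This is a rough restatement in plain C, with a hypothetical helper name, not the backend logic itself: LPQ/STPQ are used only when full 16-byte atomicity is required and the address is 16-byte aligned; otherwise the TMLL test and branch fall through to a pair of 8-byte accesses, which is sufficient because an unaligned access was already allowed at most 8-byte atomicity.

#include <stdbool.h>
#include <stdint.h>

/* The decision made by the TMLL test on the low four address bits. */
static bool must_use_lpq(uintptr_t addr, int atom_bytes)
{
    return atom_bytes == 16 && (addr & 15) == 0;
}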
1
These new instructions are conditional on MSR.FP when TX=0 and
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
MSR.VEC when TX=1. Since we only care about the Altivec registers,
3
and force TX=1, we can consider these to be Altivec instructions.
4
Since Altivec is true for any use of vector types, we only need
5
to test have_isa_2_07.
6
7
This includes moves to and from the integer registers.
8
9
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
3
---
12
tcg/ppc/tcg-target.inc.c | 32 ++++++++++++++++++++++++++------
4
.../generic/host/load-extract-al16-al8.h | 45 +++++++++++++++++++
13
1 file changed, 26 insertions(+), 6 deletions(-)
5
accel/tcg/ldst_atomicity.c.inc | 36 +--------------
6
2 files changed, 47 insertions(+), 34 deletions(-)
7
create mode 100644 host/include/generic/host/load-extract-al16-al8.h
14
8
15
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
9
diff --git a/host/include/generic/host/load-extract-al16-al8.h b/host/include/generic/host/load-extract-al16-al8.h
10
new file mode 100644
11
index XXXXXXX..XXXXXXX
12
--- /dev/null
13
+++ b/host/include/generic/host/load-extract-al16-al8.h
14
@@ -XXX,XX +XXX,XX @@
15
+/*
16
+ * SPDX-License-Identifier: GPL-2.0-or-later
17
+ * Atomic extract 64 from 128-bit, generic version.
18
+ *
19
+ * Copyright (C) 2023 Linaro, Ltd.
20
+ */
21
+
22
+#ifndef HOST_LOAD_EXTRACT_AL16_AL8_H
23
+#define HOST_LOAD_EXTRACT_AL16_AL8_H
24
+
25
+/**
26
+ * load_atom_extract_al16_or_al8:
27
+ * @pv: host address
28
+ * @s: object size in bytes, @s <= 8.
29
+ *
30
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
31
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
32
+ * otherwise the access must be 8-byte atomic.
33
+ */
34
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
35
+load_atom_extract_al16_or_al8(void *pv, int s)
36
+{
37
+ uintptr_t pi = (uintptr_t)pv;
38
+ int o = pi & 7;
39
+ int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
40
+ Int128 r;
41
+
42
+ pv = (void *)(pi & ~7);
43
+ if (pi & 8) {
44
+ uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
45
+ uint64_t a = qatomic_read__nocheck(p8);
46
+ uint64_t b = qatomic_read__nocheck(p8 + 1);
47
+
48
+ if (HOST_BIG_ENDIAN) {
49
+ r = int128_make128(b, a);
50
+ } else {
51
+ r = int128_make128(a, b);
52
+ }
53
+ } else {
54
+ r = atomic16_read_ro(pv);
55
+ }
56
+ return int128_getlo(int128_urshift(r, shr));
57
+}
58
+
59
+#endif /* HOST_LOAD_EXTRACT_AL16_AL8_H */
60
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
16
index XXXXXXX..XXXXXXX 100644
61
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/ppc/tcg-target.inc.c
62
--- a/accel/tcg/ldst_atomicity.c.inc
18
+++ b/tcg/ppc/tcg-target.inc.c
63
+++ b/accel/tcg/ldst_atomicity.c.inc
19
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
64
@@ -XXX,XX +XXX,XX @@
20
#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
65
* See the COPYING file in the top-level directory.
21
#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
66
*/
22
67
23
+#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */
68
+#include "host/load-extract-al16-al8.h"
24
+#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */
25
+#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */
26
+#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */
27
+
69
+
28
#define RT(r) ((r)<<21)
70
#ifdef CONFIG_ATOMIC64
29
#define RS(r) ((r)<<21)
71
# define HAVE_al8 true
30
#define RA(r) ((r)<<16)
72
#else
31
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
73
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
32
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
74
return int128_getlo(r);
33
/* fallthru */
75
}
34
case TCG_TYPE_I32:
76
35
- if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
77
-/**
36
- tcg_out32(s, OR | SAB(arg, ret, arg));
78
- * load_atom_extract_al16_or_al8:
37
- break;
79
- * @p: host address
38
- } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
80
- * @s: object size in bytes, @s <= 8.
39
- /* Altivec does not support vector/integer moves. */
81
- *
40
- return false;
82
- * Load @s bytes from @p, when p % s != 0. If [p, p+s-1] does not
41
+ if (ret < TCG_REG_V0) {
83
- * cross an 16-byte boundary then the access must be 16-byte atomic,
42
+ if (arg < TCG_REG_V0) {
84
- * otherwise the access must be 8-byte atomic.
43
+ tcg_out32(s, OR | SAB(arg, ret, arg));
85
- */
44
+ break;
86
-static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
45
+ } else if (have_isa_2_07) {
87
-load_atom_extract_al16_or_al8(void *pv, int s)
46
+ tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
88
-{
47
+ | VRT(arg) | RA(ret));
89
- uintptr_t pi = (uintptr_t)pv;
48
+ break;
90
- int o = pi & 7;
49
+ } else {
91
- int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
50
+ /* Altivec does not support vector->integer moves. */
92
- Int128 r;
51
+ return false;
93
-
52
+ }
94
- pv = (void *)(pi & ~7);
53
+ } else if (arg < TCG_REG_V0) {
95
- if (pi & 8) {
54
+ if (have_isa_2_07) {
96
- uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
55
+ tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
97
- uint64_t a = qatomic_read__nocheck(p8);
56
+ | VRT(ret) | RA(arg));
98
- uint64_t b = qatomic_read__nocheck(p8 + 1);
57
+ break;
99
-
58
+ } else {
100
- if (HOST_BIG_ENDIAN) {
59
+ /* Altivec does not support integer->vector moves. */
101
- r = int128_make128(b, a);
60
+ return false;
102
- } else {
61
+ }
103
- r = int128_make128(a, b);
62
}
104
- }
63
/* fallthru */
105
- } else {
64
case TCG_TYPE_V64:
106
- r = atomic16_read_ro(pv);
107
- }
108
- return int128_getlo(int128_urshift(r, shr));
109
-}
110
-
111
/**
112
* load_atom_4_by_2:
113
* @pv: host address
65
--
114
--
66
2.17.1
115
2.34.1
67
68
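The extraction in the new generic header is a 128-bit right shift followed by truncation. As a little-endian-only sketch of the same arithmetic on two 64-bit halves (extract_le is a hypothetical name, not part of the patch; the precondition pv % s != 0 guarantees the byte offset o is in 1..7):

#include <stdint.h>

/*
 * Little-endian illustration: a value of size s (1..8 bytes) starts at
 * byte offset o (1..7) inside the 16-byte window read as lo/hi.
 * Shifting the 128-bit pair right by o*8 bits brings it to the low bits.
 */
static uint64_t extract_le(uint64_t lo, uint64_t hi, int o, int s)
{
    int shr = o * 8;
    uint64_t v = (lo >> shr) | (hi << (64 - shr));
    return s == 8 ? v : v & ((UINT64_C(1) << (s * 8)) - 1);
}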
1
Introduce all of the flags required to enable tcg backend vector support,
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
and a runtime flag to indicate the host supports Altivec instructions.
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
host/include/generic/host/store-insert-al16.h | 50 +++++++++++++++++++
5
accel/tcg/ldst_atomicity.c.inc | 40 +--------------
6
2 files changed, 51 insertions(+), 39 deletions(-)
7
create mode 100644 host/include/generic/host/store-insert-al16.h
3
8
4
For now, do not actually set have_isa_altivec to true, because we have not
9
diff --git a/host/include/generic/host/store-insert-al16.h b/host/include/generic/host/store-insert-al16.h
5
yet added all of the code to actually generate all of the required insns.
6
However, we must define these flags in order to disable ifndefs that create
7
stub versions of the functions added here.
8
9
The change to tcg_out_movi works around a buglet in tcg.c wherein if we
10
do not define tcg_out_dupi_vec we get a declared but not defined Werror,
11
but if we only declare it we get a defined but not used Werror. We need
12
to this change to tcg_out_movi eventually anyway, so it's no biggie.
13
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
16
---
17
tcg/ppc/tcg-target.h | 25 ++++++++++++++++
18
tcg/ppc/tcg-target.opc.h | 5 ++++
19
tcg/ppc/tcg-target.inc.c | 62 ++++++++++++++++++++++++++++++++++++++--
20
3 files changed, 89 insertions(+), 3 deletions(-)
21
create mode 100644 tcg/ppc/tcg-target.opc.h
22
23
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
24
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/ppc/tcg-target.h
26
+++ b/tcg/ppc/tcg-target.h
27
@@ -XXX,XX +XXX,XX @@ typedef enum {
28
} TCGPowerISA;
29
30
extern TCGPowerISA have_isa;
31
+extern bool have_altivec;
32
33
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
34
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
35
@@ -XXX,XX +XXX,XX @@ extern TCGPowerISA have_isa;
36
#define TCG_TARGET_HAS_mulsh_i64 1
37
#endif
38
39
+/*
40
+ * While technically Altivec could support V64, it has no 64-bit store
41
+ * instruction and substituting two 32-bit stores makes the generated
42
+ * code quite large.
43
+ */
44
+#define TCG_TARGET_HAS_v64 0
45
+#define TCG_TARGET_HAS_v128 have_altivec
46
+#define TCG_TARGET_HAS_v256 0
47
+
48
+#define TCG_TARGET_HAS_andc_vec 0
49
+#define TCG_TARGET_HAS_orc_vec 0
50
+#define TCG_TARGET_HAS_not_vec 0
51
+#define TCG_TARGET_HAS_neg_vec 0
52
+#define TCG_TARGET_HAS_abs_vec 0
53
+#define TCG_TARGET_HAS_shi_vec 0
54
+#define TCG_TARGET_HAS_shs_vec 0
55
+#define TCG_TARGET_HAS_shv_vec 0
56
+#define TCG_TARGET_HAS_cmp_vec 0
57
+#define TCG_TARGET_HAS_mul_vec 0
58
+#define TCG_TARGET_HAS_sat_vec 0
59
+#define TCG_TARGET_HAS_minmax_vec 0
60
+#define TCG_TARGET_HAS_bitsel_vec 0
61
+#define TCG_TARGET_HAS_cmpsel_vec 0
62
+
63
void flush_icache_range(uintptr_t start, uintptr_t stop);
64
void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
65
66
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
67
new file mode 100644
10
new file mode 100644
68
index XXXXXXX..XXXXXXX
11
index XXXXXXX..XXXXXXX
69
--- /dev/null
12
--- /dev/null
70
+++ b/tcg/ppc/tcg-target.opc.h
13
+++ b/host/include/generic/host/store-insert-al16.h
71
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@
72
+/*
15
+/*
73
+ * Target-specific opcodes for host vector expansion. These will be
16
+ * SPDX-License-Identifier: GPL-2.0-or-later
74
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
17
+ * Atomic store insert into 128-bit, generic version.
75
+ * consider these to be UNSPEC with names.
18
+ *
19
+ * Copyright (C) 2023 Linaro, Ltd.
76
+ */
20
+ */
77
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
21
+
78
index XXXXXXX..XXXXXXX 100644
22
+#ifndef HOST_STORE_INSERT_AL16_H
79
--- a/tcg/ppc/tcg-target.inc.c
23
+#define HOST_STORE_INSERT_AL16_H
80
+++ b/tcg/ppc/tcg-target.inc.c
24
+
81
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
25
+/**
82
26
+ * store_atom_insert_al16:
83
TCGPowerISA have_isa;
27
+ * @p: host address
84
static bool have_isel;
28
+ * @val: shifted value to store
85
+bool have_altivec;
29
+ * @msk: mask for value to store
86
30
+ *
87
#ifndef CONFIG_SOFTMMU
31
+ * Atomically store @val to @p masked by @msk.
88
#define TCG_GUEST_BASE_REG 30
32
+ */
89
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
33
+static inline void ATTRIBUTE_ATOMIC128_OPT
90
}
34
+store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
91
}
35
+{
92
36
+#if defined(CONFIG_ATOMIC128)
93
-static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
37
+ __uint128_t *pu;
94
- tcg_target_long arg)
38
+ Int128Alias old, new;
95
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
39
+
96
+ tcg_target_long val)
40
+ /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
97
{
41
+ pu = __builtin_assume_aligned(ps, 16);
98
- tcg_out_movi_int(s, type, ret, arg, false);
42
+ old.u = *pu;
99
+ g_assert_not_reached();
43
+ msk = int128_not(msk);
44
+ do {
45
+ new.s = int128_and(old.s, msk);
46
+ new.s = int128_or(new.s, val);
47
+ } while (!__atomic_compare_exchange_n(pu, &old.u, new.u, true,
48
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
49
+#else
50
+ Int128 old, new, cmp;
51
+
52
+ ps = __builtin_assume_aligned(ps, 16);
53
+ old = *ps;
54
+ msk = int128_not(msk);
55
+ do {
56
+ cmp = old;
57
+ new = int128_and(old, msk);
58
+ new = int128_or(new, val);
59
+ old = atomic16_cmpxchg(ps, cmp, new);
60
+ } while (int128_ne(cmp, old));
61
+#endif
100
+}
62
+}
101
+
63
+
102
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
64
+#endif /* HOST_STORE_INSERT_AL16_H */
103
+ tcg_target_long arg)
65
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
104
+{
66
index XXXXXXX..XXXXXXX 100644
105
+ switch (type) {
67
--- a/accel/tcg/ldst_atomicity.c.inc
106
+ case TCG_TYPE_I32:
68
+++ b/accel/tcg/ldst_atomicity.c.inc
107
+ case TCG_TYPE_I64:
69
@@ -XXX,XX +XXX,XX @@
108
+ tcg_debug_assert(ret < TCG_REG_V0);
70
*/
109
+ tcg_out_movi_int(s, type, ret, arg, false);
71
110
+ break;
72
#include "host/load-extract-al16-al8.h"
111
+
73
+#include "host/store-insert-al16.h"
112
+ case TCG_TYPE_V64:
74
113
+ case TCG_TYPE_V128:
75
#ifdef CONFIG_ATOMIC64
114
+ tcg_debug_assert(ret >= TCG_REG_V0);
76
# define HAVE_al8 true
115
+ tcg_out_dupi_vec(s, type, ret, arg);
77
@@ -XXX,XX +XXX,XX @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
116
+ break;
78
__ATOMIC_RELAXED, __ATOMIC_RELAXED));
117
+
118
+ default:
119
+ g_assert_not_reached();
120
+ }
121
}
79
}
122
80
123
static bool mask_operand(uint32_t c, int *mb, int *me)
81
-/**
124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
82
- * store_atom_insert_al16:
125
}
83
- * @p: host address
126
}
84
- * @val: shifted value to store
127
85
- * @msk: mask for value to store
128
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
86
- *
129
+{
87
- * Atomically store @val to @p masked by @msk.
130
+ g_assert_not_reached();
88
- */
131
+}
89
-static void ATTRIBUTE_ATOMIC128_OPT
132
+
90
-store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
133
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
91
-{
134
+ TCGReg dst, TCGReg src)
92
-#if defined(CONFIG_ATOMIC128)
135
+{
93
- __uint128_t *pu, old, new;
136
+ g_assert_not_reached();
94
-
137
+}
95
- /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
138
+
96
- pu = __builtin_assume_aligned(ps, 16);
139
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
97
- old = *pu;
140
+ TCGReg out, TCGReg base, intptr_t offset)
98
- do {
141
+{
99
- new = (old & ~msk.u) | val.u;
142
+ g_assert_not_reached();
100
- } while (!__atomic_compare_exchange_n(pu, &old, new, true,
143
+}
101
- __ATOMIC_RELAXED, __ATOMIC_RELAXED));
144
+
102
-#elif defined(CONFIG_CMPXCHG128)
145
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
103
- __uint128_t *pu, old, new;
146
+ unsigned vecl, unsigned vece,
104
-
147
+ const TCGArg *args, const int *const_args)
105
- /*
148
+{
106
- * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
149
+ g_assert_not_reached();
107
- * defer to libatomic, so we must use __sync_*_compare_and_swap_16
150
+}
108
- * and accept the sequential consistency that comes with it.
151
+
109
- */
152
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
110
- pu = __builtin_assume_aligned(ps, 16);
153
+ TCGArg a0, ...)
111
- do {
154
+{
112
- old = *pu;
155
+ g_assert_not_reached();
113
- new = (old & ~msk.u) | val.u;
156
+}
114
- } while (!__sync_bool_compare_and_swap_16(pu, old, new));
157
+
115
-#else
158
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
116
- qemu_build_not_reached();
159
{
117
-#endif
160
static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
118
-}
161
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
119
-
162
120
/**
163
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
121
* store_bytes_leN:
164
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
122
* @pv: host address
165
+ if (have_altivec) {
166
+ tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
167
+ tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
168
+ }
169
170
tcg_target_call_clobber_regs = 0;
171
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
172
--
123
--
173
2.17.1
124
2.34.1
174
175
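The masked-insert loop in the new header follows the usual compare-and-swap pattern. Here is a minimal 8-byte analogue in portable C11, parallel to the existing store_atom_insert_al8 visible in the context above; the helper name is made up and this is only an illustration, since the 16-byte version needs host-specific primitives.

#include <stdatomic.h>
#include <stdint.h>

/* Atomically replace the bits of *p selected by msk with those of val. */
static void masked_insert_u64(_Atomic uint64_t *p, uint64_t val, uint64_t msk)
{
    uint64_t old = atomic_load_explicit(p, memory_order_relaxed);
    uint64_t upd;

    do {
        upd = (old & ~msk) | (val & msk);
    } while (!atomic_compare_exchange_weak_explicit(p, &old, upd,
                                                    memory_order_relaxed,
                                                    memory_order_relaxed));
}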
1
These new instructions are conditional only on MSR.VSX and
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
are thus part of the VSX instruction set, and not Altivec.
3
This includes double-word loads and stores.
4
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
tcg/ppc/tcg-target.inc.c | 11 +++++++++++
4
.../x86_64/host/load-extract-al16-al8.h | 50 +++++++++++++++++++
9
1 file changed, 11 insertions(+)
5
1 file changed, 50 insertions(+)
6
create mode 100644 host/include/x86_64/host/load-extract-al16-al8.h
10
7
11
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
8
diff --git a/host/include/x86_64/host/load-extract-al16-al8.h b/host/include/x86_64/host/load-extract-al16-al8.h
12
index XXXXXXX..XXXXXXX 100644
9
new file mode 100644
13
--- a/tcg/ppc/tcg-target.inc.c
10
index XXXXXXX..XXXXXXX
14
+++ b/tcg/ppc/tcg-target.inc.c
11
--- /dev/null
15
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
12
+++ b/host/include/x86_64/host/load-extract-al16-al8.h
16
#define LVEWX XO31(71)
13
@@ -XXX,XX +XXX,XX @@
17
#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
14
+/*
18
#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
15
+ * SPDX-License-Identifier: GPL-2.0-or-later
19
+#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */
16
+ * Atomic extract 64 from 128-bit, x86_64 version.
20
17
+ *
21
#define STVX XO31(231)
18
+ * Copyright (C) 2023 Linaro, Ltd.
22
#define STVEWX XO31(199)
19
+ */
23
#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
20
+
24
+#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */
21
+#ifndef X86_64_LOAD_EXTRACT_AL16_AL8_H
25
22
+#define X86_64_LOAD_EXTRACT_AL16_AL8_H
26
#define VADDSBS VX4(768)
23
+
27
#define VADDUBS VX4(512)
24
+#ifdef CONFIG_INT128_TYPE
28
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
25
+#include "host/cpuinfo.h"
29
tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
26
+
30
break;
27
+/**
31
}
28
+ * load_atom_extract_al16_or_al8:
32
+ if (have_isa_2_07 && have_vsx) {
29
+ * @pv: host address
33
+ tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
30
+ * @s: object size in bytes, @s <= 8.
34
+ break;
31
+ *
35
+ }
32
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
36
tcg_debug_assert((offset & 3) == 0);
33
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
37
tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
34
+ * otherwise the access must be 8-byte atomic.
38
shift = (offset - 4) & 0xc;
35
+ */
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
36
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
40
tcg_out_mem_long(s, STW, STWX, arg, base, offset);
37
+load_atom_extract_al16_or_al8(void *pv, int s)
41
break;
38
+{
42
}
39
+ uintptr_t pi = (uintptr_t)pv;
43
+ if (have_isa_2_07 && have_vsx) {
40
+ __int128_t *ptr_align = (__int128_t *)(pi & ~7);
44
+ tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
41
+ int shr = (pi & 7) * 8;
45
+ break;
42
+ Int128Alias r;
46
+ }
43
+
47
+ assert((offset & 3) == 0);
44
+ /*
48
tcg_debug_assert((offset & 3) == 0);
45
+ * ptr_align % 16 is now only 0 or 8.
49
shift = (offset - 4) & 0xc;
46
+ * If the host supports atomic loads with VMOVDQU, then always use that,
50
if (shift) {
47
+ * making the branch highly predictable. Otherwise we must use VMOVDQA
48
+ * when ptr_align % 16 == 0 for 16-byte atomicity.
49
+ */
50
+ if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) {
51
+ asm("vmovdqu %1, %0" : "=x" (r.i) : "m" (*ptr_align));
52
+ } else {
53
+ asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align));
54
+ }
55
+ return int128_getlo(int128_urshift(r.s, shr));
56
+}
57
+#else
58
+/* Fallback definition that must be optimized away, or error. */
59
+uint64_t QEMU_ERROR("unsupported atomic")
60
+ load_atom_extract_al16_or_al8(void *pv, int s);
61
+#endif
62
+
63
+#endif /* X86_64_LOAD_EXTRACT_AL16_AL8_H */
51
--
64
--
52
2.17.1
65
2.34.1
53
54
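For the x86_64 header above, the key choice is between VMOVDQA and VMOVDQU. A simplified intrinsic-level sketch of that choice follows; the function and flag names are made up, and whether VMOVDQU is actually a single atomic access depends on the CPU, which is what the CPUINFO_ATOMIC_VMOVDQU bit in the hunk encodes.

#include <emmintrin.h>
#include <stdint.h>

/* Load 16 bytes: the aligned form is only legal when the address is
 * 16-byte aligned; the unaligned form is preferred whenever the CPU
 * guarantees it is still a single atomic access. */
static __m128i load16_sketch(const void *p, int cpu_atomic_vmovdqu)
{
    if (cpu_atomic_vmovdqu || ((uintptr_t)p & 15)) {
        return _mm_loadu_si128((const __m128i *)p);
    }
    return _mm_load_si128((const __m128i *)p);
}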
1
Now that we have implemented the required tcg operations,
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
we can enable detection of host vector support.
3
4
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (PPC32)
5
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
tcg/ppc/tcg-target.inc.c | 4 ++++
4
.../aarch64/host/load-extract-al16-al8.h | 40 +++++++++++++++++++
9
1 file changed, 4 insertions(+)
5
1 file changed, 40 insertions(+)
6
create mode 100644 host/include/aarch64/host/load-extract-al16-al8.h
10
7
11
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
8
diff --git a/host/include/aarch64/host/load-extract-al16-al8.h b/host/include/aarch64/host/load-extract-al16-al8.h
12
index XXXXXXX..XXXXXXX 100644
9
new file mode 100644
13
--- a/tcg/ppc/tcg-target.inc.c
10
index XXXXXXX..XXXXXXX
14
+++ b/tcg/ppc/tcg-target.inc.c
11
--- /dev/null
15
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
12
+++ b/host/include/aarch64/host/load-extract-al16-al8.h
16
have_isel = have_isa_2_06;
13
@@ -XXX,XX +XXX,XX @@
17
#endif
14
+/*
18
15
+ * SPDX-License-Identifier: GPL-2.0-or-later
19
+ if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
16
+ * Atomic extract 64 from 128-bit, AArch64 version.
20
+ have_altivec = true;
17
+ *
21
+ }
18
+ * Copyright (C) 2023 Linaro, Ltd.
19
+ */
22
+
20
+
23
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
21
+#ifndef AARCH64_LOAD_EXTRACT_AL16_AL8_H
24
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
22
+#define AARCH64_LOAD_EXTRACT_AL16_AL8_H
25
if (have_altivec) {
23
+
24
+#include "host/cpuinfo.h"
25
+#include "tcg/debug-assert.h"
26
+
27
+/**
28
+ * load_atom_extract_al16_or_al8:
29
+ * @pv: host address
30
+ * @s: object size in bytes, @s <= 8.
31
+ *
32
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
33
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
34
+ * otherwise the access must be 8-byte atomic.
35
+ */
36
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
37
+{
38
+ uintptr_t pi = (uintptr_t)pv;
39
+ __int128_t *ptr_align = (__int128_t *)(pi & ~7);
40
+ int shr = (pi & 7) * 8;
41
+ uint64_t l, h;
42
+
43
+ /*
44
+ * With FEAT_LSE2, LDP is single-copy atomic if 16-byte aligned
45
+ * and single-copy atomic on the parts if 8-byte aligned.
46
+ * All we need do is align the pointer mod 8.
47
+ */
48
+ tcg_debug_assert(HAVE_ATOMIC128_RO);
49
+ asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*ptr_align));
50
+ return (l >> shr) | (h << (-shr & 63));
51
+}
52
+
53
+#endif /* AARCH64_LOAD_EXTRACT_AL16_AL8_H */
26
--
54
--
27
2.17.1
55
2.34.1
28
29
1
For Altivec, this is done via vector shift by vector,
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
and loading the immediate into a register.
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
host/include/aarch64/host/store-insert-al16.h | 47 +++++++++++++++++++
5
1 file changed, 47 insertions(+)
6
create mode 100644 host/include/aarch64/host/store-insert-al16.h
3
7
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
diff --git a/host/include/aarch64/host/store-insert-al16.h b/host/include/aarch64/host/store-insert-al16.h
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
9
new file mode 100644
6
---
10
index XXXXXXX..XXXXXXX
7
tcg/ppc/tcg-target.h | 2 +-
11
--- /dev/null
8
tcg/ppc/tcg-target.inc.c | 58 ++++++++++++++++++++++++++++++++++++++--
12
+++ b/host/include/aarch64/host/store-insert-al16.h
9
2 files changed, 57 insertions(+), 3 deletions(-)
13
@@ -XXX,XX +XXX,XX @@
10
14
+/*
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
15
+ * SPDX-License-Identifier: GPL-2.0-or-later
12
index XXXXXXX..XXXXXXX 100644
16
+ * Atomic store insert into 128-bit, AArch64 version.
13
--- a/tcg/ppc/tcg-target.h
17
+ *
14
+++ b/tcg/ppc/tcg-target.h
18
+ * Copyright (C) 2023 Linaro, Ltd.
15
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
19
+ */
16
#define TCG_TARGET_HAS_abs_vec 0
17
#define TCG_TARGET_HAS_shi_vec 0
18
#define TCG_TARGET_HAS_shs_vec 0
19
-#define TCG_TARGET_HAS_shv_vec 0
20
+#define TCG_TARGET_HAS_shv_vec 1
21
#define TCG_TARGET_HAS_cmp_vec 1
22
#define TCG_TARGET_HAS_mul_vec 0
23
#define TCG_TARGET_HAS_sat_vec 1
24
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/ppc/tcg-target.inc.c
27
+++ b/tcg/ppc/tcg-target.inc.c
28
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
29
#define VCMPGTUH VX4(582)
30
#define VCMPGTUW VX4(646)
31
32
+#define VSLB VX4(260)
33
+#define VSLH VX4(324)
34
+#define VSLW VX4(388)
35
+#define VSRB VX4(516)
36
+#define VSRH VX4(580)
37
+#define VSRW VX4(644)
38
+#define VSRAB VX4(772)
39
+#define VSRAH VX4(836)
40
+#define VSRAW VX4(900)
41
+
20
+
42
#define VAND VX4(1028)
21
+#ifndef AARCH64_STORE_INSERT_AL16_H
43
#define VANDC VX4(1092)
22
+#define AARCH64_STORE_INSERT_AL16_H
44
#define VNOR VX4(1284)
23
+
45
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
24
+/**
46
case INDEX_op_sssub_vec:
25
+ * store_atom_insert_al16:
47
case INDEX_op_usadd_vec:
26
+ * @p: host address
48
case INDEX_op_ussub_vec:
27
+ * @val: shifted value to store
49
+ case INDEX_op_shlv_vec:
28
+ * @msk: mask for value to store
50
+ case INDEX_op_shrv_vec:
29
+ *
51
+ case INDEX_op_sarv_vec:
30
+ * Atomically store @val to @p masked by @msk.
52
return vece <= MO_32;
31
+ */
53
case INDEX_op_cmp_vec:
32
+static inline void ATTRIBUTE_ATOMIC128_OPT
54
+ case INDEX_op_shli_vec:
33
+store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
55
+ case INDEX_op_shri_vec:
56
+ case INDEX_op_sari_vec:
57
return vece <= MO_32 ? -1 : 0;
58
default:
59
return 0;
60
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
61
umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
62
smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
63
umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
64
- smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 };
65
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
66
+ shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
67
+ shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
68
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 };
69
70
TCGType type = vecl + TCG_TYPE_V64;
71
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
72
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
73
case INDEX_op_umax_vec:
74
insn = umax_op[vece];
75
break;
76
+ case INDEX_op_shlv_vec:
77
+ insn = shlv_op[vece];
78
+ break;
79
+ case INDEX_op_shrv_vec:
80
+ insn = shrv_op[vece];
81
+ break;
82
+ case INDEX_op_sarv_vec:
83
+ insn = sarv_op[vece];
84
+ break;
85
case INDEX_op_and_vec:
86
insn = VAND;
87
break;
88
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
89
tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
90
}
91
92
+static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
93
+ TCGv_vec v1, TCGArg imm, TCGOpcode opci)
94
+{
34
+{
95
+ TCGv_vec t1 = tcg_temp_new_vec(type);
35
+ /*
36
+ * GCC only implements __sync* primitives for int128 on aarch64.
37
+ * We can do better without the barriers, and integrating the
38
+ * arithmetic into the load-exclusive/store-conditional pair.
39
+ */
40
+ uint64_t tl, th, vl, vh, ml, mh;
41
+ uint32_t fail;
96
+
42
+
97
+ /* Splat w/bytes for xxspltib. */
43
+ qemu_build_assert(!HOST_BIG_ENDIAN);
98
+ tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1));
44
+ vl = int128_getlo(val);
99
+ vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
45
+ vh = int128_gethi(val);
100
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
46
+ ml = int128_getlo(msk);
101
+ tcg_temp_free_vec(t1);
47
+ mh = int128_gethi(msk);
48
+
49
+ asm("0: ldxp %[l], %[h], %[mem]\n\t"
50
+ "bic %[l], %[l], %[ml]\n\t"
51
+ "bic %[h], %[h], %[mh]\n\t"
52
+ "orr %[l], %[l], %[vl]\n\t"
53
+ "orr %[h], %[h], %[vh]\n\t"
54
+ "stxp %w[f], %[l], %[h], %[mem]\n\t"
55
+ "cbnz %w[f], 0b\n"
56
+ : [mem] "+Q"(*ps), [f] "=&r"(fail), [l] "=&r"(tl), [h] "=&r"(th)
57
+ : [vl] "r"(vl), [vh] "r"(vh), [ml] "r"(ml), [mh] "r"(mh));
102
+}
58
+}
103
+
59
+
104
static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
60
+#endif /* AARCH64_STORE_INSERT_AL16_H */
105
TCGv_vec v1, TCGv_vec v2, TCGCond cond)
106
{
107
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
108
{
109
va_list va;
110
TCGv_vec v0, v1, v2;
111
+ TCGArg a2;
112
113
va_start(va, a0);
114
v0 = temp_tcgv_vec(arg_temp(a0));
115
v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
116
- v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
117
+ a2 = va_arg(va, TCGArg);
118
119
switch (opc) {
120
+ case INDEX_op_shli_vec:
121
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
122
+ break;
123
+ case INDEX_op_shri_vec:
124
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
125
+ break;
126
+ case INDEX_op_sari_vec:
127
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
128
+ break;
129
case INDEX_op_cmp_vec:
130
+ v2 = temp_tcgv_vec(arg_temp(a2));
131
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
132
break;
133
default:
134
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
135
case INDEX_op_smin_vec:
136
case INDEX_op_umax_vec:
137
case INDEX_op_umin_vec:
138
+ case INDEX_op_shlv_vec:
139
+ case INDEX_op_shrv_vec:
140
+ case INDEX_op_sarv_vec:
141
return &v_v_v;
142
case INDEX_op_not_vec:
143
case INDEX_op_dup_vec:
144
--
61
--
145
2.17.1
62
2.34.1
146
147
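The shift-by-immediate expansion above (splat the count with a byte dup, then shift by vector) can be pictured with GCC's generic vector extensions. This is a sketch only, with a hypothetical helper name, not the TCG expansion itself; it mirrors the MO_8 case, where the count is masked to the lane width.

/* 16 lanes of 8 bits, as with VSLB: the count is splatted into every
 * lane and an element-wise shift-by-vector does the rest. */
typedef unsigned char v16qi __attribute__((vector_size(16)));

static v16qi shli_by_splat(v16qi v, unsigned imm)
{
    unsigned char c = imm & 7;   /* count modulo the lane width */
    v16qi cnt = { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c };
    return v << cnt;             /* vector shift by vector */
}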
1
Altivec supports 32 128-bit vector registers, whose names are
1
The last use was removed by e77c89fb086a.
2
by convention v0 through v31.
3
2
3
Fixes: e77c89fb086a ("cputlb: Remove static tlb sizing")
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
---
6
---
7
tcg/ppc/tcg-target.h | 11 ++++-
7
tcg/aarch64/tcg-target.h | 1 -
8
tcg/ppc/tcg-target.inc.c | 88 +++++++++++++++++++++++++---------------
8
tcg/arm/tcg-target.h | 1 -
9
2 files changed, 65 insertions(+), 34 deletions(-)
9
tcg/i386/tcg-target.h | 1 -
10
tcg/mips/tcg-target.h | 1 -
11
tcg/ppc/tcg-target.h | 1 -
12
tcg/riscv/tcg-target.h | 1 -
13
tcg/s390x/tcg-target.h | 1 -
14
tcg/sparc64/tcg-target.h | 1 -
15
tcg/tci/tcg-target.h | 1 -
16
9 files changed, 9 deletions(-)
10
17
18
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/tcg/aarch64/tcg-target.h
21
+++ b/tcg/aarch64/tcg-target.h
22
@@ -XXX,XX +XXX,XX @@
23
#include "host/cpuinfo.h"
24
25
#define TCG_TARGET_INSN_UNIT_SIZE 4
26
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
27
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
28
29
typedef enum {
30
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
31
index XXXXXXX..XXXXXXX 100644
32
--- a/tcg/arm/tcg-target.h
33
+++ b/tcg/arm/tcg-target.h
34
@@ -XXX,XX +XXX,XX @@ extern int arm_arch;
35
#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
36
37
#define TCG_TARGET_INSN_UNIT_SIZE 4
38
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
39
#define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX
40
41
typedef enum {
42
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/i386/tcg-target.h
45
+++ b/tcg/i386/tcg-target.h
46
@@ -XXX,XX +XXX,XX @@
47
#include "host/cpuinfo.h"
48
49
#define TCG_TARGET_INSN_UNIT_SIZE 1
50
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31
51
52
#ifdef __x86_64__
53
# define TCG_TARGET_REG_BITS 64
54
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
55
index XXXXXXX..XXXXXXX 100644
56
--- a/tcg/mips/tcg-target.h
57
+++ b/tcg/mips/tcg-target.h
58
@@ -XXX,XX +XXX,XX @@
59
#endif
60
61
#define TCG_TARGET_INSN_UNIT_SIZE 4
62
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
63
#define TCG_TARGET_NB_REGS 32
64
65
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
11
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
66
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
67
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.h
68
--- a/tcg/ppc/tcg-target.h
14
+++ b/tcg/ppc/tcg-target.h
69
+++ b/tcg/ppc/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@
70
@@ -XXX,XX +XXX,XX @@
16
# define TCG_TARGET_REG_BITS 32
71
17
#endif
72
#define TCG_TARGET_NB_REGS 64
18
19
-#define TCG_TARGET_NB_REGS 32
20
+#define TCG_TARGET_NB_REGS 64
21
#define TCG_TARGET_INSN_UNIT_SIZE 4
73
#define TCG_TARGET_INSN_UNIT_SIZE 4
22
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
74
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
23
75
24
@@ -XXX,XX +XXX,XX @@ typedef enum {
76
typedef enum {
25
TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
77
TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
26
TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
78
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
27
28
+ TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
29
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
30
+ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
31
+ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
32
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
33
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
34
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
35
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
36
+
37
TCG_REG_CALL_STACK = TCG_REG_R1,
38
TCG_AREG0 = TCG_REG_R27
39
} TCGReg;
40
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
41
index XXXXXXX..XXXXXXX 100644
79
index XXXXXXX..XXXXXXX 100644
42
--- a/tcg/ppc/tcg-target.inc.c
80
--- a/tcg/riscv/tcg-target.h
43
+++ b/tcg/ppc/tcg-target.inc.c
81
+++ b/tcg/riscv/tcg-target.h
44
@@ -XXX,XX +XXX,XX @@
82
@@ -XXX,XX +XXX,XX @@
45
# define TCG_REG_TMP1 TCG_REG_R12
83
#define TCG_TARGET_REG_BITS 64
46
#endif
84
47
85
#define TCG_TARGET_INSN_UNIT_SIZE 4
48
+#define TCG_VEC_TMP1 TCG_REG_V0
86
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20
49
+#define TCG_VEC_TMP2 TCG_REG_V1
87
#define TCG_TARGET_NB_REGS 32
50
+
88
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
51
#define TCG_REG_TB TCG_REG_R31
89
52
#define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
90
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
53
91
index XXXXXXX..XXXXXXX 100644
54
@@ -XXX,XX +XXX,XX @@ bool have_isa_3_00;
92
--- a/tcg/s390x/tcg-target.h
55
#endif
93
+++ b/tcg/s390x/tcg-target.h
56
94
@@ -XXX,XX +XXX,XX @@
57
#ifdef CONFIG_DEBUG_TCG
95
#define S390_TCG_TARGET_H
58
-static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
96
59
- "r0",
97
#define TCG_TARGET_INSN_UNIT_SIZE 2
60
- "r1",
98
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
61
- "r2",
99
62
- "r3",
100
/* We have a +- 4GB range on the branches; leave some slop. */
63
- "r4",
101
#define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
64
- "r5",
102
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
65
- "r6",
103
index XXXXXXX..XXXXXXX 100644
66
- "r7",
104
--- a/tcg/sparc64/tcg-target.h
67
- "r8",
105
+++ b/tcg/sparc64/tcg-target.h
68
- "r9",
106
@@ -XXX,XX +XXX,XX @@
69
- "r10",
107
#define SPARC_TCG_TARGET_H
70
- "r11",
108
71
- "r12",
109
#define TCG_TARGET_INSN_UNIT_SIZE 4
72
- "r13",
110
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
73
- "r14",
111
#define TCG_TARGET_NB_REGS 32
74
- "r15",
112
#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
75
- "r16",
113
76
- "r17",
114
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
77
- "r18",
115
index XXXXXXX..XXXXXXX 100644
78
- "r19",
116
--- a/tcg/tci/tcg-target.h
79
- "r20",
117
+++ b/tcg/tci/tcg-target.h
80
- "r21",
118
@@ -XXX,XX +XXX,XX @@
81
- "r22",
119
82
- "r23",
120
#define TCG_TARGET_INTERPRETER 1
83
- "r24",
121
#define TCG_TARGET_INSN_UNIT_SIZE 4
84
- "r25",
122
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
85
- "r26",
123
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
86
- "r27",
124
87
- "r28",
125
#if UINTPTR_MAX == UINT32_MAX
88
- "r29",
89
- "r30",
90
- "r31"
91
+static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
92
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
93
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
94
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
95
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
96
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
97
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
98
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
99
+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
100
};
101
#endif
102
103
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
104
TCG_REG_R5,
105
TCG_REG_R4,
106
TCG_REG_R3,
107
+
108
+ /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
109
+ TCG_REG_V2, /* call clobbered, vectors */
110
+ TCG_REG_V3,
111
+ TCG_REG_V4,
112
+ TCG_REG_V5,
113
+ TCG_REG_V6,
114
+ TCG_REG_V7,
115
+ TCG_REG_V8,
116
+ TCG_REG_V9,
117
+ TCG_REG_V10,
118
+ TCG_REG_V11,
119
+ TCG_REG_V12,
120
+ TCG_REG_V13,
121
+ TCG_REG_V14,
122
+ TCG_REG_V15,
123
+ TCG_REG_V16,
124
+ TCG_REG_V17,
125
+ TCG_REG_V18,
126
+ TCG_REG_V19,
127
};
128
129
static const int tcg_target_call_iarg_regs[] = {
130
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
131
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
132
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
133
134
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
135
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
136
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
137
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
138
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
139
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
140
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
141
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
142
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
143
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
144
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
145
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
146
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
147
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
148
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
149
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
150
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
151
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
152
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
153
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
154
+
155
s->reserved_regs = 0;
156
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
157
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
158
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
159
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
160
#endif
161
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
162
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
163
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
164
if (USE_REG_TB) {
165
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */
166
}
167
--
126
--
168
2.17.1
127
2.34.1
169
128
170
129
1
Add support for vector saturated add/subtract using Altivec
1
Invert the exit code, for use with the testsuite.
2
instructions:
3
VADDSBS, VADDSHS, VADDSWS, VADDUBS, VADDUHS, VADDUWS, and
4
VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS.
5
2
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
8
---
4
---
9
tcg/ppc/tcg-target.h | 2 +-
5
scripts/decodetree.py | 9 +++++++--
10
tcg/ppc/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++++++++
6
1 file changed, 7 insertions(+), 2 deletions(-)
11
2 files changed, 37 insertions(+), 1 deletion(-)
12
7
13
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
8
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
14
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target.h
10
--- a/scripts/decodetree.py
16
+++ b/tcg/ppc/tcg-target.h
11
+++ b/scripts/decodetree.py
17
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
12
@@ -XXX,XX +XXX,XX @@
18
#define TCG_TARGET_HAS_shv_vec 0
13
formats = {}
19
#define TCG_TARGET_HAS_cmp_vec 1
14
allpatterns = []
20
#define TCG_TARGET_HAS_mul_vec 0
15
anyextern = False
21
-#define TCG_TARGET_HAS_sat_vec 0
16
+testforerror = False
22
+#define TCG_TARGET_HAS_sat_vec 1
17
23
#define TCG_TARGET_HAS_minmax_vec 1
18
translate_prefix = 'trans'
24
#define TCG_TARGET_HAS_bitsel_vec 0
19
translate_scope = 'static '
25
#define TCG_TARGET_HAS_cmpsel_vec 0
20
@@ -XXX,XX +XXX,XX @@ def error_with_file(file, lineno, *args):
26
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
21
if output_file and output_fd:
27
index XXXXXXX..XXXXXXX 100644
22
output_fd.close()
28
--- a/tcg/ppc/tcg-target.inc.c
23
os.remove(output_file)
29
+++ b/tcg/ppc/tcg-target.inc.c
24
- exit(1)
30
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
25
+ exit(0 if testforerror else 1)
31
#define STVX XO31(231)
26
# end error_with_file
32
#define STVEWX XO31(199)
27
33
28
34
+#define VADDSBS VX4(768)
29
@@ -XXX,XX +XXX,XX @@ def main():
35
+#define VADDUBS VX4(512)
30
global bitop_width
36
#define VADDUBM VX4(0)
31
global variablewidth
37
+#define VADDSHS VX4(832)
32
global anyextern
38
+#define VADDUHS VX4(576)
33
+ global testforerror
39
#define VADDUHM VX4(64)
34
40
+#define VADDSWS VX4(896)
35
decode_scope = 'static '
41
+#define VADDUWS VX4(640)
36
42
#define VADDUWM VX4(128)
37
long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
43
38
- 'static-decode=', 'varinsnwidth=']
44
+#define VSUBSBS VX4(1792)
39
+ 'static-decode=', 'varinsnwidth=', 'test-for-error']
45
+#define VSUBUBS VX4(1536)
40
try:
46
#define VSUBUBM VX4(1024)
41
(opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
47
+#define VSUBSHS VX4(1856)
42
except getopt.GetoptError as err:
48
+#define VSUBUHS VX4(1600)
43
@@ -XXX,XX +XXX,XX @@ def main():
49
#define VSUBUHM VX4(1088)
44
bitop_width = 64
50
+#define VSUBSWS VX4(1920)
45
elif insnwidth != 32:
51
+#define VSUBUWS VX4(1664)
46
error(0, 'cannot handle insns of width', insnwidth)
52
#define VSUBUWM VX4(1152)
47
+ elif o == '--test-for-error':
53
48
+ testforerror = True
54
#define VMAXSB VX4(258)
49
else:
55
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
50
assert False, 'unhandled option'
56
case INDEX_op_smin_vec:
51
57
case INDEX_op_umax_vec:
52
@@ -XXX,XX +XXX,XX @@ def main():
58
case INDEX_op_umin_vec:
53
59
+ case INDEX_op_ssadd_vec:
54
if output_file:
60
+ case INDEX_op_sssub_vec:
55
output_fd.close()
61
+ case INDEX_op_usadd_vec:
56
+ exit(1 if testforerror else 0)
62
+ case INDEX_op_ussub_vec:
57
# end main
63
return vece <= MO_32;
58
64
case INDEX_op_cmp_vec:
59
65
return vece <= MO_32 ? -1 : 0;
66
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
67
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
68
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
69
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
70
+ ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
71
+ usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
72
+ sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
73
+ ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
74
umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
75
smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
76
umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
77
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
78
case INDEX_op_sub_vec:
79
insn = sub_op[vece];
80
break;
81
+ case INDEX_op_ssadd_vec:
82
+ insn = ssadd_op[vece];
83
+ break;
84
+ case INDEX_op_sssub_vec:
85
+ insn = sssub_op[vece];
86
+ break;
87
+ case INDEX_op_usadd_vec:
88
+ insn = usadd_op[vece];
89
+ break;
90
+ case INDEX_op_ussub_vec:
91
+ insn = ussub_op[vece];
92
+ break;
93
case INDEX_op_smin_vec:
94
insn = smin_op[vece];
95
break;
96
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
97
case INDEX_op_andc_vec:
98
case INDEX_op_orc_vec:
99
case INDEX_op_cmp_vec:
100
+ case INDEX_op_ssadd_vec:
101
+ case INDEX_op_sssub_vec:
102
+ case INDEX_op_usadd_vec:
103
+ case INDEX_op_ussub_vec:
104
case INDEX_op_smax_vec:
105
case INDEX_op_smin_vec:
106
case INDEX_op_umax_vec:
107
--
60
--
108
2.17.1
61
2.34.1
109
110
diff view generated by jsdifflib
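The new --test-for-error switch simply inverts decodetree.py's exit status, so the err_*.decode inputs can be driven as ordinary tests that are expected to fail. A minimal sketch of just that inversion, assuming nothing about the rest of the script's option handling:

    # Sketch of the --test-for-error exit-status inversion only;
    # decodetree.py itself parses many more options.
    import sys

    def finish(had_error, test_for_error):
        # Normally an error exits non-zero; under --test-for-error the
        # meaning flips: an error is the expected outcome (exit 0) and
        # a clean run is the failure (exit 1).
        if had_error:
            return 0 if test_for_error else 1
        return 1 if test_for_error else 0

    if __name__ == '__main__':
        test_for_error = '--test-for-error' in sys.argv[1:]
        # Pretend parsing failed; a harness treats exit 0 as "pass".
        sys.exit(finish(True, test_for_error))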
1
Add support for vector add/subtract using Altivec instructions:
1
Fix two copy-paste errors when walking the parse tree: prop_format() and build_tree() each recursed into the other by mistake.
2
VADDUBM, VADDUHM, VADDUWM, VSUBUBM, VSUBUHM, VSUBUWM.
3
2
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
---
4
---
7
tcg/ppc/tcg-target.inc.c | 20 ++++++++++++++++++++
5
scripts/decodetree.py | 4 ++--
8
1 file changed, 20 insertions(+)
6
1 file changed, 2 insertions(+), 2 deletions(-)
9
7
10
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
8
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
11
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.inc.c
10
--- a/scripts/decodetree.py
13
+++ b/tcg/ppc/tcg-target.inc.c
11
+++ b/scripts/decodetree.py
14
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
12
@@ -XXX,XX +XXX,XX @@ def build_tree(self):
15
#define STVX XO31(231)
13
16
#define STVEWX XO31(199)
14
def prop_format(self):
17
15
for p in self.pats:
18
+#define VADDUBM VX4(0)
16
- p.build_tree()
19
+#define VADDUHM VX4(64)
17
+ p.prop_format()
20
+#define VADDUWM VX4(128)
18
21
+
19
def prop_width(self):
22
+#define VSUBUBM VX4(1024)
20
width = None
23
+#define VSUBUHM VX4(1088)
21
@@ -XXX,XX +XXX,XX @@ def __build_tree(pats, outerbits, outermask):
24
+#define VSUBUWM VX4(1152)
22
return t
25
+
23
26
#define VMAXSB VX4(258)
24
def build_tree(self):
27
#define VMAXSH VX4(322)
25
- super().prop_format()
28
#define VMAXSW VX4(386)
26
+ super().build_tree()
29
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
27
self.tree = self.__build_tree(self.pats, self.fixedbits,
30
case INDEX_op_andc_vec:
28
self.fixedmask)
31
case INDEX_op_not_vec:
29
32
return 1;
33
+ case INDEX_op_add_vec:
34
+ case INDEX_op_sub_vec:
35
case INDEX_op_smax_vec:
36
case INDEX_op_smin_vec:
37
case INDEX_op_umax_vec:
38
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
39
const TCGArg *args, const int *const_args)
40
{
41
static const uint32_t
42
+ add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
43
+ sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
44
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
45
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
46
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
47
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
48
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
49
return;
50
51
+ case INDEX_op_add_vec:
52
+ insn = add_op[vece];
53
+ break;
54
+ case INDEX_op_sub_vec:
55
+ insn = sub_op[vece];
56
+ break;
57
case INDEX_op_smin_vec:
58
insn = smin_op[vece];
59
break;
60
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
61
return (TCG_TARGET_REG_BITS == 64 ? &S_S
62
: TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
63
64
+ case INDEX_op_add_vec:
65
+ case INDEX_op_sub_vec:
66
case INDEX_op_and_vec:
67
case INDEX_op_or_vec:
68
case INDEX_op_xor_vec:
69
--
30
--
70
2.17.1
31
2.34.1
71
72
diff view generated by jsdifflib
1
Add support for vector maximum/minimum using Altivec instructions
1
Test err_pattern_group_empty.decode failed with exception:
2
VMAXSB, VMAXSH, VMAXSW, VMAXUB, VMAXUH, VMAXUW, and
2
3
VMINSB, VMINSH, VMINSW, VMINUB, VMINUH, VMINUW.
3
Traceback (most recent call last):
4
File "./scripts/decodetree.py", line 1424, in <module> main()
5
File "./scripts/decodetree.py", line 1342, in main toppat.build_tree()
6
File "./scripts/decodetree.py", line 627, in build_tree
7
self.tree = self.__build_tree(self.pats, self.fixedbits,
8
File "./scripts/decodetree.py", line 607, in __build_tree
9
fb = i.fixedbits & innermask
10
TypeError: unsupported operand type(s) for &: 'NoneType' and 'int'
4
11
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
7
---
13
---
8
tcg/ppc/tcg-target.h | 2 +-
14
scripts/decodetree.py | 6 ++++++
9
tcg/ppc/tcg-target.inc.c | 40 +++++++++++++++++++++++++++++++++++++++-
15
1 file changed, 6 insertions(+)
10
2 files changed, 40 insertions(+), 2 deletions(-)
11
16
12
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
17
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
13
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/ppc/tcg-target.h
19
--- a/scripts/decodetree.py
15
+++ b/tcg/ppc/tcg-target.h
20
+++ b/scripts/decodetree.py
16
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
21
@@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask):
17
#define TCG_TARGET_HAS_cmp_vec 1
22
output(ind, '}\n')
18
#define TCG_TARGET_HAS_mul_vec 0
23
else:
19
#define TCG_TARGET_HAS_sat_vec 0
24
p.output_code(i, extracted, p.fixedbits, p.fixedmask)
20
-#define TCG_TARGET_HAS_minmax_vec 0
21
+#define TCG_TARGET_HAS_minmax_vec 1
22
#define TCG_TARGET_HAS_bitsel_vec 0
23
#define TCG_TARGET_HAS_cmpsel_vec 0
24
25
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/tcg/ppc/tcg-target.inc.c
28
+++ b/tcg/ppc/tcg-target.inc.c
29
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
30
#define STVX XO31(231)
31
#define STVEWX XO31(199)
32
33
+#define VMAXSB VX4(258)
34
+#define VMAXSH VX4(322)
35
+#define VMAXSW VX4(386)
36
+#define VMAXUB VX4(2)
37
+#define VMAXUH VX4(66)
38
+#define VMAXUW VX4(130)
39
+#define VMINSB VX4(770)
40
+#define VMINSH VX4(834)
41
+#define VMINSW VX4(898)
42
+#define VMINUB VX4(514)
43
+#define VMINUH VX4(578)
44
+#define VMINUW VX4(642)
45
+
25
+
46
#define VCMPEQUB VX4(6)
26
+ def build_tree(self):
47
#define VCMPEQUH VX4(70)
27
+ if not self.pats:
48
#define VCMPEQUW VX4(134)
28
+ error_with_file(self.file, self.lineno, 'empty pattern group')
49
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
29
+ super().build_tree()
50
case INDEX_op_andc_vec:
30
+
51
case INDEX_op_not_vec:
31
#end IncMultiPattern
52
return 1;
32
53
+ case INDEX_op_smax_vec:
33
54
+ case INDEX_op_smin_vec:
55
+ case INDEX_op_umax_vec:
56
+ case INDEX_op_umin_vec:
57
+ return vece <= MO_32;
58
case INDEX_op_cmp_vec:
59
return vece <= MO_32 ? -1 : 0;
60
default:
61
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
62
static const uint32_t
63
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
64
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
65
- gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 };
66
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
67
+ umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
68
+ smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
69
+ umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
70
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 };
71
72
TCGType type = vecl + TCG_TYPE_V64;
73
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
74
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
75
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
76
return;
77
78
+ case INDEX_op_smin_vec:
79
+ insn = smin_op[vece];
80
+ break;
81
+ case INDEX_op_umin_vec:
82
+ insn = umin_op[vece];
83
+ break;
84
+ case INDEX_op_smax_vec:
85
+ insn = smax_op[vece];
86
+ break;
87
+ case INDEX_op_umax_vec:
88
+ insn = umax_op[vece];
89
+ break;
90
case INDEX_op_and_vec:
91
insn = VAND;
92
break;
93
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
94
case INDEX_op_andc_vec:
95
case INDEX_op_orc_vec:
96
case INDEX_op_cmp_vec:
97
+ case INDEX_op_smax_vec:
98
+ case INDEX_op_smin_vec:
99
+ case INDEX_op_umax_vec:
100
+ case INDEX_op_umin_vec:
101
return &v_v_v;
102
case INDEX_op_not_vec:
103
case INDEX_op_dup_vec:
104
--
34
--
105
2.17.1
35
2.34.1
106
107
diff view generated by jsdifflib
1
Introduce macros VRT(), VRA(), VRB(), VRC() used for encoding
1
Do not try to remove an output file under /dev, nor report any PermissionError on the remove.
2
elements of Altivec instructions.
2
The primary purpose is testing with -o /dev/null.
3
3
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
6
---
5
---
7
tcg/ppc/tcg-target.inc.c | 5 +++++
6
scripts/decodetree.py | 7 ++++++-
8
1 file changed, 5 insertions(+)
7
1 file changed, 6 insertions(+), 1 deletion(-)
9
8
10
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
9
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.inc.c
11
--- a/scripts/decodetree.py
13
+++ b/tcg/ppc/tcg-target.inc.c
12
+++ b/scripts/decodetree.py
14
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
13
@@ -XXX,XX +XXX,XX @@ def error_with_file(file, lineno, *args):
15
#define MB64(b) ((b)<<5)
14
16
#define FXM(b) (1 << (19 - (b)))
15
if output_file and output_fd:
17
16
output_fd.close()
18
+#define VRT(r) (((r) & 31) << 21)
17
- os.remove(output_file)
19
+#define VRA(r) (((r) & 31) << 16)
18
+ # Do not try to remove e.g. -o /dev/null
20
+#define VRB(r) (((r) & 31) << 11)
19
+ if not output_file.startswith("/dev"):
21
+#define VRC(r) (((r) & 31) << 6)
20
+ try:
22
+
21
+ os.remove(output_file)
23
#define LK 1
22
+ except PermissionError:
24
23
+ pass
25
#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
24
exit(0 if testforerror else 1)
25
# end error_with_file
26
26
--
27
--
27
2.17.1
28
2.34.1
28
29
diff view generated by jsdifflib
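The VRT()/VRA()/VRB()/VRC() macros above place Altivec register numbers into their VX/VA-form bit positions. As a rough illustration only, the following Python sketch assembles a complete instruction word for vaddubm; it assumes the backend's usual OPCD(opc) = opc << 26 primary-opcode macro (not shown in this hunk), while VX4() and the VADDUBM extended opcode come from later patches in the series:

    # Illustrative only: how the VRT/VRA/VRB macros compose a VX-form
    # Altivec instruction word.  OPCD(opc) == opc << 26 is assumed.

    def OPCD(opc):
        return opc << 26          # primary opcode, bits 0..5

    def VX4(opc):
        return OPCD(4) | opc      # VX-form: primary opcode 4 + extended opcode

    def VRT(r): return (r & 31) << 21
    def VRA(r): return (r & 31) << 16
    def VRB(r): return (r & 31) << 11

    VADDUBM = VX4(0)              # extended opcode 0, from the series

    def vaddubm(vrt, vra, vrb):
        # Encode "vaddubm vrt, vra, vrb" as a 32-bit instruction word.
        return VADDUBM | VRT(vrt) | VRA(vra) | VRB(vrb)

    print(hex(vaddubm(1, 2, 3)))  # 0x10221800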
1
This is identical to have_isa_2_06, so replace it.
2
3
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
2
---
6
tcg/ppc/tcg-target.inc.c | 5 ++---
3
tests/decode/check.sh | 24 ----------------
7
1 file changed, 2 insertions(+), 3 deletions(-)
4
tests/decode/meson.build | 59 ++++++++++++++++++++++++++++++++++++++++
5
tests/meson.build | 5 +---
6
3 files changed, 60 insertions(+), 28 deletions(-)
7
delete mode 100755 tests/decode/check.sh
8
create mode 100644 tests/decode/meson.build
8
9
9
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
10
diff --git a/tests/decode/check.sh b/tests/decode/check.sh
11
deleted file mode 100755
12
index XXXXXXX..XXXXXXX
13
--- a/tests/decode/check.sh
14
+++ /dev/null
15
@@ -XXX,XX +XXX,XX @@
16
-#!/bin/sh
17
-# This work is licensed under the terms of the GNU LGPL, version 2 or later.
18
-# See the COPYING.LIB file in the top-level directory.
19
-
20
-PYTHON=$1
21
-DECODETREE=$2
22
-E=0
23
-
24
-# All of these tests should produce errors
25
-for i in err_*.decode; do
26
- if $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then
27
- # Pass, aka failed to fail.
28
- echo FAIL: $i 1>&2
29
- E=1
30
- fi
31
-done
32
-
33
-for i in succ_*.decode; do
34
- if ! $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then
35
- echo FAIL:$i 1>&2
36
- fi
37
-done
38
-
39
-exit $E
40
diff --git a/tests/decode/meson.build b/tests/decode/meson.build
41
new file mode 100644
42
index XXXXXXX..XXXXXXX
43
--- /dev/null
44
+++ b/tests/decode/meson.build
45
@@ -XXX,XX +XXX,XX @@
46
+err_tests = [
47
+ 'err_argset1.decode',
48
+ 'err_argset2.decode',
49
+ 'err_field1.decode',
50
+ 'err_field2.decode',
51
+ 'err_field3.decode',
52
+ 'err_field4.decode',
53
+ 'err_field5.decode',
54
+ 'err_field6.decode',
55
+ 'err_init1.decode',
56
+ 'err_init2.decode',
57
+ 'err_init3.decode',
58
+ 'err_init4.decode',
59
+ 'err_overlap1.decode',
60
+ 'err_overlap2.decode',
61
+ 'err_overlap3.decode',
62
+ 'err_overlap4.decode',
63
+ 'err_overlap5.decode',
64
+ 'err_overlap6.decode',
65
+ 'err_overlap7.decode',
66
+ 'err_overlap8.decode',
67
+ 'err_overlap9.decode',
68
+ 'err_pattern_group_empty.decode',
69
+ 'err_pattern_group_ident1.decode',
70
+ 'err_pattern_group_ident2.decode',
71
+ 'err_pattern_group_nest1.decode',
72
+ 'err_pattern_group_nest2.decode',
73
+ 'err_pattern_group_nest3.decode',
74
+ 'err_pattern_group_overlap1.decode',
75
+ 'err_width1.decode',
76
+ 'err_width2.decode',
77
+ 'err_width3.decode',
78
+ 'err_width4.decode',
79
+]
80
+
81
+succ_tests = [
82
+ 'succ_argset_type1.decode',
83
+ 'succ_function.decode',
84
+ 'succ_ident1.decode',
85
+ 'succ_pattern_group_nest1.decode',
86
+ 'succ_pattern_group_nest2.decode',
87
+ 'succ_pattern_group_nest3.decode',
88
+ 'succ_pattern_group_nest4.decode',
89
+]
90
+
91
+suite = 'decodetree'
92
+decodetree = find_program(meson.project_source_root() / 'scripts/decodetree.py')
93
+
94
+foreach t: err_tests
95
+ test(fs.replace_suffix(t, ''),
96
+ decodetree, args: ['-o', '/dev/null', '--test-for-error', files(t)],
97
+ suite: suite)
98
+endforeach
99
+
100
+foreach t: succ_tests
101
+ test(fs.replace_suffix(t, ''),
102
+ decodetree, args: ['-o', '/dev/null', files(t)],
103
+ suite: suite)
104
+endforeach
105
diff --git a/tests/meson.build b/tests/meson.build
10
index XXXXXXX..XXXXXXX 100644
106
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.inc.c
107
--- a/tests/meson.build
12
+++ b/tcg/ppc/tcg-target.inc.c
108
+++ b/tests/meson.build
13
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
109
@@ -XXX,XX +XXX,XX @@ if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host
14
110
dependencies: [qemuutil, vhost_user])
15
TCGPowerISA have_isa;
111
endif
16
112
17
-#define HAVE_ISA_2_06 have_isa_2_06
113
-test('decodetree', sh,
18
#define HAVE_ISEL have_isa_2_06
114
- args: [ files('decode/check.sh'), config_host['PYTHON'], files('../scripts/decodetree.py') ],
19
115
- workdir: meson.current_source_dir() / 'decode',
20
#ifndef CONFIG_SOFTMMU
116
- suite: 'decodetree')
21
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
117
+subdir('decode')
22
}
118
23
} else {
119
if 'CONFIG_TCG' in config_all
24
uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
120
subdir('fp')
25
- if (!HAVE_ISA_2_06 && insn == LDBRX) {
26
+ if (!have_isa_2_06 && insn == LDBRX) {
27
tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
28
tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
29
tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
31
}
32
} else {
33
uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
34
- if (!HAVE_ISA_2_06 && insn == STDBRX) {
35
+ if (!have_isa_2_06 && insn == STDBRX) {
36
tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
37
tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
38
tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
39
--
121
--
40
2.17.1
122
2.34.1
41
42
diff view generated by jsdifflib
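The meson rules above invoke decodetree.py directly with -o /dev/null, adding --test-for-error for the err_* inputs. The sketch below reproduces one such invocation by hand; the file paths are examples rather than anything fixed by the patch:

    # Hand-run equivalent of one meson test; the paths are examples.
    import subprocess
    import sys

    def run_decode_test(decodetree, decode_file, expect_error=False):
        args = [sys.executable, decodetree, '-o', '/dev/null']
        if expect_error:
            args.append('--test-for-error')  # exit 0 only if parsing fails
        args.append(decode_file)
        return subprocess.run(args).returncode == 0

    if __name__ == '__main__':
        ok = run_decode_test('scripts/decodetree.py',
                             'tests/decode/err_pattern_group_empty.decode',
                             expect_error=True)
        print('PASS' if ok else 'FAIL')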
New patch
1
From: Peter Maydell <peter.maydell@linaro.org>
1
2
3
Document the named field syntax that we want to implement for the
4
decodetree script. This allows a field to be defined in terms of
5
some other field that the instruction pattern has already set, for
6
example:
7
8
%sz_imm 10:3 sz:3 !function=expand_sz_imm
9
10
to allow a function to be passed both an immediate field from the
11
instruction and also a sz value which might have been specified by
12
the instruction pattern directly (sz=1, etc) rather than being a
13
simple field within the instruction.
14
15
Note that the restriction on not having the format referring to the
16
pattern and the pattern referring to the format simultaneously is a
17
restriction of the decoder generator rather than inherently being a
18
silly thing to do.
19
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
22
Message-Id: <20230523120447.728365-3-peter.maydell@linaro.org>
23
---
24
docs/devel/decodetree.rst | 33 ++++++++++++++++++++++++++++-----
25
1 file changed, 28 insertions(+), 5 deletions(-)
26
27
diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst
28
index XXXXXXX..XXXXXXX 100644
29
--- a/docs/devel/decodetree.rst
30
+++ b/docs/devel/decodetree.rst
31
@@ -XXX,XX +XXX,XX @@ Fields
32
33
Syntax::
34
35
- field_def := '%' identifier ( unnamed_field )* ( !function=identifier )?
36
+ field_def := '%' identifier ( field )* ( !function=identifier )?
37
+ field := unnamed_field | named_field
38
unnamed_field := number ':' ( 's' ) number
39
+ named_field := identifier ':' ( 's' ) number
40
41
For *unnamed_field*, the first number is the least-significant bit position
42
of the field and the second number is the length of the field. If the 's' is
43
-present, the field is considered signed. If multiple ``unnamed_fields`` are
44
-present, they are concatenated. In this way one can define disjoint fields.
45
+present, the field is considered signed.
46
+
47
+A *named_field* refers to some other field in the instruction pattern
48
+or format. Regardless of the length of the other field where it is
49
+defined, it will be inserted into this field with the specified
50
+signedness and bit width.
51
+
52
+Field definitions that involve loops (i.e. where a field is defined
53
+directly or indirectly in terms of itself) are errors.
54
+
55
+A format can include fields that refer to named fields that are
56
+defined in the instruction pattern(s) that use the format.
57
+Conversely, an instruction pattern can include fields that refer to
58
+named fields that are defined in the format it uses. However you
59
+cannot currently do both at once (i.e. pattern P uses format F; F has
60
+a field A that refers to a named field B that is defined in P, and P
61
+has a field C that refers to a named field D that is defined in F).
62
+
63
+If multiple ``fields`` are present, they are concatenated.
64
+In this way one can define disjoint fields.
65
66
If ``!function`` is specified, the concatenated result is passed through the
67
named function, taking and returning an integral value.
68
69
-One may use ``!function`` with zero ``unnamed_fields``. This case is called
70
+One may use ``!function`` with zero ``fields``. This case is called
71
a *parameter*, and the named function is only passed the ``DisasContext``
72
and returns an integral value extracted from there.
73
74
-A field with no ``unnamed_fields`` and no ``!function`` is in error.
75
+A field with no ``fields`` and no ``!function`` is in error.
76
77
Field examples:
78
79
@@ -XXX,XX +XXX,XX @@ Field examples:
80
| %shimm8 5:s8 13:1 | expand_shimm8(sextract(i, 5, 8) << 1 | |
81
| !function=expand_shimm8 | extract(i, 13, 1)) |
82
+---------------------------+---------------------------------------------+
83
+| %sz_imm 10:2 sz:3 | expand_sz_imm(extract(i, 10, 2) << 3 | |
84
+| !function=expand_sz_imm | extract(a->sz, 0, 3)) |
85
++---------------------------+---------------------------------------------+
86
87
Argument Sets
88
=============
89
--
90
2.34.1
diff view generated by jsdifflib
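To make the new documentation example concrete, here is a toy Python model of the %sz_imm row from the table: the low bits come from a named field in the argument set rather than from the instruction word. extract() follows the usual QEMU semantics; expand_sz_imm is a hypothetical per-target helper that just returns its input here.

    # Toy model of the %sz_imm documentation example:
    #   %sz_imm 10:2 sz:3 !function=expand_sz_imm
    # which the table expands as
    #   expand_sz_imm(extract(i, 10, 2) << 3 | extract(a->sz, 0, 3))

    def extract(value, start, length):
        return (value >> start) & ((1 << length) - 1)

    def expand_sz_imm(x):
        return x                      # placeholder for a real target helper

    def sz_imm(insn, sz):
        # 'sz' is the named field, taken from the argument set (a->sz),
        # not from the instruction word.
        return expand_sz_imm(extract(insn, 10, 2) << 3 | extract(sz, 0, 3))

    print(sz_imm(0b11 << 10, sz=0b101))  # 29, i.e. 0b11101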
New patch
1
From: Peter Maydell <peter.maydell@linaro.org>
1
2
3
To support referring to other named fields in field definitions, we
4
need to pass the str_extract() method a function which tells it how
5
to emit the code for a previously initialized named field. (In
6
Pattern::output_code() the other field will be "u.f_foo.field", and
7
in Format::output_extract() it is "a->field".)
8
9
Refactor the two callsites that currently do "output code to
10
initialize each field", and have them pass a lambda that defines how
11
to format the lvalue in each case. This is then used both in
12
emitting the LHS of the assignment and also passed down to
13
str_extract() as a new argument (unused at the moment, but will be
14
used in the following patch).
15
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
Message-Id: <20230523120447.728365-4-peter.maydell@linaro.org>
19
---
20
scripts/decodetree.py | 26 +++++++++++++++-----------
21
1 file changed, 15 insertions(+), 11 deletions(-)
22
23
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
24
index XXXXXXX..XXXXXXX 100644
25
--- a/scripts/decodetree.py
26
+++ b/scripts/decodetree.py
27
@@ -XXX,XX +XXX,XX @@ def __str__(self):
28
s = ''
29
return str(self.pos) + ':' + s + str(self.len)
30
31
- def str_extract(self):
32
+ def str_extract(self, lvalue_formatter):
33
global bitop_width
34
s = 's' if self.sign else ''
35
return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
36
@@ -XXX,XX +XXX,XX @@ def __init__(self, subs, mask):
37
def __str__(self):
38
return str(self.subs)
39
40
- def str_extract(self):
41
+ def str_extract(self, lvalue_formatter):
42
global bitop_width
43
ret = '0'
44
pos = 0
45
for f in reversed(self.subs):
46
- ext = f.str_extract()
47
+ ext = f.str_extract(lvalue_formatter)
48
if pos == 0:
49
ret = ext
50
else:
51
@@ -XXX,XX +XXX,XX @@ def __init__(self, value):
52
def __str__(self):
53
return str(self.value)
54
55
- def str_extract(self):
56
+ def str_extract(self, lvalue_formatter):
57
return str(self.value)
58
59
def __cmp__(self, other):
60
@@ -XXX,XX +XXX,XX @@ def __init__(self, func, base):
61
def __str__(self):
62
return self.func + '(' + str(self.base) + ')'
63
64
- def str_extract(self):
65
- return self.func + '(ctx, ' + self.base.str_extract() + ')'
66
+ def str_extract(self, lvalue_formatter):
67
+ return (self.func + '(ctx, '
68
+ + self.base.str_extract(lvalue_formatter) + ')')
69
70
def __eq__(self, other):
71
return self.func == other.func and self.base == other.base
72
@@ -XXX,XX +XXX,XX @@ def __init__(self, func):
73
def __str__(self):
74
return self.func
75
76
- def str_extract(self):
77
+ def str_extract(self, lvalue_formatter):
78
return self.func + '(ctx)'
79
80
def __eq__(self, other):
81
@@ -XXX,XX +XXX,XX @@ def __str__(self):
82
83
def str1(self, i):
84
return str_indent(i) + self.__str__()
85
+
86
+ def output_fields(self, indent, lvalue_formatter):
87
+ for n, f in self.fields.items():
88
+ output(indent, lvalue_formatter(n), ' = ',
89
+ f.str_extract(lvalue_formatter), ';\n')
90
# end General
91
92
93
@@ -XXX,XX +XXX,XX @@ def extract_name(self):
94
def output_extract(self):
95
output('static void ', self.extract_name(), '(DisasContext *ctx, ',
96
self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
97
- for n, f in self.fields.items():
98
- output(' a->', n, ' = ', f.str_extract(), ';\n')
99
+ self.output_fields(str_indent(4), lambda n: 'a->' + n)
100
output('}\n\n')
101
# end Format
102
103
@@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask):
104
if not extracted:
105
output(ind, self.base.extract_name(),
106
'(ctx, &u.f_', arg, ', insn);\n')
107
- for n, f in self.fields.items():
108
- output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
109
+ self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
110
output(ind, 'if (', translate_prefix, '_', self.name,
111
'(ctx, &u.f_', arg, ')) return true;\n')
112
113
--
114
2.34.1
diff view generated by jsdifflib
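The refactoring boils down to passing a callback that spells the destination lvalue, so a single field emitter serves both Format::output_extract() ("a->field") and Pattern::output_code() ("u.f_foo.field"). A stand-alone sketch of that callback pattern, with the field names and extraction strings invented for the example:

    # One emitter, two call sites that differ only in how the
    # destination is spelled.

    def output_fields(fields, lvalue_formatter):
        return '\n'.join(f'    {lvalue_formatter(n)} = {rhs};'
                         for n, rhs in fields.items())

    fields = {
        'rd': 'extract32(insn, 0, 5)',
        'imm': 'sextract32(insn, 20, 12)',
    }

    # Format::output_extract() spells the destination as a->field:
    print(output_fields(fields, lambda n: 'a->' + n))
    # Pattern::output_code() spells it as u.f_<arg>.field:
    print(output_fields(fields, lambda n: 'u.f_addi.' + n))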
New patch
1
From: Peter Maydell <peter.maydell@linaro.org>
1
2
3
To support named fields, we will need to be able to do a topological
4
sort (so that we ensure that we output the assignment to field A
5
before the assignment to field B if field B refers to field A by
6
name). The good news is that there is a tsort in the python standard
7
library; the bad news is that it was only added in Python 3.9.
8
9
To bridge the gap between our current minimum supported Python
10
version and 3.9, provide a local implementation that has the
11
same API as the stdlib version for the parts we care about.
12
In future when QEMU's minimum Python version requirement reaches
13
3.9 we can delete this code and replace it with an 'import' line.
14
15
The core of this implementation is based on
16
https://code.activestate.com/recipes/578272-topological-sort/
17
which is MIT-licensed.
18
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Acked-by: Richard Henderson <richard.henderson@linaro.org>
21
Message-Id: <20230523120447.728365-5-peter.maydell@linaro.org>
22
---
23
scripts/decodetree.py | 74 +++++++++++++++++++++++++++++++++++++++++++
24
1 file changed, 74 insertions(+)
25
26
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
27
index XXXXXXX..XXXXXXX 100644
28
--- a/scripts/decodetree.py
29
+++ b/scripts/decodetree.py
30
@@ -XXX,XX +XXX,XX @@
31
re_fmt_ident = '@[a-zA-Z0-9_]*'
32
re_pat_ident = '[a-zA-Z0-9_]*'
33
34
+# Local implementation of a topological sort. We use the same API that
35
+# the Python graphlib does, so that when QEMU moves forward to a
36
+# baseline of Python 3.9 or newer this code can all be dropped and
37
+# replaced with:
38
+# from graphlib import TopologicalSorter, CycleError
39
+#
40
+# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter
41
+#
42
+# We only implement the parts of TopologicalSorter we care about:
43
+# ts = TopologicalSorter(graph=None)
44
+# create the sorter. graph is a dictionary whose keys are
45
+# nodes and whose values are lists of the predecessors of that node.
46
+# (That is, if graph contains "A" -> ["B", "C"] then we must output
47
+# B and C before A.)
48
+# ts.static_order()
49
+# returns a list of all the nodes in sorted order, or raises CycleError
50
+# CycleError
51
+# exception raised if there are cycles in the graph. The second
52
+# element in the args attribute is a list of nodes which form a
53
+# cycle; the first and last element are the same, eg [a, b, c, a]
54
+# (Our implementation doesn't give the order correctly.)
55
+#
56
+# For our purposes we can assume that the data set is always small
57
+# (typically 10 nodes or less, actual links in the graph very rare),
58
+# so we don't need to worry about efficiency of implementation.
59
+#
60
+# The core of this implementation is from
61
+# https://code.activestate.com/recipes/578272-topological-sort/
62
+# (but updated to Python 3), and is under the MIT license.
63
+
64
+class CycleError(ValueError):
65
+ """Subclass of ValueError raised if cycles exist in the graph"""
66
+ pass
67
+
68
+class TopologicalSorter:
69
+ """Topologically sort a graph"""
70
+ def __init__(self, graph=None):
71
+ self.graph = graph
72
+
73
+ def static_order(self):
74
+ # We do the sort right here, unlike the stdlib version
75
+ from functools import reduce
76
+ data = {}
77
+ r = []
78
+
79
+ if not self.graph:
80
+ return []
81
+
82
+ # This code wants the values in the dict to be specifically sets
83
+ for k, v in self.graph.items():
84
+ data[k] = set(v)
85
+
86
+ # Find all items that don't depend on anything.
87
+ extra_items_in_deps = (reduce(set.union, data.values())
88
+ - set(data.keys()))
89
+ # Add empty dependencies where needed
90
+ data.update({item:{} for item in extra_items_in_deps})
91
+ while True:
92
+ ordered = set(item for item, dep in data.items() if not dep)
93
+ if not ordered:
94
+ break
95
+ r.extend(ordered)
96
+ data = {item: (dep - ordered)
97
+ for item, dep in data.items()
98
+ if item not in ordered}
99
+ if data:
100
+ # This doesn't give as nice results as the stdlib, which
101
+ # gives you the cycle by listing the nodes in order. Here
102
+ # we only know the nodes in the cycle but not their order.
103
+ raise CycleError(f'nodes are in a cycle', list(data.keys()))
104
+
105
+ return r
106
+# end TopologicalSorter
107
+
108
def error_with_file(file, lineno, *args):
109
"""Print an error message from file:line and args and exit."""
110
global output_file
111
--
112
2.34.1
diff view generated by jsdifflib
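The local TopologicalSorter is a drop-in for the Python 3.9 graphlib API described in the comment block, so its intended behaviour can be shown with the stdlib class: graph values list each node's predecessors, and static_order() raises CycleError when the field definitions loop.

    # Shown with the stdlib class the local code mirrors (Python 3.9+).
    # 'A': ['B', 'C'] means B and C must be emitted before A.
    from graphlib import TopologicalSorter, CycleError

    graph = {'A': ['B', 'C'], 'C': ['B']}
    print(list(TopologicalSorter(graph).static_order()))  # ['B', 'C', 'A']

    try:
        list(TopologicalSorter({'x': ['y'], 'y': ['x']}).static_order())
    except CycleError as e:
        print('cycle through:', e.args[1])  # the nodes forming the cycle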
1
Introduce macro VX4() used for encoding Altivec instructions.
1
From: Peter Maydell <peter.maydell@linaro.org>
2
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Implement support for named fields, i.e. where one field is defined
4
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
4
in terms of another, rather than directly in terms of bits extracted
5
from the instruction.
6
7
The new method referenced_fields() on all the Field classes returns a
8
list of fields that this field references. This just passes through,
9
except for the new NamedField class.
10
11
We can then use referenced_fields() to:
12
* construct a list of 'dangling references' for a format or
13
pattern, which is the fields that the format/pattern uses but
14
doesn't define itself
15
* do a topological sort, so that we output "field = value"
16
assignments in an order that means that we assign a field before
17
we reference it in a subsequent assignment
18
* check when we output the code for a pattern whether we need to
19
fill in the format fields before or after the pattern fields, and
20
do other error checking
21
22
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
24
Message-Id: <20230523120447.728365-6-peter.maydell@linaro.org>
5
---
25
---
6
tcg/ppc/tcg-target.inc.c | 1 +
26
scripts/decodetree.py | 145 ++++++++++++++++++++++++++++++++++++++++--
7
1 file changed, 1 insertion(+)
27
1 file changed, 139 insertions(+), 6 deletions(-)
8
28
9
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
29
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
10
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.inc.c
31
--- a/scripts/decodetree.py
12
+++ b/tcg/ppc/tcg-target.inc.c
32
+++ b/scripts/decodetree.py
13
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
33
@@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter):
14
#define XO31(opc) (OPCD(31)|((opc)<<1))
34
s = 's' if self.sign else ''
15
#define XO58(opc) (OPCD(58)|(opc))
35
return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
16
#define XO62(opc) (OPCD(62)|(opc))
36
17
+#define VX4(opc) (OPCD(4)|(opc))
37
+ def referenced_fields(self):
18
38
+ return []
19
#define B OPCD( 18)
39
+
20
#define BC OPCD( 16)
40
def __eq__(self, other):
41
return self.sign == other.sign and self.mask == other.mask
42
43
@@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter):
44
pos += f.len
45
return ret
46
47
+ def referenced_fields(self):
48
+ l = []
49
+ for f in self.subs:
50
+ l.extend(f.referenced_fields())
51
+ return l
52
+
53
def __ne__(self, other):
54
if len(self.subs) != len(other.subs):
55
return True
56
@@ -XXX,XX +XXX,XX @@ def __str__(self):
57
def str_extract(self, lvalue_formatter):
58
return str(self.value)
59
60
+ def referenced_fields(self):
61
+ return []
62
+
63
def __cmp__(self, other):
64
return self.value - other.value
65
# end ConstField
66
@@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter):
67
return (self.func + '(ctx, '
68
+ self.base.str_extract(lvalue_formatter) + ')')
69
70
+ def referenced_fields(self):
71
+ return self.base.referenced_fields()
72
+
73
def __eq__(self, other):
74
return self.func == other.func and self.base == other.base
75
76
@@ -XXX,XX +XXX,XX @@ def __str__(self):
77
def str_extract(self, lvalue_formatter):
78
return self.func + '(ctx)'
79
80
+ def referenced_fields(self):
81
+ return []
82
+
83
def __eq__(self, other):
84
return self.func == other.func
85
86
@@ -XXX,XX +XXX,XX @@ def __ne__(self, other):
87
return not self.__eq__(other)
88
# end ParameterField
89
90
+class NamedField:
91
+ """Class representing a field already named in the pattern"""
92
+ def __init__(self, name, sign, len):
93
+ self.mask = 0
94
+ self.sign = sign
95
+ self.len = len
96
+ self.name = name
97
+
98
+ def __str__(self):
99
+ return self.name
100
+
101
+ def str_extract(self, lvalue_formatter):
102
+ global bitop_width
103
+ s = 's' if self.sign else ''
104
+ lvalue = lvalue_formatter(self.name)
105
+ return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})'
106
+
107
+ def referenced_fields(self):
108
+ return [self.name]
109
+
110
+ def __eq__(self, other):
111
+ return self.name == other.name
112
+
113
+ def __ne__(self, other):
114
+ return not self.__eq__(other)
115
+# end NamedField
116
117
class Arguments:
118
"""Class representing the extracted fields of a format"""
119
@@ -XXX,XX +XXX,XX @@ def output_def(self):
120
output('} ', self.struct_name(), ';\n\n')
121
# end Arguments
122
123
-
124
class General:
125
"""Common code between instruction formats and instruction patterns"""
126
def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
127
@@ -XXX,XX +XXX,XX @@ def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
128
self.fieldmask = fldm
129
self.fields = flds
130
self.width = w
131
+ self.dangling = None
132
133
def __str__(self):
134
return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
135
@@ -XXX,XX +XXX,XX @@ def __str__(self):
136
def str1(self, i):
137
return str_indent(i) + self.__str__()
138
139
+ def dangling_references(self):
140
+ # Return a list of all named references which aren't satisfied
141
+ # directly by this format/pattern. This will be either:
142
+ # * a format referring to a field which is specified by the
143
+ # pattern(s) using it
144
+ # * a pattern referring to a field which is specified by the
145
+ # format it uses
146
+ # * a user error (referring to a field that doesn't exist at all)
147
+ if self.dangling is None:
148
+ # Compute this once and cache the answer
149
+ dangling = []
150
+ for n, f in self.fields.items():
151
+ for r in f.referenced_fields():
152
+ if r not in self.fields:
153
+ dangling.append(r)
154
+ self.dangling = dangling
155
+ return self.dangling
156
+
157
def output_fields(self, indent, lvalue_formatter):
158
+ # We use a topological sort to ensure that any use of NamedField
159
+ # comes after the initialization of the field it is referencing.
160
+ graph = {}
161
for n, f in self.fields.items():
162
- output(indent, lvalue_formatter(n), ' = ',
163
- f.str_extract(lvalue_formatter), ';\n')
164
+ refs = f.referenced_fields()
165
+ graph[n] = refs
166
+
167
+ try:
168
+ ts = TopologicalSorter(graph)
169
+ for n in ts.static_order():
170
+ # We only want to emit assignments for the keys
171
+ # in our fields list, not for anything that ends up
172
+ # in the tsort graph only because it was referenced as
173
+ # a NamedField.
174
+ try:
175
+ f = self.fields[n]
176
+ output(indent, lvalue_formatter(n), ' = ',
177
+ f.str_extract(lvalue_formatter), ';\n')
178
+ except KeyError:
179
+ pass
180
+ except CycleError as e:
181
+ # The second element of args is a list of nodes which form
182
+ # a cycle (there might be others too, but only one is reported).
183
+ # Pretty-print it to tell the user.
184
+ cycle = ' => '.join(e.args[1])
185
+ error(self.lineno, 'field definitions form a cycle: ' + cycle)
186
# end General
187
188
189
@@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask):
190
ind = str_indent(i)
191
arg = self.base.base.name
192
output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
193
+ # We might have named references in the format that refer to fields
194
+ # in the pattern, or named references in the pattern that refer
195
+ # to fields in the format. This affects whether we extract the fields
196
+ # for the format before or after the ones for the pattern.
197
+ # For simplicity we don't allow cross references in both directions.
198
+ # This is also where we catch the syntax error of referring to
199
+ # a nonexistent field.
200
+ fmt_refs = self.base.dangling_references()
201
+ for r in fmt_refs:
202
+ if r not in self.fields:
203
+ error(self.lineno, f'format refers to undefined field {r}')
204
+ pat_refs = self.dangling_references()
205
+ for r in pat_refs:
206
+ if r not in self.base.fields:
207
+ error(self.lineno, f'pattern refers to undefined field {r}')
208
+ if pat_refs and fmt_refs:
209
+ error(self.lineno, ('pattern that uses fields defined in format '
210
+ 'cannot use format that uses fields defined '
211
+ 'in pattern'))
212
+ if fmt_refs:
213
+ # pattern fields first
214
+ self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
215
+ assert not extracted, "dangling fmt refs but it was already extracted"
216
if not extracted:
217
output(ind, self.base.extract_name(),
218
'(ctx, &u.f_', arg, ', insn);\n')
219
- self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
220
+ if not fmt_refs:
221
+ # pattern fields last
222
+ self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
223
+
224
output(ind, 'if (', translate_prefix, '_', self.name,
225
'(ctx, &u.f_', arg, ')) return true;\n')
226
227
@@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask):
228
ind = str_indent(i)
229
230
# If we identified all nodes below have the same format,
231
- # extract the fields now.
232
- if not extracted and self.base:
233
+ # extract the fields now. But don't do it if the format relies
234
+ # on named fields from the insn pattern, as those won't have
235
+ # been initialised at this point.
236
+ if not extracted and self.base and not self.base.dangling_references():
237
output(ind, self.base.extract_name(),
238
'(ctx, &u.f_', self.base.base.name, ', insn);\n')
239
extracted = True
240
@@ -XXX,XX +XXX,XX @@ def parse_field(lineno, name, toks):
241
"""Parse one instruction field from TOKS at LINENO"""
242
global fields
243
global insnwidth
244
+ global re_C_ident
245
246
# A "simple" field will have only one entry;
247
# a "multifield" will have several.
248
@@ -XXX,XX +XXX,XX @@ def parse_field(lineno, name, toks):
249
func = func[1]
250
continue
251
252
+ if re.fullmatch(re_C_ident + ':s[0-9]+', t):
253
+ # Signed named field
254
+ subtoks = t.split(':')
255
+ n = subtoks[0]
256
+ le = int(subtoks[1])
257
+ f = NamedField(n, True, le)
258
+ subs.append(f)
259
+ width += le
260
+ continue
261
+ if re.fullmatch(re_C_ident + ':[0-9]+', t):
262
+ # Unsigned named field
263
+ subtoks = t.split(':')
264
+ n = subtoks[0]
265
+ le = int(subtoks[1])
266
+ f = NamedField(n, False, le)
267
+ subs.append(f)
268
+ width += le
269
+ continue
270
+
271
if re.fullmatch('[0-9]+:s[0-9]+', t):
272
# Signed field extract
273
subtoks = t.split(':s')
21
--
274
--
22
2.17.1
275
2.34.1
23
24
diff view generated by jsdifflib
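The key difference for a NamedField is in str_extract(): instead of pulling bits out of the instruction word, it re-extracts from the already-assigned field, formatted through the same lvalue callback. A toy comparison of the two generated expressions, assuming the default 32-bit bitop_width and the "a->" formatter used by formats:

    # Toy contrast of the C expressions str_extract() produces for an
    # ordinary field versus a NamedField.

    def unnamed_extract(pos, length, sign=False):
        s = 's' if sign else ''
        return f'{s}extract32(insn, {pos}, {length})'

    def named_extract(name, length, lvalue_formatter, sign=False):
        s = 's' if sign else ''
        return f'{s}extract32({lvalue_formatter(name)}, 0, {length})'

    lvalue = lambda n: 'a->' + n
    print(unnamed_extract(10, 2))          # extract32(insn, 10, 2)
    print(named_extract('sz', 3, lvalue))  # extract32(a->sz, 0, 3)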
New patch
1
From: Peter Maydell <peter.maydell@linaro.org>
1
2
3
Add some tests for various cases of named-field use, both ones that
4
should work and ones that should be diagnosed as errors.
5
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-Id: <20230523120447.728365-7-peter.maydell@linaro.org>
9
---
10
tests/decode/err_field10.decode | 7 +++++++
11
tests/decode/err_field7.decode | 7 +++++++
12
tests/decode/err_field8.decode | 8 ++++++++
13
tests/decode/err_field9.decode | 14 ++++++++++++++
14
tests/decode/succ_named_field.decode | 19 +++++++++++++++++++
15
tests/decode/meson.build | 5 +++++
16
6 files changed, 60 insertions(+)
17
create mode 100644 tests/decode/err_field10.decode
18
create mode 100644 tests/decode/err_field7.decode
19
create mode 100644 tests/decode/err_field8.decode
20
create mode 100644 tests/decode/err_field9.decode
21
create mode 100644 tests/decode/succ_named_field.decode
22
23
diff --git a/tests/decode/err_field10.decode b/tests/decode/err_field10.decode
24
new file mode 100644
25
index XXXXXXX..XXXXXXX
26
--- /dev/null
27
+++ b/tests/decode/err_field10.decode
28
@@ -XXX,XX +XXX,XX @@
29
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
30
+# See the COPYING.LIB file in the top-level directory.
31
+
32
+# Diagnose formats which refer to undefined fields
33
+%field1 field2:3
34
+@fmt ........ ........ ........ ........ %field1
35
+insn 00000000 00000000 00000000 00000000 @fmt
36
diff --git a/tests/decode/err_field7.decode b/tests/decode/err_field7.decode
37
new file mode 100644
38
index XXXXXXX..XXXXXXX
39
--- /dev/null
40
+++ b/tests/decode/err_field7.decode
41
@@ -XXX,XX +XXX,XX @@
42
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
43
+# See the COPYING.LIB file in the top-level directory.
44
+
45
+# Diagnose fields whose definitions form a loop
46
+%field1 field2:3
47
+%field2 field1:4
48
+insn 00000000 00000000 00000000 00000000 %field1 %field2
49
diff --git a/tests/decode/err_field8.decode b/tests/decode/err_field8.decode
50
new file mode 100644
51
index XXXXXXX..XXXXXXX
52
--- /dev/null
53
+++ b/tests/decode/err_field8.decode
54
@@ -XXX,XX +XXX,XX @@
55
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
56
+# See the COPYING.LIB file in the top-level directory.
57
+
58
+# Diagnose patterns which refer to undefined fields
59
+&f1 f1 a
60
+%field1 field2:3
61
+@fmt ........ ........ ........ .... a:4 &f1
62
+insn 00000000 00000000 00000000 0000 .... @fmt f1=%field1
63
diff --git a/tests/decode/err_field9.decode b/tests/decode/err_field9.decode
64
new file mode 100644
65
index XXXXXXX..XXXXXXX
66
--- /dev/null
67
+++ b/tests/decode/err_field9.decode
68
@@ -XXX,XX +XXX,XX @@
69
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
70
+# See the COPYING.LIB file in the top-level directory.
71
+
72
+# Diagnose fields where the format refers to a field defined in the
73
+# pattern and the pattern refers to a field defined in the format.
74
+# This is theoretically not impossible to implement, but is not
75
+# supported by the script at this time.
76
+&abcd a b c d
77
+%refa a:3
78
+%refc c:4
79
+# Format defines 'c' and sets 'b' to an indirect ref to 'a'
80
+@fmt ........ ........ ........ c:8 &abcd b=%refa
81
+# Pattern defines 'a' and sets 'd' to an indirect ref to 'c'
82
+insn 00000000 00000000 00000000 ........ @fmt d=%refc a=6
83
diff --git a/tests/decode/succ_named_field.decode b/tests/decode/succ_named_field.decode
84
new file mode 100644
85
index XXXXXXX..XXXXXXX
86
--- /dev/null
87
+++ b/tests/decode/succ_named_field.decode
88
@@ -XXX,XX +XXX,XX @@
89
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
90
+# See the COPYING.LIB file in the top-level directory.
91
+
92
+# field using a named_field
93
+%imm_sz    8:8 sz:3
94
+insn 00000000 00000000 ........ 00000000 imm_sz=%imm_sz sz=1
95
+
96
+# Ditto, via a format. Here a field in the format
97
+# references a named field defined in the insn pattern:
98
+&imm_a imm alpha
99
+%foo 0:16 alpha:4
100
+@foo 00000001 ........ ........ ........ &imm_a imm=%foo
101
+i1 ........ 00000000 ........ ........ @foo alpha=1
102
+i2 ........ 00000001 ........ ........ @foo alpha=2
103
+
104
+# Here the named field is defined in the format and referenced
105
+# from the insn pattern:
106
+@bar 00000010 ........ ........ ........ &imm_a alpha=4
107
+i3 ........ 00000000 ........ ........ @bar imm=%foo
108
diff --git a/tests/decode/meson.build b/tests/decode/meson.build
109
index XXXXXXX..XXXXXXX 100644
110
--- a/tests/decode/meson.build
111
+++ b/tests/decode/meson.build
112
@@ -XXX,XX +XXX,XX @@ err_tests = [
113
'err_field4.decode',
114
'err_field5.decode',
115
'err_field6.decode',
116
+ 'err_field7.decode',
117
+ 'err_field8.decode',
118
+ 'err_field9.decode',
119
+ 'err_field10.decode',
120
'err_init1.decode',
121
'err_init2.decode',
122
'err_init3.decode',
123
@@ -XXX,XX +XXX,XX @@ succ_tests = [
124
'succ_argset_type1.decode',
125
'succ_function.decode',
126
'succ_ident1.decode',
127
+ 'succ_named_field.decode',
128
'succ_pattern_group_nest1.decode',
129
'succ_pattern_group_nest2.decode',
130
'succ_pattern_group_nest3.decode',
131
--
132
2.34.1
diff view generated by jsdifflib