The following changes since commit 2d894e48362ad2a576fca929dcca1787f43a8af6:

  Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging (2018-12-13 17:50:45 +0000)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20181213

for you to fetch changes up to 99f70ba5b6b4566509b2069a8d29c6686b8115de:

  xxhash: match output against the original xxhash32 (2018-12-13 18:56:11 -0600)

----------------------------------------------------------------
- Remove retranslation remnants
- Return success from patch_reloc
- Preserve 32-bit values as zero-extended on x86_64
- Make bswap during memory ops optional
- Clean up xxhash

----------------------------------------------------------------
Alistair Francis (1):
      tcg/mips: Improve the add2/sub2 command to use TCG_TARGET_REG_BITS

Emilio G. Cota (5):
      tcg: Drop nargs from tcg_op_insert_{before,after}
      qht-bench: document -p flag
      exec: introduce qemu_xxhash{2,4,5,6,7}
      include: move exec/tb-hash-xx.h to qemu/xxhash.h
      xxhash: match output against the original xxhash32

Richard Henderson (26):
      tcg/i386: Always use %ebp for TCG_AREG0
      tcg/i386: Move TCG_REG_CALL_STACK from define to enum
      tcg/aarch64: Remove reloc_pc26_atomic
      tcg/aarch64: Fold away "noaddr" branch routines
      tcg/arm: Remove reloc_pc24_atomic
      tcg/arm: Fold away "noaddr" branch routines
      tcg/ppc: Fold away "noaddr" branch routines
      tcg/s390: Remove retranslation code
      tcg/sparc: Remove retranslation code
      tcg/mips: Remove retranslation code
      tcg: Return success from patch_reloc
      tcg/i386: Return false on failure from patch_reloc
      tcg/aarch64: Return false on failure from patch_reloc
      tcg/arm: Return false on failure from patch_reloc
      tcg/ppc: Return false on failure from patch_reloc
      tcg/s390x: Return false on failure from patch_reloc
      tcg/i386: Propagate is64 to tcg_out_qemu_ld_direct
      tcg/i386: Propagate is64 to tcg_out_qemu_ld_slow_path
      tcg/i386: Implement INDEX_op_extr{lh}_i64_i32 for 32-bit guests
      tcg/i386: Assume 32-bit values are zero-extended
      tcg/i386: Precompute all guest_base parameters
      tcg/i386: Add setup_guest_base_seg for FreeBSD
      tcg: Clean up generic bswap32
      tcg: Clean up generic bswap64
      tcg/optimize: Optimize bswap
      tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP

 include/exec/tb-hash.h                       |   4 +-
 include/{exec/tb-hash-xx.h => qemu/xxhash.h} |  47 ++++--
 tcg/aarch64/tcg-target.h                     |   1 +
 tcg/arm/tcg-target.h                         |   1 +
 tcg/i386/tcg-target.h                        |  17 +--
 tcg/mips/tcg-target.h                        |   1 +
 tcg/ppc/tcg-target.h                         |   1 +
 tcg/s390/tcg-target.h                        |   1 +
 tcg/sparc/tcg-target.h                       |   1 +
 tcg/tcg.h                                    |   4 +-
 tcg/tci/tcg-target.h                         |   2 +
 tcg/aarch64/tcg-target.inc.c                 |  71 +++------
 tcg/arm/tcg-target.inc.c                     |  55 +++----
 tcg/i386/tcg-target.inc.c                    | 208 ++++++++++++--------------
 tcg/mips/tcg-target.inc.c                    |  12 +-
 tcg/optimize.c                               |  16 +-
 tcg/ppc/tcg-target.inc.c                     |  60 ++++----
 tcg/s390/tcg-target.inc.c                    |  45 +++---
 tcg/sparc/tcg-target.inc.c                   |  13 +-
 tcg/tcg-op.c                                 | 215 ++++++++++++++++++++-------
 tcg/tcg.c                                    |  18 +--
 tcg/tci/tcg-target.inc.c                     |   3 +-
 tests/qht-bench.c                            |   5 +-
 util/qsp.c                                   |  14 +-
 24 files changed, 452 insertions(+), 363 deletions(-)
 rename include/{exec/tb-hash-xx.h => qemu/xxhash.h} (73%)


The following changes since commit 9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb:

  Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging (2019-10-04 18:32:34 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20191013

for you to fetch changes up to d2f86bba6931388e275e8eb4ccd1dbcc7cae6328:

  cpus: kick all vCPUs when running thread=single (2019-10-07 14:08:58 -0400)

----------------------------------------------------------------
Host vector support for tcg/ppc.
Fix thread=single cpu kicking.

----------------------------------------------------------------
Alex Bennée (1):
      cpus: kick all vCPUs when running thread=single

Richard Henderson (22):
      tcg/ppc: Introduce Altivec registers
      tcg/ppc: Introduce macro VX4()
      tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC()
      tcg/ppc: Create TCGPowerISA and have_isa
      tcg/ppc: Replace HAVE_ISA_2_06
      tcg/ppc: Replace HAVE_ISEL macro with a variable
      tcg/ppc: Enable tcg backend vector compilation
      tcg/ppc: Add support for load/store/logic/comparison
      tcg/ppc: Add support for vector maximum/minimum
      tcg/ppc: Add support for vector add/subtract
      tcg/ppc: Add support for vector saturated add/subtract
      tcg/ppc: Support vector shift by immediate
      tcg/ppc: Support vector multiply
      tcg/ppc: Support vector dup2
      tcg/ppc: Enable Altivec detection
      tcg/ppc: Update vector support for VSX
      tcg/ppc: Update vector support for v2.07 Altivec
      tcg/ppc: Update vector support for v2.07 VSX
      tcg/ppc: Update vector support for v2.07 FP
      tcg/ppc: Update vector support for v3.00 Altivec
      tcg/ppc: Update vector support for v3.00 load/store
      tcg/ppc: Update vector support for v3.00 dup/dupi

 tcg/ppc/tcg-target.h     |   51 ++-
 tcg/ppc/tcg-target.opc.h |   13 +
 cpus.c                   |   24 +-
 tcg/ppc/tcg-target.inc.c | 1118 ++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 1119 insertions(+), 87 deletions(-)
 create mode 100644 tcg/ppc/tcg-target.opc.h
tcg/i386: Always use %ebp for TCG_AREG0

For x86_64, this can remove a REX prefix, resulting in smaller code
when manipulating globals of type i32, as we move them between backing
store via cpu_env, aka TCG_AREG0.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_REG_RBP = TCG_REG_EBP,
     TCG_REG_RSI = TCG_REG_ESI,
     TCG_REG_RDI = TCG_REG_EDI,
+
+    TCG_AREG0 = TCG_REG_EBP,
 } TCGReg;

 /* used for function call generation */
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
 #define TCG_TARGET_extract_i64_valid(ofs, len) \
     (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)

-#if TCG_TARGET_REG_BITS == 64
-# define TCG_AREG0 TCG_REG_R14
-#else
-# define TCG_AREG0 TCG_REG_EBP
-#endif
-
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
 }
-- 
2.17.2
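As a quick illustration of the size win (a sketch, not part of the patch):
%rbp is one of the eight legacy registers and needs no REX prefix for
32-bit accesses, while %r14 always requires REX.B.

    #include <stdint.h>
    #include <stdio.h>

    /* Standard x86-64 encodings of "mov 0x10(base), %eax". */
    int main(void)
    {
        uint8_t via_r14[] = { 0x41, 0x8b, 0x46, 0x10 }; /* REX.B + opcode */
        uint8_t via_rbp[] = { 0x8b, 0x45, 0x10 };       /* no REX needed */
        printf("%zu vs %zu bytes per access\n",
               sizeof(via_r14), sizeof(via_rbp));
        return 0;
    }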
tcg/i386: Implement INDEX_op_extr{lh}_i64_i32 for 32-bit guests

This preserves the invariant that all TCG_TYPE_I32 values are
zero-extended in the 64-bit host register.

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.h     | 5 +++--
 tcg/i386/tcg-target.inc.c | 6 ++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
 #define TCG_TARGET_HAS_direct_jump      1

-#define TCG_TARGET_HAS_extrl_i64_i32    0
-#define TCG_TARGET_HAS_extrh_i64_i32    0
+/* Keep target addresses zero-extended in a register.  */
+#define TCG_TARGET_HAS_extrl_i64_i32    (TARGET_LONG_BITS == 32)
+#define TCG_TARGET_HAS_extrh_i64_i32    (TARGET_LONG_BITS == 32)

 #define TCG_TARGET_HAS_div2_i64         1
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_ext8s_i64        1
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
+    case INDEX_op_extrl_i64_i32:
         tcg_out_ext32u(s, a0, a1);
         break;
     case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tcg_out_ext32s(s, a0, a1);
         break;
+    case INDEX_op_extrh_i64_i32:
+        tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32);
+        break;
 #endif

     OP_32_64(deposit):
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     case INDEX_op_neg_i64:
     case INDEX_op_not_i32:
     case INDEX_op_not_i64:
+    case INDEX_op_extrh_i64_i32:
         return &r_0;

     case INDEX_op_ext8s_i32:
     case INDEX_op_ext32u_i64:
     case INDEX_op_ext_i32_i64:
     case INDEX_op_extu_i32_i64:
+    case INDEX_op_extrl_i64_i32:
     case INDEX_op_extract_i32:
     case INDEX_op_extract_i64:
     case INDEX_op_sextract_i32:
-- 
2.17.2


tcg/ppc: Introduce Altivec registers

Altivec supports 32 128-bit vector registers, whose names are
by convention v0 through v31.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.h     | 11 ++++-
 tcg/ppc/tcg-target.inc.c | 88 +++++++++++++++++++++++++---------------
 2 files changed, 65 insertions(+), 34 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 # define TCG_TARGET_REG_BITS 32
 #endif

-#define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_NB_REGS 64
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
     TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,

+    TCG_REG_V0,  TCG_REG_V1,  TCG_REG_V2,  TCG_REG_V3,
+    TCG_REG_V4,  TCG_REG_V5,  TCG_REG_V6,  TCG_REG_V7,
+    TCG_REG_V8,  TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11,
+    TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
     TCG_REG_CALL_STACK = TCG_REG_R1,
     TCG_AREG0 = TCG_REG_R27
 } TCGReg;
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@
 # define TCG_REG_TMP1   TCG_REG_R12
 #endif

+#define TCG_VEC_TMP1    TCG_REG_V0
+#define TCG_VEC_TMP2    TCG_REG_V1
+
 #define TCG_REG_TB     TCG_REG_R31
 #define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)
@@ -XXX,XX +XXX,XX @@ bool have_isa_3_00;
 #endif

 #ifdef CONFIG_DEBUG_TCG
-static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-    "r0",
-    "r1",
-    "r2",
-    "r3",
-    "r4",
-    "r5",
-    "r6",
-    "r7",
-    "r8",
-    "r9",
-    "r10",
-    "r11",
-    "r12",
-    "r13",
-    "r14",
-    "r15",
-    "r16",
-    "r17",
-    "r18",
-    "r19",
-    "r20",
-    "r21",
-    "r22",
-    "r23",
-    "r24",
-    "r25",
-    "r26",
-    "r27",
-    "r28",
-    "r29",
-    "r30",
-    "r31"
+static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
+    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
+    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
+    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
 };
 #endif

@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_R5,
     TCG_REG_R4,
     TCG_REG_R3,
+
+    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
+    TCG_REG_V2,                 /* call clobbered, vectors */
+    TCG_REG_V3,
+    TCG_REG_V4,
+    TCG_REG_V5,
+    TCG_REG_V6,
+    TCG_REG_V7,
+    TCG_REG_V8,
+    TCG_REG_V9,
+    TCG_REG_V10,
+    TCG_REG_V11,
+    TCG_REG_V12,
+    TCG_REG_V13,
+    TCG_REG_V14,
+    TCG_REG_V15,
+    TCG_REG_V16,
+    TCG_REG_V17,
+    TCG_REG_V18,
+    TCG_REG_V19,
 };

 static const int tcg_target_call_iarg_regs[] = {
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);

+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
+
     s->reserved_regs = 0;
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);   /* tcg temp */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1);   /* stack pointer */
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13);  /* thread pointer */
 #endif
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
+    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
+    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
     if (USE_REG_TB) {
         tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
     }
-- 
2.17.1
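For reference, the semantics of the two extract ops wired up in the i386
patch above, as a plain C sketch (not from the patch):

    #include <stdint.h>

    /* extrl_i64_i32: a 32-bit mov of the low half; on x86-64 this
     * zero-extends into the full 64-bit host register. */
    static uint32_t extrl_i64_i32(uint64_t x) { return (uint32_t)x; }

    /* extrh_i64_i32: a 64-bit shift right by 32; the upper half of the
     * result is already zero, preserving the same invariant. */
    static uint32_t extrh_i64_i32(uint64_t x) { return (uint32_t)(x >> 32); }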
qht-bench: document -p flag

From: "Emilio G. Cota" <cota@braap.org>

We forgot to do this in bd224fce60 ("qht-bench: add -p flag
to precompute hash values", 2018-09-26).

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/qht-bench.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qht-bench.c b/tests/qht-bench.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/qht-bench.c
+++ b/tests/qht-bench.c
@@ -XXX,XX +XXX,XX @@ static const char commands_string[] =
     " -n = number of threads\n"
     "\n"
     " -o = offset at which keys start\n"
+    " -p = precompute hashes\n"
     "\n"
     " -g = set -s,-k,-K,-l,-r to the same value\n"
     " -s = initial size hint\n"
-- 
2.17.2


tcg/ppc: Introduce macro VX4()

Introduce the macro VX4(), used for encoding Altivec instructions.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.inc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define XO31(opc) (OPCD(31)|((opc)<<1))
 #define XO58(opc) (OPCD(58)|(opc))
 #define XO62(opc) (OPCD(62)|(opc))
+#define VX4(opc)  (OPCD(4)|(opc))

 #define B      OPCD( 18)
 #define BC     OPCD( 16)
-- 
2.17.1
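The opcode-space layout behind VX4(), sketched for orientation (this
backend defines OPCD(opc) as ((opc) << 26)):

    #include <stdint.h>

    #define OPCD(opc) ((uint32_t)(opc) << 26)  /* primary opcode, bits 0-5 */
    #define VX4(opc)  (OPCD(4) | (opc))        /* VX form lives in opcode 4 */

    /* All VX-form Altivec instructions share primary opcode 4 and are
     * distinguished by an extended opcode in the low 11 bits; e.g.
     * VXOR is VX4(1220), a base word of 0x100004c4 before the register
     * fields are OR'ed in. */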
xxhash: match output against the original xxhash32

From: "Emilio G. Cota" <cota@braap.org>

Change the order in which we extract a/b and c/d to
match the output of the upstream xxhash32.

Tested with:
https://github.com/cota/xxhash/tree/qemu

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/xxhash.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/xxhash.h
+++ b/include/qemu/xxhash.h
@@ -XXX,XX +XXX,XX @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
     uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2;
     uint32_t v3 = QEMU_XXHASH_SEED + 0;
     uint32_t v4 = QEMU_XXHASH_SEED - PRIME32_1;
-    uint32_t a = ab >> 32;
-    uint32_t b = ab;
-    uint32_t c = cd >> 32;
-    uint32_t d = cd;
+    uint32_t a = ab;
+    uint32_t b = ab >> 32;
+    uint32_t c = cd;
+    uint32_t d = cd >> 32;
     uint32_t h32;

     v1 += a * PRIME32_2;
-- 
2.17.2


tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC()

Introduce macros VRT(), VRA(), VRB(), VRC() used for encoding
elements of Altivec instructions.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.inc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define MB64(b) ((b)<<5)
 #define FXM(b) (1 << (19 - (b)))

+#define VRT(r)  (((r) & 31) << 21)
+#define VRA(r)  (((r) & 31) << 16)
+#define VRB(r)  (((r) & 31) << 11)
+#define VRC(r)  (((r) & 31) <<  6)
+
 #define LK    1

 #define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
-- 
2.17.1
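A small self-check of these encodings (a sketch, with OPCD() as defined
earlier in this backend):

    #include <assert.h>
    #include <stdint.h>

    #define OPCD(opc) ((uint32_t)(opc) << 26)
    #define VX4(opc)  (OPCD(4) | (opc))
    #define VRT(r)    (((r) & 31) << 21)
    #define VRA(r)    (((r) & 31) << 16)
    #define VRB(r)    (((r) & 31) << 11)

    int main(void)
    {
        /* vxor v0,v0,v0, the usual vector-zeroing idiom. */
        uint32_t insn = VX4(1220) | VRT(0) | VRA(0) | VRB(0);
        assert(insn == 0x100004c4);
        return 0;
    }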
tcg/i386: Move TCG_REG_CALL_STACK from define to enum

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_REG_RDI = TCG_REG_EDI,

     TCG_AREG0 = TCG_REG_EBP,
+    TCG_REG_CALL_STACK = TCG_REG_ESP
 } TCGReg;

 /* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_ESP
 #define TCG_TARGET_STACK_ALIGN 16
 #if defined(_WIN64)
 #define TCG_TARGET_CALL_STACK_OFFSET 32
-- 
2.17.2


tcg/ppc: Create TCGPowerISA and have_isa

Introduce an enum to hold base < 2.06 < 3.00.  Use macros to
preserve the existing have_isa_2_06 and have_isa_3_00 predicates.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.h     | 12 ++++++++++--
 tcg/ppc/tcg-target.inc.c |  8 ++++----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_AREG0 = TCG_REG_R27
 } TCGReg;

-extern bool have_isa_2_06;
-extern bool have_isa_3_00;
+typedef enum {
+    tcg_isa_base,
+    tcg_isa_2_06,
+    tcg_isa_3_00,
+} TCGPowerISA;
+
+extern TCGPowerISA have_isa;
+
+#define have_isa_2_06  (have_isa >= tcg_isa_2_06)
+#define have_isa_3_00  (have_isa >= tcg_isa_3_00)

 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_ext8u_i32        0 /* andi */
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@

 static tcg_insn_unit *tb_ret_addr;

-bool have_isa_2_06;
-bool have_isa_3_00;
+TCGPowerISA have_isa;

 #define HAVE_ISA_2_06  have_isa_2_06
 #define HAVE_ISEL      have_isa_2_06
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
     unsigned long hwcap = qemu_getauxval(AT_HWCAP);
     unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);

+    have_isa = tcg_isa_base;
     if (hwcap & PPC_FEATURE_ARCH_2_06) {
-        have_isa_2_06 = true;
+        have_isa = tcg_isa_2_06;
     }
 #ifdef PPC_FEATURE2_ARCH_3_00
     if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
-        have_isa_3_00 = true;
+        have_isa = tcg_isa_3_00;
     }
 #endif
-- 
2.17.1
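The pattern the TCGPowerISA patch relies on, restated in isolation:
because the enumerators are declared in increasing ISA order, each
predicate stays a single integer comparison and existing call sites
compile unchanged.

    typedef enum {
        tcg_isa_base,
        tcg_isa_2_06,
        tcg_isa_3_00,
    } TCGPowerISA;

    extern TCGPowerISA have_isa;

    /* Newer ISAs imply older ones, hence >= rather than ==. */
    #define have_isa_2_06  (have_isa >= tcg_isa_2_06)
    #define have_isa_3_00  (have_isa >= tcg_isa_3_00)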
tcg/aarch64: Remove reloc_pc26_atomic

It is unused since b68686bd4bfeb70040b4099df993dfa0b4f37b03.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.inc.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
     *code_ptr = deposit32(*code_ptr, 0, 26, offset);
 }

-static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
-                                     tcg_insn_unit *target)
-{
-    ptrdiff_t offset = target - code_ptr;
-    tcg_insn_unit insn;
-    tcg_debug_assert(offset == sextract64(offset, 0, 26));
-    /* read instruction, mask away previous PC_REL26 parameter contents,
-       set the proper offset, then write back the instruction. */
-    insn = atomic_read(code_ptr);
-    atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
-}
-
 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 {
     ptrdiff_t offset = target - code_ptr;
-- 
2.17.2
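For contrast, the non-atomic helper that survives, in condensed form (a
sketch of reloc_pc26() as shown above): an AArch64 B/BL instruction stores
a signed 26-bit offset counted in 4-byte instruction units, so patching is
a single read-modify-write of the low 26 bits.

    #include <stdint.h>

    static void patch_branch26(uint32_t *code_ptr, const uint32_t *target)
    {
        intptr_t offset = target - code_ptr;  /* pointer arith: insn units */
        *code_ptr = (*code_ptr & ~0x03ffffffu)
                  | ((uint32_t)offset & 0x03ffffff);
    }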
tcg/aarch64: Fold away "noaddr" branch routines

There is one use apiece for these.  There is no longer a need for
preserving branch offset operands, as we no longer re-translate.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.inc.c | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
     }
 }

-static inline void tcg_out_goto_noaddr(TCGContext *s)
-{
-    /* We pay attention here to not modify the branch target by reading from
-       the buffer. This ensure that caches and memory are kept coherent during
-       retranslation. Mask away possible garbage in the high bits for the
-       first translation, while keeping the offset bits for retranslation. */
-    uint32_t old = tcg_in32(s);
-    tcg_out_insn(s, 3206, B, old);
-}
-
-static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
-{
-    /* See comments in tcg_out_goto_noaddr.  */
-    uint32_t old = tcg_in32(s) >> 5;
-    tcg_out_insn(s, 3202, B_C, c, old);
-}
-
 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
 {
     tcg_out_insn(s, 3207, BLR, reg);
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
 {
     if (!l->has_value) {
         tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
-        tcg_out_goto_noaddr(s);
+        tcg_out_insn(s, 3206, B, 0);
     } else {
         tcg_out_goto(s, l->u.value_ptr);
     }
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,

     /* If not equal, we jump to the slow path.  */
     *label_ptr = s->code_ptr;
-    tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
+    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
 }

 #endif /* CONFIG_SOFTMMU */
-- 
2.17.2
tcg/arm: Remove reloc_pc24_atomic

It is unused since 3fb53fb4d12f2e7833bd1659e6013237b130ef20.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.inc.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
     *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
 }

-static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
-{
-    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
-    tcg_insn_unit insn = atomic_read(code_ptr);
-    tcg_debug_assert(offset == sextract32(offset, 0, 24));
-    atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
-}
-
 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
-- 
2.17.2
tcg/arm: Fold away "noaddr" branch routines

There is one use apiece for these.  There is no longer a need for
preserving branch offset operands, as we no longer re-translate.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.inc.c | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
                     (((offset - 8) >> 2) & 0x00ffffff));
 }

-static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
-{
-    /* We pay attention here to not modify the branch target by masking
-       the corresponding bytes. This ensure that caches and memory are
-       kept coherent during retranslation. */
-    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
-}
-
-static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
-{
-    /* We pay attention here to not modify the branch target by masking
-       the corresponding bytes. This ensure that caches and memory are
-       kept coherent during retranslation. */
-    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
-}
-
 static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
 {
     tcg_out32(s, (cond << 28) | 0x0b000000 |
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
         tcg_out_goto(s, cond, l->u.value_ptr);
     } else {
         tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
-        tcg_out_b_noaddr(s, cond);
+        tcg_out_b(s, cond, 0);
     }
 }

@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     /* This a conditional BL only to load a pointer within this opcode into LR
        for the slow path.  We will not be using the value for a tail call.  */
     label_ptr = s->code_ptr;
-    tcg_out_bl_noaddr(s, COND_NE);
+    tcg_out_bl(s, COND_NE, 0);

     tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);

@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)

     /* The conditional call must come last, as we're going to return here.  */
     label_ptr = s->code_ptr;
-    tcg_out_bl_noaddr(s, COND_NE);
+    tcg_out_bl(s, COND_NE, 0);

     add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                         s->code_ptr, label_ptr);
-- 
2.17.2
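The shape of the replacement in both "noaddr" removals, as a toy model (a
sketch, not QEMU code): emit the branch with a zero offset field, record
its position, and let the relocation pass write the real offset later.

    #include <stdint.h>
    #include <stddef.h>

    static uint32_t buf[64];
    static size_t emitted;

    /* Emit a branch whose 24-bit offset field is a zero placeholder. */
    static size_t emit_branch_placeholder(uint32_t cond_and_opcode)
    {
        buf[emitted] = cond_and_opcode;
        return emitted++;
    }

    /* Once the label is resolved, fill in the real offset. */
    static void patch_branch24(size_t at, int32_t offset)
    {
        buf[at] = (buf[at] & ~0x00ffffffu) | ((uint32_t)offset & 0x00ffffff);
    }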
include: move exec/tb-hash-xx.h to qemu/xxhash.h

From: "Emilio G. Cota" <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/tb-hash.h                       | 2 +-
 include/{exec/tb-hash-xx.h => qemu/xxhash.h} | 6 +++---
 tests/qht-bench.c                            | 2 +-
 util/qsp.c                                   | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)
 rename include/{exec/tb-hash-xx.h => qemu/xxhash.h} (97%)

diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -XXX,XX +XXX,XX @@
 #ifndef EXEC_TB_HASH_H
 #define EXEC_TB_HASH_H

-#include "exec/tb-hash-xx.h"
+#include "qemu/xxhash.h"

 #ifdef CONFIG_SOFTMMU

diff --git a/include/exec/tb-hash-xx.h b/include/qemu/xxhash.h
similarity index 97%
rename from include/exec/tb-hash-xx.h
rename to include/qemu/xxhash.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/tb-hash-xx.h
+++ b/include/qemu/xxhash.h
@@ -XXX,XX +XXX,XX @@
 * - xxHash source repository : https://github.com/Cyan4973/xxHash
 */

-#ifndef EXEC_TB_HASH_XX_H
-#define EXEC_TB_HASH_XX_H
+#ifndef QEMU_XXHASH_H
+#define QEMU_XXHASH_H

 #include "qemu/bitops.h"

@@ -XXX,XX +XXX,XX @@ static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e,
     return qemu_xxhash7(ab, cd, e, f, 0);
 }

-#endif /* EXEC_TB_HASH_XX_H */
+#endif /* QEMU_XXHASH_H */
diff --git a/tests/qht-bench.c b/tests/qht-bench.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/qht-bench.c
+++ b/tests/qht-bench.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/atomic.h"
 #include "qemu/qht.h"
 #include "qemu/rcu.h"
-#include "exec/tb-hash-xx.h"
+#include "qemu/xxhash.h"

 struct thread_stats {
     size_t rd;
diff --git a/util/qsp.c b/util/qsp.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qsp.c
+++ b/util/qsp.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/timer.h"
 #include "qemu/qht.h"
 #include "qemu/rcu.h"
-#include "exec/tb-hash-xx.h"
+#include "qemu/xxhash.h"

 enum QSPType {
     QSP_MUTEX,
-- 
2.17.2


tcg/ppc: Replace HAVE_ISA_2_06

This is identical to have_isa_2_06, so replace it.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;

 TCGPowerISA have_isa;

-#define HAVE_ISA_2_06  have_isa_2_06
 #define HAVE_ISEL      have_isa_2_06

 #ifndef CONFIG_SOFTMMU
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
     } else {
         uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
-        if (!HAVE_ISA_2_06 && insn == LDBRX) {
+        if (!have_isa_2_06 && insn == LDBRX) {
             tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
             tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
             tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
     } else {
         uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
-        if (!HAVE_ISA_2_06 && insn == STDBRX) {
+        if (!have_isa_2_06 && insn == STDBRX) {
             tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
             tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
             tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
-- 
2.17.1
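Call sites are unaffected by the header rename beyond their #include line;
for example (a sketch, with qemu_xxhash5()'s signature taken from the
moved header):

    #include <stdint.h>
    #include "qemu/xxhash.h"   /* was "exec/tb-hash-xx.h" */

    static uint32_t hash_triple(uint64_t ab, uint64_t cd, uint32_t e)
    {
        return qemu_xxhash5(ab, cd, e);
    }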
tcg/i386: Propagate is64 to tcg_out_qemu_ld_direct

This helps preserve the invariant that all TCG_TYPE_I32 values
are stored zero-extended in the 64-bit host registers.

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.inc.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void setup_guest_base_seg(void) { }

 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                    TCGReg base, int index, intptr_t ofs,
-                                   int seg, TCGMemOp memop)
+                                   int seg, bool is64, TCGMemOp memop)
 {
     const TCGMemOp real_bswap = memop & MO_BSWAP;
     TCGMemOp bswap = real_bswap;
+    int rexw = is64 * P_REXW;
     int movop = OPC_MOVL_GvEv;

     if (have_movbe && real_bswap) {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                  base, index, 0, ofs);
         break;
     case MO_SB:
-        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
+        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + rexw + seg, datalo,
                                  base, index, 0, ofs);
         break;
     case MO_UW:
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                          base, index, 0, ofs);
                 tcg_out_rolw_8(s, datalo);
             }
-            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
+            tcg_out_modrm(s, OPC_MOVSWL + rexw, datalo, datalo);
         } else {
-            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
+            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg,
                                      datalo, base, index, 0, ofs);
         }
         break;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
                      label_ptr, offsetof(CPUTLBEntry, addr_read));

     /* TLB Hit.  */
-    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
+    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, is64, opc);

     /* Record the current context of a load into ldst label */
     add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     }

     tcg_out_qemu_ld_direct(s, datalo, datahi,
-                           base, index, offset, seg, opc);
+                           base, index, offset, seg, is64, opc);
 }
 #endif
 }
-- 
2.17.2


tcg/ppc: Replace HAVE_ISEL macro with a variable

Previously we have been hard-coding the knowledge that Power7 has ISEL,
but it was an optional instruction before that.  Use the AT_HWCAP2 bit,
when present, to properly determine support.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@
 static tcg_insn_unit *tb_ret_addr;

 TCGPowerISA have_isa;
-
-#define HAVE_ISEL have_isa_2_06
+static bool have_isel;

 #ifndef CONFIG_SOFTMMU
 #define TCG_GUEST_BASE_REG 30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
     /* If we have ISEL, we can implement everything with 3 or 4 insns.
        All other cases below are also at least 3 insns, so speed up the
        code generator by not considering them and always using ISEL.  */
-    if (HAVE_ISEL) {
+    if (have_isel) {
         int isel, tab;

         tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,

     tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);

-    if (HAVE_ISEL) {
+    if (have_isel) {
         int isel = tcg_to_isel[cond];

         /* Swap the V operands if the operation indicates inversion.  */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
     } else {
         tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
         /* Note that the only other valid constant for a2 is 0.  */
-        if (HAVE_ISEL) {
+        if (have_isel) {
             tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
             tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
         } else if (!const_a2 && a0 == a2) {
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
     }
 #endif

+#ifdef PPC_FEATURE2_HAS_ISEL
+    /* Prefer explicit instruction from the kernel. */
+    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
+#else
+    /* Fall back to knowing Power7 (2.06) has ISEL. */
+    have_isel = have_isa_2_06;
+#endif
+
     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
     tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
-- 
2.17.1
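The detection order above, condensed (a sketch; PPC_FEATURE2_HAS_ISEL is
only visible when the libc headers provide it):

    #include <stdbool.h>

    static bool detect_isel(unsigned long hwcap2, bool have_isa_2_06)
    {
    #ifdef PPC_FEATURE2_HAS_ISEL
        /* Prefer the kernel's explicit statement about the instruction. */
        return (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
    #else
        /* Otherwise fall back to "2.06 (Power7) and later implement isel". */
        return have_isa_2_06;
    #endif
    }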
tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP

For now, defined universally as true, since we previously required
backends to implement swapped memory operations.  Future patches
may now remove that support where it is onerous.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.h |   1 +
 tcg/arm/tcg-target.h     |   1 +
 tcg/i386/tcg-target.h    |   2 +
 tcg/mips/tcg-target.h    |   1 +
 tcg/ppc/tcg-target.h     |   1 +
 tcg/s390/tcg-target.h    |   1 +
 tcg/sparc/tcg-target.h   |   1 +
 tcg/tci/tcg-target.h     |   2 +
 tcg/tcg-op.c             | 118 ++++++++++++++++++++++++++++++++++++++-
 9 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define TCG_TARGET_HAS_mul_vec          1

 #define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1

 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -XXX,XX +XXX,XX @@ enum {
 };

 #define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1

 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,

 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)

+#define TCG_TARGET_HAS_MEMORY_BSWAP     1
+
 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
 #endif

 #define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1

 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ void flush_icache_range(uintptr_t start, uintptr_t stop);
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);

 #define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1

 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities;
 #define TCG_TARGET_CALL_STACK_OFFSET    160

 #define TCG_TARGET_EXTEND_ARGS 1
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1

 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)

diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
 #define TCG_AREG0 TCG_REG_I0

 #define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1

 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
    We prefer consistency across hosts on this.  */
 #define TCG_TARGET_DEFAULT_MO  (0)

+#define TCG_TARGET_HAS_MEMORY_BSWAP     1
+
 static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
                                             uintptr_t jmp_addr, uintptr_t addr)
 {
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)

 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    TCGMemOp orig_memop;
+
     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     memop = tcg_canonicalize_memop(memop, 0, 0);
     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
                                addr, trace_mem_get_info(memop, 0));
+
+    orig_memop = memop;
+    if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+        memop &= ~MO_BSWAP;
+        /* The bswap primitive requires zero-extended input.  */
+        if ((memop & MO_SSIZE) == MO_SW) {
+            memop &= ~MO_SIGN;
+        }
+    }
+
     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
+
+    if ((orig_memop ^ memop) & MO_BSWAP) {
+        switch (orig_memop & MO_SIZE) {
+        case MO_16:
+            tcg_gen_bswap16_i32(val, val);
+            if (orig_memop & MO_SIGN) {
+                tcg_gen_ext16s_i32(val, val);
+            }
+            break;
+        case MO_32:
+            tcg_gen_bswap32_i32(val, val);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
 }

 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    TCGv_i32 swap = NULL;
+
     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     memop = tcg_canonicalize_memop(memop, 0, 1);
     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
                                addr, trace_mem_get_info(memop, 1));
+
+    if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+        swap = tcg_temp_new_i32();
+        switch (memop & MO_SIZE) {
+        case MO_16:
+            tcg_gen_ext16u_i32(swap, val);
+            tcg_gen_bswap16_i32(swap, swap);
+            break;
+        case MO_32:
+            tcg_gen_bswap32_i32(swap, val);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        val = swap;
+        memop &= ~MO_BSWAP;
+    }
+
     gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
+
+    if (swap) {
+        tcg_temp_free_i32(swap);
+    }
 }

 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
-    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+    TCGMemOp orig_memop;
+
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
         if (memop & MO_SIGN) {
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
         return;
     }

+    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     memop = tcg_canonicalize_memop(memop, 1, 0);
     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
                                addr, trace_mem_get_info(memop, 0));
+
+    orig_memop = memop;
+    if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+        memop &= ~MO_BSWAP;
+        /* The bswap primitive requires zero-extended input.  */
+        if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
+            memop &= ~MO_SIGN;
+        }
+    }
+
     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
+
+    if ((orig_memop ^ memop) & MO_BSWAP) {
+        switch (orig_memop & MO_SIZE) {
+        case MO_16:
+            tcg_gen_bswap16_i64(val, val);
+            if (orig_memop & MO_SIGN) {
+                tcg_gen_ext16s_i64(val, val);
+            }
+            break;
+        case MO_32:
+            tcg_gen_bswap32_i64(val, val);
+            if (orig_memop & MO_SIGN) {
+                tcg_gen_ext32s_i64(val, val);
+            }
+            break;
+        case MO_64:
+            tcg_gen_bswap64_i64(val, val);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
 }

 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
-    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+    TCGv_i64 swap = NULL;
+
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
         return;
     }

+    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     memop = tcg_canonicalize_memop(memop, 1, 1);
     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
                                addr, trace_mem_get_info(memop, 1));
+
+    if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+        swap = tcg_temp_new_i64();
+        switch (memop & MO_SIZE) {
+        case MO_16:
+            tcg_gen_ext16u_i64(swap, val);
+            tcg_gen_bswap16_i64(swap, swap);
+            break;
+        case MO_32:
+            tcg_gen_ext32u_i64(swap, val);
+            tcg_gen_bswap32_i64(swap, swap);
+            break;
+        case MO_64:
+            tcg_gen_bswap64_i64(swap, val);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        val = swap;
+        memop &= ~MO_BSWAP;
+    }
+
     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
+
+    if (swap) {
+        tcg_temp_free_i64(swap);
+    }
 }

 static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, TCGMemOp opc)
-- 
2.17.2


tcg/ppc: Enable tcg backend vector compilation

Introduce all of the flags required to enable tcg backend vector support,
and a runtime flag to indicate the host supports Altivec instructions.

For now, do not actually set have_altivec to true, because we have not
yet added all of the code to actually generate all of the required insns.
However, we must define these flags in order to disable ifndefs that create
stub versions of the functions added here.

The change to tcg_out_movi works around a buglet in tcg.c wherein if we
do not define tcg_out_dupi_vec we get a declared but not defined Werror,
but if we only declare it we get a defined but not used Werror.  We need
this change to tcg_out_movi eventually anyway, so it's no biggie.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.h     | 25 ++++++++++++++++
 tcg/ppc/tcg-target.opc.h |  5 ++++
 tcg/ppc/tcg-target.inc.c | 62 ++++++++++++++++++++++++++++++++++++++--
 3 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 tcg/ppc/tcg-target.opc.h

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 } TCGPowerISA;

 extern TCGPowerISA have_isa;
+extern bool have_altivec;

 #define have_isa_2_06  (have_isa >= tcg_isa_2_06)
 #define have_isa_3_00  (have_isa >= tcg_isa_3_00)
@@ -XXX,XX +XXX,XX @@ extern TCGPowerISA have_isa;
 #define TCG_TARGET_HAS_mulsh_i64        1
 #endif

+/*
+ * While technically Altivec could support V64, it has no 64-bit store
+ * instruction and substituting two 32-bit stores makes the generated
+ * code quite large.
+ */
+#define TCG_TARGET_HAS_v64              0
+#define TCG_TARGET_HAS_v128             have_altivec
+#define TCG_TARGET_HAS_v256             0
+
+#define TCG_TARGET_HAS_andc_vec         0
+#define TCG_TARGET_HAS_orc_vec          0
+#define TCG_TARGET_HAS_not_vec          0
+#define TCG_TARGET_HAS_neg_vec          0
+#define TCG_TARGET_HAS_abs_vec          0
+#define TCG_TARGET_HAS_shi_vec          0
+#define TCG_TARGET_HAS_shs_vec          0
+#define TCG_TARGET_HAS_shv_vec          0
+#define TCG_TARGET_HAS_cmp_vec          0
+#define TCG_TARGET_HAS_mul_vec          0
+#define TCG_TARGET_HAS_sat_vec          0
+#define TCG_TARGET_HAS_minmax_vec       0
+#define TCG_TARGET_HAS_bitsel_vec       0
+#define TCG_TARGET_HAS_cmpsel_vec       0
+
 void flush_icache_range(uintptr_t start, uintptr_t stop);
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);

diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/ppc/tcg-target.opc.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * Target-specific opcodes for host vector expansion.  These will be
+ * emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;

 TCGPowerISA have_isa;
 static bool have_isel;
+bool have_altivec;

 #ifndef CONFIG_SOFTMMU
 #define TCG_GUEST_BASE_REG 30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
     }
 }

-static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
-                                tcg_target_long arg)
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
+                             tcg_target_long val)
 {
-    tcg_out_movi_int(s, type, ret, arg, false);
+    g_assert_not_reached();
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
+                         tcg_target_long arg)
+{
+    switch (type) {
+    case TCG_TYPE_I32:
+    case TCG_TYPE_I64:
+        tcg_debug_assert(ret < TCG_REG_V0);
+        tcg_out_movi_int(s, type, ret, arg, false);
+        break;
+
+    case TCG_TYPE_V64:
+    case TCG_TYPE_V128:
+        tcg_debug_assert(ret >= TCG_REG_V0);
+        tcg_out_dupi_vec(s, type, ret, arg);
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
 }

 static bool mask_operand(uint32_t c, int *mb, int *me)
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     }
 }

+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+    g_assert_not_reached();
+}
+
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+                            TCGReg dst, TCGReg src)
+{
+    g_assert_not_reached();
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+                             TCGReg out, TCGReg base, intptr_t offset)
+{
+    g_assert_not_reached();
+}
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+                           unsigned vecl, unsigned vece,
+                           const TCGArg *args, const int *const_args)
+{
+    g_assert_not_reached();
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+                       TCGArg a0, ...)
+{
+    g_assert_not_reached();
+}
+
 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
 {
     static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)

     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
     tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
+    if (have_altivec) {
+        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
+        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
+    }

     tcg_target_call_clobber_regs = 0;
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
-- 
2.17.1
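The fallback strategy added to tcg-op.c above, modeled in plain C for the
trickiest case, a sign-extending 16-bit byte-swapped load (a sketch, not
from the patch): load without swapping, byte-swap the zero-extended value,
then sign-extend, mirroring the MO_SW handling.

    #include <stdint.h>

    static int32_t ld16s_byteswapped(const uint8_t *p)
    {
        /* Plain host-order load, zero-extended (MO_UW). */
        uint16_t raw = (uint16_t)(p[0] | (p[1] << 8));
        /* The bswap primitive requires zero-extended input. */
        uint16_t swapped = (uint16_t)((raw >> 8) | (raw << 8));
        /* Re-apply the sign extension stripped from the memop. */
        return (int16_t)swapped;
    }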
tcg: Return success from patch_reloc

This will move the assert for success from within (subroutines of)
patch_reloc into the callers.  It will also let new code do something
different when a relocation is out of range.

For the moment, all backends are trivially converted to return true.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.inc.c | 3 ++-
 tcg/arm/tcg-target.inc.c     | 3 ++-
 tcg/i386/tcg-target.inc.c    | 3 ++-
 tcg/mips/tcg-target.inc.c    | 3 ++-
 tcg/ppc/tcg-target.inc.c     | 3 ++-
 tcg/s390/tcg-target.inc.c    | 3 ++-
 tcg/sparc/tcg-target.inc.c   | 5 +++--
 tcg/tcg.c                    | 8 +++++---
 tcg/tci/tcg-target.inc.c     | 3 ++-
 9 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
     *code_ptr = deposit32(*code_ptr, 5, 19, offset);
 }

-static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
+static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                                intptr_t value, intptr_t addend)
 {
     tcg_debug_assert(addend == 0);
@@ -XXX,XX +XXX,XX @@ static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
     default:
         tcg_abort();
     }
+    return true;
 }

 #define TCG_CT_CONST_AIMM 0x100
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
     *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
 }

-static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
     tcg_debug_assert(addend == 0);
@@ -XXX,XX +XXX,XX @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
     } else {
         g_assert_not_reached();
     }
+    return true;
 }

 #define TCG_CT_CONST_ARM 0x100
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static bool have_lzcnt;

 static tcg_insn_unit *tb_ret_addr;

-static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
     value += addend;
@@ -XXX,XX +XXX,XX @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
     default:
         tcg_abort();
     }
+    return true;
 }

 #if TCG_TARGET_REG_BITS == 64
diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target)
     *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target));
 }

-static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
     tcg_debug_assert(type == R_MIPS_PC16);
     tcg_debug_assert(addend == 0);
     reloc_pc16(code_ptr, (tcg_insn_unit *)value);
+    return true;
 }

 #define TCG_CT_CONST_ZERO 0x100
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static const uint32_t tcg_to_isel[] = {
     [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
 };

-static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
     tcg_insn_unit *target;

     value += addend;
     target = (tcg_insn_unit *)value;
@@ -XXX,XX +XXX,XX @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
     }
     *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
     break;
     default:
         g_assert_not_reached();
     }


tcg/ppc: Add support for load/store/logic/comparison

Add various bits and pieces related mostly to load and store
operations.  In that context, logic, compare, and splat Altivec
instructions are used, and, therefore, the support for emitting
them is included in this patch too.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.h     |   6 +-
 tcg/ppc/tcg-target.inc.c | 472 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 442 insertions(+), 36 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
 #define TCG_TARGET_HAS_v128             have_altivec
 #define TCG_TARGET_HAS_v256             0

-#define TCG_TARGET_HAS_andc_vec         0
+#define TCG_TARGET_HAS_andc_vec         1
 #define TCG_TARGET_HAS_orc_vec          0
-#define TCG_TARGET_HAS_not_vec          0
+#define TCG_TARGET_HAS_not_vec          1
 #define TCG_TARGET_HAS_neg_vec          0
 #define TCG_TARGET_HAS_abs_vec          0
 #define TCG_TARGET_HAS_shi_vec          0
 #define TCG_TARGET_HAS_shs_vec          0
 #define TCG_TARGET_HAS_shv_vec          0
-#define TCG_TARGET_HAS_cmp_vec          0
+#define TCG_TARGET_HAS_cmp_vec          1
 #define TCG_TARGET_HAS_mul_vec          0
 #define TCG_TARGET_HAS_sat_vec          0
 #define TCG_TARGET_HAS_minmax_vec       0
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
         ct->ct |= TCG_CT_REG;
         ct->u.regs = 0xffffffff;
         break;
+    case 'v':
+        ct->ct |= TCG_CT_REG;
+        ct->u.regs = 0xffffffff00000000ull;
+        break;
     case 'L':                   /* qemu_ld constraint */
         ct->ct |= TCG_CT_REG;
         ct->u.regs = 0xffffffff;
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,

 #define NOP    ORI  /* ori 0,0,0 */

+#define LVX        XO31(103)
+#define LVEBX      XO31(7)
+#define LVEHX      XO31(39)
+#define LVEWX      XO31(71)
+
+#define STVX       XO31(231)
+#define STVEWX     XO31(199)
+
+#define VCMPEQUB   VX4(6)
+#define VCMPEQUH   VX4(70)
+#define VCMPEQUW   VX4(134)
+#define VCMPGTSB   VX4(774)
+#define VCMPGTSH   VX4(838)
+#define VCMPGTSW   VX4(902)
+#define VCMPGTUB   VX4(518)
+#define VCMPGTUH   VX4(582)
+#define VCMPGTUW   VX4(646)
+
+#define VAND       VX4(1028)
+#define VANDC      VX4(1092)
+#define VNOR       VX4(1284)
+#define VOR        VX4(1156)
+#define VXOR       VX4(1220)
+
+#define VSPLTB     VX4(524)
+#define VSPLTH     VX4(588)
+#define VSPLTW     VX4(652)
+#define VSPLTISB   VX4(780)
+#define VSPLTISH   VX4(844)
+#define VSPLTISW   VX4(908)
+
+#define VSLDOI     VX4(44)
+
 #define RT(r) ((r)<<21)
 #define RS(r) ((r)<<21)
 #define RA(r) ((r)<<16)
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
     tcg_insn_unit *target;
+    int16_t lo;
+    int32_t hi;

     value += addend;
     target = (tcg_insn_unit *)value;
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
         }
         *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
         break;
+    case R_PPC_ADDR32:
+        /*
+         * We are abusing this relocation type.  Again, this points to
+         * a pair of insns, lis + load.  This is an absolute address
+         * relocation for PPC32 so the lis cannot be removed.
+         */
+        lo = value;
+        hi = value - lo;
+        if (hi + lo != value) {
+            return false;
+        }
+        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
+        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
+        break;
     default:
         g_assert_not_reached();
     }
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,

 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
 {
-    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
-    if (ret != arg) {
-        tcg_out32(s, OR | SAB(arg, ret, arg));
+    if (ret == arg) {
+        return true;
+    }
+    switch (type) {
+    case TCG_TYPE_I64:
+        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+        /* fallthru */
135
+ case TCG_TYPE_I32:
136
+ if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
137
+ tcg_out32(s, OR | SAB(arg, ret, arg));
138
+ break;
139
+ } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
140
+ /* Altivec does not support vector/integer moves. */
141
+ return false;
142
+ }
143
+ /* fallthru */
144
+ case TCG_TYPE_V64:
145
+ case TCG_TYPE_V128:
146
+ tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
147
+ tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
148
+ break;
149
+ default:
150
+ g_assert_not_reached();
151
}
152
return true;
153
}
154
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
155
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
156
tcg_target_long val)
157
{
158
- g_assert_not_reached();
159
+ uint32_t load_insn;
160
+ int rel, low;
161
+ intptr_t add;
162
+
163
+ low = (int8_t)val;
164
+ if (low >= -16 && low < 16) {
165
+ if (val == (tcg_target_long)dup_const(MO_8, low)) {
166
+ tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
167
+ return;
168
+ }
169
+ if (val == (tcg_target_long)dup_const(MO_16, low)) {
170
+ tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
171
+ return;
172
+ }
173
+ if (val == (tcg_target_long)dup_const(MO_32, low)) {
174
+ tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
175
+ return;
176
+ }
177
+ }
178
+
179
+ /*
180
+ * Otherwise we must load the value from the constant pool.
181
+ */
182
+ if (USE_REG_TB) {
183
+ rel = R_PPC_ADDR16;
184
+ add = -(intptr_t)s->code_gen_ptr;
185
+ } else {
186
+ rel = R_PPC_ADDR32;
187
+ add = 0;
188
+ }
189
+
190
+ load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
191
+ if (TCG_TARGET_REG_BITS == 64) {
192
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
193
+ } else {
194
+ new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
195
+ }
196
+
197
+ if (USE_REG_TB) {
198
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
199
+ load_insn |= RA(TCG_REG_TB);
200
+ } else {
201
+ tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
202
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
203
+ }
204
+ tcg_out32(s, load_insn);
205
}
206
207
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
208
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
209
align = 3;
210
/* FALLTHRU */
211
default:
212
- if (rt != TCG_REG_R0) {
213
+ if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
214
rs = rt;
215
break;
216
}
217
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
218
}
219
220
/* For unaligned, or very large offsets, use the indexed form. */
221
- if (offset & align || offset != (int32_t)offset) {
222
+ if (offset & align || offset != (int32_t)offset || opi == 0) {
223
if (rs == base) {
224
rs = TCG_REG_R0;
225
}
226
tcg_debug_assert(!is_store || rs != rt);
227
tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
228
- tcg_out32(s, opx | TAB(rt, base, rs));
229
+ tcg_out32(s, opx | TAB(rt & 31, base, rs));
230
return;
231
}
232
233
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
234
base = rs;
235
}
236
if (opi != ADDI || base != rt || l0 != 0) {
237
- tcg_out32(s, opi | TAI(rt, base, l0));
238
+ tcg_out32(s, opi | TAI(rt & 31, base, l0));
239
}
240
}
241
242
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
243
- TCGReg arg1, intptr_t arg2)
244
+static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
245
+ TCGReg va, TCGReg vb, int shb)
246
{
247
- int opi, opx;
248
-
249
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
250
- if (type == TCG_TYPE_I32) {
251
- opi = LWZ, opx = LWZX;
252
- } else {
253
- opi = LD, opx = LDX;
254
- }
255
- tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
256
+ tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
257
}
258
259
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
260
- TCGReg arg1, intptr_t arg2)
261
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
262
+ TCGReg base, intptr_t offset)
263
{
264
- int opi, opx;
265
+ int shift;
266
267
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
268
- if (type == TCG_TYPE_I32) {
269
- opi = STW, opx = STWX;
270
- } else {
271
- opi = STD, opx = STDX;
272
+ switch (type) {
273
+ case TCG_TYPE_I32:
274
+ if (ret < TCG_REG_V0) {
275
+ tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
276
+ break;
277
+ }
278
+ tcg_debug_assert((offset & 3) == 0);
279
+ tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
280
+ shift = (offset - 4) & 0xc;
281
+ if (shift) {
282
+ tcg_out_vsldoi(s, ret, ret, ret, shift);
283
+ }
284
+ break;
285
+ case TCG_TYPE_I64:
286
+ if (ret < TCG_REG_V0) {
287
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
288
+ tcg_out_mem_long(s, LD, LDX, ret, base, offset);
289
+ break;
290
+ }
291
+ /* fallthru */
292
+ case TCG_TYPE_V64:
293
+ tcg_debug_assert(ret >= TCG_REG_V0);
294
+ tcg_debug_assert((offset & 7) == 0);
295
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
296
+ if (offset & 8) {
297
+ tcg_out_vsldoi(s, ret, ret, ret, 8);
298
+ }
299
+ break;
300
+ case TCG_TYPE_V128:
301
+ tcg_debug_assert(ret >= TCG_REG_V0);
302
+ tcg_debug_assert((offset & 15) == 0);
303
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset);
304
+ break;
305
+ default:
306
+ g_assert_not_reached();
307
+ }
308
+}
309
+
310
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
311
+ TCGReg base, intptr_t offset)
312
+{
313
+ int shift;
314
+
315
+ switch (type) {
316
+ case TCG_TYPE_I32:
317
+ if (arg < TCG_REG_V0) {
318
+ tcg_out_mem_long(s, STW, STWX, arg, base, offset);
319
+ break;
320
+ }
321
+ tcg_debug_assert((offset & 3) == 0);
322
+ shift = (offset - 4) & 0xc;
323
+ if (shift) {
324
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
325
+ arg = TCG_VEC_TMP1;
326
+ }
327
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
328
+ break;
329
+ case TCG_TYPE_I64:
330
+ if (arg < TCG_REG_V0) {
331
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
332
+ tcg_out_mem_long(s, STD, STDX, arg, base, offset);
333
+ break;
334
+ }
335
+ /* fallthru */
336
+ case TCG_TYPE_V64:
337
+ tcg_debug_assert(arg >= TCG_REG_V0);
338
+ tcg_debug_assert((offset & 7) == 0);
339
+ if (offset & 8) {
340
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
341
+ arg = TCG_VEC_TMP1;
342
+ }
343
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
344
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
345
+ break;
346
+ case TCG_TYPE_V128:
347
+ tcg_debug_assert(arg >= TCG_REG_V0);
348
+ tcg_out_mem_long(s, 0, STVX, arg, base, offset);
349
+ break;
350
+ default:
351
+ g_assert_not_reached();
352
}
353
- tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
354
}
355
356
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
357
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
358
359
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
360
{
361
- g_assert_not_reached();
362
+ switch (opc) {
363
+ case INDEX_op_and_vec:
364
+ case INDEX_op_or_vec:
365
+ case INDEX_op_xor_vec:
366
+ case INDEX_op_andc_vec:
367
+ case INDEX_op_not_vec:
368
+ return 1;
369
+ case INDEX_op_cmp_vec:
370
+ return vece <= MO_32 ? -1 : 0;
371
+ default:
372
+ return 0;
373
+ }
374
}
375
376
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
377
TCGReg dst, TCGReg src)
378
{
379
- g_assert_not_reached();
380
+ tcg_debug_assert(dst >= TCG_REG_V0);
381
+ tcg_debug_assert(src >= TCG_REG_V0);
382
+
383
+ /*
384
+ * Recall we use (or emulate) VSX integer loads, so the integer is
385
+ * right justified within the left (zero-index) double-word.
386
+ */
387
+ switch (vece) {
388
+ case MO_8:
389
+ tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
390
+ break;
391
+ case MO_16:
392
+ tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
393
+ break;
394
+ case MO_32:
395
+ tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
396
+ break;
397
+ case MO_64:
398
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
399
+ tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
400
+ break;
401
+ default:
402
+ g_assert_not_reached();
403
+ }
120
+ return true;
404
+ return true;
121
}
405
}
122
406
123
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
407
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
124
diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
408
TCGReg out, TCGReg base, intptr_t offset)
125
index XXXXXXX..XXXXXXX 100644
409
{
126
--- a/tcg/s390/tcg-target.inc.c
410
- g_assert_not_reached();
127
+++ b/tcg/s390/tcg-target.inc.c
411
+ int elt;
128
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[16] = {
412
+
129
static tcg_insn_unit *tb_ret_addr;
413
+ tcg_debug_assert(out >= TCG_REG_V0);
130
uint64_t s390_facilities;
414
+ switch (vece) {
131
415
+ case MO_8:
132
-static void patch_reloc(tcg_insn_unit *code_ptr, int type,
416
+ tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
133
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
417
+ elt = extract32(offset, 0, 4);
134
intptr_t value, intptr_t addend)
418
+#ifndef HOST_WORDS_BIGENDIAN
135
{
419
+ elt ^= 15;
136
intptr_t pcrel2;
420
+#endif
137
@@ -XXX,XX +XXX,XX @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
421
+ tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
422
+ break;
423
+ case MO_16:
424
+ tcg_debug_assert((offset & 1) == 0);
425
+ tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
426
+ elt = extract32(offset, 1, 3);
427
+#ifndef HOST_WORDS_BIGENDIAN
428
+ elt ^= 7;
429
+#endif
430
+ tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
431
+ break;
432
+ case MO_32:
433
+ tcg_debug_assert((offset & 3) == 0);
434
+ tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
435
+ elt = extract32(offset, 2, 2);
436
+#ifndef HOST_WORDS_BIGENDIAN
437
+ elt ^= 3;
438
+#endif
439
+ tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
440
+ break;
441
+ case MO_64:
442
+ tcg_debug_assert((offset & 7) == 0);
443
+ tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
444
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
445
+ elt = extract32(offset, 3, 1);
446
+#ifndef HOST_WORDS_BIGENDIAN
447
+ elt = !elt;
448
+#endif
449
+ if (elt) {
450
+ tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
451
+ } else {
452
+ tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
453
+ }
454
+ break;
455
+ default:
456
+ g_assert_not_reached();
457
+ }
458
+ return true;
459
}
460
461
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
462
unsigned vecl, unsigned vece,
463
const TCGArg *args, const int *const_args)
464
{
465
- g_assert_not_reached();
466
+ static const uint32_t
467
+ eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
468
+ gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
469
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 };
470
+
471
+ TCGType type = vecl + TCG_TYPE_V64;
472
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
473
+ uint32_t insn;
474
+
475
+ switch (opc) {
476
+ case INDEX_op_ld_vec:
477
+ tcg_out_ld(s, type, a0, a1, a2);
478
+ return;
479
+ case INDEX_op_st_vec:
480
+ tcg_out_st(s, type, a0, a1, a2);
481
+ return;
482
+ case INDEX_op_dupm_vec:
483
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
484
+ return;
485
+
486
+ case INDEX_op_and_vec:
487
+ insn = VAND;
488
+ break;
489
+ case INDEX_op_or_vec:
490
+ insn = VOR;
491
+ break;
492
+ case INDEX_op_xor_vec:
493
+ insn = VXOR;
494
+ break;
495
+ case INDEX_op_andc_vec:
496
+ insn = VANDC;
497
+ break;
498
+ case INDEX_op_not_vec:
499
+ insn = VNOR;
500
+ a2 = a1;
501
+ break;
502
+
503
+ case INDEX_op_cmp_vec:
504
+ switch (args[3]) {
505
+ case TCG_COND_EQ:
506
+ insn = eq_op[vece];
507
+ break;
508
+ case TCG_COND_GT:
509
+ insn = gts_op[vece];
510
+ break;
511
+ case TCG_COND_GTU:
512
+ insn = gtu_op[vece];
513
+ break;
514
+ default:
515
+ g_assert_not_reached();
516
+ }
517
+ break;
518
+
519
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
520
+ case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
521
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
522
+ default:
523
+ g_assert_not_reached();
524
+ }
525
+
526
+ tcg_debug_assert(insn != 0);
527
+ tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
528
+}
529
+
530
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
531
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
532
+{
533
+ bool need_swap = false, need_inv = false;
534
+
535
+ tcg_debug_assert(vece <= MO_32);
536
+
537
+ switch (cond) {
538
+ case TCG_COND_EQ:
539
+ case TCG_COND_GT:
540
+ case TCG_COND_GTU:
541
+ break;
542
+ case TCG_COND_NE:
543
+ case TCG_COND_LE:
544
+ case TCG_COND_LEU:
545
+ need_inv = true;
546
+ break;
547
+ case TCG_COND_LT:
548
+ case TCG_COND_LTU:
549
+ need_swap = true;
550
+ break;
551
+ case TCG_COND_GE:
552
+ case TCG_COND_GEU:
553
+ need_swap = need_inv = true;
554
+ break;
555
+ default:
556
+ g_assert_not_reached();
557
+ }
558
+
559
+ if (need_inv) {
560
+ cond = tcg_invert_cond(cond);
561
+ }
562
+ if (need_swap) {
563
+ TCGv_vec t1;
564
+ t1 = v1, v1 = v2, v2 = t1;
565
+ cond = tcg_swap_cond(cond);
566
+ }
567
+
568
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
569
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
570
+
571
+ if (need_inv) {
572
+ tcg_gen_not_vec(vece, v0, v0);
573
+ }
574
}
575
576
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
577
TCGArg a0, ...)
578
{
579
- g_assert_not_reached();
580
+ va_list va;
581
+ TCGv_vec v0, v1, v2;
582
+
583
+ va_start(va, a0);
584
+ v0 = temp_tcgv_vec(arg_temp(a0));
585
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
586
+ v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
587
+
588
+ switch (opc) {
589
+ case INDEX_op_cmp_vec:
590
+ expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
591
+ break;
592
+ default:
593
+ g_assert_not_reached();
594
+ }
595
+ va_end(va);
596
}
597
598
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
599
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
600
= { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
601
static const TCGTargetOpDef sub2
602
= { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
603
+ static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
604
+ static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
605
+ static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
606
607
switch (op) {
608
case INDEX_op_goto_ptr:
609
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
610
return (TCG_TARGET_REG_BITS == 64 ? &S_S
611
: TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
612
613
+ case INDEX_op_and_vec:
614
+ case INDEX_op_or_vec:
615
+ case INDEX_op_xor_vec:
616
+ case INDEX_op_andc_vec:
617
+ case INDEX_op_orc_vec:
618
+ case INDEX_op_cmp_vec:
619
+ return &v_v_v;
620
+ case INDEX_op_not_vec:
621
+ case INDEX_op_dup_vec:
622
+ return &v_v;
623
+ case INDEX_op_ld_vec:
624
+ case INDEX_op_st_vec:
625
+ case INDEX_op_dupm_vec:
626
+ return &v_r;
627
+
138
default:
628
default:
139
g_assert_not_reached();
629
return NULL;
140
}
630
}
141
+ return true;
142
}
143
144
/* parse target specific constraints */
145
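As a side note on the new contract: here is a minimal standalone sketch in plain C, using nothing from the QEMU tree (the 19-bit branch field layout and the helper name patch_branch19 are hypothetical). The patcher reports an out-of-range displacement instead of asserting, so a caller could retry with a long-form sequence:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Patch a signed 19-bit, word-aligned displacement into bits [23:5].
     * Returns false when the displacement does not fit. */
    static bool patch_branch19(uint32_t *insn, intptr_t disp)
    {
        if ((disp & 3) || disp < -(1 << 20) || disp >= (1 << 20)) {
            return false;
        }
        *insn = (*insn & ~(0x7ffffu << 5))
              | (((uint32_t)(disp >> 2) & 0x7ffffu) << 5);
        return true;
    }

    int main(void)
    {
        uint32_t insn = 0x54000000;  /* opcode bits only, field clear */
        printf("near: %d\n", patch_branch19(&insn, 0x1000));   /* 1 */
        printf("far:  %d\n", patch_branch19(&insn, 1 << 24));  /* 0 */
        return 0;
    }

The point of returning bool rather than aborting is visible in the tcg.c hunks above: the generic code, not the backend, now decides how hard to fail.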
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
#define TCG_TARGET_HAS_v128 have_altivec
#define TCG_TARGET_HAS_v256 0

-#define TCG_TARGET_HAS_andc_vec 0
+#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec 0
-#define TCG_TARGET_HAS_not_vec 0
+#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_abs_vec 0
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 0
-#define TCG_TARGET_HAS_cmp_vec 0
+#define TCG_TARGET_HAS_cmp_vec 1
#define TCG_TARGET_HAS_mul_vec 0
#define TCG_TARGET_HAS_sat_vec 0
#define TCG_TARGET_HAS_minmax_vec 0
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
ct->ct |= TCG_CT_REG;
ct->u.regs = 0xffffffff;
break;
+ case 'v':
+ ct->ct |= TCG_CT_REG;
+ ct->u.regs = 0xffffffff00000000ull;
+ break;
case 'L': /* qemu_ld constraint */
ct->ct |= TCG_CT_REG;
ct->u.regs = 0xffffffff;
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,

#define NOP ORI /* ori 0,0,0 */

+#define LVX XO31(103)
+#define LVEBX XO31(7)
+#define LVEHX XO31(39)
+#define LVEWX XO31(71)
+
+#define STVX XO31(231)
+#define STVEWX XO31(199)
+
+#define VCMPEQUB VX4(6)
+#define VCMPEQUH VX4(70)
+#define VCMPEQUW VX4(134)
+#define VCMPGTSB VX4(774)
+#define VCMPGTSH VX4(838)
+#define VCMPGTSW VX4(902)
+#define VCMPGTUB VX4(518)
+#define VCMPGTUH VX4(582)
+#define VCMPGTUW VX4(646)
+
+#define VAND VX4(1028)
+#define VANDC VX4(1092)
+#define VNOR VX4(1284)
+#define VOR VX4(1156)
+#define VXOR VX4(1220)
+
+#define VSPLTB VX4(524)
+#define VSPLTH VX4(588)
+#define VSPLTW VX4(652)
+#define VSPLTISB VX4(780)
+#define VSPLTISH VX4(844)
+#define VSPLTISW VX4(908)
+
+#define VSLDOI VX4(44)
+
#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
tcg_insn_unit *target;
+ int16_t lo;
+ int32_t hi;

value += addend;
target = (tcg_insn_unit *)value;
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
}
*code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
break;
+ case R_PPC_ADDR32:
+ /*
+ * We are abusing this relocation type. Again, this points to
+ * a pair of insns, lis + load. This is an absolute address
+ * relocation for PPC32 so the lis cannot be removed.
+ */
+ lo = value;
+ hi = value - lo;
+ if (hi + lo != value) {
+ return false;
+ }
+ code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
+ code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
+ break;
default:
g_assert_not_reached();
}
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
- if (ret != arg) {
- tcg_out32(s, OR | SAB(arg, ret, arg));
+ if (ret == arg) {
+ return true;
+ }
+ switch (type) {
+ case TCG_TYPE_I64:
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+ /* fallthru */
+ case TCG_TYPE_I32:
+ if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
+ tcg_out32(s, OR | SAB(arg, ret, arg));
+ break;
+ } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
+ /* Altivec does not support vector/integer moves. */
+ return false;
+ }
+ /* fallthru */
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
+ tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
+ break;
+ default:
+ g_assert_not_reached();
}
return true;
}
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
tcg_target_long val)
{
- g_assert_not_reached();
+ uint32_t load_insn;
+ int rel, low;
+ intptr_t add;
+
+ low = (int8_t)val;
+ if (low >= -16 && low < 16) {
+ if (val == (tcg_target_long)dup_const(MO_8, low)) {
+ tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
+ return;
+ }
+ if (val == (tcg_target_long)dup_const(MO_16, low)) {
+ tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
+ return;
+ }
+ if (val == (tcg_target_long)dup_const(MO_32, low)) {
+ tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
+ return;
+ }
+ }
+
+ /*
+ * Otherwise we must load the value from the constant pool.
+ */
+ if (USE_REG_TB) {
+ rel = R_PPC_ADDR16;
+ add = -(intptr_t)s->code_gen_ptr;
+ } else {
+ rel = R_PPC_ADDR32;
+ add = 0;
+ }
+
+ load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
+ if (TCG_TARGET_REG_BITS == 64) {
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
+ } else {
+ new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
+ }
+
+ if (USE_REG_TB) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
+ load_insn |= RA(TCG_REG_TB);
+ } else {
+ tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
+ }
+ tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
align = 3;
/* FALLTHRU */
default:
- if (rt != TCG_REG_R0) {
+ if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
rs = rt;
break;
}
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
}

/* For unaligned, or very large offsets, use the indexed form. */
- if (offset & align || offset != (int32_t)offset) {
+ if (offset & align || offset != (int32_t)offset || opi == 0) {
if (rs == base) {
rs = TCG_REG_R0;
}
tcg_debug_assert(!is_store || rs != rt);
tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
- tcg_out32(s, opx | TAB(rt, base, rs));
+ tcg_out32(s, opx | TAB(rt & 31, base, rs));
return;
}

@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
base = rs;
}
if (opi != ADDI || base != rt || l0 != 0) {
- tcg_out32(s, opi | TAI(rt, base, l0));
+ tcg_out32(s, opi | TAI(rt & 31, base, l0));
}
}

-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
- TCGReg arg1, intptr_t arg2)
+static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
+ TCGReg va, TCGReg vb, int shb)
{
- int opi, opx;
-
- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
- if (type == TCG_TYPE_I32) {
- opi = LWZ, opx = LWZX;
- } else {
- opi = LD, opx = LDX;
- }
- tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
+ tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
- TCGReg arg1, intptr_t arg2)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg base, intptr_t offset)
{
- int opi, opx;
+ int shift;

- tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
- if (type == TCG_TYPE_I32) {
- opi = STW, opx = STWX;
- } else {
- opi = STD, opx = STDX;
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (ret < TCG_REG_V0) {
+ tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
+ break;
+ }
+ tcg_debug_assert((offset & 3) == 0);
+ tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
+ shift = (offset - 4) & 0xc;
+ if (shift) {
+ tcg_out_vsldoi(s, ret, ret, ret, shift);
+ }
+ break;
+ case TCG_TYPE_I64:
+ if (ret < TCG_REG_V0) {
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+ tcg_out_mem_long(s, LD, LDX, ret, base, offset);
+ break;
+ }
+ /* fallthru */
+ case TCG_TYPE_V64:
+ tcg_debug_assert(ret >= TCG_REG_V0);
+ tcg_debug_assert((offset & 7) == 0);
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
+ if (offset & 8) {
+ tcg_out_vsldoi(s, ret, ret, ret, 8);
+ }
+ break;
+ case TCG_TYPE_V128:
+ tcg_debug_assert(ret >= TCG_REG_V0);
+ tcg_debug_assert((offset & 15) == 0);
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg base, intptr_t offset)
+{
+ int shift;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (arg < TCG_REG_V0) {
+ tcg_out_mem_long(s, STW, STWX, arg, base, offset);
+ break;
+ }
+ tcg_debug_assert((offset & 3) == 0);
+ shift = (offset - 4) & 0xc;
+ if (shift) {
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
+ arg = TCG_VEC_TMP1;
+ }
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
+ break;
+ case TCG_TYPE_I64:
+ if (arg < TCG_REG_V0) {
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+ tcg_out_mem_long(s, STD, STDX, arg, base, offset);
+ break;
+ }
+ /* fallthru */
+ case TCG_TYPE_V64:
+ tcg_debug_assert(arg >= TCG_REG_V0);
+ tcg_debug_assert((offset & 7) == 0);
+ if (offset & 8) {
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
+ arg = TCG_VEC_TMP1;
+ }
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
+ break;
+ case TCG_TYPE_V128:
+ tcg_debug_assert(arg >= TCG_REG_V0);
+ tcg_out_mem_long(s, 0, STVX, arg, base, offset);
+ break;
+ default:
+ g_assert_not_reached();
}
- tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,

int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
- g_assert_not_reached();
+ switch (opc) {
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_not_vec:
+ return 1;
+ case INDEX_op_cmp_vec:
+ return vece <= MO_32 ? -1 : 0;
+ default:
+ return 0;
+ }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src)
{
- g_assert_not_reached();
+ tcg_debug_assert(dst >= TCG_REG_V0);
+ tcg_debug_assert(src >= TCG_REG_V0);
+
+ /*
+ * Recall we use (or emulate) VSX integer loads, so the integer is
+ * right justified within the left (zero-index) double-word.
+ */
+ switch (vece) {
+ case MO_8:
+ tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
+ break;
+ case MO_16:
+ tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
+ break;
+ case MO_32:
+ tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
+ break;
+ case MO_64:
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
+ tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg out, TCGReg base, intptr_t offset)
{
- g_assert_not_reached();
+ int elt;
+
+ tcg_debug_assert(out >= TCG_REG_V0);
+ switch (vece) {
+ case MO_8:
+ tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
+ elt = extract32(offset, 0, 4);
+#ifndef HOST_WORDS_BIGENDIAN
+ elt ^= 15;
+#endif
+ tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
+ break;
+ case MO_16:
+ tcg_debug_assert((offset & 1) == 0);
+ tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
+ elt = extract32(offset, 1, 3);
+#ifndef HOST_WORDS_BIGENDIAN
+ elt ^= 7;
+#endif
+ tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
+ break;
+ case MO_32:
+ tcg_debug_assert((offset & 3) == 0);
+ tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
+ elt = extract32(offset, 2, 2);
+#ifndef HOST_WORDS_BIGENDIAN
+ elt ^= 3;
+#endif
+ tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
+ break;
+ case MO_64:
+ tcg_debug_assert((offset & 7) == 0);
+ tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
+ elt = extract32(offset, 3, 1);
+#ifndef HOST_WORDS_BIGENDIAN
+ elt = !elt;
+#endif
+ if (elt) {
+ tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
+ } else {
+ tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg *args, const int *const_args)
{
- g_assert_not_reached();
+ static const uint32_t
+ eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
+ gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 };
+
+ TCGType type = vecl + TCG_TYPE_V64;
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
+ uint32_t insn;
+
+ switch (opc) {
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ return;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ return;
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ return;
+
+ case INDEX_op_and_vec:
+ insn = VAND;
+ break;
+ case INDEX_op_or_vec:
+ insn = VOR;
+ break;
+ case INDEX_op_xor_vec:
+ insn = VXOR;
+ break;
+ case INDEX_op_andc_vec:
+ insn = VANDC;
+ break;
+ case INDEX_op_not_vec:
+ insn = VNOR;
+ a2 = a1;
+ break;
+
+ case INDEX_op_cmp_vec:
+ switch (args[3]) {
+ case TCG_COND_EQ:
+ insn = eq_op[vece];
+ break;
+ case TCG_COND_GT:
+ insn = gts_op[vece];
+ break;
+ case TCG_COND_GTU:
+ insn = gtu_op[vece];
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
+ default:
+ g_assert_not_reached();
+ }
+
+ tcg_debug_assert(insn != 0);
+ tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
+}
+
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+{
+ bool need_swap = false, need_inv = false;
+
+ tcg_debug_assert(vece <= MO_32);
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_GT:
+ case TCG_COND_GTU:
+ break;
+ case TCG_COND_NE:
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ need_inv = true;
+ break;
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ need_swap = true;
+ break;
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ need_swap = need_inv = true;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (need_inv) {
+ cond = tcg_invert_cond(cond);
+ }
+ if (need_swap) {
+ TCGv_vec t1;
+ t1 = v1, v1 = v2, v2 = t1;
+ cond = tcg_swap_cond(cond);
+ }
+
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
+
+ if (need_inv) {
+ tcg_gen_not_vec(vece, v0, v0);
+ }
}

void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
- g_assert_not_reached();
+ va_list va;
+ TCGv_vec v0, v1, v2;
+
+ va_start(va, a0);
+ v0 = temp_tcgv_vec(arg_temp(a0));
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+
+ switch (opc) {
+ case INDEX_op_cmp_vec:
+ expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ va_end(va);
}

static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
= { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
static const TCGTargetOpDef sub2
= { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
+ static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
+ static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
+ static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };

switch (op) {
case INDEX_op_goto_ptr:
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
return (TCG_TARGET_REG_BITS == 64 ? &S_S
: TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);

+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_cmp_vec:
+ return &v_v_v;
+ case INDEX_op_not_vec:
+ case INDEX_op_dup_vec:
+ return &v_v;
+ case INDEX_op_ld_vec:
+ case INDEX_op_st_vec:
+ case INDEX_op_dupm_vec:
+ return &v_r;
+
default:
return NULL;
}
--
2.17.1
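The condition handling in expand_vec_cmp above reduces all ten integer conditions to the three comparisons Altivec provides (equal, signed greater, unsigned greater) by swapping the operands and/or inverting the result. Here is a scalar sketch of the same reduction table, independent of the TCG types, with a brute-force check; it is a model of the idea, not in-tree code:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    enum cond { EQ, NE, LT, GE, LE, GT, LTU, GEU, LEU, GTU };

    /* Only the "native" comparisons, as with VCMPEQU*/VCMPGTS*/VCMPGTU*. */
    static bool native(enum cond c, int32_t a, int32_t b)
    {
        switch (c) {
        case EQ:  return a == b;
        case GT:  return a > b;
        case GTU: return (uint32_t)a > (uint32_t)b;
        default:  assert(0); return false;
        }
    }

    static bool emulate(enum cond c, int32_t a, int32_t b)
    {
        enum cond base = c;
        bool need_swap = false, need_inv = false;

        switch (c) {
        case EQ: case GT: case GTU:
            break;
        case NE:  base = EQ;  need_inv = true;  break;
        case LE:  base = GT;  need_inv = true;  break;
        case LEU: base = GTU; need_inv = true;  break;
        case LT:  base = GT;  need_swap = true; break;
        case LTU: base = GTU; need_swap = true; break;
        case GE:  base = GT;  need_swap = need_inv = true; break;
        case GEU: base = GTU; need_swap = need_inv = true; break;
        }
        bool r = need_swap ? native(base, b, a) : native(base, a, b);
        return need_inv ? !r : r;
    }

    int main(void)
    {
        int32_t v[] = { 0, 1, -1, INT32_MIN, INT32_MAX };
        for (int i = 0; i < 5; i++) {
            for (int j = 0; j < 5; j++) {
                int32_t a = v[i], b = v[j];
                assert(emulate(NE, a, b)  == (a != b));
                assert(emulate(LT, a, b)  == (a < b));
                assert(emulate(GE, a, b)  == (a >= b));
                assert(emulate(LEU, a, b) == ((uint32_t)a <= (uint32_t)b));
            }
        }
        return 0;
    }

The inversion relies on vector comparisons producing all-ones/all-zeros lanes, so a NOT of the result is exactly the inverted condition.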
Somehow we forgot these operations, once upon a time.
This will allow immediate stores to have their bswap
optimized away.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
CASE_OP_32_64(ext16u):
return (uint16_t)x;

+ CASE_OP_32_64(bswap16):
+ return bswap16(x);
+
+ CASE_OP_32_64(bswap32):
+ return bswap32(x);
+
+ case INDEX_op_bswap64_i64:
+ return bswap64(x);
+
case INDEX_op_ext_i32_i64:
case INDEX_op_ext32s_i64:
return (int32_t)x;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
CASE_OP_32_64(ext16s):
CASE_OP_32_64(ext16u):
CASE_OP_32_64(ctpop):
+ CASE_OP_32_64(bswap16):
+ CASE_OP_32_64(bswap32):
+ case INDEX_op_bswap64_i64:
case INDEX_op_ext32s_i64:
case INDEX_op_ext32u_i64:
case INDEX_op_ext_i32_i64:
--
2.17.2

Add support for vector maximum/minimum using Altivec instructions
VMAXSB, VMAXSH, VMAXSW, VMAXUB, VMAXUH, VMAXUW, and
VMINSB, VMINSH, VMINSW, VMINUB, VMINUH, VMINUW.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
tcg/ppc/tcg-target.h | 2 +-
tcg/ppc/tcg-target.inc.c | 40 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
#define TCG_TARGET_HAS_cmp_vec 1
#define TCG_TARGET_HAS_mul_vec 0
#define TCG_TARGET_HAS_sat_vec 0
-#define TCG_TARGET_HAS_minmax_vec 0
+#define TCG_TARGET_HAS_minmax_vec 1
#define TCG_TARGET_HAS_bitsel_vec 0
#define TCG_TARGET_HAS_cmpsel_vec 0

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
#define STVX XO31(231)
#define STVEWX XO31(199)

+#define VMAXSB VX4(258)
+#define VMAXSH VX4(322)
+#define VMAXSW VX4(386)
+#define VMAXUB VX4(2)
+#define VMAXUH VX4(66)
+#define VMAXUW VX4(130)
+#define VMINSB VX4(770)
+#define VMINSH VX4(834)
+#define VMINSW VX4(898)
+#define VMINUB VX4(514)
+#define VMINUH VX4(578)
+#define VMINUW VX4(642)
+
#define VCMPEQUB VX4(6)
#define VCMPEQUH VX4(70)
#define VCMPEQUW VX4(134)
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_andc_vec:
case INDEX_op_not_vec:
return 1;
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ return vece <= MO_32;
case INDEX_op_cmp_vec:
return vece <= MO_32 ? -1 : 0;
default:
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
static const uint32_t
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
- gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 };
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
+ umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
+ smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
+ umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 };

TCGType type = vecl + TCG_TYPE_V64;
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
return;

+ case INDEX_op_smin_vec:
+ insn = smin_op[vece];
+ break;
+ case INDEX_op_umin_vec:
+ insn = umin_op[vece];
+ break;
+ case INDEX_op_smax_vec:
+ insn = smax_op[vece];
+ break;
+ case INDEX_op_umax_vec:
+ insn = umax_op[vece];
+ break;
case INDEX_op_and_vec:
insn = VAND;
break;
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_andc_vec:
case INDEX_op_orc_vec:
case INDEX_op_cmp_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
return &v_v_v;
case INDEX_op_not_vec:
case INDEX_op_dup_vec:
--
2.17.1
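The new do_constant_folding_2 cases are easy to sanity-check outside the tree. A small sketch, using compiler builtins in place of QEMU's bswap16/32/64 helpers from qemu/bswap.h (that substitution is the assumption here):

    #include <assert.h>
    #include <stdint.h>

    /* Mirror of the new optimizer cases: a bswap of a known constant
     * folds to another constant, so a guest "store immediate" needs
     * no byte swap at run time. */
    static uint64_t fold_bswap16(uint64_t x) { return __builtin_bswap16((uint16_t)x); }
    static uint64_t fold_bswap32(uint64_t x) { return __builtin_bswap32((uint32_t)x); }
    static uint64_t fold_bswap64(uint64_t x) { return __builtin_bswap64(x); }

    int main(void)
    {
        assert(fold_bswap16(0x1234) == 0x3412);
        assert(fold_bswap32(0x12345678u) == 0x78563412u);
        assert(fold_bswap64(0x0102030405060708ull) == 0x0807060504030201ull);
        return 0;
    }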
Based on the only current user, Sparc:

New code uses 1 constant that takes 2 insns to create, plus 8.
Old code used 2 constants that took 2 insns to create, plus 9.
The result is a new total of 10 vs an old total of 13.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg-op.c | 54 ++++++++++++++++++++++++++--------------------------
1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
if (TCG_TARGET_HAS_bswap32_i32) {
tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);
} else {
- TCGv_i32 t0, t1;
- t0 = tcg_temp_new_i32();
- t1 = tcg_temp_new_i32();
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_const_i32(0x00ff00ff);

- tcg_gen_shli_i32(t0, arg, 24);
+ /* arg = abcd */
+ tcg_gen_shri_i32(t0, arg, 8); /* t0 = .abc */
+ tcg_gen_and_i32(t1, arg, t2); /* t1 = .b.d */
+ tcg_gen_and_i32(t0, t0, t2); /* t0 = .a.c */
+ tcg_temp_free_i32(t2);
+ tcg_gen_shli_i32(t1, t1, 8); /* t1 = b.d. */
+ tcg_gen_or_i32(ret, t0, t1); /* ret = badc */

- tcg_gen_andi_i32(t1, arg, 0x0000ff00);
- tcg_gen_shli_i32(t1, t1, 8);
- tcg_gen_or_i32(t0, t0, t1);
+ tcg_gen_shri_i32(t0, ret, 16); /* t0 = ..ba */
+ tcg_gen_shli_i32(t1, ret, 16); /* t1 = dc.. */
+ tcg_gen_or_i32(ret, t0, t1); /* ret = dcba */

- tcg_gen_shri_i32(t1, arg, 8);
- tcg_gen_andi_i32(t1, t1, 0x0000ff00);
- tcg_gen_or_i32(t0, t0, t1);
-
- tcg_gen_shri_i32(t1, arg, 24);
- tcg_gen_or_i32(ret, t0, t1);
tcg_temp_free_i32(t0);
tcg_temp_free_i32(t1);
}
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
} else if (TCG_TARGET_HAS_bswap32_i64) {
tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);
} else {
- TCGv_i64 t0, t1;
- t0 = tcg_temp_new_i64();
- t1 = tcg_temp_new_i64();
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_const_i64(0x00ff00ff);

- tcg_gen_shli_i64(t0, arg, 24);
- tcg_gen_ext32u_i64(t0, t0);
+ /* arg = ....abcd */
+ tcg_gen_shri_i64(t0, arg, 8); /* t0 = .....abc */
+ tcg_gen_and_i64(t1, arg, t2); /* t1 = .....b.d */
+ tcg_gen_and_i64(t0, t0, t2); /* t0 = .....a.c */
+ tcg_temp_free_i64(t2);
+ tcg_gen_shli_i64(t1, t1, 8); /* t1 = ....b.d. */
+ tcg_gen_or_i64(ret, t0, t1); /* ret = ....badc */

- tcg_gen_andi_i64(t1, arg, 0x0000ff00);
- tcg_gen_shli_i64(t1, t1, 8);
- tcg_gen_or_i64(t0, t0, t1);
+ tcg_gen_shli_i64(t1, ret, 48); /* t1 = dc...... */
+ tcg_gen_shri_i64(t0, ret, 16); /* t0 = ......ba */
+ tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */
+ tcg_gen_or_i64(ret, t0, t1); /* ret = ....dcba */

- tcg_gen_shri_i64(t1, arg, 8);
- tcg_gen_andi_i64(t1, t1, 0x0000ff00);
- tcg_gen_or_i64(t0, t0, t1);
-
- tcg_gen_shri_i64(t1, arg, 24);
- tcg_gen_or_i64(ret, t0, t1);
tcg_temp_free_i64(t0);
tcg_temp_free_i64(t1);
}
--
2.17.2

Add support for vector add/subtract using Altivec instructions:
VADDUBM, VADDUHM, VADDUWM, VSUBUBM, VSUBUHM, VSUBUWM.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
tcg/ppc/tcg-target.inc.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
#define STVX XO31(231)
#define STVEWX XO31(199)

+#define VADDUBM VX4(0)
+#define VADDUHM VX4(64)
+#define VADDUWM VX4(128)
+
+#define VSUBUBM VX4(1024)
+#define VSUBUHM VX4(1088)
+#define VSUBUWM VX4(1152)
+
#define VMAXSB VX4(258)
#define VMAXSH VX4(322)
#define VMAXSW VX4(386)
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_andc_vec:
case INDEX_op_not_vec:
return 1;
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
case INDEX_op_smax_vec:
case INDEX_op_smin_vec:
case INDEX_op_umax_vec:
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
static const uint32_t
+ add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
+ sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
return;

+ case INDEX_op_add_vec:
+ insn = add_op[vece];
+ break;
+ case INDEX_op_sub_vec:
+ insn = sub_op[vece];
+ break;
case INDEX_op_smin_vec:
insn = smin_op[vece];
break;
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
return (TCG_TARGET_REG_BITS == 64 ? &S_S
: TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);

+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
case INDEX_op_and_vec:
case INDEX_op_or_vec:
case INDEX_op_xor_vec:
--
2.17.1
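The rewritten tcg_gen_bswap32_i32 expansion above swaps the bytes within each 16-bit half using the single 0x00ff00ff mask, then swaps the two halves with a rotate. The same arithmetic can be followed with plain integers; a standalone check:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t bswap32_by_mask(uint32_t x)      /*    x = abcd */
    {
        uint32_t t0 = (x >> 8) & 0x00ff00ffu;        /*   t0 = .a.c */
        uint32_t t1 = (x & 0x00ff00ffu) << 8;        /*   t1 = b.d. */
        x = t0 | t1;                                 /*    x = badc */
        return (x >> 16) | (x << 16);                /*  ret = dcba */
    }

    int main(void)
    {
        assert(bswap32_by_mask(0x12345678u) == 0x78563412u);
        assert(bswap32_by_mask(0x000000ffu) == 0xff000000u);
        return 0;
    }

One mask constant serves both the AND of the even bytes and the AND of the shifted odd bytes, which is where the insn-count saving quoted in the commit message comes from.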
This helps preserve the invariant that all TCG_TYPE_I32 values
are stored zero-extended in the 64-bit host registers.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/i386/tcg-target.inc.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
* Record the context of a call to the out of line helper code for the slow path
* for a load or store, so that we can later generate the correct helper code
*/
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64,
+ TCGMemOpIdx oi,
TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
tcg_insn_unit *raddr,
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,

label->is_ld = is_ld;
label->oi = oi;
+ label->type = is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
label->datalo_reg = datalo;
label->datahi_reg = datahi;
label->addrlo_reg = addrlo;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
TCGMemOp opc = get_memop(oi);
TCGReg data_reg;
tcg_insn_unit **label_ptr = &l->label_ptr[0];
+ int rexw = (l->type == TCG_TYPE_I64 ? P_REXW : 0);

/* resolve label address */
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
data_reg = l->datalo_reg;
switch (opc & MO_SSIZE) {
case MO_SB:
- tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
+ tcg_out_ext8s(s, data_reg, TCG_REG_EAX, rexw);
break;
case MO_SW:
- tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
+ tcg_out_ext16s(s, data_reg, TCG_REG_EAX, rexw);
break;
#if TCG_TARGET_REG_BITS == 64
case MO_SL:
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, is64, opc);

/* Record the current context of a load into ldst label */
- add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
+ add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi,
s->code_ptr, label_ptr);
#else
{
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

/* Record the current context of a store into ldst label */
- add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
+ add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi,
s->code_ptr, label_ptr);
#else
{
--
2.17.2

Add support for vector saturated add/subtract using Altivec
instructions:
VADDSBS, VADDSHS, VADDSWS, VADDUBS, VADDUHS, VADDUWS, and
VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
tcg/ppc/tcg-target.h | 2 +-
tcg/ppc/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
#define TCG_TARGET_HAS_shv_vec 0
#define TCG_TARGET_HAS_cmp_vec 1
#define TCG_TARGET_HAS_mul_vec 0
-#define TCG_TARGET_HAS_sat_vec 0
+#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
#define TCG_TARGET_HAS_bitsel_vec 0
#define TCG_TARGET_HAS_cmpsel_vec 0
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
#define STVX XO31(231)
#define STVEWX XO31(199)

+#define VADDSBS VX4(768)
+#define VADDUBS VX4(512)
#define VADDUBM VX4(0)
+#define VADDSHS VX4(832)
+#define VADDUHS VX4(576)
#define VADDUHM VX4(64)
+#define VADDSWS VX4(896)
+#define VADDUWS VX4(640)
#define VADDUWM VX4(128)

+#define VSUBSBS VX4(1792)
+#define VSUBUBS VX4(1536)
#define VSUBUBM VX4(1024)
+#define VSUBSHS VX4(1856)
+#define VSUBUHS VX4(1600)
#define VSUBUHM VX4(1088)
+#define VSUBSWS VX4(1920)
+#define VSUBUWS VX4(1664)
#define VSUBUWM VX4(1152)

#define VMAXSB VX4(258)
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_smin_vec:
case INDEX_op_umax_vec:
case INDEX_op_umin_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
return vece <= MO_32;
case INDEX_op_cmp_vec:
return vece <= MO_32 ? -1 : 0;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
+ ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
+ usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
+ sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
+ ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_sub_vec:
insn = sub_op[vece];
break;
+ case INDEX_op_ssadd_vec:
+ insn = ssadd_op[vece];
+ break;
+ case INDEX_op_sssub_vec:
+ insn = sssub_op[vece];
+ break;
+ case INDEX_op_usadd_vec:
+ insn = usadd_op[vece];
+ break;
+ case INDEX_op_ussub_vec:
+ insn = ussub_op[vece];
+ break;
case INDEX_op_smin_vec:
insn = smin_op[vece];
break;
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_andc_vec:
case INDEX_op_orc_vec:
case INDEX_op_cmp_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
case INDEX_op_smax_vec:
case INDEX_op_smin_vec:
case INDEX_op_umax_vec:
--
2.17.1
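For the saturated forms, each lane clamps to the representable range rather than wrapping. A per-lane model of the signed-byte case (what VADDSBS computes per element); the unsigned and subtract variants follow the same pattern:

    #include <assert.h>
    #include <stdint.h>

    static int8_t ssadd8(int8_t a, int8_t b)
    {
        int s = a + b;               /* exact in int */
        if (s > INT8_MAX) {
            s = INT8_MAX;
        } else if (s < INT8_MIN) {
            s = INT8_MIN;
        }
        return (int8_t)s;
    }

    int main(void)
    {
        assert(ssadd8(100, 100) == 127);     /* clamps instead of wrapping */
        assert(ssadd8(-100, -100) == -128);
        assert(ssadd8(3, 4) == 7);
        return 0;
    }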
From: "Emilio G. Cota" <cota@braap.org>

It's unused since 75e8b9b7aa0b95a761b9add7e2f09248b101a392.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181209193749.12277-9-cota@braap.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.h | 4 ++--
tcg/optimize.c | 4 ++--
tcg/tcg.c | 10 ++++------
3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/tcg/tcg.h b/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args);

TCGOp *tcg_emit_op(TCGOpcode opc);
void tcg_op_remove(TCGContext *s, TCGOp *op);
-TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
-TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc);
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc);

void tcg_optimize(TCGContext *s);

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
uint64_t a = ((uint64_t)ah << 32) | al;
uint64_t b = ((uint64_t)bh << 32) | bl;
TCGArg rl, rh;
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

if (opc == INDEX_op_add2_i32) {
a += b;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
uint32_t b = arg_info(op->args[3])->val;
uint64_t r = (uint64_t)a * b;
TCGArg rl, rh;
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

rl = op->args[0];
rh = op->args[1];
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ TCGOp *tcg_emit_op(TCGOpcode opc)
return op;
}

-TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
- TCGOpcode opc, int nargs)
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
{
TCGOp *new_op = tcg_op_alloc(opc);
QTAILQ_INSERT_BEFORE(old_op, new_op, link);
return new_op;
}

-TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
- TCGOpcode opc, int nargs)
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
{
TCGOp *new_op = tcg_op_alloc(opc);
QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
@@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s)
TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
? INDEX_op_ld_i32
: INDEX_op_ld_i64);
- TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
+ TCGOp *lop = tcg_op_insert_before(s, op, lopc);

lop->args[0] = temp_arg(dir_ts);
lop->args[1] = temp_arg(arg_ts->mem_base);
@@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s)
TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
? INDEX_op_st_i32
: INDEX_op_st_i64);
- TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
+ TCGOp *sop = tcg_op_insert_after(s, op, sopc);

sop->args[0] = temp_arg(dir_ts);
sop->args[1] = temp_arg(arg_ts->mem_base);
--
2.17.2

For Altivec, this is done via vector shift by vector,
and loading the immediate into a register.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
tcg/ppc/tcg-target.h | 2 +-
tcg/ppc/tcg-target.inc.c | 58 ++++++++++++++++++++++++++++++++++++++--
2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
#define TCG_TARGET_HAS_abs_vec 0
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
-#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_shv_vec 1
#define TCG_TARGET_HAS_cmp_vec 1
#define TCG_TARGET_HAS_mul_vec 0
#define TCG_TARGET_HAS_sat_vec 1
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
#define VCMPGTUH VX4(582)
#define VCMPGTUW VX4(646)

+#define VSLB VX4(260)
+#define VSLH VX4(324)
+#define VSLW VX4(388)
+#define VSRB VX4(516)
+#define VSRH VX4(580)
+#define VSRW VX4(644)
+#define VSRAB VX4(772)
+#define VSRAH VX4(836)
+#define VSRAW VX4(900)
+
#define VAND VX4(1028)
#define VANDC VX4(1092)
#define VNOR VX4(1284)
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_sssub_vec:
case INDEX_op_usadd_vec:
case INDEX_op_ussub_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
return vece <= MO_32;
case INDEX_op_cmp_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
return vece <= MO_32 ? -1 : 0;
default:
return 0;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
- smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 };
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
+ shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
+ shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 };

TCGType type = vecl + TCG_TYPE_V64;
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_umax_vec:
insn = umax_op[vece];
break;
+ case INDEX_op_shlv_vec:
+ insn = shlv_op[vece];
+ break;
+ case INDEX_op_shrv_vec:
+ insn = shrv_op[vece];
+ break;
+ case INDEX_op_sarv_vec:
+ insn = sarv_op[vece];
+ break;
case INDEX_op_and_vec:
insn = VAND;
break;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
}

+static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGArg imm, TCGOpcode opci)
+{
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+
+ /* Splat w/bytes for xxspltib. */
+ tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1));
+ vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ tcg_temp_free_vec(t1);
+}
+
static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
TCGv_vec v1, TCGv_vec v2, TCGCond cond)
{
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
{
va_list va;
TCGv_vec v0, v1, v2;
+ TCGArg a2;

va_start(va, a0);
v0 = temp_tcgv_vec(arg_temp(a0));
v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
- v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ a2 = va_arg(va, TCGArg);

switch (opc) {
+ case INDEX_op_shli_vec:
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
+ break;
+ case INDEX_op_shri_vec:
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
+ break;
+ case INDEX_op_sari_vec:
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
+ break;
case INDEX_op_cmp_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
break;
default:
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_smin_vec:
case INDEX_op_umax_vec:
case INDEX_op_umin_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
return &v_v_v;
case INDEX_op_not_vec:
case INDEX_op_dup_vec:
--
2.17.1
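expand_vec_shi leans on one Altivec detail: the vector shift instructions consume only the low log2(lane-bits) bits of each per-element count, which is why splatting imm & ((8 << vece) - 1) into a temporary is sufficient. A one-lane scalar model of the MO_32 case — the modulo-32 count behaviour is the assumption to note:

    #include <assert.h>
    #include <stdint.h>

    /* VSLW-style semantics for one 32-bit lane: only the low 5 bits
     * of the shift count participate (8 << MO_32 == 32, mask 31). */
    static uint32_t shl_lane32(uint32_t x, unsigned imm)
    {
        return x << (imm & 31);
    }

    int main(void)
    {
        assert(shl_lane32(1, 4) == 16);
        assert(shl_lane32(1, 36) == 16);   /* count taken modulo 32 */
        return 0;
    }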
Based on the only current user, Sparc:

New code uses 2 constants that take 2 insns to load from constant pool,
plus 13. Old code used 6 constants that took 1 or 2 insns to create,
plus 21. The result is a new total of 17 vs an old total of 29.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg-op.c | 43 ++++++++++++++++++-------------------------
1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
} else {
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();

- tcg_gen_shli_i64(t0, arg, 56);
+ /* arg = abcdefgh */
+ tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull);
+ tcg_gen_shri_i64(t0, arg, 8); /* t0 = .abcdefg */
+ tcg_gen_and_i64(t1, arg, t2); /* t1 = .b.d.f.h */
+ tcg_gen_and_i64(t0, t0, t2); /* t0 = .a.c.e.g */
+ tcg_gen_shli_i64(t1, t1, 8); /* t1 = b.d.f.h. */
+ tcg_gen_or_i64(ret, t0, t1); /* ret = badcfehg */

- tcg_gen_andi_i64(t1, arg, 0x0000ff00);
- tcg_gen_shli_i64(t1, t1, 40);
- tcg_gen_or_i64(t0, t0, t1);
+ tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull);
+ tcg_gen_shri_i64(t0, ret, 16); /* t0 = ..badcfe */
+ tcg_gen_and_i64(t1, ret, t2); /* t1 = ..dc..hg */
+ tcg_gen_and_i64(t0, t0, t2); /* t0 = ..ba..fe */
+ tcg_gen_shli_i64(t1, t1, 16); /* t1 = dc..hg.. */
+ tcg_gen_or_i64(ret, t0, t1); /* ret = dcbahgfe */

- tcg_gen_andi_i64(t1, arg, 0x00ff0000);
- tcg_gen_shli_i64(t1, t1, 24);
- tcg_gen_or_i64(t0, t0, t1);
+ tcg_gen_shri_i64(t0, ret, 32); /* t0 = ....dcba */
+ tcg_gen_shli_i64(t1, ret, 32); /* t1 = hgfe.... */
+ tcg_gen_or_i64(ret, t0, t1); /* ret = hgfedcba */

- tcg_gen_andi_i64(t1, arg, 0xff000000);
- tcg_gen_shli_i64(t1, t1, 8);
- tcg_gen_or_i64(t0, t0, t1);
-
- tcg_gen_shri_i64(t1, arg, 8);
- tcg_gen_andi_i64(t1, t1, 0xff000000);
- tcg_gen_or_i64(t0, t0, t1);
-
- tcg_gen_shri_i64(t1, arg, 24);
- tcg_gen_andi_i64(t1, t1, 0x00ff0000);
- tcg_gen_or_i64(t0, t0, t1);
-
- tcg_gen_shri_i64(t1, arg, 40);
- tcg_gen_andi_i64(t1, t1, 0x0000ff00);
- tcg_gen_or_i64(t0, t0, t1);
-
- tcg_gen_shri_i64(t1, arg, 56);
- tcg_gen_or_i64(ret, t0, t1);
tcg_temp_free_i64(t0);
tcg_temp_free_i64(t1);
}
--
2.17.2

For Altivec, this is always an expansion.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
tcg/ppc/tcg-target.h | 2 +-
tcg/ppc/tcg-target.opc.h | 8 +++
tcg/ppc/tcg-target.inc.c | 113 ++++++++++++++++++++++++++++++++++++++-
3 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 1
#define TCG_TARGET_HAS_cmp_vec 1
-#define TCG_TARGET_HAS_mul_vec 0
+#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
#define TCG_TARGET_HAS_bitsel_vec 0
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.opc.h
+++ b/tcg/ppc/tcg-target.opc.h
@@ -XXX,XX +XXX,XX @@
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
* consider these to be UNSPEC with names.
*/
+
+DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC)
+DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC)
+DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
+DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
+DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
+DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
+DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
#define VSRAB VX4(772)
#define VSRAH VX4(836)
#define VSRAW VX4(900)
+#define VRLB VX4(4)
+#define VRLH VX4(68)
+#define VRLW VX4(132)
+
+#define VMULEUB VX4(520)
+#define VMULEUH VX4(584)
+#define VMULOUB VX4(8)
+#define VMULOUH VX4(72)
+#define VMSUMUHM VX4(38)
+
+#define VMRGHB VX4(12)
+#define VMRGHH VX4(76)
+#define VMRGHW VX4(140)
+#define VMRGLB VX4(268)
+#define VMRGLH VX4(332)
+#define VMRGLW VX4(396)
+
+#define VPKUHUM VX4(14)
+#define VPKUWUM VX4(78)

#define VAND VX4(1028)
#define VANDC VX4(1092)
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_sarv_vec:
return vece <= MO_32;
case INDEX_op_cmp_vec:
+ case INDEX_op_mul_vec:
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
- sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 };
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
+ mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
+ mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
+ muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
+ mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
+ pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
+ rotl_op[4] = { VRLB, VRLH, VRLW, 0 };

TCGType type = vecl + TCG_TYPE_V64;
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
}
break;

+ case INDEX_op_ppc_mrgh_vec:
+ insn = mrgh_op[vece];
+ break;
+ case INDEX_op_ppc_mrgl_vec:
+ insn = mrgl_op[vece];
+ break;
+ case INDEX_op_ppc_muleu_vec:
+ insn = muleu_op[vece];
+ break;
+ case INDEX_op_ppc_mulou_vec:
+ insn = mulou_op[vece];
+ break;
+ case INDEX_op_ppc_pkum_vec:
+ insn = pkum_op[vece];
+ break;
+ case INDEX_op_ppc_rotl_vec:
+ insn = rotl_op[vece];
+ break;
+ case INDEX_op_ppc_msum_vec:
+ tcg_debug_assert(vece == MO_16);
+ tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
+ return;
+
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
}
}

+static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2)
+{
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ TCGv_vec t2 = tcg_temp_new_vec(type);
+ TCGv_vec t3, t4;
+
+ switch (vece) {
+ case MO_8:
+ case MO_16:
+ vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2));
+ vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2));
+ vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1));
+ break;
+
+ case MO_32:
+ t3 = tcg_temp_new_vec(type);
+ t4 = tcg_temp_new_vec(type);
+ tcg_gen_dupi_vec(MO_8, t4, -16);
+ vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v2), tcgv_vec_arg(t4));
+ vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ tcg_gen_dupi_vec(MO_8, t3, 0);
+ vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
+ vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3),
+ tcgv_vec_arg(t3), tcgv_vec_arg(t4));
+ tcg_gen_add_vec(MO_32, v0, t2, t3);
+ tcg_temp_free_vec(t3);
+ tcg_temp_free_vec(t4);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+}
+
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
break;
+ case INDEX_op_mul_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_mul(type, vece, v0, v1, v2);
+ break;
default:
g_assert_not_reached();
}
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
+ static const TCGTargetOpDef v_v_v_v
+ = { .args_ct_str = { "v", "v", "v", "v" } };

switch (op) {
case INDEX_op_goto_ptr:
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)

case INDEX_op_add_vec:
case INDEX_op_sub_vec:
+ case INDEX_op_mul_vec:
case INDEX_op_and_vec:
case INDEX_op_or_vec:
case INDEX_op_xor_vec:
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
+ case INDEX_op_ppc_mrgh_vec:
+ case INDEX_op_ppc_mrgl_vec:
+ case INDEX_op_ppc_muleu_vec:
+ case INDEX_op_ppc_mulou_vec:
+ case INDEX_op_ppc_pkum_vec:
+ case INDEX_op_ppc_rotl_vec:
return &v_v_v;
case INDEX_op_not_vec:
case INDEX_op_dup_vec:
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_st_vec:
case INDEX_op_dupm_vec:
return &v_r;
+ case INDEX_op_ppc_msum_vec:
+ return &v_v_v_v;

default:
return NULL;
--
2.17.1
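A note on the expansion above, for readers unfamiliar with the Altivec idiom: vmule*/vmulo* produce double-width products of the even- and odd-numbered lanes, and the merge/pack pair then restores lane order while keeping only the low half of each product. Below is a standalone scalar model of the MO_16 path. This is our illustration, not QEMU code; it collapses the merge+pack steps into direct lane assembly and ignores endian-dependent lane numbering.

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar model of expand_vec_mul for vece == MO_16:
     * multiply even/odd lanes into 32-bit products (vmuleuh/vmulouh),
     * then keep the low 16 bits of each product in lane order
     * (the effect of the vmrgh/vmrgl + vpkum sequence). */
    int main(void)
    {
        uint16_t v1[8] = {1, 2, 3, 4, 5, 6, 7, 8};
        uint16_t v2[8] = {8, 7, 6, 5, 4, 3, 2, 1};
        uint32_t even[4], odd[4];
        uint16_t v0[8];

        for (int i = 0; i < 4; i++) {
            even[i] = (uint32_t)v1[2 * i] * v2[2 * i];         /* vmuleuh */
            odd[i]  = (uint32_t)v1[2 * i + 1] * v2[2 * i + 1]; /* vmulouh */
        }
        for (int i = 0; i < 4; i++) {
            v0[2 * i]     = (uint16_t)even[i];  /* low half, even lane */
            v0[2 * i + 1] = (uint16_t)odd[i];   /* low half, odd lane */
        }
        for (int i = 0; i < 8; i++) {
            printf("%u ", v0[i]);  /* 8 14 18 20 20 18 14 8 */
        }
        printf("\n");
        return 0;
    }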
There is no longer a need for preserving branch offset operands,
as we no longer re-translate.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
     *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target);
 }
 
-static inline void tcg_out_b_noaddr(TCGContext *s, int insn)
-{
-    unsigned retrans = *s->code_ptr & 0x3fffffc;
-    tcg_out32(s, insn | retrans);
-}
-
-static inline void tcg_out_bc_noaddr(TCGContext *s, int insn)
-{
-    unsigned retrans = *s->code_ptr & 0xfffc;
-    tcg_out32(s, insn | retrans);
-}
-
 /* parse target specific constraints */
 static const char *target_parse_constraint(TCGArgConstraint *ct,
                                            const char *ct_str, TCGType type)
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
 static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
 {
     if (l->has_value) {
-        tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr));
+        bc |= reloc_pc14_val(s->code_ptr, l->u.value_ptr);
     } else {
         tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
-        tcg_out_bc_noaddr(s, bc);
     }
+    tcg_out32(s, bc);
 }
 
 static void tcg_out_brcond(TCGContext *s, TCGCond cond,
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
 
     /* Load a pointer into the current opcode w/conditional branch-link. */
     label_ptr = s->code_ptr;
-    tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
 
     rbase = TCG_REG_R3;
 #else  /* !CONFIG_SOFTMMU */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 
     /* Load a pointer into the current opcode w/conditional branch-link. */
     label_ptr = s->code_ptr;
-    tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
 
     rbase = TCG_REG_R3;
 #else  /* !CONFIG_SOFTMMU */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_br:
         {
             TCGLabel *l = arg_label(args[0]);
+            uint32_t insn = B;
 
             if (l->has_value) {
-                tcg_out_b(s, 0, l->u.value_ptr);
+                insn |= reloc_pc24_val(s->code_ptr, l->u.value_ptr);
             } else {
                 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
-                tcg_out_b_noaddr(s, B);
             }
+            tcg_out32(s, insn);
         }
         break;
     case INDEX_op_ld8u_i32:
--
2.17.2

This is only used for 32-bit hosts.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.inc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
+    case INDEX_op_dup2_vec:
+        assert(TCG_TARGET_REG_BITS == 32);
+        /* With inputs a1 = xLxx, a2 = xHxx  */
+        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
+        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
+        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
+        return;
+
     case INDEX_op_ppc_mrgh_vec:
         insn = mrgh_op[vece];
         break;
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     case INDEX_op_ppc_mulou_vec:
     case INDEX_op_ppc_pkum_vec:
     case INDEX_op_ppc_rotl_vec:
+    case INDEX_op_dup2_vec:
         return &v_v_v;
     case INDEX_op_not_vec:
     case INDEX_op_dup_vec:
--
2.17.1
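On a 32-bit host a 64-bit value arrives as a hi:lo register pair, so dup2_vec has to replicate that pair across both doublewords of the 128-bit vector; the VMRGHW plus two VSLDOI shuffles above do exactly that. A scalar sketch of the word movement (ours, purely illustrative; real lane positions depend on endianness):

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar model of INDEX_op_dup2_vec: replicate a 64-bit hi:lo pair
     * across both doublewords of a 128-bit vector.  Word labels mirror
     * the patch comments (a1 = xLxx, a2 = xHxx -> a0 = HLHL). */
    int main(void)
    {
        uint32_t lo = 0x4c4c4c4c, hi = 0x48484848;
        uint32_t a0[4];

        /* VMRGHW merges the two source words: a0 = xxHL. */
        a0[2] = hi;
        a0[3] = lo;

        /* Two VSLDOI byte rotations copy the HL doubleword into
         * the other half as well: a0 = HLHL. */
        a0[0] = a0[2];
        a0[1] = a0[3];

        printf("%08x %08x %08x %08x\n", a0[0], a0[1], a0[2], a0[3]);
        return 0;
    }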
Deleted patch

There is no longer a need for preserving branch offset operands,
as we no longer re-translate.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/s390/tcg-target.inc.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390/tcg-target.inc.c
+++ b/tcg/s390/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
 static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
                                 TCGReg r1, TCGReg r2, TCGLabel *l)
 {
-    intptr_t off;
+    intptr_t off = 0;
 
     if (l->has_value) {
         off = l->u.value_ptr - s->code_ptr;
     } else {
-        /* We need to keep the offset unchanged for retranslation.  */
-        off = s->code_ptr[1];
         tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
     }
 
@@ -XXX,XX +XXX,XX @@ static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
 static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
                                     TCGReg r1, int i2, TCGLabel *l)
 {
-    tcg_target_long off;
+    tcg_target_long off = 0;
 
     if (l->has_value) {
         off = l->u.value_ptr - s->code_ptr;
     } else {
-        /* We need to keep the offset unchanged for retranslation.  */
-        off = s->code_ptr[1];
         tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
     }
 
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
 
     base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
 
-    /* We need to keep the offset unchanged for retranslation.  */
     tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
     label_ptr = s->code_ptr;
     s->code_ptr += 1;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
 
     base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
 
-    /* We need to keep the offset unchanged for retranslation.  */
     tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
     label_ptr = s->code_ptr;
     s->code_ptr += 1;
--
2.17.2
Deleted patch

There is no longer a need for preserving branch offset operands,
as we no longer re-translate.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/sparc/tcg-target.inc.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc/tcg-target.inc.c
+++ b/tcg/sparc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
 
 static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l)
 {
-    int off19;
+    int off19 = 0;
 
     if (l->has_value) {
         off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr));
     } else {
-        /* Make sure to preserve destinations during retranslation.  */
-        off19 = *s->code_ptr & INSN_OFF19(-1);
         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0);
     }
     tcg_out_bpcc0(s, scond, flags, off19);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1,
 {
     /* For 64-bit signed comparisons vs zero, we can avoid the compare. */
     if (arg2 == 0 && !is_unsigned_cond(cond)) {
-        int off16;
+        int off16 = 0;
 
         if (l->has_value) {
             off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr));
         } else {
-            /* Make sure to preserve destinations during retranslation.  */
-            off16 = *s->code_ptr & INSN_OFF16(-1);
             tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0);
         }
         tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
--
2.17.2
Deleted patch

There is no longer a need for preserving branch offset operands,
as we no longer re-translate.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.inc.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_opc_bf64(TCGContext *s, MIPSInsn opc, MIPSInsn opm,
 static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc,
                                   TCGReg rt, TCGReg rs)
 {
-    /* We pay attention here to not modify the branch target by reading
-       the existing value and using it again. This ensure that caches and
-       memory are kept coherent during retranslation. */
-    uint16_t offset = (uint16_t)*s->code_ptr;
-
-    tcg_out_opc_imm(s, opc, rt, rs, offset);
+    tcg_out_opc_imm(s, opc, rt, rs, 0);
 }
 
 /*
--
2.17.2
Deleted patch

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.inc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
     case R_386_PC32:
         value -= (uintptr_t)code_ptr;
         if (value != (int32_t)value) {
-            tcg_abort();
+            return false;
         }
         /* FALLTHRU */
     case R_386_32:
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
     case R_386_PC8:
         value -= (uintptr_t)code_ptr;
         if (value != (int8_t)value) {
-            tcg_abort();
+            return false;
        }
         tcg_patch8(code_ptr, value);
         break;
--
2.17.2
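The deleted patches above all move the backends from "assert on an out-of-range displacement" to "report failure and let the caller decide". A minimal sketch of that pattern, written by us for illustration only (field width and shift are parameters, not any particular architecture's encoding):

    #include <stdbool.h>
    #include <stdint.h>

    /* Validate that a branch displacement fits the instruction's
     * signed immediate field; patch it in on success, report failure
     * instead of aborting otherwise. */
    static bool patch_rel_imm(uint32_t *insn, intptr_t disp,
                              unsigned shift, unsigned bits)
    {
        intptr_t lo = -((intptr_t)1 << (bits - 1));
        intptr_t hi = ((intptr_t)1 << (bits - 1)) - 1;

        if (disp < lo || disp > hi) {
            return false;  /* out of range: caller can recover */
        }
        uint32_t mask = ((1u << bits) - 1) << shift;
        *insn = (*insn & ~mask) | (((uint32_t)disp << shift) & mask);
        return true;
    }

The benefit is that range overflow becomes a recoverable condition at the call site rather than a hard abort inside the patcher.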
This does require an extra two checks within the slow paths
to replace the assert that we're moving.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.inc.c | 37 ++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_oarg_regs[1] = {
 #define TCG_REG_GUEST_BASE TCG_REG_X28
 #endif
 
-static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 {
     ptrdiff_t offset = target - code_ptr;
-    tcg_debug_assert(offset == sextract64(offset, 0, 26));
-    /* read instruction, mask away previous PC_REL26 parameter contents,
-       set the proper offset, then write back the instruction. */
-    *code_ptr = deposit32(*code_ptr, 0, 26, offset);
+    if (offset == sextract64(offset, 0, 26)) {
+        /* read instruction, mask away previous PC_REL26 parameter contents,
+           set the proper offset, then write back the instruction. */
+        *code_ptr = deposit32(*code_ptr, 0, 26, offset);
+        return true;
+    }
+    return false;
 }
 
-static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 {
     ptrdiff_t offset = target - code_ptr;
-    tcg_debug_assert(offset == sextract64(offset, 0, 19));
-    *code_ptr = deposit32(*code_ptr, 5, 19, offset);
+    if (offset == sextract64(offset, 0, 19)) {
+        *code_ptr = deposit32(*code_ptr, 5, 19, offset);
+        return true;
+    }
+    return false;
 }
 
 static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
@@ -XXX,XX +XXX,XX @@ static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
     switch (type) {
     case R_AARCH64_JUMP26:
     case R_AARCH64_CALL26:
-        reloc_pc26(code_ptr, (tcg_insn_unit *)value);
-        break;
+        return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
     case R_AARCH64_CONDBR19:
-        reloc_pc19(code_ptr, (tcg_insn_unit *)value);
-        break;
+        return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
     default:
-        tcg_abort();
+        g_assert_not_reached();
     }
-    return true;
 }
 
 #define TCG_CT_CONST_AIMM 0x100
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     TCGMemOp opc = get_memop(oi);
     TCGMemOp size = opc & MO_SIZE;
 
-    reloc_pc19(lb->label_ptr[0], s->code_ptr);
+    bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
+    tcg_debug_assert(ok);
 
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     TCGMemOp opc = get_memop(oi);
     TCGMemOp size = opc & MO_SIZE;
 
-    reloc_pc19(lb->label_ptr[0], s->code_ptr);
+    bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
+    tcg_debug_assert(ok);
 
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
--
2.17.2

Now that we have implemented the required tcg operations,
we can enable detection of host vector support.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (PPC32)
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
     have_isel = have_isa_2_06;
 #endif
 
+    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
+        have_altivec = true;
+    }
+
     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
     tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
     if (have_altivec) {
--
2.17.1
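The hwcap word tested above is the AT_HWCAP entry of the ELF auxiliary vector. A standalone sketch of the same probe outside QEMU, assuming a Linux/powerpc host where getauxval() and the PPC_FEATURE_* bits are available (QEMU itself goes through its own qemu_getauxval() wrapper):

    #include <stdbool.h>
    #include <stdio.h>
    #include <sys/auxv.h>      /* getauxval(), glibc >= 2.16 */
    #include <asm/cputable.h>  /* PPC_FEATURE_HAS_ALTIVEC, powerpc only */

    /* Read AT_HWCAP and test the Altivec feature bit, mirroring the
     * detection added to tcg_target_init above. */
    int main(void)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);
        bool have_altivec = (hwcap & PPC_FEATURE_HAS_ALTIVEC) != 0;

        printf("altivec: %s\n", have_altivec ? "yes" : "no");
        return 0;
    }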
We now have an invariant that all TCG_TYPE_I32 values are
zero-extended, which means that we do not need to extend
them again during qemu_ld/st, either explicitly via a separate
tcg_out_ext32u or implicitly via P_ADDR32.

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.inc.c | 103 +++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 63 deletions(-)

diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define P_EXT38         0x200       /* 0x0f 0x38 opcode prefix */
 #define P_DATA16        0x400       /* 0x66 opcode prefix */
 #if TCG_TARGET_REG_BITS == 64
-# define P_ADDR32       0x800       /* 0x67 opcode prefix */
 # define P_REXW         0x1000      /* Set REX.W = 1 */
 # define P_REXB_R       0x2000      /* REG field as byte register */
 # define P_REXB_RM      0x4000      /* R/M field as byte register */
 # define P_GS           0x8000      /* gs segment override */
 #else
-# define P_ADDR32       0
 # define P_REXW         0
 # define P_REXB_R       0
 # define P_REXB_RM      0
@@ -XXX,XX +XXX,XX @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
         tcg_debug_assert((opc & P_REXW) == 0);
         tcg_out8(s, 0x66);
     }
-    if (opc & P_ADDR32) {
-        tcg_out8(s, 0x67);
-    }
     if (opc & P_SIMDF3) {
         tcg_out8(s, 0xf3);
     } else if (opc & P_SIMDF2) {
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
 
     /* Prepare for both the fast path add of the tlb addend, and the slow
-       path function argument setup.  There are two cases worth note:
-       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
-       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
-       copies the entire guest address for the slow path, while truncation
-       for the 32-bit host happens with the fastpath ADDL below.  */
+       path function argument setup.  */
     tcg_out_mov(s, ttype, r1, addrlo);
 
     /* jne slow_path */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 #else
     {
         int32_t offset = guest_base;
-        TCGReg base = addrlo;
         int index = -1;
         int seg = 0;
 
-        /* For a 32-bit guest, the high 32 bits may contain garbage.
-           We can do this with the ADDR32 prefix if we're not using
-           a guest base, or when using segmentation.  Otherwise we
-           need to zero-extend manually.  */
+        /*
+         * Recall we store 32-bit values zero-extended.  No need for
+         * further manual extension or an addr32 (0x67) prefix.
+         */
         if (guest_base == 0 || guest_base_flags) {
             seg = guest_base_flags;
             offset = 0;
-            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-                seg |= P_ADDR32;
-            }
-        } else if (TCG_TARGET_REG_BITS == 64) {
-            if (TARGET_LONG_BITS == 32) {
-                tcg_out_ext32u(s, TCG_REG_L0, base);
-                base = TCG_REG_L0;
-            }
-            if (offset != guest_base) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
-                index = TCG_REG_L1;
-                offset = 0;
-            }
+        } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
+            index = TCG_REG_L1;
+            offset = 0;
         }
 
         tcg_out_qemu_ld_direct(s, datalo, datahi,
-                               base, index, offset, seg, is64, opc);
+                               addrlo, index, offset, seg, is64, opc);
     }
 #endif
 }
 
 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                                   TCGReg base, intptr_t ofs, int seg,
-                                   TCGMemOp memop)
+                                   TCGReg base, int index, intptr_t ofs,
+                                   int seg, TCGMemOp memop)
 {
     /* ??? Ideally we wouldn't need a scratch register.  For user-only,
        we could perform the bswap twice to restore the original value
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
         datalo = scratch;
     }
-    tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
-                         datalo, base, ofs);
+    tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
+                             datalo, base, index, 0, ofs);
     break;
 case MO_16:
     if (bswap) {
         tcg_out_rolw_8(s, scratch);
         datalo = scratch;
     }
-    tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
+    tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo,
+                             base, index, 0, ofs);
     break;
 case MO_32:
     if (bswap) {
         tcg_out_bswap32(s, scratch);
         datalo = scratch;
     }
-    tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+    tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
     break;
 case MO_64:
     if (TCG_TARGET_REG_BITS == 64) {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         tcg_out_bswap64(s, scratch);
         datalo = scratch;
         }
-        tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
+        tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
+                                 base, index, 0, ofs);
     } else if (bswap) {
         tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
         tcg_out_bswap32(s, scratch);
-        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
+        tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
+                                 base, index, 0, ofs);
         tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
         tcg_out_bswap32(s, scratch);
-        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
+        tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
+                                 base, index, 0, ofs + 4);
     } else {
         if (real_bswap) {
             int t = datalo;
             datalo = datahi;
             datahi = t;
         }
-        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
-        tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
+        tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+                                 base, index, 0, ofs);
+        tcg_out_modrm_sib_offset(s, movop + seg, datahi,
+                                 base, index, 0, ofs + 4);
     }
     break;
 default:
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
-    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
+    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
 
     /* Record the current context of a store into ldst label */
     add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi,
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 #else
     {
         int32_t offset = guest_base;
-        TCGReg base = addrlo;
+        int index = -1;
         int seg = 0;
 
-        /* See comment in tcg_out_qemu_ld re zero-extension of addrlo.  */
+        /*
+         * Recall we store 32-bit values zero-extended.  No need for
+         * further manual extension or an addr32 (0x67) prefix.
+         */
         if (guest_base == 0 || guest_base_flags) {
             seg = guest_base_flags;
             offset = 0;
-            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-                seg |= P_ADDR32;
-            }
-        } else if (TCG_TARGET_REG_BITS == 64) {
-            /* ??? Note that we can't use the same SIB addressing scheme
-               as for loads, since we require L0 free for bswap.  */
-            if (offset != guest_base) {
-                if (TARGET_LONG_BITS == 32) {
-                    tcg_out_ext32u(s, TCG_REG_L0, base);
-                    base = TCG_REG_L0;
-                }
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
-                base = TCG_REG_L1;
-                offset = 0;
-            } else if (TARGET_LONG_BITS == 32) {
-                tcg_out_ext32u(s, TCG_REG_L1, base);
-                base = TCG_REG_L1;
-            }
+        } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) {
+            /* ??? Note that we require L0 free for bswap.  */
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
+            index = TCG_REG_L1;
+            offset = 0;
         }
 
-        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
+        tcg_out_qemu_st_direct(s, datalo, datahi,
+                               addrlo, index, offset, seg, opc);
     }
 #endif
 }
--
2.17.2

The VSX instruction set includes double-word loads and stores,
double-word load and splat, double-word permute, and bit select,
all of which require multiple operations in the Altivec
instruction set.

Because the VSX registers map %vsr32 to %vr0, and we have no current
intention or need to use vector registers outside %vr0-%vr19, force
on the {ax,bx,cx,tx} bits within the added VSX insns so that we don't
have to otherwise modify the VR[TABC] macros.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.h     |  5 ++--
 tcg/ppc/tcg-target.inc.c | 52 ++++++++++++++++++++++++++++++----------
 2 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 
 extern TCGPowerISA have_isa;
 extern bool have_altivec;
+extern bool have_vsx;
 
 #define have_isa_2_06  (have_isa >= tcg_isa_2_06)
 #define have_isa_3_00  (have_isa >= tcg_isa_3_00)
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
  * instruction and substituting two 32-bit stores makes the generated
  * code quite large.
  */
-#define TCG_TARGET_HAS_v64              0
+#define TCG_TARGET_HAS_v64              have_vsx
 #define TCG_TARGET_HAS_v128             have_altivec
 #define TCG_TARGET_HAS_v256             0
 
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
 #define TCG_TARGET_HAS_mul_vec          1
 #define TCG_TARGET_HAS_sat_vec          1
 #define TCG_TARGET_HAS_minmax_vec       1
-#define TCG_TARGET_HAS_bitsel_vec       0
+#define TCG_TARGET_HAS_bitsel_vec       have_vsx
 #define TCG_TARGET_HAS_cmpsel_vec       0
 
 void flush_icache_range(uintptr_t start, uintptr_t stop);
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
 TCGPowerISA have_isa;
 static bool have_isel;
 bool have_altivec;
+bool have_vsx;
 
 #ifndef CONFIG_SOFTMMU
 #define TCG_GUEST_BASE_REG 30
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define LVEBX      XO31(7)
 #define LVEHX      XO31(39)
 #define LVEWX      XO31(71)
+#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
+#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
 
 #define STVX       XO31(231)
 #define STVEWX     XO31(199)
+#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
 
 #define VADDSBS    VX4(768)
 #define VADDUBS    VX4(512)
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 
 #define VSLDOI     VX4(44)
 
+#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
+#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
+
 #define RT(r) ((r)<<21)
 #define RS(r) ((r)<<21)
 #define RA(r) ((r)<<16)
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
         add = 0;
     }
 
-    load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
-    if (TCG_TARGET_REG_BITS == 64) {
-        new_pool_l2(s, rel, s->code_ptr, add, val, val);
+    if (have_vsx) {
+        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
+        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
+        if (TCG_TARGET_REG_BITS == 64) {
+            new_pool_label(s, val, rel, s->code_ptr, add);
+        } else {
+            new_pool_l2(s, rel, s->code_ptr, add, val, val);
+        }
     } else {
-        new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
+        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
+        if (TCG_TARGET_REG_BITS == 64) {
+            new_pool_l2(s, rel, s->code_ptr, add, val, val);
+        } else {
+            new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
+        }
     }
 
     if (USE_REG_TB) {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
         /* fallthru */
     case TCG_TYPE_V64:
         tcg_debug_assert(ret >= TCG_REG_V0);
+        if (have_vsx) {
+            tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
+            break;
+        }
         tcg_debug_assert((offset & 7) == 0);
         tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
         if (offset & 8) {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
         /* fallthru */
     case TCG_TYPE_V64:
         tcg_debug_assert(arg >= TCG_REG_V0);
+        if (have_vsx) {
+            tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
+            break;
+        }
         tcg_debug_assert((offset & 7) == 0);
         if (offset & 8) {
             tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_shri_vec:
     case INDEX_op_sari_vec:
         return vece <= MO_32 ? -1 : 0;
+    case INDEX_op_bitsel_vec:
+        return have_vsx;
     default:
         return 0;
     }
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
         tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
         break;
     case MO_64:
+        if (have_vsx) {
+            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
+            break;
+        }
         tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
         tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
         break;
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
         tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
         break;
     case MO_64:
+        if (have_vsx) {
+            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
+            break;
+        }
         tcg_debug_assert((offset & 7) == 0);
         tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
         tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
+    case INDEX_op_bitsel_vec:
+        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
+        return;
+
     case INDEX_op_dup2_vec:
         assert(TCG_TARGET_REG_BITS == 32);
         /* With inputs a1 = xLxx, a2 = xHxx */
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     case INDEX_op_st_vec:
     case INDEX_op_dupm_vec:
         return &v_r;
+    case INDEX_op_bitsel_vec:
     case INDEX_op_ppc_msum_vec:
         return &v_v_v_v;
 
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
 
     if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
         have_altivec = true;
+        /* We only care about the portion of VSX that overlaps Altivec. */
+        if (hwcap & PPC_FEATURE_HAS_VSX) {
+            have_vsx = true;
+        }
     }
 
     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
--
2.17.1
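Why forcing the {ax,bx,cx,tx} bits works: a VSX register number is 6 bits, but the high bit lives in a separate position of the opcode from the ordinary 5-bit field, and VSRs 32..63 alias VRs 0..31. With the extension bit hard-wired to 1, the unchanged 5-bit VRT/VRA/VRB macros address exactly the Altivec subset. A small sketch of the encoding, written by us for illustration (encode_lxsdx and its layout comments are our own, derived from the XO31/VRT macros quoted in the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define VRT(r)  ((r) << 21)  /* ordinary 5-bit target field */

    /* Encode lxsdx targeting Altivec register vr (0..31), i.e. VSR
     * number 32 + vr: tx=1 supplies the high register bit. */
    static uint32_t encode_lxsdx(unsigned vr)
    {
        uint32_t lxsdx = (31u << 26) | (588u << 1) | 1;  /* XO31(588) | tx */
        return lxsdx | VRT(vr & 31);
    }

    int main(void)
    {
        /* lxsdx targeting %vr2 addresses %vsr34. */
        printf("lxsdx v2 -> %08x\n", encode_lxsdx(2));
        return 0;
    }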
This does require an extra two checks within the slow paths
to replace the assert that we're moving.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.inc.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static const uint8_t tcg_cond_to_arm_cond[] = {
     [TCG_COND_GTU] = COND_HI,
 };
 
-static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+static inline bool reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 {
     ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
-    *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
+    if (offset == sextract32(offset, 0, 24)) {
+        *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
+        return true;
+    }
+    return false;
 }
 
 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
     tcg_debug_assert(addend == 0);
 
     if (type == R_ARM_PC24) {
-        reloc_pc24(code_ptr, (tcg_insn_unit *)value);
+        return reloc_pc24(code_ptr, (tcg_insn_unit *)value);
     } else if (type == R_ARM_PC13) {
         intptr_t diff = value - (uintptr_t)(code_ptr + 2);
         tcg_insn_unit insn = *code_ptr;
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
         } else {
             int rd = extract32(insn, 12, 4);
             int rt = rd == TCG_REG_PC ? TCG_REG_TMP : rd;
-            assert(diff >= 0x1000 && diff < 0x100000);
+
+            if (diff < 0x1000 || diff >= 0x100000) {
+                return false;
+            }
+
             /* add rt, pc, #high */
             *code_ptr++ = ((insn & 0xf0000000) | (1 << 25) | ARITH_ADD
                            | (TCG_REG_PC << 16) | (rt << 12)
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     TCGMemOp opc = get_memop(oi);
     void *func;
 
-    reloc_pc24(lb->label_ptr[0], s->code_ptr);
+    bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr);
+    tcg_debug_assert(ok);
 
     argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
     if (TARGET_LONG_BITS == 64) {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     TCGMemOpIdx oi = lb->oi;
     TCGMemOp opc = get_memop(oi);
 
-    reloc_pc24(lb->label_ptr[0], s->code_ptr);
+    bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr);
+    tcg_debug_assert(ok);
 
     argreg = TCG_REG_R0;
     argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
--
2.17.2

These new instructions are conditional only on MSR.VEC and
are thus part of the Altivec instruction set, and not VSX.
This includes lots of double-word arithmetic and a few extra
logical operations.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.h     |  4 +-
 tcg/ppc/tcg-target.inc.c | 85 ++++++++++++++++++++++++++++++----------
 2 files changed, 67 insertions(+), 22 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 typedef enum {
     tcg_isa_base,
     tcg_isa_2_06,
+    tcg_isa_2_07,
     tcg_isa_3_00,
 } TCGPowerISA;
 
@@ -XXX,XX +XXX,XX @@ extern bool have_altivec;
 extern bool have_vsx;
 
 #define have_isa_2_06  (have_isa >= tcg_isa_2_06)
+#define have_isa_2_07  (have_isa >= tcg_isa_2_07)
 #define have_isa_3_00  (have_isa >= tcg_isa_3_00)
 
 /* optional instructions automatically implemented */
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
 #define TCG_TARGET_HAS_v256             0
 
 #define TCG_TARGET_HAS_andc_vec         1
-#define TCG_TARGET_HAS_orc_vec          0
+#define TCG_TARGET_HAS_orc_vec          have_isa_2_07
 #define TCG_TARGET_HAS_not_vec          1
 #define TCG_TARGET_HAS_neg_vec          0
 #define TCG_TARGET_HAS_abs_vec          0
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define VADDSWS    VX4(896)
 #define VADDUWS    VX4(640)
 #define VADDUWM    VX4(128)
+#define VADDUDM    VX4(192)        /* v2.07 */
 
 #define VSUBSBS    VX4(1792)
 #define VSUBUBS    VX4(1536)
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define VSUBSWS    VX4(1920)
 #define VSUBUWS    VX4(1664)
 #define VSUBUWM    VX4(1152)
+#define VSUBUDM    VX4(1216)       /* v2.07 */
 
 #define VMAXSB     VX4(258)
 #define VMAXSH     VX4(322)
 #define VMAXSW     VX4(386)
+#define VMAXSD     VX4(450)        /* v2.07 */
 #define VMAXUB     VX4(2)
 #define VMAXUH     VX4(66)
 #define VMAXUW     VX4(130)
+#define VMAXUD     VX4(194)        /* v2.07 */
 #define VMINSB     VX4(770)
 #define VMINSH     VX4(834)
 #define VMINSW     VX4(898)
+#define VMINSD     VX4(962)        /* v2.07 */
 #define VMINUB     VX4(514)
 #define VMINUH     VX4(578)
 #define VMINUW     VX4(642)
+#define VMINUD     VX4(706)        /* v2.07 */
 
 #define VCMPEQUB   VX4(6)
 #define VCMPEQUH   VX4(70)
 #define VCMPEQUW   VX4(134)
+#define VCMPEQUD   VX4(199)        /* v2.07 */
 #define VCMPGTSB   VX4(774)
 #define VCMPGTSH   VX4(838)
 #define VCMPGTSW   VX4(902)
+#define VCMPGTSD   VX4(967)        /* v2.07 */
 #define VCMPGTUB   VX4(518)
 #define VCMPGTUH   VX4(582)
 #define VCMPGTUW   VX4(646)
+#define VCMPGTUD   VX4(711)        /* v2.07 */
 
 #define VSLB       VX4(260)
 #define VSLH       VX4(324)
 #define VSLW       VX4(388)
+#define VSLD       VX4(1476)       /* v2.07 */
 #define VSRB       VX4(516)
 #define VSRH       VX4(580)
 #define VSRW       VX4(644)
+#define VSRD       VX4(1732)       /* v2.07 */
 #define VSRAB      VX4(772)
 #define VSRAH      VX4(836)
 #define VSRAW      VX4(900)
+#define VSRAD      VX4(964)        /* v2.07 */
 #define VRLB       VX4(4)
 #define VRLH       VX4(68)
 #define VRLW       VX4(132)
+#define VRLD       VX4(196)        /* v2.07 */
 
 #define VMULEUB    VX4(520)
 #define VMULEUH    VX4(584)
+#define VMULEUW    VX4(648)        /* v2.07 */
 #define VMULOUB    VX4(8)
 #define VMULOUH    VX4(72)
+#define VMULOUW    VX4(136)        /* v2.07 */
+#define VMULUWM    VX4(137)        /* v2.07 */
 #define VMSUMUHM   VX4(38)
 
 #define VMRGHB     VX4(12)
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define VNOR       VX4(1284)
 #define VOR        VX4(1156)
 #define VXOR       VX4(1220)
+#define VEQV       VX4(1668)       /* v2.07 */
+#define VNAND      VX4(1412)       /* v2.07 */
+#define VORC       VX4(1348)       /* v2.07 */
 
 #define VSPLTB     VX4(524)
 #define VSPLTH     VX4(588)
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_andc_vec:
     case INDEX_op_not_vec:
         return 1;
+    case INDEX_op_orc_vec:
+        return have_isa_2_07;
     case INDEX_op_add_vec:
     case INDEX_op_sub_vec:
     case INDEX_op_smax_vec:
     case INDEX_op_smin_vec:
     case INDEX_op_umax_vec:
     case INDEX_op_umin_vec:
+    case INDEX_op_shlv_vec:
+    case INDEX_op_shrv_vec:
+    case INDEX_op_sarv_vec:
+        return vece <= MO_32 || have_isa_2_07;
     case INDEX_op_ssadd_vec:
     case INDEX_op_sssub_vec:
     case INDEX_op_usadd_vec:
     case INDEX_op_ussub_vec:
-    case INDEX_op_shlv_vec:
-    case INDEX_op_shrv_vec:
-    case INDEX_op_sarv_vec:
         return vece <= MO_32;
     case INDEX_op_cmp_vec:
-    case INDEX_op_mul_vec:
     case INDEX_op_shli_vec:
     case INDEX_op_shri_vec:
     case INDEX_op_sari_vec:
-        return vece <= MO_32 ? -1 : 0;
+        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
+    case INDEX_op_mul_vec:
+        switch (vece) {
+        case MO_8:
+        case MO_16:
+            return -1;
+        case MO_32:
+            return have_isa_2_07 ? 1 : -1;
+        }
+        return 0;
     case INDEX_op_bitsel_vec:
         return have_vsx;
     default:
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                            const TCGArg *args, const int *const_args)
 {
     static const uint32_t
-        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
-        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
-        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
-        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
-        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
+        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
+        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
+        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
+        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
+        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
         ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
         usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
         sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
         ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
-        umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
-        smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
-        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
-        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
-        shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
-        shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
-        sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
+        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
+        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
+        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
+        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
+        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
+        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
+        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
         mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
         mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
-        muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
-        mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
+        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
+        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
         pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
-        rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
+        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
 
     TCGType type = vecl + TCG_TYPE_V64;
     TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_sub_vec:
         insn = sub_op[vece];
         break;
+    case INDEX_op_mul_vec:
+        tcg_debug_assert(vece == MO_32 && have_isa_2_07);
+        insn = VMULUWM;
+        break;
     case INDEX_op_ssadd_vec:
         insn = ssadd_op[vece];
         break;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         insn = VNOR;
         a2 = a1;
         break;
+    case INDEX_op_orc_vec:
+        insn = VORC;
+        break;
 
     case INDEX_op_cmp_vec:
         switch (args[3]) {
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
 {
     bool need_swap = false, need_inv = false;
 
-    tcg_debug_assert(vece <= MO_32);
+    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
 
     switch (cond) {
     case TCG_COND_EQ:
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
         break;
 
     case MO_32:
+        tcg_debug_assert(!have_isa_2_07);
         t3 = tcg_temp_new_vec(type);
         t4 = tcg_temp_new_vec(type);
         tcg_gen_dupi_vec(MO_8, t4, -16);
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
     if (hwcap & PPC_FEATURE_ARCH_2_06) {
         have_isa = tcg_isa_2_06;
     }
+#ifdef PPC_FEATURE2_ARCH_2_07
+    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
+        have_isa = tcg_isa_2_07;
+    }
+#endif
 #ifdef PPC_FEATURE2_ARCH_3_00
     if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
         have_isa = tcg_isa_3_00;
--
2.17.1
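The tri-state return values in tcg_can_emit_vec_op above follow TCG's convention: 1 means the backend emits the op directly, -1 means the op is accepted but lowered through tcg_expand_vec_op, and 0 means it is rejected so generic code falls back to scalar expansion. A condensed restatement of the mul_vec decision from the hunk above:

    /* Condensed from the patch: how mul_vec availability is reported.
     *   1  -> emitted directly (MO_32 via vmuluwm on ISA v2.07)
     *  -1  -> expanded by tcg_expand_vec_op (even/odd multiply sequence)
     *   0  -> unsupported element size */
    static int can_emit_mul_vec(unsigned vece, _Bool have_isa_2_07)
    {
        switch (vece) {
        case 0: /* MO_8 */
        case 1: /* MO_16 */
            return -1;
        case 2: /* MO_32 */
            return have_isa_2_07 ? 1 : -1;
        default:
            return 0;
        }
    }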
From: Alistair Francis <Alistair.Francis@wdc.com>

Instead of hard coding 31 for the shift right, use TCG_TARGET_REG_BITS - 1.

Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <7dfbddf7014a595150aa79011ddb342c3cc17ec3.1544648105.git.alistair.francis@wdc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.inc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al,
         tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl);
         tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl);
     } else if (rl == al && rl == bl) {
-        tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31);
+        tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, TCG_TARGET_REG_BITS - 1);
         tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl);
     } else {
         tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl);
--
2.17.2

These new instructions are conditional only on MSR.VSX and
are thus part of the VSX instruction set, and not Altivec.
This includes double-word loads and stores.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define LVEWX      XO31(71)
 #define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
 #define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
+#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
 
 #define STVX       XO31(231)
 #define STVEWX     XO31(199)
 #define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
+#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
 
 #define VADDSBS    VX4(768)
 #define VADDUBS    VX4(512)
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
         tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
         break;
     }
+    if (have_isa_2_07 && have_vsx) {
+        tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
+        break;
+    }
     tcg_debug_assert((offset & 3) == 0);
     tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
     shift = (offset - 4) & 0xc;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
         tcg_out_mem_long(s, STW, STWX, arg, base, offset);
         break;
     }
+    if (have_isa_2_07 && have_vsx) {
+        tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
+        break;
+    }
+    assert((offset & 3) == 0);
     tcg_debug_assert((offset & 3) == 0);
     shift = (offset - 4) & 0xc;
     if (shift) {
--
2.17.1
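On the mips add2 special case above: when rl == al == bl the addition doubles the value, so the carry out is simply the operand's top bit, which a logical shift right by (register width - 1) extracts. A scalar check of that identity:

    #include <stdint.h>
    #include <stdio.h>

    /* Model of the rl == al == bl path in tcg_out_addsub2: doubling a
     * value carries out exactly when the top bit is set. */
    int main(void)
    {
        uint32_t al = 0x90000000;
        uint32_t carry = al >> (32 - 1);  /* SRL by TCG_TARGET_REG_BITS-1 */
        uint32_t rl = al + al;            /* ADDU */

        printf("rl=%08x carry=%u\n", rl, carry);  /* rl=20000000 carry=1 */
        return 0;
    }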
The reloc_pc{14,24}_val routines retain their asserts.
Use these directly within the slow paths.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target)
     return disp & 0x3fffffc;
 }
 
-static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
+static bool reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target);
+    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+    if (in_range_b(disp)) {
+        *pc = (*pc & ~0x3fffffc) | (disp & 0x3fffffc);
+        return true;
+    }
+    return false;
 }
 
 static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
@@ -XXX,XX +XXX,XX @@ static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
     return disp & 0xfffc;
 }
 
-static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
+static bool reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target);
+    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+    if (disp == (int16_t) disp) {
+        *pc = (*pc & ~0xfffc) | (disp & 0xfffc);
+        return true;
+    }
+    return false;
 }
 
 /* parse target specific constraints */
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 
     switch (type) {
     case R_PPC_REL14:
-        reloc_pc14(code_ptr, target);
-        break;
+        return reloc_pc14(code_ptr, target);
     case R_PPC_REL24:
-        reloc_pc24(code_ptr, target);
-        break;
+        return reloc_pc24(code_ptr, target);
     case R_PPC_ADDR16:
         /* We are abusing this relocation type.  This points to a pair
            of insns, addis + load.  If the displacement is small, we
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
         } else {
             int16_t lo = value;
             int hi = value - lo;
-            assert(hi + lo == value);
+            if (hi + lo != value) {
+                return false;
+            }
             code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
             code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
         }
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     TCGMemOp opc = get_memop(oi);
     TCGReg hi, lo, arg = TCG_REG_R3;
 
-    reloc_pc14(lb->label_ptr[0], s->code_ptr);
+    **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr);
 
     tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
 
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     TCGMemOp s_bits = opc & MO_SIZE;
     TCGReg hi, lo, arg = TCG_REG_R3;
 
-    reloc_pc14(lb->label_ptr[0], s->code_ptr);
+    **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr);
 
     tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
 
--
2.17.2

These new instructions are conditional on MSR.FP when TX=0 and
MSR.VEC when TX=1.  Since we only care about the Altivec registers,
and force TX=1, we can consider these to be Altivec instructions.
Since Altivec is true for any use of vector types, we only need
test have_isa_2_07.

This includes moves to and from the integer registers.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 32 ++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
 #define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
 
+#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
+#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
+#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
+#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
+
 #define RT(r) ((r)<<21)
 #define RS(r) ((r)<<21)
 #define RA(r) ((r)<<16)
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
         /* fallthru */
     case TCG_TYPE_I32:
-        if (ret < TCG_REG_V0 && arg < TCG_REG_V0) {
-            tcg_out32(s, OR | SAB(arg, ret, arg));
-            break;
-        } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) {
-            /* Altivec does not support vector/integer moves.  */
-            return false;
+        if (ret < TCG_REG_V0) {
+            if (arg < TCG_REG_V0) {
+                tcg_out32(s, OR | SAB(arg, ret, arg));
+                break;
+            } else if (have_isa_2_07) {
+                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
+                          | VRT(arg) | RA(ret));
+                break;
+            } else {
+                /* Altivec does not support vector->integer moves.  */
+                return false;
+            }
+        } else if (arg < TCG_REG_V0) {
+            if (have_isa_2_07) {
+                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
+                          | VRT(ret) | RA(arg));
+                break;
+            } else {
+                /* Altivec does not support integer->vector moves.  */
+                return false;
+            }
         }
         /* fallthru */
     case TCG_TYPE_V64:
--
2.17.1
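Before v2.07 there is no direct GPR<->VR move instruction, which is why tcg_out_mov returns false and forces the register allocator to find another path; in practice that path bounces through memory. A sketch of that cost, ours and purely illustrative (the std/lvx and stvx/ld pairings named in the comments are our approximation of what such a spill looks like):

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint64_t dw[2]; } vec128;

    /* Pre-2.07 fallback for a GPR -> vector-doubleword move: store the
     * scalar, reload into the vector side.  mtvsrd replaces this with a
     * single instruction. */
    static void gpr_to_vr_doubleword(vec128 *vr, uint64_t gpr)
    {
        memcpy(&vr->dw[0], &gpr, sizeof(gpr));
    }

    /* And the reverse direction, replaced by mfvsrd on v2.07. */
    static uint64_t vr_to_gpr_doubleword(const vec128 *vr)
    {
        uint64_t gpr;
        memcpy(&gpr, &vr->dw[0], sizeof(gpr));
        return gpr;
    }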
Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.inc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
     }
     return 0;
 }
+# elif defined (__FreeBSD__) || defined (__FreeBSD_kernel__)
+# include <machine/sysarch.h>
+static inline int setup_guest_base_seg(void)
+{
+    if (sysarch(AMD64_SET_GSBASE, &guest_base) == 0) {
+        return P_GS;
+    }
+    return 0;
+}
 # else
 static inline int setup_guest_base_seg(void)
 {
--
2.17.2

These new instructions are conditional only on MSR.VEC and
are thus part of the Altivec instruction set, and not VSX.
This includes negation and compare not equal.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.h     |  2 +-
 tcg/ppc/tcg-target.inc.c | 23 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
 #define TCG_TARGET_HAS_andc_vec         1
 #define TCG_TARGET_HAS_orc_vec          have_isa_2_07
 #define TCG_TARGET_HAS_not_vec          1
-#define TCG_TARGET_HAS_neg_vec          0
+#define TCG_TARGET_HAS_neg_vec          have_isa_3_00
 #define TCG_TARGET_HAS_abs_vec          0
 #define TCG_TARGET_HAS_shi_vec          0
 #define TCG_TARGET_HAS_shs_vec          0
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define VSUBUWM    VX4(1152)
 #define VSUBUDM    VX4(1216)       /* v2.07 */
 
+#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
+#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */
+
 #define VMAXSB     VX4(258)
 #define VMAXSH     VX4(322)
 #define VMAXSW     VX4(386)
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define VCMPGTUH   VX4(582)
 #define VCMPGTUW   VX4(646)
 #define VCMPGTUD   VX4(711)        /* v2.07 */
+#define VCMPNEB    VX4(7)          /* v3.00 */
+#define VCMPNEH    VX4(71)         /* v3.00 */
+#define VCMPNEW    VX4(135)        /* v3.00 */
 
 #define VSLB       VX4(260)
 #define VSLH       VX4(324)
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_shri_vec:
     case INDEX_op_sari_vec:
         return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
+    case INDEX_op_neg_vec:
+        return vece >= MO_32 && have_isa_3_00;
     case INDEX_op_mul_vec:
         switch (vece) {
         case MO_8:
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     static const uint32_t
         add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
         sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
+        neg_op[4] = { 0, 0, VNEGW, VNEGD },
         eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
+        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
         gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
         gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
         ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_sub_vec:
         insn = sub_op[vece];
         break;
+    case INDEX_op_neg_vec:
+        insn = neg_op[vece];
+        a2 = a1;
+        a1 = 0;
+        break;
     case INDEX_op_mul_vec:
         tcg_debug_assert(vece == MO_32 && have_isa_2_07);
         insn = VMULUWM;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     case TCG_COND_EQ:
         insn = eq_op[vece];
         break;
+    case TCG_COND_NE:
+        insn = ne_op[vece];
+        break;
     case TCG_COND_GT:
         insn = gts_op[vece];
         break;
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
     case TCG_COND_GTU:
         break;
     case TCG_COND_NE:
+        if (have_isa_3_00 && vece <= MO_32) {
+            break;
+        }
+        /* fall through */
     case TCG_COND_LE:
     case TCG_COND_LEU:
         need_inv = true;
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     case INDEX_op_dup2_vec:
         return &v_v_v;
     case INDEX_op_not_vec:
+    case INDEX_op_neg_vec:
     case INDEX_op_dup_vec:
         return &v_v;
     case INDEX_op_ld_vec:
--
2.17.1
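The NE handling above is worth spelling out: before v3.00 there is no vector compare-not-equal, so expand_vec_cmp takes the need_inv path (compute EQ, then invert with vnor); with vcmpne* it is a single compare. A scalar model of both strategies on one 32-bit lane:

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar model of the two NE strategies in expand_vec_cmp:
     * pre-v3.00 computes EQ then inverts; v3.00 compares directly. */
    int main(void)
    {
        uint32_t a = 5, b = 7;

        uint32_t eq = (a == b) ? ~0u : 0;       /* vcmpequw */
        uint32_t ne_pre_300 = ~eq;              /* vnor, need_inv path */
        uint32_t ne_v300 = (a != b) ? ~0u : 0;  /* vcmpnew */

        printf("%08x %08x\n", ne_pre_300, ne_v300);  /* identical */
        return 0;
    }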
This does require an extra two checks within the slow paths
to replace the assert that we're moving.  Also add two checks
within existing functions that lacked any kind of assert for
out of range branch.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/s390/tcg-target.inc.c | 34 +++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390/tcg-target.inc.c
+++ b/tcg/s390/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 
     switch (type) {
     case R_390_PC16DBL:
-        assert(pcrel2 == (int16_t)pcrel2);
-        tcg_patch16(code_ptr, pcrel2);
+        if (pcrel2 == (int16_t)pcrel2) {
+            tcg_patch16(code_ptr, pcrel2);
+            return true;
+        }
         break;
     case R_390_PC32DBL:
-        assert(pcrel2 == (int32_t)pcrel2);
-        tcg_patch32(code_ptr, pcrel2);
+        if (pcrel2 == (int32_t)pcrel2) {
+            tcg_patch32(code_ptr, pcrel2);
+            return true;
+        }
         break;
     case R_390_20:
-        assert(value == sextract64(value, 0, 20));
-        old = *(uint32_t *)code_ptr & 0xf00000ff;
-        old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
-        tcg_patch32(code_ptr, old);
+        if (value == sextract64(value, 0, 20)) {
+            old = *(uint32_t *)code_ptr & 0xf00000ff;
+            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
+            tcg_patch32(code_ptr, old);
+            return true;
+        }
         break;

These new instructions are a mix of those like LXSD that are
conditional only on MSR.VEC and those like LXV that are
conditional on MSR.VEC for TX=1.  Thus, in the end, we can
consider all of these as Altivec instructions.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 47 ++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
 #define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
 #define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
+#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
+#define LXSD       (OPCD(57) | 2)   /* v3.00 */
+#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */
 
 #define STVX       XO31(231)
 #define STVEWX     XO31(199)
 #define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
 #define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
+#define STXV       (OPCD(61) | 8 | 5)  /* v3.00, force sx=1 */
+#define STXSD      (OPCD(61) | 2)   /* v3.00 */
 
 #define VADDSBS    VX4(768)
 #define VADDUBS    VX4(512)
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                              TCGReg base, tcg_target_long offset)
 {
     tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
-    bool is_store = false;
+    bool is_int_store = false;
     TCGReg rs = TCG_REG_TMP1;
 
     switch (opi) {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
             break;
         }
         break;
+    case LXSD:
+    case STXSD:
+        align = 3;
+        break;
+    case LXV:
+    case STXV:
+        align = 15;
+        break;
     case STD:
         align = 3;
         /* FALLTHRU */
     case STB: case STH: case STW:
-        is_store = true;
+        is_int_store = true;
         break;
     }
 
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
     if (rs == base) {
         rs = TCG_REG_R0;
     }
-    tcg_debug_assert(!is_store || rs != rt);
+    tcg_debug_assert(!is_int_store || rs != rt);
     tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
     tcg_out32(s, opx | TAB(rt & 31, base, rs));
     return;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
     case TCG_TYPE_V64:
         tcg_debug_assert(ret >= TCG_REG_V0);
         if (have_vsx) {
-            tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
76
- tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
77
+ tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
78
+ ret, base, offset);
79
break;
80
}
81
tcg_debug_assert((offset & 7) == 0);
82
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
83
case TCG_TYPE_V128:
84
tcg_debug_assert(ret >= TCG_REG_V0);
85
tcg_debug_assert((offset & 15) == 0);
86
- tcg_out_mem_long(s, 0, LVX, ret, base, offset);
87
+ tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
88
+ LVX, ret, base, offset);
46
break;
89
break;
47
default:
90
default:
48
g_assert_not_reached();
91
g_assert_not_reached();
49
}
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
50
- return true;
93
case TCG_TYPE_V64:
51
+ return false;
94
tcg_debug_assert(arg >= TCG_REG_V0);
52
}
95
if (have_vsx) {
53
96
- tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
54
/* parse target specific constraints */
97
+ tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
55
@@ -XXX,XX +XXX,XX @@ static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
98
+ STXSDX, arg, base, offset);
56
99
break;
57
if (l->has_value) {
100
}
58
off = l->u.value_ptr - s->code_ptr;
101
tcg_debug_assert((offset & 7) == 0);
59
+ tcg_debug_assert(off == (int16_t)off);
102
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
60
} else {
103
break;
61
tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
104
case TCG_TYPE_V128:
62
}
105
tcg_debug_assert(arg >= TCG_REG_V0);
63
@@ -XXX,XX +XXX,XX @@ static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
106
- tcg_out_mem_long(s, 0, STVX, arg, base, offset);
64
107
+ tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
65
if (l->has_value) {
108
+ STVX, arg, base, offset);
66
off = l->u.value_ptr - s->code_ptr;
109
break;
67
+ tcg_debug_assert(off == (int16_t)off);
110
default:
68
} else {
111
g_assert_not_reached();
69
tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
112
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
70
}
113
tcg_debug_assert(out >= TCG_REG_V0);
71
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
114
switch (vece) {
72
TCGMemOpIdx oi = lb->oi;
115
case MO_8:
73
TCGMemOp opc = get_memop(oi);
116
- tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
74
117
+ if (have_isa_3_00) {
75
- patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, 2);
118
+ tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
76
+ bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
119
+ } else {
77
+ (intptr_t)s->code_ptr, 2);
120
+ tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
78
+ tcg_debug_assert(ok);
121
+ }
79
122
elt = extract32(offset, 0, 4);
80
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
123
#ifndef HOST_WORDS_BIGENDIAN
81
if (TARGET_LONG_BITS == 64) {
124
elt ^= 15;
82
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
125
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
83
TCGMemOpIdx oi = lb->oi;
126
break;
84
TCGMemOp opc = get_memop(oi);
127
case MO_16:
85
128
tcg_debug_assert((offset & 1) == 0);
86
- patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, 2);
129
- tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
87
+ bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
130
+ if (have_isa_3_00) {
88
+ (intptr_t)s->code_ptr, 2);
131
+ tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
89
+ tcg_debug_assert(ok);
132
+ } else {
90
133
+ tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
91
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
134
+ }
92
if (TARGET_LONG_BITS == 64) {
135
elt = extract32(offset, 1, 3);
136
#ifndef HOST_WORDS_BIGENDIAN
137
elt ^= 7;
138
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
139
tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
140
break;
141
case MO_32:
142
+ if (have_isa_3_00) {
143
+ tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
144
+ break;
145
+ }
146
tcg_debug_assert((offset & 3) == 0);
147
tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
148
elt = extract32(offset, 2, 2);
93
--
149
--
94
2.17.2
150
2.17.1
95
151
96
152
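The s390 patch above is one instance of the series-wide change in which patch_reloc() reports success rather than asserting internally, leaving the failure policy to the caller. A minimal self-contained sketch of that contract, with stand-in names for the TCG helpers:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Stand-in for the R_390_PC16DBL case: returns true only when the
     * 16-bit pc-relative value fits and has actually been written. */
    static bool patch_pc16(uint16_t *code_ptr, intptr_t pcrel2)
    {
        if (pcrel2 == (int16_t)pcrel2) {
            *code_ptr = (uint16_t)pcrel2;   /* stands in for tcg_patch16() */
            return true;
        }
        return false;               /* caller decides what failure means */
    }

    /* Caller pattern mirroring the qemu_ld/st slow-path hunks above. */
    static void patch_or_die(uint16_t *label_ptr, intptr_t value)
    {
        bool ok = patch_pc16(label_ptr, value);
        assert(ok);                 /* stands in for tcg_debug_assert(ok) */
    }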
These values are constant between all qemu_ld/st invocations;
there is no need to figure this out each time. If we cannot
use a segment or an offset directly for guest_base, load the
value into a register in the prologue.

Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/i386/tcg-target.inc.c | 101 +++++++++++++++-----------------------
1 file changed, 40 insertions(+), 61 deletions(-)

diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_push(s, retaddr);
tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
-#elif defined(__x86_64__) && defined(__linux__)
-# include <asm/prctl.h>
-# include <sys/prctl.h>
-
+#elif TCG_TARGET_REG_BITS == 32
+# define x86_guest_base_seg 0
+# define x86_guest_base_index -1
+# define x86_guest_base_offset guest_base
+#else
+static int x86_guest_base_seg;
+static int x86_guest_base_index = -1;
+static int32_t x86_guest_base_offset;
+# if defined(__x86_64__) && defined(__linux__)
+# include <asm/prctl.h>
+# include <sys/prctl.h>
int arch_prctl(int code, unsigned long addr);
-
-static int guest_base_flags;
-static inline void setup_guest_base_seg(void)
+static inline int setup_guest_base_seg(void)
{
if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
- guest_base_flags = P_GS;
+ return P_GS;
}
+ return 0;
}
-#else
-# define guest_base_flags 0
-static inline void setup_guest_base_seg(void) { }
+# else
+static inline int setup_guest_base_seg(void)
+{
+ return 0;
+}
+# endif
#endif /* SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi,
s->code_ptr, label_ptr);
#else
- {
- int32_t offset = guest_base;
- int index = -1;
- int seg = 0;
-
- /*
- * Recall we store 32-bit values zero-extended. No need for
- * further manual extension or an addr32 (0x67) prefix.
- */
- if (guest_base == 0 || guest_base_flags) {
- seg = guest_base_flags;
- offset = 0;
- } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
- index = TCG_REG_L1;
- offset = 0;
- }
-
- tcg_out_qemu_ld_direct(s, datalo, datahi,
- addrlo, index, offset, seg, is64, opc);
- }
+ tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
+ x86_guest_base_offset, x86_guest_base_seg,
+ is64, opc);
#endif
}

@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi,
s->code_ptr, label_ptr);
#else
- {
- int32_t offset = guest_base;
- int index = -1;
- int seg = 0;
-
- /*
- * Recall we store 32-bit values zero-extended. No need for
- * further manual extension or an addr32 (0x67) prefix.
- */
- if (guest_base == 0 || guest_base_flags) {
- seg = guest_base_flags;
- offset = 0;
- } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) {
- /* ??? Note that we require L0 free for bswap. */
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
- index = TCG_REG_L1;
- offset = 0;
- }
-
- tcg_out_qemu_st_direct(s, datalo, datahi,
- addrlo, index, offset, seg, opc);
- }
+ tcg_out_qemu_st_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
+ x86_guest_base_offset, x86_guest_base_seg, opc);
#endif
}

@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
             + stack_addend);
#else
+# if !defined(CONFIG_SOFTMMU) && TCG_TARGET_REG_BITS == 64
+ if (guest_base) {
+ int seg = setup_guest_base_seg();
+ if (seg != 0) {
+ x86_guest_base_seg = seg;
+ } else if (guest_base == (int32_t)guest_base) {
+ x86_guest_base_offset = guest_base;
+ } else {
+ /* Choose R12 because, as a base, it requires a SIB byte. */
+ x86_guest_base_index = TCG_REG_R12;
+ tcg_out_mov(s, TCG_TYPE_PTR, x86_guest_base_index, guest_base);
+ tcg_regset_set_reg(s->reserved_regs, x86_guest_base_index);
+ }
+ }
+# endif

tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
/* jmp *tb. */
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_pop(s, tcg_target_callee_save_regs[i]);
}
tcg_out_opc(s, OPC_RET, 0, 0, 0);
-
-#if !defined(CONFIG_SOFTMMU)
- /* Try to set up a segment register to point to guest_base. */
- if (guest_base) {
- setup_guest_base_seg();
- }
-#endif
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
--
2.17.2

These new instructions are conditional on MSR.VEC for TX=1,
so we can consider these Altivec instructions.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.inc.c | 28 ++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,

#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
+#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */
#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */
#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */
#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */
+#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */
+#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
return;
}
}
+ if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
+ tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
+ return;
+ }

/*
* Otherwise we must load the value from the constant pool.
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src)
{
tcg_debug_assert(dst >= TCG_REG_V0);
- tcg_debug_assert(src >= TCG_REG_V0);
+
+ /* Splat from integer reg allowed via constraints for v3.00. */
+ if (src < TCG_REG_V0) {
+ tcg_debug_assert(have_isa_3_00);
+ switch (vece) {
+ case MO_64:
+ tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
+ return true;
+ case MO_32:
+ tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
+ return true;
+ default:
+ /* Fail, so that we fall back on either dupm or mov+dup. */
+ return false;
+ }
+ }

/*
* Recall we use (or emulate) VSX integer loads, so the integer is
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
static const TCGTargetOpDef sub2
= { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
+ static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
static const TCGTargetOpDef v_v_v_v
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
return &v_v_v;
case INDEX_op_not_vec:
case INDEX_op_neg_vec:
- case INDEX_op_dup_vec:
return &v_v;
+ case INDEX_op_dup_vec:
+ return have_isa_3_00 ? &v_vr : &v_v;
case INDEX_op_ld_vec:
case INDEX_op_st_vec:
case INDEX_op_dupm_vec:
--
2.17.1
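To restate the i386 patch above: the prologue now resolves guest_base exactly once, preferring a segment override, then a sign-extended 32-bit displacement, and only then a dedicated base register. A compact sketch of that preference order (the helper and enum are hypothetical; the real code stores the result in the three x86_guest_base_* variables):

    #include <stdint.h>

    enum gb_strategy { GB_SEG, GB_OFFSET, GB_INDEX_REG };

    static enum gb_strategy pick_guest_base(uint64_t guest_base, int seg)
    {
        if (seg != 0) {
            return GB_SEG;        /* e.g. %gs set up via arch_prctl() */
        }
        if (guest_base == (uint64_t)(int32_t)guest_base) {
            return GB_OFFSET;     /* fits a sign-extended disp32 */
        }
        return GB_INDEX_REG;      /* reserve R12 and load it once */
    }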
From: "Emilio G. Cota" <cota@braap.org>

Before moving them all to include/qemu/xxhash.h.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/tb-hash-xx.h | 41 +++++++++++++++++++++++++++++----------
include/exec/tb-hash.h | 2 +-
tests/qht-bench.c | 2 +-
util/qsp.c | 12 ++++++------
4 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/include/exec/tb-hash-xx.h b/include/exec/tb-hash-xx.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/tb-hash-xx.h
+++ b/include/exec/tb-hash-xx.h
@@ -XXX,XX +XXX,XX @@
#define PRIME32_4 668265263U
#define PRIME32_5 374761393U

-#define TB_HASH_XX_SEED 1
+#define QEMU_XXHASH_SEED 1

/*
* xxhash32, customized for input variables that are not guaranteed to be
* contiguous in memory.
*/
static inline uint32_t
-tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g)
+qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
{
- uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
- uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
- uint32_t v3 = TB_HASH_XX_SEED + 0;
- uint32_t v4 = TB_HASH_XX_SEED - PRIME32_1;
- uint32_t a = a0 >> 32;
- uint32_t b = a0;
- uint32_t c = b0 >> 32;
- uint32_t d = b0;
+ uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2;
+ uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2;
+ uint32_t v3 = QEMU_XXHASH_SEED + 0;
+ uint32_t v4 = QEMU_XXHASH_SEED - PRIME32_1;
+ uint32_t a = ab >> 32;
+ uint32_t b = ab;
+ uint32_t c = cd >> 32;
+ uint32_t d = cd;
uint32_t h32;

v1 += a * PRIME32_2;
@@ -XXX,XX +XXX,XX @@ tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g)
return h32;
}

+static inline uint32_t qemu_xxhash2(uint64_t ab)
+{
+ return qemu_xxhash7(ab, 0, 0, 0, 0);
+}
+
+static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd)
+{
+ return qemu_xxhash7(ab, cd, 0, 0, 0);
+}
+
+static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e)
+{
+ return qemu_xxhash7(ab, cd, e, 0, 0);
+}
+
+static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e,
+ uint32_t f)
+{
+ return qemu_xxhash7(ab, cd, e, f, 0);
+}
+
#endif /* EXEC_TB_HASH_XX_H */
diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -XXX,XX +XXX,XX @@ static inline
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
uint32_t cf_mask, uint32_t trace_vcpu_dstate)
{
- return tb_hash_func7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
+ return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
}

#endif
diff --git a/tests/qht-bench.c b/tests/qht-bench.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/qht-bench.c
+++ b/tests/qht-bench.c
@@ -XXX,XX +XXX,XX @@ static bool is_equal(const void *ap, const void *bp)

static uint32_t h(unsigned long v)
{
- return tb_hash_func7(v, 0, 0, 0, 0);
+ return qemu_xxhash2(v);
}

static uint32_t hval(unsigned long v)
diff --git a/util/qsp.c b/util/qsp.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qsp.c
+++ b/util/qsp.c
@@ -XXX,XX +XXX,XX @@ QemuCondWaitFunc qemu_cond_wait_func = qemu_cond_wait_impl;
* without it we still get a pretty unique hash.
*/
static inline
-uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t a)
+uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t ab)
{
- uint64_t b = (uint64_t)(uintptr_t)callsite->obj;
+ uint64_t cd = (uint64_t)(uintptr_t)callsite->obj;
uint32_t e = callsite->line;
uint32_t f = callsite->type;

- return tb_hash_func7(a, b, e, f, 0);
+ return qemu_xxhash6(ab, cd, e, f);
}

static inline
@@ -XXX,XX +XXX,XX @@ static uint32_t qsp_entry_no_thread_hash(const QSPEntry *entry)
static uint32_t qsp_entry_no_thread_obj_hash(const QSPEntry *entry)
{
const QSPCallSite *callsite = entry->callsite;
- uint64_t a = g_str_hash(callsite->file);
- uint64_t b = callsite->line;
+ uint64_t ab = g_str_hash(callsite->file);
+ uint64_t cd = callsite->line;
uint32_t e = callsite->type;

- return tb_hash_func7(a, b, e, 0, 0);
+ return qemu_xxhash5(ab, cd, e);
}

static bool qsp_callsite_cmp(const void *ap, const void *bp)
--
2.17.2

From: Alex Bennée <alex.bennee@linaro.org>

qemu_cpu_kick is used for a number of reasons including to indicate
there is work to be done. However, when thread=single, the old
qemu_cpu_kick_rr_cpu only advanced the vCPU to the next executing one,
which can lead to a hang in the case that:

a) the kick is from outside the vCPUs (e.g. iothread)
b) the timers are paused (i.e. iothread calling run_on_cpu)

To avoid this, let's split qemu_cpu_kick_rr into two functions. One for
the timer which continues to advance to the next timeslice and another
for all other kicks.

Message-Id: <20191001160426.26644-1-alex.bennee@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
cpus.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/cpus.c b/cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/cpus.c
+++ b/cpus.c
@@ -XXX,XX +XXX,XX @@ static inline int64_t qemu_tcg_next_kick(void)
return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

-/* Kick the currently round-robin scheduled vCPU */
-static void qemu_cpu_kick_rr_cpu(void)
+/* Kick the currently round-robin scheduled vCPU to next */
+static void qemu_cpu_kick_rr_next_cpu(void)
{
CPUState *cpu;
do {
@@ -XXX,XX +XXX,XX @@ static void qemu_cpu_kick_rr_cpu(void)
} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

+/* Kick all RR vCPUs */
+static void qemu_cpu_kick_rr_cpus(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ cpu_exit(cpu);
+ };
+}
+
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

@@ -XXX,XX +XXX,XX @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
static void kick_tcg_thread(void *opaque)
{
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
- qemu_cpu_kick_rr_cpu();
+ qemu_cpu_kick_rr_next_cpu();
}

static void start_tcg_kick_timer(void)
@@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (tcg_enabled()) {
- cpu_exit(cpu);
- /* NOP unless doing single-thread RR */
- qemu_cpu_kick_rr_cpu();
+ if (qemu_tcg_mttcg_enabled()) {
+ cpu_exit(cpu);
+ } else {
+ qemu_cpu_kick_rr_cpus();
+ }
} else {
if (hax_enabled()) {
/*
--
2.17.1
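One note on the qemu_xxhash{2,4,5,6} wrappers introduced above: each is exactly the zero-padded call to qemu_xxhash7() that it replaces, so call sites hashing fewer inputs no longer spell out the padding themselves. A self-contained sketch of the pattern, using a stand-in mixer instead of the real xxhash32 core:

    #include <assert.h>
    #include <stdint.h>

    /* Stand-in for qemu_xxhash7(); only the padding pattern matters here. */
    static uint32_t hash7(uint64_t ab, uint64_t cd, uint32_t e,
                          uint32_t f, uint32_t g)
    {
        return (uint32_t)(ab ^ (ab >> 32) ^ cd ^ (cd >> 32)) ^ e ^ f ^ g;
    }

    static uint32_t hash2(uint64_t ab)
    {
        return hash7(ab, 0, 0, 0, 0);
    }

    static uint32_t hash5(uint64_t ab, uint64_t cd, uint32_t e)
    {
        return hash7(ab, cd, e, 0, 0);
    }

    int main(void)
    {
        /* Each wrapper is equivalent to the zero-padded call it replaces. */
        assert(hash2(0x1234) == hash7(0x1234, 0, 0, 0, 0));
        assert(hash5(1, 2, 3) == hash7(1, 2, 3, 0, 0));
        return 0;
    }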
Deleted patch

I didn't get this fix pushed back into the patch set that I actually
sent last week. The patch is in target-arm.next, and I'm sure you
would have eventually seen the error in testing.


r~
---
target/arm/kvm64.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
ARM64_SYS_REG(3, 0, 0, 6, 0));
err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
ARM64_SYS_REG(3, 0, 0, 6, 1));
- err |= read_sys_reg64(fdarray[2], &achf->isar.id_aa64mmfr0,
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
ARM64_SYS_REG(3, 0, 0, 7, 0));
- err |= read_sys_reg64(fdarray[2], &achf->isar.id_aa64mmfr1,
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
ARM64_SYS_REG(3, 0, 0, 7, 1));

/*
--
2.17.2