This patch collection contains:

* A couple of fixes for i386 host vector support.

* Some random cleanups cherry-picked from some inactive branches.

* A reposting (with fix) of my "better handling of constants" set:

  https://lists.nongnu.org/archive/html/qemu-devel/2020-05/msg02152.html

* A couple of patches that centralize the set of host constraints.

This, I believe, is slightly cleaner than the current state of
affairs, even before the ultimate goal of pre-validating the
contents as well.

r~

Richard Henderson (43):
      tcg: Adjust simd_desc size encoding
      tcg: Drop union from TCGArgConstraint
      tcg: Move sorted_args into TCGArgConstraint.sort_index
      tcg: Remove TCG_CT_REG
      tcg: Move some TCG_CT_* bits to TCGArgConstraint bitfields
      tcg: Remove TCGOpDef.used
      tcg/i386: Fix dupi for avx2 32-bit hosts
      tcg: Fix generation of dupi_vec for 32-bit host
      tcg/optimize: Fold dup2_vec
      tcg: Remove TCG_TARGET_HAS_cmp_vec
      tcg: Use tcg_out_dupi_vec from temp_load
      tcg: Increase tcg_out_dupi_vec immediate to int64_t
      tcg: Consolidate 3 bits into enum TCGTempKind
      tcg: Add temp_readonly
      tcg: Expand TCGTemp.val to 64-bits
      tcg: Rename struct tcg_temp_info to TempOptInfo
      tcg: Expand TempOptInfo to 64-bits
      tcg: Introduce TYPE_CONST temporaries
      tcg/optimize: Improve find_better_copy
      tcg/optimize: Adjust TempOptInfo allocation
      tcg/optimize: Use tcg_constant_internal with constant folding
      tcg: Convert tcg_gen_dupi_vec to TCG_CONST
      tcg: Use tcg_constant_i32 with icount expander
      tcg: Use tcg_constant_{i32,i64} with tcg int expanders
      tcg: Use tcg_constant_{i32,i64} with tcg plugins
      tcg: Use tcg_constant_{i32,i64,vec} with gvec expanders
      tcg/tci: Add special tci_movi_{i32,i64} opcodes
      tcg: Remove movi and dupi opcodes
      tcg: Add tcg_reg_alloc_dup2
      tcg/i386: Use tcg_constant_vec with tcg vec expanders
      tcg: Remove tcg_gen_dup{8,16,32,64}i_vec
      tcg/ppc: Use tcg_constant_vec with tcg vec expanders
      tcg/aarch64: Use tcg_constant_vec with tcg vec expanders
      tcg: Add tcg-constr.c.inc
      tcg/i386: Convert to tcg-constr.c.inc
      tcg/aarch64: Convert to tcg-constr.c.inc
      tcg/arm: Convert to tcg-constr.c.inc
      tcg/mips: Convert to tcg-constr.c.inc
      tcg/ppc: Convert to tcg-constr.c.inc
      tcg/riscv: Convert to tcg-constr.c.inc
      tcg/s390: Convert to tcg-constr.c.inc
      tcg/sparc: Convert to tcg-constr.c.inc
      tcg/tci: Convert to tcg-constr.c.inc

 include/exec/gen-icount.h       | 25 +-
 include/tcg/tcg-gvec-desc.h     | 38 ++-
 include/tcg/tcg-op.h            | 17 +-
 include/tcg/tcg-opc.h           | 11 +-
 include/tcg/tcg.h               | 72 +++--
 tcg/aarch64/tcg-target-constr.h | 31 ++
 tcg/aarch64/tcg-target.h        | 1 -
 tcg/arm/tcg-target-constr.h     | 30 ++
 tcg/i386/tcg-target-constr.h    | 55 ++++
 tcg/i386/tcg-target.h           | 1 -
 tcg/mips/tcg-target-constr.h    | 31 ++
 tcg/ppc/tcg-target-constr.h     | 37 +++
 tcg/ppc/tcg-target.h            | 1 -
 tcg/riscv/tcg-target-constr.h   | 25 ++
 tcg/s390/tcg-target-constr.h    | 24 ++
 tcg/sparc/tcg-target-constr.h   | 27 ++
 tcg/tci/tcg-target-constr.h     | 28 ++
 accel/tcg/plugin-gen.c          | 49 ++-
 tcg/optimize.c                  | 254 ++++++++-------
 tcg/tcg-op-gvec.c               | 160 +++++-----
 tcg/tcg-op-vec.c                | 48 +--
 tcg/tcg-op.c                    | 227 +++++++------
 tcg/tcg.c                       | 549 +++++++++++++++++++++++---------
 tcg/tci.c                       | 4 +-
 tcg/aarch64/tcg-target.c.inc    | 134 +++-----
 tcg/arm/tcg-target.c.inc        | 123 +++----
 tcg/i386/tcg-target.c.inc       | 336 +++++++++----------
 tcg/mips/tcg-target.c.inc       | 118 +++----
 tcg/ppc/tcg-target.c.inc        | 254 +++++++--------
 tcg/riscv/tcg-target.c.inc      | 100 ++----
 tcg/s390/tcg-target.c.inc       | 143 ++++-----
 tcg/sparc/tcg-target.c.inc      | 97 ++----
 tcg/tcg-constr.c.inc            | 108 +++++++
 tcg/tci/tcg-target.c.inc        | 369 ++++++++-------------
 34 files changed, 1893 insertions(+), 1634 deletions(-)
 create mode 100644 tcg/aarch64/tcg-target-constr.h
 create mode 100644 tcg/arm/tcg-target-constr.h
 create mode 100644 tcg/i386/tcg-target-constr.h
 create mode 100644 tcg/mips/tcg-target-constr.h
 create mode 100644 tcg/ppc/tcg-target-constr.h
 create mode 100644 tcg/riscv/tcg-target-constr.h
 create mode 100644 tcg/s390/tcg-target-constr.h
 create mode 100644 tcg/sparc/tcg-target-constr.h
 create mode 100644 tcg/tci/tcg-target-constr.h
 create mode 100644 tcg/tcg-constr.c.inc

----------------------------------------------------------------

The following changes since commit c52d69e7dbaaed0ffdef8125e79218672c30161d:

  Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20211027' into staging (2021-10-27 11:45:18 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20211027

for you to fetch changes up to 820c025f0dcacf2f3c12735b1f162893fbfa7bc6:

  tcg/optimize: Propagate sign info for shifting (2021-10-27 17:11:23 -0700)

----------------------------------------------------------------
Improvements to qemu/int128
Fixes for 128/64 division.
Cleanup tcg/optimize.c
Optimize redundant sign extensions

----------------------------------------------------------------
Frédéric Pétrot (1):
      qemu/int128: Add int128_{not,xor}

Luis Pires (4):
      host-utils: move checks out of divu128/divs128
      host-utils: move udiv_qrnnd() to host-utils
      host-utils: add 128-bit quotient support to divu128/divs128
      host-utils: add unit tests for divu128/divs128

Richard Henderson (51):
      tcg/optimize: Rename "mask" to "z_mask"
      tcg/optimize: Split out OptContext
      tcg/optimize: Remove do_default label
      tcg/optimize: Change tcg_opt_gen_{mov,movi} interface
      tcg/optimize: Move prev_mb into OptContext
      tcg/optimize: Split out init_arguments
      tcg/optimize: Split out copy_propagate
      tcg/optimize: Split out fold_call
      tcg/optimize: Drop nb_oargs, nb_iargs locals
      tcg/optimize: Change fail return for do_constant_folding_cond*
      tcg/optimize: Return true from tcg_opt_gen_{mov,movi}
      tcg/optimize: Split out finish_folding
      tcg/optimize: Use a boolean to avoid a mass of continues
      tcg/optimize: Split out fold_mb, fold_qemu_{ld,st}
      tcg/optimize: Split out fold_const{1,2}
      tcg/optimize: Split out fold_setcond2
      tcg/optimize: Split out fold_brcond2
      tcg/optimize: Split out fold_brcond
      tcg/optimize: Split out fold_setcond
      tcg/optimize: Split out fold_mulu2_i32
      tcg/optimize: Split out fold_addsub2_i32
      tcg/optimize: Split out fold_movcond
      tcg/optimize: Split out fold_extract2
      tcg/optimize: Split out fold_extract, fold_sextract
      tcg/optimize: Split out fold_deposit
      tcg/optimize: Split out fold_count_zeros
      tcg/optimize: Split out fold_bswap
      tcg/optimize: Split out fold_dup, fold_dup2
      tcg/optimize: Split out fold_mov
      tcg/optimize: Split out fold_xx_to_i
      tcg/optimize: Split out fold_xx_to_x
      tcg/optimize: Split out fold_xi_to_i
      tcg/optimize: Add type to OptContext
      tcg/optimize: Split out fold_to_not
      tcg/optimize: Split out fold_sub_to_neg
      tcg/optimize: Split out fold_xi_to_x
      tcg/optimize: Split out fold_ix_to_i
      tcg/optimize: Split out fold_masks
      tcg/optimize: Expand fold_mulu2_i32 to all 4-arg multiplies
      tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops
      tcg/optimize: Sink commutative operand swapping into fold functions
      tcg/optimize: Stop forcing z_mask to "garbage" for 32-bit values
      tcg/optimize: Use fold_xx_to_i for orc
      tcg/optimize: Use fold_xi_to_x for mul
      tcg/optimize: Use fold_xi_to_x for div
      tcg/optimize: Use fold_xx_to_i for rem
      tcg/optimize: Optimize sign extensions
      tcg/optimize: Propagate sign info for logical operations
      tcg/optimize: Propagate sign info for setcond
      tcg/optimize: Propagate sign info for bit counting
      tcg/optimize: Propagate sign info for shifting

 include/fpu/softfloat-macros.h | 82 --
 include/hw/clock.h             | 5 +-
 include/qemu/host-utils.h      | 121 +-
 include/qemu/int128.h          | 20 +
 target/ppc/int_helper.c        | 23 +-
 tcg/optimize.c                 | 2644 ++++++++++++++++++++++++----------------
 tests/unit/test-div128.c       | 197 +++
 util/host-utils.c              | 147 ++-
 tests/unit/meson.build         | 1 +
 9 files changed, 2053 insertions(+), 1187 deletions(-)
 create mode 100644 tests/unit/test-div128.c

--
2.25.1

From: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>

Addition of not and xor on 128-bit integers.

Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
Message-Id: <20211025122818.168890-3-frederic.petrot@univ-grenoble-alpes.fr>
[rth: Split out logical operations.]
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/int128.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op.h | 13 +--
 tcg/tcg-op.c         | 227 ++++++++++++++++++++-----------------------
 2 files changed, 109 insertions(+), 131 deletions(-)
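The new int128_not()/int128_xor() helpers from the int128 change above are
easy to exercise in isolation. A minimal, self-contained sketch of the
non-CONFIG_INT128 fallback semantics (simplified two-limb layout for
illustration only, not the actual qemu/int128.h):

/* Illustration only: mirrors the struct-based Int128 fallback. */
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

typedef struct { uint64_t lo; int64_t hi; } Int128;

static inline Int128 int128_make128(uint64_t lo, int64_t hi)
{
    return (Int128){ .lo = lo, .hi = hi };
}

static inline Int128 int128_not(Int128 a)
{
    /* Bitwise NOT is applied limb by limb. */
    return int128_make128(~a.lo, ~a.hi);
}

static inline Int128 int128_xor(Int128 a, Int128 b)
{
    /* Likewise for XOR. */
    return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
}

int main(void)
{
    Int128 a = int128_make128(0x00ff00ff00ff00ffULL, 0);
    Int128 b = int128_not(a);
    Int128 c = int128_xor(a, b);   /* a ^ ~a == all bits set */

    printf("%016" PRIx64 "%016" PRIx64 "\n", (uint64_t)c.hi, c.lo);
    return 0;
}
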
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
15
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
8
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
9
--- a/include/tcg/tcg-op.h
17
--- a/include/qemu/int128.h
10
+++ b/include/tcg/tcg-op.h
18
+++ b/include/qemu/int128.h
11
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mb(TCGBar);
19
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
12
20
return a;
13
/* 32 bit ops */
14
15
+void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg);
16
void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
17
void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2);
18
void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
19
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg)
20
}
21
}
21
}
22
22
23
-static inline void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
23
+static inline Int128 int128_not(Int128 a)
24
-{
25
- tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg);
26
-}
27
-
28
static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2,
29
tcg_target_long offset)
30
{
31
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg)
32
33
/* 64 bit ops */
34
35
+void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg);
36
void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
37
void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2);
38
void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
39
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
40
}
41
}
42
43
-static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
44
-{
45
- tcg_gen_op2i_i64(INDEX_op_movi_i64, ret, arg);
46
-}
47
-
48
static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2,
49
tcg_target_long offset)
50
{
51
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
52
53
void tcg_gen_discard_i64(TCGv_i64 arg);
54
void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg);
55
-void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg);
56
void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
57
void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
58
void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
59
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/tcg/tcg-op.c
62
+++ b/tcg/tcg-op.c
63
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mb(TCGBar mb_type)
64
65
/* 32 bit ops */
66
67
+void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
68
+{
24
+{
69
+ tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
25
+ return ~a;
70
+}
26
+}
71
+
27
+
72
void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
28
static inline Int128 int128_and(Int128 a, Int128 b)
73
{
29
{
74
/* some cases can be optimized here */
30
return a & b;
75
if (arg2 == 0) {
31
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
76
tcg_gen_mov_i32(ret, arg1);
32
return a | b;
77
} else {
78
- TCGv_i32 t0 = tcg_const_i32(arg2);
79
- tcg_gen_add_i32(ret, arg1, t0);
80
- tcg_temp_free_i32(t0);
81
+ tcg_gen_add_i32(ret, arg1, tcg_constant_i32(arg2));
82
}
83
}
33
}
84
34
85
@@ -XXX,XX +XXX,XX @@ void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
35
+static inline Int128 int128_xor(Int128 a, Int128 b)
86
/* Don't recurse with tcg_gen_neg_i32. */
87
tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
88
} else {
89
- TCGv_i32 t0 = tcg_const_i32(arg1);
90
- tcg_gen_sub_i32(ret, t0, arg2);
91
- tcg_temp_free_i32(t0);
92
+ tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2);
93
}
94
}
95
96
@@ -XXX,XX +XXX,XX @@ void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
97
if (arg2 == 0) {
98
tcg_gen_mov_i32(ret, arg1);
99
} else {
100
- TCGv_i32 t0 = tcg_const_i32(arg2);
101
- tcg_gen_sub_i32(ret, arg1, t0);
102
- tcg_temp_free_i32(t0);
103
+ tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
104
}
105
}
106
107
void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
108
{
109
- TCGv_i32 t0;
110
/* Some cases can be optimized here. */
111
switch (arg2) {
112
case 0:
113
@@ -XXX,XX +XXX,XX @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
114
}
115
break;
116
}
117
- t0 = tcg_const_i32(arg2);
118
- tcg_gen_and_i32(ret, arg1, t0);
119
- tcg_temp_free_i32(t0);
120
+
121
+ tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2));
122
}
123
124
void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
125
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
126
} else if (arg2 == 0) {
127
tcg_gen_mov_i32(ret, arg1);
128
} else {
129
- TCGv_i32 t0 = tcg_const_i32(arg2);
130
- tcg_gen_or_i32(ret, arg1, t0);
131
- tcg_temp_free_i32(t0);
132
+ tcg_gen_or_i32(ret, arg1, tcg_constant_i32(arg2));
133
}
134
}
135
136
@@ -XXX,XX +XXX,XX @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
137
/* Don't recurse with tcg_gen_not_i32. */
138
tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
139
} else {
140
- TCGv_i32 t0 = tcg_const_i32(arg2);
141
- tcg_gen_xor_i32(ret, arg1, t0);
142
- tcg_temp_free_i32(t0);
143
+ tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2));
144
}
145
}
146
147
@@ -XXX,XX +XXX,XX @@ void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
148
if (arg2 == 0) {
149
tcg_gen_mov_i32(ret, arg1);
150
} else {
151
- TCGv_i32 t0 = tcg_const_i32(arg2);
152
- tcg_gen_shl_i32(ret, arg1, t0);
153
- tcg_temp_free_i32(t0);
154
+ tcg_gen_shl_i32(ret, arg1, tcg_constant_i32(arg2));
155
}
156
}
157
158
@@ -XXX,XX +XXX,XX @@ void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
159
if (arg2 == 0) {
160
tcg_gen_mov_i32(ret, arg1);
161
} else {
162
- TCGv_i32 t0 = tcg_const_i32(arg2);
163
- tcg_gen_shr_i32(ret, arg1, t0);
164
- tcg_temp_free_i32(t0);
165
+ tcg_gen_shr_i32(ret, arg1, tcg_constant_i32(arg2));
166
}
167
}
168
169
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
170
if (arg2 == 0) {
171
tcg_gen_mov_i32(ret, arg1);
172
} else {
173
- TCGv_i32 t0 = tcg_const_i32(arg2);
174
- tcg_gen_sar_i32(ret, arg1, t0);
175
- tcg_temp_free_i32(t0);
176
+ tcg_gen_sar_i32(ret, arg1, tcg_constant_i32(arg2));
177
}
178
}
179
180
@@ -XXX,XX +XXX,XX @@ void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
181
if (cond == TCG_COND_ALWAYS) {
182
tcg_gen_br(l);
183
} else if (cond != TCG_COND_NEVER) {
184
- TCGv_i32 t0 = tcg_const_i32(arg2);
185
- tcg_gen_brcond_i32(cond, arg1, t0, l);
186
- tcg_temp_free_i32(t0);
187
+ tcg_gen_brcond_i32(cond, arg1, tcg_constant_i32(arg2), l);
188
}
189
}
190
191
@@ -XXX,XX +XXX,XX @@ void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
192
void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
193
TCGv_i32 arg1, int32_t arg2)
194
{
195
- TCGv_i32 t0 = tcg_const_i32(arg2);
196
- tcg_gen_setcond_i32(cond, ret, arg1, t0);
197
- tcg_temp_free_i32(t0);
198
+ tcg_gen_setcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
199
}
200
201
void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
202
@@ -XXX,XX +XXX,XX @@ void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
203
} else if (is_power_of_2(arg2)) {
204
tcg_gen_shli_i32(ret, arg1, ctz32(arg2));
205
} else {
206
- TCGv_i32 t0 = tcg_const_i32(arg2);
207
- tcg_gen_mul_i32(ret, arg1, t0);
208
- tcg_temp_free_i32(t0);
209
+ tcg_gen_mul_i32(ret, arg1, tcg_constant_i32(arg2));
210
}
211
}
212
213
@@ -XXX,XX +XXX,XX @@ void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
214
215
void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
216
{
217
- TCGv_i32 t = tcg_const_i32(arg2);
218
- tcg_gen_clz_i32(ret, arg1, t);
219
- tcg_temp_free_i32(t);
220
+ tcg_gen_clz_i32(ret, arg1, tcg_constant_i32(arg2));
221
}
222
223
void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
224
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
225
tcg_gen_clzi_i32(t, t, 32);
226
tcg_gen_xori_i32(t, t, 31);
227
}
228
- z = tcg_const_i32(0);
229
+ z = tcg_constant_i32(0);
230
tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
231
tcg_temp_free_i32(t);
232
- tcg_temp_free_i32(z);
233
} else {
234
gen_helper_ctz_i32(ret, arg1, arg2);
235
}
236
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
237
tcg_gen_ctpop_i32(ret, t);
238
tcg_temp_free_i32(t);
239
} else {
240
- TCGv_i32 t = tcg_const_i32(arg2);
241
- tcg_gen_ctz_i32(ret, arg1, t);
242
- tcg_temp_free_i32(t);
243
+ tcg_gen_ctz_i32(ret, arg1, tcg_constant_i32(arg2));
244
}
245
}
246
247
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
248
if (arg2 == 0) {
249
tcg_gen_mov_i32(ret, arg1);
250
} else if (TCG_TARGET_HAS_rot_i32) {
251
- TCGv_i32 t0 = tcg_const_i32(arg2);
252
- tcg_gen_rotl_i32(ret, arg1, t0);
253
- tcg_temp_free_i32(t0);
254
+ tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2));
255
} else {
256
TCGv_i32 t0, t1;
257
t0 = tcg_temp_new_i32();
258
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
259
tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
260
} else if (TCG_TARGET_HAS_deposit_i32
261
&& TCG_TARGET_deposit_i32_valid(ofs, len)) {
262
- TCGv_i32 zero = tcg_const_i32(0);
263
+ TCGv_i32 zero = tcg_constant_i32(0);
264
tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
265
- tcg_temp_free_i32(zero);
266
} else {
267
/* To help two-operand hosts we prefer to zero-extend first,
268
which allows ARG to stay live. */
269
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
270
} else {
271
TCGv_i32 t0 = tcg_temp_new_i32();
272
TCGv_i32 t1 = tcg_temp_new_i32();
273
- TCGv_i32 t2 = tcg_const_i32(0x00ff00ff);
274
+ TCGv_i32 t2 = tcg_constant_i32(0x00ff00ff);
275
276
/* arg = abcd */
277
tcg_gen_shri_i32(t0, arg, 8); /* t0 = .abc */
278
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
279
280
tcg_temp_free_i32(t0);
281
tcg_temp_free_i32(t1);
282
- tcg_temp_free_i32(t2);
283
}
284
}
285
286
@@ -XXX,XX +XXX,XX @@ void tcg_gen_discard_i64(TCGv_i64 arg)
287
288
void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
289
{
290
- tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
291
- tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
292
+ TCGTemp *ts = tcgv_i64_temp(arg);
293
+
294
+ /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */
295
+ if (ts->kind == TEMP_CONST) {
296
+ tcg_gen_movi_i64(ret, ts->val);
297
+ } else {
298
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
299
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
300
+ }
301
}
302
303
void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
304
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
305
tcg_temp_free_i64(t0);
306
tcg_temp_free_i32(t1);
307
}
308
+
309
+#else
310
+
311
+void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
312
+{
36
+{
313
+ tcg_gen_mov_i64(ret, tcg_constant_i64(arg));
37
+ return a ^ b;
314
+}
38
+}
315
+
39
+
316
#endif /* TCG_TARGET_REG_SIZE == 32 */
40
static inline Int128 int128_rshift(Int128 a, int n)
317
41
{
318
void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
42
return a >> n;
319
@@ -XXX,XX +XXX,XX @@ void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
43
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
320
/* some cases can be optimized here */
44
return int128_make128(a, (a < 0) ? -1 : 0);
321
if (arg2 == 0) {
322
tcg_gen_mov_i64(ret, arg1);
323
+ } else if (TCG_TARGET_REG_BITS == 64) {
324
+ tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
325
} else {
326
- TCGv_i64 t0 = tcg_const_i64(arg2);
327
- tcg_gen_add_i64(ret, arg1, t0);
328
- tcg_temp_free_i64(t0);
329
+ tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
330
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
331
+ tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
332
}
333
}
45
}
334
46
335
@@ -XXX,XX +XXX,XX @@ void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
47
+static inline Int128 int128_not(Int128 a)
336
if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
48
+{
337
/* Don't recurse with tcg_gen_neg_i64. */
49
+ return int128_make128(~a.lo, ~a.hi);
338
tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
50
+}
339
+ } else if (TCG_TARGET_REG_BITS == 64) {
51
+
340
+ tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2);
52
static inline Int128 int128_and(Int128 a, Int128 b)
341
} else {
53
{
342
- TCGv_i64 t0 = tcg_const_i64(arg1);
54
return int128_make128(a.lo & b.lo, a.hi & b.hi);
343
- tcg_gen_sub_i64(ret, t0, arg2);
55
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
344
- tcg_temp_free_i64(t0);
56
return int128_make128(a.lo | b.lo, a.hi | b.hi);
345
+ tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
346
+ tcg_constant_i32(arg1), tcg_constant_i32(arg1 >> 32),
347
+ TCGV_LOW(arg2), TCGV_HIGH(arg2));
348
}
349
}
57
}
350
58
351
@@ -XXX,XX +XXX,XX @@ void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
59
+static inline Int128 int128_xor(Int128 a, Int128 b)
352
/* some cases can be optimized here */
60
+{
353
if (arg2 == 0) {
61
+ return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
354
tcg_gen_mov_i64(ret, arg1);
62
+}
355
+ } else if (TCG_TARGET_REG_BITS == 64) {
63
+
356
+ tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
64
static inline Int128 int128_rshift(Int128 a, int n)
357
} else {
358
- TCGv_i64 t0 = tcg_const_i64(arg2);
359
- tcg_gen_sub_i64(ret, arg1, t0);
360
- tcg_temp_free_i64(t0);
361
+ tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
362
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
363
+ tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
364
}
365
}
366
367
void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
368
{
65
{
369
- TCGv_i64 t0;
66
int64_t h;
370
-
371
if (TCG_TARGET_REG_BITS == 32) {
372
tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
373
tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
374
@@ -XXX,XX +XXX,XX @@ void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
375
}
376
break;
377
}
378
- t0 = tcg_const_i64(arg2);
379
- tcg_gen_and_i64(ret, arg1, t0);
380
- tcg_temp_free_i64(t0);
381
+
382
+ tcg_gen_and_i64(ret, arg1, tcg_constant_i64(arg2));
383
}
384
385
void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
386
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
387
} else if (arg2 == 0) {
388
tcg_gen_mov_i64(ret, arg1);
389
} else {
390
- TCGv_i64 t0 = tcg_const_i64(arg2);
391
- tcg_gen_or_i64(ret, arg1, t0);
392
- tcg_temp_free_i64(t0);
393
+ tcg_gen_or_i64(ret, arg1, tcg_constant_i64(arg2));
394
}
395
}
396
397
@@ -XXX,XX +XXX,XX @@ void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
398
/* Don't recurse with tcg_gen_not_i64. */
399
tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
400
} else {
401
- TCGv_i64 t0 = tcg_const_i64(arg2);
402
- tcg_gen_xor_i64(ret, arg1, t0);
403
- tcg_temp_free_i64(t0);
404
+ tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2));
405
}
406
}
407
408
@@ -XXX,XX +XXX,XX @@ void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
409
} else if (arg2 == 0) {
410
tcg_gen_mov_i64(ret, arg1);
411
} else {
412
- TCGv_i64 t0 = tcg_const_i64(arg2);
413
- tcg_gen_shl_i64(ret, arg1, t0);
414
- tcg_temp_free_i64(t0);
415
+ tcg_gen_shl_i64(ret, arg1, tcg_constant_i64(arg2));
416
}
417
}
418
419
@@ -XXX,XX +XXX,XX @@ void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
420
} else if (arg2 == 0) {
421
tcg_gen_mov_i64(ret, arg1);
422
} else {
423
- TCGv_i64 t0 = tcg_const_i64(arg2);
424
- tcg_gen_shr_i64(ret, arg1, t0);
425
- tcg_temp_free_i64(t0);
426
+ tcg_gen_shr_i64(ret, arg1, tcg_constant_i64(arg2));
427
}
428
}
429
430
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
431
} else if (arg2 == 0) {
432
tcg_gen_mov_i64(ret, arg1);
433
} else {
434
- TCGv_i64 t0 = tcg_const_i64(arg2);
435
- tcg_gen_sar_i64(ret, arg1, t0);
436
- tcg_temp_free_i64(t0);
437
+ tcg_gen_sar_i64(ret, arg1, tcg_constant_i64(arg2));
438
}
439
}
440
441
@@ -XXX,XX +XXX,XX @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
442
443
void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
444
{
445
- if (cond == TCG_COND_ALWAYS) {
446
+ if (TCG_TARGET_REG_BITS == 64) {
447
+ tcg_gen_brcond_i64(cond, arg1, tcg_constant_i64(arg2), l);
448
+ } else if (cond == TCG_COND_ALWAYS) {
449
tcg_gen_br(l);
450
} else if (cond != TCG_COND_NEVER) {
451
- TCGv_i64 t0 = tcg_const_i64(arg2);
452
- tcg_gen_brcond_i64(cond, arg1, t0, l);
453
- tcg_temp_free_i64(t0);
454
+ l->refs++;
455
+ tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
456
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
457
+ tcg_constant_i32(arg2),
458
+ tcg_constant_i32(arg2 >> 32),
459
+ cond, label_arg(l));
460
}
461
}
462
463
@@ -XXX,XX +XXX,XX @@ void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
464
void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
465
TCGv_i64 arg1, int64_t arg2)
466
{
467
- TCGv_i64 t0 = tcg_const_i64(arg2);
468
- tcg_gen_setcond_i64(cond, ret, arg1, t0);
469
- tcg_temp_free_i64(t0);
470
+ if (TCG_TARGET_REG_BITS == 64) {
471
+ tcg_gen_setcond_i64(cond, ret, arg1, tcg_constant_i64(arg2));
472
+ } else if (cond == TCG_COND_ALWAYS) {
473
+ tcg_gen_movi_i64(ret, 1);
474
+ } else if (cond == TCG_COND_NEVER) {
475
+ tcg_gen_movi_i64(ret, 0);
476
+ } else {
477
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
478
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
479
+ tcg_constant_i32(arg2),
480
+ tcg_constant_i32(arg2 >> 32), cond);
481
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
482
+ }
483
}
484
485
void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
486
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
487
} else {
488
TCGv_i64 t0 = tcg_temp_new_i64();
489
TCGv_i64 t1 = tcg_temp_new_i64();
490
- TCGv_i64 t2 = tcg_const_i64(0x00ff00ff);
491
+ TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
492
493
/* arg = ....abcd */
494
tcg_gen_shri_i64(t0, arg, 8); /* t0 = .....abc */
495
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
496
497
tcg_temp_free_i64(t0);
498
tcg_temp_free_i64(t1);
499
- tcg_temp_free_i64(t2);
500
}
501
}
502
503
@@ -XXX,XX +XXX,XX @@ void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
504
if (TCG_TARGET_REG_BITS == 32
505
&& TCG_TARGET_HAS_clz_i32
506
&& arg2 <= 0xffffffffu) {
507
- TCGv_i32 t = tcg_const_i32((uint32_t)arg2 - 32);
508
- tcg_gen_clz_i32(t, TCGV_LOW(arg1), t);
509
+ TCGv_i32 t = tcg_temp_new_i32();
510
+ tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32);
511
tcg_gen_addi_i32(t, t, 32);
512
tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
513
tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
514
tcg_temp_free_i32(t);
515
} else {
516
- TCGv_i64 t = tcg_const_i64(arg2);
517
- tcg_gen_clz_i64(ret, arg1, t);
518
- tcg_temp_free_i64(t);
519
+ TCGv_i64 t0 = tcg_const_i64(arg2);
520
+ tcg_gen_clz_i64(ret, arg1, t0);
521
+ tcg_temp_free_i64(t0);
522
}
523
}
524
525
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
526
tcg_gen_clzi_i64(t, t, 64);
527
tcg_gen_xori_i64(t, t, 63);
528
}
529
- z = tcg_const_i64(0);
530
+ z = tcg_constant_i64(0);
531
tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
532
tcg_temp_free_i64(t);
533
tcg_temp_free_i64(z);
534
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
535
if (TCG_TARGET_REG_BITS == 32
536
&& TCG_TARGET_HAS_ctz_i32
537
&& arg2 <= 0xffffffffu) {
538
- TCGv_i32 t32 = tcg_const_i32((uint32_t)arg2 - 32);
539
- tcg_gen_ctz_i32(t32, TCGV_HIGH(arg1), t32);
540
+ TCGv_i32 t32 = tcg_temp_new_i32();
541
+ tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32);
542
tcg_gen_addi_i32(t32, t32, 32);
543
tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
544
tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
545
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
546
tcg_gen_ctpop_i64(ret, t);
547
tcg_temp_free_i64(t);
548
} else {
549
- TCGv_i64 t64 = tcg_const_i64(arg2);
550
- tcg_gen_ctz_i64(ret, arg1, t64);
551
- tcg_temp_free_i64(t64);
552
+ TCGv_i64 t0 = tcg_const_i64(arg2);
553
+ tcg_gen_ctz_i64(ret, arg1, t0);
554
+ tcg_temp_free_i64(t0);
555
}
556
}
557
558
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
559
if (arg2 == 0) {
560
tcg_gen_mov_i64(ret, arg1);
561
} else if (TCG_TARGET_HAS_rot_i64) {
562
- TCGv_i64 t0 = tcg_const_i64(arg2);
563
- tcg_gen_rotl_i64(ret, arg1, t0);
564
- tcg_temp_free_i64(t0);
565
+ tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2));
566
} else {
567
TCGv_i64 t0, t1;
568
t0 = tcg_temp_new_i64();
569
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
570
tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
571
} else if (TCG_TARGET_HAS_deposit_i64
572
&& TCG_TARGET_deposit_i64_valid(ofs, len)) {
573
- TCGv_i64 zero = tcg_const_i64(0);
574
+ TCGv_i64 zero = tcg_constant_i64(0);
575
tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
576
- tcg_temp_free_i64(zero);
577
} else {
578
if (TCG_TARGET_REG_BITS == 32) {
579
if (ofs >= 32) {
580
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
581
582
#ifdef CONFIG_SOFTMMU
583
{
584
- TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
585
- gen(retv, cpu_env, addr, cmpv, newv, oi);
586
- tcg_temp_free_i32(oi);
587
+ TCGMemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
588
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
589
}
590
#else
591
gen(retv, cpu_env, addr, cmpv, newv);
592
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
593
594
#ifdef CONFIG_SOFTMMU
595
{
596
- TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop, idx));
597
- gen(retv, cpu_env, addr, cmpv, newv, oi);
598
- tcg_temp_free_i32(oi);
599
+ TCGMemOpIdx oi = make_memop_idx(memop, idx);
600
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
601
}
602
#else
603
gen(retv, cpu_env, addr, cmpv, newv);
604
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
605
606
#ifdef CONFIG_SOFTMMU
607
{
608
- TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
609
- gen(ret, cpu_env, addr, val, oi);
610
- tcg_temp_free_i32(oi);
611
+ TCGMemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
612
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
613
}
614
#else
615
gen(ret, cpu_env, addr, val);
616
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
617
618
#ifdef CONFIG_SOFTMMU
619
{
620
- TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
621
- gen(ret, cpu_env, addr, val, oi);
622
- tcg_temp_free_i32(oi);
623
+ TCGMemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
624
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
625
}
626
#else
627
gen(ret, cpu_env, addr, val);
628
--
2.25.1

From: Luis Pires <luis.pires@eldorado.org.br>

In preparation for changing the divu128/divs128 implementations
to allow for quotients larger than 64 bits, move the div-by-zero
and overflow checks to the callers.

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-2-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/hw/clock.h        | 5 +++--
 include/qemu/host-utils.h | 34 ++++++++++++---------------------
 target/ppc/int_helper.c   | 14 +++++++++-----
 util/host-utils.c         | 40 ++++++++++++++++++---------------------
 4 files changed, 42 insertions(+), 51 deletions(-)
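To illustrate the new split of responsibility, here is a stand-alone sketch
(not QEMU code, names hypothetical, assumes a compiler with __int128): the
caller rejects division by zero and quotients that cannot fit in 64 bits
before calling the 128-by-64 divide, mirroring the helper_divdeu() hunk
below.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <inttypes.h>

/* Same shape as the CONFIG_INT128 divu128 at this point in the series:
 * quotient in *plow, remainder in *phigh, no internal checks. */
static void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
    __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;

    *plow = dividend / divisor;    /* fits in 64 bits after caller's check */
    *phigh = dividend % divisor;
}

/* Hypothetical caller computing (ra << 64) / rb, divdeu-style. */
static bool divdeu_like(uint64_t *rt, uint64_t ra, uint64_t rb)
{
    /* Caller-side checks: division by zero or quotient overflow. */
    if (rb == 0 || ra >= rb) {
        *rt = 0;                   /* undefined result */
        return true;               /* overflow */
    }
    uint64_t lo = 0, hi = ra;
    divu128(&lo, &hi, rb);
    *rt = lo;
    return false;
}

int main(void)
{
    uint64_t rt;
    bool ovf = divdeu_like(&rt, 1, 3);   /* (1 << 64) / 3 */

    printf("ovf=%d rt=0x%016" PRIx64 "\n", ovf, rt);
    return 0;
}
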
diff --git a/include/hw/clock.h b/include/hw/clock.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/clock.h
21
+++ b/include/hw/clock.h
22
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
23
return 0;
24
}
25
/*
26
- * Ignore divu128() return value as we've caught div-by-zero and don't
27
- * need different behaviour for overflow.
28
+ * BUG: when CONFIG_INT128 is not defined, the current implementation of
29
+ * divu128 does not return a valid truncated quotient, so the result will
30
+ * be wrong.
31
*/
32
divu128(&lo, &hi, clk->period);
33
return lo;
34
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/qemu/host-utils.h
37
+++ b/include/qemu/host-utils.h
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
39
return (__int128_t)a * b / c;
40
}
41
42
-static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
43
+static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
44
{
45
- if (divisor == 0) {
46
- return 1;
47
- } else {
48
- __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
49
- __uint128_t result = dividend / divisor;
50
- *plow = result;
51
- *phigh = dividend % divisor;
52
- return result > UINT64_MAX;
53
- }
54
+ __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
55
+ __uint128_t result = dividend / divisor;
56
+ *plow = result;
57
+ *phigh = dividend % divisor;
58
}
59
60
-static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
61
+static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
62
{
63
- if (divisor == 0) {
64
- return 1;
65
- } else {
66
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
67
- __int128_t result = dividend / divisor;
68
- *plow = result;
69
- *phigh = dividend % divisor;
70
- return result != *plow;
71
- }
72
+ __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
73
+ __int128_t result = dividend / divisor;
74
+ *plow = result;
75
+ *phigh = dividend % divisor;
76
}
77
#else
78
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
79
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
80
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
81
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
82
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
83
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
84
85
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
86
{
87
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/ppc/int_helper.c
90
+++ b/target/ppc/int_helper.c
91
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
92
uint64_t rt = 0;
93
int overflow = 0;
94
95
- overflow = divu128(&rt, &ra, rb);
96
-
97
- if (unlikely(overflow)) {
98
+ if (unlikely(rb == 0 || ra >= rb)) {
99
+ overflow = 1;
100
rt = 0; /* Undefined */
101
+ } else {
102
+ divu128(&rt, &ra, rb);
103
}
104
105
if (oe) {
106
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
107
int64_t rt = 0;
108
int64_t ra = (int64_t)rau;
109
int64_t rb = (int64_t)rbu;
110
- int overflow = divs128(&rt, &ra, rb);
111
+ int overflow = 0;
112
113
- if (unlikely(overflow)) {
114
+ if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
115
+ overflow = 1;
116
rt = 0; /* Undefined */
117
+ } else {
118
+ divs128(&rt, &ra, rb);
119
}
120
121
if (oe) {
122
diff --git a/util/host-utils.c b/util/host-utils.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/util/host-utils.c
125
+++ b/util/host-utils.c
126
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
127
*phigh = rh;
128
}
129
130
-/* Unsigned 128x64 division. Returns 1 if overflow (divide by zero or */
131
-/* quotient exceeds 64 bits). Otherwise returns quotient via plow and */
132
-/* remainder via phigh. */
133
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
134
+/*
135
+ * Unsigned 128-by-64 division. Returns quotient via plow and
136
+ * remainder via phigh.
137
+ * The result must fit in 64 bits (plow) - otherwise, the result
138
+ * is undefined.
139
+ * This function will cause a division by zero if passed a zero divisor.
140
+ */
141
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
142
{
143
uint64_t dhi = *phigh;
144
uint64_t dlo = *plow;
145
unsigned i;
146
uint64_t carry = 0;
147
148
- if (divisor == 0) {
149
- return 1;
150
- } else if (dhi == 0) {
151
+ if (divisor == 0 || dhi == 0) {
152
*plow = dlo / divisor;
153
*phigh = dlo % divisor;
154
- return 0;
155
- } else if (dhi >= divisor) {
156
- return 1;
157
} else {
158
159
for (i = 0; i < 64; i++) {
160
@@ -XXX,XX +XXX,XX @@ int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
161
162
*plow = dlo;
163
*phigh = dhi;
164
- return 0;
165
}
166
}
167
168
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
169
+/*
170
+ * Signed 128-by-64 division. Returns quotient via plow and
171
+ * remainder via phigh.
172
+ * The result must fit in 64 bits (plow) - otherwise, the result
173
+ * is undefined.
174
+ * This function will cause a division by zero if passed a zero divisor.
175
+ */
176
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
177
{
178
int sgn_dvdnd = *phigh < 0;
179
int sgn_divsr = divisor < 0;
180
- int overflow = 0;
181
182
if (sgn_dvdnd) {
183
*plow = ~(*plow);
184
@@ -XXX,XX +XXX,XX @@ int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
185
divisor = 0 - divisor;
186
}
187
188
- overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
189
+ divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
190
191
if (sgn_dvdnd ^ sgn_divsr) {
192
*plow = 0 - *plow;
193
}
194
-
195
- if (!overflow) {
196
- if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
197
- overflow = 1;
198
- }
199
- }
200
-
201
- return overflow;
202
}
203
#endif
204
205
--
2.25.1

From: Luis Pires <luis.pires@eldorado.org.br>

Move udiv_qrnnd() from include/fpu/softfloat-macros.h to host-utils,
so it can be reused by divu128().

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-3-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/fpu/softfloat-macros.h | 82 ----------------------
 include/qemu/host-utils.h      | 81 +++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 82 deletions(-)
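For readers unfamiliar with the helper: udiv_qrnnd(r, n1, n0, d) divides the
128-bit value n1:n0 by d, returning the 64-bit quotient and storing the
remainder in *r; the caller must guarantee n1 < d so the quotient fits.
A reference model of that contract (illustration only, assuming __int128;
udiv_qrnnd_ref is a made-up name, not the moved code):

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>

static uint64_t udiv_qrnnd_ref(uint64_t *r, uint64_t n1,
                               uint64_t n0, uint64_t d)
{
    unsigned __int128 n = ((unsigned __int128)n1 << 64) | n0;

    assert(n1 < d);                /* otherwise the quotient overflows */
    *r = (uint64_t)(n % d);
    return (uint64_t)(n / d);
}

int main(void)
{
    uint64_t rem;
    /* (1:0) / 3, i.e. 2^64 / 3 */
    uint64_t q = udiv_qrnnd_ref(&rem, 1, 0, 3);

    printf("q=0x%016" PRIx64 " rem=%" PRIu64 "\n", q, rem);
    return 0;
}
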
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/fpu/softfloat-macros.h
18
+++ b/include/fpu/softfloat-macros.h
19
@@ -XXX,XX +XXX,XX @@
20
* so some portions are provided under:
21
* the SoftFloat-2a license
22
* the BSD license
23
- * GPL-v2-or-later
24
*
25
* Any future contributions to this file after December 1st 2014 will be
26
* taken to be licensed under the Softfloat-2a license unless specifically
27
@@ -XXX,XX +XXX,XX @@ this code that are retained.
28
* THE POSSIBILITY OF SUCH DAMAGE.
29
*/
30
31
-/* Portions of this work are licensed under the terms of the GNU GPL,
32
- * version 2 or later. See the COPYING file in the top-level directory.
33
- */
34
-
35
#ifndef FPU_SOFTFLOAT_MACROS_H
36
#define FPU_SOFTFLOAT_MACROS_H
37
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
39
40
}
41
42
-/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
43
- * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
44
- *
45
- * Licensed under the GPLv2/LGPLv3
46
- */
47
-static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
48
- uint64_t n0, uint64_t d)
49
-{
50
-#if defined(__x86_64__)
51
- uint64_t q;
52
- asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
53
- return q;
54
-#elif defined(__s390x__) && !defined(__clang__)
55
- /* Need to use a TImode type to get an even register pair for DLGR. */
56
- unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
57
- asm("dlgr %0, %1" : "+r"(n) : "r"(d));
58
- *r = n >> 64;
59
- return n;
60
-#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
61
- /* From Power ISA 2.06, programming note for divdeu. */
62
- uint64_t q1, q2, Q, r1, r2, R;
63
- asm("divdeu %0,%2,%4; divdu %1,%3,%4"
64
- : "=&r"(q1), "=r"(q2)
65
- : "r"(n1), "r"(n0), "r"(d));
66
- r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
67
- r2 = n0 - (q2 * d);
68
- Q = q1 + q2;
69
- R = r1 + r2;
70
- if (R >= d || R < r2) { /* overflow implies R > d */
71
- Q += 1;
72
- R -= d;
73
- }
74
- *r = R;
75
- return Q;
76
-#else
77
- uint64_t d0, d1, q0, q1, r1, r0, m;
78
-
79
- d0 = (uint32_t)d;
80
- d1 = d >> 32;
81
-
82
- r1 = n1 % d1;
83
- q1 = n1 / d1;
84
- m = q1 * d0;
85
- r1 = (r1 << 32) | (n0 >> 32);
86
- if (r1 < m) {
87
- q1 -= 1;
88
- r1 += d;
89
- if (r1 >= d) {
90
- if (r1 < m) {
91
- q1 -= 1;
92
- r1 += d;
93
- }
94
- }
95
- }
96
- r1 -= m;
97
-
98
- r0 = r1 % d1;
99
- q0 = r1 / d1;
100
- m = q0 * d0;
101
- r0 = (r0 << 32) | (uint32_t)n0;
102
- if (r0 < m) {
103
- q0 -= 1;
104
- r0 += d;
105
- if (r0 >= d) {
106
- if (r0 < m) {
107
- q0 -= 1;
108
- r0 += d;
109
- }
110
- }
111
- }
112
- r0 -= m;
113
-
114
- *r = r0;
115
- return (q1 << 32) | q0;
116
-#endif
117
-}
118
-
119
/*----------------------------------------------------------------------------
120
| Returns an approximation to the square root of the 32-bit significand given
121
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
122
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
123
index XXXXXXX..XXXXXXX 100644
124
--- a/include/qemu/host-utils.h
125
+++ b/include/qemu/host-utils.h
126
@@ -XXX,XX +XXX,XX @@
127
* THE SOFTWARE.
128
*/
129
130
+/* Portions of this work are licensed under the terms of the GNU GPL,
131
+ * version 2 or later. See the COPYING file in the top-level directory.
132
+ */
133
+
134
#ifndef HOST_UTILS_H
135
#define HOST_UTILS_H
136
137
@@ -XXX,XX +XXX,XX @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
138
*/
139
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
140
141
+/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
142
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
143
+ *
144
+ * Licensed under the GPLv2/LGPLv3
145
+ */
146
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
147
+ uint64_t n0, uint64_t d)
148
+{
149
+#if defined(__x86_64__)
150
+ uint64_t q;
151
+ asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
152
+ return q;
153
+#elif defined(__s390x__) && !defined(__clang__)
154
+ /* Need to use a TImode type to get an even register pair for DLGR. */
155
+ unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
156
+ asm("dlgr %0, %1" : "+r"(n) : "r"(d));
157
+ *r = n >> 64;
158
+ return n;
159
+#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
160
+ /* From Power ISA 2.06, programming note for divdeu. */
161
+ uint64_t q1, q2, Q, r1, r2, R;
162
+ asm("divdeu %0,%2,%4; divdu %1,%3,%4"
163
+ : "=&r"(q1), "=r"(q2)
164
+ : "r"(n1), "r"(n0), "r"(d));
165
+ r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
166
+ r2 = n0 - (q2 * d);
167
+ Q = q1 + q2;
168
+ R = r1 + r2;
169
+ if (R >= d || R < r2) { /* overflow implies R > d */
170
+ Q += 1;
171
+ R -= d;
172
+ }
173
+ *r = R;
174
+ return Q;
175
+#else
176
+ uint64_t d0, d1, q0, q1, r1, r0, m;
177
+
178
+ d0 = (uint32_t)d;
179
+ d1 = d >> 32;
180
+
181
+ r1 = n1 % d1;
182
+ q1 = n1 / d1;
183
+ m = q1 * d0;
184
+ r1 = (r1 << 32) | (n0 >> 32);
185
+ if (r1 < m) {
186
+ q1 -= 1;
187
+ r1 += d;
188
+ if (r1 >= d) {
189
+ if (r1 < m) {
190
+ q1 -= 1;
191
+ r1 += d;
192
+ }
193
+ }
194
+ }
195
+ r1 -= m;
196
+
197
+ r0 = r1 % d1;
198
+ q0 = r1 / d1;
199
+ m = q0 * d0;
200
+ r0 = (r0 << 32) | (uint32_t)n0;
201
+ if (r0 < m) {
202
+ q0 -= 1;
203
+ r0 += d;
204
+ if (r0 >= d) {
205
+ if (r0 < m) {
206
+ q0 -= 1;
207
+ r0 += d;
208
+ }
209
+ }
210
+ }
211
+ r0 -= m;
212
+
213
+ *r = r0;
214
+ return (q1 << 32) | q0;
215
+#endif
216
+}
217
+
218
#endif
219
--
2.25.1

From: Luis Pires <luis.pires@eldorado.org.br>

These will be used to implement new decimal floating point
instructions from Power ISA 3.1.

The remainder is now returned directly by divu128/divs128,
freeing up phigh to receive the high 64 bits of the quotient.

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-4-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/hw/clock.h        | 6 +-
 include/qemu/host-utils.h | 20 ++++--
 target/ppc/int_helper.c   | 9 +--
 util/host-utils.c         | 133 +++++++++++++++++++++++++-------------
 4 files changed, 108 insertions(+), 60 deletions(-)
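A stand-alone sketch of the new calling convention (illustration only, not
QEMU code; assumes __int128): the full 128-bit quotient comes back through
plow/phigh and the remainder is the function's return value.

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

static uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
    __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
    __uint128_t result = dividend / divisor;

    *plow = result;                /* low 64 bits of the quotient */
    *phigh = result >> 64;         /* high 64 bits of the quotient */
    return dividend % divisor;     /* remainder via the return value */
}

int main(void)
{
    /* 0x00000000000000010000000000000000 / 10, i.e. 2^64 / 10 */
    uint64_t lo = 0, hi = 1;
    uint64_t rem = divu128(&lo, &hi, 10);

    printf("quot=0x%016" PRIx64 "%016" PRIx64 " rem=%" PRIu64 "\n",
           hi, lo, rem);
    return 0;
}
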
diff --git a/include/hw/clock.h b/include/hw/clock.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/clock.h
23
+++ b/include/hw/clock.h
24
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
25
if (clk->period == 0) {
26
return 0;
27
}
28
- /*
29
- * BUG: when CONFIG_INT128 is not defined, the current implementation of
30
- * divu128 does not return a valid truncated quotient, so the result will
31
- * be wrong.
32
- */
33
+
34
divu128(&lo, &hi, clk->period);
35
return lo;
36
}
37
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/include/qemu/host-utils.h
40
+++ b/include/qemu/host-utils.h
41
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
42
return (__int128_t)a * b / c;
43
}
44
45
-static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
46
+static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
47
+ uint64_t divisor)
48
{
49
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
50
__uint128_t result = dividend / divisor;
51
+
52
*plow = result;
53
- *phigh = dividend % divisor;
54
+ *phigh = result >> 64;
55
+ return dividend % divisor;
56
}
57
58
-static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
59
+static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
60
+ int64_t divisor)
61
{
62
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
63
+ __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
64
__int128_t result = dividend / divisor;
65
+
66
*plow = result;
67
- *phigh = dividend % divisor;
68
+ *phigh = result >> 64;
69
+ return dividend % divisor;
70
}
71
#else
72
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
73
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
74
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
75
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
76
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
77
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
78
79
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
80
{
81
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/ppc/int_helper.c
84
+++ b/target/ppc/int_helper.c
85
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
86
87
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
88
{
89
- int64_t rt = 0;
90
+ uint64_t rt = 0;
91
int64_t ra = (int64_t)rau;
92
int64_t rb = (int64_t)rbu;
93
int overflow = 0;
94
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
95
int cr;
96
uint64_t lo_value;
97
uint64_t hi_value;
98
+ uint64_t rem;
99
ppc_avr_t ret = { .u64 = { 0, 0 } };
100
101
if (b->VsrSD(0) < 0) {
102
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
103
* In that case, we leave r unchanged.
104
*/
105
} else {
106
- divu128(&lo_value, &hi_value, 1000000000000000ULL);
107
+ rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
108
109
- for (i = 1; i < 16; hi_value /= 10, i++) {
110
- bcd_put_digit(&ret, hi_value % 10, i);
111
+ for (i = 1; i < 16; rem /= 10, i++) {
112
+ bcd_put_digit(&ret, rem % 10, i);
113
}
114
115
for (; i < 32; lo_value /= 10, i++) {
116
diff --git a/util/host-utils.c b/util/host-utils.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/util/host-utils.c
119
+++ b/util/host-utils.c
120
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
121
}
122
123
/*
124
- * Unsigned 128-by-64 division. Returns quotient via plow and
125
- * remainder via phigh.
126
- * The result must fit in 64 bits (plow) - otherwise, the result
127
- * is undefined.
128
- * This function will cause a division by zero if passed a zero divisor.
129
+ * Unsigned 128-by-64 division.
130
+ * Returns the remainder.
131
+ * Returns quotient via plow and phigh.
132
+ * Also returns the remainder via the function return value.
133
*/
134
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
135
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
136
{
137
uint64_t dhi = *phigh;
138
uint64_t dlo = *plow;
139
- unsigned i;
140
- uint64_t carry = 0;
141
+ uint64_t rem, dhighest;
142
+ int sh;
143
144
if (divisor == 0 || dhi == 0) {
145
*plow = dlo / divisor;
146
- *phigh = dlo % divisor;
147
+ *phigh = 0;
148
+ return dlo % divisor;
149
} else {
150
+ sh = clz64(divisor);
151
152
- for (i = 0; i < 64; i++) {
153
- carry = dhi >> 63;
154
- dhi = (dhi << 1) | (dlo >> 63);
155
- if (carry || (dhi >= divisor)) {
156
- dhi -= divisor;
157
- carry = 1;
158
- } else {
159
- carry = 0;
160
+ if (dhi < divisor) {
161
+ if (sh != 0) {
162
+ /* normalize the divisor, shifting the dividend accordingly */
163
+ divisor <<= sh;
164
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
165
+ dlo <<= sh;
166
}
167
- dlo = (dlo << 1) | carry;
168
+
169
+ *phigh = 0;
170
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
171
+ } else {
172
+ if (sh != 0) {
173
+ /* normalize the divisor, shifting the dividend accordingly */
174
+ divisor <<= sh;
175
+ dhighest = dhi >> (64 - sh);
176
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
177
+ dlo <<= sh;
178
+
179
+ *phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
180
+ } else {
181
+ /**
182
+ * dhi >= divisor
183
+ * Since the MSB of divisor is set (sh == 0),
184
+ * (dhi - divisor) < divisor
185
+ *
186
+ * Thus, the high part of the quotient is 1, and we can
187
+ * calculate the low part with a single call to udiv_qrnnd
188
+ * after subtracting divisor from dhi
189
+ */
190
+ dhi -= divisor;
191
+ *phigh = 1;
192
+ }
193
+
194
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
195
}
196
197
- *plow = dlo;
198
- *phigh = dhi;
199
+ /*
200
+ * since the dividend/divisor might have been normalized,
201
+ * the remainder might also have to be shifted back
202
+ */
203
+ return rem >> sh;
204
}
205
}
206
207
/*
208
- * Signed 128-by-64 division. Returns quotient via plow and
209
- * remainder via phigh.
210
- * The result must fit in 64 bits (plow) - otherwise, the result
211
- * is undefined.
212
- * This function will cause a division by zero if passed a zero divisor.
213
+ * Signed 128-by-64 division.
214
+ * Returns quotient via plow and phigh.
215
+ * Also returns the remainder via the function return value.
216
*/
217
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
218
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
219
{
220
- int sgn_dvdnd = *phigh < 0;
221
- int sgn_divsr = divisor < 0;
222
+ bool neg_quotient = false, neg_remainder = false;
223
+ uint64_t unsig_hi = *phigh, unsig_lo = *plow;
224
+ uint64_t rem;
225
226
- if (sgn_dvdnd) {
227
- *plow = ~(*plow);
228
- *phigh = ~(*phigh);
229
- if (*plow == (int64_t)-1) {
230
+ if (*phigh < 0) {
231
+ neg_quotient = !neg_quotient;
232
+ neg_remainder = !neg_remainder;
233
+
234
+ if (unsig_lo == 0) {
235
+ unsig_hi = -unsig_hi;
236
+ } else {
237
+ unsig_hi = ~unsig_hi;
238
+ unsig_lo = -unsig_lo;
239
+ }
240
+ }
241
+
242
+ if (divisor < 0) {
243
+ neg_quotient = !neg_quotient;
244
+
245
+ divisor = -divisor;
246
+ }
247
+
248
+ rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
249
+
250
+ if (neg_quotient) {
251
+ if (unsig_lo == 0) {
252
+ *phigh = -unsig_hi;
253
*plow = 0;
254
- (*phigh)++;
255
- } else {
256
- (*plow)++;
257
- }
258
+ } else {
259
+ *phigh = ~unsig_hi;
260
+ *plow = -unsig_lo;
261
+ }
262
+ } else {
263
+ *phigh = unsig_hi;
264
+ *plow = unsig_lo;
265
}
266
267
- if (sgn_divsr) {
268
- divisor = 0 - divisor;
269
- }
270
-
271
- divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
272
-
273
- if (sgn_dvdnd ^ sgn_divsr) {
274
- *plow = 0 - *plow;
275
+ if (neg_remainder) {
276
+ return -rem;
277
+ } else {
278
+ return rem;
279
}
280
}
281
#endif
282
--
2.25.1

From: Luis Pires <luis.pires@eldorado.org.br>

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-5-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/unit/test-div128.c | 197 +++++++++++++++++++++++++++++++++++++++
 tests/unit/meson.build   | 1 +
 2 files changed, 198 insertions(+)
 create mode 100644 tests/unit/test-div128.c

Begin conversion of constraints to pre-validated, read-only entities.

To begin, create a simple method by which sets of TCGTargetOpDef
structures may be declared and used. This simplifies each host's
tcg_target_op_def function and ensures that we have a collected
set of constraints.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-constr.c.inc | 108 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 tcg/tcg-constr.c.inc
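The declaration trick used by tcg-constr.c.inc below can be seen in
isolation with a simplified TCGTargetOpDef: token pasting builds a unique
identifier for each constraint set and stringification fills in the
constraint letters. The specific constraint strings and the lookup side
are illustrative assumptions, since this excerpt is truncated.

#include <stdio.h>

typedef struct {
    const char *args_ct_str[5];    /* simplified for the sketch */
} TCGTargetOpDef;

#define C_PFX3(P, A, B, C)  P##A##_##B##_##C

#define C_O1_I2(O1, I1, I2) \
    static const TCGTargetOpDef C_PFX3(c_o1_i2_, O1, I1, I2) \
        = { .args_ct_str = { #O1, #I1, #I2 } };

/* Declares "static const TCGTargetOpDef c_o1_i2_r_r_ri = ..." */
C_O1_I2(r, r, ri)

int main(void)
{
    printf("%s %s %s\n",
           c_o1_i2_r_r_ri.args_ct_str[0],
           c_o1_i2_r_r_ri.args_ct_str[1],
           c_o1_i2_r_r_ri.args_ct_str[2]);
    return 0;
}
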
diff --git a/tcg/tcg-constr.c.inc b/tcg/tcg-constr.c.inc
12
13
diff --git a/tests/unit/test-div128.c b/tests/unit/test-div128.c
14
new file mode 100644
14
new file mode 100644
15
index XXXXXXX..XXXXXXX
15
index XXXXXXX..XXXXXXX
16
--- /dev/null
16
--- /dev/null
17
+++ b/tcg/tcg-constr.c.inc
17
+++ b/tests/unit/test-div128.c
18
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@
19
+/* SPDX-License-Identifier: GPL-2.0-or-later */
20
+/*
19
+/*
21
+ * TCG backend data: operand constraints.
20
+ * Test 128-bit division functions
22
+ * Copyright (c) 2020 Linaro
21
+ *
22
+ * Copyright (c) 2021 Instituto de Pesquisas Eldorado (eldorado.org.br)
23
+ *
24
+ * This library is free software; you can redistribute it and/or
25
+ * modify it under the terms of the GNU Lesser General Public
26
+ * License as published by the Free Software Foundation; either
27
+ * version 2.1 of the License, or (at your option) any later version.
28
+ *
29
+ * This library is distributed in the hope that it will be useful,
30
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
32
+ * Lesser General Public License for more details.
33
+ *
34
+ * You should have received a copy of the GNU Lesser General Public
35
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23
+ */
36
+ */
24
+
37
+
25
+/*
38
+#include "qemu/osdep.h"
26
+ * Define structures for each set of constraints.
39
+#include "qemu/host-utils.h"
27
+ */
40
+
28
+
41
+typedef struct {
29
+#define C_PFX1(P, A) P##A
42
+ uint64_t high;
30
+#define C_PFX2(P, A, B) P##A##_##B
43
+ uint64_t low;
31
+#define C_PFX3(P, A, B, C) P##A##_##B##_##C
44
+ uint64_t rhigh;
32
+#define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
45
+ uint64_t rlow;
33
+#define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
46
+ uint64_t divisor;
34
+#define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
47
+ uint64_t remainder;
35
+
48
+} test_data_unsigned;
36
+#define C_O0_I1(I1) \
49
+
37
+ static const TCGTargetOpDef C_PFX1(c_o0_i1_, I1) \
50
+typedef struct {
38
+ = { .args_ct_str = { #I1 } };
51
+ int64_t high;
39
+
52
+ uint64_t low;
40
+#define C_O0_I2(I1, I2) \
53
+ int64_t rhigh;
41
+ static const TCGTargetOpDef C_PFX2(c_o0_i2_, I1, I2) \
54
+ uint64_t rlow;
42
+ = { .args_ct_str = { #I1, #I2 } };
55
+ int64_t divisor;
43
+
56
+ int64_t remainder;
44
+#define C_O0_I3(I1, I2, I3) \
57
+} test_data_signed;
45
+ static const TCGTargetOpDef C_PFX3(c_o0_i3_, I1, I2, I3) \
58
+
46
+ = { .args_ct_str = { #I1, #I2, #I3 } };
59
+static const test_data_unsigned test_table_unsigned[] = {
47
+
60
+ /* Dividend fits in 64 bits */
48
+#define C_O0_I4(I1, I2, I3, I4) \
61
+ { 0x0000000000000000ULL, 0x0000000000000000ULL,
49
+ static const TCGTargetOpDef C_PFX4(c_o0_i4_, I1, I2, I3, I4) \
62
+ 0x0000000000000000ULL, 0x0000000000000000ULL,
50
+ = { .args_ct_str = { #I1, #I2, #I3, #I4 } };
63
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
51
+
64
+ { 0x0000000000000000ULL, 0x0000000000000001ULL,
52
+#define C_O1_I1(O1, I1) \
65
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
53
+ static const TCGTargetOpDef C_PFX2(c_o1_i1_, O1, I1) \
66
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
54
+ = { .args_ct_str = { #O1, #I1 } };
67
+ { 0x0000000000000000ULL, 0x0000000000000003ULL,
55
+
68
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
56
+#define C_O1_I2(O1, I1, I2) \
69
+ 0x0000000000000002ULL, 0x0000000000000001ULL},
57
+ static const TCGTargetOpDef C_PFX3(c_o1_i2_, O1, I1, I2) \
70
+ { 0x0000000000000000ULL, 0x8000000000000000ULL,
58
+ = { .args_ct_str = { #O1, #I1, #I2 } };
71
+ 0x0000000000000000ULL, 0x8000000000000000ULL,
59
+
72
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
60
+#define C_O1_I3(O1, I1, I2, I3) \
73
+ { 0x0000000000000000ULL, 0xa000000000000000ULL,
61
+ static const TCGTargetOpDef C_PFX4(c_o1_i3_, O1, I1, I2, I3) \
74
+ 0x0000000000000000ULL, 0x0000000000000002ULL,
62
+ = { .args_ct_str = { #O1, #I1, #I2, #I3 } };
75
+ 0x4000000000000000ULL, 0x2000000000000000ULL},
63
+
76
+ { 0x0000000000000000ULL, 0x8000000000000000ULL,
64
+#define C_O1_I4(O1, I1, I2, I3, I4) \
77
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
65
+ static const TCGTargetOpDef C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4) \
78
+ 0x8000000000000000ULL, 0x0000000000000000ULL},
66
+ = { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } };
79
+
67
+
80
+ /* Dividend > 64 bits, with MSB 0 */
68
+#define C_N1_I2(O1, I1, I2) \
81
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
69
+ static const TCGTargetOpDef C_PFX3(c_n1_i2_, O1, I1, I2) \
82
+ 0x123456789abcdefeULL, 0xefedcba987654321ULL,
70
+ = { .args_ct_str = { "&" #O1, #I1, #I2 } };
83
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
71
+
84
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
72
+#define C_O2_I1(O1, O2, I1) \
85
+ 0x0000000000000001ULL, 0x000000000000000dULL,
73
+ static const TCGTargetOpDef C_PFX3(c_o2_i1_, O1, O2, I1) \
86
+ 0x123456789abcdefeULL, 0x03456789abcdf03bULL},
74
+ = { .args_ct_str = { #O1, #O2, #I1 } };
87
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
75
+
88
+ 0x0123456789abcdefULL, 0xeefedcba98765432ULL,
76
+#define C_O2_I2(O1, O2, I1, I2) \
89
+ 0x0000000000000010ULL, 0x0000000000000001ULL},
77
+ static const TCGTargetOpDef C_PFX4(c_o2_i2_, O1, O2, I1, I2) \
90
+
78
+ = { .args_ct_str = { #O1, #O2, #I1, #I2 } };
91
+ /* Dividend > 64 bits, with MSB 1 */
79
+
92
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
80
+#define C_O2_I3(O1, O2, I1, I2, I3) \
93
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
81
+ static const TCGTargetOpDef C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3) \
94
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
82
+ = { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } };
95
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
83
+
96
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
84
+#define C_O2_I4(O1, O2, I1, I2, I3, I4) \
97
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
85
+ static const TCGTargetOpDef C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) \
98
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
86
+ = { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } };
99
+ 0x0feeddccbbaa9988ULL, 0x7766554433221100ULL,
87
+
100
+ 0x0000000000000010ULL, 0x000000000000000fULL},
88
+#include "tcg-target-constr.h"
101
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
89
+
102
+ 0x000000000000000eULL, 0x00f0f0f0f0f0f35aULL,
90
+
103
+ 0x123456789abcdefeULL, 0x0f8922bc55ef90c3ULL},
91
+/*
104
+
92
+ * Redefine the macros so that they now reference those structures.
105
+ /**
93
+ * These values should be returned from tcg_target_op_def().
106
+ * Divisor == 64 bits, with MSB 1
94
+ */
107
+ * and high 64 bits of dividend >= divisor
95
+
108
+ * (for testing normalization)
96
+#undef C_O0_I1
109
+ */
97
+#undef C_O0_I2
110
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
98
+#undef C_O0_I3
111
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
99
+#undef C_O0_I4
112
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
100
+#undef C_O1_I1
113
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
101
+#undef C_O1_I2
114
+ 0x0000000000000001ULL, 0xfddbb9977553310aULL,
102
+#undef C_O1_I3
115
+ 0x8000000000000001ULL, 0x78899aabbccddf05ULL},
103
+#undef C_O1_I4
116
+
104
+#undef C_N1_I2
117
+ /* Dividend > 64 bits, divisor almost as big */
105
+#undef C_O2_I1
118
+ { 0x0000000000000001ULL, 0x23456789abcdef01ULL,
106
+#undef C_O2_I2
119
+ 0x0000000000000000ULL, 0x000000000000000fULL,
107
+#undef C_O2_I3
120
+ 0x123456789abcdefeULL, 0x123456789abcde1fULL},
108
+#undef C_O2_I4
121
+};
109
+
122
+
110
+#define C_O0_I1(I1) &C_PFX1(c_o0_i1_, I1)
123
+static const test_data_signed test_table_signed[] = {
111
+#define C_O0_I2(I1, I2) &C_PFX2(c_o0_i2_, I1, I2)
124
+ /* Positive dividend, positive/negative divisors */
112
+#define C_O0_I3(I1, I2, I3) &C_PFX3(c_o0_i3_, I1, I2, I3)
125
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
113
+#define C_O0_I4(I1, I2, I3, I4) &C_PFX4(c_o0_i4_, I1, I2, I3, I4)
126
+ 0x0000000000000000LL, 0x0000000000bc614eULL,
114
+
127
+ 0x0000000000000001LL, 0x0000000000000000LL},
115
+#define C_O1_I1(O1, I1) &C_PFX2(c_o1_i1_, O1, I1)
128
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
116
+#define C_O1_I2(O1, I1, I2) &C_PFX3(c_o1_i2_, O1, I1, I2)
129
+ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
117
+#define C_O1_I3(O1, I1, I2, I3) &C_PFX4(c_o1_i3_, O1, I1, I2, I3)
130
+ 0xffffffffffffffffLL, 0x0000000000000000LL},
118
+#define C_O1_I4(O1, I1, I2, I3, I4) &C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
131
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
119
+
132
+ 0x0000000000000000LL, 0x00000000005e30a7ULL,
120
+#define C_N1_I2(O1, I1, I2) &C_PFX3(c_n1_i2_, O1, I1, I2)
133
+ 0x0000000000000002LL, 0x0000000000000000LL},
121
+
134
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
122
+#define C_O2_I1(O1, O2, I1) &C_PFX3(c_o2_i1_, O1, O2, I1)
135
+ 0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
123
+#define C_O2_I2(O1, O2, I1, I2) &C_PFX4(c_o2_i2_, O1, O2, I1, I2)
136
+ 0xfffffffffffffffeLL, 0x0000000000000000LL},
124
+#define C_O2_I3(O1, O2, I1, I2, I3) &C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
137
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
125
+#define C_O2_I4(O1, O2, I1, I2, I3, I4) \
138
+ 0x0000000000000000LL, 0x0000000000178c29ULL,
126
+ &C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
139
+ 0x0000000000000008LL, 0x0000000000000006LL},
140
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
141
+ 0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
142
+ 0xfffffffffffffff8LL, 0x0000000000000006LL},
143
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
144
+ 0x0000000000000000LL, 0x000000000000550dULL,
145
+ 0x0000000000000237LL, 0x0000000000000183LL},
146
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
147
+ 0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
148
+ 0xfffffffffffffdc9LL, 0x0000000000000183LL},
149
+
150
+ /* Negative dividend, positive/negative divisors */
151
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
152
+ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
153
+ 0x0000000000000001LL, 0x0000000000000000LL},
154
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
155
+ 0x0000000000000000LL, 0x0000000000bc614eULL,
156
+ 0xffffffffffffffffLL, 0x0000000000000000LL},
157
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
158
+ 0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
159
+ 0x0000000000000002LL, 0x0000000000000000LL},
160
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
161
+ 0x0000000000000000LL, 0x00000000005e30a7ULL,
162
+ 0xfffffffffffffffeLL, 0x0000000000000000LL},
163
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
164
+ 0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
165
+ 0x0000000000000008LL, 0xfffffffffffffffaLL},
166
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
167
+ 0x0000000000000000LL, 0x0000000000178c29ULL,
168
+ 0xfffffffffffffff8LL, 0xfffffffffffffffaLL},
169
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
170
+ 0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
171
+ 0x0000000000000237LL, 0xfffffffffffffe7dLL},
172
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
173
+ 0x0000000000000000LL, 0x000000000000550dULL,
174
+ 0xfffffffffffffdc9LL, 0xfffffffffffffe7dLL},
175
+};
176
+
177
+static void test_divu128(void)
178
+{
179
+ int i;
180
+ uint64_t rem;
181
+ test_data_unsigned tmp;
182
+
183
+ for (i = 0; i < ARRAY_SIZE(test_table_unsigned); ++i) {
184
+ tmp = test_table_unsigned[i];
185
+
186
+ rem = divu128(&tmp.low, &tmp.high, tmp.divisor);
187
+ g_assert_cmpuint(tmp.low, ==, tmp.rlow);
188
+ g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
189
+ g_assert_cmpuint(rem, ==, tmp.remainder);
190
+ }
191
+}
192
+
193
+static void test_divs128(void)
194
+{
195
+ int i;
196
+ int64_t rem;
197
+ test_data_signed tmp;
198
+
199
+ for (i = 0; i < ARRAY_SIZE(test_table_signed); ++i) {
200
+ tmp = test_table_signed[i];
201
+
202
+ rem = divs128(&tmp.low, &tmp.high, tmp.divisor);
203
+ g_assert_cmpuint(tmp.low, ==, tmp.rlow);
204
+ g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
205
+ g_assert_cmpuint(rem, ==, tmp.remainder);
206
+ }
207
+}
208
+
209
+int main(int argc, char **argv)
210
+{
211
+ g_test_init(&argc, &argv, NULL);
212
+ g_test_add_func("/host-utils/test_divu128", test_divu128);
213
+ g_test_add_func("/host-utils/test_divs128", test_divs128);
214
+ return g_test_run();
215
+}
216
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
217
index XXXXXXX..XXXXXXX 100644
218
--- a/tests/unit/meson.build
219
+++ b/tests/unit/meson.build
220
@@ -XXX,XX +XXX,XX @@ tests = {
221
# all code tested by test-x86-cpuid is inside topology.h
222
'test-x86-cpuid': [],
223
'test-cutils': [],
224
+ 'test-div128': [],
225
'test-shift128': [],
226
'test-mul64': [],
227
# all code tested by test-int128 is inside int128.h
127
--
228
--
128
2.25.1
229
2.25.1
129
230
130
231
1
This propagates the extended value of TCGTemp.val that we did before.
1
Prepare for tracking different masks by renaming this one.
2
In addition, it will be required for vector constants.
3
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
7
---
6
tcg/optimize.c | 40 +++++++++++++++++++++-------------------
8
tcg/optimize.c | 142 +++++++++++++++++++++++++------------------------
7
1 file changed, 21 insertions(+), 19 deletions(-)
9
1 file changed, 72 insertions(+), 70 deletions(-)
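For readers skimming the rename: z_mask is a known-zeros mask, i.e. a result bit can be set only where the mask has a 1. A self-contained sketch of the propagation rules applied in this file (mirroring the and/or-xor/zero-extend cases visible in the diff below; not the actual tcg/optimize.c code):

#include <stdint.h>

/* Possible-nonzero bits of (a & b): only bits possibly set in both inputs. */
static uint64_t z_mask_and(uint64_t z_a, uint64_t z_b)
{
    return z_a & z_b;
}

/* Possible-nonzero bits of (a | b) or (a ^ b): any bit possibly set in either. */
static uint64_t z_mask_or_xor(uint64_t z_a, uint64_t z_b)
{
    return z_a | z_b;
}

/* Zero-extending a byte clears everything above bit 7. */
static uint64_t z_mask_ext8u(uint64_t z_a)
{
    return z_a & 0xff;
}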
8
10
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
14
bool is_const;
15
TCGTemp *prev_copy;
16
TCGTemp *prev_copy;
16
TCGTemp *next_copy;
17
TCGTemp *next_copy;
17
- tcg_target_ulong val;
18
uint64_t val;
18
- tcg_target_ulong mask;
19
- uint64_t mask;
19
+ uint64_t val;
20
+ uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
20
+ uint64_t mask;
21
} TempOptInfo;
21
} TempOptInfo;
22
22
23
static inline TempOptInfo *ts_info(TCGTemp *ts)
23
static inline TempOptInfo *ts_info(TCGTemp *ts)
24
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
24
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
25
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
25
ti->next_copy = ts;
26
ti->prev_copy = ts;
27
ti->is_const = false;
28
- ti->mask = -1;
29
+ ti->z_mask = -1;
26
}
30
}
27
31
28
-static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
32
static void reset_temp(TCGArg arg)
29
+static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, uint64_t val)
33
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
30
{
34
if (ts->kind == TEMP_CONST) {
31
const TCGOpDef *def;
35
ti->is_const = true;
32
TCGOpcode new_op;
36
ti->val = ts->val;
33
- tcg_target_ulong mask;
37
- ti->mask = ts->val;
34
+ uint64_t mask;
38
+ ti->z_mask = ts->val;
35
TempOptInfo *di = arg_info(dst);
39
if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
36
40
/* High bits of a 32-bit quantity are garbage. */
37
def = &tcg_op_defs[op->opc];
41
- ti->mask |= ~0xffffffffull;
42
+ ti->z_mask |= ~0xffffffffull;
43
}
44
} else {
45
ti->is_const = false;
46
- ti->mask = -1;
47
+ ti->z_mask = -1;
48
}
49
}
50
38
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
51
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
39
const TCGOpDef *def;
52
const TCGOpDef *def;
40
TempOptInfo *di;
53
TempOptInfo *di;
41
TempOptInfo *si;
54
TempOptInfo *si;
42
- tcg_target_ulong mask;
55
- uint64_t mask;
43
+ uint64_t mask;
56
+ uint64_t z_mask;
44
TCGOpcode new_op;
57
TCGOpcode new_op;
45
58
46
if (ts_are_copies(dst_ts, src_ts)) {
59
if (ts_are_copies(dst_ts, src_ts)) {
47
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
60
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
61
op->args[0] = dst;
62
op->args[1] = src;
63
64
- mask = si->mask;
65
+ z_mask = si->z_mask;
66
if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
67
/* High bits of the destination are now garbage. */
68
- mask |= ~0xffffffffull;
69
+ z_mask |= ~0xffffffffull;
48
}
70
}
49
}
71
- di->mask = mask;
50
72
+ di->z_mask = z_mask;
51
-static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
73
52
+static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
74
if (src_ts->type == dst_ts->type) {
53
{
75
TempOptInfo *ni = ts_info(si->next_copy);
54
uint64_t l64, h64;
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
55
56
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
57
}
77
}
58
}
59
60
-static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
61
+static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
62
{
63
const TCGOpDef *def = &tcg_op_defs[op];
64
- TCGArg res = do_constant_folding_2(op, x, y);
65
+ uint64_t res = do_constant_folding_2(op, x, y);
66
if (!(def->flags & TCG_OPF_64BIT)) {
67
res = (int32_t)res;
68
}
69
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
70
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
71
TCGArg y, TCGCond c)
72
{
73
- tcg_target_ulong xv = arg_info(x)->val;
74
- tcg_target_ulong yv = arg_info(y)->val;
75
+ uint64_t xv = arg_info(x)->val;
76
+ uint64_t yv = arg_info(y)->val;
77
+
78
if (arg_is_const(x) && arg_is_const(y)) {
79
const TCGOpDef *def = &tcg_op_defs[op];
80
tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
81
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
82
infos = tcg_malloc(sizeof(TempOptInfo) * nb_temps);
83
78
84
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
79
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
85
- tcg_target_ulong mask, partmask, affected;
80
- uint64_t mask, partmask, affected, tmp;
86
+ uint64_t mask, partmask, affected, tmp;
81
+ uint64_t z_mask, partmask, affected, tmp;
87
int nb_oargs, nb_iargs, i;
82
int nb_oargs, nb_iargs;
88
- TCGArg tmp;
89
TCGOpcode opc = op->opc;
83
TCGOpcode opc = op->opc;
90
const TCGOpDef *def = &tcg_op_defs[opc];
84
const TCGOpDef *def = &tcg_op_defs[opc];
91
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
92
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
93
87
/* Simplify using known-zero bits. Currently only ops with a single
94
CASE_OP_32_64(extract2):
88
output argument is supported. */
95
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
89
- mask = -1;
96
- TCGArg v1 = arg_info(op->args[1])->val;
90
+ z_mask = -1;
97
- TCGArg v2 = arg_info(op->args[2])->val;
91
affected = -1;
98
+ uint64_t v1 = arg_info(op->args[1])->val;
92
switch (opc) {
99
+ uint64_t v2 = arg_info(op->args[2])->val;
93
CASE_OP_32_64(ext8s):
100
+ int shr = op->args[3];
94
- if ((arg_info(op->args[1])->mask & 0x80) != 0) {
101
95
+ if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
102
if (opc == INDEX_op_extract2_i64) {
96
break;
103
- tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
97
}
104
+ tmp = (v1 >> shr) | (v2 << (64 - shr));
98
QEMU_FALLTHROUGH;
105
} else {
99
CASE_OP_32_64(ext8u):
106
- tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) |
100
- mask = 0xff;
107
- ((uint32_t)v2 << (32 - op->args[3])));
101
+ z_mask = 0xff;
108
+ tmp = (int32_t)(((uint32_t)v1 >> shr) |
102
goto and_const;
109
+ ((uint32_t)v2 << (32 - shr)));
103
CASE_OP_32_64(ext16s):
104
- if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
105
+ if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
106
break;
107
}
108
QEMU_FALLTHROUGH;
109
CASE_OP_32_64(ext16u):
110
- mask = 0xffff;
111
+ z_mask = 0xffff;
112
goto and_const;
113
case INDEX_op_ext32s_i64:
114
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
115
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
116
break;
117
}
118
QEMU_FALLTHROUGH;
119
case INDEX_op_ext32u_i64:
120
- mask = 0xffffffffU;
121
+ z_mask = 0xffffffffU;
122
goto and_const;
123
124
CASE_OP_32_64(and):
125
- mask = arg_info(op->args[2])->mask;
126
+ z_mask = arg_info(op->args[2])->z_mask;
127
if (arg_is_const(op->args[2])) {
128
and_const:
129
- affected = arg_info(op->args[1])->mask & ~mask;
130
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
131
}
132
- mask = arg_info(op->args[1])->mask & mask;
133
+ z_mask = arg_info(op->args[1])->z_mask & z_mask;
134
break;
135
136
case INDEX_op_ext_i32_i64:
137
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
138
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
139
break;
140
}
141
QEMU_FALLTHROUGH;
142
case INDEX_op_extu_i32_i64:
143
/* We do not compute affected as it is a size changing op. */
144
- mask = (uint32_t)arg_info(op->args[1])->mask;
145
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
146
break;
147
148
CASE_OP_32_64(andc):
149
/* Known-zeros does not imply known-ones. Therefore unless
150
op->args[2] is constant, we can't infer anything from it. */
151
if (arg_is_const(op->args[2])) {
152
- mask = ~arg_info(op->args[2])->mask;
153
+ z_mask = ~arg_info(op->args[2])->z_mask;
154
goto and_const;
155
}
156
/* But we certainly know nothing outside args[1] may be set. */
157
- mask = arg_info(op->args[1])->mask;
158
+ z_mask = arg_info(op->args[1])->z_mask;
159
break;
160
161
case INDEX_op_sar_i32:
162
if (arg_is_const(op->args[2])) {
163
tmp = arg_info(op->args[2])->val & 31;
164
- mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
165
+ z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
166
}
167
break;
168
case INDEX_op_sar_i64:
169
if (arg_is_const(op->args[2])) {
170
tmp = arg_info(op->args[2])->val & 63;
171
- mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
172
+ z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
173
}
174
break;
175
176
case INDEX_op_shr_i32:
177
if (arg_is_const(op->args[2])) {
178
tmp = arg_info(op->args[2])->val & 31;
179
- mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
180
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
181
}
182
break;
183
case INDEX_op_shr_i64:
184
if (arg_is_const(op->args[2])) {
185
tmp = arg_info(op->args[2])->val & 63;
186
- mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
187
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
188
}
189
break;
190
191
case INDEX_op_extrl_i64_i32:
192
- mask = (uint32_t)arg_info(op->args[1])->mask;
193
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
194
break;
195
case INDEX_op_extrh_i64_i32:
196
- mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
197
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
198
break;
199
200
CASE_OP_32_64(shl):
201
if (arg_is_const(op->args[2])) {
202
tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
203
- mask = arg_info(op->args[1])->mask << tmp;
204
+ z_mask = arg_info(op->args[1])->z_mask << tmp;
205
}
206
break;
207
208
CASE_OP_32_64(neg):
209
/* Set to 1 all bits to the left of the rightmost. */
210
- mask = -(arg_info(op->args[1])->mask
211
- & -arg_info(op->args[1])->mask);
212
+ z_mask = -(arg_info(op->args[1])->z_mask
213
+ & -arg_info(op->args[1])->z_mask);
214
break;
215
216
CASE_OP_32_64(deposit):
217
- mask = deposit64(arg_info(op->args[1])->mask,
218
- op->args[3], op->args[4],
219
- arg_info(op->args[2])->mask);
220
+ z_mask = deposit64(arg_info(op->args[1])->z_mask,
221
+ op->args[3], op->args[4],
222
+ arg_info(op->args[2])->z_mask);
223
break;
224
225
CASE_OP_32_64(extract):
226
- mask = extract64(arg_info(op->args[1])->mask,
227
- op->args[2], op->args[3]);
228
+ z_mask = extract64(arg_info(op->args[1])->z_mask,
229
+ op->args[2], op->args[3]);
230
if (op->args[2] == 0) {
231
- affected = arg_info(op->args[1])->mask & ~mask;
232
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
233
}
234
break;
235
CASE_OP_32_64(sextract):
236
- mask = sextract64(arg_info(op->args[1])->mask,
237
- op->args[2], op->args[3]);
238
- if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
239
- affected = arg_info(op->args[1])->mask & ~mask;
240
+ z_mask = sextract64(arg_info(op->args[1])->z_mask,
241
+ op->args[2], op->args[3]);
242
+ if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
243
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
244
}
245
break;
246
247
CASE_OP_32_64(or):
248
CASE_OP_32_64(xor):
249
- mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
250
+ z_mask = arg_info(op->args[1])->z_mask
251
+ | arg_info(op->args[2])->z_mask;
252
break;
253
254
case INDEX_op_clz_i32:
255
case INDEX_op_ctz_i32:
256
- mask = arg_info(op->args[2])->mask | 31;
257
+ z_mask = arg_info(op->args[2])->z_mask | 31;
258
break;
259
260
case INDEX_op_clz_i64:
261
case INDEX_op_ctz_i64:
262
- mask = arg_info(op->args[2])->mask | 63;
263
+ z_mask = arg_info(op->args[2])->z_mask | 63;
264
break;
265
266
case INDEX_op_ctpop_i32:
267
- mask = 32 | 31;
268
+ z_mask = 32 | 31;
269
break;
270
case INDEX_op_ctpop_i64:
271
- mask = 64 | 63;
272
+ z_mask = 64 | 63;
273
break;
274
275
CASE_OP_32_64(setcond):
276
case INDEX_op_setcond2_i32:
277
- mask = 1;
278
+ z_mask = 1;
279
break;
280
281
CASE_OP_32_64(movcond):
282
- mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
283
+ z_mask = arg_info(op->args[3])->z_mask
284
+ | arg_info(op->args[4])->z_mask;
285
break;
286
287
CASE_OP_32_64(ld8u):
288
- mask = 0xff;
289
+ z_mask = 0xff;
290
break;
291
CASE_OP_32_64(ld16u):
292
- mask = 0xffff;
293
+ z_mask = 0xffff;
294
break;
295
case INDEX_op_ld32u_i64:
296
- mask = 0xffffffffu;
297
+ z_mask = 0xffffffffu;
298
break;
299
300
CASE_OP_32_64(qemu_ld):
301
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
302
MemOpIdx oi = op->args[nb_oargs + nb_iargs];
303
MemOp mop = get_memop(oi);
304
if (!(mop & MO_SIGN)) {
305
- mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
306
+ z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
110
}
307
}
111
tcg_opt_gen_movi(s, op, op->args[0], tmp);
308
}
112
break;
309
break;
113
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
310
114
break;
311
CASE_OP_32_64(bswap16):
115
}
312
- mask = arg_info(op->args[1])->mask;
116
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
313
- if (mask <= 0xffff) {
117
- tcg_target_ulong tv = arg_info(op->args[3])->val;
314
+ z_mask = arg_info(op->args[1])->z_mask;
118
- tcg_target_ulong fv = arg_info(op->args[4])->val;
315
+ if (z_mask <= 0xffff) {
119
+ uint64_t tv = arg_info(op->args[3])->val;
316
op->args[2] |= TCG_BSWAP_IZ;
120
+ uint64_t fv = arg_info(op->args[4])->val;
317
}
121
TCGCond cond = op->args[5];
318
- mask = bswap16(mask);
122
+
319
+ z_mask = bswap16(z_mask);
123
if (fv == 1 && tv == 0) {
320
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
124
cond = tcg_invert_cond(cond);
321
case TCG_BSWAP_OZ:
125
} else if (!(tv == 1 && fv == 0)) {
322
break;
323
case TCG_BSWAP_OS:
324
- mask = (int16_t)mask;
325
+ z_mask = (int16_t)z_mask;
326
break;
327
default: /* undefined high bits */
328
- mask |= MAKE_64BIT_MASK(16, 48);
329
+ z_mask |= MAKE_64BIT_MASK(16, 48);
330
break;
331
}
332
break;
333
334
case INDEX_op_bswap32_i64:
335
- mask = arg_info(op->args[1])->mask;
336
- if (mask <= 0xffffffffu) {
337
+ z_mask = arg_info(op->args[1])->z_mask;
338
+ if (z_mask <= 0xffffffffu) {
339
op->args[2] |= TCG_BSWAP_IZ;
340
}
341
- mask = bswap32(mask);
342
+ z_mask = bswap32(z_mask);
343
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
344
case TCG_BSWAP_OZ:
345
break;
346
case TCG_BSWAP_OS:
347
- mask = (int32_t)mask;
348
+ z_mask = (int32_t)z_mask;
349
break;
350
default: /* undefined high bits */
351
- mask |= MAKE_64BIT_MASK(32, 32);
352
+ z_mask |= MAKE_64BIT_MASK(32, 32);
353
break;
354
}
355
break;
356
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
357
/* 32-bit ops generate 32-bit results. For the result is zero test
358
below, we can ignore high bits, but for further optimizations we
359
need to record that the high bits contain garbage. */
360
- partmask = mask;
361
+ partmask = z_mask;
362
if (!(def->flags & TCG_OPF_64BIT)) {
363
- mask |= ~(tcg_target_ulong)0xffffffffu;
364
+ z_mask |= ~(tcg_target_ulong)0xffffffffu;
365
partmask &= 0xffffffffu;
366
affected &= 0xffffffffu;
367
}
368
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
369
vs the high word of the input. */
370
do_setcond_high:
371
reset_temp(op->args[0]);
372
- arg_info(op->args[0])->mask = 1;
373
+ arg_info(op->args[0])->z_mask = 1;
374
op->opc = INDEX_op_setcond_i32;
375
op->args[1] = op->args[2];
376
op->args[2] = op->args[4];
377
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
378
}
379
do_setcond_low:
380
reset_temp(op->args[0]);
381
- arg_info(op->args[0])->mask = 1;
382
+ arg_info(op->args[0])->z_mask = 1;
383
op->opc = INDEX_op_setcond_i32;
384
op->args[2] = op->args[3];
385
op->args[3] = op->args[5];
386
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
387
/* Default case: we know nothing about operation (or were unable
388
to compute the operation result) so no propagation is done.
389
We trash everything if the operation is the end of a basic
390
- block, otherwise we only trash the output args. "mask" is
391
+ block, otherwise we only trash the output args. "z_mask" is
392
the non-zero bits mask for the first output arg. */
393
if (def->flags & TCG_OPF_BB_END) {
394
memset(&temps_used, 0, sizeof(temps_used));
395
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
396
/* Save the corresponding known-zero bits mask for the
397
first output argument (only one supported so far). */
398
if (i == 0) {
399
- arg_info(op->args[i])->mask = mask;
400
+ arg_info(op->args[i])->z_mask = z_mask;
401
}
402
}
403
}
126
--
404
--
127
2.25.1
405
2.25.1
128
406
129
407
1
Provide what will become a larger context for splitting
2
the very large tcg_optimize function.
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
8
---
3
tcg/optimize.c | 108 ++++++++++++++++++++++---------------------------
9
tcg/optimize.c | 77 ++++++++++++++++++++++++++------------------------
4
1 file changed, 49 insertions(+), 59 deletions(-)
10
1 file changed, 40 insertions(+), 37 deletions(-)
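The change is purely mechanical, but the shape it enables is worth spelling out: state that used to live in locals of one huge function moves into a context struct, so the loop body can later be split into helpers that all take the same pointer. A generic sketch of that pattern (illustrative types and names, not the tcg/optimize.c code itself):

#include <stdint.h>
#include <stdbool.h>

typedef struct Context {
    uint64_t z_mask;   /* per-op state shared by the helpers */
    bool done;
} Context;

static void fold_one(Context *ctx, uint64_t arg)
{
    ctx->z_mask &= arg;   /* helpers read and update shared state via ctx */
}

static void run_pass(const uint64_t *args, int n)
{
    Context ctx = { .z_mask = UINT64_MAX };

    for (int i = 0; i < n && !ctx.done; i++) {
        fold_one(&ctx, args[i]);
    }
}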
5
11
6
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
7
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
8
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
9
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
10
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
11
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
17
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
12
}
18
} TempOptInfo;
13
19
14
-static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, uint64_t val)
20
+typedef struct OptContext {
15
-{
21
+ TCGTempSet temps_used;
16
- const TCGOpDef *def;
22
+} OptContext;
17
- TCGOpcode new_op;
23
+
18
- uint64_t mask;
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
19
- TempOptInfo *di = arg_info(dst);
25
{
20
-
26
return ts->state_ptr;
21
- def = &tcg_op_defs[op->opc];
27
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
22
- if (def->flags & TCG_OPF_VECTOR) {
28
}
23
- new_op = INDEX_op_dupi_vec;
29
24
- } else if (def->flags & TCG_OPF_64BIT) {
30
/* Initialize and activate a temporary. */
25
- new_op = INDEX_op_movi_i64;
31
-static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
26
- } else {
32
+static void init_ts_info(OptContext *ctx, TCGTemp *ts)
27
- new_op = INDEX_op_movi_i32;
33
{
28
- }
34
size_t idx = temp_idx(ts);
29
- op->opc = new_op;
35
TempOptInfo *ti;
30
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
36
31
- op->args[0] = dst;
37
- if (test_bit(idx, temps_used->l)) {
32
- op->args[1] = val;
38
+ if (test_bit(idx, ctx->temps_used.l)) {
33
-
39
return;
34
- reset_temp(dst);
40
}
35
- di->is_const = true;
41
- set_bit(idx, temps_used->l);
36
- di->val = val;
42
+ set_bit(idx, ctx->temps_used.l);
37
- mask = val;
43
38
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
44
ti = ts->state_ptr;
39
- /* High bits of the destination are now garbage. */
45
if (ti == NULL) {
40
- mask |= ~0xffffffffull;
46
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
41
- }
47
}
42
- di->mask = mask;
48
}
43
-}
49
44
-
50
-static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
45
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
51
+static void init_arg_info(OptContext *ctx, TCGArg arg)
46
{
52
{
47
TCGTemp *dst_ts = arg_temp(dst);
53
- init_ts_info(temps_used, arg_temp(arg));
54
+ init_ts_info(ctx, arg_temp(arg));
55
}
56
57
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
48
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
58
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
49
}
59
}
50
}
60
}
51
61
52
+static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
62
-static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
53
+ TCGOp *op, TCGArg dst, uint64_t val)
63
+static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
54
+{
64
TCGOp *op, TCGArg dst, uint64_t val)
55
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
65
{
56
+ TCGType type;
66
const TCGOpDef *def = &tcg_op_defs[op->opc];
57
+ TCGTemp *tv;
67
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
58
+
68
59
+ if (def->flags & TCG_OPF_VECTOR) {
69
/* Convert movi to mov with constant temp. */
60
+ type = TCGOP_VECL(op) + TCG_TYPE_V64;
70
tv = tcg_constant_internal(type, val);
61
+ } else if (def->flags & TCG_OPF_64BIT) {
71
- init_ts_info(temps_used, tv);
62
+ type = TCG_TYPE_I64;
72
+ init_ts_info(ctx, tv);
63
+ } else {
73
tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
64
+ type = TCG_TYPE_I32;
74
}
65
+ }
75
66
+
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
67
+ /* Convert movi to mov with constant temp. */
77
{
68
+ tv = tcg_constant_internal(type, val);
78
int nb_temps, nb_globals, i;
69
+ init_ts_info(temps_used, tv);
79
TCGOp *op, *op_next, *prev_mb = NULL;
70
+ tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
80
- TCGTempSet temps_used;
71
+}
81
+ OptContext ctx = {};
72
+
82
73
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
83
/* Array VALS has an element for each temp.
74
{
84
If this temp holds a constant then its value is kept in VALS' element.
75
uint64_t l64, h64;
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
nb_temps = s->nb_temps;
86
nb_temps = s->nb_temps;
78
nb_globals = s->nb_globals;
87
nb_globals = s->nb_globals;
79
88
80
- bitmap_zero(temps_used.l, nb_temps);
89
- memset(&temps_used, 0, sizeof(temps_used));
81
+ memset(&temps_used, 0, sizeof(temps_used));
82
for (i = 0; i < nb_temps; ++i) {
90
for (i = 0; i < nb_temps; ++i) {
83
s->temps[i].state_ptr = NULL;
91
s->temps[i].state_ptr = NULL;
84
}
92
}
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
93
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
94
for (i = 0; i < nb_oargs + nb_iargs; i++) {
95
TCGTemp *ts = arg_temp(op->args[i]);
96
if (ts) {
97
- init_ts_info(&temps_used, ts);
98
+ init_ts_info(&ctx, ts);
99
}
100
}
101
} else {
102
nb_oargs = def->nb_oargs;
103
nb_iargs = def->nb_iargs;
104
for (i = 0; i < nb_oargs + nb_iargs; i++) {
105
- init_arg_info(&temps_used, op->args[i]);
106
+ init_arg_info(&ctx, op->args[i]);
107
}
108
}
109
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
CASE_OP_32_64(rotr):
111
CASE_OP_32_64(rotr):
87
if (arg_is_const(op->args[1])
112
if (arg_is_const(op->args[1])
88
&& arg_info(op->args[1])->val == 0) {
113
&& arg_info(op->args[1])->val == 0) {
89
- tcg_opt_gen_movi(s, op, op->args[0], 0);
114
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
90
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
115
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
91
continue;
116
continue;
92
}
117
}
93
break;
118
break;
94
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
119
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
95
120
96
if (partmask == 0) {
121
if (partmask == 0) {
97
tcg_debug_assert(nb_oargs == 1);
122
tcg_debug_assert(nb_oargs == 1);
98
- tcg_opt_gen_movi(s, op, op->args[0], 0);
123
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
99
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
124
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
100
continue;
125
continue;
101
}
126
}
102
if (affected == 0) {
127
if (affected == 0) {
103
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
128
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
104
CASE_OP_32_64(mulsh):
129
CASE_OP_32_64(mulsh):
105
if (arg_is_const(op->args[2])
130
if (arg_is_const(op->args[2])
106
&& arg_info(op->args[2])->val == 0) {
131
&& arg_info(op->args[2])->val == 0) {
107
- tcg_opt_gen_movi(s, op, op->args[0], 0);
132
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
108
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
133
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
109
continue;
134
continue;
110
}
135
}
111
break;
136
break;
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
137
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
CASE_OP_32_64_VEC(sub):
138
CASE_OP_32_64_VEC(sub):
114
CASE_OP_32_64_VEC(xor):
139
CASE_OP_32_64_VEC(xor):
115
if (args_are_copies(op->args[1], op->args[2])) {
140
if (args_are_copies(op->args[1], op->args[2])) {
116
- tcg_opt_gen_movi(s, op, op->args[0], 0);
141
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
117
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
142
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
118
continue;
143
continue;
119
}
144
}
120
break;
145
break;
121
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
146
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
122
break;
123
CASE_OP_32_64(movi):
124
case INDEX_op_dupi_vec:
125
- tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
126
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], op->args[1]);
127
break;
128
129
case INDEX_op_dup_vec:
130
if (arg_is_const(op->args[1])) {
147
if (arg_is_const(op->args[1])) {
131
tmp = arg_info(op->args[1])->val;
148
tmp = arg_info(op->args[1])->val;
132
tmp = dup_const(TCGOP_VECE(op), tmp);
149
tmp = dup_const(TCGOP_VECE(op), tmp);
133
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
150
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
134
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
151
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
135
break;
152
break;
136
}
153
}
137
goto do_default;
154
goto do_default;
138
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
155
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
156
case INDEX_op_dup2_vec:
157
assert(TCG_TARGET_REG_BITS == 32);
139
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
158
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
140
tmp = arg_info(op->args[1])->val;
159
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
141
if (tmp == arg_info(op->args[2])->val) {
160
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0],
142
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
161
deposit64(arg_info(op->args[1])->val, 32, 32,
143
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
162
arg_info(op->args[2])->val));
144
break;
163
break;
145
}
146
} else if (args_are_copies(op->args[1], op->args[2])) {
147
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
164
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
148
case INDEX_op_extrh_i64_i32:
165
case INDEX_op_extrh_i64_i32:
149
if (arg_is_const(op->args[1])) {
166
if (arg_is_const(op->args[1])) {
150
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
167
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
151
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
168
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
152
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
169
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
break;
171
}
172
goto do_default;
173
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
174
if (arg_is_const(op->args[1])) {
175
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
176
op->args[2]);
177
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
178
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
153
break;
179
break;
154
}
180
}
155
goto do_default;
181
goto do_default;
156
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
182
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
157
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
183
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
158
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
184
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
159
arg_info(op->args[2])->val);
185
arg_info(op->args[2])->val);
160
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
186
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
161
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
187
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
162
break;
188
break;
163
}
189
}
164
goto do_default;
190
goto do_default;
165
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
166
TCGArg v = arg_info(op->args[1])->val;
192
TCGArg v = arg_info(op->args[1])->val;
167
if (v != 0) {
193
if (v != 0) {
168
tmp = do_constant_folding(opc, v, 0);
194
tmp = do_constant_folding(opc, v, 0);
169
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
195
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
170
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
196
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
171
} else {
197
} else {
172
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
198
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
173
}
199
}
174
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
175
tmp = deposit64(arg_info(op->args[1])->val,
201
tmp = deposit64(arg_info(op->args[1])->val,
176
op->args[3], op->args[4],
202
op->args[3], op->args[4],
177
arg_info(op->args[2])->val);
203
arg_info(op->args[2])->val);
178
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
204
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
179
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
205
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
180
break;
206
break;
181
}
207
}
182
goto do_default;
208
goto do_default;
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
if (arg_is_const(op->args[1])) {
210
if (arg_is_const(op->args[1])) {
185
tmp = extract64(arg_info(op->args[1])->val,
211
tmp = extract64(arg_info(op->args[1])->val,
186
op->args[2], op->args[3]);
212
op->args[2], op->args[3]);
187
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
213
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
188
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
214
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
189
break;
215
break;
190
}
216
}
191
goto do_default;
217
goto do_default;
192
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
218
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
193
if (arg_is_const(op->args[1])) {
219
if (arg_is_const(op->args[1])) {
194
tmp = sextract64(arg_info(op->args[1])->val,
220
tmp = sextract64(arg_info(op->args[1])->val,
195
op->args[2], op->args[3]);
221
op->args[2], op->args[3]);
196
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
222
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
197
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
223
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
198
break;
224
break;
199
}
225
}
200
goto do_default;
226
goto do_default;
201
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
227
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
202
tmp = (int32_t)(((uint32_t)v1 >> shr) |
228
tmp = (int32_t)(((uint32_t)v1 >> shr) |
203
((uint32_t)v2 << (32 - shr)));
229
((uint32_t)v2 << (32 - shr)));
204
}
230
}
205
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
231
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
206
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
232
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
207
break;
233
break;
208
}
234
}
209
goto do_default;
235
goto do_default;
210
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
236
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
211
tmp = do_constant_folding_cond(opc, op->args[1],
237
tmp = do_constant_folding_cond(opc, op->args[1],
212
op->args[2], op->args[3]);
238
op->args[2], op->args[3]);
213
if (tmp != 2) {
239
if (tmp != 2) {
214
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
240
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
215
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
241
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
216
break;
242
break;
217
}
243
}
218
goto do_default;
244
goto do_default;
219
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
245
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
220
op->args[1], op->args[2]);
246
op->args[1], op->args[2]);
221
if (tmp != 2) {
247
if (tmp != 2) {
222
if (tmp) {
248
if (tmp) {
223
- bitmap_zero(temps_used.l, nb_temps);
249
- memset(&temps_used, 0, sizeof(temps_used));
224
+ memset(&temps_used, 0, sizeof(temps_used));
250
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
225
op->opc = INDEX_op_br;
251
op->opc = INDEX_op_br;
226
op->args[0] = op->args[3];
252
op->args[0] = op->args[3];
227
} else {
253
} else {
228
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
254
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
229
uint64_t a = ((uint64_t)ah << 32) | al;
230
uint64_t b = ((uint64_t)bh << 32) | bl;
231
TCGArg rl, rh;
232
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
233
+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
234
235
if (opc == INDEX_op_add2_i32) {
236
a += b;
237
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
238
255
239
rl = op->args[0];
256
rl = op->args[0];
240
rh = op->args[1];
257
rh = op->args[1];
241
- tcg_opt_gen_movi(s, op, rl, (int32_t)a);
258
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
242
- tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
259
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
243
+ tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
260
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
244
+ tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
261
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
245
break;
262
break;
246
}
263
}
247
goto do_default;
264
goto do_default;
248
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
265
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
249
uint32_t b = arg_info(op->args[3])->val;
250
uint64_t r = (uint64_t)a * b;
251
TCGArg rl, rh;
252
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
253
+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
254
266
255
rl = op->args[0];
267
rl = op->args[0];
256
rh = op->args[1];
268
rh = op->args[1];
257
- tcg_opt_gen_movi(s, op, rl, (int32_t)r);
269
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
258
- tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
270
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
259
+ tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
271
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
260
+ tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
272
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
261
break;
273
break;
262
}
274
}
263
goto do_default;
275
goto do_default;
264
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
276
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
265
if (tmp != 2) {
277
if (tmp != 2) {
266
if (tmp) {
278
if (tmp) {
267
do_brcond_true:
279
do_brcond_true:
268
- bitmap_zero(temps_used.l, nb_temps);
280
- memset(&temps_used, 0, sizeof(temps_used));
269
+ memset(&temps_used, 0, sizeof(temps_used));
281
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
270
op->opc = INDEX_op_br;
282
op->opc = INDEX_op_br;
271
op->args[0] = op->args[5];
283
op->args[0] = op->args[5];
272
} else {
284
} else {
273
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
285
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
274
/* Simplify LT/GE comparisons vs zero to a single compare
286
/* Simplify LT/GE comparisons vs zero to a single compare
275
vs the high word of the input. */
287
vs the high word of the input. */
276
do_brcond_high:
288
do_brcond_high:
277
- bitmap_zero(temps_used.l, nb_temps);
289
- memset(&temps_used, 0, sizeof(temps_used));
278
+ memset(&temps_used, 0, sizeof(temps_used));
290
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
279
op->opc = INDEX_op_brcond_i32;
291
op->opc = INDEX_op_brcond_i32;
280
op->args[0] = op->args[1];
292
op->args[0] = op->args[1];
281
op->args[1] = op->args[3];
293
op->args[1] = op->args[3];
282
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
294
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
283
goto do_default;
295
goto do_default;
284
}
296
}
285
do_brcond_low:
297
do_brcond_low:
286
- bitmap_zero(temps_used.l, nb_temps);
298
- memset(&temps_used, 0, sizeof(temps_used));
287
+ memset(&temps_used, 0, sizeof(temps_used));
299
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
288
op->opc = INDEX_op_brcond_i32;
300
op->opc = INDEX_op_brcond_i32;
289
op->args[1] = op->args[2];
301
op->args[1] = op->args[2];
290
op->args[2] = op->args[4];
302
op->args[2] = op->args[4];
291
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
303
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
292
op->args[5]);
304
op->args[5]);
293
if (tmp != 2) {
305
if (tmp != 2) {
294
do_setcond_const:
306
do_setcond_const:
295
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
307
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
296
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
308
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
297
} else if ((op->args[5] == TCG_COND_LT
309
} else if ((op->args[5] == TCG_COND_LT
298
|| op->args[5] == TCG_COND_GE)
310
|| op->args[5] == TCG_COND_GE)
299
&& arg_is_const(op->args[3])
311
&& arg_is_const(op->args[3])
300
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
301
block, otherwise we only trash the output args. "mask" is
313
if (!(tcg_call_flags(op)
314
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
315
for (i = 0; i < nb_globals; i++) {
316
- if (test_bit(i, temps_used.l)) {
317
+ if (test_bit(i, ctx.temps_used.l)) {
318
reset_ts(&s->temps[i]);
319
}
320
}
321
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
322
block, otherwise we only trash the output args. "z_mask" is
302
the non-zero bits mask for the first output arg. */
323
the non-zero bits mask for the first output arg. */
303
if (def->flags & TCG_OPF_BB_END) {
324
if (def->flags & TCG_OPF_BB_END) {
304
- bitmap_zero(temps_used.l, nb_temps);
325
- memset(&temps_used, 0, sizeof(temps_used));
305
+ memset(&temps_used, 0, sizeof(temps_used));
326
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
306
} else {
327
} else {
307
do_reset_output:
328
do_reset_output:
308
for (i = 0; i < nb_oargs; i++) {
329
for (i = 0; i < nb_oargs; i++) {
309
--
330
--
310
2.25.1
331
2.25.1
311
332
312
333
1
These are now completely covered by mov from a
1
Break the final cleanup clause out of the main switch
2
TYPE_CONST temporary.
2
statement. When fully folding an opcode to mov/movi,
3
use "continue" to process the next opcode, else break
4
to fall into the final cleanup.
3
5
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Aleksandar Markovic <aleksandar.qemu.devel@gmail.com>
7
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
10
---
8
include/tcg/tcg-opc.h | 3 ---
11
tcg/optimize.c | 190 ++++++++++++++++++++++++-------------------------
9
tcg/optimize.c | 4 ----
12
1 file changed, 94 insertions(+), 96 deletions(-)
10
tcg/tcg-op-vec.c | 1 -
11
tcg/tcg.c | 18 +-----------------
12
tcg/aarch64/tcg-target.c.inc | 3 ---
13
tcg/arm/tcg-target.c.inc | 1 -
14
tcg/i386/tcg-target.c.inc | 3 ---
15
tcg/mips/tcg-target.c.inc | 2 --
16
tcg/ppc/tcg-target.c.inc | 3 ---
17
tcg/riscv/tcg-target.c.inc | 2 --
18
tcg/s390/tcg-target.c.inc | 2 --
19
tcg/sparc/tcg-target.c.inc | 2 --
20
tcg/tci/tcg-target.c.inc | 2 --
21
13 files changed, 1 insertion(+), 45 deletions(-)
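The control-flow convention introduced here is easy to lose in the large diff: inside the per-opcode switch, "continue" now means the op has been fully folded and the generic cleanup must be skipped, while "break" falls through into that cleanup. A stripped-down sketch of the resulting loop shape (illustrative only, not the real optimizer):

static void optimize_ops(int *ops, int n)
{
    for (int i = 0; i < n; i++) {
        switch (ops[i]) {
        case 0:
            /* fully folded: nothing left to do for this op */
            continue;
        case 1:
            /* partially simplified: still wants the generic cleanup */
            break;
        default:
            break;
        }

        /* generic cleanup, formerly reached via the do_default label */
        ops[i] = -1;
    }
}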
22
13
23
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
24
index XXXXXXX..XXXXXXX 100644
25
--- a/include/tcg/tcg-opc.h
26
+++ b/include/tcg/tcg-opc.h
27
@@ -XXX,XX +XXX,XX @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END)
28
DEF(mb, 0, 0, 1, 0)
29
30
DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
31
-DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT)
32
DEF(setcond_i32, 1, 2, 1, 0)
33
DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
34
/* load/store */
35
@@ -XXX,XX +XXX,XX @@ DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
36
DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
37
38
DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
39
-DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
40
DEF(setcond_i64, 1, 2, 1, IMPL64)
41
DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
42
/* load/store */
43
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
44
#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
45
46
DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
47
-DEF(dupi_vec, 1, 0, 1, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
48
49
DEF(dup_vec, 1, 1, 0, IMPLVEC)
50
DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
51
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
52
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
53
--- a/tcg/optimize.c
16
--- a/tcg/optimize.c
54
+++ b/tcg/optimize.c
17
+++ b/tcg/optimize.c
55
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
19
switch (opc) {
56
CASE_OP_32_64_VEC(mov):
20
CASE_OP_32_64_VEC(mov):
57
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
21
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
58
break;
59
- CASE_OP_32_64(movi):
60
- case INDEX_op_dupi_vec:
61
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], op->args[1]);
62
- break;
22
- break;
23
+ continue;
63
24
64
case INDEX_op_dup_vec:
25
case INDEX_op_dup_vec:
65
if (arg_is_const(op->args[1])) {
26
if (arg_is_const(op->args[1])) {
66
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
27
tmp = arg_info(op->args[1])->val;
67
index XXXXXXX..XXXXXXX 100644
28
tmp = dup_const(TCGOP_VECE(op), tmp);
68
--- a/tcg/tcg-op-vec.c
29
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
69
+++ b/tcg/tcg-op-vec.c
30
- break;
70
@@ -XXX,XX +XXX,XX @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list,
31
+ continue;
71
case INDEX_op_xor_vec:
32
}
72
case INDEX_op_mov_vec:
33
- goto do_default;
73
case INDEX_op_dup_vec:
34
+ break;
74
- case INDEX_op_dupi_vec:
35
75
case INDEX_op_dup2_vec:
36
case INDEX_op_dup2_vec:
76
case INDEX_op_ld_vec:
37
assert(TCG_TARGET_REG_BITS == 32);
77
case INDEX_op_st_vec:
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
78
diff --git a/tcg/tcg.c b/tcg/tcg.c
39
tcg_opt_gen_movi(s, &ctx, op, op->args[0],
79
index XXXXXXX..XXXXXXX 100644
40
deposit64(arg_info(op->args[1])->val, 32, 32,
80
--- a/tcg/tcg.c
41
arg_info(op->args[2])->val));
81
+++ b/tcg/tcg.c
42
- break;
82
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
43
+ continue;
83
return TCG_TARGET_HAS_goto_ptr;
44
} else if (args_are_copies(op->args[1], op->args[2])) {
84
45
op->opc = INDEX_op_dup_vec;
85
case INDEX_op_mov_i32:
46
TCGOP_VECE(op) = MO_32;
86
- case INDEX_op_movi_i32:
47
nb_iargs = 1;
87
case INDEX_op_setcond_i32:
48
}
88
case INDEX_op_brcond_i32:
49
- goto do_default;
89
case INDEX_op_ld8u_i32:
50
+ break;
90
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
51
91
return TCG_TARGET_REG_BITS == 32;
52
CASE_OP_32_64(not):
92
53
CASE_OP_32_64(neg):
93
case INDEX_op_mov_i64:
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
94
- case INDEX_op_movi_i64:
55
if (arg_is_const(op->args[1])) {
95
case INDEX_op_setcond_i64:
56
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
96
case INDEX_op_brcond_i64:
57
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
97
case INDEX_op_ld8u_i64:
58
- break;
98
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
59
+ continue;
99
60
}
100
case INDEX_op_mov_vec:
61
- goto do_default;
101
case INDEX_op_dup_vec:
62
+ break;
102
- case INDEX_op_dupi_vec:
63
103
case INDEX_op_dupm_vec:
64
CASE_OP_32_64(bswap16):
104
case INDEX_op_ld_vec:
65
CASE_OP_32_64(bswap32):
105
case INDEX_op_st_vec:
66
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
106
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
67
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
107
}
68
op->args[2]);
108
69
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
109
/*
70
- break;
110
- * Specialized code generation for INDEX_op_movi_*.
71
+ continue;
111
+ * Specialized code generation for INDEX_op_mov_* with a constant.
72
}
112
*/
73
- goto do_default;
113
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
74
+ break;
114
tcg_target_ulong val, TCGLifeData arg_life,
75
115
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
76
CASE_OP_32_64(add):
116
}
77
CASE_OP_32_64(sub):
117
}
78
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
118
79
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
119
-static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
80
arg_info(op->args[2])->val);
120
-{
81
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
121
- TCGTemp *ots = arg_temp(op->args[0]);
82
- break;
122
- tcg_target_ulong val = op->args[1];
83
+ continue;
123
-
84
}
124
- tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
85
- goto do_default;
125
-}
86
+ break;
126
-
87
127
/*
88
CASE_OP_32_64(clz):
128
* Specialized code generation for INDEX_op_mov_*.
89
CASE_OP_32_64(ctz):
129
*/
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
130
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
91
} else {
131
case INDEX_op_mov_vec:
92
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
132
tcg_reg_alloc_mov(s, op);
93
}
94
- break;
95
+ continue;
96
}
97
- goto do_default;
98
+ break;
99
100
CASE_OP_32_64(deposit):
101
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
102
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
103
op->args[3], op->args[4],
104
arg_info(op->args[2])->val);
105
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
106
- break;
107
+ continue;
108
}
109
- goto do_default;
110
+ break;
111
112
CASE_OP_32_64(extract):
113
if (arg_is_const(op->args[1])) {
114
tmp = extract64(arg_info(op->args[1])->val,
115
op->args[2], op->args[3]);
116
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
117
- break;
118
+ continue;
119
}
120
- goto do_default;
121
+ break;
122
123
CASE_OP_32_64(sextract):
124
if (arg_is_const(op->args[1])) {
125
tmp = sextract64(arg_info(op->args[1])->val,
126
op->args[2], op->args[3]);
127
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
128
- break;
129
+ continue;
130
}
131
- goto do_default;
132
+ break;
133
134
CASE_OP_32_64(extract2):
135
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
136
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
137
((uint32_t)v2 << (32 - shr)));
138
}
139
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
140
- break;
141
+ continue;
142
}
143
- goto do_default;
144
+ break;
145
146
CASE_OP_32_64(setcond):
147
tmp = do_constant_folding_cond(opc, op->args[1],
148
op->args[2], op->args[3]);
149
if (tmp != 2) {
150
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
151
- break;
152
+ continue;
153
}
154
- goto do_default;
155
+ break;
156
157
CASE_OP_32_64(brcond):
158
tmp = do_constant_folding_cond(opc, op->args[0],
159
op->args[1], op->args[2]);
160
- if (tmp != 2) {
161
- if (tmp) {
162
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
163
- op->opc = INDEX_op_br;
164
- op->args[0] = op->args[3];
165
- } else {
166
- tcg_op_remove(s, op);
167
- }
168
+ switch (tmp) {
169
+ case 0:
170
+ tcg_op_remove(s, op);
171
+ continue;
172
+ case 1:
173
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
174
+ op->opc = opc = INDEX_op_br;
175
+ op->args[0] = op->args[3];
176
break;
177
}
178
- goto do_default;
179
+ break;
180
181
CASE_OP_32_64(movcond):
182
tmp = do_constant_folding_cond(opc, op->args[1],
183
op->args[2], op->args[5]);
184
if (tmp != 2) {
185
tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
186
- break;
187
+ continue;
188
}
189
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
190
uint64_t tv = arg_info(op->args[3])->val;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
if (fv == 1 && tv == 0) {
193
cond = tcg_invert_cond(cond);
194
} else if (!(tv == 1 && fv == 0)) {
195
- goto do_default;
196
+ break;
197
}
198
op->args[3] = cond;
199
op->opc = opc = (opc == INDEX_op_movcond_i32
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
: INDEX_op_setcond_i64);
202
nb_iargs = 2;
203
}
204
- goto do_default;
205
+ break;
206
207
case INDEX_op_add2_i32:
208
case INDEX_op_sub2_i32:
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
rh = op->args[1];
211
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
212
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
213
- break;
214
+ continue;
215
}
216
- goto do_default;
217
+ break;
218
219
case INDEX_op_mulu2_i32:
220
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
221
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
222
rh = op->args[1];
223
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
224
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
225
- break;
226
+ continue;
227
}
228
- goto do_default;
229
+ break;
230
231
case INDEX_op_brcond2_i32:
232
tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
233
op->args[4]);
234
- if (tmp != 2) {
235
- if (tmp) {
236
- do_brcond_true:
237
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
238
- op->opc = INDEX_op_br;
239
- op->args[0] = op->args[5];
240
- } else {
241
+ if (tmp == 0) {
242
do_brcond_false:
243
- tcg_op_remove(s, op);
244
- }
245
- } else if ((op->args[4] == TCG_COND_LT
246
- || op->args[4] == TCG_COND_GE)
247
- && arg_is_const(op->args[2])
248
- && arg_info(op->args[2])->val == 0
249
- && arg_is_const(op->args[3])
250
- && arg_info(op->args[3])->val == 0) {
251
+ tcg_op_remove(s, op);
252
+ continue;
253
+ }
254
+ if (tmp == 1) {
255
+ do_brcond_true:
256
+ op->opc = opc = INDEX_op_br;
257
+ op->args[0] = op->args[5];
258
+ break;
259
+ }
260
+ if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
261
+ && arg_is_const(op->args[2])
262
+ && arg_info(op->args[2])->val == 0
263
+ && arg_is_const(op->args[3])
264
+ && arg_info(op->args[3])->val == 0) {
265
/* Simplify LT/GE comparisons vs zero to a single compare
266
vs the high word of the input. */
267
do_brcond_high:
268
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
269
- op->opc = INDEX_op_brcond_i32;
270
+ op->opc = opc = INDEX_op_brcond_i32;
271
op->args[0] = op->args[1];
272
op->args[1] = op->args[3];
273
op->args[2] = op->args[4];
274
op->args[3] = op->args[5];
275
- } else if (op->args[4] == TCG_COND_EQ) {
276
+ break;
277
+ }
278
+ if (op->args[4] == TCG_COND_EQ) {
279
/* Simplify EQ comparisons where one of the pairs
280
can be simplified. */
281
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
282
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
283
if (tmp == 0) {
284
goto do_brcond_false;
285
} else if (tmp != 1) {
286
- goto do_default;
287
+ break;
288
}
289
do_brcond_low:
290
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
292
op->args[1] = op->args[2];
293
op->args[2] = op->args[4];
294
op->args[3] = op->args[5];
295
- } else if (op->args[4] == TCG_COND_NE) {
296
+ break;
297
+ }
298
+ if (op->args[4] == TCG_COND_NE) {
299
/* Simplify NE comparisons where one of the pairs
300
can be simplified. */
301
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
302
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
303
} else if (tmp == 1) {
304
goto do_brcond_true;
305
}
306
- goto do_default;
307
- } else {
308
- goto do_default;
309
}
133
break;
310
break;
134
- case INDEX_op_movi_i32:
311
135
- case INDEX_op_movi_i64:
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
136
- case INDEX_op_dupi_vec:
313
if (tmp != 2) {
137
- tcg_reg_alloc_movi(s, op);
314
do_setcond_const:
315
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
316
- } else if ((op->args[5] == TCG_COND_LT
317
- || op->args[5] == TCG_COND_GE)
318
- && arg_is_const(op->args[3])
319
- && arg_info(op->args[3])->val == 0
320
- && arg_is_const(op->args[4])
321
- && arg_info(op->args[4])->val == 0) {
322
+ continue;
323
+ }
324
+ if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
325
+ && arg_is_const(op->args[3])
326
+ && arg_info(op->args[3])->val == 0
327
+ && arg_is_const(op->args[4])
328
+ && arg_info(op->args[4])->val == 0) {
329
/* Simplify LT/GE comparisons vs zero to a single compare
330
vs the high word of the input. */
331
do_setcond_high:
332
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
333
op->args[1] = op->args[2];
334
op->args[2] = op->args[4];
335
op->args[3] = op->args[5];
336
- } else if (op->args[5] == TCG_COND_EQ) {
337
+ break;
338
+ }
339
+ if (op->args[5] == TCG_COND_EQ) {
340
/* Simplify EQ comparisons where one of the pairs
341
can be simplified. */
342
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
343
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
344
if (tmp == 0) {
345
goto do_setcond_high;
346
} else if (tmp != 1) {
347
- goto do_default;
348
+ break;
349
}
350
do_setcond_low:
351
reset_temp(op->args[0]);
352
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
353
op->opc = INDEX_op_setcond_i32;
354
op->args[2] = op->args[3];
355
op->args[3] = op->args[5];
356
- } else if (op->args[5] == TCG_COND_NE) {
357
+ break;
358
+ }
359
+ if (op->args[5] == TCG_COND_NE) {
360
/* Simplify NE comparisons where one of the pairs
361
can be simplified. */
362
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
363
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
364
} else if (tmp == 1) {
365
goto do_setcond_const;
366
}
367
- goto do_default;
368
- } else {
369
- goto do_default;
370
}
371
break;
372
373
- case INDEX_op_call:
374
- if (!(tcg_call_flags(op)
375
+ default:
376
+ break;
377
+ }
378
+
379
+ /* Some of the folding above can change opc. */
380
+ opc = op->opc;
381
+ def = &tcg_op_defs[opc];
382
+ if (def->flags & TCG_OPF_BB_END) {
383
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
384
+ } else {
385
+ if (opc == INDEX_op_call &&
386
+ !(tcg_call_flags(op)
387
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
388
for (i = 0; i < nb_globals; i++) {
389
if (test_bit(i, ctx.temps_used.l)) {
390
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
391
}
392
}
393
}
394
- goto do_reset_output;
395
396
- default:
397
- do_default:
398
- /* Default case: we know nothing about operation (or were unable
399
- to compute the operation result) so no propagation is done.
400
- We trash everything if the operation is the end of a basic
401
- block, otherwise we only trash the output args. "z_mask" is
402
- the non-zero bits mask for the first output arg. */
403
- if (def->flags & TCG_OPF_BB_END) {
404
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
405
- } else {
406
- do_reset_output:
407
- for (i = 0; i < nb_oargs; i++) {
408
- reset_temp(op->args[i]);
409
- /* Save the corresponding known-zero bits mask for the
410
- first output argument (only one supported so far). */
411
- if (i == 0) {
412
- arg_info(op->args[i])->z_mask = z_mask;
413
- }
414
+ for (i = 0; i < nb_oargs; i++) {
415
+ reset_temp(op->args[i]);
416
+ /* Save the corresponding known-zero bits mask for the
417
+ first output argument (only one supported so far). */
418
+ if (i == 0) {
419
+ arg_info(op->args[i])->z_mask = z_mask;
420
}
421
}
138
- break;
422
- break;
139
case INDEX_op_dup_vec:
423
}
140
tcg_reg_alloc_dup(s, op);
424
141
break;
425
/* Eliminate duplicate and redundant fence instructions. */
142
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
143
index XXXXXXX..XXXXXXX 100644
144
--- a/tcg/aarch64/tcg-target.c.inc
145
+++ b/tcg/aarch64/tcg-target.c.inc
146
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
147
148
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
149
case INDEX_op_mov_i64:
150
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
151
- case INDEX_op_movi_i64:
152
case INDEX_op_call: /* Always emitted via tcg_out_call. */
153
default:
154
g_assert_not_reached();
155
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
156
break;
157
158
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
159
- case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
160
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
161
default:
162
g_assert_not_reached();
163
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
164
index XXXXXXX..XXXXXXX 100644
165
--- a/tcg/arm/tcg-target.c.inc
166
+++ b/tcg/arm/tcg-target.c.inc
167
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
168
break;
169
170
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
171
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
172
case INDEX_op_call: /* Always emitted via tcg_out_call. */
173
default:
174
tcg_abort();
175
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
176
index XXXXXXX..XXXXXXX 100644
177
--- a/tcg/i386/tcg-target.c.inc
178
+++ b/tcg/i386/tcg-target.c.inc
179
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
180
break;
181
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
182
case INDEX_op_mov_i64:
183
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
184
- case INDEX_op_movi_i64:
185
case INDEX_op_call: /* Always emitted via tcg_out_call. */
186
default:
187
tcg_abort();
188
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
189
break;
190
191
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
192
- case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
193
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
194
default:
195
g_assert_not_reached();
196
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
197
index XXXXXXX..XXXXXXX 100644
198
--- a/tcg/mips/tcg-target.c.inc
199
+++ b/tcg/mips/tcg-target.c.inc
200
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
201
break;
202
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
203
case INDEX_op_mov_i64:
204
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
205
- case INDEX_op_movi_i64:
206
case INDEX_op_call: /* Always emitted via tcg_out_call. */
207
default:
208
tcg_abort();
209
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
210
index XXXXXXX..XXXXXXX 100644
211
--- a/tcg/ppc/tcg-target.c.inc
212
+++ b/tcg/ppc/tcg-target.c.inc
213
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
214
215
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
216
case INDEX_op_mov_i64:
217
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
218
- case INDEX_op_movi_i64:
219
case INDEX_op_call: /* Always emitted via tcg_out_call. */
220
default:
221
tcg_abort();
222
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
223
return;
224
225
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
226
- case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
227
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
228
default:
229
g_assert_not_reached();
230
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
231
index XXXXXXX..XXXXXXX 100644
232
--- a/tcg/riscv/tcg-target.c.inc
233
+++ b/tcg/riscv/tcg-target.c.inc
234
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
235
236
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
237
case INDEX_op_mov_i64:
238
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
239
- case INDEX_op_movi_i64:
240
case INDEX_op_call: /* Always emitted via tcg_out_call. */
241
default:
242
g_assert_not_reached();
243
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
244
index XXXXXXX..XXXXXXX 100644
245
--- a/tcg/s390/tcg-target.c.inc
246
+++ b/tcg/s390/tcg-target.c.inc
247
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
248
249
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
250
case INDEX_op_mov_i64:
251
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
252
- case INDEX_op_movi_i64:
253
case INDEX_op_call: /* Always emitted via tcg_out_call. */
254
default:
255
tcg_abort();
256
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
257
index XXXXXXX..XXXXXXX 100644
258
--- a/tcg/sparc/tcg-target.c.inc
259
+++ b/tcg/sparc/tcg-target.c.inc
260
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
261
262
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
263
case INDEX_op_mov_i64:
264
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
265
- case INDEX_op_movi_i64:
266
case INDEX_op_call: /* Always emitted via tcg_out_call. */
267
default:
268
tcg_abort();
269
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
270
index XXXXXXX..XXXXXXX 100644
271
--- a/tcg/tci/tcg-target.c.inc
272
+++ b/tcg/tci/tcg-target.c.inc
273
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
274
break;
275
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
276
case INDEX_op_mov_i64:
277
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
278
- case INDEX_op_movi_i64:
279
case INDEX_op_call: /* Always emitted via tcg_out_call. */
280
default:
281
tcg_abort();
282
--
426
--
283
2.25.1
427
2.25.1
284
428
285
429
1
While we don't store more than tcg_target_long in TCGTemp,
1
Adjust the interface to take the OptContext parameter instead
2
we shouldn't be limited to that for code generation. We will
2
of TCGContext or both.
3
be able to use this for INDEX_op_dup2_vec with 2 constants.
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Also pass along the minimal vece that may be said to apply
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
to the constant. This allows some simplification in the
7
various backends.
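A minimal standalone sketch of that search, assuming nothing beyond plain C
(dup8/dup16/dup32 and min_vece_for_const are illustrative stand-ins for
dup_const() and the MemOp element sizes, not QEMU APIs):

#include <stdint.h>

static uint64_t dup8(uint64_t v)  { return (v & 0xff) * 0x0101010101010101ull; }
static uint64_t dup16(uint64_t v) { return (v & 0xffff) * 0x0001000100010001ull; }
static uint64_t dup32(uint64_t v) { return (v & 0xffffffffull) * 0x0000000100000001ull; }

/* Smallest element size whose replication reproduces the 64-bit constant:
   0 for 8-bit, 1 for 16-bit, 2 for 32-bit, 3 for 64-bit elements. */
static int min_vece_for_const(uint64_t val)
{
    if (val == dup8(val)) {
        return 0;
    } else if (val == dup16(val)) {
        return 1;
    } else if (val == dup32(val)) {
        return 2;
    }
    return 3;
}

Once the minimal element size is passed down, a backend only has to test the
reported size instead of re-running the replication checks itself.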
8
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
7
---
11
tcg/tcg.c | 31 +++++++++++++++++++++++++-----
8
tcg/optimize.c | 67 +++++++++++++++++++++++++-------------------------
12
tcg/aarch64/tcg-target.c.inc | 12 ++++++------
9
1 file changed, 34 insertions(+), 33 deletions(-)
13
tcg/i386/tcg-target.c.inc | 22 ++++++++++++---------
10
14
tcg/ppc/tcg-target.c.inc | 37 +++++++++++++++++++++++-------------
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
4 files changed, 69 insertions(+), 33 deletions(-)
16
17
diff --git a/tcg/tcg.c b/tcg/tcg.c
18
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
19
--- a/tcg/tcg.c
13
--- a/tcg/optimize.c
20
+++ b/tcg/tcg.c
14
+++ b/tcg/optimize.c
21
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
22
TCGReg dst, TCGReg src);
16
} TempOptInfo;
23
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
17
24
TCGReg dst, TCGReg base, intptr_t offset);
18
typedef struct OptContext {
25
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
19
+ TCGContext *tcg;
26
- TCGReg dst, tcg_target_long arg);
20
TCGTempSet temps_used;
27
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
21
} OptContext;
28
+ TCGReg dst, int64_t arg);
22
29
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
23
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
30
unsigned vece, const TCGArg *args,
24
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
31
const int *const_args);
25
}
32
@@ -XXX,XX +XXX,XX @@ static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
26
27
-static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
28
+static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
33
{
29
{
34
g_assert_not_reached();
30
TCGTemp *dst_ts = arg_temp(dst);
35
}
31
TCGTemp *src_ts = arg_temp(src);
36
-static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
32
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
37
- TCGReg dst, tcg_target_long arg)
33
TCGOpcode new_op;
38
+static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
34
39
+ TCGReg dst, int64_t arg)
35
if (ts_are_copies(dst_ts, src_ts)) {
40
{
36
- tcg_op_remove(s, op);
41
g_assert_not_reached();
37
+ tcg_op_remove(ctx->tcg, op);
42
}
43
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
44
if (ts->type <= TCG_TYPE_I64) {
45
tcg_out_movi(s, ts->type, reg, ts->val);
46
} else {
47
- tcg_out_dupi_vec(s, ts->type, reg, ts->val);
48
+ uint64_t val = ts->val;
49
+ MemOp vece = MO_64;
50
+
51
+ /*
52
+ * Find the minimal vector element that matches the constant.
53
+ * The targets will, in general, have to do this search anyway,
54
+ * do this generically.
55
+ */
56
+ if (TCG_TARGET_REG_BITS == 32) {
57
+ val = dup_const(MO_32, val);
58
+ vece = MO_32;
59
+ }
60
+ if (val == dup_const(MO_8, val)) {
61
+ vece = MO_8;
62
+ } else if (val == dup_const(MO_16, val)) {
63
+ vece = MO_16;
64
+ } else if (TCG_TARGET_REG_BITS == 64 &&
65
+ val == dup_const(MO_32, val)) {
66
+ vece = MO_32;
67
+ }
68
+
69
+ tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
70
}
71
ts->mem_coherent = 0;
72
break;
73
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
74
index XXXXXXX..XXXXXXX 100644
75
--- a/tcg/aarch64/tcg-target.c.inc
76
+++ b/tcg/aarch64/tcg-target.c.inc
77
@@ -XXX,XX +XXX,XX @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
78
tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
79
}
80
81
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
82
- TCGReg rd, tcg_target_long v64)
83
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
84
+ TCGReg rd, int64_t v64)
85
{
86
bool q = type == TCG_TYPE_V128;
87
int cmode, imm8, i;
88
89
/* Test all bytes equal first. */
90
- if (v64 == dup_const(MO_8, v64)) {
91
+ if (vece == MO_8) {
92
imm8 = (uint8_t)v64;
93
tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
94
return;
95
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
96
* cannot find an expansion there's no point checking a larger
97
* width because we already know by replication it cannot match.
98
*/
99
- if (v64 == dup_const(MO_16, v64)) {
100
+ if (vece == MO_16) {
101
uint16_t v16 = v64;
102
103
if (is_shimm16(v16, &cmode, &imm8)) {
104
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
105
tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
106
tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
107
return;
108
- } else if (v64 == dup_const(MO_32, v64)) {
109
+ } else if (vece == MO_32) {
110
uint32_t v32 = v64;
111
uint32_t n32 = ~v32;
112
113
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
114
tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
115
break;
116
}
117
- tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
118
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
119
a2 = TCG_VEC_TMP;
120
}
121
insn = cmp_insn[cond];
122
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
123
index XXXXXXX..XXXXXXX 100644
124
--- a/tcg/i386/tcg-target.c.inc
125
+++ b/tcg/i386/tcg-target.c.inc
126
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
127
return true;
128
}
129
130
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
131
- TCGReg ret, tcg_target_long arg)
132
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
133
+ TCGReg ret, int64_t arg)
134
{
135
int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
136
137
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
138
return;
38
return;
139
}
39
}
140
40
141
- if (TCG_TARGET_REG_BITS == 64) {
41
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
142
+ if (TCG_TARGET_REG_BITS == 32 && vece < MO_64) {
143
+ if (have_avx2) {
144
+ tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
145
+ } else {
146
+ tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
147
+ }
148
+ new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
149
+ } else {
150
if (type == TCG_TYPE_V64) {
151
tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret);
152
} else if (have_avx2) {
153
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
154
} else {
155
tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
156
}
157
- new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
158
- } else {
159
- if (have_avx2) {
160
- tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
161
+ if (TCG_TARGET_REG_BITS == 64) {
162
+ new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
163
} else {
164
- tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
165
+ new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32);
166
}
167
- new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
168
}
42
}
169
}
43
}
170
44
171
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
45
-static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
172
index XXXXXXX..XXXXXXX 100644
46
- TCGOp *op, TCGArg dst, uint64_t val)
173
--- a/tcg/ppc/tcg-target.c.inc
47
+static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
174
+++ b/tcg/ppc/tcg-target.c.inc
48
+ TCGArg dst, uint64_t val)
175
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
49
{
176
}
50
const TCGOpDef *def = &tcg_op_defs[op->opc];
51
TCGType type;
52
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
53
/* Convert movi to mov with constant temp. */
54
tv = tcg_constant_internal(type, val);
55
init_ts_info(ctx, tv);
56
- tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
57
+ tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
177
}
58
}
178
59
179
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
60
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
180
- tcg_target_long val)
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
181
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
182
+ TCGReg ret, int64_t val)
183
{
62
{
184
uint32_t load_insn;
63
int nb_temps, nb_globals, i;
185
int rel, low;
64
TCGOp *op, *op_next, *prev_mb = NULL;
186
intptr_t add;
65
- OptContext ctx = {};
187
66
+ OptContext ctx = { .tcg = s };
188
- low = (int8_t)val;
67
189
- if (low >= -16 && low < 16) {
68
/* Array VALS has an element for each temp.
190
- if (val == (tcg_target_long)dup_const(MO_8, low)) {
69
If this temp holds a constant then its value is kept in VALS' element.
191
+ switch (vece) {
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
+ case MO_8:
71
CASE_OP_32_64(rotr):
193
+ low = (int8_t)val;
72
if (arg_is_const(op->args[1])
194
+ if (low >= -16 && low < 16) {
73
&& arg_info(op->args[1])->val == 0) {
195
tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
74
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
196
return;
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
76
continue;
77
}
78
break;
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
if (!arg_is_const(op->args[1])
81
&& arg_is_const(op->args[2])
82
&& arg_info(op->args[2])->val == 0) {
83
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
84
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
85
continue;
86
}
87
break;
88
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
89
if (!arg_is_const(op->args[1])
90
&& arg_is_const(op->args[2])
91
&& arg_info(op->args[2])->val == -1) {
92
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
93
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
94
continue;
95
}
96
break;
97
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
98
99
if (partmask == 0) {
100
tcg_debug_assert(nb_oargs == 1);
101
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
102
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
103
continue;
197
}
104
}
198
- if (val == (tcg_target_long)dup_const(MO_16, low)) {
105
if (affected == 0) {
199
+ if (have_isa_3_00) {
106
tcg_debug_assert(nb_oargs == 1);
200
+ tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
107
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
201
+ return;
108
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
202
+ }
109
continue;
203
+ break;
204
+
205
+ case MO_16:
206
+ low = (int16_t)val;
207
+ if (low >= -16 && low < 16) {
208
tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
209
return;
210
}
110
}
211
- if (val == (tcg_target_long)dup_const(MO_32, low)) {
111
212
+ break;
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
213
+
113
CASE_OP_32_64(mulsh):
214
+ case MO_32:
114
if (arg_is_const(op->args[2])
215
+ low = (int32_t)val;
115
&& arg_info(op->args[2])->val == 0) {
216
+ if (low >= -16 && low < 16) {
116
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
217
tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
117
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
218
return;
118
continue;
219
}
119
}
220
- }
120
break;
221
- if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
121
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
222
- tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
122
CASE_OP_32_64_VEC(or):
223
- return;
123
CASE_OP_32_64_VEC(and):
224
+ break;
124
if (args_are_copies(op->args[1], op->args[2])) {
225
}
125
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
226
126
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
227
/*
127
continue;
228
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
128
}
229
if (TCG_TARGET_REG_BITS == 64) {
129
break;
230
new_pool_label(s, val, rel, s->code_ptr, add);
130
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
231
} else {
131
CASE_OP_32_64_VEC(sub):
232
- new_pool_l2(s, rel, s->code_ptr, add, val, val);
132
CASE_OP_32_64_VEC(xor):
233
+ new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
133
if (args_are_copies(op->args[1], op->args[2])) {
234
}
134
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
235
} else {
135
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
236
load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
136
continue;
237
if (TCG_TARGET_REG_BITS == 64) {
137
}
238
new_pool_l2(s, rel, s->code_ptr, add, val, val);
138
break;
239
} else {
139
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
240
- new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
140
allocator where needed and possible. Also detect copies. */
241
+ new_pool_l4(s, rel, s->code_ptr, add,
141
switch (opc) {
242
+ val >> 32, val, val >> 32, val);
142
CASE_OP_32_64_VEC(mov):
243
}
143
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
244
}
144
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
245
145
continue;
146
147
case INDEX_op_dup_vec:
148
if (arg_is_const(op->args[1])) {
149
tmp = arg_info(op->args[1])->val;
150
tmp = dup_const(TCGOP_VECE(op), tmp);
151
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
152
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
153
continue;
154
}
155
break;
156
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
157
case INDEX_op_dup2_vec:
158
assert(TCG_TARGET_REG_BITS == 32);
159
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
160
- tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
+ tcg_opt_gen_movi(&ctx, op, op->args[0],
162
deposit64(arg_info(op->args[1])->val, 32, 32,
163
arg_info(op->args[2])->val));
164
continue;
165
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
166
case INDEX_op_extrh_i64_i32:
167
if (arg_is_const(op->args[1])) {
168
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
169
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
171
continue;
172
}
173
break;
174
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
175
if (arg_is_const(op->args[1])) {
176
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
177
op->args[2]);
178
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
180
continue;
181
}
182
break;
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
185
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
186
arg_info(op->args[2])->val);
187
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
189
continue;
190
}
191
break;
192
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
193
TCGArg v = arg_info(op->args[1])->val;
194
if (v != 0) {
195
tmp = do_constant_folding(opc, v, 0);
196
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
198
} else {
199
- tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
200
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
201
}
202
continue;
203
}
204
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
205
tmp = deposit64(arg_info(op->args[1])->val,
206
op->args[3], op->args[4],
207
arg_info(op->args[2])->val);
208
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
209
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
210
continue;
211
}
212
break;
213
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
214
if (arg_is_const(op->args[1])) {
215
tmp = extract64(arg_info(op->args[1])->val,
216
op->args[2], op->args[3]);
217
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
218
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
219
continue;
220
}
221
break;
222
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
223
if (arg_is_const(op->args[1])) {
224
tmp = sextract64(arg_info(op->args[1])->val,
225
op->args[2], op->args[3]);
226
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
227
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
228
continue;
229
}
230
break;
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
tmp = (int32_t)(((uint32_t)v1 >> shr) |
233
((uint32_t)v2 << (32 - shr)));
234
}
235
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
236
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
237
continue;
238
}
239
break;
240
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
241
tmp = do_constant_folding_cond(opc, op->args[1],
242
op->args[2], op->args[3]);
243
if (tmp != 2) {
244
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
245
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
246
continue;
247
}
248
break;
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
tmp = do_constant_folding_cond(opc, op->args[1],
251
op->args[2], op->args[5]);
252
if (tmp != 2) {
253
- tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
254
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
255
continue;
256
}
257
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
258
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
259
260
rl = op->args[0];
261
rh = op->args[1];
262
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
263
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
264
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
265
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
266
continue;
267
}
268
break;
269
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
270
271
rl = op->args[0];
272
rh = op->args[1];
273
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
274
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
275
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
276
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
277
continue;
278
}
279
break;
280
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
281
op->args[5]);
282
if (tmp != 2) {
283
do_setcond_const:
284
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
285
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
286
continue;
287
}
288
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
246
--
289
--
247
2.25.1
290
2.25.1
248
291
249
292
New patch
1
This will expose the variable to subroutines that
2
will be broken out of tcg_optimize.
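A rough, self-contained sketch of the pattern (the names below are
illustrative stand-ins, not QEMU's types): state that used to live in a
local of the big loop moves into the per-pass context so every split-out
helper can reach it through one pointer.

#include <stddef.h>

typedef struct Ctx {          /* stands in for OptContext */
    const void *prev_mb;      /* last barrier op seen, NULL if none */
} Ctx;

static void fold_mb(Ctx *ctx, const void *op)
{
    /* a split-out helper reads and updates shared state via the context */
    ctx->prev_mb = op;
}

static void optimize_pass(const void *ops[], size_t n)
{
    Ctx ctx = { NULL };
    for (size_t i = 0; i < n; i++) {
        fold_mb(&ctx, ops[i]);   /* pass &ctx instead of growing arg lists */
    }
}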
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/optimize.c | 11 ++++++-----
10
1 file changed, 6 insertions(+), 5 deletions(-)
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
17
18
typedef struct OptContext {
19
TCGContext *tcg;
20
+ TCGOp *prev_mb;
21
TCGTempSet temps_used;
22
} OptContext;
23
24
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
25
void tcg_optimize(TCGContext *s)
26
{
27
int nb_temps, nb_globals, i;
28
- TCGOp *op, *op_next, *prev_mb = NULL;
29
+ TCGOp *op, *op_next;
30
OptContext ctx = { .tcg = s };
31
32
/* Array VALS has an element for each temp.
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
34
}
35
36
/* Eliminate duplicate and redundant fence instructions. */
37
- if (prev_mb) {
38
+ if (ctx.prev_mb) {
39
switch (opc) {
40
case INDEX_op_mb:
41
/* Merge two barriers of the same type into one,
42
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
43
* barrier. This is stricter than specified but for
44
* the purposes of TCG is better than not optimizing.
45
*/
46
- prev_mb->args[0] |= op->args[0];
47
+ ctx.prev_mb->args[0] |= op->args[0];
48
tcg_op_remove(s, op);
49
break;
50
51
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
52
case INDEX_op_qemu_st_i64:
53
case INDEX_op_call:
54
/* Opcodes that touch guest memory stop the optimization. */
55
- prev_mb = NULL;
56
+ ctx.prev_mb = NULL;
57
break;
58
}
59
} else if (opc == INDEX_op_mb) {
60
- prev_mb = op;
61
+ ctx.prev_mb = op;
62
}
63
}
64
}
65
--
66
2.25.1
67
68
1
Do not allocate a large block for indexing. Instead, allocate
1
There was no real reason for calls to have separate code here.
2
for each temporary as they are seen.
2
Unify init for calls vs non-calls using the call path, which
3
3
handles TCG_CALL_DUMMY_ARG.
4
In general, this will use less memory, if we consider that most
5
TBs do not touch every target register. This also allows us to
6
allocate TempOptInfo for new temps created during optimization.
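A tiny sketch of that allocation scheme, assuming plain libc and invented
names (the real code below uses tcg_malloc, ts->state_ptr and a TCGTempSet
bitmap; error handling is omitted here):

#include <stdlib.h>

typedef struct Info { int is_const; long val; } Info;
typedef struct Temp { Info *state; } Temp;

/* Allocate per-temp info the first time a temp is actually seen, instead of
   one large array sized for every temp up front. */
static Info *get_info(Temp *t)
{
    if (t->state == NULL) {
        t->state = calloc(1, sizeof(Info));
    }
    return t->state;
}

Because the pointer stays attached to the temp, the block is reused on later
passes over the same temp, and temps created mid-pass get one the same way.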
7
4
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
9
---
11
tcg/optimize.c | 60 ++++++++++++++++++++++++++++----------------------
10
tcg/optimize.c | 25 +++++++++++--------------
12
1 file changed, 34 insertions(+), 26 deletions(-)
11
1 file changed, 11 insertions(+), 14 deletions(-)
13
12
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
15
--- a/tcg/optimize.c
17
+++ b/tcg/optimize.c
16
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
17
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
19
}
20
21
/* Initialize and activate a temporary. */
22
-static void init_ts_info(TempOptInfo *infos,
23
- TCGTempSet *temps_used, TCGTemp *ts)
24
+static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
25
{
26
size_t idx = temp_idx(ts);
27
- if (!test_bit(idx, temps_used->l)) {
28
- TempOptInfo *ti = &infos[idx];
29
+ TempOptInfo *ti;
30
31
+ if (test_bit(idx, temps_used->l)) {
32
+ return;
33
+ }
34
+ set_bit(idx, temps_used->l);
35
+
36
+ ti = ts->state_ptr;
37
+ if (ti == NULL) {
38
+ ti = tcg_malloc(sizeof(TempOptInfo));
39
ts->state_ptr = ti;
40
- ti->next_copy = ts;
41
- ti->prev_copy = ts;
42
- if (ts->kind == TEMP_CONST) {
43
- ti->is_const = true;
44
- ti->val = ti->mask = ts->val;
45
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
46
- /* High bits of a 32-bit quantity are garbage. */
47
- ti->mask |= ~0xffffffffull;
48
- }
49
- } else {
50
- ti->is_const = false;
51
- ti->mask = -1;
52
+ }
53
+
54
+ ti->next_copy = ts;
55
+ ti->prev_copy = ts;
56
+ if (ts->kind == TEMP_CONST) {
57
+ ti->is_const = true;
58
+ ti->val = ts->val;
59
+ ti->mask = ts->val;
60
+ if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
61
+ /* High bits of a 32-bit quantity are garbage. */
62
+ ti->mask |= ~0xffffffffull;
63
}
64
- set_bit(idx, temps_used->l);
65
+ } else {
66
+ ti->is_const = false;
67
+ ti->mask = -1;
68
}
18
}
69
}
19
}
70
20
71
-static void init_arg_info(TempOptInfo *infos,
21
-static void init_arg_info(OptContext *ctx, TCGArg arg)
72
- TCGTempSet *temps_used, TCGArg arg)
22
-{
73
+static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
23
- init_ts_info(ctx, arg_temp(arg));
24
-}
25
-
26
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
74
{
27
{
75
- init_ts_info(infos, temps_used, arg_temp(arg));
28
TCGTemp *i, *g, *l;
76
+ init_ts_info(temps_used, arg_temp(arg));
29
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
30
return false;
77
}
31
}
78
32
79
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
33
+static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
80
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
34
+{
35
+ for (int i = 0; i < nb_args; i++) {
36
+ TCGTemp *ts = arg_temp(op->args[i]);
37
+ if (ts) {
38
+ init_ts_info(ctx, ts);
39
+ }
40
+ }
41
+}
42
+
81
/* Propagate constants and copies, fold constant expressions. */
43
/* Propagate constants and copies, fold constant expressions. */
82
void tcg_optimize(TCGContext *s)
44
void tcg_optimize(TCGContext *s)
83
{
45
{
84
- int nb_temps, nb_globals;
85
+ int nb_temps, nb_globals, i;
86
TCGOp *op, *op_next, *prev_mb = NULL;
87
- TempOptInfo *infos;
88
TCGTempSet temps_used;
89
90
/* Array VALS has an element for each temp.
91
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
92
47
if (opc == INDEX_op_call) {
93
nb_temps = s->nb_temps;
48
nb_oargs = TCGOP_CALLO(op);
94
nb_globals = s->nb_globals;
49
nb_iargs = TCGOP_CALLI(op);
95
+
50
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
96
bitmap_zero(temps_used.l, nb_temps);
51
- TCGTemp *ts = arg_temp(op->args[i]);
97
- infos = tcg_malloc(sizeof(TempOptInfo) * nb_temps);
52
- if (ts) {
98
+ for (i = 0; i < nb_temps; ++i) {
53
- init_ts_info(&ctx, ts);
99
+ s->temps[i].state_ptr = NULL;
54
- }
100
+ }
55
- }
101
102
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
103
uint64_t mask, partmask, affected, tmp;
104
- int nb_oargs, nb_iargs, i;
105
+ int nb_oargs, nb_iargs;
106
TCGOpcode opc = op->opc;
107
const TCGOpDef *def = &tcg_op_defs[opc];
108
109
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
110
for (i = 0; i < nb_oargs + nb_iargs; i++) {
111
TCGTemp *ts = arg_temp(op->args[i]);
112
if (ts) {
113
- init_ts_info(infos, &temps_used, ts);
114
+ init_ts_info(&temps_used, ts);
115
}
116
}
117
} else {
56
} else {
118
nb_oargs = def->nb_oargs;
57
nb_oargs = def->nb_oargs;
119
nb_iargs = def->nb_iargs;
58
nb_iargs = def->nb_iargs;
120
for (i = 0; i < nb_oargs + nb_iargs; i++) {
59
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
121
- init_arg_info(infos, &temps_used, op->args[i]);
60
- init_arg_info(&ctx, op->args[i]);
122
+ init_arg_info(&temps_used, op->args[i]);
61
- }
123
}
124
}
62
}
125
63
+ init_arguments(&ctx, op, nb_oargs + nb_iargs);
64
65
/* Do copy propagation */
66
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
126
--
67
--
127
2.25.1
68
2.25.1
128
69
129
70
1
Continue splitting tcg_optimize.
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
7
---
3
tcg/sparc/tcg-target-constr.h | 27 +++++++++++++
8
tcg/optimize.c | 22 ++++++++++++++--------
4
tcg/sparc/tcg-target.c.inc | 74 ++++++++++++-----------------------
9
1 file changed, 14 insertions(+), 8 deletions(-)
5
2 files changed, 51 insertions(+), 50 deletions(-)
6
create mode 100644 tcg/sparc/tcg-target-constr.h
7
10
8
diff --git a/tcg/sparc/tcg-target-constr.h b/tcg/sparc/tcg-target-constr.h
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
new file mode 100644
10
index XXXXXXX..XXXXXXX
11
--- /dev/null
12
+++ b/tcg/sparc/tcg-target-constr.h
13
@@ -XXX,XX +XXX,XX @@
14
+/* SPDX-License-Identifier: GPL-2.0-or-later */
15
+/*
16
+ * Sparc target-specific operand constaints.
17
+ * Copyright (c) 2020 Linaro
18
+ */
19
+
20
+C_O0_I1(r)
21
+C_O0_I2(rZ, r)
22
+C_O0_I2(RZ, r)
23
+C_O0_I2(rZ, rJ)
24
+C_O0_I2(RZ, RJ)
25
+C_O0_I2(sZ, A)
26
+C_O0_I2(SZ, A)
27
+C_O1_I1(r, A)
28
+C_O1_I1(R, A)
29
+C_O1_I1(r, r)
30
+C_O1_I1(r, R)
31
+C_O1_I1(R, r)
32
+C_O1_I1(R, R)
33
+C_O1_I2(R, R, R)
34
+C_O1_I2(r, rZ, rJ)
35
+C_O1_I2(R, RZ, RJ)
36
+C_O1_I4(r, rZ, rJ, rI, 0)
37
+C_O1_I4(R, RZ, RJ, RI, 0)
38
+C_O2_I2(r, r, rZ, rJ)
39
+C_O2_I4(R, R, RZ, RZ, RJ, RI)
40
+C_O2_I4(r, r, rZ, rZ, rJ, rJ)
41
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
42
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
43
--- a/tcg/sparc/tcg-target.c.inc
13
--- a/tcg/optimize.c
44
+++ b/tcg/sparc/tcg-target.c.inc
14
+++ b/tcg/optimize.c
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
15
@@ -XXX,XX +XXX,XX @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
46
}
16
}
47
}
17
}
48
18
49
+/* Define all constraint sets. */
19
+static void copy_propagate(OptContext *ctx, TCGOp *op,
50
+#include "../tcg-constr.c.inc"
20
+ int nb_oargs, int nb_iargs)
21
+{
22
+ TCGContext *s = ctx->tcg;
51
+
23
+
52
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
24
+ for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
25
+ TCGTemp *ts = arg_temp(op->args[i]);
26
+ if (ts && ts_is_copy(ts)) {
27
+ op->args[i] = temp_arg(find_better_copy(s, ts));
28
+ }
29
+ }
30
+}
31
+
32
/* Propagate constants and copies, fold constant expressions. */
33
void tcg_optimize(TCGContext *s)
53
{
34
{
54
- static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
55
- static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
36
nb_iargs = def->nb_iargs;
56
- static const TCGTargetOpDef R_r = { .args_ct_str = { "R", "r" } };
37
}
57
- static const TCGTargetOpDef r_R = { .args_ct_str = { "r", "R" } };
38
init_arguments(&ctx, op, nb_oargs + nb_iargs);
58
- static const TCGTargetOpDef R_R = { .args_ct_str = { "R", "R" } };
59
- static const TCGTargetOpDef r_A = { .args_ct_str = { "r", "A" } };
60
- static const TCGTargetOpDef R_A = { .args_ct_str = { "R", "A" } };
61
- static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
62
- static const TCGTargetOpDef RZ_r = { .args_ct_str = { "RZ", "r" } };
63
- static const TCGTargetOpDef sZ_A = { .args_ct_str = { "sZ", "A" } };
64
- static const TCGTargetOpDef SZ_A = { .args_ct_str = { "SZ", "A" } };
65
- static const TCGTargetOpDef rZ_rJ = { .args_ct_str = { "rZ", "rJ" } };
66
- static const TCGTargetOpDef RZ_RJ = { .args_ct_str = { "RZ", "RJ" } };
67
- static const TCGTargetOpDef R_R_R = { .args_ct_str = { "R", "R", "R" } };
68
- static const TCGTargetOpDef r_rZ_rJ
69
- = { .args_ct_str = { "r", "rZ", "rJ" } };
70
- static const TCGTargetOpDef R_RZ_RJ
71
- = { .args_ct_str = { "R", "RZ", "RJ" } };
72
- static const TCGTargetOpDef r_r_rZ_rJ
73
- = { .args_ct_str = { "r", "r", "rZ", "rJ" } };
74
- static const TCGTargetOpDef movc_32
75
- = { .args_ct_str = { "r", "rZ", "rJ", "rI", "0" } };
76
- static const TCGTargetOpDef movc_64
77
- = { .args_ct_str = { "R", "RZ", "RJ", "RI", "0" } };
78
- static const TCGTargetOpDef add2_32
79
- = { .args_ct_str = { "r", "r", "rZ", "rZ", "rJ", "rJ" } };
80
- static const TCGTargetOpDef add2_64
81
- = { .args_ct_str = { "R", "R", "RZ", "RZ", "RJ", "RI" } };
82
-
39
-
83
switch (op) {
40
- /* Do copy propagation */
84
case INDEX_op_goto_ptr:
41
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
85
- return &r;
42
- TCGTemp *ts = arg_temp(op->args[i]);
86
+ return C_O0_I1(r);
43
- if (ts && ts_is_copy(ts)) {
87
44
- op->args[i] = temp_arg(find_better_copy(s, ts));
88
case INDEX_op_ld8u_i32:
45
- }
89
case INDEX_op_ld8s_i32:
46
- }
90
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
47
+ copy_propagate(&ctx, op, nb_oargs, nb_iargs);
91
case INDEX_op_ld_i32:
48
92
case INDEX_op_neg_i32:
49
/* For commutative operations make constant second argument */
93
case INDEX_op_not_i32:
50
switch (opc) {
94
- return &r_r;
95
+ return C_O1_I1(r, r);
96
97
case INDEX_op_st8_i32:
98
case INDEX_op_st16_i32:
99
case INDEX_op_st_i32:
100
- return &rZ_r;
101
+ return C_O0_I2(rZ, r);
102
103
case INDEX_op_add_i32:
104
case INDEX_op_mul_i32:
105
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
106
case INDEX_op_shr_i32:
107
case INDEX_op_sar_i32:
108
case INDEX_op_setcond_i32:
109
- return &r_rZ_rJ;
110
+ return C_O1_I2(r, rZ, rJ);
111
112
case INDEX_op_brcond_i32:
113
- return &rZ_rJ;
114
+ return C_O0_I2(rZ, rJ);
115
case INDEX_op_movcond_i32:
116
- return &movc_32;
117
+ return C_O1_I4(r, rZ, rJ, rI, 0);
118
case INDEX_op_add2_i32:
119
case INDEX_op_sub2_i32:
120
- return &add2_32;
121
+ return C_O2_I4(r, r, rZ, rZ, rJ, rJ);
122
case INDEX_op_mulu2_i32:
123
case INDEX_op_muls2_i32:
124
- return &r_r_rZ_rJ;
125
+ return C_O2_I2(r, r, rZ, rJ);
126
127
case INDEX_op_ld8u_i64:
128
case INDEX_op_ld8s_i64:
129
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
130
case INDEX_op_ld_i64:
131
case INDEX_op_ext_i32_i64:
132
case INDEX_op_extu_i32_i64:
133
- return &R_r;
134
+ return C_O1_I1(R, r);
135
136
case INDEX_op_st8_i64:
137
case INDEX_op_st16_i64:
138
case INDEX_op_st32_i64:
139
case INDEX_op_st_i64:
140
- return &RZ_r;
141
+ return C_O0_I2(RZ, r);
142
143
case INDEX_op_add_i64:
144
case INDEX_op_mul_i64:
145
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
146
case INDEX_op_shr_i64:
147
case INDEX_op_sar_i64:
148
case INDEX_op_setcond_i64:
149
- return &R_RZ_RJ;
150
+ return C_O1_I2(R, RZ, RJ);
151
152
case INDEX_op_neg_i64:
153
case INDEX_op_not_i64:
154
case INDEX_op_ext32s_i64:
155
case INDEX_op_ext32u_i64:
156
- return &R_R;
157
+ return C_O1_I1(R, R);
158
159
case INDEX_op_extrl_i64_i32:
160
case INDEX_op_extrh_i64_i32:
161
- return &r_R;
162
+ return C_O1_I1(r, R);
163
164
case INDEX_op_brcond_i64:
165
- return &RZ_RJ;
166
+ return C_O0_I2(RZ, RJ);
167
case INDEX_op_movcond_i64:
168
- return &movc_64;
169
+ return C_O1_I4(R, RZ, RJ, RI, 0);
170
case INDEX_op_add2_i64:
171
case INDEX_op_sub2_i64:
172
- return &add2_64;
173
+ return C_O2_I4(R, R, RZ, RZ, RJ, RI);
174
case INDEX_op_muluh_i64:
175
- return &R_R_R;
176
+ return C_O1_I2(R, R, R);
177
178
case INDEX_op_qemu_ld_i32:
179
- return &r_A;
180
+ return C_O1_I1(r, A);
181
case INDEX_op_qemu_ld_i64:
182
- return &R_A;
183
+ return C_O1_I1(R, A);
184
case INDEX_op_qemu_st_i32:
185
- return &sZ_A;
186
+ return C_O0_I2(sZ, A);
187
case INDEX_op_qemu_st_i64:
188
- return &SZ_A;
189
+ return C_O0_I2(SZ, A);
190
191
default:
192
return NULL;
193
--
51
--
194
2.25.1
52
2.25.1
195
53
196
54
1
Calls are special in that they have a variable number
2
of arguments, and need to be able to clobber globals.
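A rough sketch of that special case, with invented names standing in for
TCG's call flags and temp bookkeeping (the real logic is the fold_call()
helper in the diff below):

enum { NO_READ_GLOBALS = 1, NO_WRITE_GLOBALS = 2 };

typedef struct State {
    int known[64];        /* nonzero: something is known about this temp */
    int nb_globals;
} State;

static void fold_call_like(State *st, int call_flags,
                           const int *outputs, int nb_outputs)
{
    /* Unless the call is flagged as not touching globals, forget them. */
    if (!(call_flags & (NO_READ_GLOBALS | NO_WRITE_GLOBALS))) {
        for (int i = 0; i < st->nb_globals; i++) {
            st->known[i] = 0;
        }
    }
    /* Whatever the call produces is unknown afterwards. */
    for (int i = 0; i < nb_outputs; i++) {
        st->known[outputs[i]] = 0;
    }
}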
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
7
---
3
tcg/s390/tcg-target-constr.h | 24 +++++++
8
tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++------------------
4
tcg/s390/tcg-target.c.inc | 119 +++++++++++++++--------------------
9
1 file changed, 41 insertions(+), 22 deletions(-)
5
2 files changed, 76 insertions(+), 67 deletions(-)
6
create mode 100644 tcg/s390/tcg-target-constr.h
7
10
8
diff --git a/tcg/s390/tcg-target-constr.h b/tcg/s390/tcg-target-constr.h
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
new file mode 100644
10
index XXXXXXX..XXXXXXX
11
--- /dev/null
12
+++ b/tcg/s390/tcg-target-constr.h
13
@@ -XXX,XX +XXX,XX @@
14
+/* SPDX-License-Identifier: GPL-2.0-or-later */
15
+/*
16
+ * S390 target-specific operand constraints.
17
+ * Copyright (c) 2020 Linaro
18
+ */
19
+
20
+C_O0_I1(r)
21
+C_O0_I2(L, L)
22
+C_O0_I2(r, r)
23
+C_O0_I2(r, ri)
24
+C_O1_I1(r, L)
25
+C_O1_I1(r, r)
26
+C_O1_I2(r, 0, ri)
27
+C_O1_I2(r, 0, rI)
28
+C_O1_I2(r, 0, rJ)
29
+C_O1_I2(r, r, ri)
30
+C_O1_I2(r, rZ, r)
31
+C_O1_I4(r, r, ri, r, 0)
32
+C_O1_I4(r, r, ri, rI, 0)
33
+C_O2_I2(b, a, 0, r)
34
+C_O2_I3(b, a, 0, 1, r)
35
+C_O2_I4(r, r, 0, 1, rA, r)
36
+C_O2_I4(r, r, 0, 1, ri, r)
37
+C_O2_I4(r, r, 0, 1, r, r)
38
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
39
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
     }
 }
 
+static bool fold_call(OptContext *ctx, TCGOp *op)
+{
+    TCGContext *s = ctx->tcg;
+    int nb_oargs = TCGOP_CALLO(op);
+    int nb_iargs = TCGOP_CALLI(op);
+    int flags, i;
+
+    init_arguments(ctx, op, nb_oargs + nb_iargs);
+    copy_propagate(ctx, op, nb_oargs, nb_iargs);
+
+    /* If the function reads or writes globals, reset temp data. */
+    flags = tcg_call_flags(op);
+    if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
+        int nb_globals = s->nb_globals;
+
+        for (i = 0; i < nb_globals; i++) {
+            if (test_bit(i, ctx->temps_used.l)) {
+                reset_ts(&ctx->tcg->temps[i]);
+            }
+        }
+    }
+
+    /* Reset temp data for outputs. */
+    for (i = 0; i < nb_oargs; i++) {
+        reset_temp(op->args[i]);
+    }
+
+    /* Stop optimizing MB across calls. */
+    ctx->prev_mb = NULL;
+    return true;
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 void tcg_optimize(TCGContext *s)
 {
-    int nb_temps, nb_globals, i;
+    int nb_temps, i;
     TCGOp *op, *op_next;
     OptContext ctx = { .tcg = s };
 
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
        available through the doubly linked circular list. */
 
     nb_temps = s->nb_temps;
-    nb_globals = s->nb_globals;
-
     for (i = 0; i < nb_temps; ++i) {
         s->temps[i].state_ptr = NULL;
     }
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         uint64_t z_mask, partmask, affected, tmp;
         int nb_oargs, nb_iargs;
         TCGOpcode opc = op->opc;
-        const TCGOpDef *def = &tcg_op_defs[opc];
+        const TCGOpDef *def;
 
-        /* Count the arguments, and initialize the temps that are
-           going to be used */
+        /* Calls are special. */
         if (opc == INDEX_op_call) {
-            nb_oargs = TCGOP_CALLO(op);
-            nb_iargs = TCGOP_CALLI(op);
-        } else {
-            nb_oargs = def->nb_oargs;
-            nb_iargs = def->nb_iargs;
+            fold_call(&ctx, op);
+            continue;
         }
+
+        def = &tcg_op_defs[opc];
+        nb_oargs = def->nb_oargs;
+        nb_iargs = def->nb_iargs;
         init_arguments(&ctx, op, nb_oargs + nb_iargs);
         copy_propagate(&ctx, op, nb_oargs, nb_iargs);
 
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         if (def->flags & TCG_OPF_BB_END) {
             memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
         } else {
-            if (opc == INDEX_op_call &&
-                !(tcg_call_flags(op)
-                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
-                for (i = 0; i < nb_globals; i++) {
-                    if (test_bit(i, ctx.temps_used.l)) {
-                        reset_ts(&s->temps[i]);
-                    }
-                }
-            }
-
             for (i = 0; i < nb_oargs; i++) {
                 reset_temp(op->args[i]);
                 /* Save the corresponding known-zero bits mask for the
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         case INDEX_op_qemu_st_i32:
         case INDEX_op_qemu_st8_i32:
         case INDEX_op_qemu_st_i64:
-        case INDEX_op_call:
             /* Opcodes that touch guest memory stop the optimization. */
             ctx.prev_mb = NULL;
             break;
-- 
2.25.1

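For readers following the refactoring, the shape of the change above is: each class of opcode gets a small fold_*() helper that takes the shared optimization context and returns true once the op has been handled completely, so the main loop can simply move on. A minimal, self-contained sketch of that calling convention (illustrative only; the struct and function names below are invented, not the QEMU ones):

    #include <stdbool.h>
    #include <stdio.h>

    struct op { int opcode; };
    struct ctx { int num_folded; };      /* stands in for OptContext */

    /* Returns true when the op needs no further generic processing. */
    static bool fold_example(struct ctx *c, struct op *o)
    {
        (void)o;                         /* a real helper would inspect o->opcode */
        c->num_folded++;
        return true;
    }

    int main(void)
    {
        struct ctx c = { 0 };
        struct op o = { 42 };

        if (fold_example(&c, &o)) {
            /* like the 'continue' after fold_call() in the patch */
            printf("folded %d op(s)\n", c.num_folded);
        }
        return 0;
    }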
Rather than try to keep these up-to-date across folding,
re-read nb_oargs at the end, after re-reading the opcode.

A couple of asserts need dropping, but that will take care
of itself as we split the function further.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 
     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
         uint64_t z_mask, partmask, affected, tmp;
-        int nb_oargs, nb_iargs;
         TCGOpcode opc = op->opc;
         const TCGOpDef *def;
 
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         }
 
         def = &tcg_op_defs[opc];
-        nb_oargs = def->nb_oargs;
-        nb_iargs = def->nb_iargs;
-        init_arguments(&ctx, op, nb_oargs + nb_iargs);
-        copy_propagate(&ctx, op, nb_oargs, nb_iargs);
+        init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
+        copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
 
         /* For commutative operations make constant second argument */
         switch (opc) {
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 
         CASE_OP_32_64(qemu_ld):
             {
-                MemOpIdx oi = op->args[nb_oargs + nb_iargs];
+                MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
                 MemOp mop = get_memop(oi);
                 if (!(mop & MO_SIGN)) {
                     z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         }
 
         if (partmask == 0) {
-            tcg_debug_assert(nb_oargs == 1);
             tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
             continue;
         }
         if (affected == 0) {
-            tcg_debug_assert(nb_oargs == 1);
             tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
             continue;
         }
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         } else if (args_are_copies(op->args[1], op->args[2])) {
             op->opc = INDEX_op_dup_vec;
             TCGOP_VECE(op) = MO_32;
-            nb_iargs = 1;
         }
         break;
 
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
             op->opc = opc = (opc == INDEX_op_movcond_i32
                              ? INDEX_op_setcond_i32
                              : INDEX_op_setcond_i64);
-            nb_iargs = 2;
         }
         break;
 
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         if (def->flags & TCG_OPF_BB_END) {
             memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
         } else {
+            int nb_oargs = def->nb_oargs;
             for (i = 0; i < nb_oargs; i++) {
                 reset_temp(op->args[i]);
                 /* Save the corresponding known-zero bits mask for the
-- 
2.25.1

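The motivation above is that earlier folding can rewrite op->opc, so an argument count cached before the switch can go stale; re-reading the count through the freshly looked-up opcode definition avoids that. A tiny standalone illustration of the hazard (hypothetical structures, not the TCG ones):

    #include <stdio.h>

    struct opdef { int nb_oargs; };

    static const struct opdef defs[2] = { { 1 }, { 2 } };

    struct op { int opc; };

    int main(void)
    {
        struct op op = { 0 };
        int cached = defs[op.opc].nb_oargs;   /* snapshot taken early */

        op.opc = 1;                           /* folding rewrites the opcode */

        /* 'cached' is now stale; re-reading through defs[op.opc] is not. */
        printf("cached=%d fresh=%d\n", cached, defs[op.opc].nb_oargs);
        return 0;
    }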
Return -1 instead of 2 for failure, so that we can
use comparisons against 0 for all cases.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 145 +++++++++++++++++++++++++------------------------
 1 file changed, 74 insertions(+), 71 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
[...]
-- 
2.25.1

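With -1 as the "unknown" result, every caller can test the sign instead of comparing against a magic value of 2. A small self-contained illustration of the convention (not the series code; names are made up):

    #include <stdio.h>

    /* Returns 0 or 1 when the comparison is statically known, -1 otherwise. */
    static int try_fold_cond(int known, int lhs, int rhs)
    {
        return known ? (lhs == rhs) : -1;
    }

    int main(void)
    {
        int i = try_fold_cond(1, 3, 3);

        if (i == 0) {
            printf("condition is false\n");
        } else if (i > 0) {
            printf("condition is true\n");
        } else {
            printf("not known at optimization time\n");   /* i < 0 */
        }
        return 0;
    }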
This will allow callers to tail call to these functions
and return true indicating processing complete.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
 }
 
-static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
+static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
 {
     TCGTemp *dst_ts = arg_temp(dst);
     TCGTemp *src_ts = arg_temp(src);
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
 
     if (ts_are_copies(dst_ts, src_ts)) {
         tcg_op_remove(ctx->tcg, op);
-        return;
+        return true;
     }
 
     reset_ts(dst_ts);
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
         di->is_const = si->is_const;
         di->val = si->val;
     }
+    return true;
 }
 
-static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
+static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                              TCGArg dst, uint64_t val)
 {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
     /* Convert movi to mov with constant temp. */
     tv = tcg_constant_internal(type, val);
     init_ts_info(ctx, tv);
-    tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
+    return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
 }
 
 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
-- 
2.25.1

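Making the mov/movi generators return bool lets a caller both emit the replacement and report completion in one statement. A minimal sketch of the tail-call shape (hypothetical names, not the TCG functions):

    #include <stdbool.h>

    static bool gen_mov_example(int *dst, int src)
    {
        *dst = src;
        return true;              /* processing of this op is complete */
    }

    static bool gen_movi_example(int *dst, int imm)
    {
        /* Tail call: emit the mov and propagate "done" in one step. */
        return gen_mov_example(dst, imm);
    }

    int main(void)
    {
        int r = 0;
        return gen_movi_example(&r, 5) && r == 5 ? 0 : 1;
    }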
Copy z_mask into OptContext, for writeback to the
first output within the new function.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 49 +++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
     TCGContext *tcg;
     TCGOp *prev_mb;
     TCGTempSet temps_used;
+
+    /* In flight values from optimization. */
+    uint64_t z_mask;
 } OptContext;
 
 static inline TempOptInfo *ts_info(TCGTemp *ts)
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
     }
 }
 
+static void finish_folding(OptContext *ctx, TCGOp *op)
+{
+    const TCGOpDef *def = &tcg_op_defs[op->opc];
+    int i, nb_oargs;
+
+    /*
+     * For an opcode that ends a BB, reset all temp data.
+     * We do no cross-BB optimization.
+     */
+    if (def->flags & TCG_OPF_BB_END) {
+        memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+        ctx->prev_mb = NULL;
+        return;
+    }
+
+    nb_oargs = def->nb_oargs;
+    for (i = 0; i < nb_oargs; i++) {
+        reset_temp(op->args[i]);
+        /*
+         * Save the corresponding known-zero bits mask for the
+         * first output argument (only one supported so far).
+         */
+        if (i == 0) {
+            arg_info(op->args[i])->z_mask = ctx->z_mask;
+        }
+    }
+}
+
 static bool fold_call(OptContext *ctx, TCGOp *op)
 {
     TCGContext *s = ctx->tcg;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
             partmask &= 0xffffffffu;
             affected &= 0xffffffffu;
         }
+        ctx.z_mask = z_mask;
 
         if (partmask == 0) {
             tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
             break;
         }
 
-        /* Some of the folding above can change opc. */
-        opc = op->opc;
-        def = &tcg_op_defs[opc];
-        if (def->flags & TCG_OPF_BB_END) {
-            memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
-        } else {
-            int nb_oargs = def->nb_oargs;
-            for (i = 0; i < nb_oargs; i++) {
-                reset_temp(op->args[i]);
-                /* Save the corresponding known-zero bits mask for the
-                   first output argument (only one supported so far). */
-                if (i == 0) {
-                    arg_info(op->args[i])->z_mask = z_mask;
-                }
-            }
-        }
+        finish_folding(&ctx, op);
 
         /* Eliminate duplicate and redundant fence instructions. */
         if (ctx.prev_mb) {
-- 
2.25.1

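Keeping the in-flight known-zero mask in the shared context means the per-opcode code only has to compute it, while a single writeback spot attaches it to the first output. A rough sketch of that split (illustrative and heavily simplified, not the QEMU types):

    #include <stdint.h>
    #include <stdio.h>

    struct ctx { uint64_t z_mask; };      /* in-flight value, like OptContext */
    struct temp { uint64_t z_mask; };     /* per-output metadata */

    static void fold_one(struct ctx *c, uint64_t value_mask)
    {
        c->z_mask = value_mask;           /* folding code records what it knows */
    }

    static void finish(struct ctx *c, struct temp *out)
    {
        out->z_mask = c->z_mask;          /* single writeback point */
    }

    int main(void)
    {
        struct ctx c = { 0 };
        struct temp t = { 0 };

        fold_one(&c, 0xffu);              /* e.g. an 8-bit load: high bits zero */
        finish(&c, &t);
        printf("0x%llx\n", (unsigned long long)t.z_mask);
        return 0;
    }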
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         uint64_t z_mask, partmask, affected, tmp;
         TCGOpcode opc = op->opc;
         const TCGOpDef *def;
+        bool done = false;
 
         /* Calls are special. */
         if (opc == INDEX_op_call) {
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
            allocator where needed and possible. Also detect copies. */
         switch (opc) {
         CASE_OP_32_64_VEC(mov):
-            tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
-            continue;
+            done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
+            break;
 
         case INDEX_op_dup_vec:
             if (arg_is_const(op->args[1])) {
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
             break;
         }
 
-        finish_folding(&ctx, op);
+        if (!done) {
+            finish_folding(&ctx, op);
+        }
 
         /* Eliminate duplicate and redundant fence instructions. */
         if (ctx.prev_mb) {
-- 
2.25.1

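Replacing the per-case continue with a done flag keeps a single fall-through point where the generic finish step runs for anything not already handled. In miniature (illustrative only):

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        for (int opc = 0; opc < 3; opc++) {
            bool done = false;

            switch (opc) {
            case 0:
                done = true;     /* fully handled, but no 'continue' needed */
                break;
            default:
                break;
            }

            if (!done) {
                printf("generic finish for op %d\n", opc);
            }
        }
        return 0;
    }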
This puts the separate mb optimization into the same framework
as the others. While fold_qemu_{ld,st} are currently identical,
that won't last as more code gets moved.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 89 +++++++++++++++++++++++++++++---------------------
 1 file changed, 51 insertions(+), 38 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
     return true;
 }
 
+static bool fold_mb(OptContext *ctx, TCGOp *op)
+{
+    /* Eliminate duplicate and redundant fence instructions. */
+    if (ctx->prev_mb) {
+        /*
+         * Merge two barriers of the same type into one,
+         * or a weaker barrier into a stronger one,
+         * or two weaker barriers into a stronger one.
+         *   mb X; mb Y => mb X|Y
+         *   mb; strl => mb; st
+         *   ldaq; mb => ld; mb
+         *   ldaq; strl => ld; mb; st
+         * Other combinations are also merged into a strong
+         * barrier. This is stricter than specified but for
+         * the purposes of TCG is better than not optimizing.
+         */
+        ctx->prev_mb->args[0] |= op->args[0];
+        tcg_op_remove(ctx->tcg, op);
+    } else {
+        ctx->prev_mb = op;
+    }
+    return true;
+}
+
+static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
+{
+    /* Opcodes that touch guest memory stop the mb optimization. */
+    ctx->prev_mb = NULL;
+    return false;
+}
+
+static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
+{
+    /* Opcodes that touch guest memory stop the mb optimization. */
+    ctx->prev_mb = NULL;
+    return false;
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 void tcg_optimize(TCGContext *s)
 {
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
             }
             break;
 
+        case INDEX_op_mb:
+            done = fold_mb(&ctx, op);
+            break;
+        case INDEX_op_qemu_ld_i32:
+        case INDEX_op_qemu_ld_i64:
+            done = fold_qemu_ld(&ctx, op);
+            break;
+        case INDEX_op_qemu_st_i32:
+        case INDEX_op_qemu_st8_i32:
+        case INDEX_op_qemu_st_i64:
+            done = fold_qemu_st(&ctx, op);
+            break;
+
         default:
             break;
         }
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         if (!done) {
             finish_folding(&ctx, op);
         }
-
-        /* Eliminate duplicate and redundant fence instructions. */
-        if (ctx.prev_mb) {
-            switch (opc) {
-            case INDEX_op_mb:
-                /* Merge two barriers of the same type into one,
-                 * or a weaker barrier into a stronger one,
-                 * or two weaker barriers into a stronger one.
-                 *   mb X; mb Y => mb X|Y
-                 *   mb; strl => mb; st
-                 *   ldaq; mb => ld; mb
-                 *   ldaq; strl => ld; mb; st
-                 * Other combinations are also merged into a strong
-                 * barrier. This is stricter than specified but for
-                 * the purposes of TCG is better than not optimizing.
-                 */
-                ctx.prev_mb->args[0] |= op->args[0];
-                tcg_op_remove(s, op);
-                break;
-
-            default:
-                /* Opcodes that end the block stop the optimization. */
-                if ((def->flags & TCG_OPF_BB_END) == 0) {
-                    break;
-                }
-                /* fallthru */
-            case INDEX_op_qemu_ld_i32:
-            case INDEX_op_qemu_ld_i64:
-            case INDEX_op_qemu_st_i32:
-            case INDEX_op_qemu_st8_i32:
-            case INDEX_op_qemu_st_i64:
-                /* Opcodes that touch guest memory stop the optimization. */
-                ctx.prev_mb = NULL;
-                break;
-            }
-        } else if (opc == INDEX_op_mb) {
-            ctx.prev_mb = op;
-        }
     }
 }
-- 
2.25.1

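The barrier merging above relies on the barrier argument being a bitmask, so two adjacent fences combine by OR-ing their flags. A stand-alone sketch of that rule (the flag values are hypothetical, not the TCG_MO_* constants):

    #include <stdio.h>

    #define BAR_LD_LD 0x1
    #define BAR_ST_ST 0x2

    int main(void)
    {
        int prev_mb = BAR_LD_LD;      /* mb X */
        int this_mb = BAR_ST_ST;      /* mb Y */

        prev_mb |= this_mb;           /* mb X; mb Y  =>  mb X|Y */

        printf("merged barrier flags: 0x%x\n", prev_mb);
        return 0;
    }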
Split out a whole bunch of placeholder functions, which are
currently identical. That won't last as more code gets moved.

Use CASE_32_64_VEC for some logical operators that previously
missed the addition of vectors.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 271 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 219 insertions(+), 52 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
[...]
211
- } else {
212
+ } else if (in_64) {
213
t_32 = tcg_temp_new_i32();
214
- if (in_64) {
215
- tcg_gen_extrl_i64_i32(t_32, in_64);
216
- } else if (vece == MO_8) {
217
- tcg_gen_movi_i32(t_32, in_c & 0xff);
218
- } else if (vece == MO_16) {
219
- tcg_gen_movi_i32(t_32, in_c & 0xffff);
220
- } else {
221
- tcg_gen_movi_i32(t_32, in_c);
222
- }
223
+ tcg_gen_extrl_i64_i32(t_32, in_64);
224
fns[vece](t_ptr, t_desc, t_32);
225
tcg_temp_free_i32(t_32);
226
+ } else {
227
+ if (vece == MO_8) {
228
+ in_c &= 0xff;
229
+ } else if (vece == MO_16) {
230
+ in_c &= 0xffff;
231
+ }
232
+ t_32 = tcg_constant_i32(in_c);
233
+ fns[vece](t_ptr, t_desc, t_32);
234
}
235
}
236
237
tcg_temp_free_ptr(t_ptr);
238
- tcg_temp_free_i32(t_desc);
239
return;
240
241
done:
242
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
243
if (g->fno) {
244
tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno);
245
} else {
246
- TCGv_i64 tcg_c = tcg_const_i64(c);
247
+ TCGv_i64 tcg_c = tcg_constant_i64(c);
248
tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz,
249
maxsz, c, g->fnoi);
250
- tcg_temp_free_i64(tcg_c);
251
}
252
oprsz = maxsz;
253
}
254
@@ -XXX,XX +XXX,XX @@ static void gen_addv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
255
256
void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
257
{
258
- TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
259
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
260
gen_addv_mask(d, a, b, m);
261
- tcg_temp_free_i64(m);
262
}
263
264
void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
265
{
266
- TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
267
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
268
gen_addv_mask(d, a, b, m);
269
- tcg_temp_free_i64(m);
270
}
271
272
void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
273
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
274
void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs,
275
int64_t c, uint32_t oprsz, uint32_t maxsz)
276
{
277
- TCGv_i64 tmp = tcg_const_i64(c);
278
+ TCGv_i64 tmp = tcg_constant_i64(c);
279
tcg_gen_gvec_adds(vece, dofs, aofs, tmp, oprsz, maxsz);
280
- tcg_temp_free_i64(tmp);
281
}
282
283
static const TCGOpcode vecop_list_sub[] = { INDEX_op_sub_vec, 0 };
284
@@ -XXX,XX +XXX,XX @@ static void gen_subv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
285
286
void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
287
{
288
- TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
289
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
290
gen_subv_mask(d, a, b, m);
291
- tcg_temp_free_i64(m);
292
}
293
294
void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
295
{
296
- TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
297
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
298
gen_subv_mask(d, a, b, m);
299
- tcg_temp_free_i64(m);
300
}
301
302
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
303
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs,
304
void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
305
int64_t c, uint32_t oprsz, uint32_t maxsz)
306
{
307
- TCGv_i64 tmp = tcg_const_i64(c);
308
+ TCGv_i64 tmp = tcg_constant_i64(c);
309
tcg_gen_gvec_muls(vece, dofs, aofs, tmp, oprsz, maxsz);
310
- tcg_temp_free_i64(tmp);
311
}
312
313
void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
314
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs,
315
316
static void tcg_gen_usadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
317
{
318
- TCGv_i32 max = tcg_const_i32(-1);
319
+ TCGv_i32 max = tcg_constant_i32(-1);
320
tcg_gen_add_i32(d, a, b);
321
tcg_gen_movcond_i32(TCG_COND_LTU, d, d, a, max, d);
322
- tcg_temp_free_i32(max);
323
}
324
325
static void tcg_gen_usadd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
326
{
327
- TCGv_i64 max = tcg_const_i64(-1);
328
+ TCGv_i64 max = tcg_constant_i64(-1);
329
tcg_gen_add_i64(d, a, b);
330
tcg_gen_movcond_i64(TCG_COND_LTU, d, d, a, max, d);
331
- tcg_temp_free_i64(max);
332
}
333
334
void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
335
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
336
337
static void tcg_gen_ussub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
338
{
339
- TCGv_i32 min = tcg_const_i32(0);
340
+ TCGv_i32 min = tcg_constant_i32(0);
341
tcg_gen_sub_i32(d, a, b);
342
tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, min, d);
343
- tcg_temp_free_i32(min);
344
}
345
346
static void tcg_gen_ussub_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
347
{
348
- TCGv_i64 min = tcg_const_i64(0);
349
+ TCGv_i64 min = tcg_constant_i64(0);
350
tcg_gen_sub_i64(d, a, b);
351
tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, min, d);
352
- tcg_temp_free_i64(min);
353
}
354
355
void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs,
356
@@ -XXX,XX +XXX,XX @@ static void gen_negv_mask(TCGv_i64 d, TCGv_i64 b, TCGv_i64 m)
357
358
void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 b)
359
{
360
- TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
361
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
362
gen_negv_mask(d, b, m);
363
- tcg_temp_free_i64(m);
364
}
365
366
void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 b)
367
{
368
- TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
369
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
370
gen_negv_mask(d, b, m);
371
- tcg_temp_free_i64(m);
372
}
373
374
void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 b)
375
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
376
void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
377
int64_t c, uint32_t oprsz, uint32_t maxsz)
378
{
379
- TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
380
+ TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
381
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
382
- tcg_temp_free_i64(tmp);
383
}
384
385
static const GVecGen2s gop_xors = {
386
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
387
void tcg_gen_gvec_xori(unsigned vece, uint32_t dofs, uint32_t aofs,
388
int64_t c, uint32_t oprsz, uint32_t maxsz)
389
{
390
- TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
391
+ TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
392
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
393
- tcg_temp_free_i64(tmp);
394
}
395
396
static const GVecGen2s gop_ors = {
397
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
398
void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
399
int64_t c, uint32_t oprsz, uint32_t maxsz)
400
{
401
- TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
402
+ TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
403
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
404
- tcg_temp_free_i64(tmp);
405
}
406
407
void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
408
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_shlv_mod_vec(unsigned vece, TCGv_vec d,
409
TCGv_vec a, TCGv_vec b)
410
{
411
TCGv_vec t = tcg_temp_new_vec_matching(d);
412
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
413
414
- tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
415
- tcg_gen_and_vec(vece, t, t, b);
416
+ tcg_gen_and_vec(vece, t, b, m);
417
tcg_gen_shlv_vec(vece, d, a, t);
418
tcg_temp_free_vec(t);
419
}
420
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_shrv_mod_vec(unsigned vece, TCGv_vec d,
421
TCGv_vec a, TCGv_vec b)
422
{
423
TCGv_vec t = tcg_temp_new_vec_matching(d);
424
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
425
426
- tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
427
- tcg_gen_and_vec(vece, t, t, b);
428
+ tcg_gen_and_vec(vece, t, b, m);
429
tcg_gen_shrv_vec(vece, d, a, t);
430
tcg_temp_free_vec(t);
431
}
432
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_sarv_mod_vec(unsigned vece, TCGv_vec d,
433
TCGv_vec a, TCGv_vec b)
434
{
435
TCGv_vec t = tcg_temp_new_vec_matching(d);
436
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
437
438
- tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
439
- tcg_gen_and_vec(vece, t, t, b);
440
+ tcg_gen_and_vec(vece, t, b, m);
441
tcg_gen_sarv_vec(vece, d, a, t);
442
tcg_temp_free_vec(t);
443
}
444
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
445
TCGv_vec a, TCGv_vec b)
446
{
447
TCGv_vec t = tcg_temp_new_vec_matching(d);
448
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
449
450
- tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
451
- tcg_gen_and_vec(vece, t, t, b);
452
+ tcg_gen_and_vec(vece, t, b, m);
453
tcg_gen_rotlv_vec(vece, d, a, t);
454
tcg_temp_free_vec(t);
455
}
456
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_rotrv_mod_vec(unsigned vece, TCGv_vec d,
457
TCGv_vec a, TCGv_vec b)
458
{
459
TCGv_vec t = tcg_temp_new_vec_matching(d);
460
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
461
462
- tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
463
- tcg_gen_and_vec(vece, t, t, b);
464
+ tcg_gen_and_vec(vece, t, b, m);
465
tcg_gen_rotrv_vec(vece, d, a, t);
466
tcg_temp_free_vec(t);
467
}
468
diff --git a/tcg/tcg.c b/tcg/tcg.c
469
index XXXXXXX..XXXXXXX 100644
470
--- a/tcg/tcg.c
471
+++ b/tcg/tcg.c
472
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
473
return temp_tcgv_vec(tcg_constant_internal(type, val));
474
}
475
476
+TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
477
+{
478
+ TCGTemp *t = tcgv_vec_temp(match);
479
+
480
+ tcg_debug_assert(t->temp_allocated != 0);
481
+ return tcg_constant_vec(t->base_type, vece, val);
482
+}
483
+
484
TCGv_i32 tcg_const_i32(int32_t val)
485
{
486
TCGv_i32 t0;
487
--
2.25.1


Reduce some code duplication by folding the NE and EQ cases.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 145 ++++++++++++++++++++++++-------------------------
1 file changed, 72 insertions(+), 73 deletions(-)
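The shared EQ/NE handling works by constant-folding one 32-bit half of the comparison and XOR-ing any known result with an inversion flag, so TCG_COND_NE falls through into the TCG_COND_EQ logic. A minimal standalone sketch of that idea (illustrative only; the helper name below is not from the patch):

#include <stdbool.h>

/*
 * Illustrative model (not QEMU code) of the EQ/NE sharing in
 * fold_setcond2(): "half" is the constant-folded result of comparing
 * one 32-bit half of the 64-bit pair (-1 unknown, 0 false, 1 true).
 * XOR-ing a known result with "inv" lets TCG_COND_NE reuse the
 * TCG_COND_EQ logic unchanged.
 */
static int fold_half_comparison(int half, bool is_ne)
{
    int inv = is_ne ? 1 : 0;

    if (half < 0) {
        return -1;      /* half is not a known constant: cannot simplify */
    }
    switch (half ^ inv) {
    case 0:
        return 0;       /* the whole setcond2 folds to a constant */
    case 1:
        return 1;       /* it reduces to a setcond on the other half */
    }
    return -1;          /* unreachable */
}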
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
15
return fold_const2(ctx, op);
16
}
17
18
+static bool fold_setcond2(OptContext *ctx, TCGOp *op)
19
+{
20
+ TCGCond cond = op->args[5];
21
+ int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
22
+ int inv = 0;
23
+
24
+ if (i >= 0) {
25
+ goto do_setcond_const;
26
+ }
27
+
28
+ switch (cond) {
29
+ case TCG_COND_LT:
30
+ case TCG_COND_GE:
31
+ /*
32
+ * Simplify LT/GE comparisons vs zero to a single compare
33
+ * vs the high word of the input.
34
+ */
35
+ if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
36
+ arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
37
+ goto do_setcond_high;
38
+ }
39
+ break;
40
+
41
+ case TCG_COND_NE:
42
+ inv = 1;
43
+ QEMU_FALLTHROUGH;
44
+ case TCG_COND_EQ:
45
+ /*
46
+ * Simplify EQ/NE comparisons where one of the pairs
47
+ * can be simplified.
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
50
+ op->args[3], cond);
51
+ switch (i ^ inv) {
52
+ case 0:
53
+ goto do_setcond_const;
54
+ case 1:
55
+ goto do_setcond_high;
56
+ }
57
+
58
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
59
+ op->args[4], cond);
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_setcond_const;
63
+ case 1:
64
+ op->args[2] = op->args[3];
65
+ op->args[3] = cond;
66
+ op->opc = INDEX_op_setcond_i32;
67
+ break;
68
+ }
69
+ break;
70
+
71
+ default:
72
+ break;
73
+
74
+ do_setcond_high:
75
+ op->args[1] = op->args[2];
76
+ op->args[2] = op->args[4];
77
+ op->args[3] = cond;
78
+ op->opc = INDEX_op_setcond_i32;
79
+ break;
80
+ }
81
+ return false;
82
+
83
+ do_setcond_const:
84
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
85
+}
86
+
87
static bool fold_shift(OptContext *ctx, TCGOp *op)
88
{
89
return fold_const2(ctx, op);
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
}
92
break;
93
94
- case INDEX_op_setcond2_i32:
95
- i = do_constant_folding_cond2(&op->args[1], &op->args[3],
96
- op->args[5]);
97
- if (i >= 0) {
98
- do_setcond_const:
99
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
100
- continue;
101
- }
102
- if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
103
- && arg_is_const(op->args[3])
104
- && arg_info(op->args[3])->val == 0
105
- && arg_is_const(op->args[4])
106
- && arg_info(op->args[4])->val == 0) {
107
- /* Simplify LT/GE comparisons vs zero to a single compare
108
- vs the high word of the input. */
109
- do_setcond_high:
110
- reset_temp(op->args[0]);
111
- arg_info(op->args[0])->z_mask = 1;
112
- op->opc = INDEX_op_setcond_i32;
113
- op->args[1] = op->args[2];
114
- op->args[2] = op->args[4];
115
- op->args[3] = op->args[5];
116
- break;
117
- }
118
- if (op->args[5] == TCG_COND_EQ) {
119
- /* Simplify EQ comparisons where one of the pairs
120
- can be simplified. */
121
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
122
- op->args[1], op->args[3],
123
- TCG_COND_EQ);
124
- if (i == 0) {
125
- goto do_setcond_const;
126
- } else if (i > 0) {
127
- goto do_setcond_high;
128
- }
129
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
130
- op->args[2], op->args[4],
131
- TCG_COND_EQ);
132
- if (i == 0) {
133
- goto do_setcond_high;
134
- } else if (i < 0) {
135
- break;
136
- }
137
- do_setcond_low:
138
- reset_temp(op->args[0]);
139
- arg_info(op->args[0])->z_mask = 1;
140
- op->opc = INDEX_op_setcond_i32;
141
- op->args[2] = op->args[3];
142
- op->args[3] = op->args[5];
143
- break;
144
- }
145
- if (op->args[5] == TCG_COND_NE) {
146
- /* Simplify NE comparisons where one of the pairs
147
- can be simplified. */
148
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
149
- op->args[1], op->args[3],
150
- TCG_COND_NE);
151
- if (i == 0) {
152
- goto do_setcond_high;
153
- } else if (i > 0) {
154
- goto do_setcond_const;
155
- }
156
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
157
- op->args[2], op->args[4],
158
- TCG_COND_NE);
159
- if (i == 0) {
160
- goto do_setcond_low;
161
- } else if (i > 0) {
162
- goto do_setcond_const;
163
- }
164
- }
165
- break;
166
-
167
default:
168
break;
169
170
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
171
CASE_OP_32_64(shr):
172
done = fold_shift(&ctx, op);
173
break;
174
+ case INDEX_op_setcond2_i32:
175
+ done = fold_setcond2(&ctx, op);
176
+ break;
177
CASE_OP_32_64_VEC(sub):
178
done = fold_sub(&ctx, op);
179
break;
180
--
2.25.1


Reduce some code duplication by folding the NE and EQ cases.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 159 +++++++++++++++++++++++++------------------------
1 file changed, 81 insertions(+), 78 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
14
return fold_const2(ctx, op);
15
}
16
17
+static bool fold_brcond2(OptContext *ctx, TCGOp *op)
18
+{
19
+ TCGCond cond = op->args[4];
20
+ int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
21
+ TCGArg label = op->args[5];
22
+ int inv = 0;
23
+
24
+ if (i >= 0) {
25
+ goto do_brcond_const;
26
+ }
27
+
28
+ switch (cond) {
29
+ case TCG_COND_LT:
30
+ case TCG_COND_GE:
31
+ /*
32
+ * Simplify LT/GE comparisons vs zero to a single compare
33
+ * vs the high word of the input.
34
+ */
35
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
36
+ arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
37
+ goto do_brcond_high;
38
+ }
39
+ break;
40
+
41
+ case TCG_COND_NE:
42
+ inv = 1;
43
+ QEMU_FALLTHROUGH;
44
+ case TCG_COND_EQ:
45
+ /*
46
+ * Simplify EQ/NE comparisons where one of the pairs
47
+ * can be simplified.
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
50
+ op->args[2], cond);
51
+ switch (i ^ inv) {
52
+ case 0:
53
+ goto do_brcond_const;
54
+ case 1:
55
+ goto do_brcond_high;
56
+ }
57
+
58
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
59
+ op->args[3], cond);
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_brcond_const;
63
+ case 1:
64
+ op->opc = INDEX_op_brcond_i32;
65
+ op->args[1] = op->args[2];
66
+ op->args[2] = cond;
67
+ op->args[3] = label;
68
+ break;
69
+ }
70
+ break;
71
+
72
+ default:
73
+ break;
74
+
75
+ do_brcond_high:
76
+ op->opc = INDEX_op_brcond_i32;
77
+ op->args[0] = op->args[1];
78
+ op->args[1] = op->args[3];
79
+ op->args[2] = cond;
80
+ op->args[3] = label;
81
+ break;
82
+
83
+ do_brcond_const:
84
+ if (i == 0) {
85
+ tcg_op_remove(ctx->tcg, op);
86
+ return true;
87
+ }
88
+ op->opc = INDEX_op_br;
89
+ op->args[0] = label;
90
+ break;
91
+ }
92
+ return false;
93
+}
94
+
95
static bool fold_call(OptContext *ctx, TCGOp *op)
96
{
97
TCGContext *s = ctx->tcg;
98
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
99
}
100
break;
101
102
- case INDEX_op_brcond2_i32:
103
- i = do_constant_folding_cond2(&op->args[0], &op->args[2],
104
- op->args[4]);
105
- if (i == 0) {
106
- do_brcond_false:
107
- tcg_op_remove(s, op);
108
- continue;
109
- }
110
- if (i > 0) {
111
- do_brcond_true:
112
- op->opc = opc = INDEX_op_br;
113
- op->args[0] = op->args[5];
114
- break;
115
- }
116
- if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
117
- && arg_is_const(op->args[2])
118
- && arg_info(op->args[2])->val == 0
119
- && arg_is_const(op->args[3])
120
- && arg_info(op->args[3])->val == 0) {
121
- /* Simplify LT/GE comparisons vs zero to a single compare
122
- vs the high word of the input. */
123
- do_brcond_high:
124
- op->opc = opc = INDEX_op_brcond_i32;
125
- op->args[0] = op->args[1];
126
- op->args[1] = op->args[3];
127
- op->args[2] = op->args[4];
128
- op->args[3] = op->args[5];
129
- break;
130
- }
131
- if (op->args[4] == TCG_COND_EQ) {
132
- /* Simplify EQ comparisons where one of the pairs
133
- can be simplified. */
134
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
135
- op->args[0], op->args[2],
136
- TCG_COND_EQ);
137
- if (i == 0) {
138
- goto do_brcond_false;
139
- } else if (i > 0) {
140
- goto do_brcond_high;
141
- }
142
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
143
- op->args[1], op->args[3],
144
- TCG_COND_EQ);
145
- if (i == 0) {
146
- goto do_brcond_false;
147
- } else if (i < 0) {
148
- break;
149
- }
150
- do_brcond_low:
151
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
152
- op->opc = INDEX_op_brcond_i32;
153
- op->args[1] = op->args[2];
154
- op->args[2] = op->args[4];
155
- op->args[3] = op->args[5];
156
- break;
157
- }
158
- if (op->args[4] == TCG_COND_NE) {
159
- /* Simplify NE comparisons where one of the pairs
160
- can be simplified. */
161
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
162
- op->args[0], op->args[2],
163
- TCG_COND_NE);
164
- if (i == 0) {
165
- goto do_brcond_high;
166
- } else if (i > 0) {
167
- goto do_brcond_true;
168
- }
169
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
170
- op->args[1], op->args[3],
171
- TCG_COND_NE);
172
- if (i == 0) {
173
- goto do_brcond_low;
174
- } else if (i > 0) {
175
- goto do_brcond_true;
176
- }
177
- }
178
- break;
179
-
180
default:
181
break;
182
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
CASE_OP_32_64_VEC(andc):
185
done = fold_andc(&ctx, op);
186
break;
187
+ case INDEX_op_brcond2_i32:
188
+ done = fold_brcond2(&ctx, op);
189
+ break;
190
CASE_OP_32_64(ctpop):
191
done = fold_ctpop(&ctx, op);
192
break;
193
--
2.25.1


Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 33 +++++++++++++++++++--------------
1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
14
}
15
16
+static bool fold_brcond(OptContext *ctx, TCGOp *op)
17
+{
18
+ TCGCond cond = op->args[2];
19
+ int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
20
+
21
+ if (i == 0) {
22
+ tcg_op_remove(ctx->tcg, op);
23
+ return true;
24
+ }
25
+ if (i > 0) {
26
+ op->opc = INDEX_op_br;
27
+ op->args[0] = op->args[3];
28
+ }
29
+ return false;
30
+}
31
+
32
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
33
{
34
TCGCond cond = op->args[4];
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
36
}
37
break;
38
39
- CASE_OP_32_64(brcond):
40
- i = do_constant_folding_cond(opc, op->args[0],
41
- op->args[1], op->args[2]);
42
- if (i == 0) {
43
- tcg_op_remove(s, op);
44
- continue;
45
- } else if (i > 0) {
46
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
47
- op->opc = opc = INDEX_op_br;
48
- op->args[0] = op->args[3];
49
- break;
50
- }
51
- break;
52
-
53
CASE_OP_32_64(movcond):
54
i = do_constant_folding_cond(opc, op->args[1],
55
op->args[2], op->args[5]);
56
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
57
CASE_OP_32_64_VEC(andc):
58
done = fold_andc(&ctx, op);
59
break;
60
+ CASE_OP_32_64(brcond):
61
+ done = fold_brcond(&ctx, op);
62
+ break;
63
case INDEX_op_brcond2_i32:
64
done = fold_brcond2(&ctx, op);
65
break;
66
--
2.25.1


Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
14
}
15
16
+static bool fold_setcond(OptContext *ctx, TCGOp *op)
17
+{
18
+ TCGCond cond = op->args[3];
19
+ int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
20
+
21
+ if (i >= 0) {
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
+ }
24
+ return false;
25
+}
26
+
27
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
28
{
29
TCGCond cond = op->args[5];
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
31
}
32
break;
33
34
- CASE_OP_32_64(setcond):
35
- i = do_constant_folding_cond(opc, op->args[1],
36
- op->args[2], op->args[3]);
37
- if (i >= 0) {
38
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
39
- continue;
40
- }
41
- break;
42
-
43
CASE_OP_32_64(movcond):
44
i = do_constant_folding_cond(opc, op->args[1],
45
op->args[2], op->args[5]);
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
47
CASE_OP_32_64(shr):
48
done = fold_shift(&ctx, op);
49
break;
50
+ CASE_OP_32_64(setcond):
51
+ done = fold_setcond(&ctx, op);
52
+ break;
53
case INDEX_op_setcond2_i32:
54
done = fold_setcond2(&ctx, op);
55
break;
56
--
2.25.1


Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 37 +++++++++++++++++++++----------------
1 file changed, 21 insertions(+), 16 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
14
}
15
16
+static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
17
+{
18
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
19
+ uint32_t a = arg_info(op->args[2])->val;
20
+ uint32_t b = arg_info(op->args[3])->val;
21
+ uint64_t r = (uint64_t)a * b;
22
+ TCGArg rl, rh;
23
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
24
+
25
+ rl = op->args[0];
26
+ rh = op->args[1];
27
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
28
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
29
+ return true;
30
+ }
31
+ return false;
32
+}
33
+
34
static bool fold_nand(OptContext *ctx, TCGOp *op)
35
{
36
return fold_const2(ctx, op);
37
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
38
}
39
break;
40
41
- case INDEX_op_mulu2_i32:
42
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
43
- uint32_t a = arg_info(op->args[2])->val;
44
- uint32_t b = arg_info(op->args[3])->val;
45
- uint64_t r = (uint64_t)a * b;
46
- TCGArg rl, rh;
47
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
48
-
49
- rl = op->args[0];
50
- rh = op->args[1];
51
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
52
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
53
- continue;
54
- }
55
- break;
56
-
57
default:
58
break;
59
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
CASE_OP_32_64(muluh):
62
done = fold_mul_highpart(&ctx, op);
63
break;
64
+ case INDEX_op_mulu2_i32:
65
+ done = fold_mulu2_i32(&ctx, op);
66
+ break;
67
CASE_OP_32_64(nand):
68
done = fold_nand(&ctx, op);
69
break;
70
--
2.25.1


Add two additional helpers, fold_add2_i32 and fold_sub2_i32
which will not be simple wrappers forever.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 70 +++++++++++++++++++++++++++++++-------------------
1 file changed, 44 insertions(+), 26 deletions(-)
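The folding itself glues the known 32-bit halves into 64-bit values, performs the add or subtract once, and splits the result back into the two outputs. A standalone sketch of that arithmetic (illustrative only; the helper name is not from the patch):

#include <stdint.h>
#include <stdbool.h>

/* Standalone model of the arithmetic fold_addsub2_i32 performs when all
 * four input halves are known constants. */
static void fold_addsub2_model(uint32_t al, uint32_t ah,
                               uint32_t bl, uint32_t bh,
                               bool add, uint32_t *rl, uint32_t *rh)
{
    uint64_t a = ((uint64_t)ah << 32) | al;
    uint64_t b = ((uint64_t)bh << 32) | bl;

    a = add ? a + b : a - b;
    *rl = (uint32_t)a;          /* constant for the low output */
    *rh = (uint32_t)(a >> 32);  /* constant for the high output */
}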
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
16
return fold_const2(ctx, op);
17
}
18
19
+static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
20
+{
21
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
22
+ arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
23
+ uint32_t al = arg_info(op->args[2])->val;
24
+ uint32_t ah = arg_info(op->args[3])->val;
25
+ uint32_t bl = arg_info(op->args[4])->val;
26
+ uint32_t bh = arg_info(op->args[5])->val;
27
+ uint64_t a = ((uint64_t)ah << 32) | al;
28
+ uint64_t b = ((uint64_t)bh << 32) | bl;
29
+ TCGArg rl, rh;
30
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
31
+
32
+ if (add) {
33
+ a += b;
34
+ } else {
35
+ a -= b;
36
+ }
37
+
38
+ rl = op->args[0];
39
+ rh = op->args[1];
40
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
41
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
42
+ return true;
43
+ }
44
+ return false;
45
+}
46
+
47
+static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
48
+{
49
+ return fold_addsub2_i32(ctx, op, true);
50
+}
51
+
52
static bool fold_and(OptContext *ctx, TCGOp *op)
53
{
54
return fold_const2(ctx, op);
55
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
56
return fold_const2(ctx, op);
57
}
58
59
+static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
60
+{
61
+ return fold_addsub2_i32(ctx, op, false);
62
+}
63
+
64
static bool fold_xor(OptContext *ctx, TCGOp *op)
65
{
66
return fold_const2(ctx, op);
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
}
69
break;
70
71
- case INDEX_op_add2_i32:
72
- case INDEX_op_sub2_i32:
73
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
74
- && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
75
- uint32_t al = arg_info(op->args[2])->val;
76
- uint32_t ah = arg_info(op->args[3])->val;
77
- uint32_t bl = arg_info(op->args[4])->val;
78
- uint32_t bh = arg_info(op->args[5])->val;
79
- uint64_t a = ((uint64_t)ah << 32) | al;
80
- uint64_t b = ((uint64_t)bh << 32) | bl;
81
- TCGArg rl, rh;
82
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
83
-
84
- if (opc == INDEX_op_add2_i32) {
85
- a += b;
86
- } else {
87
- a -= b;
88
- }
89
-
90
- rl = op->args[0];
91
- rh = op->args[1];
92
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
93
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
94
- continue;
95
- }
96
- break;
97
98
default:
99
break;
100
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
101
CASE_OP_32_64_VEC(add):
102
done = fold_add(&ctx, op);
103
break;
104
+ case INDEX_op_add2_i32:
105
+ done = fold_add2_i32(&ctx, op);
106
+ break;
107
CASE_OP_32_64_VEC(and):
108
done = fold_and(&ctx, op);
109
break;
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64_VEC(sub):
112
done = fold_sub(&ctx, op);
113
break;
114
+ case INDEX_op_sub2_i32:
115
+ done = fold_sub2_i32(&ctx, op);
116
+ break;
117
CASE_OP_32_64_VEC(xor):
118
done = fold_xor(&ctx, op);
119
break;
120
--
2.25.1


Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 56 ++++++++++++++++++++++----------------------
1 file changed, 31 insertions(+), 25 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
13
return true;
14
}
15
16
+static bool fold_movcond(OptContext *ctx, TCGOp *op)
17
+{
18
+ TCGOpcode opc = op->opc;
19
+ TCGCond cond = op->args[5];
20
+ int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
21
+
22
+ if (i >= 0) {
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
24
+ }
25
+
26
+ if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
27
+ uint64_t tv = arg_info(op->args[3])->val;
28
+ uint64_t fv = arg_info(op->args[4])->val;
29
+
30
+ opc = (opc == INDEX_op_movcond_i32
31
+ ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
32
+
33
+ if (tv == 1 && fv == 0) {
34
+ op->opc = opc;
35
+ op->args[3] = cond;
36
+ } else if (fv == 1 && tv == 0) {
37
+ op->opc = opc;
38
+ op->args[3] = tcg_invert_cond(cond);
39
+ }
40
+ }
41
+ return false;
42
+}
43
+
44
static bool fold_mul(OptContext *ctx, TCGOp *op)
45
{
46
return fold_const2(ctx, op);
47
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
}
49
break;
50
51
- CASE_OP_32_64(movcond):
52
- i = do_constant_folding_cond(opc, op->args[1],
53
- op->args[2], op->args[5]);
54
- if (i >= 0) {
55
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
56
- continue;
57
- }
58
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
59
- uint64_t tv = arg_info(op->args[3])->val;
60
- uint64_t fv = arg_info(op->args[4])->val;
61
- TCGCond cond = op->args[5];
62
-
63
- if (fv == 1 && tv == 0) {
64
- cond = tcg_invert_cond(cond);
65
- } else if (!(tv == 1 && fv == 0)) {
66
- break;
67
- }
68
- op->args[3] = cond;
69
- op->opc = opc = (opc == INDEX_op_movcond_i32
70
- ? INDEX_op_setcond_i32
71
- : INDEX_op_setcond_i64);
72
- }
73
- break;
74
-
75
-
76
default:
77
break;
78
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
case INDEX_op_mb:
81
done = fold_mb(&ctx, op);
82
break;
83
+ CASE_OP_32_64(movcond):
84
+ done = fold_movcond(&ctx, op);
85
+ break;
86
CASE_OP_32_64(mul):
87
done = fold_mul(&ctx, op);
88
break;
89
--
2.25.1

Improve rotrv_vec to reduce "t1 = -v2, t2 = t1 + c" to
"t1 = -v2, t2 = c - v2". This avoids a serial dependency
between t1 and t2.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/aarch64/tcg-target.c.inc | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 39 ++++++++++++++++++++++-----------------
1 file changed, 22 insertions(+), 17 deletions(-)
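The aarch64 change is about scheduling: the left-shift count is now derived directly from the operand rather than from the negated temporary, so the two shift counts can be computed in parallel. A scalar model of the identity both sequences implement (illustrative only, not the backend code):

#include <stdint.h>

/*
 * Scalar model of the rotate-right-by-vector lowering (illustrative only).
 *   Old:  t1 = -v2;  t2 = t1 + (8 << vece);   (t2 waits on t1)
 *   New:  t1 = -v2;  t2 = (8 << vece) - v2;   (t1 and t2 are independent)
 * Both feed variable left shifts, where negative counts shift right, and
 * compute rotr(x, s) = (x >> s) | (x << (n - s)).
 */
static uint32_t rotr32_model(uint32_t x, unsigned s)
{
    unsigned n = 32;
    unsigned r = s % n;

    return r ? (x >> r) | (x << (n - r)) : x;
}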
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/aarch64/tcg-target.c.inc
10
--- a/tcg/optimize.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
11
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
15
TCGArg a0, ...)
13
return fold_const2(ctx, op);
14
}
15
16
+static bool fold_extract2(OptContext *ctx, TCGOp *op)
17
+{
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
19
+ uint64_t v1 = arg_info(op->args[1])->val;
20
+ uint64_t v2 = arg_info(op->args[2])->val;
21
+ int shr = op->args[3];
22
+
23
+ if (op->opc == INDEX_op_extract2_i64) {
24
+ v1 >>= shr;
25
+ v2 <<= 64 - shr;
26
+ } else {
27
+ v1 = (uint32_t)v1 >> shr;
28
+ v2 = (int32_t)v2 << (32 - shr);
29
+ }
30
+ return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
31
+ }
32
+ return false;
33
+}
34
+
35
static bool fold_exts(OptContext *ctx, TCGOp *op)
16
{
36
{
17
va_list va;
37
return fold_const1(ctx, op);
18
- TCGv_vec v0, v1, v2, t1, t2;
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
19
+ TCGv_vec v0, v1, v2, t1, t2, c1;
39
}
20
TCGArg a2;
40
break;
21
41
22
va_start(va, a0);
42
- CASE_OP_32_64(extract2):
23
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
43
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
24
44
- uint64_t v1 = arg_info(op->args[1])->val;
25
case INDEX_op_rotlv_vec:
45
- uint64_t v2 = arg_info(op->args[2])->val;
26
t1 = tcg_temp_new_vec(type);
46
- int shr = op->args[3];
27
- tcg_gen_dupi_vec(vece, t1, 8 << vece);
47
-
28
- tcg_gen_sub_vec(vece, t1, v2, t1);
48
- if (opc == INDEX_op_extract2_i64) {
29
+ c1 = tcg_constant_vec(type, vece, 8 << vece);
49
- tmp = (v1 >> shr) | (v2 << (64 - shr));
30
+ tcg_gen_sub_vec(vece, t1, v2, c1);
50
- } else {
31
/* Right shifts are negative left shifts for AArch64. */
51
- tmp = (int32_t)(((uint32_t)v1 >> shr) |
32
vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
52
- ((uint32_t)v2 << (32 - shr)));
33
tcgv_vec_arg(v1), tcgv_vec_arg(t1));
53
- }
34
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
54
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
35
case INDEX_op_rotrv_vec:
55
- continue;
36
t1 = tcg_temp_new_vec(type);
56
- }
37
t2 = tcg_temp_new_vec(type);
57
- break;
38
+ c1 = tcg_constant_vec(type, vece, 8 << vece);
58
-
39
tcg_gen_neg_vec(vece, t1, v2);
59
default:
40
- tcg_gen_dupi_vec(vece, t2, 8 << vece);
60
break;
41
- tcg_gen_add_vec(vece, t2, t1, t2);
61
42
+ tcg_gen_sub_vec(vece, t2, c1, v2);
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
43
/* Right shifts are negative left shifts for AArch64. */
63
CASE_OP_32_64(eqv):
44
vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
64
done = fold_eqv(&ctx, op);
45
tcgv_vec_arg(v1), tcgv_vec_arg(t1));
65
break;
66
+ CASE_OP_32_64(extract2):
67
+ done = fold_extract2(&ctx, op);
68
+ break;
69
CASE_OP_32_64(ext8s):
70
CASE_OP_32_64(ext16s):
71
case INDEX_op_ext32s_i64:
46
--
72
--
47
2.25.1
73
2.25.1
48
74
49
75
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
4
---
3
tcg/ppc/tcg-target-constr.h | 37 ++++++++++
5
tcg/optimize.c | 48 ++++++++++++++++++++++++++++++------------------
4
tcg/ppc/tcg-target.c.inc | 135 +++++++++++++++---------------------
6
1 file changed, 30 insertions(+), 18 deletions(-)
5
2 files changed, 94 insertions(+), 78 deletions(-)
6
create mode 100644 tcg/ppc/tcg-target-constr.h
7
7
8
diff --git a/tcg/ppc/tcg-target-constr.h b/tcg/ppc/tcg-target-constr.h
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
new file mode 100644
9
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX
10
--- a/tcg/optimize.c
11
--- /dev/null
11
+++ b/tcg/optimize.c
12
+++ b/tcg/ppc/tcg-target-constr.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@
13
return fold_const2(ctx, op);
14
+/* SPDX-License-Identifier: GPL-2.0-or-later */
14
}
15
+/*
15
16
+ * PowerPC target-specific operand constaints.
16
+static bool fold_extract(OptContext *ctx, TCGOp *op)
17
+ * Copyright (c) 2020 Linaro
17
+{
18
+ */
18
+ if (arg_is_const(op->args[1])) {
19
+ uint64_t t;
19
+
20
+
20
+C_O0_I1(r)
21
+ t = arg_info(op->args[1])->val;
21
+C_O0_I2(r, r)
22
+ t = extract64(t, op->args[2], op->args[3]);
22
+C_O0_I2(r, ri)
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
23
+C_O0_I2(S, S)
24
+ }
24
+C_O0_I2(v, r)
25
+ return false;
25
+C_O0_I3(S, S, S)
26
+}
26
+C_O0_I4(r, r, ri, ri)
27
+
27
+C_O0_I4(S, S, S, S)
28
static bool fold_extract2(OptContext *ctx, TCGOp *op)
28
+C_O1_I1(r, L)
29
{
29
+C_O1_I1(r, r)
30
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
30
+C_O1_I1(v, r)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
31
+C_O1_I1(v, v)
32
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
32
+C_O1_I1(v, vr)
33
+C_O1_I2(r, 0, rZ)
34
+C_O1_I2(r, L, L)
35
+C_O1_I2(r, rI, ri)
36
+C_O1_I2(r, rI, rT)
37
+C_O1_I2(r, r, r)
38
+C_O1_I2(r, r, ri)
39
+C_O1_I2(r, r, rI)
40
+C_O1_I2(r, r, rT)
41
+C_O1_I2(r, r, rU)
42
+C_O1_I2(r, r, rZW)
43
+C_O1_I2(v, v, v)
44
+C_O1_I3(v, v, v, v)
45
+C_O1_I4(r, r, ri, rZ, rZ)
46
+C_O1_I4(r, r, r, ri, ri)
47
+C_O2_I1(L, L, L)
48
+C_O2_I2(L, L, L, L)
49
+C_O2_I4(r, r, rI, rZM, r, r)
50
+C_O2_I4(r, r, r, r, rI, rZM)
51
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
52
index XXXXXXX..XXXXXXX 100644
53
--- a/tcg/ppc/tcg-target.c.inc
54
+++ b/tcg/ppc/tcg-target.c.inc
55
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
56
va_end(va);
57
}
33
}
58
34
59
+/* Define all constraint sets. */
35
+static bool fold_sextract(OptContext *ctx, TCGOp *op)
60
+#include "../tcg-constr.c.inc"
36
+{
37
+ if (arg_is_const(op->args[1])) {
38
+ uint64_t t;
61
+
39
+
62
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
40
+ t = arg_info(op->args[1])->val;
41
+ t = sextract64(t, op->args[2], op->args[3]);
42
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
43
+ }
44
+ return false;
45
+}
46
+
47
static bool fold_shift(OptContext *ctx, TCGOp *op)
63
{
48
{
64
- static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
49
return fold_const2(ctx, op);
65
- static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
50
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
66
- static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
51
}
67
- static const TCGTargetOpDef S_S = { .args_ct_str = { "S", "S" } };
52
break;
68
- static const TCGTargetOpDef r_ri = { .args_ct_str = { "r", "ri" } };
53
69
- static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
54
- CASE_OP_32_64(extract):
70
- static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
55
- if (arg_is_const(op->args[1])) {
71
- static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
56
- tmp = extract64(arg_info(op->args[1])->val,
72
- static const TCGTargetOpDef S_S_S = { .args_ct_str = { "S", "S", "S" } };
57
- op->args[2], op->args[3]);
73
- static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
58
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
74
- static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
59
- continue;
75
- static const TCGTargetOpDef r_r_rT = { .args_ct_str = { "r", "r", "rT" } };
60
- }
76
- static const TCGTargetOpDef r_r_rU = { .args_ct_str = { "r", "r", "rU" } };
61
- break;
77
- static const TCGTargetOpDef r_rI_ri
78
- = { .args_ct_str = { "r", "rI", "ri" } };
79
- static const TCGTargetOpDef r_rI_rT
80
- = { .args_ct_str = { "r", "rI", "rT" } };
81
- static const TCGTargetOpDef r_r_rZW
82
- = { .args_ct_str = { "r", "r", "rZW" } };
83
- static const TCGTargetOpDef L_L_L_L
84
- = { .args_ct_str = { "L", "L", "L", "L" } };
85
- static const TCGTargetOpDef S_S_S_S
86
- = { .args_ct_str = { "S", "S", "S", "S" } };
87
- static const TCGTargetOpDef movc
88
- = { .args_ct_str = { "r", "r", "ri", "rZ", "rZ" } };
89
- static const TCGTargetOpDef dep
90
- = { .args_ct_str = { "r", "0", "rZ" } };
91
- static const TCGTargetOpDef br2
92
- = { .args_ct_str = { "r", "r", "ri", "ri" } };
93
- static const TCGTargetOpDef setc2
94
- = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
95
- static const TCGTargetOpDef add2
96
- = { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
97
- static const TCGTargetOpDef sub2
98
- = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
99
- static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
100
- static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
101
- static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
102
- static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
103
- static const TCGTargetOpDef v_v_v_v
104
- = { .args_ct_str = { "v", "v", "v", "v" } };
105
-
62
-
106
switch (op) {
63
- CASE_OP_32_64(sextract):
107
case INDEX_op_goto_ptr:
64
- if (arg_is_const(op->args[1])) {
108
- return &r;
65
- tmp = sextract64(arg_info(op->args[1])->val,
109
+ return C_O0_I1(r);
66
- op->args[2], op->args[3]);
110
67
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
111
case INDEX_op_ld8u_i32:
68
- continue;
112
case INDEX_op_ld8s_i32:
69
- }
113
case INDEX_op_ld16u_i32:
70
- break;
114
case INDEX_op_ld16s_i32:
71
-
115
case INDEX_op_ld_i32:
72
default:
116
- case INDEX_op_st8_i32:
73
break;
117
- case INDEX_op_st16_i32:
74
118
- case INDEX_op_st_i32:
75
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
119
case INDEX_op_ctpop_i32:
76
CASE_OP_32_64(eqv):
120
case INDEX_op_neg_i32:
77
done = fold_eqv(&ctx, op);
121
case INDEX_op_not_i32:
78
break;
122
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
79
+ CASE_OP_32_64(extract):
123
case INDEX_op_ld32u_i64:
80
+ done = fold_extract(&ctx, op);
124
case INDEX_op_ld32s_i64:
81
+ break;
125
case INDEX_op_ld_i64:
82
CASE_OP_32_64(extract2):
126
- case INDEX_op_st8_i64:
83
done = fold_extract2(&ctx, op);
127
- case INDEX_op_st16_i64:
84
break;
128
- case INDEX_op_st32_i64:
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
129
- case INDEX_op_st_i64:
86
case INDEX_op_setcond2_i32:
130
case INDEX_op_ctpop_i64:
87
done = fold_setcond2(&ctx, op);
131
case INDEX_op_neg_i64:
88
break;
132
case INDEX_op_not_i64:
89
+ CASE_OP_32_64(sextract):
133
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
90
+ done = fold_sextract(&ctx, op);
134
case INDEX_op_bswap32_i64:
91
+ break;
135
case INDEX_op_bswap64_i64:
92
CASE_OP_32_64_VEC(sub):
136
case INDEX_op_extract_i64:
93
done = fold_sub(&ctx, op);
137
- return &r_r;
94
break;
138
+ return C_O1_I1(r, r);
139
+
140
+ case INDEX_op_st8_i32:
141
+ case INDEX_op_st16_i32:
142
+ case INDEX_op_st_i32:
143
+ case INDEX_op_st8_i64:
144
+ case INDEX_op_st16_i64:
145
+ case INDEX_op_st32_i64:
146
+ case INDEX_op_st_i64:
147
+ return C_O0_I2(r, r);
148
149
case INDEX_op_add_i32:
150
case INDEX_op_and_i32:
151
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
152
case INDEX_op_rotl_i64:
153
case INDEX_op_rotr_i64:
154
case INDEX_op_setcond_i64:
155
- return &r_r_ri;
156
+ return C_O1_I2(r, r, ri);
157
+
158
case INDEX_op_mul_i32:
159
case INDEX_op_mul_i64:
160
- return &r_r_rI;
161
+ return C_O1_I2(r, r, rI);
162
+
163
case INDEX_op_div_i32:
164
case INDEX_op_divu_i32:
165
case INDEX_op_nand_i32:
166
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
167
case INDEX_op_divu_i64:
168
case INDEX_op_mulsh_i64:
169
case INDEX_op_muluh_i64:
170
- return &r_r_r;
171
+ return C_O1_I2(r, r, r);
172
+
173
case INDEX_op_sub_i32:
174
- return &r_rI_ri;
175
+ return C_O1_I2(r, rI, ri);
176
case INDEX_op_add_i64:
177
- return &r_r_rT;
178
+ return C_O1_I2(r, r, rT);
179
case INDEX_op_or_i64:
180
case INDEX_op_xor_i64:
181
- return &r_r_rU;
182
+ return C_O1_I2(r, r, rU);
183
case INDEX_op_sub_i64:
184
- return &r_rI_rT;
185
+ return C_O1_I2(r, rI, rT);
186
case INDEX_op_clz_i32:
187
case INDEX_op_ctz_i32:
188
case INDEX_op_clz_i64:
189
case INDEX_op_ctz_i64:
190
- return &r_r_rZW;
191
+ return C_O1_I2(r, r, rZW);
192
193
case INDEX_op_brcond_i32:
194
case INDEX_op_brcond_i64:
195
- return &r_ri;
196
+ return C_O0_I2(r, ri);
197
198
case INDEX_op_movcond_i32:
199
case INDEX_op_movcond_i64:
200
- return &movc;
201
+ return C_O1_I4(r, r, ri, rZ, rZ);
202
case INDEX_op_deposit_i32:
203
case INDEX_op_deposit_i64:
204
- return &dep;
205
+ return C_O1_I2(r, 0, rZ);
206
case INDEX_op_brcond2_i32:
207
- return &br2;
208
+ return C_O0_I4(r, r, ri, ri);
209
case INDEX_op_setcond2_i32:
210
- return &setc2;
211
+ return C_O1_I4(r, r, r, ri, ri);
212
case INDEX_op_add2_i64:
213
case INDEX_op_add2_i32:
214
- return &add2;
215
+ return C_O2_I4(r, r, r, r, rI, rZM);
216
case INDEX_op_sub2_i64:
217
case INDEX_op_sub2_i32:
218
- return &sub2;
219
+ return C_O2_I4(r, r, rI, rZM, r, r);
220
221
case INDEX_op_qemu_ld_i32:
222
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
223
- ? &r_L : &r_L_L);
224
+ ? C_O1_I1(r, L)
225
+ : C_O1_I2(r, L, L));
226
+
227
case INDEX_op_qemu_st_i32:
228
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
229
- ? &S_S : &S_S_S);
230
+ ? C_O0_I2(S, S)
231
+ : C_O0_I3(S, S, S));
232
+
233
case INDEX_op_qemu_ld_i64:
234
- return (TCG_TARGET_REG_BITS == 64 ? &r_L
235
- : TARGET_LONG_BITS == 32 ? &L_L_L : &L_L_L_L);
236
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
237
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
238
+ : C_O2_I2(L, L, L, L));
239
+
240
case INDEX_op_qemu_st_i64:
241
- return (TCG_TARGET_REG_BITS == 64 ? &S_S
242
- : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
243
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
244
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
245
+ : C_O0_I4(S, S, S, S));
246
247
case INDEX_op_add_vec:
248
case INDEX_op_sub_vec:
249
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
250
case INDEX_op_ppc_mulou_vec:
251
case INDEX_op_ppc_pkum_vec:
252
case INDEX_op_dup2_vec:
253
- return &v_v_v;
254
+ return C_O1_I2(v, v, v);
255
+
256
case INDEX_op_not_vec:
257
case INDEX_op_neg_vec:
258
- return &v_v;
259
+ return C_O1_I1(v, v);
260
+
261
case INDEX_op_dup_vec:
262
- return have_isa_3_00 ? &v_vr : &v_v;
263
+ return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
264
+
265
case INDEX_op_ld_vec:
266
- case INDEX_op_st_vec:
267
case INDEX_op_dupm_vec:
268
- return &v_r;
269
+ return C_O1_I1(v, r);
270
+
271
+ case INDEX_op_st_vec:
272
+ return C_O0_I2(v, r);
273
+
274
case INDEX_op_bitsel_vec:
275
case INDEX_op_ppc_msum_vec:
276
- return &v_v_v_v;
277
+ return C_O1_I3(v, v, v, v);
278
279
default:
280
return NULL;
281
--
95
--
282
2.25.1
96
2.25.1
283
97
284
98
1
This will reduce the differences between 32-bit and 64-bit hosts,
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
allowing full 64-bit constants to be created with the same interface.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
4
---
6
include/tcg/tcg.h | 2 +-
5
tcg/optimize.c | 25 +++++++++++++++----------
7
tcg/tcg.c | 2 +-
6
1 file changed, 15 insertions(+), 10 deletions(-)
8
2 files changed, 2 insertions(+), 2 deletions(-)
9
7
10
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg.h
10
--- a/tcg/optimize.c
13
+++ b/include/tcg/tcg.h
11
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ typedef struct TCGTemp {
12
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
15
unsigned int mem_allocated:1;
13
return fold_const1(ctx, op);
16
unsigned int temp_allocated:1;
14
}
17
15
18
- tcg_target_long val;
16
+static bool fold_deposit(OptContext *ctx, TCGOp *op)
19
+ int64_t val;
17
+{
20
struct TCGTemp *mem_base;
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
21
intptr_t mem_offset;
19
+ uint64_t t1 = arg_info(op->args[1])->val;
22
const char *name;
20
+ uint64_t t2 = arg_info(op->args[2])->val;
23
diff --git a/tcg/tcg.c b/tcg/tcg.c
21
+
24
index XXXXXXX..XXXXXXX 100644
22
+ t1 = deposit64(t1, op->args[3], op->args[4], t2);
25
--- a/tcg/tcg.c
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
26
+++ b/tcg/tcg.c
24
+ }
27
@@ -XXX,XX +XXX,XX @@ static void dump_regs(TCGContext *s)
25
+ return false;
28
tcg_target_reg_names[ts->mem_base->reg]);
26
+}
27
+
28
static bool fold_divide(OptContext *ctx, TCGOp *op)
29
{
30
return fold_const2(ctx, op);
31
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
32
}
29
break;
33
break;
30
case TEMP_VAL_CONST:
34
31
- printf("$0x%" TCG_PRIlx, ts->val);
35
- CASE_OP_32_64(deposit):
32
+ printf("$0x%" PRIx64, ts->val);
36
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
37
- tmp = deposit64(arg_info(op->args[1])->val,
38
- op->args[3], op->args[4],
39
- arg_info(op->args[2])->val);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
41
- continue;
42
- }
43
- break;
44
-
45
default:
33
break;
46
break;
34
case TEMP_VAL_DEAD:
47
35
printf("D");
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
CASE_OP_32_64(ctpop):
50
done = fold_ctpop(&ctx, op);
51
break;
52
+ CASE_OP_32_64(deposit):
53
+ done = fold_deposit(&ctx, op);
54
+ break;
55
CASE_OP_32_64(div):
56
CASE_OP_32_64(divu):
57
done = fold_divide(&ctx, op);
36
--
58
--
37
2.25.1
59
2.25.1
38
60
39
61
diff view generated by jsdifflib
1
The cmp_vec opcode is mandatory; this symbol is unused.
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
4
---
5
tcg/aarch64/tcg-target.h | 1 -
5
tcg/optimize.c | 32 ++++++++++++++++++--------------
6
tcg/i386/tcg-target.h | 1 -
6
1 file changed, 18 insertions(+), 14 deletions(-)
7
tcg/ppc/tcg-target.h | 1 -
8
3 files changed, 3 deletions(-)
9
7
10
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/aarch64/tcg-target.h
10
--- a/tcg/optimize.c
13
+++ b/tcg/aarch64/tcg-target.h
11
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ typedef enum {
12
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
15
#define TCG_TARGET_HAS_shi_vec 1
13
return true;
16
#define TCG_TARGET_HAS_shs_vec 0
14
}
17
#define TCG_TARGET_HAS_shv_vec 1
15
18
-#define TCG_TARGET_HAS_cmp_vec 1
16
+static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
19
#define TCG_TARGET_HAS_mul_vec 1
17
+{
20
#define TCG_TARGET_HAS_sat_vec 1
18
+ if (arg_is_const(op->args[1])) {
21
#define TCG_TARGET_HAS_minmax_vec 1
19
+ uint64_t t = arg_info(op->args[1])->val;
22
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
20
+
23
index XXXXXXX..XXXXXXX 100644
21
+ if (t != 0) {
24
--- a/tcg/i386/tcg-target.h
22
+ t = do_constant_folding(op->opc, t, 0);
25
+++ b/tcg/i386/tcg-target.h
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
26
@@ -XXX,XX +XXX,XX @@ extern bool have_avx2;
24
+ }
27
#define TCG_TARGET_HAS_shi_vec 1
25
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
28
#define TCG_TARGET_HAS_shs_vec 1
26
+ }
29
#define TCG_TARGET_HAS_shv_vec have_avx2
27
+ return false;
30
-#define TCG_TARGET_HAS_cmp_vec 1
28
+}
31
#define TCG_TARGET_HAS_mul_vec 1
29
+
32
#define TCG_TARGET_HAS_sat_vec 1
30
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
33
#define TCG_TARGET_HAS_minmax_vec 1
31
{
34
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
32
return fold_const1(ctx, op);
35
index XXXXXXX..XXXXXXX 100644
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
36
--- a/tcg/ppc/tcg-target.h
34
}
37
+++ b/tcg/ppc/tcg-target.h
35
break;
38
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
36
39
#define TCG_TARGET_HAS_shi_vec 0
37
- CASE_OP_32_64(clz):
40
#define TCG_TARGET_HAS_shs_vec 0
38
- CASE_OP_32_64(ctz):
41
#define TCG_TARGET_HAS_shv_vec 1
39
- if (arg_is_const(op->args[1])) {
42
-#define TCG_TARGET_HAS_cmp_vec 1
40
- TCGArg v = arg_info(op->args[1])->val;
43
#define TCG_TARGET_HAS_mul_vec 1
41
- if (v != 0) {
44
#define TCG_TARGET_HAS_sat_vec 1
42
- tmp = do_constant_folding(opc, v, 0);
45
#define TCG_TARGET_HAS_minmax_vec 1
43
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
44
- } else {
45
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
46
- }
47
- continue;
48
- }
49
- break;
50
-
51
default:
52
break;
53
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
55
case INDEX_op_brcond2_i32:
56
done = fold_brcond2(&ctx, op);
57
break;
58
+ CASE_OP_32_64(clz):
59
+ CASE_OP_32_64(ctz):
60
+ done = fold_count_zeros(&ctx, op);
61
+ break;
62
CASE_OP_32_64(ctpop):
63
done = fold_ctpop(&ctx, op);
64
break;
46
--
65
--
47
2.25.1
66
2.25.1
48
67
49
68
diff view generated by jsdifflib
1
When the two arguments are identical, this can be reduced to
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
dup_vec or to mov_vec from a tcg_constant_vec.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
4
---
6
tcg/optimize.c | 15 +++++++++++++++
5
tcg/optimize.c | 27 ++++++++++++++++-----------
7
1 file changed, 15 insertions(+)
6
1 file changed, 16 insertions(+), 11 deletions(-)
8
7
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
10
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
13
return false;
14
}
15
16
+static bool fold_bswap(OptContext *ctx, TCGOp *op)
17
+{
18
+ if (arg_is_const(op->args[1])) {
19
+ uint64_t t = arg_info(op->args[1])->val;
20
+
21
+ t = do_constant_folding(op->opc, t, op->args[2]);
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
23
+ }
24
+ return false;
25
+}
26
+
27
static bool fold_call(OptContext *ctx, TCGOp *op)
28
{
29
TCGContext *s = ctx->tcg;
13
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
14
}
31
}
15
goto do_default;
32
break;
16
33
17
+ case INDEX_op_dup2_vec:
34
- CASE_OP_32_64(bswap16):
18
+ assert(TCG_TARGET_REG_BITS == 32);
35
- CASE_OP_32_64(bswap32):
19
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
36
- case INDEX_op_bswap64_i64:
20
+ tmp = arg_info(op->args[1])->val;
37
- if (arg_is_const(op->args[1])) {
21
+ if (tmp == arg_info(op->args[2])->val) {
38
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
22
+ tcg_opt_gen_movi(s, op, op->args[0], tmp);
39
- op->args[2]);
23
+ break;
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
24
+ }
41
- continue;
25
+ } else if (args_are_copies(op->args[1], op->args[2])) {
42
- }
26
+ op->opc = INDEX_op_dup_vec;
43
- break;
27
+ TCGOP_VECE(op) = MO_32;
44
-
28
+ nb_iargs = 1;
45
default:
29
+ }
46
break;
30
+ goto do_default;
47
31
+
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
32
CASE_OP_32_64(not):
49
case INDEX_op_brcond2_i32:
33
CASE_OP_32_64(neg):
50
done = fold_brcond2(&ctx, op);
34
CASE_OP_32_64(ext8s):
51
break;
52
+ CASE_OP_32_64(bswap16):
53
+ CASE_OP_32_64(bswap32):
54
+ case INDEX_op_bswap64_i64:
55
+ done = fold_bswap(&ctx, op);
56
+ break;
57
CASE_OP_32_64(clz):
58
CASE_OP_32_64(ctz):
59
done = fold_count_zeros(&ctx, op);
35
--
60
--
36
2.25.1
61
2.25.1
37
62
38
63
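A side note on the clz/ctz folding above: TCG's count-leading/trailing-zero ops carry an extra operand that supplies the result when the input is zero, which is why a constant non-zero input folds to a constant while a constant zero input folds to a plain mov of that extra operand. A minimal standalone sketch of that rule follows; the helper name clz32 is invented for the example and it assumes a GCC-style __builtin_clz; only the folding logic mirrors the patch.

#include <stdint.h>
#include <stdio.h>

/* Mirror of the TCG clz_i32 convention: count leading zeros of x,
 * or return zero_fallback when x == 0. */
static uint32_t clz32(uint32_t x, uint32_t zero_fallback)
{
    return x ? (uint32_t)__builtin_clz(x) : zero_fallback;
}

int main(void)
{
    /* Constant non-zero input: the whole op folds to a constant. */
    printf("clz32(0x00ff0000, 32) = %u\n", clz32(0x00ff0000, 32));  /* 8 */

    /* Constant zero input: the result is just the fallback operand,
     * so the op can be rewritten as a mov of that operand. */
    printf("clz32(0, 99) = %u\n", clz32(0, 99));                    /* 99 */
    return 0;
}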
1
With larger vector sizes, it turns out oprsz == maxsz, and we only
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
need to represent mismatch for oprsz <= 32. We do, however, need
3
to represent larger oprsz and do so without reducing SIMD_DATA_BITS.
4
5
Reduce the size of the oprsz field and increase the maxsz field.
6
Steal the oprsz value of 24 to indicate equality with maxsz.
7
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
4
---
11
include/tcg/tcg-gvec-desc.h | 38 ++++++++++++++++++++++++-------------
5
tcg/optimize.c | 53 +++++++++++++++++++++++++++++---------------------
12
tcg/tcg-op-gvec.c | 35 ++++++++++++++++++++++++++--------
6
1 file changed, 31 insertions(+), 22 deletions(-)
13
2 files changed, 52 insertions(+), 21 deletions(-)
14
7
15
diff --git a/include/tcg/tcg-gvec-desc.h b/include/tcg/tcg-gvec-desc.h
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg-gvec-desc.h
10
--- a/tcg/optimize.c
18
+++ b/include/tcg/tcg-gvec-desc.h
11
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@
12
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
20
#ifndef TCG_TCG_GVEC_DESC_H
13
return fold_const2(ctx, op);
21
#define TCG_TCG_GVEC_DESC_H
14
}
22
15
23
-/* ??? These bit widths are set for ARM SVE, maxing out at 256 byte vectors. */
16
+static bool fold_dup(OptContext *ctx, TCGOp *op)
24
-#define SIMD_OPRSZ_SHIFT 0
17
+{
25
-#define SIMD_OPRSZ_BITS 5
18
+ if (arg_is_const(op->args[1])) {
26
+/*
19
+ uint64_t t = arg_info(op->args[1])->val;
27
+ * This configuration allows MAXSZ to represent 2048 bytes, and
20
+ t = dup_const(TCGOP_VECE(op), t);
28
+ * OPRSZ to match MAXSZ, or represent the smaller values 8, 16, or 32.
21
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
29
+ *
22
+ }
30
+ * Encode this with:
23
+ return false;
31
+ * 0, 1, 3 -> 8, 16, 32
32
+ * 2 -> maxsz
33
+ *
34
+ * This steals the input that would otherwise map to 24 to match maxsz.
35
+ */
36
+#define SIMD_MAXSZ_SHIFT 0
37
+#define SIMD_MAXSZ_BITS 8
38
39
-#define SIMD_MAXSZ_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
40
-#define SIMD_MAXSZ_BITS 5
41
+#define SIMD_OPRSZ_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
42
+#define SIMD_OPRSZ_BITS 2
43
44
-#define SIMD_DATA_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
45
+#define SIMD_DATA_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
46
#define SIMD_DATA_BITS (32 - SIMD_DATA_SHIFT)
47
48
/* Create a descriptor from components. */
49
uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data);
50
51
-/* Extract the operation size from a descriptor. */
52
-static inline intptr_t simd_oprsz(uint32_t desc)
53
-{
54
- return (extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS) + 1) * 8;
55
-}
56
-
57
/* Extract the max vector size from a descriptor. */
58
static inline intptr_t simd_maxsz(uint32_t desc)
59
{
60
- return (extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) + 1) * 8;
61
+ return extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) * 8 + 8;
62
+}
24
+}
63
+
25
+
64
+/* Extract the operation size from a descriptor. */
26
+static bool fold_dup2(OptContext *ctx, TCGOp *op)
65
+static inline intptr_t simd_oprsz(uint32_t desc)
66
+{
27
+{
67
+ uint32_t f = extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS);
28
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
68
+ intptr_t o = f * 8 + 8;
29
+ uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
69
+ intptr_t m = simd_maxsz(desc);
30
+ arg_info(op->args[2])->val);
70
+ return f == 2 ? m : o;
31
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
71
}
72
73
/* Extract the operation-specific data from a descriptor. */
74
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/tcg/tcg-op-gvec.c
77
+++ b/tcg/tcg-op-gvec.c
78
@@ -XXX,XX +XXX,XX @@ static const TCGOpcode vecop_list_empty[1] = { 0 };
79
of the operand offsets so that we can check them all at once. */
80
static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
81
{
82
- uint32_t opr_align = oprsz >= 16 ? 15 : 7;
83
- uint32_t max_align = maxsz >= 16 || oprsz >= 16 ? 15 : 7;
84
- tcg_debug_assert(oprsz > 0);
85
- tcg_debug_assert(oprsz <= maxsz);
86
- tcg_debug_assert((oprsz & opr_align) == 0);
87
+ uint32_t max_align;
88
+
89
+ switch (oprsz) {
90
+ case 8:
91
+ case 16:
92
+ case 32:
93
+ tcg_debug_assert(oprsz <= maxsz);
94
+ break;
95
+ default:
96
+ tcg_debug_assert(oprsz == maxsz);
97
+ break;
98
+ }
99
+ tcg_debug_assert(maxsz <= (8 << SIMD_MAXSZ_BITS));
100
+
101
+ max_align = maxsz >= 16 ? 15 : 7;
102
tcg_debug_assert((maxsz & max_align) == 0);
103
tcg_debug_assert((ofs & max_align) == 0);
104
}
105
@@ -XXX,XX +XXX,XX @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
106
{
107
uint32_t desc = 0;
108
109
- assert(oprsz % 8 == 0 && oprsz <= (8 << SIMD_OPRSZ_BITS));
110
- assert(maxsz % 8 == 0 && maxsz <= (8 << SIMD_MAXSZ_BITS));
111
- assert(data == sextract32(data, 0, SIMD_DATA_BITS));
112
+ check_size_align(oprsz, maxsz, 0);
113
+ tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS));
114
115
oprsz = (oprsz / 8) - 1;
116
maxsz = (maxsz / 8) - 1;
117
+
118
+ /*
119
+ * We have just asserted in check_size_align that either
120
+ * oprsz is {8,16,32} or matches maxsz. Encode the final
121
+ * case with '2', as that would otherwise map to 24.
122
+ */
123
+ if (oprsz == maxsz) {
124
+ oprsz = 2;
125
+ }
32
+ }
126
+
33
+
127
desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
34
+ if (args_are_copies(op->args[1], op->args[2])) {
128
desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
35
+ op->opc = INDEX_op_dup_vec;
129
desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);
36
+ TCGOP_VECE(op) = MO_32;
37
+ }
38
+ return false;
39
+}
40
+
41
static bool fold_eqv(OptContext *ctx, TCGOp *op)
42
{
43
return fold_const2(ctx, op);
44
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
45
done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
46
break;
47
48
- case INDEX_op_dup_vec:
49
- if (arg_is_const(op->args[1])) {
50
- tmp = arg_info(op->args[1])->val;
51
- tmp = dup_const(TCGOP_VECE(op), tmp);
52
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
53
- continue;
54
- }
55
- break;
56
-
57
- case INDEX_op_dup2_vec:
58
- assert(TCG_TARGET_REG_BITS == 32);
59
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
60
- tcg_opt_gen_movi(&ctx, op, op->args[0],
61
- deposit64(arg_info(op->args[1])->val, 32, 32,
62
- arg_info(op->args[2])->val));
63
- continue;
64
- } else if (args_are_copies(op->args[1], op->args[2])) {
65
- op->opc = INDEX_op_dup_vec;
66
- TCGOP_VECE(op) = MO_32;
67
- }
68
- break;
69
-
70
default:
71
break;
72
73
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
74
CASE_OP_32_64(divu):
75
done = fold_divide(&ctx, op);
76
break;
77
+ case INDEX_op_dup_vec:
78
+ done = fold_dup(&ctx, op);
79
+ break;
80
+ case INDEX_op_dup2_vec:
81
+ done = fold_dup2(&ctx, op);
82
+ break;
83
CASE_OP_32_64(eqv):
84
done = fold_eqv(&ctx, op);
85
break;
130
--
86
--
131
2.25.1
87
2.25.1
132
88
133
89
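To make the descriptor change above concrete, here is a small self-contained model of the new packing: maxsz in an 8-bit field, oprsz in a 2-bit field whose value 2 means "oprsz equals maxsz". Field names are shortened and the data field is omitted; this follows the rules stated in the patch but is only an illustration, not QEMU's simd_desc()/simd_oprsz() code.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAXSZ_SHIFT 0
#define MAXSZ_BITS  8
#define OPRSZ_SHIFT (MAXSZ_SHIFT + MAXSZ_BITS)
#define OPRSZ_BITS  2

static uint32_t encode(uint32_t oprsz, uint32_t maxsz)
{
    uint32_t o = (oprsz / 8) - 1;
    uint32_t m = (maxsz / 8) - 1;

    /* 0, 1, 3 encode oprsz = 8, 16, 32; 2 is stolen to mean "== maxsz". */
    if (o == m) {
        o = 2;
    }
    return (m << MAXSZ_SHIFT) | (o << OPRSZ_SHIFT);
}

static uint32_t decode_maxsz(uint32_t desc)
{
    return ((desc >> MAXSZ_SHIFT) & ((1u << MAXSZ_BITS) - 1)) * 8 + 8;
}

static uint32_t decode_oprsz(uint32_t desc)
{
    uint32_t f = (desc >> OPRSZ_SHIFT) & ((1u << OPRSZ_BITS) - 1);
    return f == 2 ? decode_maxsz(desc) : f * 8 + 8;
}

int main(void)
{
    /* Small vector: oprsz 16 within a 32-byte register. */
    uint32_t d = encode(16, 32);
    assert(decode_oprsz(d) == 16 && decode_maxsz(d) == 32);

    /* Large vector: oprsz == maxsz == 256 is still representable even
     * though 256/8 - 1 no longer fits in the 2-bit oprsz field. */
    d = encode(256, 256);
    assert(decode_oprsz(d) == 256 && decode_maxsz(d) == 256);

    printf("descriptor encoding round-trips\n");
    return 0;
}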
1
The last user of this field disappeared in f69d277ece4.
1
This is the final entry in the main switch that was in a
2
different form. After this, we have the option to convert
3
the switch into a function dispatch table.
2
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
8
---
5
include/tcg/tcg.h | 3 ---
9
tcg/optimize.c | 27 ++++++++++++++-------------
6
1 file changed, 3 deletions(-)
10
1 file changed, 14 insertions(+), 13 deletions(-)
7
11
8
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
10
--- a/include/tcg/tcg.h
14
--- a/tcg/optimize.c
11
+++ b/include/tcg/tcg.h
15
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
16
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
13
uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
17
return true;
14
uint8_t flags;
18
}
15
TCGArgConstraint *args_ct;
19
16
-#if defined(CONFIG_DEBUG_TCG)
20
+static bool fold_mov(OptContext *ctx, TCGOp *op)
17
- int used;
21
+{
18
-#endif
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
19
} TCGOpDef;
23
+}
20
24
+
21
extern TCGOpDef tcg_op_defs[];
25
static bool fold_movcond(OptContext *ctx, TCGOp *op)
26
{
27
TCGOpcode opc = op->opc;
28
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
29
break;
30
}
31
32
- /* Propagate constants through copy operations and do constant
33
- folding. Constants will be substituted to arguments by register
34
- allocator where needed and possible. Also detect copies. */
35
+ /*
36
+ * Process each opcode.
37
+ * Sorted alphabetically by opcode as much as possible.
38
+ */
39
switch (opc) {
40
- CASE_OP_32_64_VEC(mov):
41
- done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
42
- break;
43
-
44
- default:
45
- break;
46
-
47
- /* ---------------------------------------------------------- */
48
- /* Sorted alphabetically by opcode as much as possible. */
49
-
50
CASE_OP_32_64_VEC(add):
51
done = fold_add(&ctx, op);
52
break;
53
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
54
case INDEX_op_mb:
55
done = fold_mb(&ctx, op);
56
break;
57
+ CASE_OP_32_64_VEC(mov):
58
+ done = fold_mov(&ctx, op);
59
+ break;
60
CASE_OP_32_64(movcond):
61
done = fold_movcond(&ctx, op);
62
break;
63
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
64
CASE_OP_32_64_VEC(xor):
65
done = fold_xor(&ctx, op);
66
break;
67
+ default:
68
+ break;
69
}
70
71
if (!done) {
22
--
72
--
23
2.25.1
73
2.25.1
24
74
25
75
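The commit message above mentions that, once every opcode is handled by a fold_* helper, the switch could become a function dispatch table. The series does not do that; purely as a hypothetical sketch of the shape such a table could take, with invented stand-in types and opcodes rather than TCG's real ones:

#include <stdbool.h>
#include <stdio.h>

typedef enum { OP_ADD, OP_MOV, OP_XOR, NB_OPS } Opcode;
typedef struct { Opcode opc; } Op;
typedef struct { int dummy; } Ctx;

static bool fold_add(Ctx *ctx, Op *op) { (void)ctx; (void)op; return false; }
static bool fold_mov(Ctx *ctx, Op *op) { (void)ctx; (void)op; return true; }
static bool fold_xor(Ctx *ctx, Op *op) { (void)ctx; (void)op; return false; }

/* One entry per opcode would replace the big switch in tcg_optimize(). */
static bool (*const fold_table[NB_OPS])(Ctx *, Op *) = {
    [OP_ADD] = fold_add,
    [OP_MOV] = fold_mov,
    [OP_XOR] = fold_xor,
};

int main(void)
{
    Op op = { .opc = OP_MOV };
    Ctx ctx = { 0 };
    bool done = fold_table[op.opc] ? fold_table[op.opc](&ctx, &op) : false;

    printf("folded: %s\n", done ? "yes" : "no");
    return 0;
}

The trade-off is the usual one: table dispatch keeps per-opcode handling out of one very large function, at the cost of a less direct view of which opcodes share a handler.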
1
Pull the "op r, a, a => movi r, 0" optimization into a function,
2
and use it in the outer opcode fold functions.
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
7
---
3
tcg/mips/tcg-target-constr.h | 31 ++++++++++++
8
tcg/optimize.c | 41 ++++++++++++++++++++++++-----------------
4
tcg/mips/tcg-target.c.inc | 95 ++++++++++++------------------------
9
1 file changed, 24 insertions(+), 17 deletions(-)
5
2 files changed, 61 insertions(+), 65 deletions(-)
6
create mode 100644 tcg/mips/tcg-target-constr.h
7
10
8
diff --git a/tcg/mips/tcg-target-constr.h b/tcg/mips/tcg-target-constr.h
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
new file mode 100644
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX
13
--- a/tcg/optimize.c
11
--- /dev/null
14
+++ b/tcg/optimize.c
12
+++ b/tcg/mips/tcg-target-constr.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@
16
return false;
14
+/* SPDX-License-Identifier: GPL-2.0-or-later */
17
}
15
+/*
18
16
+ * MIPS target-specific operand constaints.
19
+/* If the binary operation has both arguments equal, fold to @i. */
17
+ * Copyright (c) 2020 Linaro
20
+static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
18
+ */
21
+{
22
+ if (args_are_copies(op->args[1], op->args[2])) {
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
25
+ return false;
26
+}
19
+
27
+
20
+C_O0_I1(r)
28
/*
21
+C_O0_I2(rZ, r)
29
* These outermost fold_<op> functions are sorted alphabetically.
22
+C_O0_I2(rZ, rZ)
30
*/
23
+C_O0_I2(SZ, S)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
24
+C_O0_I3(SZ, S, S)
32
25
+C_O0_I3(SZ, SZ, S)
33
static bool fold_andc(OptContext *ctx, TCGOp *op)
26
+C_O0_I4(rZ, rZ, rZ, rZ)
34
{
27
+C_O0_I4(SZ, SZ, S, S)
35
- return fold_const2(ctx, op);
28
+C_O1_I1(r, L)
36
+ if (fold_const2(ctx, op) ||
29
+C_O1_I1(r, r)
37
+ fold_xx_to_i(ctx, op, 0)) {
30
+C_O1_I2(r, 0, rZ)
38
+ return true;
31
+C_O1_I2(r, L, L)
39
+ }
32
+C_O1_I2(r, r, ri)
40
+ return false;
33
+C_O1_I2(r, r, rI)
34
+C_O1_I2(r, r, rIK)
35
+C_O1_I2(r, r, rJ)
36
+C_O1_I2(r, r, rWZ)
37
+C_O1_I2(r, rZ, rN)
38
+C_O1_I2(r, rZ, rZ)
39
+C_O1_I4(r, rZ, rZ, rZ, 0)
40
+C_O1_I4(r, rZ, rZ, rZ, rZ)
41
+C_O2_I1(r, r, L)
42
+C_O2_I2(r, r, L, L)
43
+C_O2_I2(r, r, r, r)
44
+C_O2_I4(r, r, rZ, rZ, rN, rN)
45
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/mips/tcg-target.c.inc
48
+++ b/tcg/mips/tcg-target.c.inc
49
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
50
}
51
}
41
}
52
42
53
+/* Define all constraint sets. */
43
static bool fold_brcond(OptContext *ctx, TCGOp *op)
54
+#include "../tcg-constr.c.inc"
44
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
55
+
45
56
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
46
static bool fold_sub(OptContext *ctx, TCGOp *op)
57
{
47
{
58
- static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
48
- return fold_const2(ctx, op);
59
- static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
49
+ if (fold_const2(ctx, op) ||
60
- static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
50
+ fold_xx_to_i(ctx, op, 0)) {
61
- static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
51
+ return true;
62
- static const TCGTargetOpDef SZ_S = { .args_ct_str = { "SZ", "S" } };
52
+ }
63
- static const TCGTargetOpDef rZ_rZ = { .args_ct_str = { "rZ", "rZ" } };
53
+ return false;
64
- static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
54
}
65
- static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
55
66
- static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
56
static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
67
- static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
57
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
68
- static const TCGTargetOpDef r_r_rJ = { .args_ct_str = { "r", "r", "rJ" } };
58
69
- static const TCGTargetOpDef SZ_S_S = { .args_ct_str = { "SZ", "S", "S" } };
59
static bool fold_xor(OptContext *ctx, TCGOp *op)
70
- static const TCGTargetOpDef SZ_SZ_S
60
{
71
- = { .args_ct_str = { "SZ", "SZ", "S" } };
61
- return fold_const2(ctx, op);
72
- static const TCGTargetOpDef SZ_SZ_S_S
62
+ if (fold_const2(ctx, op) ||
73
- = { .args_ct_str = { "SZ", "SZ", "S", "S" } };
63
+ fold_xx_to_i(ctx, op, 0)) {
74
- static const TCGTargetOpDef r_rZ_rN
64
+ return true;
75
- = { .args_ct_str = { "r", "rZ", "rN" } };
65
+ }
76
- static const TCGTargetOpDef r_rZ_rZ
66
+ return false;
77
- = { .args_ct_str = { "r", "rZ", "rZ" } };
67
}
78
- static const TCGTargetOpDef r_r_rIK
68
79
- = { .args_ct_str = { "r", "r", "rIK" } };
69
/* Propagate constants and copies, fold constant expressions. */
80
- static const TCGTargetOpDef r_r_rWZ
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
81
- = { .args_ct_str = { "r", "r", "rWZ" } };
71
break;
82
- static const TCGTargetOpDef r_r_r_r
72
}
83
- = { .args_ct_str = { "r", "r", "r", "r" } };
73
84
- static const TCGTargetOpDef r_r_L_L
74
- /* Simplify expression for "op r, a, a => movi r, 0" cases */
85
- = { .args_ct_str = { "r", "r", "L", "L" } };
75
- switch (opc) {
86
- static const TCGTargetOpDef dep
76
- CASE_OP_32_64_VEC(andc):
87
- = { .args_ct_str = { "r", "0", "rZ" } };
77
- CASE_OP_32_64_VEC(sub):
88
- static const TCGTargetOpDef movc
78
- CASE_OP_32_64_VEC(xor):
89
- = { .args_ct_str = { "r", "rZ", "rZ", "rZ", "0" } };
79
- if (args_are_copies(op->args[1], op->args[2])) {
90
- static const TCGTargetOpDef movc_r6
80
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
91
- = { .args_ct_str = { "r", "rZ", "rZ", "rZ", "rZ" } };
81
- continue;
92
- static const TCGTargetOpDef add2
82
- }
93
- = { .args_ct_str = { "r", "r", "rZ", "rZ", "rN", "rN" } };
83
- break;
94
- static const TCGTargetOpDef br2
84
- default:
95
- = { .args_ct_str = { "rZ", "rZ", "rZ", "rZ" } };
85
- break;
96
- static const TCGTargetOpDef setc2
86
- }
97
- = { .args_ct_str = { "r", "rZ", "rZ", "rZ", "rZ" } };
98
-
87
-
99
switch (op) {
88
/*
100
case INDEX_op_goto_ptr:
89
* Process each opcode.
101
- return &r;
90
* Sorted alphabetically by opcode as much as possible.
102
+ return C_O0_I1(r);
103
104
case INDEX_op_ld8u_i32:
105
case INDEX_op_ld8s_i32:
106
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
107
case INDEX_op_extrl_i64_i32:
108
case INDEX_op_extrh_i64_i32:
109
case INDEX_op_extract_i64:
110
- return &r_r;
111
+ return C_O1_I1(r, r);
112
113
case INDEX_op_st8_i32:
114
case INDEX_op_st16_i32:
115
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
116
case INDEX_op_st16_i64:
117
case INDEX_op_st32_i64:
118
case INDEX_op_st_i64:
119
- return &rZ_r;
120
+ return C_O0_I2(rZ, r);
121
122
case INDEX_op_add_i32:
123
case INDEX_op_add_i64:
124
- return &r_r_rJ;
125
+ return C_O1_I2(r, r, rJ);
126
case INDEX_op_sub_i32:
127
case INDEX_op_sub_i64:
128
- return &r_rZ_rN;
129
+ return C_O1_I2(r, rZ, rN);
130
case INDEX_op_mul_i32:
131
case INDEX_op_mulsh_i32:
132
case INDEX_op_muluh_i32:
133
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
134
case INDEX_op_remu_i64:
135
case INDEX_op_nor_i64:
136
case INDEX_op_setcond_i64:
137
- return &r_rZ_rZ;
138
+ return C_O1_I2(r, rZ, rZ);
139
case INDEX_op_muls2_i32:
140
case INDEX_op_mulu2_i32:
141
case INDEX_op_muls2_i64:
142
case INDEX_op_mulu2_i64:
143
- return &r_r_r_r;
144
+ return C_O2_I2(r, r, r, r);
145
case INDEX_op_and_i32:
146
case INDEX_op_and_i64:
147
- return &r_r_rIK;
148
+ return C_O1_I2(r, r, rIK);
149
case INDEX_op_or_i32:
150
case INDEX_op_xor_i32:
151
case INDEX_op_or_i64:
152
case INDEX_op_xor_i64:
153
- return &r_r_rI;
154
+ return C_O1_I2(r, r, rI);
155
case INDEX_op_shl_i32:
156
case INDEX_op_shr_i32:
157
case INDEX_op_sar_i32:
158
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
159
case INDEX_op_sar_i64:
160
case INDEX_op_rotr_i64:
161
case INDEX_op_rotl_i64:
162
- return &r_r_ri;
163
+ return C_O1_I2(r, r, ri);
164
case INDEX_op_clz_i32:
165
case INDEX_op_clz_i64:
166
- return &r_r_rWZ;
167
+ return C_O1_I2(r, r, rWZ);
168
169
case INDEX_op_deposit_i32:
170
case INDEX_op_deposit_i64:
171
- return &dep;
172
+ return C_O1_I2(r, 0, rZ);
173
case INDEX_op_brcond_i32:
174
case INDEX_op_brcond_i64:
175
- return &rZ_rZ;
176
+ return C_O0_I2(rZ, rZ);
177
case INDEX_op_movcond_i32:
178
case INDEX_op_movcond_i64:
179
- return use_mips32r6_instructions ? &movc_r6 : &movc;
180
-
181
+ return (use_mips32r6_instructions
182
+ ? C_O1_I4(r, rZ, rZ, rZ, rZ)
183
+ : C_O1_I4(r, rZ, rZ, rZ, 0));
184
case INDEX_op_add2_i32:
185
case INDEX_op_sub2_i32:
186
- return &add2;
187
+ return C_O2_I4(r, r, rZ, rZ, rN, rN);
188
case INDEX_op_setcond2_i32:
189
- return &setc2;
190
+ return C_O1_I4(r, rZ, rZ, rZ, rZ);
191
case INDEX_op_brcond2_i32:
192
- return &br2;
193
+ return C_O0_I4(rZ, rZ, rZ, rZ);
194
195
case INDEX_op_qemu_ld_i32:
196
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
197
- ? &r_L : &r_L_L);
198
+ ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
199
case INDEX_op_qemu_st_i32:
200
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
201
- ? &SZ_S : &SZ_S_S);
202
+ ? C_O0_I2(SZ, S) : C_O0_I3(SZ, S, S));
203
case INDEX_op_qemu_ld_i64:
204
- return (TCG_TARGET_REG_BITS == 64 ? &r_L
205
- : TARGET_LONG_BITS == 32 ? &r_r_L : &r_r_L_L);
206
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
207
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, L)
208
+ : C_O2_I2(r, r, L, L));
209
case INDEX_op_qemu_st_i64:
210
- return (TCG_TARGET_REG_BITS == 64 ? &SZ_S
211
- : TARGET_LONG_BITS == 32 ? &SZ_SZ_S : &SZ_SZ_S_S);
212
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(SZ, S)
213
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(SZ, SZ, S)
214
+ : C_O0_I4(SZ, SZ, S, S));
215
216
default:
217
return NULL;
218
--
91
--
219
2.25.1
92
2.25.1
220
93
221
94
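The "op r, a, a => movi r, 0" rule that fold_xx_to_i() factors out above rests on three identities; here is a tiny standalone check in plain C, nothing QEMU-specific:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t a = 0xdeadbeefcafef00dull;

    assert((a - a) == 0);    /* sub  r, a, a -> movi r, 0 */
    assert((a ^ a) == 0);    /* xor  r, a, a -> movi r, 0 */
    assert((a & ~a) == 0);   /* andc r, a, a -> movi r, 0 */
    return 0;
}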
1
Pull the "op r, a, a => mov r, a" optimization into a function,
2
and use it in the outer opcode fold functions.
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
7
---
3
tcg/arm/tcg-target-constr.h | 30 ++++++++++++
8
tcg/optimize.c | 39 ++++++++++++++++++++++++---------------
4
tcg/arm/tcg-target.c.inc | 93 +++++++++++++------------------------
9
1 file changed, 24 insertions(+), 15 deletions(-)
5
2 files changed, 63 insertions(+), 60 deletions(-)
6
create mode 100644 tcg/arm/tcg-target-constr.h
7
10
8
diff --git a/tcg/arm/tcg-target-constr.h b/tcg/arm/tcg-target-constr.h
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
new file mode 100644
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX
13
--- a/tcg/optimize.c
11
--- /dev/null
14
+++ b/tcg/optimize.c
12
+++ b/tcg/arm/tcg-target-constr.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
13
@@ -XXX,XX +XXX,XX @@
16
return false;
14
+/* SPDX-License-Identifier: GPL-2.0-or-later */
17
}
15
+/*
18
16
+ * ARM32 target-specific operand constraints.
19
+/* If the binary operation has both arguments equal, fold to identity. */
17
+ * Copyright (c) 2020 Linaro
20
+static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
18
+ */
21
+{
22
+ if (args_are_copies(op->args[1], op->args[2])) {
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
24
+ }
25
+ return false;
26
+}
19
+
27
+
20
+C_O0_I1(r)
28
/*
21
+C_O0_I2(r, r)
29
* These outermost fold_<op> functions are sorted alphabetically.
22
+C_O0_I2(r, rIN)
30
+ *
23
+C_O0_I2(s, s)
31
+ * The ordering of the transformations should be:
24
+C_O0_I3(s, s, s)
32
+ * 1) those that produce a constant
25
+C_O0_I4(r, r, rI, rI)
33
+ * 2) those that produce a copy
26
+C_O0_I4(s, s, s, s)
34
+ * 3) those that produce information about the result value.
27
+C_O1_I1(r, l)
35
*/
28
+C_O1_I1(r, r)
36
29
+C_O1_I2(r, 0, rZ)
37
static bool fold_add(OptContext *ctx, TCGOp *op)
30
+C_O1_I2(r, l, l)
38
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
31
+C_O1_I2(r, r, r)
39
32
+C_O1_I2(r, r, rI)
40
static bool fold_and(OptContext *ctx, TCGOp *op)
33
+C_O1_I2(r, r, rIK)
41
{
34
+C_O1_I2(r, r, rIN)
42
- return fold_const2(ctx, op);
35
+C_O1_I2(r, r, ri)
43
+ if (fold_const2(ctx, op) ||
36
+C_O1_I2(r, rZ, rZ)
44
+ fold_xx_to_x(ctx, op)) {
37
+C_O1_I4(r, r, r, rI, rI)
45
+ return true;
38
+C_O1_I4(r, r, rIN, rIK, 0)
46
+ }
39
+C_O2_I1(r, r, l)
47
+ return false;
40
+C_O2_I2(r, r, l, l)
41
+C_O2_I2(r, r, r, r)
42
+C_O2_I4(r, r, r, r, rIN, rIK)
43
+C_O2_I4(r, r, rI, rI, rIN, rIK)
44
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
45
index XXXXXXX..XXXXXXX 100644
46
--- a/tcg/arm/tcg-target.c.inc
47
+++ b/tcg/arm/tcg-target.c.inc
48
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
49
}
50
}
48
}
51
49
52
+/* Define all constraint sets. */
50
static bool fold_andc(OptContext *ctx, TCGOp *op)
53
+#include "../tcg-constr.c.inc"
51
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
54
+
52
55
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
53
static bool fold_or(OptContext *ctx, TCGOp *op)
56
{
54
{
57
- static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
55
- return fold_const2(ctx, op);
58
- static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
56
+ if (fold_const2(ctx, op) ||
59
- static const TCGTargetOpDef s_s = { .args_ct_str = { "s", "s" } };
57
+ fold_xx_to_x(ctx, op)) {
60
- static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
58
+ return true;
61
- static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
59
+ }
62
- static const TCGTargetOpDef r_r_l = { .args_ct_str = { "r", "r", "l" } };
60
+ return false;
63
- static const TCGTargetOpDef r_l_l = { .args_ct_str = { "r", "l", "l" } };
61
}
64
- static const TCGTargetOpDef s_s_s = { .args_ct_str = { "s", "s", "s" } };
62
65
- static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
63
static bool fold_orc(OptContext *ctx, TCGOp *op)
66
- static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
67
- static const TCGTargetOpDef r_r_rIN
65
break;
68
- = { .args_ct_str = { "r", "r", "rIN" } };
66
}
69
- static const TCGTargetOpDef r_r_rIK
67
70
- = { .args_ct_str = { "r", "r", "rIK" } };
68
- /* Simplify expression for "op r, a, a => mov r, a" cases */
71
- static const TCGTargetOpDef r_r_r_r
69
- switch (opc) {
72
- = { .args_ct_str = { "r", "r", "r", "r" } };
70
- CASE_OP_32_64_VEC(or):
73
- static const TCGTargetOpDef r_r_l_l
71
- CASE_OP_32_64_VEC(and):
74
- = { .args_ct_str = { "r", "r", "l", "l" } };
72
- if (args_are_copies(op->args[1], op->args[2])) {
75
- static const TCGTargetOpDef s_s_s_s
73
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
76
- = { .args_ct_str = { "s", "s", "s", "s" } };
74
- continue;
77
- static const TCGTargetOpDef br
75
- }
78
- = { .args_ct_str = { "r", "rIN" } };
76
- break;
79
- static const TCGTargetOpDef ext2
77
- default:
80
- = { .args_ct_str = { "r", "rZ", "rZ" } };
78
- break;
81
- static const TCGTargetOpDef dep
79
- }
82
- = { .args_ct_str = { "r", "0", "rZ" } };
83
- static const TCGTargetOpDef movc
84
- = { .args_ct_str = { "r", "r", "rIN", "rIK", "0" } };
85
- static const TCGTargetOpDef add2
86
- = { .args_ct_str = { "r", "r", "r", "r", "rIN", "rIK" } };
87
- static const TCGTargetOpDef sub2
88
- = { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
89
- static const TCGTargetOpDef br2
90
- = { .args_ct_str = { "r", "r", "rI", "rI" } };
91
- static const TCGTargetOpDef setc2
92
- = { .args_ct_str = { "r", "r", "r", "rI", "rI" } };
93
-
80
-
94
switch (op) {
81
/*
95
case INDEX_op_goto_ptr:
82
* Process each opcode.
96
- return &r;
83
* Sorted alphabetically by opcode as much as possible.
97
+ return C_O0_I1(r);
98
99
case INDEX_op_ld8u_i32:
100
case INDEX_op_ld8s_i32:
101
case INDEX_op_ld16u_i32:
102
case INDEX_op_ld16s_i32:
103
case INDEX_op_ld_i32:
104
- case INDEX_op_st8_i32:
105
- case INDEX_op_st16_i32:
106
- case INDEX_op_st_i32:
107
case INDEX_op_neg_i32:
108
case INDEX_op_not_i32:
109
case INDEX_op_bswap16_i32:
110
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
111
case INDEX_op_ext16u_i32:
112
case INDEX_op_extract_i32:
113
case INDEX_op_sextract_i32:
114
- return &r_r;
115
+ return C_O1_I1(r, r);
116
+
117
+ case INDEX_op_st8_i32:
118
+ case INDEX_op_st16_i32:
119
+ case INDEX_op_st_i32:
120
+ return C_O0_I2(r, r);
121
122
case INDEX_op_add_i32:
123
case INDEX_op_sub_i32:
124
case INDEX_op_setcond_i32:
125
- return &r_r_rIN;
126
+ return C_O1_I2(r, r, rIN);
127
+
128
case INDEX_op_and_i32:
129
case INDEX_op_andc_i32:
130
case INDEX_op_clz_i32:
131
case INDEX_op_ctz_i32:
132
- return &r_r_rIK;
133
+ return C_O1_I2(r, r, rIK);
134
+
135
case INDEX_op_mul_i32:
136
case INDEX_op_div_i32:
137
case INDEX_op_divu_i32:
138
- return &r_r_r;
139
+ return C_O1_I2(r, r, r);
140
+
141
case INDEX_op_mulu2_i32:
142
case INDEX_op_muls2_i32:
143
- return &r_r_r_r;
144
+ return C_O2_I2(r, r, r, r);
145
+
146
case INDEX_op_or_i32:
147
case INDEX_op_xor_i32:
148
- return &r_r_rI;
149
+ return C_O1_I2(r, r, rI);
150
+
151
case INDEX_op_shl_i32:
152
case INDEX_op_shr_i32:
153
case INDEX_op_sar_i32:
154
case INDEX_op_rotl_i32:
155
case INDEX_op_rotr_i32:
156
- return &r_r_ri;
157
+ return C_O1_I2(r, r, ri);
158
159
case INDEX_op_brcond_i32:
160
- return &br;
161
+ return C_O0_I2(r, rIN);
162
case INDEX_op_deposit_i32:
163
- return &dep;
164
+ return C_O1_I2(r, 0, rZ);
165
case INDEX_op_extract2_i32:
166
- return &ext2;
167
+ return C_O1_I2(r, rZ, rZ);
168
case INDEX_op_movcond_i32:
169
- return &movc;
170
+ return C_O1_I4(r, r, rIN, rIK, 0);
171
case INDEX_op_add2_i32:
172
- return &add2;
173
+ return C_O2_I4(r, r, r, r, rIN, rIK);
174
case INDEX_op_sub2_i32:
175
- return &sub2;
176
+ return C_O2_I4(r, r, rI, rI, rIN, rIK);
177
case INDEX_op_brcond2_i32:
178
- return &br2;
179
+ return C_O0_I4(r, r, rI, rI);
180
case INDEX_op_setcond2_i32:
181
- return &setc2;
182
+ return C_O1_I4(r, r, r, rI, rI);
183
184
case INDEX_op_qemu_ld_i32:
185
- return TARGET_LONG_BITS == 32 ? &r_l : &r_l_l;
186
+ return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
187
case INDEX_op_qemu_ld_i64:
188
- return TARGET_LONG_BITS == 32 ? &r_r_l : &r_r_l_l;
189
+ return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l);
190
case INDEX_op_qemu_st_i32:
191
- return TARGET_LONG_BITS == 32 ? &s_s : &s_s_s;
192
+ return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
193
case INDEX_op_qemu_st_i64:
194
- return TARGET_LONG_BITS == 32 ? &s_s_s : &s_s_s_s;
195
+ return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
196
197
default:
198
return NULL;
199
--
84
--
200
2.25.1
85
2.25.1
201
86
202
87
1
In most, but not all, places that we check for TEMP_FIXED,
1
Pull the "op r, a, 0 => movi r, 0" optimization into a function,
2
we are really testing that we do not modify the temporary.
2
and use it in the outer opcode fold functions.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
include/tcg/tcg.h | 5 +++++
8
tcg/optimize.c | 38 ++++++++++++++++++++------------------
9
tcg/tcg.c | 21 ++++++++++-----------
9
1 file changed, 20 insertions(+), 18 deletions(-)
10
2 files changed, 15 insertions(+), 11 deletions(-)
11
10
12
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg.h
13
--- a/tcg/optimize.c
15
+++ b/include/tcg/tcg.h
14
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
17
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
16
return false;
18
};
17
}
19
18
20
+static inline bool temp_readonly(TCGTemp *ts)
19
+/* If the binary operation has second argument @i, fold to @i. */
20
+static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
21
+{
21
+{
22
+ return ts->kind == TEMP_FIXED;
22
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
25
+ return false;
23
+}
26
+}
24
+
27
+
25
extern TCGContext tcg_init_ctx;
28
/* If the binary operation has both arguments equal, fold to @i. */
26
extern __thread TCGContext *tcg_ctx;
29
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
27
extern TCGv_env cpu_env;
28
diff --git a/tcg/tcg.c b/tcg/tcg.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/tcg/tcg.c
31
+++ b/tcg/tcg.c
32
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
33
mark it free; otherwise mark it dead. */
34
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
35
{
30
{
36
- if (ts->kind == TEMP_FIXED) {
31
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
37
+ if (temp_readonly(ts)) {
32
static bool fold_and(OptContext *ctx, TCGOp *op)
38
return;
33
{
34
if (fold_const2(ctx, op) ||
35
+ fold_xi_to_i(ctx, op, 0) ||
36
fold_xx_to_x(ctx, op)) {
37
return true;
39
}
38
}
40
if (ts->val_type == TEMP_VAL_REG) {
39
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
41
@@ -XXX,XX +XXX,XX @@ static inline void temp_dead(TCGContext *s, TCGTemp *ts)
40
42
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
41
static bool fold_mul(OptContext *ctx, TCGOp *op)
43
TCGRegSet preferred_regs, int free_or_dead)
44
{
42
{
45
- if (ts->kind == TEMP_FIXED) {
43
- return fold_const2(ctx, op);
46
+ if (temp_readonly(ts)) {
44
+ if (fold_const2(ctx, op) ||
47
return;
45
+ fold_xi_to_i(ctx, op, 0)) {
48
}
46
+ return true;
49
if (!ts->mem_coherent) {
47
+ }
50
@@ -XXX,XX +XXX,XX @@ static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
48
+ return false;
49
}
50
51
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
51
{
52
{
52
/* The liveness analysis already ensures that globals are back
53
- return fold_const2(ctx, op);
53
in memory. Keep an tcg_debug_assert for safety. */
54
+ if (fold_const2(ctx, op) ||
54
- tcg_debug_assert(ts->val_type == TEMP_VAL_MEM
55
+ fold_xi_to_i(ctx, op, 0)) {
55
- || ts->kind == TEMP_FIXED);
56
+ return true;
56
+ tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
57
+ }
58
+ return false;
57
}
59
}
58
60
59
/* save globals to their canonical location and assume they can be
61
static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
60
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
TCGRegSet preferred_regs)
63
continue;
62
{
64
}
63
/* ENV should not be modified. */
65
64
- tcg_debug_assert(ots->kind != TEMP_FIXED);
66
- /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
65
+ tcg_debug_assert(!temp_readonly(ots));
67
- switch (opc) {
66
68
- CASE_OP_32_64_VEC(and):
67
/* The movi is not explicitly generated here. */
69
- CASE_OP_32_64_VEC(mul):
68
if (ots->val_type == TEMP_VAL_REG) {
70
- CASE_OP_32_64(muluh):
69
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
71
- CASE_OP_32_64(mulsh):
70
ts = arg_temp(op->args[1]);
72
- if (arg_is_const(op->args[2])
71
73
- && arg_info(op->args[2])->val == 0) {
72
/* ENV should not be modified. */
74
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
73
- tcg_debug_assert(ots->kind != TEMP_FIXED);
75
- continue;
74
+ tcg_debug_assert(!temp_readonly(ots));
76
- }
75
77
- break;
76
/* Note that otype != itype for no-op truncation. */
78
- default:
77
otype = ots->type;
79
- break;
78
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
80
- }
79
* Store the source register into the destination slot
81
-
80
* and leave the destination temp as TEMP_VAL_MEM.
82
/*
81
*/
83
* Process each opcode.
82
- assert(ots->kind != TEMP_FIXED);
84
* Sorted alphabetically by opcode as much as possible.
83
+ assert(!temp_readonly(ots));
84
if (!ts->mem_allocated) {
85
temp_allocate_frame(s, ots);
86
}
87
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
88
its = arg_temp(op->args[1]);
89
90
/* ENV should not be modified. */
91
- tcg_debug_assert(ots->kind != TEMP_FIXED);
92
+ tcg_debug_assert(!temp_readonly(ots));
93
94
itype = its->type;
95
vece = TCGOP_VECE(op);
96
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
97
ts = arg_temp(arg);
98
99
/* ENV should not be modified. */
100
- tcg_debug_assert(ts->kind != TEMP_FIXED);
101
+ tcg_debug_assert(!temp_readonly(ts));
102
103
if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
104
reg = new_args[arg_ct->alias_index];
105
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
106
ts = arg_temp(op->args[i]);
107
108
/* ENV should not be modified. */
109
- tcg_debug_assert(ts->kind != TEMP_FIXED);
110
+ tcg_debug_assert(!temp_readonly(ts));
111
112
if (NEED_SYNC_ARG(i)) {
113
temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
114
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
115
ts = arg_temp(arg);
116
117
/* ENV should not be modified. */
118
- tcg_debug_assert(ts->kind != TEMP_FIXED);
119
+ tcg_debug_assert(!temp_readonly(ts));
120
121
reg = tcg_target_call_oarg_regs[i];
122
tcg_debug_assert(s->reg_to_temp[reg] == NULL);
123
--
85
--
124
2.25.1
86
2.25.1
125
87
126
88
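Likewise, the "op r, a, a => mov r, a" rule behind fold_xx_to_x() and the "op r, a, 0 => movi r, 0" rule behind fold_xi_to_i(), both introduced above, reduce to simple identities; a standalone check in plain C:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t a = 0x123456789abcdef0ull;

    /* op r, a, a -> mov r, a (fold_xx_to_x) */
    assert((a & a) == a);    /* and r, a, a -> mov r, a */
    assert((a | a) == a);    /* or  r, a, a -> mov r, a */

    /* op r, a, 0 -> movi r, 0 (fold_xi_to_i with i == 0) */
    assert((a & 0) == 0);    /* and r, a, 0 -> movi r, 0 */
    assert((a * 0) == 0);    /* mul r, a, 0 -> movi r, 0 */
    return 0;
}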
1
These will hold a single constant for the duration of the TB.
1
Compute the type of the operation early.
2
They are hashed, so that each value has one temp across the TB.
2
3
3
There are at least 4 places that used a def->flags ladder
4
Not used yet, this is all infrastructure.
4
to determine the type of the operation being optimized.
5
5
6
There were two places that assumed !TCG_OPF_64BIT means
7
TCG_TYPE_I32, and so could potentially compute incorrect
8
results for vector operations.
9
10
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
12
---
8
include/tcg/tcg.h | 24 +++++-
13
tcg/optimize.c | 149 +++++++++++++++++++++++++++++--------------------
9
tcg/optimize.c | 13 +++-
14
1 file changed, 89 insertions(+), 60 deletions(-)
10
tcg/tcg.c | 195 ++++++++++++++++++++++++++++++++++++----------
15
11
3 files changed, 188 insertions(+), 44 deletions(-)
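Before the diff, a note on the "hashed, so that each value has one temp across the TB" scheme described above: it is essentially hash-consing of constants. A standalone sketch of the idea follows, using a toy fixed-size open-addressing table instead of QEMU's per-type GHashTable; all names here are invented for the example.

#include <stdint.h>
#include <stdio.h>

#define N_SLOTS 64

typedef struct {
    int in_use;
    int64_t val;
    int temp_index;          /* stand-in for the TCGTemp holding val */
} ConstSlot;

static ConstSlot table[N_SLOTS];
static int next_temp = 100;  /* pretend temps 100 and up are constants */

static int tcg_constant_sketch(int64_t val)
{
    unsigned h = (uint64_t)val % N_SLOTS;

    for (unsigned i = 0; i < N_SLOTS; i++) {
        ConstSlot *s = &table[(h + i) % N_SLOTS];

        if (s->in_use && s->val == val) {
            return s->temp_index;        /* reuse the existing temp */
        }
        if (!s->in_use) {
            s->in_use = 1;
            s->val = val;
            s->temp_index = next_temp++; /* allocate a new read-only temp */
            return s->temp_index;
        }
    }
    return -1;  /* toy table full; a real hash table would grow */
}

int main(void)
{
    int a = tcg_constant_sketch(42);
    int b = tcg_constant_sketch(42);
    int c = tcg_constant_sketch(-1);

    /* The same value always maps to the same temp. */
    printf("42 -> %d, 42 -> %d, -1 -> %d\n", a, b, c);
    return 0;
}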
12
13
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg.h
16
+++ b/include/tcg/tcg.h
17
@@ -XXX,XX +XXX,XX @@ typedef enum TCGTempKind {
18
TEMP_GLOBAL,
19
/* Temp is in a fixed register. */
20
TEMP_FIXED,
21
+ /* Temp is a fixed constant. */
22
+ TEMP_CONST,
23
} TCGTempKind;
24
25
typedef struct TCGTemp {
26
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
27
QSIMPLEQ_HEAD(, TCGOp) plugin_ops;
28
#endif
29
30
+ GHashTable *const_table[TCG_TYPE_COUNT];
31
TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
32
TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
33
34
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
35
36
static inline bool temp_readonly(TCGTemp *ts)
37
{
38
- return ts->kind == TEMP_FIXED;
39
+ return ts->kind >= TEMP_FIXED;
40
}
41
42
extern TCGContext tcg_init_ctx;
43
@@ -XXX,XX +XXX,XX @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc);
44
45
void tcg_optimize(TCGContext *s);
46
47
+/* Allocate a new temporary and initialize it with a constant. */
48
TCGv_i32 tcg_const_i32(int32_t val);
49
TCGv_i64 tcg_const_i64(int64_t val);
50
TCGv_i32 tcg_const_local_i32(int32_t val);
51
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec(TCGType);
52
TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec);
53
TCGv_vec tcg_const_ones_vec_matching(TCGv_vec);
54
55
+/*
56
+ * Locate or create a read-only temporary that is a constant.
57
+ * This kind of temporary need not and should not be freed.
58
+ */
59
+TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
60
+
61
+static inline TCGv_i32 tcg_constant_i32(int32_t val)
62
+{
63
+ return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
64
+}
65
+
66
+static inline TCGv_i64 tcg_constant_i64(int64_t val)
67
+{
68
+ return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
69
+}
70
+
71
+TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val);
72
+
73
#if UINTPTR_MAX == UINT32_MAX
74
# define tcg_const_ptr(x) ((TCGv_ptr)tcg_const_i32((intptr_t)(x)))
75
# define tcg_const_local_ptr(x) ((TCGv_ptr)tcg_const_local_i32((intptr_t)(x)))
76
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
diff --git a/tcg/optimize.c b/tcg/optimize.c
77
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
78
--- a/tcg/optimize.c
18
--- a/tcg/optimize.c
79
+++ b/tcg/optimize.c
19
+++ b/tcg/optimize.c
80
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TempOptInfo *infos,
20
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
81
ts->state_ptr = ti;
21
82
ti->next_copy = ts;
22
/* In flight values from optimization. */
83
ti->prev_copy = ts;
23
uint64_t z_mask;
84
- ti->is_const = false;
24
+ TCGType type;
85
- ti->mask = -1;
25
} OptContext;
86
+ if (ts->kind == TEMP_CONST) {
26
87
+ ti->is_const = true;
27
static inline TempOptInfo *ts_info(TCGTemp *ts)
88
+ ti->val = ti->mask = ts->val;
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
89
+ if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
29
{
90
+ /* High bits of a 32-bit quantity are garbage. */
30
TCGTemp *dst_ts = arg_temp(dst);
91
+ ti->mask |= ~0xffffffffull;
31
TCGTemp *src_ts = arg_temp(src);
92
+ }
32
- const TCGOpDef *def;
93
+ } else {
33
TempOptInfo *di;
94
+ ti->is_const = false;
34
TempOptInfo *si;
95
+ ti->mask = -1;
35
uint64_t z_mask;
96
+ }
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
97
set_bit(idx, temps_used->l);
37
reset_ts(dst_ts);
98
}
38
di = ts_info(dst_ts);
99
}
39
si = ts_info(src_ts);
100
diff --git a/tcg/tcg.c b/tcg/tcg.c
40
- def = &tcg_op_defs[op->opc];
101
index XXXXXXX..XXXXXXX 100644
41
- if (def->flags & TCG_OPF_VECTOR) {
102
--- a/tcg/tcg.c
42
- new_op = INDEX_op_mov_vec;
103
+++ b/tcg/tcg.c
43
- } else if (def->flags & TCG_OPF_64BIT) {
104
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
44
- new_op = INDEX_op_mov_i64;
105
bigendian = 1;
45
- } else {
106
#endif
46
+
107
47
+ switch (ctx->type) {
108
- if (base_ts->kind != TEMP_FIXED) {
48
+ case TCG_TYPE_I32:
109
+ switch (base_ts->kind) {
49
new_op = INDEX_op_mov_i32;
110
+ case TEMP_FIXED:
111
+ break;
50
+ break;
112
+ case TEMP_GLOBAL:
51
+ case TCG_TYPE_I64:
113
/* We do not support double-indirect registers. */
52
+ new_op = INDEX_op_mov_i64;
114
tcg_debug_assert(!base_ts->indirect_reg);
53
+ break;
115
base_ts->indirect_base = 1;
54
+ case TCG_TYPE_V64:
116
s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
55
+ case TCG_TYPE_V128:
117
? 2 : 1);
56
+ case TCG_TYPE_V256:
118
indirect_reg = 1;
57
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
58
+ new_op = INDEX_op_mov_vec;
119
+ break;
59
+ break;
120
+ default:
60
+ default:
121
+ g_assert_not_reached();
61
+ g_assert_not_reached();
122
}
62
}
123
63
op->opc = new_op;
124
if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
64
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
125
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
65
op->args[0] = dst;
126
TCGContext *s = tcg_ctx;
66
op->args[1] = src;
127
int k, idx;
67
128
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
129
+ /* In order to simplify users of tcg_constant_*, silently ignore free. */
69
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
130
+ if (ts->kind == TEMP_CONST) {
70
TCGArg dst, uint64_t val)
131
+ return;
71
{
132
+ }
72
- const TCGOpDef *def = &tcg_op_defs[op->opc];
73
- TCGType type;
74
- TCGTemp *tv;
75
-
76
- if (def->flags & TCG_OPF_VECTOR) {
77
- type = TCGOP_VECL(op) + TCG_TYPE_V64;
78
- } else if (def->flags & TCG_OPF_64BIT) {
79
- type = TCG_TYPE_I64;
80
- } else {
81
- type = TCG_TYPE_I32;
82
- }
83
-
84
/* Convert movi to mov with constant temp. */
85
- tv = tcg_constant_internal(type, val);
86
+ TCGTemp *tv = tcg_constant_internal(ctx->type, val);
133
+
87
+
134
#if defined(CONFIG_DEBUG_TCG)
88
init_ts_info(ctx, tv);
135
s->temps_in_use--;
89
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
136
if (s->temps_in_use < 0) {
137
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
138
set_bit(idx, s->free_temps[k].l);
139
}
90
}
140
91
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
141
+TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
92
}
142
+{
93
}
143
+ TCGContext *s = tcg_ctx;
94
144
+ GHashTable *h = s->const_table[type];
95
-static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
145
+ TCGTemp *ts;
96
+static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
146
+
97
+ uint64_t x, uint64_t y)
147
+ if (h == NULL) {
98
{
148
+ h = g_hash_table_new(g_int64_hash, g_int64_equal);
99
- const TCGOpDef *def = &tcg_op_defs[op];
149
+ s->const_table[type] = h;
100
uint64_t res = do_constant_folding_2(op, x, y);
150
+ }
101
- if (!(def->flags & TCG_OPF_64BIT)) {
151
+
102
+ if (type == TCG_TYPE_I32) {
152
+ ts = g_hash_table_lookup(h, &val);
103
res = (int32_t)res;
153
+ if (ts == NULL) {
104
}
154
+ ts = tcg_temp_alloc(s);
105
return res;
155
+
106
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
156
+ if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
107
* Return -1 if the condition can't be simplified,
157
+ TCGTemp *ts2 = tcg_temp_alloc(s);
108
* and the result of the condition (0 or 1) if it can.
158
+
109
*/
159
+ ts->base_type = TCG_TYPE_I64;
110
-static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
160
+ ts->type = TCG_TYPE_I32;
111
+static int do_constant_folding_cond(TCGType type, TCGArg x,
161
+ ts->kind = TEMP_CONST;
112
TCGArg y, TCGCond c)
162
+ ts->temp_allocated = 1;
113
{
163
+ /*
114
uint64_t xv = arg_info(x)->val;
164
+ * Retain the full value of the 64-bit constant in the low
115
uint64_t yv = arg_info(y)->val;
165
+ * part, so that the hash table works. Actual uses will
116
166
+ * truncate the value to the low part.
117
if (arg_is_const(x) && arg_is_const(y)) {
167
+ */
118
- const TCGOpDef *def = &tcg_op_defs[op];
168
+ ts->val = val;
119
- tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
169
+
120
- if (def->flags & TCG_OPF_64BIT) {
170
+ tcg_debug_assert(ts2 == ts + 1);
121
- return do_constant_folding_cond_64(xv, yv, c);
171
+ ts2->base_type = TCG_TYPE_I64;
122
- } else {
172
+ ts2->type = TCG_TYPE_I32;
123
+ switch (type) {
173
+ ts2->kind = TEMP_CONST;
124
+ case TCG_TYPE_I32:
174
+ ts2->temp_allocated = 1;
125
return do_constant_folding_cond_32(xv, yv, c);
175
+ ts2->val = val >> 32;
126
+ case TCG_TYPE_I64:
176
+ } else {
127
+ return do_constant_folding_cond_64(xv, yv, c);
177
+ ts->base_type = type;
128
+ default:
178
+ ts->type = type;
129
+ /* Only scalar comparisons are optimizable */
179
+ ts->kind = TEMP_CONST;
130
+ return -1;
180
+ ts->temp_allocated = 1;
131
}
181
+ ts->val = val;
132
} else if (args_are_copies(x, y)) {
182
+ }
133
return do_constant_folding_cond_eq(c);
183
+ g_hash_table_insert(h, &ts->val, ts);
134
@@ -XXX,XX +XXX,XX @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
184
+ }
135
uint64_t t;
185
+
136
186
+ return ts;
137
t = arg_info(op->args[1])->val;
187
+}
138
- t = do_constant_folding(op->opc, t, 0);
188
+
139
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
189
+TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
140
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
190
+{
141
}
191
+ val = dup_const(vece, val);
142
return false;
192
+ return temp_tcgv_vec(tcg_constant_internal(type, val));
143
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
193
+}
144
uint64_t t1 = arg_info(op->args[1])->val;
194
+
145
uint64_t t2 = arg_info(op->args[2])->val;
195
TCGv_i32 tcg_const_i32(int32_t val)
146
196
{
147
- t1 = do_constant_folding(op->opc, t1, t2);
197
TCGv_i32 t0;
148
+ t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
198
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_start(TCGContext *s)
149
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
199
TCGTempVal val = TEMP_VAL_MEM;
150
}
200
151
return false;
201
switch (ts->kind) {
152
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
202
+ case TEMP_CONST:
153
static bool fold_brcond(OptContext *ctx, TCGOp *op)
203
+ val = TEMP_VAL_CONST;
154
{
155
TCGCond cond = op->args[2];
156
- int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
157
+ int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
158
159
if (i == 0) {
160
tcg_op_remove(ctx->tcg, op);
161
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
162
* Simplify EQ/NE comparisons where one of the pairs
163
* can be simplified.
164
*/
165
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
166
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
167
op->args[2], cond);
168
switch (i ^ inv) {
169
case 0:
170
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
171
goto do_brcond_high;
172
}
173
174
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
175
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
176
op->args[3], cond);
177
switch (i ^ inv) {
178
case 0:
179
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
180
if (arg_is_const(op->args[1])) {
181
uint64_t t = arg_info(op->args[1])->val;
182
183
- t = do_constant_folding(op->opc, t, op->args[2]);
184
+ t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
185
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
186
}
187
return false;
188
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
189
uint64_t t = arg_info(op->args[1])->val;
190
191
if (t != 0) {
192
- t = do_constant_folding(op->opc, t, 0);
193
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
194
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
195
}
196
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
197
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
198
199
static bool fold_movcond(OptContext *ctx, TCGOp *op)
200
{
201
- TCGOpcode opc = op->opc;
202
TCGCond cond = op->args[5];
203
- int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
204
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
205
206
if (i >= 0) {
207
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
208
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
209
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
210
uint64_t tv = arg_info(op->args[3])->val;
211
uint64_t fv = arg_info(op->args[4])->val;
212
+ TCGOpcode opc;
213
214
- opc = (opc == INDEX_op_movcond_i32
215
- ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
216
+ switch (ctx->type) {
217
+ case TCG_TYPE_I32:
218
+ opc = INDEX_op_setcond_i32;
204
+ break;
219
+ break;
205
case TEMP_FIXED:
206
val = TEMP_VAL_REG;
207
break;
208
@@ -XXX,XX +XXX,XX @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
209
case TEMP_NORMAL:
210
snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
211
break;
212
+ case TEMP_CONST:
213
+ switch (ts->type) {
214
+ case TCG_TYPE_I32:
215
+ snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
216
+ break;
217
+#if TCG_TARGET_REG_BITS > 32
218
+ case TCG_TYPE_I64:
220
+ case TCG_TYPE_I64:
219
+ snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
221
+ opc = INDEX_op_setcond_i64;
220
+ break;
221
+#endif
222
+ case TCG_TYPE_V64:
223
+ case TCG_TYPE_V128:
224
+ case TCG_TYPE_V256:
225
+ snprintf(buf, buf_size, "v%d$0x%" PRIx64,
226
+ 64 << (ts->type - TCG_TYPE_V64), ts->val);
227
+ break;
222
+ break;
228
+ default:
223
+ default:
229
+ g_assert_not_reached();
224
+ g_assert_not_reached();
230
+ }
225
+ }
231
+ break;
226
232
}
227
if (tv == 1 && fv == 0) {
233
return buf;
228
op->opc = opc;
234
}
229
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
235
@@ -XXX,XX +XXX,XX @@ static void la_bb_end(TCGContext *s, int ng, int nt)
230
static bool fold_setcond(OptContext *ctx, TCGOp *op)
236
state = TS_DEAD | TS_MEM;
231
{
237
break;
232
TCGCond cond = op->args[3];
238
case TEMP_NORMAL:
233
- int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
239
+ case TEMP_CONST:
234
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
240
state = TS_DEAD;
235
241
break;
236
if (i >= 0) {
242
default:
237
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
243
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
238
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
244
mark it free; otherwise mark it dead. */
239
* Simplify EQ/NE comparisons where one of the pairs
245
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
240
* can be simplified.
246
{
241
*/
247
- if (temp_readonly(ts)) {
242
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
248
+ TCGTempVal new_type;
243
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
244
op->args[3], cond);
245
switch (i ^ inv) {
246
case 0:
247
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
248
goto do_setcond_high;
249
}
250
251
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
252
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
253
op->args[4], cond);
254
switch (i ^ inv) {
255
case 0:
256
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
257
init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
258
copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
259
260
+ /* Pre-compute the type of the operation. */
261
+ if (def->flags & TCG_OPF_VECTOR) {
262
+ ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
263
+ } else if (def->flags & TCG_OPF_64BIT) {
264
+ ctx.type = TCG_TYPE_I64;
265
+ } else {
266
+ ctx.type = TCG_TYPE_I32;
267
+ }
249
+
268
+
250
+ switch (ts->kind) {
269
/* For commutative operations make constant second argument */
251
+ case TEMP_FIXED:
270
switch (opc) {
252
return;
271
CASE_OP_32_64_VEC(add):
253
+ case TEMP_GLOBAL:
272
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
254
+ case TEMP_LOCAL:
273
/* Proceed with possible constant folding. */
255
+ new_type = TEMP_VAL_MEM;
274
break;
256
+ break;
257
+ case TEMP_NORMAL:
258
+ new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
259
+ break;
260
+ case TEMP_CONST:
261
+ new_type = TEMP_VAL_CONST;
262
+ break;
263
+ default:
264
+ g_assert_not_reached();
265
}
266
if (ts->val_type == TEMP_VAL_REG) {
267
s->reg_to_temp[ts->reg] = NULL;
268
}
269
- ts->val_type = (free_or_dead < 0
270
- || ts->kind != TEMP_NORMAL
271
- ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
272
+ ts->val_type = new_type;
273
}
274
275
/* Mark a temporary as dead. */
276
@@ -XXX,XX +XXX,XX @@ static inline void temp_dead(TCGContext *s, TCGTemp *ts)
277
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
278
TCGRegSet preferred_regs, int free_or_dead)
279
{
280
- if (temp_readonly(ts)) {
281
- return;
282
- }
283
- if (!ts->mem_coherent) {
284
+ if (!temp_readonly(ts) && !ts->mem_coherent) {
285
if (!ts->mem_allocated) {
286
temp_allocate_frame(s, ts);
287
}
288
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
289
290
for (i = s->nb_globals; i < s->nb_temps; i++) {
291
TCGTemp *ts = &s->temps[i];
292
- if (ts->kind == TEMP_LOCAL) {
293
+
294
+ switch (ts->kind) {
295
+ case TEMP_LOCAL:
296
temp_save(s, ts, allocated_regs);
297
- } else {
298
+ break;
299
+ case TEMP_NORMAL:
300
/* The liveness analysis already ensures that temps are dead.
301
Keep an tcg_debug_assert for safety. */
302
tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
303
+ break;
304
+ case TEMP_CONST:
305
+ /* Similarly, we should have freed any allocated register. */
306
+ tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
307
+ break;
308
+ default:
309
+ g_assert_not_reached();
310
}
311
}
312
313
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
314
i_preferred_regs = o_preferred_regs = 0;
315
if (arg_ct->ialias) {
316
o_preferred_regs = op->output_pref[arg_ct->alias_index];
317
- if (ts->kind == TEMP_FIXED) {
318
- /* if fixed register, we must allocate a new register
319
- if the alias is not the same register */
320
- if (arg != op->args[arg_ct->alias_index]) {
321
- goto allocate_in_reg;
322
- }
323
- } else {
324
- /* if the input is aliased to an output and if it is
325
- not dead after the instruction, we must allocate
326
- a new register and move it */
327
- if (!IS_DEAD_ARG(i)) {
328
- goto allocate_in_reg;
329
- }
330
331
- /* check if the current register has already been allocated
332
- for another input aliased to an output */
333
- if (ts->val_type == TEMP_VAL_REG) {
334
- int k2, i2;
335
- reg = ts->reg;
336
- for (k2 = 0 ; k2 < k ; k2++) {
337
- i2 = def->args_ct[nb_oargs + k2].sort_index;
338
- if (def->args_ct[i2].ialias && reg == new_args[i2]) {
339
- goto allocate_in_reg;
340
- }
341
+ /*
342
+ * If the input is readonly, then it cannot also be an
343
+ * output and aliased to itself. If the input is not
344
+ * dead after the instruction, we must allocate a new
345
+ * register and move it.
346
+ */
347
+ if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
348
+ goto allocate_in_reg;
349
+ }
350
+
351
+ /*
352
+ * Check if the current register has already been allocated
353
+ * for another input aliased to an output.
354
+ */
355
+ if (ts->val_type == TEMP_VAL_REG) {
356
+ reg = ts->reg;
357
+ for (int k2 = 0; k2 < k; k2++) {
358
+ int i2 = def->args_ct[nb_oargs + k2].sort_index;
359
+ if (def->args_ct[i2].ialias && reg == new_args[i2]) {
360
+ goto allocate_in_reg;
361
}
362
}
275
}
363
- i_preferred_regs = o_preferred_regs;
276
- if (opc == INDEX_op_sub_i32) {
364
}
277
+ switch (ctx.type) {
365
+ i_preferred_regs = o_preferred_regs;
278
+ case TCG_TYPE_I32:
366
}
279
neg_op = INDEX_op_neg_i32;
367
280
have_neg = TCG_TARGET_HAS_neg_i32;
368
temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
281
- } else if (opc == INDEX_op_sub_i64) {
369
reg = ts->reg;
282
+ break;
370
283
+ case TCG_TYPE_I64:
371
- if (tcg_regset_test_reg(arg_ct->regs, reg)) {
284
neg_op = INDEX_op_neg_i64;
372
- /* nothing to do : the constraint is satisfied */
285
have_neg = TCG_TARGET_HAS_neg_i64;
373
- } else {
286
- } else if (TCG_TARGET_HAS_neg_vec) {
374
- allocate_in_reg:
287
- TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
375
- /* allocate a new register matching the constraint
288
- unsigned vece = TCGOP_VECE(op);
376
- and move the temporary register into it */
289
- neg_op = INDEX_op_neg_vec;
377
+ if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
290
- have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
378
+ allocate_in_reg:
291
- } else {
379
+ /*
292
break;
380
+ * Allocate a new register matching the constraint
293
+ case TCG_TYPE_V64:
381
+ * and move the temporary register into it.
294
+ case TCG_TYPE_V128:
382
+ */
295
+ case TCG_TYPE_V256:
383
temp_load(s, ts, tcg_target_available_regs[ts->type],
296
+ neg_op = INDEX_op_neg_vec;
384
i_allocated_regs, 0);
297
+ have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
385
reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
298
+ TCGOP_VECE(op)) > 0;
386
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
299
+ break;
387
}
300
+ default:
388
#endif
301
+ g_assert_not_reached();
389
302
}
390
+ for (i = 0; i < TCG_TYPE_COUNT; ++i) {
303
if (!have_neg) {
391
+ if (s->const_table[i]) {
304
break;
392
+ g_hash_table_destroy(s->const_table[i]);
305
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
393
+ s->const_table[i] = NULL;
306
TCGOpcode not_op;
394
+ }
307
bool have_not;
395
+ }
308
396
+
309
- if (def->flags & TCG_OPF_VECTOR) {
397
tcg_reg_alloc_start(s);
310
- not_op = INDEX_op_not_vec;
398
311
- have_not = TCG_TARGET_HAS_not_vec;
399
s->code_buf = tb->tc.ptr;
312
- } else if (def->flags & TCG_OPF_64BIT) {
313
- not_op = INDEX_op_not_i64;
314
- have_not = TCG_TARGET_HAS_not_i64;
315
- } else {
316
+ switch (ctx.type) {
317
+ case TCG_TYPE_I32:
318
not_op = INDEX_op_not_i32;
319
have_not = TCG_TARGET_HAS_not_i32;
320
+ break;
321
+ case TCG_TYPE_I64:
322
+ not_op = INDEX_op_not_i64;
323
+ have_not = TCG_TARGET_HAS_not_i64;
324
+ break;
325
+ case TCG_TYPE_V64:
326
+ case TCG_TYPE_V128:
327
+ case TCG_TYPE_V256:
328
+ not_op = INDEX_op_not_vec;
329
+ have_not = TCG_TARGET_HAS_not_vec;
330
+ break;
331
+ default:
332
+ g_assert_not_reached();
333
}
334
if (!have_not) {
335
break;
336
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
337
below, we can ignore high bits, but for further optimizations we
338
need to record that the high bits contain garbage. */
339
partmask = z_mask;
340
- if (!(def->flags & TCG_OPF_64BIT)) {
341
+ if (ctx.type == TCG_TYPE_I32) {
342
z_mask |= ~(tcg_target_ulong)0xffffffffu;
343
partmask &= 0xffffffffu;
344
affected &= 0xffffffffu;
400
--
345
--
401
2.25.1
346
2.25.1
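As a reference for the fold_movcond hunk above, which now derives the replacement setcond opcode from the pre-computed operation type (ctx->type) rather than from the original opcode, here is a minimal standalone sketch of that dispatch. The enum and opcode names are hypothetical stand-ins, not the real TCG definitions.

#include <assert.h>
#include <stdlib.h>

/* Hypothetical stand-ins for TCGType and TCGOpcode. */
typedef enum { TYPE_I32, TYPE_I64 } Type;
typedef enum { OP_SETCOND_I32, OP_SETCOND_I64 } Opcode;

/*
 * Select the setcond opcode from the operation type, the way
 * fold_movcond switches on ctx->type rather than testing whether
 * the original opcode was movcond_i32 or movcond_i64.
 */
static Opcode setcond_for_type(Type type)
{
    switch (type) {
    case TYPE_I32:
        return OP_SETCOND_I32;
    case TYPE_I64:
        return OP_SETCOND_I64;
    default:
        abort();
    }
}

int main(void)
{
    assert(setcond_for_type(TYPE_I32) == OP_SETCOND_I32);
    assert(setcond_for_type(TYPE_I64) == OP_SETCOND_I64);
    return 0;
}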
402
347
403
348
1
The previous change wrongly stated that 32-bit avx2 should have
1
Split out the conditional conversion from a more complex logical
2
used VPBROADCASTW. But that's a 16-bit broadcast and we want a
2
operation to a simple NOT. Create a couple more helpers to make
3
32-bit broadcast.
3
this easy for the outer-most logical operations.
4
4
5
Fixes: 7b60ef3264e
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Cc: qemu-stable@nongnu.org
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/i386/tcg-target.c.inc | 2 +-
8
tcg/optimize.c | 158 +++++++++++++++++++++++++++----------------------
10
1 file changed, 1 insertion(+), 1 deletion(-)
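For context on the one-line VPBROADCASTD fix in this patch: a 16-bit broadcast replicates only the low half of a 32-bit constant, while a 32-bit broadcast replicates the whole value. The plain-C model below is only meant to make that difference concrete; it uses arrays rather than the actual SSE/AVX instructions.

#include <assert.h>
#include <stdint.h>

/* Model a 16-bit broadcast (VPBROADCASTW): splat the low 16 bits. */
static void broadcast16(uint16_t out[8], uint32_t val)
{
    for (int i = 0; i < 8; i++) {
        out[i] = (uint16_t)val;
    }
}

/* Model a 32-bit broadcast (VPBROADCASTD): splat the full 32 bits. */
static void broadcast32(uint32_t out[4], uint32_t val)
{
    for (int i = 0; i < 4; i++) {
        out[i] = val;
    }
}

int main(void)
{
    uint16_t w[8];
    uint32_t d[4];

    broadcast16(w, 0x12345678);
    broadcast32(d, 0x12345678);

    /* The 16-bit broadcast loses the high half of the constant... */
    assert(w[0] == 0x5678);
    /* ...while the 32-bit broadcast preserves it. */
    assert(d[0] == 0x12345678);
    return 0;
}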
9
1 file changed, 86 insertions(+), 72 deletions(-)
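The fold_to_not, fold_ix_to_not and fold_xi_to_not helpers added below rewrite a logical operation whose constant operand is 0 or -1 into a plain NOT. The identities they rely on are easy to check in isolation; this self-contained program (ordinary C integers, no TCG types) asserts each one.

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t b = 0x0123456789abcdefull;

    assert((UINT64_MAX & ~b) == ~b);  /* andc -1, b  => not b */
    assert((0 | ~b) == ~b);           /* orc   0, b  => not b */
    assert(~(b ^ 0) == ~b);           /* eqv   b, 0  => not b */
    assert(~(b & UINT64_MAX) == ~b);  /* nand  b, -1 => not b */
    assert(~(b | 0) == ~b);           /* nor   b, 0  => not b */
    assert((b ^ UINT64_MAX) == ~b);   /* xor   b, -1 => not b */
    return 0;
}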
11
10
12
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/i386/tcg-target.c.inc
13
--- a/tcg/optimize.c
15
+++ b/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
17
new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
16
return false;
18
} else {
17
}
19
if (have_avx2) {
18
20
- tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret);
19
+/*
21
+ tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
20
+ * Convert @op to NOT, if NOT is supported by the host.
22
} else {
21
+ * Return true if the conversion is successful, which will still
23
tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
22
+ * indicate that the processing is complete.
23
+ */
24
+static bool fold_not(OptContext *ctx, TCGOp *op);
25
+static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
26
+{
27
+ TCGOpcode not_op;
28
+ bool have_not;
29
+
30
+ switch (ctx->type) {
31
+ case TCG_TYPE_I32:
32
+ not_op = INDEX_op_not_i32;
33
+ have_not = TCG_TARGET_HAS_not_i32;
34
+ break;
35
+ case TCG_TYPE_I64:
36
+ not_op = INDEX_op_not_i64;
37
+ have_not = TCG_TARGET_HAS_not_i64;
38
+ break;
39
+ case TCG_TYPE_V64:
40
+ case TCG_TYPE_V128:
41
+ case TCG_TYPE_V256:
42
+ not_op = INDEX_op_not_vec;
43
+ have_not = TCG_TARGET_HAS_not_vec;
44
+ break;
45
+ default:
46
+ g_assert_not_reached();
47
+ }
48
+ if (have_not) {
49
+ op->opc = not_op;
50
+ op->args[1] = op->args[idx];
51
+ return fold_not(ctx, op);
52
+ }
53
+ return false;
54
+}
55
+
56
+/* If the binary operation has first argument @i, fold to NOT. */
57
+static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
58
+{
59
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
60
+ return fold_to_not(ctx, op, 2);
61
+ }
62
+ return false;
63
+}
64
+
65
/* If the binary operation has second argument @i, fold to @i. */
66
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
67
{
68
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
69
return false;
70
}
71
72
+/* If the binary operation has second argument @i, fold to NOT. */
73
+static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
74
+{
75
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
76
+ return fold_to_not(ctx, op, 1);
77
+ }
78
+ return false;
79
+}
80
+
81
/* If the binary operation has both arguments equal, fold to @i. */
82
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
83
{
84
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
85
static bool fold_andc(OptContext *ctx, TCGOp *op)
86
{
87
if (fold_const2(ctx, op) ||
88
- fold_xx_to_i(ctx, op, 0)) {
89
+ fold_xx_to_i(ctx, op, 0) ||
90
+ fold_ix_to_not(ctx, op, -1)) {
91
return true;
92
}
93
return false;
94
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
95
96
static bool fold_eqv(OptContext *ctx, TCGOp *op)
97
{
98
- return fold_const2(ctx, op);
99
+ if (fold_const2(ctx, op) ||
100
+ fold_xi_to_not(ctx, op, 0)) {
101
+ return true;
102
+ }
103
+ return false;
104
}
105
106
static bool fold_extract(OptContext *ctx, TCGOp *op)
107
@@ -XXX,XX +XXX,XX @@ static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
108
109
static bool fold_nand(OptContext *ctx, TCGOp *op)
110
{
111
- return fold_const2(ctx, op);
112
+ if (fold_const2(ctx, op) ||
113
+ fold_xi_to_not(ctx, op, -1)) {
114
+ return true;
115
+ }
116
+ return false;
117
}
118
119
static bool fold_neg(OptContext *ctx, TCGOp *op)
120
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
121
122
static bool fold_nor(OptContext *ctx, TCGOp *op)
123
{
124
- return fold_const2(ctx, op);
125
+ if (fold_const2(ctx, op) ||
126
+ fold_xi_to_not(ctx, op, 0)) {
127
+ return true;
128
+ }
129
+ return false;
130
}
131
132
static bool fold_not(OptContext *ctx, TCGOp *op)
133
{
134
- return fold_const1(ctx, op);
135
+ if (fold_const1(ctx, op)) {
136
+ return true;
137
+ }
138
+
139
+ /* Because of fold_to_not, we want to always return true, via finish. */
140
+ finish_folding(ctx, op);
141
+ return true;
142
}
143
144
static bool fold_or(OptContext *ctx, TCGOp *op)
145
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
146
147
static bool fold_orc(OptContext *ctx, TCGOp *op)
148
{
149
- return fold_const2(ctx, op);
150
+ if (fold_const2(ctx, op) ||
151
+ fold_ix_to_not(ctx, op, 0)) {
152
+ return true;
153
+ }
154
+ return false;
155
}
156
157
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
158
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
159
static bool fold_xor(OptContext *ctx, TCGOp *op)
160
{
161
if (fold_const2(ctx, op) ||
162
- fold_xx_to_i(ctx, op, 0)) {
163
+ fold_xx_to_i(ctx, op, 0) ||
164
+ fold_xi_to_not(ctx, op, -1)) {
165
return true;
166
}
167
return false;
168
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
169
}
170
}
171
break;
172
- CASE_OP_32_64_VEC(xor):
173
- CASE_OP_32_64(nand):
174
- if (!arg_is_const(op->args[1])
175
- && arg_is_const(op->args[2])
176
- && arg_info(op->args[2])->val == -1) {
177
- i = 1;
178
- goto try_not;
179
- }
180
- break;
181
- CASE_OP_32_64(nor):
182
- if (!arg_is_const(op->args[1])
183
- && arg_is_const(op->args[2])
184
- && arg_info(op->args[2])->val == 0) {
185
- i = 1;
186
- goto try_not;
187
- }
188
- break;
189
- CASE_OP_32_64_VEC(andc):
190
- if (!arg_is_const(op->args[2])
191
- && arg_is_const(op->args[1])
192
- && arg_info(op->args[1])->val == -1) {
193
- i = 2;
194
- goto try_not;
195
- }
196
- break;
197
- CASE_OP_32_64_VEC(orc):
198
- CASE_OP_32_64(eqv):
199
- if (!arg_is_const(op->args[2])
200
- && arg_is_const(op->args[1])
201
- && arg_info(op->args[1])->val == 0) {
202
- i = 2;
203
- goto try_not;
204
- }
205
- break;
206
- try_not:
207
- {
208
- TCGOpcode not_op;
209
- bool have_not;
210
-
211
- switch (ctx.type) {
212
- case TCG_TYPE_I32:
213
- not_op = INDEX_op_not_i32;
214
- have_not = TCG_TARGET_HAS_not_i32;
215
- break;
216
- case TCG_TYPE_I64:
217
- not_op = INDEX_op_not_i64;
218
- have_not = TCG_TARGET_HAS_not_i64;
219
- break;
220
- case TCG_TYPE_V64:
221
- case TCG_TYPE_V128:
222
- case TCG_TYPE_V256:
223
- not_op = INDEX_op_not_vec;
224
- have_not = TCG_TARGET_HAS_not_vec;
225
- break;
226
- default:
227
- g_assert_not_reached();
228
- }
229
- if (!have_not) {
230
- break;
231
- }
232
- op->opc = not_op;
233
- reset_temp(op->args[0]);
234
- op->args[1] = op->args[i];
235
- continue;
236
- }
237
default:
238
break;
24
}
239
}
25
--
240
--
26
2.25.1
241
2.25.1
27
242
28
243
1
Having dupi pass through movi is confusing and arguably wrong.
1
Even though there is only one user, place this more complex
2
conversion into its own helper.
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
tcg/tcg.c | 6 +++-
7
tcg/optimize.c | 89 ++++++++++++++++++++++++++------------------------
7
tcg/aarch64/tcg-target.c.inc | 7 ----
8
1 file changed, 47 insertions(+), 42 deletions(-)
8
tcg/i386/tcg-target.c.inc | 63 ++++++++++++++++++++++++------------
9
tcg/ppc/tcg-target.c.inc | 6 ----
10
4 files changed, 47 insertions(+), 35 deletions(-)
11
9
12
diff --git a/tcg/tcg.c b/tcg/tcg.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/tcg.c
12
--- a/tcg/optimize.c
15
+++ b/tcg/tcg.c
13
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
14
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
17
case TEMP_VAL_CONST:
15
18
reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
16
static bool fold_neg(OptContext *ctx, TCGOp *op)
19
preferred_regs, ts->indirect_base);
17
{
20
- tcg_out_movi(s, ts->type, reg, ts->val);
18
- return fold_const1(ctx, op);
21
+ if (ts->type <= TCG_TYPE_I64) {
19
+ if (fold_const1(ctx, op)) {
22
+ tcg_out_movi(s, ts->type, reg, ts->val);
20
+ return true;
23
+ } else {
21
+ }
24
+ tcg_out_dupi_vec(s, ts->type, reg, ts->val);
22
+ /*
25
+ }
23
+ * Because of fold_sub_to_neg, we want to always return true,
26
ts->mem_coherent = 0;
24
+ * via finish_folding.
27
break;
25
+ */
28
case TEMP_VAL_MEM:
26
+ finish_folding(ctx, op);
29
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
27
+ return true;
30
index XXXXXXX..XXXXXXX 100644
31
--- a/tcg/aarch64/tcg-target.c.inc
32
+++ b/tcg/aarch64/tcg-target.c.inc
33
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
34
case TCG_TYPE_I64:
35
tcg_debug_assert(rd < 32);
36
break;
37
-
38
- case TCG_TYPE_V64:
39
- case TCG_TYPE_V128:
40
- tcg_debug_assert(rd >= 32);
41
- tcg_out_dupi_vec(s, type, rd, value);
42
- return;
43
-
44
default:
45
g_assert_not_reached();
46
}
47
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
48
index XXXXXXX..XXXXXXX 100644
49
--- a/tcg/i386/tcg-target.c.inc
50
+++ b/tcg/i386/tcg-target.c.inc
51
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
52
}
53
}
28
}
54
29
55
-static void tcg_out_movi(TCGContext *s, TCGType type,
30
static bool fold_nor(OptContext *ctx, TCGOp *op)
56
- TCGReg ret, tcg_target_long arg)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
57
+static void tcg_out_movi_vec(TCGContext *s, TCGType type,
32
return fold_const2(ctx, op);
58
+ TCGReg ret, tcg_target_long arg)
33
}
34
35
+static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
59
+{
36
+{
60
+ if (arg == 0) {
37
+ TCGOpcode neg_op;
61
+ tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret);
38
+ bool have_neg;
62
+ return;
39
+
63
+ }
40
+ if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
64
+ if (arg == -1) {
41
+ return false;
65
+ tcg_out_vex_modrm(s, OPC_PCMPEQB, ret, ret, ret);
66
+ return;
67
+ }
42
+ }
68
+
43
+
69
+ int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW);
44
+ switch (ctx->type) {
70
+ tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy + rexw, ret);
71
+ if (TCG_TARGET_REG_BITS == 64) {
72
+ new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
73
+ } else {
74
+ new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
75
+ }
76
+}
77
+
78
+static void tcg_out_movi_int(TCGContext *s, TCGType type,
79
+ TCGReg ret, tcg_target_long arg)
80
{
81
tcg_target_long diff;
82
83
- switch (type) {
84
- case TCG_TYPE_I32:
85
-#if TCG_TARGET_REG_BITS == 64
86
- case TCG_TYPE_I64:
87
-#endif
88
- if (ret < 16) {
89
- break;
90
- }
91
- /* fallthru */
92
- case TCG_TYPE_V64:
93
- case TCG_TYPE_V128:
94
- case TCG_TYPE_V256:
95
- tcg_debug_assert(ret >= 16);
96
- tcg_out_dupi_vec(s, type, ret, arg);
97
- return;
98
- default:
99
- g_assert_not_reached();
100
- }
101
-
102
if (arg == 0) {
103
tgen_arithr(s, ARITH_XOR, ret, ret);
104
return;
105
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
106
tcg_out64(s, arg);
107
}
108
109
+static void tcg_out_movi(TCGContext *s, TCGType type,
110
+ TCGReg ret, tcg_target_long arg)
111
+{
112
+ switch (type) {
113
+ case TCG_TYPE_I32:
45
+ case TCG_TYPE_I32:
114
+#if TCG_TARGET_REG_BITS == 64
46
+ neg_op = INDEX_op_neg_i32;
47
+ have_neg = TCG_TARGET_HAS_neg_i32;
48
+ break;
115
+ case TCG_TYPE_I64:
49
+ case TCG_TYPE_I64:
116
+#endif
50
+ neg_op = INDEX_op_neg_i64;
117
+ if (ret < 16) {
51
+ have_neg = TCG_TARGET_HAS_neg_i64;
118
+ tcg_out_movi_int(s, type, ret, arg);
52
+ break;
119
+ } else {
53
+ case TCG_TYPE_V64:
120
+ tcg_out_movi_vec(s, type, ret, arg);
54
+ case TCG_TYPE_V128:
121
+ }
55
+ case TCG_TYPE_V256:
56
+ neg_op = INDEX_op_neg_vec;
57
+ have_neg = (TCG_TARGET_HAS_neg_vec &&
58
+ tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
122
+ break;
59
+ break;
123
+ default:
60
+ default:
124
+ g_assert_not_reached();
61
+ g_assert_not_reached();
125
+ }
62
+ }
63
+ if (have_neg) {
64
+ op->opc = neg_op;
65
+ op->args[1] = op->args[2];
66
+ return fold_neg(ctx, op);
67
+ }
68
+ return false;
126
+}
69
+}
127
+
70
+
128
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
71
static bool fold_sub(OptContext *ctx, TCGOp *op)
129
{
72
{
130
if (val == (int8_t)val) {
73
if (fold_const2(ctx, op) ||
131
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
74
- fold_xx_to_i(ctx, op, 0)) {
132
index XXXXXXX..XXXXXXX 100644
75
+ fold_xx_to_i(ctx, op, 0) ||
133
--- a/tcg/ppc/tcg-target.c.inc
76
+ fold_sub_to_neg(ctx, op)) {
134
+++ b/tcg/ppc/tcg-target.c.inc
77
return true;
135
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
78
}
136
tcg_out_movi_int(s, type, ret, arg, false);
79
return false;
137
break;
80
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
138
81
continue;
139
- case TCG_TYPE_V64:
82
}
140
- case TCG_TYPE_V128:
83
break;
141
- tcg_debug_assert(ret >= TCG_REG_V0);
84
- CASE_OP_32_64_VEC(sub):
142
- tcg_out_dupi_vec(s, type, ret, arg);
85
- {
143
- break;
86
- TCGOpcode neg_op;
87
- bool have_neg;
144
-
88
-
145
default:
89
- if (arg_is_const(op->args[2])) {
146
g_assert_not_reached();
90
- /* Proceed with possible constant folding. */
147
}
91
- break;
92
- }
93
- switch (ctx.type) {
94
- case TCG_TYPE_I32:
95
- neg_op = INDEX_op_neg_i32;
96
- have_neg = TCG_TARGET_HAS_neg_i32;
97
- break;
98
- case TCG_TYPE_I64:
99
- neg_op = INDEX_op_neg_i64;
100
- have_neg = TCG_TARGET_HAS_neg_i64;
101
- break;
102
- case TCG_TYPE_V64:
103
- case TCG_TYPE_V128:
104
- case TCG_TYPE_V256:
105
- neg_op = INDEX_op_neg_vec;
106
- have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
107
- TCGOP_VECE(op)) > 0;
108
- break;
109
- default:
110
- g_assert_not_reached();
111
- }
112
- if (!have_neg) {
113
- break;
114
- }
115
- if (arg_is_const(op->args[1])
116
- && arg_info(op->args[1])->val == 0) {
117
- op->opc = neg_op;
118
- reset_temp(op->args[0]);
119
- op->args[1] = op->args[2];
120
- continue;
121
- }
122
- }
123
- break;
124
default:
125
break;
126
}
148
--
127
--
149
2.25.1
128
2.25.1
150
129
151
130
1
Pull the "op r, a, i => mov r, a" optimization into a function,
2
and use it in the outer-most logical operations.
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
6
---
3
tcg/aarch64/tcg-target-constr.h | 31 ++++++++++++
7
tcg/optimize.c | 61 +++++++++++++++++++++-----------------------------
4
tcg/aarch64/tcg-target.c.inc | 85 +++++++++++----------------------
8
1 file changed, 26 insertions(+), 35 deletions(-)
5
2 files changed, 60 insertions(+), 56 deletions(-)
6
create mode 100644 tcg/aarch64/tcg-target-constr.h
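The fold_xi_to_x helper added below encodes the usual identity elements behind "op r, a, i => mov r, a". They can be sanity-checked with plain integers, independent of TCG:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t x = 0x0123456789abcdefull;

    assert((x + 0) == x);              /* add  x, 0  */
    assert((x - 0) == x);              /* sub  x, 0  */
    assert((x | 0) == x);              /* or   x, 0  */
    assert((x ^ 0) == x);              /* xor  x, 0  */
    assert((x & UINT64_MAX) == x);     /* and  x, -1 */
    assert((x & ~0ull) == x);          /* andc x, 0  */
    assert((x | ~UINT64_MAX) == x);    /* orc  x, -1 */
    assert(~(x ^ UINT64_MAX) == x);    /* eqv  x, -1 */
    assert((x << 0) == x);             /* shl  x, 0  */
    assert((x >> 0) == x);             /* shr  x, 0  */
    return 0;
}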
7
9
8
diff --git a/tcg/aarch64/tcg-target-constr.h b/tcg/aarch64/tcg-target-constr.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
new file mode 100644
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX
12
--- a/tcg/optimize.c
11
--- /dev/null
13
+++ b/tcg/optimize.c
12
+++ b/tcg/aarch64/tcg-target-constr.h
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
13
@@ -XXX,XX +XXX,XX @@
15
return false;
14
+/* SPDX-License-Identifier: GPL-2.0-or-later */
16
}
15
+/*
17
16
+ * AArch64 target-specific operand constaints.
18
+/* If the binary operation has second argument @i, fold to identity. */
17
+ * Copyright (c) 2020 Linaro
19
+static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
18
+ */
20
+{
21
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
23
+ }
24
+ return false;
25
+}
19
+
26
+
20
+C_O0_I1(r)
27
/* If the binary operation has second argument @i, fold to NOT. */
21
+C_O0_I2(lZ, l)
28
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
22
+C_O0_I2(r, rA)
29
{
23
+C_O0_I2(rZ, r)
30
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
24
+C_O0_I2(w, r)
31
25
+C_O1_I1(r, l)
32
static bool fold_add(OptContext *ctx, TCGOp *op)
26
+C_O1_I1(r, r)
33
{
27
+C_O1_I1(w, r)
34
- return fold_const2(ctx, op);
28
+C_O1_I1(w, w)
35
+ if (fold_const2(ctx, op) ||
29
+C_O1_I1(w, wr)
36
+ fold_xi_to_x(ctx, op, 0)) {
30
+C_O1_I2(r, 0, rZ)
37
+ return true;
31
+C_O1_I2(r, r, r)
38
+ }
32
+C_O1_I2(r, r, rA)
39
+ return false;
33
+C_O1_I2(r, r, rAL)
34
+C_O1_I2(r, r, ri)
35
+C_O1_I2(r, r, rL)
36
+C_O1_I2(r, rZ, rZ)
37
+C_O1_I2(w, 0, w)
38
+C_O1_I2(w, w, w)
39
+C_O1_I2(w, w, wN)
40
+C_O1_I2(w, w, wO)
41
+C_O1_I2(w, w, wZ)
42
+C_O1_I3(w, w, w, w)
43
+C_O1_I4(r, r, rA, rZ, rZ)
44
+C_O2_I4(r, r, rZ, rZ, rA, rMZ)
45
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/aarch64/tcg-target.c.inc
48
+++ b/tcg/aarch64/tcg-target.c.inc
49
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
50
va_end(va);
51
}
40
}
52
41
53
+/* Define all constraint sets. */
42
static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
54
+#include "../tcg-constr.c.inc"
43
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
55
+
56
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
57
{
44
{
58
- static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
45
if (fold_const2(ctx, op) ||
59
- static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
46
fold_xi_to_i(ctx, op, 0) ||
60
- static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
47
+ fold_xi_to_x(ctx, op, -1) ||
61
- static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
48
fold_xx_to_x(ctx, op)) {
62
- static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
49
return true;
63
- static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
50
}
64
- static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
51
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
65
- static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
52
{
66
- static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
53
if (fold_const2(ctx, op) ||
67
- static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
54
fold_xx_to_i(ctx, op, 0) ||
68
- static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
55
+ fold_xi_to_x(ctx, op, 0) ||
69
- static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
56
fold_ix_to_not(ctx, op, -1)) {
70
- static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
57
return true;
71
- static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
58
}
72
- static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
59
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
73
- static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
60
static bool fold_eqv(OptContext *ctx, TCGOp *op)
74
- static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
61
{
75
- static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
62
if (fold_const2(ctx, op) ||
76
- static const TCGTargetOpDef r_r_rAL
63
+ fold_xi_to_x(ctx, op, -1) ||
77
- = { .args_ct_str = { "r", "r", "rAL" } };
64
fold_xi_to_not(ctx, op, 0)) {
78
- static const TCGTargetOpDef dep
65
return true;
79
- = { .args_ct_str = { "r", "0", "rZ" } };
66
}
80
- static const TCGTargetOpDef ext2
67
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
81
- = { .args_ct_str = { "r", "rZ", "rZ" } };
68
static bool fold_or(OptContext *ctx, TCGOp *op)
82
- static const TCGTargetOpDef movc
69
{
83
- = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
70
if (fold_const2(ctx, op) ||
84
- static const TCGTargetOpDef add2
71
+ fold_xi_to_x(ctx, op, 0) ||
85
- = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
72
fold_xx_to_x(ctx, op)) {
86
- static const TCGTargetOpDef w_w_w_w
73
return true;
87
- = { .args_ct_str = { "w", "w", "w", "w" } };
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
76
static bool fold_orc(OptContext *ctx, TCGOp *op)
77
{
78
if (fold_const2(ctx, op) ||
79
+ fold_xi_to_x(ctx, op, -1) ||
80
fold_ix_to_not(ctx, op, 0)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
84
85
static bool fold_shift(OptContext *ctx, TCGOp *op)
86
{
87
- return fold_const2(ctx, op);
88
+ if (fold_const2(ctx, op) ||
89
+ fold_xi_to_x(ctx, op, 0)) {
90
+ return true;
91
+ }
92
+ return false;
93
}
94
95
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
96
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
97
{
98
if (fold_const2(ctx, op) ||
99
fold_xx_to_i(ctx, op, 0) ||
100
+ fold_xi_to_x(ctx, op, 0) ||
101
fold_sub_to_neg(ctx, op)) {
102
return true;
103
}
104
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
105
{
106
if (fold_const2(ctx, op) ||
107
fold_xx_to_i(ctx, op, 0) ||
108
+ fold_xi_to_x(ctx, op, 0) ||
109
fold_xi_to_not(ctx, op, -1)) {
110
return true;
111
}
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
break;
114
}
115
116
- /* Simplify expression for "op r, a, const => mov r, a" cases */
117
- switch (opc) {
118
- CASE_OP_32_64_VEC(add):
119
- CASE_OP_32_64_VEC(sub):
120
- CASE_OP_32_64_VEC(or):
121
- CASE_OP_32_64_VEC(xor):
122
- CASE_OP_32_64_VEC(andc):
123
- CASE_OP_32_64(shl):
124
- CASE_OP_32_64(shr):
125
- CASE_OP_32_64(sar):
126
- CASE_OP_32_64(rotl):
127
- CASE_OP_32_64(rotr):
128
- if (!arg_is_const(op->args[1])
129
- && arg_is_const(op->args[2])
130
- && arg_info(op->args[2])->val == 0) {
131
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
132
- continue;
133
- }
134
- break;
135
- CASE_OP_32_64_VEC(and):
136
- CASE_OP_32_64_VEC(orc):
137
- CASE_OP_32_64(eqv):
138
- if (!arg_is_const(op->args[1])
139
- && arg_is_const(op->args[2])
140
- && arg_info(op->args[2])->val == -1) {
141
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
142
- continue;
143
- }
144
- break;
145
- default:
146
- break;
147
- }
88
-
148
-
89
switch (op) {
149
/* Simplify using known-zero bits. Currently only ops with a single
90
case INDEX_op_goto_ptr:
150
output argument is supported. */
91
- return &r;
151
z_mask = -1;
92
+ return C_O0_I1(r);
93
94
case INDEX_op_ld8u_i32:
95
case INDEX_op_ld8s_i32:
96
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
97
case INDEX_op_extract_i64:
98
case INDEX_op_sextract_i32:
99
case INDEX_op_sextract_i64:
100
- return &r_r;
101
+ return C_O1_I1(r, r);
102
103
case INDEX_op_st8_i32:
104
case INDEX_op_st16_i32:
105
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
106
case INDEX_op_st16_i64:
107
case INDEX_op_st32_i64:
108
case INDEX_op_st_i64:
109
- return &rZ_r;
110
+ return C_O0_I2(rZ, r);
111
112
case INDEX_op_add_i32:
113
case INDEX_op_add_i64:
114
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
115
case INDEX_op_sub_i64:
116
case INDEX_op_setcond_i32:
117
case INDEX_op_setcond_i64:
118
- return &r_r_rA;
119
+ return C_O1_I2(r, r, rA);
120
121
case INDEX_op_mul_i32:
122
case INDEX_op_mul_i64:
123
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
124
case INDEX_op_remu_i64:
125
case INDEX_op_muluh_i64:
126
case INDEX_op_mulsh_i64:
127
- return &r_r_r;
128
+ return C_O1_I2(r, r, r);
129
130
case INDEX_op_and_i32:
131
case INDEX_op_and_i64:
132
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
133
case INDEX_op_orc_i64:
134
case INDEX_op_eqv_i32:
135
case INDEX_op_eqv_i64:
136
- return &r_r_rL;
137
+ return C_O1_I2(r, r, rL);
138
139
case INDEX_op_shl_i32:
140
case INDEX_op_shr_i32:
141
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
142
case INDEX_op_sar_i64:
143
case INDEX_op_rotl_i64:
144
case INDEX_op_rotr_i64:
145
- return &r_r_ri;
146
+ return C_O1_I2(r, r, ri);
147
148
case INDEX_op_clz_i32:
149
case INDEX_op_ctz_i32:
150
case INDEX_op_clz_i64:
151
case INDEX_op_ctz_i64:
152
- return &r_r_rAL;
153
+ return C_O1_I2(r, r, rAL);
154
155
case INDEX_op_brcond_i32:
156
case INDEX_op_brcond_i64:
157
- return &r_rA;
158
+ return C_O0_I2(r, rA);
159
160
case INDEX_op_movcond_i32:
161
case INDEX_op_movcond_i64:
162
- return &movc;
163
+ return C_O1_I4(r, r, rA, rZ, rZ);
164
165
case INDEX_op_qemu_ld_i32:
166
case INDEX_op_qemu_ld_i64:
167
- return &r_l;
168
+ return C_O1_I1(r, l);
169
case INDEX_op_qemu_st_i32:
170
case INDEX_op_qemu_st_i64:
171
- return &lZ_l;
172
+ return C_O0_I2(lZ, l);
173
174
case INDEX_op_deposit_i32:
175
case INDEX_op_deposit_i64:
176
- return &dep;
177
+ return C_O1_I2(r, 0, rZ);
178
179
case INDEX_op_extract2_i32:
180
case INDEX_op_extract2_i64:
181
- return &ext2;
182
+ return C_O1_I2(r, rZ, rZ);
183
184
case INDEX_op_add2_i32:
185
case INDEX_op_add2_i64:
186
case INDEX_op_sub2_i32:
187
case INDEX_op_sub2_i64:
188
- return &add2;
189
+ return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
190
191
case INDEX_op_add_vec:
192
case INDEX_op_sub_vec:
193
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
194
case INDEX_op_shrv_vec:
195
case INDEX_op_sarv_vec:
196
case INDEX_op_aa64_sshl_vec:
197
- return &w_w_w;
198
+ return C_O1_I2(w, w, w);
199
case INDEX_op_not_vec:
200
case INDEX_op_neg_vec:
201
case INDEX_op_abs_vec:
202
case INDEX_op_shli_vec:
203
case INDEX_op_shri_vec:
204
case INDEX_op_sari_vec:
205
- return &w_w;
206
+ return C_O1_I1(w, w);
207
case INDEX_op_ld_vec:
208
- case INDEX_op_st_vec:
209
case INDEX_op_dupm_vec:
210
- return &w_r;
211
+ return C_O1_I1(w, r);
212
+ case INDEX_op_st_vec:
213
+ return C_O0_I2(w, r);
214
case INDEX_op_dup_vec:
215
- return &w_wr;
216
+ return C_O1_I1(w, wr);
217
case INDEX_op_or_vec:
218
case INDEX_op_andc_vec:
219
- return &w_w_wO;
220
+ return C_O1_I2(w, w, wO);
221
case INDEX_op_and_vec:
222
case INDEX_op_orc_vec:
223
- return &w_w_wN;
224
+ return C_O1_I2(w, w, wN);
225
case INDEX_op_cmp_vec:
226
- return &w_w_wZ;
227
+ return C_O1_I2(w, w, wZ);
228
case INDEX_op_bitsel_vec:
229
- return &w_w_w_w;
230
+ return C_O1_I3(w, w, w, w);
231
case INDEX_op_aa64_sli_vec:
232
- return &w_0_w;
233
+ return C_O1_I2(w, 0, w);
234
235
default:
236
return NULL;
237
--
152
--
238
2.25.1
153
2.25.1
239
154
240
155
1
These are easier to set and test when they have their own fields.
1
Pull the "op r, 0, b => movi r, 0" optimization into a function,
2
Reduce the size of alias_index and sort_index to 4 bits, which is
2
and use it in fold_shift.
3
sufficient for TCG_MAX_OP_ARGS. This leaves only the bits indicating
4
constants within the ct field.
5
3
6
Move all initialization to allocation time, rather than initializing
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
individual fields in process_op_defs.
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
7
---
11
include/tcg/tcg.h | 14 +++++++-------
8
tcg/optimize.c | 28 ++++++++++------------------
12
tcg/tcg.c | 28 ++++++++++++----------------
9
1 file changed, 10 insertions(+), 18 deletions(-)
13
2 files changed, 19 insertions(+), 23 deletions(-)
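The TCGArgConstraint change described above turns the TCG_CT_ALIAS/IALIAS/NEWREG flag bits into dedicated bitfields and shrinks the two index fields to 4 bits. A minimal, simplified illustration of that layout follows; the field names are taken from the patch, but the struct is otherwise stripped down (no register set, no real limits).

#include <assert.h>
#include <stdbool.h>

typedef struct {
    unsigned ct : 16;         /* remaining TCG_CT_CONST-style bits     */
    unsigned alias_index : 4; /* 4 bits are enough for TCG_MAX_OP_ARGS */
    unsigned sort_index : 4;
    bool oalias : 1;          /* was TCG_CT_ALIAS  */
    bool ialias : 1;          /* was TCG_CT_IALIAS */
    bool newreg : 1;          /* was TCG_CT_NEWREG */
} ArgConstraint;

int main(void)
{
    ArgConstraint c = { .alias_index = 3, .ialias = true };

    /* Testing a flag is now a plain field access instead of
       masking c.ct against TCG_CT_IALIAS. */
    assert(c.ialias);
    assert(!c.oalias);
    assert(c.alias_index == 3);
    return 0;
}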
14
10
15
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg.h
13
--- a/tcg/optimize.c
18
+++ b/include/tcg/tcg.h
14
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void);
15
@@ -XXX,XX +XXX,XX @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
20
void tcg_dump_info(void);
16
return false;
21
void tcg_dump_op_count(void);
17
}
22
18
23
-#define TCG_CT_ALIAS 0x80
19
+/* If the binary operation has first argument @i, fold to @i. */
24
-#define TCG_CT_IALIAS 0x40
20
+static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
25
-#define TCG_CT_NEWREG 0x20 /* output requires a new register */
21
+{
26
-#define TCG_CT_CONST 0x02 /* any constant of register size */
22
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
27
+#define TCG_CT_CONST 1 /* any constant of register size */
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
28
24
+ }
29
typedef struct TCGArgConstraint {
25
+ return false;
30
- uint16_t ct;
26
+}
31
- uint8_t alias_index;
27
+
32
- uint8_t sort_index;
28
/* If the binary operation has first argument @i, fold to NOT. */
33
+ unsigned ct : 16;
29
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
34
+ unsigned alias_index : 4;
30
{
35
+ unsigned sort_index : 4;
31
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
36
+ bool oalias : 1;
32
static bool fold_shift(OptContext *ctx, TCGOp *op)
37
+ bool ialias : 1;
33
{
38
+ bool newreg : 1;
34
if (fold_const2(ctx, op) ||
39
TCGRegSet regs;
35
+ fold_ix_to_i(ctx, op, 0) ||
40
} TCGArgConstraint;
36
fold_xi_to_x(ctx, op, 0)) {
41
37
return true;
42
diff --git a/tcg/tcg.c b/tcg/tcg.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/tcg.c
45
+++ b/tcg/tcg.c
46
@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
47
total_args += n;
48
}
38
}
49
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
50
- args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
40
break;
51
+ args_ct = g_new0(TCGArgConstraint, total_args);
52
53
for(op = 0; op < NB_OPS; op++) {
54
def = &tcg_op_defs[op];
55
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
56
const TCGArgConstraint *arg_ct = &def->args_ct[k];
57
int n;
58
59
- if (arg_ct->ct & TCG_CT_ALIAS) {
60
+ if (arg_ct->oalias) {
61
/* an alias is equivalent to a single register */
62
n = 1;
63
} else {
64
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
65
/* Incomplete TCGTargetOpDef entry. */
66
tcg_debug_assert(ct_str != NULL);
67
68
- def->args_ct[i].regs = 0;
69
- def->args_ct[i].ct = 0;
70
while (*ct_str != '\0') {
71
switch(*ct_str) {
72
case '0' ... '9':
73
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
74
tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
75
tcg_debug_assert(oarg < def->nb_oargs);
76
tcg_debug_assert(def->args_ct[oarg].regs != 0);
77
- /* TCG_CT_ALIAS is for the output arguments.
78
- The input is tagged with TCG_CT_IALIAS. */
79
def->args_ct[i] = def->args_ct[oarg];
80
- def->args_ct[oarg].ct |= TCG_CT_ALIAS;
81
+ /* The output sets oalias. */
82
+ def->args_ct[oarg].oalias = true;
83
def->args_ct[oarg].alias_index = i;
84
- def->args_ct[i].ct |= TCG_CT_IALIAS;
85
+ /* The input sets ialias. */
86
+ def->args_ct[i].ialias = true;
87
def->args_ct[i].alias_index = oarg;
88
}
89
ct_str++;
90
break;
91
case '&':
92
- def->args_ct[i].ct |= TCG_CT_NEWREG;
93
+ def->args_ct[i].newreg = true;
94
ct_str++;
95
break;
96
case 'i':
97
@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
98
set = *pset;
99
100
set &= ct->regs;
101
- if (ct->ct & TCG_CT_IALIAS) {
102
+ if (ct->ialias) {
103
set &= op->output_pref[ct->alias_index];
104
}
105
/* If the combination is not possible, restart. */
106
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
107
}
41
}
108
42
109
i_preferred_regs = o_preferred_regs = 0;
43
- /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
110
- if (arg_ct->ct & TCG_CT_IALIAS) {
44
- and "sub r, 0, a => neg r, a" case. */
111
+ if (arg_ct->ialias) {
45
- switch (opc) {
112
o_preferred_regs = op->output_pref[arg_ct->alias_index];
46
- CASE_OP_32_64(shl):
113
if (ts->fixed_reg) {
47
- CASE_OP_32_64(shr):
114
/* if fixed register, we must allocate a new register
48
- CASE_OP_32_64(sar):
115
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
49
- CASE_OP_32_64(rotl):
116
reg = ts->reg;
50
- CASE_OP_32_64(rotr):
117
for (k2 = 0 ; k2 < k ; k2++) {
51
- if (arg_is_const(op->args[1])
118
i2 = def->args_ct[nb_oargs + k2].sort_index;
52
- && arg_info(op->args[1])->val == 0) {
119
- if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
53
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
120
- reg == new_args[i2]) {
54
- continue;
121
+ if (def->args_ct[i2].ialias && reg == new_args[i2]) {
55
- }
122
goto allocate_in_reg;
56
- break;
123
}
57
- default:
124
}
58
- break;
125
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
59
- }
126
/* ENV should not be modified. */
60
-
127
tcg_debug_assert(!ts->fixed_reg);
61
/* Simplify using known-zero bits. Currently only ops with a single
128
62
output argument is supported. */
129
- if ((arg_ct->ct & TCG_CT_ALIAS)
63
z_mask = -1;
130
- && !const_args[arg_ct->alias_index]) {
131
+ if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
132
reg = new_args[arg_ct->alias_index];
133
- } else if (arg_ct->ct & TCG_CT_NEWREG) {
134
+ } else if (arg_ct->newreg) {
135
reg = tcg_reg_alloc(s, arg_ct->regs,
136
i_allocated_regs | o_allocated_regs,
137
op->output_pref[k], ts->indirect_base);
138
--
64
--
139
2.25.1
65
2.25.1
140
66
141
67
1
This does require finishing the conversion to tcg_target_op_def.
1
Move all of the known-zero optimizations into the per-opcode
2
Remove quite a lot of ifdefs, since we can reference opcodes
2
functions. Use fold_masks when there is a possibility of the
3
even if they are not implemented.
3
result being determined, and simply set ctx->z_mask otherwise.
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
8
---
7
tcg/tci/tcg-target-constr.h | 28 +++
9
tcg/optimize.c | 545 ++++++++++++++++++++++++++-----------------------
8
tcg/tci/tcg-target.c.inc | 360 ++++++++++++++----------------------
10
1 file changed, 294 insertions(+), 251 deletions(-)
9
2 files changed, 163 insertions(+), 225 deletions(-)
10
create mode 100644 tcg/tci/tcg-target-constr.h
11
11
12
diff --git a/tcg/tci/tcg-target-constr.h b/tcg/tci/tcg-target-constr.h
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
new file mode 100644
14
index XXXXXXX..XXXXXXX
15
--- /dev/null
16
+++ b/tcg/tci/tcg-target-constr.h
17
@@ -XXX,XX +XXX,XX @@
18
+/* SPDX-License-Identifier: GPL-2.0-or-later */
19
+/*
20
+ * TCI target-specific operand constraints.
21
+ * Copyright (c) 2020 Linaro
22
+ */
23
+
24
+C_O0_I2(r, r)
25
+C_O0_I2(r, ri)
26
+C_O0_I2(r, S)
27
+C_O0_I3(r, r, S)
28
+C_O0_I3(r, S, S)
29
+C_O0_I4(r, r, S, S)
30
+C_O1_I1(r, L)
31
+C_O1_I1(r, r)
32
+C_O1_I2(r, 0, r)
33
+C_O1_I2(r, L, L)
34
+C_O1_I2(r, ri, ri)
35
+C_O1_I2(r, r, r)
36
+C_O1_I2(r, r, ri)
37
+C_O2_I1(r, r, L)
38
+C_O2_I2(r, r, L, L)
39
+
40
+#if TCG_TARGET_REG_BITS == 32
41
+C_O0_I4(r, r, ri, ri)
42
+C_O1_I4(r, r, r, ri, ri)
43
+C_O2_I2(r, r, r, r)
44
+C_O2_I4(r, r, r, r, r, r)
45
+#endif
46
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
47
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
48
--- a/tcg/tci/tcg-target.c.inc
14
--- a/tcg/optimize.c
49
+++ b/tcg/tci/tcg-target.c.inc
15
+++ b/tcg/optimize.c
50
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
51
/* Bitfield n...m (in 32 bit value). */
17
TCGTempSet temps_used;
52
#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
18
53
19
/* In flight values from optimization. */
54
-/* Macros used in tcg_target_op_defs. */
20
- uint64_t z_mask;
55
-#define R "r"
21
+ uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
56
-#define RI "ri"
22
+ uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
57
-#if TCG_TARGET_REG_BITS == 32
23
TCGType type;
58
-# define R64 "r", "r"
24
} OptContext;
59
-#else
25
60
-# define R64 "r"
26
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
61
-#endif
27
return false;
62
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
28
}
63
-# define L "L", "L"
29
64
-# define S "S", "S"
30
+static bool fold_masks(OptContext *ctx, TCGOp *op)
65
-#else
31
+{
66
-# define L "L"
32
+ uint64_t a_mask = ctx->a_mask;
67
-# define S "S"
33
+ uint64_t z_mask = ctx->z_mask;
68
-#endif
34
+
69
-
35
+ /*
70
-/* TODO: documentation. */
36
+ * 32-bit ops generate 32-bit results. For the result-is-zero test
71
-static const TCGTargetOpDef tcg_target_op_defs[] = {
37
+ * below, we can ignore high bits, but for further optimizations we
72
- { INDEX_op_exit_tb, { NULL } },
38
+ * need to record that the high bits contain garbage.
73
- { INDEX_op_goto_tb, { NULL } },
39
+ */
74
- { INDEX_op_br, { NULL } },
40
+ if (ctx->type == TCG_TYPE_I32) {
75
-
41
+ ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
76
- { INDEX_op_ld8u_i32, { R, R } },
42
+ a_mask &= MAKE_64BIT_MASK(0, 32);
77
- { INDEX_op_ld8s_i32, { R, R } },
43
+ z_mask &= MAKE_64BIT_MASK(0, 32);
78
- { INDEX_op_ld16u_i32, { R, R } },
44
+ }
79
- { INDEX_op_ld16s_i32, { R, R } },
45
+
80
- { INDEX_op_ld_i32, { R, R } },
46
+ if (z_mask == 0) {
81
- { INDEX_op_st8_i32, { R, R } },
47
+ return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
82
- { INDEX_op_st16_i32, { R, R } },
48
+ }
83
- { INDEX_op_st_i32, { R, R } },
49
+ if (a_mask == 0) {
84
-
50
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
85
- { INDEX_op_add_i32, { R, RI, RI } },
51
+ }
86
- { INDEX_op_sub_i32, { R, RI, RI } },
52
+ return false;
87
- { INDEX_op_mul_i32, { R, RI, RI } },
53
+}
88
-#if TCG_TARGET_HAS_div_i32
54
+
89
- { INDEX_op_div_i32, { R, R, R } },
55
/*
90
- { INDEX_op_divu_i32, { R, R, R } },
56
* Convert @op to NOT, if NOT is supported by the host.
91
- { INDEX_op_rem_i32, { R, R, R } },
57
* Return true if the conversion is successful, which will still
92
- { INDEX_op_remu_i32, { R, R, R } },
58
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
93
-#elif TCG_TARGET_HAS_div2_i32
59
94
- { INDEX_op_div2_i32, { R, R, "0", "1", R } },
60
static bool fold_and(OptContext *ctx, TCGOp *op)
95
- { INDEX_op_divu2_i32, { R, R, "0", "1", R } },
61
{
96
-#endif
62
+ uint64_t z1, z2;
97
- /* TODO: Does R, RI, RI result in faster code than R, R, RI?
63
+
98
- If both operands are constants, we can optimize. */
64
if (fold_const2(ctx, op) ||
99
- { INDEX_op_and_i32, { R, RI, RI } },
65
fold_xi_to_i(ctx, op, 0) ||
100
-#if TCG_TARGET_HAS_andc_i32
66
fold_xi_to_x(ctx, op, -1) ||
101
- { INDEX_op_andc_i32, { R, RI, RI } },
67
fold_xx_to_x(ctx, op)) {
102
-#endif
68
return true;
103
-#if TCG_TARGET_HAS_eqv_i32
69
}
104
- { INDEX_op_eqv_i32, { R, RI, RI } },
70
- return false;
105
-#endif
71
+
106
-#if TCG_TARGET_HAS_nand_i32
72
+ z1 = arg_info(op->args[1])->z_mask;
107
- { INDEX_op_nand_i32, { R, RI, RI } },
73
+ z2 = arg_info(op->args[2])->z_mask;
108
-#endif
74
+ ctx->z_mask = z1 & z2;
109
-#if TCG_TARGET_HAS_nor_i32
75
+
110
- { INDEX_op_nor_i32, { R, RI, RI } },
76
+ /*
111
-#endif
77
+ * Known-zeros does not imply known-ones. Therefore unless
112
- { INDEX_op_or_i32, { R, RI, RI } },
78
+ * arg2 is constant, we can't infer affected bits from it.
113
-#if TCG_TARGET_HAS_orc_i32
79
+ */
114
- { INDEX_op_orc_i32, { R, RI, RI } },
80
+ if (arg_is_const(op->args[2])) {
115
-#endif
81
+ ctx->a_mask = z1 & ~z2;
116
- { INDEX_op_xor_i32, { R, RI, RI } },
82
+ }
117
- { INDEX_op_shl_i32, { R, RI, RI } },
83
+
118
- { INDEX_op_shr_i32, { R, RI, RI } },
84
+ return fold_masks(ctx, op);
119
- { INDEX_op_sar_i32, { R, RI, RI } },
85
}
120
-#if TCG_TARGET_HAS_rot_i32
86
121
- { INDEX_op_rotl_i32, { R, RI, RI } },
87
static bool fold_andc(OptContext *ctx, TCGOp *op)
122
- { INDEX_op_rotr_i32, { R, RI, RI } },
88
{
123
-#endif
89
+ uint64_t z1;
124
-#if TCG_TARGET_HAS_deposit_i32
90
+
125
- { INDEX_op_deposit_i32, { R, "0", R } },
91
if (fold_const2(ctx, op) ||
126
-#endif
92
fold_xx_to_i(ctx, op, 0) ||
127
-
93
fold_xi_to_x(ctx, op, 0) ||
128
- { INDEX_op_brcond_i32, { R, RI } },
94
fold_ix_to_not(ctx, op, -1)) {
129
-
95
return true;
130
- { INDEX_op_setcond_i32, { R, R, RI } },
96
}
131
-#if TCG_TARGET_REG_BITS == 64
97
- return false;
132
- { INDEX_op_setcond_i64, { R, R, RI } },
98
+
133
-#endif /* TCG_TARGET_REG_BITS == 64 */
99
+ z1 = arg_info(op->args[1])->z_mask;
134
-
100
+
135
-#if TCG_TARGET_REG_BITS == 32
101
+ /*
136
- /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */
102
+ * Known-zeros does not imply known-ones. Therefore unless
137
- { INDEX_op_add2_i32, { R, R, R, R, R, R } },
103
+ * arg2 is constant, we can't infer anything from it.
138
- { INDEX_op_sub2_i32, { R, R, R, R, R, R } },
104
+ */
139
- { INDEX_op_brcond2_i32, { R, R, RI, RI } },
105
+ if (arg_is_const(op->args[2])) {
140
- { INDEX_op_mulu2_i32, { R, R, R, R } },
106
+ uint64_t z2 = ~arg_info(op->args[2])->z_mask;
141
- { INDEX_op_setcond2_i32, { R, R, R, RI, RI } },
107
+ ctx->a_mask = z1 & ~z2;
142
-#endif
108
+ z1 &= z2;
143
-
109
+ }
144
-#if TCG_TARGET_HAS_not_i32
110
+ ctx->z_mask = z1;
145
- { INDEX_op_not_i32, { R, R } },
111
+
146
-#endif
112
+ return fold_masks(ctx, op);
147
-#if TCG_TARGET_HAS_neg_i32
113
}
148
- { INDEX_op_neg_i32, { R, R } },
114
149
-#endif
115
static bool fold_brcond(OptContext *ctx, TCGOp *op)
150
-
116
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
151
-#if TCG_TARGET_REG_BITS == 64
117
152
- { INDEX_op_ld8u_i64, { R, R } },
118
static bool fold_bswap(OptContext *ctx, TCGOp *op)
153
- { INDEX_op_ld8s_i64, { R, R } },
119
{
154
- { INDEX_op_ld16u_i64, { R, R } },
120
+ uint64_t z_mask, sign;
155
- { INDEX_op_ld16s_i64, { R, R } },
121
+
156
- { INDEX_op_ld32u_i64, { R, R } },
122
if (arg_is_const(op->args[1])) {
157
- { INDEX_op_ld32s_i64, { R, R } },
123
uint64_t t = arg_info(op->args[1])->val;
158
- { INDEX_op_ld_i64, { R, R } },
124
159
-
125
t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
160
- { INDEX_op_st8_i64, { R, R } },
126
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
161
- { INDEX_op_st16_i64, { R, R } },
127
}
162
- { INDEX_op_st32_i64, { R, R } },
128
- return false;
163
- { INDEX_op_st_i64, { R, R } },
129
+
164
-
130
+ z_mask = arg_info(op->args[1])->z_mask;
165
- { INDEX_op_add_i64, { R, RI, RI } },
131
+ switch (op->opc) {
166
- { INDEX_op_sub_i64, { R, RI, RI } },
167
- { INDEX_op_mul_i64, { R, RI, RI } },
168
-#if TCG_TARGET_HAS_div_i64
169
- { INDEX_op_div_i64, { R, R, R } },
170
- { INDEX_op_divu_i64, { R, R, R } },
171
- { INDEX_op_rem_i64, { R, R, R } },
172
- { INDEX_op_remu_i64, { R, R, R } },
173
-#elif TCG_TARGET_HAS_div2_i64
174
- { INDEX_op_div2_i64, { R, R, "0", "1", R } },
175
- { INDEX_op_divu2_i64, { R, R, "0", "1", R } },
176
-#endif
177
- { INDEX_op_and_i64, { R, RI, RI } },
178
-#if TCG_TARGET_HAS_andc_i64
179
- { INDEX_op_andc_i64, { R, RI, RI } },
180
-#endif
181
-#if TCG_TARGET_HAS_eqv_i64
182
- { INDEX_op_eqv_i64, { R, RI, RI } },
183
-#endif
184
-#if TCG_TARGET_HAS_nand_i64
185
- { INDEX_op_nand_i64, { R, RI, RI } },
186
-#endif
187
-#if TCG_TARGET_HAS_nor_i64
188
- { INDEX_op_nor_i64, { R, RI, RI } },
189
-#endif
190
- { INDEX_op_or_i64, { R, RI, RI } },
191
-#if TCG_TARGET_HAS_orc_i64
192
- { INDEX_op_orc_i64, { R, RI, RI } },
193
-#endif
194
- { INDEX_op_xor_i64, { R, RI, RI } },
195
- { INDEX_op_shl_i64, { R, RI, RI } },
196
- { INDEX_op_shr_i64, { R, RI, RI } },
197
- { INDEX_op_sar_i64, { R, RI, RI } },
198
-#if TCG_TARGET_HAS_rot_i64
199
- { INDEX_op_rotl_i64, { R, RI, RI } },
200
- { INDEX_op_rotr_i64, { R, RI, RI } },
201
-#endif
202
-#if TCG_TARGET_HAS_deposit_i64
203
- { INDEX_op_deposit_i64, { R, "0", R } },
204
-#endif
205
- { INDEX_op_brcond_i64, { R, RI } },
206
-
207
-#if TCG_TARGET_HAS_ext8s_i64
208
- { INDEX_op_ext8s_i64, { R, R } },
209
-#endif
210
-#if TCG_TARGET_HAS_ext16s_i64
211
- { INDEX_op_ext16s_i64, { R, R } },
212
-#endif
213
-#if TCG_TARGET_HAS_ext32s_i64
214
- { INDEX_op_ext32s_i64, { R, R } },
215
-#endif
216
-#if TCG_TARGET_HAS_ext8u_i64
217
- { INDEX_op_ext8u_i64, { R, R } },
218
-#endif
219
-#if TCG_TARGET_HAS_ext16u_i64
220
- { INDEX_op_ext16u_i64, { R, R } },
221
-#endif
222
-#if TCG_TARGET_HAS_ext32u_i64
223
- { INDEX_op_ext32u_i64, { R, R } },
224
-#endif
225
- { INDEX_op_ext_i32_i64, { R, R } },
226
- { INDEX_op_extu_i32_i64, { R, R } },
227
-#if TCG_TARGET_HAS_bswap16_i64
228
- { INDEX_op_bswap16_i64, { R, R } },
229
-#endif
230
-#if TCG_TARGET_HAS_bswap32_i64
231
- { INDEX_op_bswap32_i64, { R, R } },
232
-#endif
233
-#if TCG_TARGET_HAS_bswap64_i64
234
- { INDEX_op_bswap64_i64, { R, R } },
235
-#endif
236
-#if TCG_TARGET_HAS_not_i64
237
- { INDEX_op_not_i64, { R, R } },
238
-#endif
239
-#if TCG_TARGET_HAS_neg_i64
240
- { INDEX_op_neg_i64, { R, R } },
241
-#endif
242
-#endif /* TCG_TARGET_REG_BITS == 64 */
243
-
244
- { INDEX_op_qemu_ld_i32, { R, L } },
245
- { INDEX_op_qemu_ld_i64, { R64, L } },
246
-
247
- { INDEX_op_qemu_st_i32, { R, S } },
248
- { INDEX_op_qemu_st_i64, { R64, S } },
249
-
250
-#if TCG_TARGET_HAS_ext8s_i32
251
- { INDEX_op_ext8s_i32, { R, R } },
252
-#endif
253
-#if TCG_TARGET_HAS_ext16s_i32
254
- { INDEX_op_ext16s_i32, { R, R } },
255
-#endif
256
-#if TCG_TARGET_HAS_ext8u_i32
257
- { INDEX_op_ext8u_i32, { R, R } },
258
-#endif
259
-#if TCG_TARGET_HAS_ext16u_i32
260
- { INDEX_op_ext16u_i32, { R, R } },
261
-#endif
262
-
263
-#if TCG_TARGET_HAS_bswap16_i32
264
- { INDEX_op_bswap16_i32, { R, R } },
265
-#endif
266
-#if TCG_TARGET_HAS_bswap32_i32
267
- { INDEX_op_bswap32_i32, { R, R } },
268
-#endif
269
-
270
- { INDEX_op_mb, { } },
271
- { -1 },
272
-};
273
+/* Define all constraint sets. */
274
+#include "../tcg-constr.c.inc"
275
276
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
277
{
278
- int i, n = ARRAY_SIZE(tcg_target_op_defs);
279
+ switch (op) {
280
+ case INDEX_op_ld8u_i32:
281
+ case INDEX_op_ld8s_i32:
282
+ case INDEX_op_ld16u_i32:
283
+ case INDEX_op_ld16s_i32:
284
+ case INDEX_op_ld_i32:
285
+ case INDEX_op_ld8u_i64:
286
+ case INDEX_op_ld8s_i64:
287
+ case INDEX_op_ld16u_i64:
288
+ case INDEX_op_ld16s_i64:
289
+ case INDEX_op_ld32u_i64:
290
+ case INDEX_op_ld32s_i64:
291
+ case INDEX_op_ld_i64:
292
+ case INDEX_op_not_i32:
293
+ case INDEX_op_not_i64:
294
+ case INDEX_op_neg_i32:
295
+ case INDEX_op_neg_i64:
296
+ case INDEX_op_ext8s_i32:
297
+ case INDEX_op_ext8s_i64:
298
+ case INDEX_op_ext16s_i32:
299
+ case INDEX_op_ext16s_i64:
300
+ case INDEX_op_ext8u_i32:
301
+ case INDEX_op_ext8u_i64:
302
+ case INDEX_op_ext16u_i32:
303
+ case INDEX_op_ext16u_i64:
304
+ case INDEX_op_ext32s_i64:
305
+ case INDEX_op_ext32u_i64:
306
+ case INDEX_op_ext_i32_i64:
307
+ case INDEX_op_extu_i32_i64:
308
+ case INDEX_op_bswap16_i32:
132
+ case INDEX_op_bswap16_i32:
309
+ case INDEX_op_bswap16_i64:
133
+ case INDEX_op_bswap16_i64:
134
+ z_mask = bswap16(z_mask);
135
+ sign = INT16_MIN;
136
+ break;
310
+ case INDEX_op_bswap32_i32:
137
+ case INDEX_op_bswap32_i32:
311
+ case INDEX_op_bswap32_i64:
138
+ case INDEX_op_bswap32_i64:
139
+ z_mask = bswap32(z_mask);
140
+ sign = INT32_MIN;
141
+ break;
312
+ case INDEX_op_bswap64_i64:
142
+ case INDEX_op_bswap64_i64:
313
+ return C_O1_I1(r, r);
143
+ z_mask = bswap64(z_mask);
314
144
+ sign = INT64_MIN;
315
- for (i = 0; i < n; ++i) {
145
+ break;
316
- if (tcg_target_op_defs[i].op == op) {
146
+ default:
317
- return &tcg_target_op_defs[i];
147
+ g_assert_not_reached();
148
+ }
149
+
150
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
151
+ case TCG_BSWAP_OZ:
152
+ break;
153
+ case TCG_BSWAP_OS:
154
+ /* If the sign bit may be 1, force all the bits above to 1. */
155
+ if (z_mask & sign) {
156
+ z_mask |= sign;
157
+ }
158
+ break;
159
+ default:
160
+ /* The high bits are undefined: force all bits above the sign to 1. */
161
+ z_mask |= sign << 1;
162
+ break;
163
+ }
164
+ ctx->z_mask = z_mask;
165
+
166
+ return fold_masks(ctx, op);
167
}
168
169
static bool fold_call(OptContext *ctx, TCGOp *op)
170
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
171
172
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
173
{
174
+ uint64_t z_mask;
175
+
176
if (arg_is_const(op->args[1])) {
177
uint64_t t = arg_info(op->args[1])->val;
178
179
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
180
}
181
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
182
}
183
+
184
+ switch (ctx->type) {
185
+ case TCG_TYPE_I32:
186
+ z_mask = 31;
187
+ break;
188
+ case TCG_TYPE_I64:
189
+ z_mask = 63;
190
+ break;
191
+ default:
192
+ g_assert_not_reached();
193
+ }
194
+ ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
195
+
196
return false;
197
}
198
199
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
200
{
201
- return fold_const1(ctx, op);
202
+ if (fold_const1(ctx, op)) {
203
+ return true;
204
+ }
205
+
206
+ switch (ctx->type) {
207
+ case TCG_TYPE_I32:
208
+ ctx->z_mask = 32 | 31;
209
+ break;
210
+ case TCG_TYPE_I64:
211
+ ctx->z_mask = 64 | 63;
212
+ break;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ return false;
217
}
218
219
static bool fold_deposit(OptContext *ctx, TCGOp *op)
220
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
221
t1 = deposit64(t1, op->args[3], op->args[4], t2);
222
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
223
}
224
+
225
+ ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
226
+ op->args[3], op->args[4],
227
+ arg_info(op->args[2])->z_mask);
228
return false;
229
}
230
231
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
232
233
static bool fold_extract(OptContext *ctx, TCGOp *op)
234
{
235
+ uint64_t z_mask_old, z_mask;
236
+
237
if (arg_is_const(op->args[1])) {
238
uint64_t t;
239
240
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
241
t = extract64(t, op->args[2], op->args[3]);
242
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
243
}
244
- return false;
245
+
246
+ z_mask_old = arg_info(op->args[1])->z_mask;
247
+ z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
248
+ if (op->args[2] == 0) {
249
+ ctx->a_mask = z_mask_old ^ z_mask;
250
+ }
251
+ ctx->z_mask = z_mask;
252
+
253
+ return fold_masks(ctx, op);
254
}
255
256
static bool fold_extract2(OptContext *ctx, TCGOp *op)
257
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
258
259
static bool fold_exts(OptContext *ctx, TCGOp *op)
260
{
261
- return fold_const1(ctx, op);
262
+ uint64_t z_mask_old, z_mask, sign;
263
+ bool type_change = false;
264
+
265
+ if (fold_const1(ctx, op)) {
266
+ return true;
267
+ }
268
+
269
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
270
+
271
+ switch (op->opc) {
272
+ CASE_OP_32_64(ext8s):
273
+ sign = INT8_MIN;
274
+ z_mask = (uint8_t)z_mask;
275
+ break;
276
+ CASE_OP_32_64(ext16s):
277
+ sign = INT16_MIN;
278
+ z_mask = (uint16_t)z_mask;
279
+ break;
280
+ case INDEX_op_ext_i32_i64:
281
+ type_change = true;
282
+ QEMU_FALLTHROUGH;
283
+ case INDEX_op_ext32s_i64:
284
+ sign = INT32_MIN;
285
+ z_mask = (uint32_t)z_mask;
286
+ break;
287
+ default:
288
+ g_assert_not_reached();
289
+ }
290
+
291
+ if (z_mask & sign) {
292
+ z_mask |= sign;
293
+ } else if (!type_change) {
294
+ ctx->a_mask = z_mask_old ^ z_mask;
295
+ }
296
+ ctx->z_mask = z_mask;
297
+
298
+ return fold_masks(ctx, op);
299
}
300
301
static bool fold_extu(OptContext *ctx, TCGOp *op)
302
{
303
- return fold_const1(ctx, op);
304
+ uint64_t z_mask_old, z_mask;
305
+ bool type_change = false;
306
+
307
+ if (fold_const1(ctx, op)) {
308
+ return true;
309
+ }
310
+
311
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
312
+
313
+ switch (op->opc) {
314
+ CASE_OP_32_64(ext8u):
315
+ z_mask = (uint8_t)z_mask;
316
+ break;
317
+ CASE_OP_32_64(ext16u):
318
+ z_mask = (uint16_t)z_mask;
319
+ break;
320
+ case INDEX_op_extrl_i64_i32:
321
+ case INDEX_op_extu_i32_i64:
322
+ type_change = true;
323
+ QEMU_FALLTHROUGH;
324
+ case INDEX_op_ext32u_i64:
325
+ z_mask = (uint32_t)z_mask;
326
+ break;
327
+ case INDEX_op_extrh_i64_i32:
328
+ type_change = true;
329
+ z_mask >>= 32;
330
+ break;
331
+ default:
332
+ g_assert_not_reached();
333
+ }
334
+
335
+ ctx->z_mask = z_mask;
336
+ if (!type_change) {
337
+ ctx->a_mask = z_mask_old ^ z_mask;
338
+ }
339
+ return fold_masks(ctx, op);
340
}
341
342
static bool fold_mb(OptContext *ctx, TCGOp *op)
343
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
344
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
345
}
346
347
+ ctx->z_mask = arg_info(op->args[3])->z_mask
348
+ | arg_info(op->args[4])->z_mask;
349
+
350
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
351
uint64_t tv = arg_info(op->args[3])->val;
352
uint64_t fv = arg_info(op->args[4])->val;
353
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
354
355
static bool fold_neg(OptContext *ctx, TCGOp *op)
356
{
357
+ uint64_t z_mask;
358
+
359
if (fold_const1(ctx, op)) {
360
return true;
361
}
362
+
363
+ /* Set to 1 all bits to the left of the rightmost. */
364
+ z_mask = arg_info(op->args[1])->z_mask;
365
+ ctx->z_mask = -(z_mask & -z_mask);
366
+
367
/*
368
* Because of fold_sub_to_neg, we want to always return true,
369
* via finish_folding.
370
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
371
fold_xx_to_x(ctx, op)) {
372
return true;
373
}
374
- return false;
375
+
376
+ ctx->z_mask = arg_info(op->args[1])->z_mask
377
+ | arg_info(op->args[2])->z_mask;
378
+ return fold_masks(ctx, op);
379
}
380
381
static bool fold_orc(OptContext *ctx, TCGOp *op)
382
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
383
384
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
385
{
386
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
387
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
388
+ MemOp mop = get_memop(oi);
389
+ int width = 8 * memop_size(mop);
390
+
391
+ if (!(mop & MO_SIGN) && width < 64) {
392
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
393
+ }
394
+
395
/* Opcodes that touch guest memory stop the mb optimization. */
396
ctx->prev_mb = NULL;
397
return false;
398
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
399
if (i >= 0) {
400
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
401
}
402
+
403
+ ctx->z_mask = 1;
404
return false;
405
}
406
407
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
408
op->opc = INDEX_op_setcond_i32;
409
break;
410
}
411
+
412
+ ctx->z_mask = 1;
413
return false;
414
415
do_setcond_const:
416
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
417
418
static bool fold_sextract(OptContext *ctx, TCGOp *op)
419
{
420
+ int64_t z_mask_old, z_mask;
421
+
422
if (arg_is_const(op->args[1])) {
423
uint64_t t;
424
425
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
426
t = sextract64(t, op->args[2], op->args[3]);
427
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
428
}
429
- return false;
430
+
431
+ z_mask_old = arg_info(op->args[1])->z_mask;
432
+ z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
433
+ if (op->args[2] == 0 && z_mask >= 0) {
434
+ ctx->a_mask = z_mask_old ^ z_mask;
435
+ }
436
+ ctx->z_mask = z_mask;
437
+
438
+ return fold_masks(ctx, op);
439
}
440
441
static bool fold_shift(OptContext *ctx, TCGOp *op)
442
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
443
fold_xi_to_x(ctx, op, 0)) {
444
return true;
445
}
446
+
447
+ if (arg_is_const(op->args[2])) {
448
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type,
449
+ arg_info(op->args[1])->z_mask,
450
+ arg_info(op->args[2])->val);
451
+ return fold_masks(ctx, op);
452
+ }
453
return false;
454
}
455
456
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
457
return fold_addsub2_i32(ctx, op, false);
458
}
459
460
+static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
461
+{
462
+ /* We can't do any folding with a load, but we can record bits. */
463
+ switch (op->opc) {
464
+ CASE_OP_32_64(ld8u):
465
+ ctx->z_mask = MAKE_64BIT_MASK(0, 8);
466
+ break;
467
+ CASE_OP_32_64(ld16u):
468
+ ctx->z_mask = MAKE_64BIT_MASK(0, 16);
469
+ break;
470
+ case INDEX_op_ld32u_i64:
471
+ ctx->z_mask = MAKE_64BIT_MASK(0, 32);
472
+ break;
473
+ default:
474
+ g_assert_not_reached();
475
+ }
476
+ return false;
477
+}
478
+
479
static bool fold_xor(OptContext *ctx, TCGOp *op)
480
{
481
if (fold_const2(ctx, op) ||
482
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
483
fold_xi_to_not(ctx, op, -1)) {
484
return true;
485
}
486
- return false;
487
+
488
+ ctx->z_mask = arg_info(op->args[1])->z_mask
489
+ | arg_info(op->args[2])->z_mask;
490
+ return fold_masks(ctx, op);
491
}
492
493
/* Propagate constants and copies, fold constant expressions. */
494
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
495
}
496
497
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
498
- uint64_t z_mask, partmask, affected, tmp;
499
TCGOpcode opc = op->opc;
500
const TCGOpDef *def;
501
bool done = false;
502
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
503
break;
504
}
505
506
- /* Simplify using known-zero bits. Currently only ops with a single
507
- output argument is supported. */
508
- z_mask = -1;
509
- affected = -1;
510
- switch (opc) {
511
- CASE_OP_32_64(ext8s):
512
- if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
513
- break;
514
- }
515
- QEMU_FALLTHROUGH;
516
- CASE_OP_32_64(ext8u):
517
- z_mask = 0xff;
518
- goto and_const;
519
- CASE_OP_32_64(ext16s):
520
- if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
521
- break;
522
- }
523
- QEMU_FALLTHROUGH;
524
- CASE_OP_32_64(ext16u):
525
- z_mask = 0xffff;
526
- goto and_const;
527
- case INDEX_op_ext32s_i64:
528
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
529
- break;
530
- }
531
- QEMU_FALLTHROUGH;
532
- case INDEX_op_ext32u_i64:
533
- z_mask = 0xffffffffU;
534
- goto and_const;
535
-
536
- CASE_OP_32_64(and):
537
- z_mask = arg_info(op->args[2])->z_mask;
538
- if (arg_is_const(op->args[2])) {
539
- and_const:
540
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
541
- }
542
- z_mask = arg_info(op->args[1])->z_mask & z_mask;
543
- break;
544
-
545
- case INDEX_op_ext_i32_i64:
546
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
547
- break;
548
- }
549
- QEMU_FALLTHROUGH;
550
- case INDEX_op_extu_i32_i64:
551
- /* We do not compute affected as it is a size changing op. */
552
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
553
- break;
554
-
555
- CASE_OP_32_64(andc):
556
- /* Known-zeros does not imply known-ones. Therefore unless
557
- op->args[2] is constant, we can't infer anything from it. */
558
- if (arg_is_const(op->args[2])) {
559
- z_mask = ~arg_info(op->args[2])->z_mask;
560
- goto and_const;
561
- }
562
- /* But we certainly know nothing outside args[1] may be set. */
563
- z_mask = arg_info(op->args[1])->z_mask;
564
- break;
565
-
566
- case INDEX_op_sar_i32:
567
- if (arg_is_const(op->args[2])) {
568
- tmp = arg_info(op->args[2])->val & 31;
569
- z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
570
- }
571
- break;
572
- case INDEX_op_sar_i64:
573
- if (arg_is_const(op->args[2])) {
574
- tmp = arg_info(op->args[2])->val & 63;
575
- z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
576
- }
577
- break;
578
-
579
- case INDEX_op_shr_i32:
580
- if (arg_is_const(op->args[2])) {
581
- tmp = arg_info(op->args[2])->val & 31;
582
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
583
- }
584
- break;
585
- case INDEX_op_shr_i64:
586
- if (arg_is_const(op->args[2])) {
587
- tmp = arg_info(op->args[2])->val & 63;
588
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
589
- }
590
- break;
591
-
592
- case INDEX_op_extrl_i64_i32:
593
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
594
- break;
595
- case INDEX_op_extrh_i64_i32:
596
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
597
- break;
598
-
599
- CASE_OP_32_64(shl):
600
- if (arg_is_const(op->args[2])) {
601
- tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
602
- z_mask = arg_info(op->args[1])->z_mask << tmp;
603
- }
604
- break;
605
-
606
- CASE_OP_32_64(neg):
607
- /* Set to 1 all bits to the left of the rightmost. */
608
- z_mask = -(arg_info(op->args[1])->z_mask
609
- & -arg_info(op->args[1])->z_mask);
610
- break;
611
-
612
- CASE_OP_32_64(deposit):
613
- z_mask = deposit64(arg_info(op->args[1])->z_mask,
614
- op->args[3], op->args[4],
615
- arg_info(op->args[2])->z_mask);
616
- break;
617
-
618
- CASE_OP_32_64(extract):
619
- z_mask = extract64(arg_info(op->args[1])->z_mask,
620
- op->args[2], op->args[3]);
621
- if (op->args[2] == 0) {
622
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
623
- }
624
- break;
625
- CASE_OP_32_64(sextract):
626
- z_mask = sextract64(arg_info(op->args[1])->z_mask,
627
- op->args[2], op->args[3]);
628
- if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
629
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
630
- }
631
- break;
632
-
633
- CASE_OP_32_64(or):
634
- CASE_OP_32_64(xor):
635
- z_mask = arg_info(op->args[1])->z_mask
636
- | arg_info(op->args[2])->z_mask;
637
- break;
638
-
639
- case INDEX_op_clz_i32:
640
- case INDEX_op_ctz_i32:
641
- z_mask = arg_info(op->args[2])->z_mask | 31;
642
- break;
643
-
644
- case INDEX_op_clz_i64:
645
- case INDEX_op_ctz_i64:
646
- z_mask = arg_info(op->args[2])->z_mask | 63;
647
- break;
648
-
649
- case INDEX_op_ctpop_i32:
650
- z_mask = 32 | 31;
651
- break;
652
- case INDEX_op_ctpop_i64:
653
- z_mask = 64 | 63;
654
- break;
655
-
656
- CASE_OP_32_64(setcond):
657
- case INDEX_op_setcond2_i32:
658
- z_mask = 1;
659
- break;
660
-
661
- CASE_OP_32_64(movcond):
662
- z_mask = arg_info(op->args[3])->z_mask
663
- | arg_info(op->args[4])->z_mask;
664
- break;
665
-
666
- CASE_OP_32_64(ld8u):
667
- z_mask = 0xff;
668
- break;
669
- CASE_OP_32_64(ld16u):
670
- z_mask = 0xffff;
671
- break;
672
- case INDEX_op_ld32u_i64:
673
- z_mask = 0xffffffffu;
674
- break;
675
-
676
- CASE_OP_32_64(qemu_ld):
677
- {
678
- MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
679
- MemOp mop = get_memop(oi);
680
- if (!(mop & MO_SIGN)) {
681
- z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
682
- }
683
- }
684
- break;
685
-
686
- CASE_OP_32_64(bswap16):
687
- z_mask = arg_info(op->args[1])->z_mask;
688
- if (z_mask <= 0xffff) {
689
- op->args[2] |= TCG_BSWAP_IZ;
690
- }
691
- z_mask = bswap16(z_mask);
692
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
693
- case TCG_BSWAP_OZ:
694
- break;
695
- case TCG_BSWAP_OS:
696
- z_mask = (int16_t)z_mask;
697
- break;
698
- default: /* undefined high bits */
699
- z_mask |= MAKE_64BIT_MASK(16, 48);
700
- break;
701
- }
702
- break;
703
-
704
- case INDEX_op_bswap32_i64:
705
- z_mask = arg_info(op->args[1])->z_mask;
706
- if (z_mask <= 0xffffffffu) {
707
- op->args[2] |= TCG_BSWAP_IZ;
708
- }
709
- z_mask = bswap32(z_mask);
710
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
711
- case TCG_BSWAP_OZ:
712
- break;
713
- case TCG_BSWAP_OS:
714
- z_mask = (int32_t)z_mask;
715
- break;
716
- default: /* undefined high bits */
717
- z_mask |= MAKE_64BIT_MASK(32, 32);
718
- break;
719
- }
720
- break;
721
-
722
- default:
723
- break;
318
- }
724
- }
319
-
- /* 32-bit ops generate 32-bit results. For the result is zero test
- below, we can ignore high bits, but for further optimizations we
- need to record that the high bits contain garbage. */
- partmask = z_mask;
- if (ctx.type == TCG_TYPE_I32) {
- z_mask |= ~(tcg_target_ulong)0xffffffffu;
- partmask &= 0xffffffffu;
- affected &= 0xffffffffu;
- }
- ctx.z_mask = z_mask;
-
- if (partmask == 0) {
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
- continue;
- }
- if (affected == 0) {
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
- continue;
- }
+ /* Assume all bits affected, and no bits known zero. */
+ ctx.a_mask = -1;
+ ctx.z_mask = -1;

/*
* Process each opcode.
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
case INDEX_op_extrh_i64_i32:
done = fold_extu(&ctx, op);
break;
+ CASE_OP_32_64(ld8u):
+ CASE_OP_32_64(ld16u):
+ case INDEX_op_ld32u_i64:
+ done = fold_tcg_ld(&ctx, op);
+ break;
case INDEX_op_mb:
done = fold_mb(&ctx, op);
break;
+ case INDEX_op_st8_i32:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st_i32:
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i64:
+ return C_O0_I2(r, r);
+
+ case INDEX_op_div_i32:
+ case INDEX_op_div_i64:
+ case INDEX_op_divu_i32:
+ case INDEX_op_divu_i64:
+ case INDEX_op_rem_i32:
+ case INDEX_op_rem_i64:
+ case INDEX_op_remu_i32:
+ case INDEX_op_remu_i64:
+ return C_O1_I2(r, r, r);
+
+ case INDEX_op_add_i32:
+ case INDEX_op_add_i64:
+ case INDEX_op_sub_i32:
+ case INDEX_op_sub_i64:
+ case INDEX_op_mul_i32:
+ case INDEX_op_mul_i64:
+ case INDEX_op_and_i32:
+ case INDEX_op_and_i64:
+ case INDEX_op_andc_i32:
+ case INDEX_op_andc_i64:
+ case INDEX_op_eqv_i32:
+ case INDEX_op_eqv_i64:
+ case INDEX_op_nand_i32:
+ case INDEX_op_nand_i64:
+ case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
+ case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
+ case INDEX_op_orc_i32:
+ case INDEX_op_orc_i64:
358
+ case INDEX_op_xor_i32:
359
+ case INDEX_op_xor_i64:
360
+ case INDEX_op_shl_i32:
361
+ case INDEX_op_shl_i64:
362
+ case INDEX_op_shr_i32:
363
+ case INDEX_op_shr_i64:
364
+ case INDEX_op_sar_i32:
365
+ case INDEX_op_sar_i64:
366
+ case INDEX_op_rotl_i32:
367
+ case INDEX_op_rotl_i64:
368
+ case INDEX_op_rotr_i32:
369
+ case INDEX_op_rotr_i64:
370
+ /* TODO: Does R, RI, RI result in faster code than R, R, RI? */
371
+ return C_O1_I2(r, ri, ri);
372
+
373
+ case INDEX_op_deposit_i32:
374
+ case INDEX_op_deposit_i64:
375
+ return C_O1_I2(r, 0, r);
376
+
377
+ case INDEX_op_brcond_i32:
378
+ case INDEX_op_brcond_i64:
379
+ return C_O0_I2(r, ri);
380
+
381
+ case INDEX_op_setcond_i32:
382
+ case INDEX_op_setcond_i64:
383
+ return C_O1_I2(r, r, ri);
384
+
385
+#if TCG_TARGET_REG_BITS == 32
386
+ /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */
387
+ case INDEX_op_add2_i32:
388
+ case INDEX_op_sub2_i32:
389
+ return C_O2_I4(r, r, r, r, r, r);
390
+ case INDEX_op_brcond2_i32:
391
+ return C_O0_I4(r, r, ri, ri);
392
+ case INDEX_op_mulu2_i32:
393
+ return C_O2_I2(r, r, r, r);
394
+ case INDEX_op_setcond2_i32:
395
+ return C_O1_I4(r, r, r, ri, ri);
396
+#endif
397
+
398
+ case INDEX_op_qemu_ld_i32:
399
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
400
+ ? C_O1_I1(r, L)
401
+ : C_O1_I2(r, L, L));
402
+ case INDEX_op_qemu_ld_i64:
403
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
404
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, L)
405
+ : C_O2_I2(r, r, L, L));
406
+ case INDEX_op_qemu_st_i32:
407
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
408
+ ? C_O0_I2(r, S)
409
+ : C_O0_I3(r, S, S));
410
+ case INDEX_op_qemu_st_i64:
411
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, S)
412
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(r, r, S)
413
+ : C_O0_I4(r, r, S, S));
414
+
415
+ default:
416
+ return NULL;
417
}
418
- return NULL;
419
}
420
421
static const int tcg_target_reg_alloc_order[] = {
422
--
2.25.1

Rename to fold_multiply2, and handle muls2_i32, mulu2_i64,
and muls2_i64.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 44 +++++++++++++++++++++++++++++++++++---------
1 file changed, 35 insertions(+), 9 deletions(-)

The temp_fixed, temp_global, temp_local bits are all related.
Combine them into a single enumeration.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg.h | 20 +++++---
tcg/optimize.c | 8 +--
tcg/tcg.c | 122 ++++++++++++++++++++++++++++------------------
3 files changed, 90 insertions(+), 60 deletions(-)
10
12
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg.h
15
+++ b/include/tcg/tcg.h
16
@@ -XXX,XX +XXX,XX @@ typedef enum TCGTempVal {
17
TEMP_VAL_CONST,
18
} TCGTempVal;
19
20
+typedef enum TCGTempKind {
21
+ /* Temp is dead at the end of all basic blocks. */
22
+ TEMP_NORMAL,
23
+ /* Temp is saved across basic blocks but dead at the end of TBs. */
24
+ TEMP_LOCAL,
25
+ /* Temp is saved across both basic blocks and translation blocks. */
26
+ TEMP_GLOBAL,
27
+ /* Temp is in a fixed register. */
28
+ TEMP_FIXED,
29
+} TCGTempKind;
30
+
31
typedef struct TCGTemp {
32
TCGReg reg:8;
33
TCGTempVal val_type:8;
34
TCGType base_type:8;
35
TCGType type:8;
36
- unsigned int fixed_reg:1;
37
+ TCGTempKind kind:3;
38
unsigned int indirect_reg:1;
39
unsigned int indirect_base:1;
40
unsigned int mem_coherent:1;
41
unsigned int mem_allocated:1;
42
- /* If true, the temp is saved across both basic blocks and
43
- translation blocks. */
44
- unsigned int temp_global:1;
45
- /* If true, the temp is saved across basic blocks but dead
46
- at the end of translation blocks. If false, the temp is
47
- dead at the end of basic blocks. */
48
- unsigned int temp_local:1;
49
unsigned int temp_allocated:1;
50
51
tcg_target_long val;
52
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
53
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
54
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
55
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
56
@@ -XXX,XX +XXX,XX @@ static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
57
TCGTemp *i;
16
return false;
58
59
/* If this is already a global, we can't do better. */
60
- if (ts->temp_global) {
61
+ if (ts->kind >= TEMP_GLOBAL) {
62
return ts;
63
}
64
65
/* Search for a global first. */
66
for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
67
- if (i->temp_global) {
68
+ if (i->kind >= TEMP_GLOBAL) {
69
return i;
70
}
71
}
72
73
/* If it is a temp, search for a temp local. */
74
- if (!ts->temp_local) {
75
+ if (ts->kind == TEMP_NORMAL) {
76
for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
77
- if (ts->temp_local) {
78
+ if (i->kind >= TEMP_LOCAL) {
79
return i;
80
}
81
}
82
diff --git a/tcg/tcg.c b/tcg/tcg.c
83
index XXXXXXX..XXXXXXX 100644
84
--- a/tcg/tcg.c
85
+++ b/tcg/tcg.c
86
@@ -XXX,XX +XXX,XX @@ static inline TCGTemp *tcg_global_alloc(TCGContext *s)
87
tcg_debug_assert(s->nb_globals == s->nb_temps);
88
s->nb_globals++;
89
ts = tcg_temp_alloc(s);
90
- ts->temp_global = 1;
91
+ ts->kind = TEMP_GLOBAL;
92
93
return ts;
94
}
17
}
95
@@ -XXX,XX +XXX,XX @@ static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
18
96
ts = tcg_global_alloc(s);
19
-static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
97
ts->base_type = type;
20
+static bool fold_multiply2(OptContext *ctx, TCGOp *op)
98
ts->type = type;
99
- ts->fixed_reg = 1;
100
+ ts->kind = TEMP_FIXED;
101
ts->reg = reg;
102
ts->name = name;
103
tcg_regset_set_reg(s->reserved_regs, reg);
104
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
105
bigendian = 1;
106
#endif
107
108
- if (!base_ts->fixed_reg) {
109
+ if (base_ts->kind != TEMP_FIXED) {
110
/* We do not support double-indirect registers. */
111
tcg_debug_assert(!base_ts->indirect_reg);
112
base_ts->indirect_base = 1;
113
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
114
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
115
{
21
{
116
TCGContext *s = tcg_ctx;
22
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
117
+ TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
23
- uint32_t a = arg_info(op->args[2])->val;
118
TCGTemp *ts;
24
- uint32_t b = arg_info(op->args[3])->val;
119
int idx, k;
25
- uint64_t r = (uint64_t)a * b;
120
26
+ uint64_t a = arg_info(op->args[2])->val;
121
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
27
+ uint64_t b = arg_info(op->args[3])->val;
122
ts = &s->temps[idx];
28
+ uint64_t h, l;
123
ts->temp_allocated = 1;
29
TCGArg rl, rh;
124
tcg_debug_assert(ts->base_type == type);
30
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
125
- tcg_debug_assert(ts->temp_local == temp_local);
31
+ TCGOp *op2;
126
+ tcg_debug_assert(ts->kind == kind);
127
} else {
128
ts = tcg_temp_alloc(s);
129
if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
130
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
131
ts->base_type = type;
132
ts->type = TCG_TYPE_I32;
133
ts->temp_allocated = 1;
134
- ts->temp_local = temp_local;
135
+ ts->kind = kind;
136
137
tcg_debug_assert(ts2 == ts + 1);
138
ts2->base_type = TCG_TYPE_I64;
139
ts2->type = TCG_TYPE_I32;
140
ts2->temp_allocated = 1;
141
- ts2->temp_local = temp_local;
142
+ ts2->kind = kind;
143
} else {
144
ts->base_type = type;
145
ts->type = type;
146
ts->temp_allocated = 1;
147
- ts->temp_local = temp_local;
148
+ ts->kind = kind;
149
}
150
}
151
152
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
153
}
154
#endif
155
156
- tcg_debug_assert(ts->temp_global == 0);
157
+ tcg_debug_assert(ts->kind < TEMP_GLOBAL);
158
tcg_debug_assert(ts->temp_allocated != 0);
159
ts->temp_allocated = 0;
160
161
idx = temp_idx(ts);
162
- k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
163
+ k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
164
set_bit(idx, s->free_temps[k].l);
165
}
166
167
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
168
static void tcg_reg_alloc_start(TCGContext *s)
169
{
170
int i, n;
171
- TCGTemp *ts;
172
173
- for (i = 0, n = s->nb_globals; i < n; i++) {
174
- ts = &s->temps[i];
175
- ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
176
- }
177
- for (n = s->nb_temps; i < n; i++) {
178
- ts = &s->temps[i];
179
- ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
180
- ts->mem_allocated = 0;
181
- ts->fixed_reg = 0;
182
+ for (i = 0, n = s->nb_temps; i < n; i++) {
183
+ TCGTemp *ts = &s->temps[i];
184
+ TCGTempVal val = TEMP_VAL_MEM;
185
+
32
+
186
+ switch (ts->kind) {
33
+ switch (op->opc) {
187
+ case TEMP_FIXED:
34
+ case INDEX_op_mulu2_i32:
188
+ val = TEMP_VAL_REG;
35
+ l = (uint64_t)(uint32_t)a * (uint32_t)b;
36
+ h = (int32_t)(l >> 32);
37
+ l = (int32_t)l;
189
+ break;
38
+ break;
190
+ case TEMP_GLOBAL:
39
+ case INDEX_op_muls2_i32:
40
+ l = (int64_t)(int32_t)a * (int32_t)b;
41
+ h = l >> 32;
42
+ l = (int32_t)l;
191
+ break;
43
+ break;
192
+ case TEMP_NORMAL:
44
+ case INDEX_op_mulu2_i64:
193
+ val = TEMP_VAL_DEAD;
45
+ mulu64(&l, &h, a, b);
194
+ /* fall through */
46
+ break;
195
+ case TEMP_LOCAL:
47
+ case INDEX_op_muls2_i64:
196
+ ts->mem_allocated = 0;
48
+ muls64(&l, &h, a, b);
197
+ break;
49
+ break;
198
+ default:
50
+ default:
199
+ g_assert_not_reached();
51
+ g_assert_not_reached();
200
+ }
52
+ }
201
+ ts->val_type = val;
53
54
rl = op->args[0];
55
rh = op->args[1];
56
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
57
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
58
+
59
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
60
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
61
+
62
+ tcg_opt_gen_movi(ctx, op, rl, l);
63
+ tcg_opt_gen_movi(ctx, op2, rh, h);
64
return true;
202
}
65
}
203
66
return false;
204
memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
205
@@ -XXX,XX +XXX,XX @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
68
CASE_OP_32_64(muluh):
206
{
69
done = fold_mul_highpart(&ctx, op);
207
int idx = temp_idx(ts);
70
break;
208
71
- case INDEX_op_mulu2_i32:
209
- if (ts->temp_global) {
72
- done = fold_mulu2_i32(&ctx, op);
210
+ switch (ts->kind) {
73
+ CASE_OP_32_64(muls2):
211
+ case TEMP_FIXED:
74
+ CASE_OP_32_64(mulu2):
212
+ case TEMP_GLOBAL:
75
+ done = fold_multiply2(&ctx, op);
213
pstrcpy(buf, buf_size, ts->name);
76
break;
214
- } else if (ts->temp_local) {
77
CASE_OP_32_64(nand):
215
+ break;
78
done = fold_nand(&ctx, op);
216
+ case TEMP_LOCAL:
217
snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
218
- } else {
219
+ break;
220
+ case TEMP_NORMAL:
221
snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
222
+ break;
223
}
224
return buf;
225
}
226
@@ -XXX,XX +XXX,XX @@ static void la_bb_end(TCGContext *s, int ng, int nt)
227
{
228
int i;
229
230
- for (i = 0; i < ng; ++i) {
231
- s->temps[i].state = TS_DEAD | TS_MEM;
232
- la_reset_pref(&s->temps[i]);
233
- }
234
- for (i = ng; i < nt; ++i) {
235
- s->temps[i].state = (s->temps[i].temp_local
236
- ? TS_DEAD | TS_MEM
237
- : TS_DEAD);
238
- la_reset_pref(&s->temps[i]);
239
+ for (i = 0; i < nt; ++i) {
240
+ TCGTemp *ts = &s->temps[i];
241
+ int state;
242
+
243
+ switch (ts->kind) {
244
+ case TEMP_FIXED:
245
+ case TEMP_GLOBAL:
246
+ case TEMP_LOCAL:
247
+ state = TS_DEAD | TS_MEM;
248
+ break;
249
+ case TEMP_NORMAL:
250
+ state = TS_DEAD;
251
+ break;
252
+ default:
253
+ g_assert_not_reached();
254
+ }
255
+ ts->state = state;
256
+ la_reset_pref(ts);
257
}
258
}
259
260
@@ -XXX,XX +XXX,XX @@ static void check_regs(TCGContext *s)
261
}
262
for (k = 0; k < s->nb_temps; k++) {
263
ts = &s->temps[k];
264
- if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
265
+ if (ts->val_type == TEMP_VAL_REG
266
+ && ts->kind != TEMP_FIXED
267
&& s->reg_to_temp[ts->reg] != ts) {
268
printf("Inconsistency for temp %s:\n",
269
tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
270
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
271
mark it free; otherwise mark it dead. */
272
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
273
{
274
- if (ts->fixed_reg) {
275
+ if (ts->kind == TEMP_FIXED) {
276
return;
277
}
278
if (ts->val_type == TEMP_VAL_REG) {
279
s->reg_to_temp[ts->reg] = NULL;
280
}
281
ts->val_type = (free_or_dead < 0
282
- || ts->temp_local
283
- || ts->temp_global
284
+ || ts->kind != TEMP_NORMAL
285
? TEMP_VAL_MEM : TEMP_VAL_DEAD);
286
}
287
288
@@ -XXX,XX +XXX,XX @@ static inline void temp_dead(TCGContext *s, TCGTemp *ts)
289
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
290
TCGRegSet preferred_regs, int free_or_dead)
291
{
292
- if (ts->fixed_reg) {
293
+ if (ts->kind == TEMP_FIXED) {
294
return;
295
}
296
if (!ts->mem_coherent) {
297
@@ -XXX,XX +XXX,XX @@ static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
298
{
299
/* The liveness analysis already ensures that globals are back
300
in memory. Keep an tcg_debug_assert for safety. */
301
- tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
302
+ tcg_debug_assert(ts->val_type == TEMP_VAL_MEM
303
+ || ts->kind == TEMP_FIXED);
304
}
305
306
/* save globals to their canonical location and assume they can be
307
@@ -XXX,XX +XXX,XX @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
308
for (i = 0, n = s->nb_globals; i < n; i++) {
309
TCGTemp *ts = &s->temps[i];
310
tcg_debug_assert(ts->val_type != TEMP_VAL_REG
311
- || ts->fixed_reg
312
+ || ts->kind == TEMP_FIXED
313
|| ts->mem_coherent);
314
}
315
}
316
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
317
318
for (i = s->nb_globals; i < s->nb_temps; i++) {
319
TCGTemp *ts = &s->temps[i];
320
- if (ts->temp_local) {
321
+ if (ts->kind == TEMP_LOCAL) {
322
temp_save(s, ts, allocated_regs);
323
} else {
324
/* The liveness analysis already ensures that temps are dead.
325
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
326
TCGRegSet preferred_regs)
327
{
328
/* ENV should not be modified. */
329
- tcg_debug_assert(!ots->fixed_reg);
330
+ tcg_debug_assert(ots->kind != TEMP_FIXED);
331
332
/* The movi is not explicitly generated here. */
333
if (ots->val_type == TEMP_VAL_REG) {
334
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
335
ts = arg_temp(op->args[1]);
336
337
/* ENV should not be modified. */
338
- tcg_debug_assert(!ots->fixed_reg);
339
+ tcg_debug_assert(ots->kind != TEMP_FIXED);
340
341
/* Note that otype != itype for no-op truncation. */
342
otype = ots->type;
343
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
344
}
345
temp_dead(s, ots);
346
} else {
347
- if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
348
+ if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
349
/* the mov can be suppressed */
350
if (ots->val_type == TEMP_VAL_REG) {
351
s->reg_to_temp[ots->reg] = NULL;
352
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
353
* Store the source register into the destination slot
354
* and leave the destination temp as TEMP_VAL_MEM.
355
*/
356
- assert(!ots->fixed_reg);
357
+ assert(ots->kind != TEMP_FIXED);
358
if (!ts->mem_allocated) {
359
temp_allocate_frame(s, ots);
360
}
361
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
362
its = arg_temp(op->args[1]);
363
364
/* ENV should not be modified. */
365
- tcg_debug_assert(!ots->fixed_reg);
366
+ tcg_debug_assert(ots->kind != TEMP_FIXED);
367
368
itype = its->type;
369
vece = TCGOP_VECE(op);
370
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
371
i_preferred_regs = o_preferred_regs = 0;
372
if (arg_ct->ialias) {
373
o_preferred_regs = op->output_pref[arg_ct->alias_index];
374
- if (ts->fixed_reg) {
375
+ if (ts->kind == TEMP_FIXED) {
376
/* if fixed register, we must allocate a new register
377
if the alias is not the same register */
378
if (arg != op->args[arg_ct->alias_index]) {
379
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
380
ts = arg_temp(arg);
381
382
/* ENV should not be modified. */
383
- tcg_debug_assert(!ts->fixed_reg);
384
+ tcg_debug_assert(ts->kind != TEMP_FIXED);
385
386
if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
387
reg = new_args[arg_ct->alias_index];
388
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
389
ts = arg_temp(op->args[i]);
390
391
/* ENV should not be modified. */
392
- tcg_debug_assert(!ts->fixed_reg);
393
+ tcg_debug_assert(ts->kind != TEMP_FIXED);
394
395
if (NEED_SYNC_ARG(i)) {
396
temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
397
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
398
ts = arg_temp(arg);
399
400
/* ENV should not be modified. */
401
- tcg_debug_assert(!ts->fixed_reg);
402
+ tcg_debug_assert(ts->kind != TEMP_FIXED);
403
404
reg = tcg_target_call_oarg_regs[i];
405
tcg_debug_assert(s->reg_to_temp[reg] == NULL);
406
--
79
--
407
2.25.1
80
2.25.1
408
81
409
82
1
Fix this name vs our coding style.
1
Rename to fold_addsub2.
2
Use Int128 to implement the wider operation.
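
For readers without the tree handy, a minimal standalone sketch of the same
double-word arithmetic; it is only an illustration, using the GCC/Clang
unsigned __int128 extension rather than the qemu/int128.h helpers that the
hunk below actually uses.

#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

/* Combine two 64-bit halves, add or subtract, and split the result,
 * mirroring int128_make128()/int128_add()/int128_getlo()/int128_gethi(). */
static void addsub2(uint64_t *rl, uint64_t *rh,
                    uint64_t al, uint64_t ah,
                    uint64_t bl, uint64_t bh, bool add)
{
    unsigned __int128 a = ((unsigned __int128)ah << 64) | al;
    unsigned __int128 b = ((unsigned __int128)bh << 64) | bl;
    unsigned __int128 r = add ? a + b : a - b;

    *rl = (uint64_t)r;
    *rh = (uint64_t)(r >> 64);
}

int main(void)
{
    uint64_t rl, rh;

    /* UINT64_MAX + 1 carries into the high word: expect rh=1, rl=0. */
    addsub2(&rl, &rh, UINT64_MAX, 0, 1, 0, true);
    printf("%016" PRIx64 " %016" PRIx64 "\n", rh, rl);
    return 0;
}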
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
8
---
7
tcg/optimize.c | 32 ++++++++++++++++----------------
9
tcg/optimize.c | 65 ++++++++++++++++++++++++++++++++++----------------
8
1 file changed, 16 insertions(+), 16 deletions(-)
10
1 file changed, 44 insertions(+), 21 deletions(-)
9
11
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@
15
glue(glue(case INDEX_op_, x), _i64): \
17
*/
16
glue(glue(case INDEX_op_, x), _vec)
18
17
19
#include "qemu/osdep.h"
18
-struct tcg_temp_info {
20
+#include "qemu/int128.h"
19
+typedef struct TempOptInfo {
21
#include "tcg/tcg-op.h"
20
bool is_const;
22
#include "tcg-internal.h"
21
TCGTemp *prev_copy;
23
22
TCGTemp *next_copy;
24
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
23
tcg_target_ulong val;
25
return false;
24
tcg_target_ulong mask;
26
}
25
-};
27
26
+} TempOptInfo;
28
-static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
27
29
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
28
-static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
29
+static inline TempOptInfo *ts_info(TCGTemp *ts)
30
{
30
{
31
return ts->state_ptr;
31
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
32
arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
33
- uint32_t al = arg_info(op->args[2])->val;
34
- uint32_t ah = arg_info(op->args[3])->val;
35
- uint32_t bl = arg_info(op->args[4])->val;
36
- uint32_t bh = arg_info(op->args[5])->val;
37
- uint64_t a = ((uint64_t)ah << 32) | al;
38
- uint64_t b = ((uint64_t)bh << 32) | bl;
39
+ uint64_t al = arg_info(op->args[2])->val;
40
+ uint64_t ah = arg_info(op->args[3])->val;
41
+ uint64_t bl = arg_info(op->args[4])->val;
42
+ uint64_t bh = arg_info(op->args[5])->val;
43
TCGArg rl, rh;
44
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
45
+ TCGOp *op2;
46
47
- if (add) {
48
- a += b;
49
+ if (ctx->type == TCG_TYPE_I32) {
50
+ uint64_t a = deposit64(al, 32, 32, ah);
51
+ uint64_t b = deposit64(bl, 32, 32, bh);
52
+
53
+ if (add) {
54
+ a += b;
55
+ } else {
56
+ a -= b;
57
+ }
58
+
59
+ al = sextract64(a, 0, 32);
60
+ ah = sextract64(a, 32, 32);
61
} else {
62
- a -= b;
63
+ Int128 a = int128_make128(al, ah);
64
+ Int128 b = int128_make128(bl, bh);
65
+
66
+ if (add) {
67
+ a = int128_add(a, b);
68
+ } else {
69
+ a = int128_sub(a, b);
70
+ }
71
+
72
+ al = int128_getlo(a);
73
+ ah = int128_gethi(a);
74
}
75
76
rl = op->args[0];
77
rh = op->args[1];
78
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
79
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
80
+
81
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
82
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
83
+
84
+ tcg_opt_gen_movi(ctx, op, rl, al);
85
+ tcg_opt_gen_movi(ctx, op2, rh, ah);
86
return true;
87
}
88
return false;
32
}
89
}
33
90
34
-static inline struct tcg_temp_info *arg_info(TCGArg arg)
91
-static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
35
+static inline TempOptInfo *arg_info(TCGArg arg)
92
+static bool fold_add2(OptContext *ctx, TCGOp *op)
36
{
93
{
37
return ts_info(arg_temp(arg));
94
- return fold_addsub2_i32(ctx, op, true);
95
+ return fold_addsub2(ctx, op, true);
38
}
96
}
39
@@ -XXX,XX +XXX,XX @@ static inline bool ts_is_copy(TCGTemp *ts)
97
40
/* Reset TEMP's state, possibly removing the temp for the list of copies. */
98
static bool fold_and(OptContext *ctx, TCGOp *op)
41
static void reset_ts(TCGTemp *ts)
99
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
100
return false;
101
}
102
103
-static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
104
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
42
{
105
{
43
- struct tcg_temp_info *ti = ts_info(ts);
106
- return fold_addsub2_i32(ctx, op, false);
44
- struct tcg_temp_info *pi = ts_info(ti->prev_copy);
107
+ return fold_addsub2(ctx, op, false);
45
- struct tcg_temp_info *ni = ts_info(ti->next_copy);
46
+ TempOptInfo *ti = ts_info(ts);
47
+ TempOptInfo *pi = ts_info(ti->prev_copy);
48
+ TempOptInfo *ni = ts_info(ti->next_copy);
49
50
ni->prev_copy = ti->prev_copy;
51
pi->next_copy = ti->next_copy;
52
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
53
}
108
}
54
109
55
/* Initialize and activate a temporary. */
110
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
56
-static void init_ts_info(struct tcg_temp_info *infos,
57
+static void init_ts_info(TempOptInfo *infos,
58
TCGTempSet *temps_used, TCGTemp *ts)
59
{
60
size_t idx = temp_idx(ts);
61
if (!test_bit(idx, temps_used->l)) {
62
- struct tcg_temp_info *ti = &infos[idx];
63
+ TempOptInfo *ti = &infos[idx];
64
65
ts->state_ptr = ti;
66
ti->next_copy = ts;
67
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(struct tcg_temp_info *infos,
68
}
69
}
70
71
-static void init_arg_info(struct tcg_temp_info *infos,
72
+static void init_arg_info(TempOptInfo *infos,
73
TCGTempSet *temps_used, TCGArg arg)
74
{
75
init_ts_info(infos, temps_used, arg_temp(arg));
76
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
77
const TCGOpDef *def;
78
TCGOpcode new_op;
79
tcg_target_ulong mask;
80
- struct tcg_temp_info *di = arg_info(dst);
81
+ TempOptInfo *di = arg_info(dst);
82
83
def = &tcg_op_defs[op->opc];
84
if (def->flags & TCG_OPF_VECTOR) {
85
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
86
TCGTemp *dst_ts = arg_temp(dst);
87
TCGTemp *src_ts = arg_temp(src);
88
const TCGOpDef *def;
89
- struct tcg_temp_info *di;
90
- struct tcg_temp_info *si;
91
+ TempOptInfo *di;
92
+ TempOptInfo *si;
93
tcg_target_ulong mask;
94
TCGOpcode new_op;
95
96
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
97
di->mask = mask;
98
99
if (src_ts->type == dst_ts->type) {
100
- struct tcg_temp_info *ni = ts_info(si->next_copy);
101
+ TempOptInfo *ni = ts_info(si->next_copy);
102
103
di->next_copy = si->next_copy;
104
di->prev_copy = src_ts;
105
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
106
{
112
CASE_OP_32_64_VEC(add):
107
int nb_temps, nb_globals;
113
done = fold_add(&ctx, op);
108
TCGOp *op, *op_next, *prev_mb = NULL;
114
break;
109
- struct tcg_temp_info *infos;
115
- case INDEX_op_add2_i32:
110
+ TempOptInfo *infos;
116
- done = fold_add2_i32(&ctx, op);
111
TCGTempSet temps_used;
117
+ CASE_OP_32_64(add2):
112
118
+ done = fold_add2(&ctx, op);
113
/* Array VALS has an element for each temp.
119
break;
120
CASE_OP_32_64_VEC(and):
121
done = fold_and(&ctx, op);
114
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
122
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
115
nb_temps = s->nb_temps;
123
CASE_OP_32_64_VEC(sub):
116
nb_globals = s->nb_globals;
124
done = fold_sub(&ctx, op);
117
bitmap_zero(temps_used.l, nb_temps);
125
break;
118
- infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
126
- case INDEX_op_sub2_i32:
119
+ infos = tcg_malloc(sizeof(TempOptInfo) * nb_temps);
127
- done = fold_sub2_i32(&ctx, op);
120
128
+ CASE_OP_32_64(sub2):
121
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
129
+ done = fold_sub2(&ctx, op);
122
tcg_target_ulong mask, partmask, affected;
130
break;
131
CASE_OP_32_64_VEC(xor):
132
done = fold_xor(&ctx, op);
123
--
133
--
124
2.25.1
134
2.25.1
125
135
126
136
1
Most of these are handled by creating a fold_const2_commutative
2
to handle all of the binary operators. The rest were already
3
handled on a case-by-case basis in the switch, and have their
4
own fold function in which to place the call.
5
6
We now have only one major switch on TCGOpcode.
7
8
Introduce NO_DEST and a block comment for swap_commutative in
9
order to make the handling of brcond and movcond opcodes cleaner.
10
11
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
13
---
3
tcg/i386/tcg-target-constr.h | 55 +++++++++++
14
tcg/optimize.c | 142 ++++++++++++++++++++++++-------------------------
4
tcg/i386/tcg-target.c.inc | 187 +++++++++++++----------------------
15
1 file changed, 70 insertions(+), 72 deletions(-)
5
2 files changed, 121 insertions(+), 121 deletions(-)
16
6
create mode 100644 tcg/i386/tcg-target-constr.h
17
diff --git a/tcg/optimize.c b/tcg/optimize.c
7
18
index XXXXXXX..XXXXXXX 100644
8
diff --git a/tcg/i386/tcg-target-constr.h b/tcg/i386/tcg-target-constr.h
19
--- a/tcg/optimize.c
9
new file mode 100644
20
+++ b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX
21
@@ -XXX,XX +XXX,XX @@ static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
11
--- /dev/null
22
return -1;
12
+++ b/tcg/i386/tcg-target-constr.h
23
}
13
@@ -XXX,XX +XXX,XX @@
24
14
+/* SPDX-License-Identifier: GPL-2.0-or-later */
25
+/**
15
+/*
26
+ * swap_commutative:
16
+ * i386 target-specific operand constaints.
27
+ * @dest: TCGArg of the destination argument, or NO_DEST.
17
+ * Copyright (c) 2020 Linaro
28
+ * @p1: first paired argument
29
+ * @p2: second paired argument
30
+ *
31
+ * If *@p1 is a constant and *@p2 is not, swap.
32
+ * If *@p2 matches @dest, swap.
33
+ * Return true if a swap was performed.
18
+ */
34
+ */
19
+
35
+
20
+C_O0_I1(r)
36
+#define NO_DEST temp_arg(NULL)
21
+
37
+
22
+C_O0_I2(qi, r)
38
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
23
+C_O0_I2(ri, r)
39
{
24
+C_O0_I2(re, r)
40
TCGArg a1 = *p1, a2 = *p2;
25
+C_O0_I2(r, re)
41
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
26
+C_O0_I2(L, L)
42
return false;
27
+C_O0_I2(x, r)
28
+
29
+C_O0_I3(L, L, L)
30
+
31
+C_O0_I4(L, L, L, L)
32
+C_O0_I4(r, r, ri, ri)
33
+
34
+C_O1_I1(r, 0)
35
+C_O1_I1(r, q)
36
+C_O1_I1(r, r)
37
+C_O1_I1(r, L)
38
+C_O1_I1(x, r)
39
+C_O1_I1(x, x)
40
+
41
+C_O1_I2(r, r, re)
42
+C_O1_I2(r, 0, r)
43
+C_O1_I2(r, 0, re)
44
+C_O1_I2(r, 0, reZ)
45
+C_O1_I2(r, 0, rI)
46
+C_O1_I2(r, 0, ri)
47
+C_O1_I2(r, 0, ci)
48
+C_O1_I2(r, r, ri)
49
+C_O1_I2(Q, 0, Q)
50
+C_O1_I2(q, r, re)
51
+C_O1_I2(r, L, L)
52
+C_O1_I2(x, x, x)
53
+C_N1_I2(r, r, r)
54
+C_N1_I2(r, r, rW)
55
+
56
+C_O1_I3(x, x, x, x)
57
+
58
+C_O1_I4(r, r, re, r, 0)
59
+C_O1_I4(r, r, r, ri, ri)
60
+
61
+C_O2_I1(r, r, L)
62
+
63
+C_O2_I2(r, r, L, L)
64
+C_O2_I2(a, d, a, r)
65
+
66
+C_O2_I3(a, d, 0, 1, r)
67
+
68
+C_O2_I4(r, r, 0, 1, re, re)
69
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
70
index XXXXXXX..XXXXXXX 100644
71
--- a/tcg/i386/tcg-target.c.inc
72
+++ b/tcg/i386/tcg-target.c.inc
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
74
}
75
}
43
}
76
44
77
+/* Define all constraint sets. */
45
+static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
78
+#include "../tcg-constr.c.inc"
46
+{
79
+
47
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
80
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
48
+ return fold_const2(ctx, op);
81
{
49
+}
82
- static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
50
+
83
- static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
51
static bool fold_masks(OptContext *ctx, TCGOp *op)
84
- static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
52
{
85
- static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
53
uint64_t a_mask = ctx->a_mask;
86
- static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
54
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
87
- static const TCGTargetOpDef r_q = { .args_ct_str = { "r", "q" } };
55
88
- static const TCGTargetOpDef r_re = { .args_ct_str = { "r", "re" } };
56
static bool fold_add(OptContext *ctx, TCGOp *op)
89
- static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } };
57
{
90
- static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
58
- if (fold_const2(ctx, op) ||
91
- static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
59
+ if (fold_const2_commutative(ctx, op) ||
92
- static const TCGTargetOpDef r_0_r = { .args_ct_str = { "r", "0", "r" } };
60
fold_xi_to_x(ctx, op, 0)) {
93
- static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
61
return true;
94
- static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
62
}
95
- static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
63
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
96
- static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
64
97
- static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
65
static bool fold_add2(OptContext *ctx, TCGOp *op)
98
- static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
66
{
99
- static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
67
+ /* Note that the high and low parts may be independently swapped. */
100
- static const TCGTargetOpDef r_r_L_L
68
+ swap_commutative(op->args[0], &op->args[2], &op->args[4]);
101
- = { .args_ct_str = { "r", "r", "L", "L" } };
69
+ swap_commutative(op->args[1], &op->args[3], &op->args[5]);
102
- static const TCGTargetOpDef L_L_L_L
70
+
103
- = { .args_ct_str = { "L", "L", "L", "L" } };
71
return fold_addsub2(ctx, op, true);
104
- static const TCGTargetOpDef x_x = { .args_ct_str = { "x", "x" } };
72
}
105
- static const TCGTargetOpDef x_x_x = { .args_ct_str = { "x", "x", "x" } };
73
106
- static const TCGTargetOpDef x_x_x_x
74
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
107
- = { .args_ct_str = { "x", "x", "x", "x" } };
75
{
108
- static const TCGTargetOpDef x_r = { .args_ct_str = { "x", "r" } };
76
uint64_t z1, z2;
77
78
- if (fold_const2(ctx, op) ||
79
+ if (fold_const2_commutative(ctx, op) ||
80
fold_xi_to_i(ctx, op, 0) ||
81
fold_xi_to_x(ctx, op, -1) ||
82
fold_xx_to_x(ctx, op)) {
83
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
85
{
86
TCGCond cond = op->args[2];
87
- int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
88
+ int i;
89
90
+ if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
91
+ op->args[2] = cond = tcg_swap_cond(cond);
92
+ }
93
+
94
+ i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
95
if (i == 0) {
96
tcg_op_remove(ctx->tcg, op);
97
return true;
98
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
99
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
100
{
101
TCGCond cond = op->args[4];
102
- int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
103
TCGArg label = op->args[5];
104
- int inv = 0;
105
+ int i, inv = 0;
106
107
+ if (swap_commutative2(&op->args[0], &op->args[2])) {
108
+ op->args[4] = cond = tcg_swap_cond(cond);
109
+ }
110
+
111
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
112
if (i >= 0) {
113
goto do_brcond_const;
114
}
115
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
116
117
static bool fold_eqv(OptContext *ctx, TCGOp *op)
118
{
119
- if (fold_const2(ctx, op) ||
120
+ if (fold_const2_commutative(ctx, op) ||
121
fold_xi_to_x(ctx, op, -1) ||
122
fold_xi_to_not(ctx, op, 0)) {
123
return true;
124
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
125
static bool fold_movcond(OptContext *ctx, TCGOp *op)
126
{
127
TCGCond cond = op->args[5];
128
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
129
+ int i;
130
131
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
132
+ op->args[5] = cond = tcg_swap_cond(cond);
133
+ }
134
+ /*
135
+ * Canonicalize the "false" input reg to match the destination reg so
136
+ * that the tcg backend can implement a "move if true" operation.
137
+ */
138
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
139
+ op->args[5] = cond = tcg_invert_cond(cond);
140
+ }
141
+
142
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
143
if (i >= 0) {
144
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
145
}
146
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
147
148
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
149
{
150
- if (fold_const2(ctx, op) ||
151
+ if (fold_const2_commutative(ctx, op) ||
152
fold_xi_to_i(ctx, op, 0)) {
153
return true;
154
}
155
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
156
157
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
158
{
159
+ swap_commutative(op->args[0], &op->args[2], &op->args[3]);
160
+
161
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
162
uint64_t a = arg_info(op->args[2])->val;
163
uint64_t b = arg_info(op->args[3])->val;
164
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
165
166
static bool fold_nand(OptContext *ctx, TCGOp *op)
167
{
168
- if (fold_const2(ctx, op) ||
169
+ if (fold_const2_commutative(ctx, op) ||
170
fold_xi_to_not(ctx, op, -1)) {
171
return true;
172
}
173
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
174
175
static bool fold_nor(OptContext *ctx, TCGOp *op)
176
{
177
- if (fold_const2(ctx, op) ||
178
+ if (fold_const2_commutative(ctx, op) ||
179
fold_xi_to_not(ctx, op, 0)) {
180
return true;
181
}
182
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
183
184
static bool fold_or(OptContext *ctx, TCGOp *op)
185
{
186
- if (fold_const2(ctx, op) ||
187
+ if (fold_const2_commutative(ctx, op) ||
188
fold_xi_to_x(ctx, op, 0) ||
189
fold_xx_to_x(ctx, op)) {
190
return true;
191
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
192
static bool fold_setcond(OptContext *ctx, TCGOp *op)
193
{
194
TCGCond cond = op->args[3];
195
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
196
+ int i;
197
198
+ if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
199
+ op->args[3] = cond = tcg_swap_cond(cond);
200
+ }
201
+
202
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
203
if (i >= 0) {
204
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
205
}
206
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
207
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
208
{
209
TCGCond cond = op->args[5];
210
- int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
211
- int inv = 0;
212
+ int i, inv = 0;
213
214
+ if (swap_commutative2(&op->args[1], &op->args[3])) {
215
+ op->args[5] = cond = tcg_swap_cond(cond);
216
+ }
217
+
218
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
219
if (i >= 0) {
220
goto do_setcond_const;
221
}
222
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
223
224
static bool fold_xor(OptContext *ctx, TCGOp *op)
225
{
226
- if (fold_const2(ctx, op) ||
227
+ if (fold_const2_commutative(ctx, op) ||
228
fold_xx_to_i(ctx, op, 0) ||
229
fold_xi_to_x(ctx, op, 0) ||
230
fold_xi_to_not(ctx, op, -1)) {
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
ctx.type = TCG_TYPE_I32;
233
}
234
235
- /* For commutative operations make constant second argument */
236
- switch (opc) {
237
- CASE_OP_32_64_VEC(add):
238
- CASE_OP_32_64_VEC(mul):
239
- CASE_OP_32_64_VEC(and):
240
- CASE_OP_32_64_VEC(or):
241
- CASE_OP_32_64_VEC(xor):
242
- CASE_OP_32_64(eqv):
243
- CASE_OP_32_64(nand):
244
- CASE_OP_32_64(nor):
245
- CASE_OP_32_64(muluh):
246
- CASE_OP_32_64(mulsh):
247
- swap_commutative(op->args[0], &op->args[1], &op->args[2]);
248
- break;
249
- CASE_OP_32_64(brcond):
250
- if (swap_commutative(-1, &op->args[0], &op->args[1])) {
251
- op->args[2] = tcg_swap_cond(op->args[2]);
252
- }
253
- break;
254
- CASE_OP_32_64(setcond):
255
- if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
256
- op->args[3] = tcg_swap_cond(op->args[3]);
257
- }
258
- break;
259
- CASE_OP_32_64(movcond):
260
- if (swap_commutative(-1, &op->args[1], &op->args[2])) {
261
- op->args[5] = tcg_swap_cond(op->args[5]);
262
- }
263
- /* For movcond, we canonicalize the "false" input reg to match
264
- the destination reg so that the tcg backend can implement
265
- a "move if true" operation. */
266
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
267
- op->args[5] = tcg_invert_cond(op->args[5]);
268
- }
269
- break;
270
- CASE_OP_32_64(add2):
271
- swap_commutative(op->args[0], &op->args[2], &op->args[4]);
272
- swap_commutative(op->args[1], &op->args[3], &op->args[5]);
273
- break;
274
- CASE_OP_32_64(mulu2):
275
- CASE_OP_32_64(muls2):
276
- swap_commutative(op->args[0], &op->args[2], &op->args[3]);
277
- break;
278
- case INDEX_op_brcond2_i32:
279
- if (swap_commutative2(&op->args[0], &op->args[2])) {
280
- op->args[4] = tcg_swap_cond(op->args[4]);
281
- }
282
- break;
283
- case INDEX_op_setcond2_i32:
284
- if (swap_commutative2(&op->args[1], &op->args[3])) {
285
- op->args[5] = tcg_swap_cond(op->args[5]);
286
- }
287
- break;
288
- default:
289
- break;
290
- }
109
-
291
-
110
switch (op) {
292
/* Assume all bits affected, and no bits known zero. */
111
case INDEX_op_goto_ptr:
293
ctx.a_mask = -1;
112
- return &r;
294
ctx.z_mask = -1;
113
+ return C_O0_I1(r);
114
115
case INDEX_op_ld8u_i32:
116
case INDEX_op_ld8u_i64:
117
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
118
case INDEX_op_ld32u_i64:
119
case INDEX_op_ld32s_i64:
120
case INDEX_op_ld_i64:
121
- return &r_r;
122
+ return C_O1_I1(r, r);
123
124
case INDEX_op_st8_i32:
125
case INDEX_op_st8_i64:
126
- return &qi_r;
127
+ return C_O0_I2(qi, r);
128
+
129
case INDEX_op_st16_i32:
130
case INDEX_op_st16_i64:
131
case INDEX_op_st_i32:
132
case INDEX_op_st32_i64:
133
- return &ri_r;
134
+ return C_O0_I2(ri, r);
135
+
136
case INDEX_op_st_i64:
137
- return &re_r;
138
+ return C_O0_I2(re, r);
139
140
case INDEX_op_add_i32:
141
case INDEX_op_add_i64:
142
- return &r_r_re;
143
+ return C_O1_I2(r, r, re);
144
+
145
case INDEX_op_sub_i32:
146
case INDEX_op_sub_i64:
147
case INDEX_op_mul_i32:
148
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
149
case INDEX_op_or_i64:
150
case INDEX_op_xor_i32:
151
case INDEX_op_xor_i64:
152
- return &r_0_re;
153
+ return C_O1_I2(r, 0, re);
154
155
case INDEX_op_and_i32:
156
case INDEX_op_and_i64:
157
- {
158
- static const TCGTargetOpDef and
159
- = { .args_ct_str = { "r", "0", "reZ" } };
160
- return &and;
161
- }
162
- break;
163
+ return C_O1_I2(r, 0, reZ);
164
+
165
case INDEX_op_andc_i32:
166
case INDEX_op_andc_i64:
167
- {
168
- static const TCGTargetOpDef andc
169
- = { .args_ct_str = { "r", "r", "rI" } };
170
- return &andc;
171
- }
172
- break;
173
+ return C_O1_I2(r, r, rI);
174
175
case INDEX_op_shl_i32:
176
case INDEX_op_shl_i64:
177
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
178
case INDEX_op_shr_i64:
179
case INDEX_op_sar_i32:
180
case INDEX_op_sar_i64:
181
- return have_bmi2 ? &r_r_ri : &r_0_ci;
182
+ return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci);
183
+
184
case INDEX_op_rotl_i32:
185
case INDEX_op_rotl_i64:
186
case INDEX_op_rotr_i32:
187
case INDEX_op_rotr_i64:
188
- return &r_0_ci;
189
+ return C_O1_I2(r, 0, ci);
190
191
case INDEX_op_brcond_i32:
192
case INDEX_op_brcond_i64:
193
- return &r_re;
194
+ return C_O0_I2(r, re);
195
196
case INDEX_op_bswap16_i32:
197
case INDEX_op_bswap16_i64:
198
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
199
case INDEX_op_not_i32:
200
case INDEX_op_not_i64:
201
case INDEX_op_extrh_i64_i32:
202
- return &r_0;
203
+ return C_O1_I1(r, 0);
204
205
case INDEX_op_ext8s_i32:
206
case INDEX_op_ext8s_i64:
207
case INDEX_op_ext8u_i32:
208
case INDEX_op_ext8u_i64:
209
- return &r_q;
210
+ return C_O1_I1(r, q);
211
+
212
case INDEX_op_ext16s_i32:
213
case INDEX_op_ext16s_i64:
214
case INDEX_op_ext16u_i32:
215
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
216
case INDEX_op_sextract_i32:
217
case INDEX_op_ctpop_i32:
218
case INDEX_op_ctpop_i64:
219
- return &r_r;
220
+ return C_O1_I1(r, r);
221
+
222
case INDEX_op_extract2_i32:
223
case INDEX_op_extract2_i64:
224
- return &r_0_r;
225
+ return C_O1_I2(r, 0, r);
226
227
case INDEX_op_deposit_i32:
228
case INDEX_op_deposit_i64:
229
- {
230
- static const TCGTargetOpDef dep
231
- = { .args_ct_str = { "Q", "0", "Q" } };
232
- return &dep;
233
- }
234
+ return C_O1_I2(Q, 0, Q);
235
+
236
case INDEX_op_setcond_i32:
237
case INDEX_op_setcond_i64:
238
- {
239
- static const TCGTargetOpDef setc
240
- = { .args_ct_str = { "q", "r", "re" } };
241
- return &setc;
242
- }
243
+ return C_O1_I2(q, r, re);
244
+
245
case INDEX_op_movcond_i32:
246
case INDEX_op_movcond_i64:
247
- {
248
- static const TCGTargetOpDef movc
249
- = { .args_ct_str = { "r", "r", "re", "r", "0" } };
250
- return &movc;
251
- }
252
+ return C_O1_I4(r, r, re, r, 0);
253
+
254
case INDEX_op_div2_i32:
255
case INDEX_op_div2_i64:
256
case INDEX_op_divu2_i32:
257
case INDEX_op_divu2_i64:
258
- {
259
- static const TCGTargetOpDef div2
260
- = { .args_ct_str = { "a", "d", "0", "1", "r" } };
261
- return &div2;
262
- }
263
+ return C_O2_I3(a, d, 0, 1, r);
264
+
265
case INDEX_op_mulu2_i32:
266
case INDEX_op_mulu2_i64:
267
case INDEX_op_muls2_i32:
268
case INDEX_op_muls2_i64:
269
- {
270
- static const TCGTargetOpDef mul2
271
- = { .args_ct_str = { "a", "d", "a", "r" } };
272
- return &mul2;
273
- }
274
+ return C_O2_I2(a, d, a, r);
275
+
276
case INDEX_op_add2_i32:
277
case INDEX_op_add2_i64:
278
case INDEX_op_sub2_i32:
279
case INDEX_op_sub2_i64:
280
- {
281
- static const TCGTargetOpDef arith2
282
- = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } };
283
- return &arith2;
284
- }
285
+ return C_O2_I4(r, r, 0, 1, re, re);
286
+
287
case INDEX_op_ctz_i32:
288
case INDEX_op_ctz_i64:
289
- {
290
- static const TCGTargetOpDef ctz[2] = {
291
- { .args_ct_str = { "&r", "r", "r" } },
292
- { .args_ct_str = { "&r", "r", "rW" } },
293
- };
294
- return &ctz[have_bmi1];
295
- }
296
+ return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
297
+
298
case INDEX_op_clz_i32:
299
case INDEX_op_clz_i64:
300
- {
301
- static const TCGTargetOpDef clz[2] = {
302
- { .args_ct_str = { "&r", "r", "r" } },
303
- { .args_ct_str = { "&r", "r", "rW" } },
304
- };
305
- return &clz[have_lzcnt];
306
- }
307
+ return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
308
309
case INDEX_op_qemu_ld_i32:
310
- return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
311
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
312
+ ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
313
+
314
case INDEX_op_qemu_st_i32:
315
- return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
316
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
317
+ ? C_O0_I2(L, L) : C_O0_I3(L, L, L));
318
+
319
case INDEX_op_qemu_ld_i64:
320
- return (TCG_TARGET_REG_BITS == 64 ? &r_L
321
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
322
- : &r_r_L_L);
323
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
324
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, L)
325
+ : C_O2_I2(r, r, L, L));
326
+
327
case INDEX_op_qemu_st_i64:
328
- return (TCG_TARGET_REG_BITS == 64 ? &L_L
329
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
330
- : &L_L_L_L);
331
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L)
332
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
333
+ : C_O0_I4(L, L, L, L));
334
335
case INDEX_op_brcond2_i32:
336
- {
337
- static const TCGTargetOpDef b2
338
- = { .args_ct_str = { "r", "r", "ri", "ri" } };
339
- return &b2;
340
- }
341
+ return C_O0_I4(r, r, ri, ri);
342
+
343
case INDEX_op_setcond2_i32:
344
- {
345
- static const TCGTargetOpDef s2
346
- = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
347
- return &s2;
348
- }
349
+ return C_O1_I4(r, r, r, ri, ri);
350
351
case INDEX_op_ld_vec:
352
- case INDEX_op_st_vec:
353
case INDEX_op_dupm_vec:
354
- return &x_r;
355
+ return C_O1_I1(x, r);
356
+
357
+ case INDEX_op_st_vec:
358
+ return C_O0_I2(x, r);
359
360
case INDEX_op_add_vec:
361
case INDEX_op_sub_vec:
362
@@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
363
#if TCG_TARGET_REG_BITS == 32
364
case INDEX_op_dup2_vec:
365
#endif
366
- return &x_x_x;
367
+ return C_O1_I2(x, x, x);
368
+
369
case INDEX_op_abs_vec:
370
case INDEX_op_dup_vec:
371
case INDEX_op_shli_vec:
372
case INDEX_op_shri_vec:
373
case INDEX_op_sari_vec:
374
case INDEX_op_x86_psrldq_vec:
375
- return &x_x;
376
+ return C_O1_I1(x, x);
377
+
378
case INDEX_op_x86_vpblendvb_vec:
379
- return &x_x_x_x;
380
+ return C_O1_I3(x, x, x, x);
381
382
default:
383
break;
384
--
295
--
385
2.25.1
296
2.25.1
386
297
387
298
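Aside, for readers skimming the fold_setcond/fold_setcond2 hunks above: calling swap_commutative before do_constant_folding_cond canonicalizes a comparison so that a constant lands in the second operand slot, and the condition is swapped to preserve meaning. Below is a minimal standalone sketch of that rule; the helper names and the toy notion of "constant" are illustrative only and are not the tcg/optimize.c helpers (which also take the destination argument).

/* Toy model: canonicalize a commutative comparison so the constant is on
 * the right, swapping the condition to preserve meaning.  Illustrative
 * only; this is not the tcg/optimize.c implementation. */
#include <stdbool.h>
#include <stdio.h>

typedef enum { COND_LT, COND_GT, COND_EQ } Cond;

typedef struct {
    long val;
    bool is_const;
} Arg;

/* Returns true if the operands were swapped. */
static bool swap_commutative(Arg *a, Arg *b)
{
    if (a->is_const && !b->is_const) {
        Arg t = *a;
        *a = *b;
        *b = t;
        return true;
    }
    return false;
}

/* Swapping operands of a comparison mirrors the condition: a < b <=> b > a. */
static Cond swap_cond(Cond c)
{
    switch (c) {
    case COND_LT: return COND_GT;
    case COND_GT: return COND_LT;
    default:      return c;   /* equality is symmetric */
    }
}

int main(void)
{
    Arg x = { 5, true };      /* constant 5 */
    Arg y = { 0, false };     /* non-constant */
    Cond cond = COND_LT;      /* "5 < y" */

    if (swap_commutative(&x, &y)) {
        cond = swap_cond(cond);   /* now "y > 5" */
    }
    printf("const on the right: %d, cond: %d\n", y.is_const, cond);
    return 0;
}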
1
This uses an existing hole in the TCGArgConstraint structure
1
This "garbage" setting pre-dates the addition of the type
2
and will be convenient for keeping the data in one place.
2
changing opcodes INDEX_op_ext_i32_i64, INDEX_op_extu_i32_i64,
3
and INDEX_op_extr{l,h}_i64_i32.
3
4
5
So now we have definitive points at which to adjust z_mask
6
to eliminate such bits from the 32-bit operands.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
11
---
6
include/tcg/tcg.h | 2 +-
12
tcg/optimize.c | 35 ++++++++++++++++-------------------
7
tcg/tcg.c | 35 +++++++++++++++++------------------
13
1 file changed, 16 insertions(+), 19 deletions(-)
8
2 files changed, 18 insertions(+), 19 deletions(-)
9
14
10
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg.h
17
--- a/tcg/optimize.c
13
+++ b/include/tcg/tcg.h
18
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
19
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
15
typedef struct TCGArgConstraint {
20
ti->is_const = true;
16
uint16_t ct;
21
ti->val = ts->val;
17
uint8_t alias_index;
22
ti->z_mask = ts->val;
18
+ uint8_t sort_index;
23
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
19
TCGRegSet regs;
24
- /* High bits of a 32-bit quantity are garbage. */
20
} TCGArgConstraint;
25
- ti->z_mask |= ~0xffffffffull;
21
26
- }
22
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
27
} else {
23
uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
28
ti->is_const = false;
24
uint8_t flags;
29
ti->z_mask = -1;
25
TCGArgConstraint *args_ct;
30
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
26
- int *sorted_args;
31
TCGTemp *src_ts = arg_temp(src);
27
#if defined(CONFIG_DEBUG_TCG)
32
TempOptInfo *di;
28
int used;
33
TempOptInfo *si;
29
#endif
34
- uint64_t z_mask;
30
diff --git a/tcg/tcg.c b/tcg/tcg.c
35
TCGOpcode new_op;
31
index XXXXXXX..XXXXXXX 100644
36
32
--- a/tcg/tcg.c
37
if (ts_are_copies(dst_ts, src_ts)) {
33
+++ b/tcg/tcg.c
38
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
34
@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
39
op->args[0] = dst;
35
int op, total_args, n, i;
40
op->args[1] = src;
36
TCGOpDef *def;
41
37
TCGArgConstraint *args_ct;
42
- z_mask = si->z_mask;
38
- int *sorted_args;
43
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
39
TCGTemp *ts;
44
- /* High bits of the destination are now garbage. */
40
45
- z_mask |= ~0xffffffffull;
41
memset(s, 0, sizeof(*s));
46
- }
42
@@ -XXX,XX +XXX,XX @@ void tcg_context_init(TCGContext *s)
47
- di->z_mask = z_mask;
48
+ di->z_mask = si->z_mask;
49
50
if (src_ts->type == dst_ts->type) {
51
TempOptInfo *ni = ts_info(si->next_copy);
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
53
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
54
TCGArg dst, uint64_t val)
55
{
56
- /* Convert movi to mov with constant temp. */
57
- TCGTemp *tv = tcg_constant_internal(ctx->type, val);
58
+ TCGTemp *tv;
59
60
+ if (ctx->type == TCG_TYPE_I32) {
61
+ val = (int32_t)val;
62
+ }
63
+
64
+ /* Convert movi to mov with constant temp. */
65
+ tv = tcg_constant_internal(ctx->type, val);
66
init_ts_info(ctx, tv);
67
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
68
}
69
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
70
uint64_t z_mask = ctx->z_mask;
71
72
/*
73
- * 32-bit ops generate 32-bit results. For the result is zero test
74
- * below, we can ignore high bits, but for further optimizations we
75
- * need to record that the high bits contain garbage.
76
+ * 32-bit ops generate 32-bit results, which for the purpose of
77
+ * simplifying tcg are sign-extended. Certainly that's how we
78
+ * represent our constants elsewhere. Note that the bits will
79
+ * be reset properly for a 64-bit value when encountering the
80
+ * type changing opcodes.
81
*/
82
if (ctx->type == TCG_TYPE_I32) {
83
- ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
84
- a_mask &= MAKE_64BIT_MASK(0, 32);
85
- z_mask &= MAKE_64BIT_MASK(0, 32);
86
+ a_mask = (int32_t)a_mask;
87
+ z_mask = (int32_t)z_mask;
88
+ ctx->z_mask = z_mask;
43
}
89
}
44
90
45
args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
91
if (z_mask == 0) {
46
- sorted_args = g_malloc(sizeof(int) * total_args);
47
48
for(op = 0; op < NB_OPS; op++) {
49
def = &tcg_op_defs[op];
50
def->args_ct = args_ct;
51
- def->sorted_args = sorted_args;
52
n = def->nb_iargs + def->nb_oargs;
53
- sorted_args += n;
54
args_ct += n;
55
}
56
57
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
58
/* sort from highest priority to lowest */
59
static void sort_constraints(TCGOpDef *def, int start, int n)
60
{
61
- int i, j, p1, p2, tmp;
62
+ int i, j;
63
+ TCGArgConstraint *a = def->args_ct;
64
65
- for(i = 0; i < n; i++)
66
- def->sorted_args[start + i] = start + i;
67
- if (n <= 1)
68
+ for (i = 0; i < n; i++) {
69
+ a[start + i].sort_index = start + i;
70
+ }
71
+ if (n <= 1) {
72
return;
73
- for(i = 0; i < n - 1; i++) {
74
- for(j = i + 1; j < n; j++) {
75
- p1 = get_constraint_priority(def, def->sorted_args[start + i]);
76
- p2 = get_constraint_priority(def, def->sorted_args[start + j]);
77
+ }
78
+ for (i = 0; i < n - 1; i++) {
79
+ for (j = i + 1; j < n; j++) {
80
+ int p1 = get_constraint_priority(def, a[start + i].sort_index);
81
+ int p2 = get_constraint_priority(def, a[start + j].sort_index);
82
if (p1 < p2) {
83
- tmp = def->sorted_args[start + i];
84
- def->sorted_args[start + i] = def->sorted_args[start + j];
85
- def->sorted_args[start + j] = tmp;
86
+ int tmp = a[start + i].sort_index;
87
+ a[start + i].sort_index = a[start + j].sort_index;
88
+ a[start + j].sort_index = tmp;
89
}
90
}
91
}
92
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
93
for (k = 0; k < nb_iargs; k++) {
94
TCGRegSet i_preferred_regs, o_preferred_regs;
95
96
- i = def->sorted_args[nb_oargs + k];
97
+ i = def->args_ct[nb_oargs + k].sort_index;
98
arg = op->args[i];
99
arg_ct = &def->args_ct[i];
100
ts = arg_temp(arg);
101
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
102
int k2, i2;
103
reg = ts->reg;
104
for (k2 = 0 ; k2 < k ; k2++) {
105
- i2 = def->sorted_args[nb_oargs + k2];
106
+ i2 = def->args_ct[nb_oargs + k2].sort_index;
107
if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
108
reg == new_args[i2]) {
109
goto allocate_in_reg;
110
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
111
112
/* satisfy the output constraints */
113
for(k = 0; k < nb_oargs; k++) {
114
- i = def->sorted_args[k];
115
+ i = def->args_ct[k].sort_index;
116
arg = op->args[i];
117
arg_ct = &def->args_ct[i];
118
ts = arg_temp(arg);
119
--
92
--
120
2.25.1
93
2.25.1
121
94
122
95
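A note on the convention adopted by the tcg_opt_gen_movi() and fold_masks() hunks above: for TCG_TYPE_I32, constants and known-zero masks are kept sign-extended from bit 31 rather than zero-extended, so the high half of the 64-bit field mirrors bit 31. A small standalone illustration of that representation follows; it uses plain host C and nothing QEMU-specific.

/* Show the sign-extended representation of 32-bit quantities in a
 * 64-bit field, as used for constants and known-zero masks above.
 * Illustrative only. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Known-zero mask of a 32-bit result that fits in 8 bits:
     * bit 31 is clear, so sign-extension leaves the high half zero. */
    uint64_t z_mask = (uint64_t)(int32_t)0xffu;
    printf("z_mask = 0x%016" PRIx64 "\n", z_mask);   /* 0x00000000000000ff */

    /* A 32-bit constant with bit 31 set: the high half becomes all
     * ones, matching how such a constant is stored in 64 bits. */
    uint64_t val = (uint64_t)(int32_t)0x80000001u;
    printf("val    = 0x%016" PRIx64 "\n", val);      /* 0xffffffff80000001 */
    return 0;
}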
1
The normal movi opcodes are going away. We need something
1
Recognize the constant function for or-complement.
2
for TCI to use internally.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
include/tcg/tcg-opc.h | 8 ++++++++
8
tcg/optimize.c | 1 +
8
tcg/tci.c | 4 ++--
9
1 file changed, 1 insertion(+)
9
tcg/tci/tcg-target.c.inc | 4 ++--
10
3 files changed, 12 insertions(+), 4 deletions(-)
11
10
12
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg-opc.h
13
--- a/tcg/optimize.c
15
+++ b/include/tcg/tcg-opc.h
14
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
17
#include "tcg-target.opc.h"
16
static bool fold_orc(OptContext *ctx, TCGOp *op)
18
#endif
17
{
19
18
if (fold_const2(ctx, op) ||
20
+#ifdef TCG_TARGET_INTERPRETER
19
+ fold_xx_to_i(ctx, op, -1) ||
21
+/* These opcodes are only for use between the tci generator and interpreter. */
20
fold_xi_to_x(ctx, op, -1) ||
22
+DEF(tci_movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT)
21
fold_ix_to_not(ctx, op, 0)) {
23
+#if TCG_TARGET_REG_BITS == 64
22
return true;
24
+DEF(tci_movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
25
+#endif
26
+#endif
27
+
28
#undef TLADDR_ARGS
29
#undef DATA64_ARGS
30
#undef IMPL
31
diff --git a/tcg/tci.c b/tcg/tci.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/tci.c
34
+++ b/tcg/tci.c
35
@@ -XXX,XX +XXX,XX @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
36
t1 = tci_read_r32(regs, &tb_ptr);
37
tci_write_reg32(regs, t0, t1);
38
break;
39
- case INDEX_op_movi_i32:
40
+ case INDEX_op_tci_movi_i32:
41
t0 = *tb_ptr++;
42
t1 = tci_read_i32(&tb_ptr);
43
tci_write_reg32(regs, t0, t1);
44
@@ -XXX,XX +XXX,XX @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
45
t1 = tci_read_r64(regs, &tb_ptr);
46
tci_write_reg64(regs, t0, t1);
47
break;
48
- case INDEX_op_movi_i64:
49
+ case INDEX_op_tci_movi_i64:
50
t0 = *tb_ptr++;
51
t1 = tci_read_i64(&tb_ptr);
52
tci_write_reg64(regs, t0, t1);
53
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/tci/tcg-target.c.inc
56
+++ b/tcg/tci/tcg-target.c.inc
57
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
58
uint8_t *old_code_ptr = s->code_ptr;
59
uint32_t arg32 = arg;
60
if (type == TCG_TYPE_I32 || arg == arg32) {
61
- tcg_out_op_t(s, INDEX_op_movi_i32);
62
+ tcg_out_op_t(s, INDEX_op_tci_movi_i32);
63
tcg_out_r(s, t0);
64
tcg_out32(s, arg32);
65
} else {
66
tcg_debug_assert(type == TCG_TYPE_I64);
67
#if TCG_TARGET_REG_BITS == 64
68
- tcg_out_op_t(s, INDEX_op_movi_i64);
69
+ tcg_out_op_t(s, INDEX_op_tci_movi_i64);
70
tcg_out_r(s, t0);
71
tcg_out64(s, arg);
72
#else
73
--
23
--
74
2.25.1
24
2.25.1
75
25
76
26
diff view generated by jsdifflib
1
This wasn't actually used for anything, really. All variable
1
Recognize the identity function for low-part multiply.
2
operands must accept registers, which are indicated by the
3
set in TCGArgConstraint.regs.
4
2
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
include/tcg/tcg.h | 1 -
8
tcg/optimize.c | 3 ++-
8
tcg/tcg.c | 15 ++++-----------
9
1 file changed, 2 insertions(+), 1 deletion(-)
9
tcg/aarch64/tcg-target.c.inc | 3 ---
10
tcg/arm/tcg-target.c.inc | 3 ---
11
tcg/i386/tcg-target.c.inc | 11 -----------
12
tcg/mips/tcg-target.c.inc | 3 ---
13
tcg/ppc/tcg-target.c.inc | 5 -----
14
tcg/riscv/tcg-target.c.inc | 2 --
15
tcg/s390/tcg-target.c.inc | 4 ----
16
tcg/sparc/tcg-target.c.inc | 5 -----
17
tcg/tci/tcg-target.c.inc | 1 -
18
11 files changed, 4 insertions(+), 49 deletions(-)
19
10
20
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
21
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
22
--- a/include/tcg/tcg.h
13
--- a/tcg/optimize.c
23
+++ b/include/tcg/tcg.h
14
+++ b/tcg/optimize.c
24
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
15
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
25
#define TCG_CT_ALIAS 0x80
16
static bool fold_mul(OptContext *ctx, TCGOp *op)
26
#define TCG_CT_IALIAS 0x40
27
#define TCG_CT_NEWREG 0x20 /* output requires a new register */
28
-#define TCG_CT_REG 0x01
29
#define TCG_CT_CONST 0x02 /* any constant of register size */
30
31
typedef struct TCGArgConstraint {
32
diff --git a/tcg/tcg.c b/tcg/tcg.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/tcg.c
35
+++ b/tcg/tcg.c
36
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
37
/* we give more priority to constraints with less registers */
38
static int get_constraint_priority(const TCGOpDef *def, int k)
39
{
17
{
40
- const TCGArgConstraint *arg_ct;
18
if (fold_const2(ctx, op) ||
41
+ const TCGArgConstraint *arg_ct = &def->args_ct[k];
19
- fold_xi_to_i(ctx, op, 0)) {
42
+ int n;
20
+ fold_xi_to_i(ctx, op, 0) ||
43
21
+ fold_xi_to_x(ctx, op, 1)) {
44
- int i, n;
22
return true;
45
- arg_ct = &def->args_ct[k];
46
if (arg_ct->ct & TCG_CT_ALIAS) {
47
/* an alias is equivalent to a single register */
48
n = 1;
49
} else {
50
- if (!(arg_ct->ct & TCG_CT_REG))
51
- return 0;
52
- n = 0;
53
- for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
54
- if (tcg_regset_test_reg(arg_ct->regs, i))
55
- n++;
56
- }
57
+ n = ctpop64(arg_ct->regs);
58
}
23
}
59
return TCG_TARGET_NB_REGS - n + 1;
24
return false;
60
}
61
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
62
int oarg = *ct_str - '0';
63
tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
64
tcg_debug_assert(oarg < def->nb_oargs);
65
- tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
66
+ tcg_debug_assert(def->args_ct[oarg].regs != 0);
67
/* TCG_CT_ALIAS is for the output arguments.
68
The input is tagged with TCG_CT_IALIAS. */
69
def->args_ct[i] = def->args_ct[oarg];
70
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
71
index XXXXXXX..XXXXXXX 100644
72
--- a/tcg/aarch64/tcg-target.c.inc
73
+++ b/tcg/aarch64/tcg-target.c.inc
74
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
75
{
76
switch (*ct_str++) {
77
case 'r': /* general registers */
78
- ct->ct |= TCG_CT_REG;
79
ct->regs |= 0xffffffffu;
80
break;
81
case 'w': /* advsimd registers */
82
- ct->ct |= TCG_CT_REG;
83
ct->regs |= 0xffffffff00000000ull;
84
break;
85
case 'l': /* qemu_ld / qemu_st address, data_reg */
86
- ct->ct |= TCG_CT_REG;
87
ct->regs = 0xffffffffu;
88
#ifdef CONFIG_SOFTMMU
89
/* x0 and x1 will be overwritten when reading the tlb entry,
90
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
91
index XXXXXXX..XXXXXXX 100644
92
--- a/tcg/arm/tcg-target.c.inc
93
+++ b/tcg/arm/tcg-target.c.inc
94
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
95
break;
96
97
case 'r':
98
- ct->ct |= TCG_CT_REG;
99
ct->regs = 0xffff;
100
break;
101
102
/* qemu_ld address */
103
case 'l':
104
- ct->ct |= TCG_CT_REG;
105
ct->regs = 0xffff;
106
#ifdef CONFIG_SOFTMMU
107
/* r0-r2,lr will be overwritten when reading the tlb entry,
108
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
109
110
/* qemu_st address & data */
111
case 's':
112
- ct->ct |= TCG_CT_REG;
113
ct->regs = 0xffff;
114
/* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
115
and r0-r1 doing the byte swapping, so don't use these. */
116
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
117
index XXXXXXX..XXXXXXX 100644
118
--- a/tcg/i386/tcg-target.c.inc
119
+++ b/tcg/i386/tcg-target.c.inc
120
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
121
{
122
switch(*ct_str++) {
123
case 'a':
124
- ct->ct |= TCG_CT_REG;
125
tcg_regset_set_reg(ct->regs, TCG_REG_EAX);
126
break;
127
case 'b':
128
- ct->ct |= TCG_CT_REG;
129
tcg_regset_set_reg(ct->regs, TCG_REG_EBX);
130
break;
131
case 'c':
132
- ct->ct |= TCG_CT_REG;
133
tcg_regset_set_reg(ct->regs, TCG_REG_ECX);
134
break;
135
case 'd':
136
- ct->ct |= TCG_CT_REG;
137
tcg_regset_set_reg(ct->regs, TCG_REG_EDX);
138
break;
139
case 'S':
140
- ct->ct |= TCG_CT_REG;
141
tcg_regset_set_reg(ct->regs, TCG_REG_ESI);
142
break;
143
case 'D':
144
- ct->ct |= TCG_CT_REG;
145
tcg_regset_set_reg(ct->regs, TCG_REG_EDI);
146
break;
147
case 'q':
148
/* A register that can be used as a byte operand. */
149
- ct->ct |= TCG_CT_REG;
150
ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
151
break;
152
case 'Q':
153
/* A register with an addressable second byte (e.g. %ah). */
154
- ct->ct |= TCG_CT_REG;
155
ct->regs = 0xf;
156
break;
157
case 'r':
158
/* A general register. */
159
- ct->ct |= TCG_CT_REG;
160
ct->regs |= ALL_GENERAL_REGS;
161
break;
162
case 'W':
163
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
164
break;
165
case 'x':
166
/* A vector register. */
167
- ct->ct |= TCG_CT_REG;
168
ct->regs |= ALL_VECTOR_REGS;
169
break;
170
171
/* qemu_ld/st address constraint */
172
case 'L':
173
- ct->ct |= TCG_CT_REG;
174
ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
175
tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
176
tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
177
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
178
index XXXXXXX..XXXXXXX 100644
179
--- a/tcg/mips/tcg-target.c.inc
180
+++ b/tcg/mips/tcg-target.c.inc
181
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
182
{
183
switch(*ct_str++) {
184
case 'r':
185
- ct->ct |= TCG_CT_REG;
186
ct->regs = 0xffffffff;
187
break;
188
case 'L': /* qemu_ld input arg constraint */
189
- ct->ct |= TCG_CT_REG;
190
ct->regs = 0xffffffff;
191
tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
192
#if defined(CONFIG_SOFTMMU)
193
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
194
#endif
195
break;
196
case 'S': /* qemu_st constraint */
197
- ct->ct |= TCG_CT_REG;
198
ct->regs = 0xffffffff;
199
tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
200
#if defined(CONFIG_SOFTMMU)
201
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
202
index XXXXXXX..XXXXXXX 100644
203
--- a/tcg/ppc/tcg-target.c.inc
204
+++ b/tcg/ppc/tcg-target.c.inc
205
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
206
{
207
switch (*ct_str++) {
208
case 'A': case 'B': case 'C': case 'D':
209
- ct->ct |= TCG_CT_REG;
210
tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
211
break;
212
case 'r':
213
- ct->ct |= TCG_CT_REG;
214
ct->regs = 0xffffffff;
215
break;
216
case 'v':
217
- ct->ct |= TCG_CT_REG;
218
ct->regs = 0xffffffff00000000ull;
219
break;
220
case 'L': /* qemu_ld constraint */
221
- ct->ct |= TCG_CT_REG;
222
ct->regs = 0xffffffff;
223
tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
224
#ifdef CONFIG_SOFTMMU
225
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
226
#endif
227
break;
228
case 'S': /* qemu_st constraint */
229
- ct->ct |= TCG_CT_REG;
230
ct->regs = 0xffffffff;
231
tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
232
#ifdef CONFIG_SOFTMMU
233
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
234
index XXXXXXX..XXXXXXX 100644
235
--- a/tcg/riscv/tcg-target.c.inc
236
+++ b/tcg/riscv/tcg-target.c.inc
237
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
238
{
239
switch (*ct_str++) {
240
case 'r':
241
- ct->ct |= TCG_CT_REG;
242
ct->regs = 0xffffffff;
243
break;
244
case 'L':
245
/* qemu_ld/qemu_st constraint */
246
- ct->ct |= TCG_CT_REG;
247
ct->regs = 0xffffffff;
248
/* qemu_ld/qemu_st uses TCG_REG_TMP0 */
249
#if defined(CONFIG_SOFTMMU)
250
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
251
index XXXXXXX..XXXXXXX 100644
252
--- a/tcg/s390/tcg-target.c.inc
253
+++ b/tcg/s390/tcg-target.c.inc
254
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
255
{
256
switch (*ct_str++) {
257
case 'r': /* all registers */
258
- ct->ct |= TCG_CT_REG;
259
ct->regs = 0xffff;
260
break;
261
case 'L': /* qemu_ld/st constraint */
262
- ct->ct |= TCG_CT_REG;
263
ct->regs = 0xffff;
264
tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
265
tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
266
tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
267
break;
268
case 'a': /* force R2 for division */
269
- ct->ct |= TCG_CT_REG;
270
ct->regs = 0;
271
tcg_regset_set_reg(ct->regs, TCG_REG_R2);
272
break;
273
case 'b': /* force R3 for division */
274
- ct->ct |= TCG_CT_REG;
275
ct->regs = 0;
276
tcg_regset_set_reg(ct->regs, TCG_REG_R3);
277
break;
278
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
279
index XXXXXXX..XXXXXXX 100644
280
--- a/tcg/sparc/tcg-target.c.inc
281
+++ b/tcg/sparc/tcg-target.c.inc
282
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
283
{
284
switch (*ct_str++) {
285
case 'r':
286
- ct->ct |= TCG_CT_REG;
287
ct->regs = 0xffffffff;
288
break;
289
case 'R':
290
- ct->ct |= TCG_CT_REG;
291
ct->regs = ALL_64;
292
break;
293
case 'A': /* qemu_ld/st address constraint */
294
- ct->ct |= TCG_CT_REG;
295
ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
296
reserve_helpers:
297
tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
298
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
299
tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
300
break;
301
case 's': /* qemu_st data 32-bit constraint */
302
- ct->ct |= TCG_CT_REG;
303
ct->regs = 0xffffffff;
304
goto reserve_helpers;
305
case 'S': /* qemu_st data 64-bit constraint */
306
- ct->ct |= TCG_CT_REG;
307
ct->regs = ALL_64;
308
goto reserve_helpers;
309
case 'I':
310
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
311
index XXXXXXX..XXXXXXX 100644
312
--- a/tcg/tci/tcg-target.c.inc
313
+++ b/tcg/tci/tcg-target.c.inc
314
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
315
case 'r':
316
case 'L': /* qemu_ld constraint */
317
case 'S': /* qemu_st constraint */
318
- ct->ct |= TCG_CT_REG;
319
ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
320
break;
321
default:
322
--
25
--
323
2.25.1
26
2.25.1
324
27
325
28
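The fold_orc() hunk above leans on simple identities of or-complement, taking orc(a, b) = a | ~b: orc(x, x) is all-ones, orc(x, -1) is x, and orc(0, y) is ~y. A quick standalone check of those identities (illustrative only):

/* Standalone check of the or-complement identities used above. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t orc(uint64_t a, uint64_t b) { return a | ~b; }

int main(void)
{
    uint64_t x = 0x123456789abcdef0ull, y = 0xfedcba9876543210ull;

    assert(orc(x, x) == UINT64_MAX);   /* constant: all ones */
    assert(orc(x, UINT64_MAX) == x);   /* identity: second input is -1 */
    assert(orc(0, y) == ~y);           /* reduces to a plain not */

    puts("orc identities hold");
    return 0;
}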
1
Improve expand_vec_shi to use sign-extraction for MO_32.
1
Recognize the identity function for division.
2
This allows a single VSPLTISB instruction to load all of
3
the valid shift constants.
4
2
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
tcg/ppc/tcg-target.c.inc | 44 ++++++++++++++++++++++++----------------
8
tcg/optimize.c | 6 +++++-
8
1 file changed, 27 insertions(+), 17 deletions(-)
9
1 file changed, 5 insertions(+), 1 deletion(-)
9
10
10
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.c.inc
13
--- a/tcg/optimize.c
13
+++ b/tcg/ppc/tcg-target.c.inc
14
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
15
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
15
static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
16
16
TCGv_vec v1, TCGArg imm, TCGOpcode opci)
17
static bool fold_divide(OptContext *ctx, TCGOp *op)
17
{
18
{
18
- TCGv_vec t1 = tcg_temp_new_vec(type);
19
- return fold_const2(ctx, op);
19
+ TCGv_vec t1;
20
+ if (fold_const2(ctx, op) ||
20
21
+ fold_xi_to_x(ctx, op, 1)) {
21
- /* Splat w/bytes for xxspltib. */
22
+ return true;
22
- tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1));
23
+ if (vece == MO_32) {
24
+ /*
25
+ * Only 5 bits are significant, and VSPLTISB can represent -16..15.
26
+ * So using negative numbers gets us the 4th bit easily.
27
+ */
28
+ imm = sextract32(imm, 0, 5);
29
+ } else {
30
+ imm &= (8 << vece) - 1;
31
+ }
23
+ }
32
+
24
+ return false;
33
+ /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
34
+ t1 = tcg_constant_vec(type, MO_8, imm);
35
vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
36
tcgv_vec_arg(v1), tcgv_vec_arg(t1));
37
- tcg_temp_free_vec(t1);
38
}
25
}
39
26
40
static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
27
static bool fold_dup(OptContext *ctx, TCGOp *op)
41
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
42
{
43
TCGv_vec t1 = tcg_temp_new_vec(type);
44
TCGv_vec t2 = tcg_temp_new_vec(type);
45
- TCGv_vec t3, t4;
46
+ TCGv_vec c0, c16;
47
48
switch (vece) {
49
case MO_8:
50
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
51
52
case MO_32:
53
tcg_debug_assert(!have_isa_2_07);
54
- t3 = tcg_temp_new_vec(type);
55
- t4 = tcg_temp_new_vec(type);
56
- tcg_gen_dupi_vec(MO_8, t4, -16);
57
+ /*
58
+ * Only 5 bits are significant, and VSPLTISB can represent -16..15.
59
+ * So using -16 is a quick way to represent 16.
60
+ */
61
+ c16 = tcg_constant_vec(type, MO_8, -16);
62
+ c0 = tcg_constant_vec(type, MO_8, 0);
63
+
64
vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
65
- tcgv_vec_arg(v2), tcgv_vec_arg(t4));
66
+ tcgv_vec_arg(v2), tcgv_vec_arg(c16));
67
vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
68
tcgv_vec_arg(v1), tcgv_vec_arg(v2));
69
- tcg_gen_dupi_vec(MO_8, t3, 0);
70
- vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3),
71
- tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
72
- vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3),
73
- tcgv_vec_arg(t3), tcgv_vec_arg(t4));
74
- tcg_gen_add_vec(MO_32, v0, t2, t3);
75
- tcg_temp_free_vec(t3);
76
- tcg_temp_free_vec(t4);
77
+ vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
78
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
79
+ vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
80
+ tcgv_vec_arg(t1), tcgv_vec_arg(c16));
81
+ tcg_gen_add_vec(MO_32, v0, t1, t2);
82
break;
83
84
default:
85
--
28
--
86
2.25.1
29
2.25.1
87
30
88
31
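On the get_constraint_priority() change above: counting the allowed registers with a population count replaces the per-register loop without changing the result. A standalone sketch of the same computation follows; the mask value and the total of 64 registers are placeholders, not i386's actual TCG_TARGET_NB_REGS.

/* Count allowed registers in a constraint mask with a popcount,
 * mirroring the shape of get_constraint_priority() above.
 * The mask and the total of 64 registers are placeholders. */
#include <stdint.h>
#include <stdio.h>

static int popcount64(uint64_t x)
{
    int n = 0;
    while (x) {
        x &= x - 1;   /* clear the lowest set bit */
        n++;
    }
    return n;
}

int main(void)
{
    uint64_t regs = 0xffff;           /* say, sixteen allowed registers */
    int n = popcount64(regs);
    /* Fewer allowed registers means a pickier constraint, hence a
     * higher priority when sorting: total_regs - n + 1. */
    printf("allowed = %d, priority = %d\n", n, 64 - n + 1);
    return 0;
}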
1
The definition of INDEX_op_dupi_vec is that it operates on
1
Recognize the constant function for remainder.
2
units of tcg_target_ulong -- in this case 32 bits. It does
3
not work to use this for a uint64_t value that happens to be
4
small enough to fit in tcg_target_ulong.
5
2
6
Fixes: d2fd745fe8b
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
7
Fixes: db432672dc5
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Cc: qemu-stable@nongnu.org
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
6
---
11
tcg/tcg-op-vec.c | 12 ++++++++----
7
tcg/optimize.c | 6 +++++-
12
1 file changed, 8 insertions(+), 4 deletions(-)
8
1 file changed, 5 insertions(+), 1 deletion(-)
13
9
14
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/tcg-op-vec.c
12
--- a/tcg/optimize.c
17
+++ b/tcg/tcg-op-vec.c
13
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
19
15
20
void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
16
static bool fold_remainder(OptContext *ctx, TCGOp *op)
21
{
17
{
22
- if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
18
- return fold_const2(ctx, op);
23
- do_dupi_vec(r, MO_32, a);
19
+ if (fold_const2(ctx, op) ||
24
- } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
20
+ fold_xx_to_i(ctx, op, 0)) {
25
+ if (TCG_TARGET_REG_BITS == 64) {
21
+ return true;
26
do_dupi_vec(r, MO_64, a);
27
+ } else if (a == dup_const(MO_32, a)) {
28
+ do_dupi_vec(r, MO_32, a);
29
} else {
30
TCGv_i64 c = tcg_const_i64(a);
31
tcg_gen_dup_i64_vec(MO_64, r, c);
32
@@ -XXX,XX +XXX,XX @@ void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
33
34
void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
35
{
36
- do_dupi_vec(r, MO_REG, dup_const(vece, a));
37
+ if (vece == MO_64) {
38
+ tcg_gen_dup64i_vec(r, a);
39
+ } else {
40
+ do_dupi_vec(r, MO_REG, dup_const(vece, a));
41
+ }
22
+ }
23
+ return false;
42
}
24
}
43
25
44
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
26
static bool fold_setcond(OptContext *ctx, TCGOp *op)
45
--
27
--
46
2.25.1
28
2.25.1
47
29
48
30
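The dupi fix above turns on whether a 64-bit immediate is really a 32-bit splat, i.e. equal to its low 32 bits replicated, which is what the comparison against dup_const(MO_32, a) tests. A standalone sketch of that predicate (illustrative only; the real dup_const() also handles the MO_8 and MO_16 element sizes):

/* Is a 64-bit immediate a replication of its low 32 bits? */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t dup32(uint64_t a)
{
    uint32_t lo = (uint32_t)a;
    return ((uint64_t)lo << 32) | lo;
}

static bool is_32bit_splat(uint64_t a)
{
    return a == dup32(a);
}

int main(void)
{
    printf("%d\n", is_32bit_splat(0xdeadbeefdeadbeefull));  /* 1 */
    printf("%d\n", is_32bit_splat(0x00000001deadbeefull));  /* 0 */
    return 0;
}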
1
The union is unused; let "regs" appear in the main structure
1
Certain targets, like riscv, produce signed 32-bit results.
2
without the "u.regs" wrapping.
2
This can lead to lots of redundant extensions as values are
3
3
manipulated.
4
5
Begin by tracking only the obvious sign-extensions, and
6
converting them to simple copies when possible.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
11
---
6
include/tcg/tcg.h | 4 +---
12
tcg/optimize.c | 123 ++++++++++++++++++++++++++++++++++++++++---------
7
tcg/tcg.c | 22 +++++++++++-----------
13
1 file changed, 102 insertions(+), 21 deletions(-)
8
tcg/aarch64/tcg-target.c.inc | 14 +++++++-------
14
9
tcg/arm/tcg-target.c.inc | 26 +++++++++++++-------------
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
tcg/i386/tcg-target.c.inc | 26 +++++++++++++-------------
11
tcg/mips/tcg-target.c.inc | 18 +++++++++---------
12
tcg/ppc/tcg-target.c.inc | 24 ++++++++++++------------
13
tcg/riscv/tcg-target.c.inc | 14 +++++++-------
14
tcg/s390/tcg-target.c.inc | 18 +++++++++---------
15
tcg/sparc/tcg-target.c.inc | 16 ++++++++--------
16
tcg/tci/tcg-target.c.inc | 2 +-
17
11 files changed, 91 insertions(+), 93 deletions(-)
18
19
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
20
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
21
--- a/include/tcg/tcg.h
17
--- a/tcg/optimize.c
22
+++ b/include/tcg/tcg.h
18
+++ b/tcg/optimize.c
23
@@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(void);
19
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
24
typedef struct TCGArgConstraint {
20
TCGTemp *next_copy;
25
uint16_t ct;
21
uint64_t val;
26
uint8_t alias_index;
22
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
27
- union {
23
+ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
28
- TCGRegSet regs;
24
} TempOptInfo;
29
- } u;
25
30
+ TCGRegSet regs;
26
typedef struct OptContext {
31
} TCGArgConstraint;
27
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
32
28
/* In flight values from optimization. */
33
#define TCG_MAX_OP_ARGS 16
29
uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
34
diff --git a/tcg/tcg.c b/tcg/tcg.c
30
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
35
index XXXXXXX..XXXXXXX 100644
31
+ uint64_t s_mask; /* mask of clrsb(value) bits */
36
--- a/tcg/tcg.c
32
TCGType type;
37
+++ b/tcg/tcg.c
33
} OptContext;
38
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
34
39
return 0;
35
+/* Calculate the smask for a specific value. */
40
n = 0;
36
+static uint64_t smask_from_value(uint64_t value)
41
for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
37
+{
42
- if (tcg_regset_test_reg(arg_ct->u.regs, i))
38
+ int rep = clrsb64(value);
43
+ if (tcg_regset_test_reg(arg_ct->regs, i))
39
+ return ~(~0ull >> rep);
44
n++;
40
+}
41
+
42
+/*
43
+ * Calculate the smask for a given set of known-zeros.
44
+ * If there are lots of zeros on the left, we can consider the remainder
45
+ * an unsigned field, and thus the corresponding signed field is one bit
46
+ * larger.
47
+ */
48
+static uint64_t smask_from_zmask(uint64_t zmask)
49
+{
50
+ /*
51
+ * Only the 0 bits are significant for zmask, thus the msb itself
52
+ * must be zero, else we have no sign information.
53
+ */
54
+ int rep = clz64(zmask);
55
+ if (rep == 0) {
56
+ return 0;
57
+ }
58
+ rep -= 1;
59
+ return ~(~0ull >> rep);
60
+}
61
+
62
static inline TempOptInfo *ts_info(TCGTemp *ts)
63
{
64
return ts->state_ptr;
65
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
66
ti->prev_copy = ts;
67
ti->is_const = false;
68
ti->z_mask = -1;
69
+ ti->s_mask = 0;
70
}
71
72
static void reset_temp(TCGArg arg)
73
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
74
ti->is_const = true;
75
ti->val = ts->val;
76
ti->z_mask = ts->val;
77
+ ti->s_mask = smask_from_value(ts->val);
78
} else {
79
ti->is_const = false;
80
ti->z_mask = -1;
81
+ ti->s_mask = 0;
82
}
83
}
84
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
86
op->args[1] = src;
87
88
di->z_mask = si->z_mask;
89
+ di->s_mask = si->s_mask;
90
91
if (src_ts->type == dst_ts->type) {
92
TempOptInfo *ni = ts_info(si->next_copy);
93
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
94
95
nb_oargs = def->nb_oargs;
96
for (i = 0; i < nb_oargs; i++) {
97
- reset_temp(op->args[i]);
98
+ TCGTemp *ts = arg_temp(op->args[i]);
99
+ reset_ts(ts);
100
/*
101
- * Save the corresponding known-zero bits mask for the
102
+ * Save the corresponding known-zero/sign bits mask for the
103
* first output argument (only one supported so far).
104
*/
105
if (i == 0) {
106
- arg_info(op->args[i])->z_mask = ctx->z_mask;
107
+ ts_info(ts)->z_mask = ctx->z_mask;
108
+ ts_info(ts)->s_mask = ctx->s_mask;
45
}
109
}
46
}
110
}
47
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
111
}
48
/* Incomplete TCGTargetOpDef entry. */
112
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
49
tcg_debug_assert(ct_str != NULL);
113
{
50
114
uint64_t a_mask = ctx->a_mask;
51
- def->args_ct[i].u.regs = 0;
115
uint64_t z_mask = ctx->z_mask;
52
+ def->args_ct[i].regs = 0;
116
+ uint64_t s_mask = ctx->s_mask;
53
def->args_ct[i].ct = 0;
117
54
while (*ct_str != '\0') {
118
/*
55
switch(*ct_str) {
119
* 32-bit ops generate 32-bit results, which for the purpose of
56
@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
120
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
57
pset = la_temp_pref(ts);
121
if (ctx->type == TCG_TYPE_I32) {
58
set = *pset;
122
a_mask = (int32_t)a_mask;
59
123
z_mask = (int32_t)z_mask;
60
- set &= ct->u.regs;
124
+ s_mask |= MAKE_64BIT_MASK(32, 32);
61
+ set &= ct->regs;
125
ctx->z_mask = z_mask;
62
if (ct->ct & TCG_CT_IALIAS) {
126
+ ctx->s_mask = s_mask;
63
set &= op->output_pref[ct->alias_index];
127
}
64
}
128
65
/* If the combination is not possible, restart. */
129
if (z_mask == 0) {
66
if (set == 0) {
130
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
67
- set = ct->u.regs;
131
68
+ set = ct->regs;
132
static bool fold_bswap(OptContext *ctx, TCGOp *op)
69
}
133
{
70
*pset = set;
134
- uint64_t z_mask, sign;
71
}
135
+ uint64_t z_mask, s_mask, sign;
72
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
136
73
return;
137
if (arg_is_const(op->args[1])) {
74
}
138
uint64_t t = arg_info(op->args[1])->val;
75
139
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
76
- dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
140
}
77
- dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
141
78
+ dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
142
z_mask = arg_info(op->args[1])->z_mask;
79
+ dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
143
+
80
144
switch (op->opc) {
81
/* Allocate the output register now. */
145
case INDEX_op_bswap16_i32:
82
if (ots->val_type != TEMP_VAL_REG) {
146
case INDEX_op_bswap16_i64:
83
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
147
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
84
}
148
default:
149
g_assert_not_reached();
150
}
151
+ s_mask = smask_from_zmask(z_mask);
152
153
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
154
case TCG_BSWAP_OZ:
155
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
156
/* If the sign bit may be 1, force all the bits above to 1. */
157
if (z_mask & sign) {
158
z_mask |= sign;
159
+ s_mask = sign << 1;
85
}
160
}
86
161
break;
87
- temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
162
default:
88
+ temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
163
/* The high bits are undefined: force all bits above the sign to 1. */
89
reg = ts->reg;
164
z_mask |= sign << 1;
90
165
+ s_mask = 0;
91
- if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
166
break;
92
+ if (tcg_regset_test_reg(arg_ct->regs, reg)) {
167
}
93
/* nothing to do : the constraint is satisfied */
168
ctx->z_mask = z_mask;
94
} else {
169
+ ctx->s_mask = s_mask;
95
allocate_in_reg:
170
96
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
171
return fold_masks(ctx, op);
97
and move the temporary register into it */
172
}
98
temp_load(s, ts, tcg_target_available_regs[ts->type],
173
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
99
i_allocated_regs, 0);
174
static bool fold_extract(OptContext *ctx, TCGOp *op)
100
- reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
175
{
101
+ reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
176
uint64_t z_mask_old, z_mask;
102
o_preferred_regs, ts->indirect_base);
177
+ int pos = op->args[2];
103
if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
178
+ int len = op->args[3];
104
/*
179
105
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
180
if (arg_is_const(op->args[1])) {
106
&& !const_args[arg_ct->alias_index]) {
181
uint64_t t;
107
reg = new_args[arg_ct->alias_index];
182
108
} else if (arg_ct->ct & TCG_CT_NEWREG) {
183
t = arg_info(op->args[1])->val;
109
- reg = tcg_reg_alloc(s, arg_ct->u.regs,
184
- t = extract64(t, op->args[2], op->args[3]);
110
+ reg = tcg_reg_alloc(s, arg_ct->regs,
185
+ t = extract64(t, pos, len);
111
i_allocated_regs | o_allocated_regs,
186
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
112
op->output_pref[k], ts->indirect_base);
187
}
113
} else {
188
114
- reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
189
z_mask_old = arg_info(op->args[1])->z_mask;
115
+ reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
190
- z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
116
op->output_pref[k], ts->indirect_base);
191
- if (op->args[2] == 0) {
117
}
192
+ z_mask = extract64(z_mask_old, pos, len);
118
tcg_regset_set_reg(o_allocated_regs, reg);
193
+ if (pos == 0) {
119
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
194
ctx->a_mask = z_mask_old ^ z_mask;
120
index XXXXXXX..XXXXXXX 100644
195
}
121
--- a/tcg/aarch64/tcg-target.c.inc
196
ctx->z_mask = z_mask;
122
+++ b/tcg/aarch64/tcg-target.c.inc
197
+ ctx->s_mask = smask_from_zmask(z_mask);
123
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
198
124
switch (*ct_str++) {
199
return fold_masks(ctx, op);
125
case 'r': /* general registers */
200
}
126
ct->ct |= TCG_CT_REG;
201
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
127
- ct->u.regs |= 0xffffffffu;
202
128
+ ct->regs |= 0xffffffffu;
203
static bool fold_exts(OptContext *ctx, TCGOp *op)
129
break;
204
{
130
case 'w': /* advsimd registers */
205
- uint64_t z_mask_old, z_mask, sign;
131
ct->ct |= TCG_CT_REG;
206
+ uint64_t s_mask_old, s_mask, z_mask, sign;
132
- ct->u.regs |= 0xffffffff00000000ull;
207
bool type_change = false;
133
+ ct->regs |= 0xffffffff00000000ull;
208
134
break;
209
if (fold_const1(ctx, op)) {
135
case 'l': /* qemu_ld / qemu_st address, data_reg */
210
return true;
136
ct->ct |= TCG_CT_REG;
211
}
137
- ct->u.regs = 0xffffffffu;
212
138
+ ct->regs = 0xffffffffu;
213
- z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
139
#ifdef CONFIG_SOFTMMU
214
+ z_mask = arg_info(op->args[1])->z_mask;
140
/* x0 and x1 will be overwritten when reading the tlb entry,
215
+ s_mask = arg_info(op->args[1])->s_mask;
141
and x2, and x3 for helper args, better to avoid using them. */
216
+ s_mask_old = s_mask;
142
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
217
143
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
218
switch (op->opc) {
144
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
219
CASE_OP_32_64(ext8s):
145
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
220
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
146
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X0);
221
147
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X1);
222
if (z_mask & sign) {
148
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X2);
223
z_mask |= sign;
149
+ tcg_regset_reset_reg(ct->regs, TCG_REG_X3);
224
- } else if (!type_change) {
150
#endif
225
- ctx->a_mask = z_mask_old ^ z_mask;
151
break;
226
}
152
case 'A': /* Valid for arithmetic immediate (positive or negative). */
227
+ s_mask |= sign << 1;
153
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
228
+
154
index XXXXXXX..XXXXXXX 100644
229
ctx->z_mask = z_mask;
155
--- a/tcg/arm/tcg-target.c.inc
230
+ ctx->s_mask = s_mask;
156
+++ b/tcg/arm/tcg-target.c.inc
231
+ if (!type_change) {
157
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
232
+ ctx->a_mask = s_mask & ~s_mask_old;
158
233
+ }
159
case 'r':
234
160
ct->ct |= TCG_CT_REG;
235
return fold_masks(ctx, op);
161
- ct->u.regs = 0xffff;
236
}
162
+ ct->regs = 0xffff;
237
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
163
break;
238
}
164
239
165
/* qemu_ld address */
240
ctx->z_mask = z_mask;
166
case 'l':
241
+ ctx->s_mask = smask_from_zmask(z_mask);
167
ct->ct |= TCG_CT_REG;
242
if (!type_change) {
168
- ct->u.regs = 0xffff;
243
ctx->a_mask = z_mask_old ^ z_mask;
169
+ ct->regs = 0xffff;
244
}
170
#ifdef CONFIG_SOFTMMU
245
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
171
/* r0-r2,lr will be overwritten when reading the tlb entry,
246
MemOp mop = get_memop(oi);
172
so don't use these. */
247
int width = 8 * memop_size(mop);
173
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
248
174
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
249
- if (!(mop & MO_SIGN) && width < 64) {
175
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
250
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
176
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
251
+ if (width < 64) {
177
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
252
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
178
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
253
+ if (!(mop & MO_SIGN)) {
179
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
254
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
180
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
255
+ ctx->s_mask <<= 1;
181
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
256
+ }
182
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
257
}
183
#endif
258
184
break;
259
/* Opcodes that touch guest memory stop the mb optimization. */
185
260
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
186
/* qemu_st address & data */
261
187
case 's':
262
static bool fold_sextract(OptContext *ctx, TCGOp *op)
188
ct->ct |= TCG_CT_REG;
263
{
189
- ct->u.regs = 0xffff;
264
- int64_t z_mask_old, z_mask;
190
+ ct->regs = 0xffff;
265
+ uint64_t z_mask, s_mask, s_mask_old;
191
/* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
266
+ int pos = op->args[2];
192
and r0-r1 doing the byte swapping, so don't use these. */
267
+ int len = op->args[3];
193
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
268
194
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
269
if (arg_is_const(op->args[1])) {
195
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
270
uint64_t t;
196
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
271
197
#if defined(CONFIG_SOFTMMU)
272
t = arg_info(op->args[1])->val;
198
/* Avoid clashes with registers being used for helper args */
273
- t = sextract64(t, op->args[2], op->args[3]);
199
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
274
+ t = sextract64(t, pos, len);
200
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
275
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
201
#if TARGET_LONG_BITS == 64
276
}
202
/* Avoid clashes with registers being used for helper args */
277
203
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
278
- z_mask_old = arg_info(op->args[1])->z_mask;
204
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
279
- z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
205
#endif
280
- if (op->args[2] == 0 && z_mask >= 0) {
206
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
281
- ctx->a_mask = z_mask_old ^ z_mask;
207
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
282
- }
208
#endif
283
+ z_mask = arg_info(op->args[1])->z_mask;
209
break;
284
+ z_mask = sextract64(z_mask, pos, len);
210
285
ctx->z_mask = z_mask;
211
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
286
212
index XXXXXXX..XXXXXXX 100644
287
+ s_mask_old = arg_info(op->args[1])->s_mask;
213
--- a/tcg/i386/tcg-target.c.inc
288
+ s_mask = sextract64(s_mask_old, pos, len);
214
+++ b/tcg/i386/tcg-target.c.inc
289
+ s_mask |= MAKE_64BIT_MASK(len, 64 - len);
215
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
290
+ ctx->s_mask = s_mask;
216
switch(*ct_str++) {
291
+
217
case 'a':
292
+ if (pos == 0) {
218
ct->ct |= TCG_CT_REG;
293
+ ctx->a_mask = s_mask & ~s_mask_old;
219
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
294
+ }
220
+ tcg_regset_set_reg(ct->regs, TCG_REG_EAX);
295
+
221
break;
296
return fold_masks(ctx, op);
222
case 'b':
297
}
223
ct->ct |= TCG_CT_REG;
298
224
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
299
+ tcg_regset_set_reg(ct->regs, TCG_REG_EBX);
break;
case 'c':
ct->ct |= TCG_CT_REG;
- tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
+ tcg_regset_set_reg(ct->regs, TCG_REG_ECX);
break;
case 'd':
ct->ct |= TCG_CT_REG;
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
+ tcg_regset_set_reg(ct->regs, TCG_REG_EDX);
break;
case 'S':
ct->ct |= TCG_CT_REG;
- tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
+ tcg_regset_set_reg(ct->regs, TCG_REG_ESI);
break;
case 'D':
ct->ct |= TCG_CT_REG;
- tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
+ tcg_regset_set_reg(ct->regs, TCG_REG_EDI);
break;
case 'q':
/* A register that can be used as a byte operand. */
ct->ct |= TCG_CT_REG;
- ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
+ ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
break;
case 'Q':
/* A register with an addressable second byte (e.g. %ah). */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xf;
+ ct->regs = 0xf;
break;
case 'r':
/* A general register. */
ct->ct |= TCG_CT_REG;
- ct->u.regs |= ALL_GENERAL_REGS;
+ ct->regs |= ALL_GENERAL_REGS;
break;
case 'W':
/* With TZCNT/LZCNT, we can have operand-size as an input. */
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
case 'x':
/* A vector register. */
ct->ct |= TCG_CT_REG;
- ct->u.regs |= ALL_VECTOR_REGS;
+ ct->regs |= ALL_VECTOR_REGS;
break;

/* qemu_ld/st address constraint */
case 'L':
ct->ct |= TCG_CT_REG;
- ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
+ ct->regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
+ tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
break;

case 'e':
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
switch(*ct_str++) {
case 'r':
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff;
+ ct->regs = 0xffffffff;
break;
case 'L': /* qemu_ld input arg constraint */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff;
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
+ ct->regs = 0xffffffff;
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
#if defined(CONFIG_SOFTMMU)
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
}
#endif
break;
case 'S': /* qemu_st constraint */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff;
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
+ ct->regs = 0xffffffff;
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
#if defined(CONFIG_SOFTMMU)
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A3);
} else {
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_A1);
}
#endif
break;
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
switch (*ct_str++) {
case 'A': case 'B': case 'C': case 'D':
ct->ct |= TCG_CT_REG;
- tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
+ tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
break;
case 'r':
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff;
+ ct->regs = 0xffffffff;
break;
case 'v':
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff00000000ull;
+ ct->regs = 0xffffffff00000000ull;
break;
case 'L': /* qemu_ld constraint */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff;
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
+ ct->regs = 0xffffffff;
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
#ifdef CONFIG_SOFTMMU
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
#endif
break;
case 'S': /* qemu_st constraint */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff;
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
+ ct->regs = 0xffffffff;
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
#ifdef CONFIG_SOFTMMU
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R6);
#endif
break;
case 'I':
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
switch (*ct_str++) {
case 'r':
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff;
+ ct->regs = 0xffffffff;
break;
case 'L':
/* qemu_ld/qemu_st constraint */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff;
+ ct->regs = 0xffffffff;
/* qemu_ld/qemu_st uses TCG_REG_TMP0 */
#if defined(CONFIG_SOFTMMU)
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[3]);
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[4]);
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[0]);
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[1]);
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[2]);
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[3]);
+ tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[4]);
#endif
break;
case 'I':
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390/tcg-target.c.inc
+++ b/tcg/s390/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
switch (*ct_str++) {
case 'r': /* all registers */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffff;
+ ct->regs = 0xffff;
break;
case 'L': /* qemu_ld/st constraint */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffff;
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
+ ct->regs = 0xffff;
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
break;
case 'a': /* force R2 for division */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0;
- tcg_regset_set_reg(ct->u.regs, TCG_REG_R2);
+ ct->regs = 0;
+ tcg_regset_set_reg(ct->regs, TCG_REG_R2);
break;
case 'b': /* force R3 for division */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0;
- tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
+ ct->regs = 0;
+ tcg_regset_set_reg(ct->regs, TCG_REG_R3);
break;
case 'A':
ct->ct |= TCG_CT_CONST_S33;
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
switch (*ct_str++) {
case 'r':
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff;
+ ct->regs = 0xffffffff;
break;
case 'R':
ct->ct |= TCG_CT_REG;
- ct->u.regs = ALL_64;
+ ct->regs = ALL_64;
break;
case 'A': /* qemu_ld/st address constraint */
ct->ct |= TCG_CT_REG;
- ct->u.regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
+ ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
reserve_helpers:
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_O1);
+ tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
break;
case 's': /* qemu_st data 32-bit constraint */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffff;
+ ct->regs = 0xffffffff;
goto reserve_helpers;
case 'S': /* qemu_st data 64-bit constraint */
ct->ct |= TCG_CT_REG;
- ct->u.regs = ALL_64;
+ ct->regs = ALL_64;
goto reserve_helpers;
case 'I':
ct->ct |= TCG_CT_CONST_S11;
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
case 'L': /* qemu_ld constraint */
case 'S': /* qemu_st constraint */
ct->ct |= TCG_CT_REG;
- ct->u.regs = BIT(TCG_TARGET_NB_REGS) - 1;
+ ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
break;
default:
return NULL;
--
2.25.1


@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
{
/* We can't do any folding with a load, but we can record bits. */
switch (op->opc) {
+ CASE_OP_32_64(ld8s):
+ ctx->s_mask = MAKE_64BIT_MASK(8, 56);
+ break;
CASE_OP_32_64(ld8u):
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
+ ctx->s_mask = MAKE_64BIT_MASK(9, 55);
+ break;
+ CASE_OP_32_64(ld16s):
+ ctx->s_mask = MAKE_64BIT_MASK(16, 48);
break;
CASE_OP_32_64(ld16u):
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
+ ctx->s_mask = MAKE_64BIT_MASK(17, 47);
+ break;
+ case INDEX_op_ld32s_i64:
+ ctx->s_mask = MAKE_64BIT_MASK(32, 32);
break;
case INDEX_op_ld32u_i64:
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
+ ctx->s_mask = MAKE_64BIT_MASK(33, 31);
break;
default:
g_assert_not_reached();
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
ctx.type = TCG_TYPE_I32;
}

- /* Assume all bits affected, and no bits known zero. */
+ /* Assume all bits affected, no bits known zero, no sign reps. */
ctx.a_mask = -1;
ctx.z_mask = -1;
+ ctx.s_mask = 0;

/*
* Process each opcode.
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
case INDEX_op_extrh_i64_i32:
done = fold_extu(&ctx, op);
break;
+ CASE_OP_32_64(ld8s):
CASE_OP_32_64(ld8u):
+ CASE_OP_32_64(ld16s):
CASE_OP_32_64(ld16u):
+ case INDEX_op_ld32s_i64:
case INDEX_op_ld32u_i64:
done = fold_tcg_ld(&ctx, op);
break;
--
2.25.1

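The s_mask constants chosen in fold_tcg_ld above can be checked independently of TCG. The following standalone C sketch (not QEMU code; MAKE_64BIT_MASK is re-defined locally) brute-forces which high bits are guaranteed to repeat the bit just below them after an 8-bit load, and confirms the ld8s/ld8u constants used in the patch.

    /*
     * A w-bit sign-extending load makes bits [w, 63] copies of bit w-1;
     * a w-bit zero-extending load makes bits [w, 63] zero, so only bits
     * [w+1, 63] are guaranteed copies of the bit below them.
     */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAKE_64BIT_MASK(shift, length) \
        (((~0ULL) >> (64 - (length))) << (shift))

    static uint64_t ext8s(uint8_t v) { return (uint64_t)(int64_t)(int8_t)v; }
    static uint64_t ext8u(uint8_t v) { return v; }

    /* Bits that equal the bit immediately below them, for every loaded byte. */
    static uint64_t repeated_sign_bits(uint64_t (*extend)(uint8_t))
    {
        uint64_t mask = ~1ULL;                    /* candidate: all bits above bit 0 */

        for (unsigned v = 0; v < 256; v++) {
            uint64_t x = extend((uint8_t)v);
            mask &= ~(x ^ (x << 1));              /* drop bits that ever differ */
        }
        return mask;
    }

    int main(void)
    {
        assert(repeated_sign_bits(ext8s) == MAKE_64BIT_MASK(8, 56));  /* ld8s */
        assert(repeated_sign_bits(ext8u) == MAKE_64BIT_MASK(9, 55));  /* ld8u */
        printf("ld8s/ld8u s_mask constants verified\n");
        return 0;
    }

The same reasoning produces the 16- and 32-bit constants used for ld16s/ld16u and ld32s/ld32u.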
Because we now store uint64_t in TCGTemp, we can now always
store the full 64-bit duplicate immediate. So remove the
difference between 32- and 64-bit hosts.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 9 ++++-----
tcg/tcg-op-vec.c | 39 ++++++++++-----------------------------
tcg/tcg.c | 7 +------
3 files changed, 15 insertions(+), 40 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
case INDEX_op_dup2_vec:
assert(TCG_TARGET_REG_BITS == 32);
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- tmp = arg_info(op->args[1])->val;
- if (tmp == arg_info(op->args[2])->val) {
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
- break;
- }
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
+ deposit64(arg_info(op->args[1])->val, 32, 32,
+ arg_info(op->args[2])->val));
+ break;
} else if (args_are_copies(op->args[1], op->args[2])) {
op->opc = INDEX_op_dup_vec;
TCGOP_VECE(op) = MO_32;
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
}
}

-#define MO_REG (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)
-
-static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
-{
- TCGTemp *rt = tcgv_vec_temp(r);
- vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
-}
-
TCGv_vec tcg_const_zeros_vec(TCGType type)
{
TCGv_vec ret = tcg_temp_new_vec(type);
- do_dupi_vec(ret, MO_REG, 0);
+ tcg_gen_dupi_vec(MO_64, ret, 0);
return ret;
}

TCGv_vec tcg_const_ones_vec(TCGType type)
{
TCGv_vec ret = tcg_temp_new_vec(type);
- do_dupi_vec(ret, MO_REG, -1);
+ tcg_gen_dupi_vec(MO_64, ret, -1);
return ret;
}

@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)

void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
{
- if (TCG_TARGET_REG_BITS == 64) {
- do_dupi_vec(r, MO_64, a);
- } else if (a == dup_const(MO_32, a)) {
- do_dupi_vec(r, MO_32, a);
- } else {
- TCGv_i64 c = tcg_const_i64(a);
- tcg_gen_dup_i64_vec(MO_64, r, c);
- tcg_temp_free_i64(c);
- }
+ tcg_gen_dupi_vec(MO_64, r, a);
}

void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
{
- do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
+ tcg_gen_dupi_vec(MO_32, r, a);
}

void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
{
- do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
+ tcg_gen_dupi_vec(MO_16, r, a);
}

void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
{
- do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
+ tcg_gen_dupi_vec(MO_8, r, a);
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
- if (vece == MO_64) {
- tcg_gen_dup64i_vec(r, a);
- } else {
- do_dupi_vec(r, MO_REG, dup_const(vece, a));
- }
+ TCGTemp *rt = tcgv_vec_temp(r);
+ tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
}

void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
@@ -XXX,XX +XXX,XX @@ void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
} else {
- do_dupi_vec(t, MO_REG, 0);
- tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t);
+ tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
+ tcg_constant_vec(type, vece, 0));
}
tcg_gen_xor_vec(vece, r, a, t);
tcg_gen_sub_vec(vece, r, r, t);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
* The targets will, in general, have to do this search anyway,
* do this generically.
*/
- if (TCG_TARGET_REG_BITS == 32) {
- val = dup_const(MO_32, val);
- vece = MO_32;
- }
if (val == dup_const(MO_8, val)) {
vece = MO_8;
} else if (val == dup_const(MO_16, val)) {
vece = MO_16;
- } else if (TCG_TARGET_REG_BITS == 64 &&
- val == dup_const(MO_32, val)) {
+ } else if (val == dup_const(MO_32, val)) {
vece = MO_32;
}

--
2.25.1


Sign repetitions are perforce all identical, whether they are 1 or 0.
Bitwise operations preserve the relative quantity of the repetitions.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
z2 = arg_info(op->args[2])->z_mask;
ctx->z_mask = z1 & z2;

+ /*
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
+ * Bitwise operations preserve the relative quantity of the repetitions.
+ */
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+
/*
* Known-zeros does not imply known-ones. Therefore unless
* arg2 is constant, we can't infer affected bits from it.
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
}
ctx->z_mask = z1;

+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return fold_masks(ctx, op);
}

@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
fold_xi_to_not(ctx, op, 0)) {
return true;
}
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return false;
}

@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)

ctx->z_mask = arg_info(op->args[3])->z_mask
| arg_info(op->args[4])->z_mask;
+ ctx->s_mask = arg_info(op->args[3])->s_mask
+ & arg_info(op->args[4])->s_mask;

if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
uint64_t tv = arg_info(op->args[3])->val;
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
fold_xi_to_not(ctx, op, -1)) {
return true;
}
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return false;
}

@@ -XXX,XX +XXX,XX @@ static bool fold_nor(OptContext *ctx, TCGOp *op)
fold_xi_to_not(ctx, op, 0)) {
return true;
}
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return false;
}

@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
return true;
}

+ ctx->s_mask = arg_info(op->args[1])->s_mask;
+
/* Because of fold_to_not, we want to always return true, via finish. */
finish_folding(ctx, op);
return true;
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)

ctx->z_mask = arg_info(op->args[1])->z_mask
| arg_info(op->args[2])->z_mask;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return fold_masks(ctx, op);
}

@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
fold_ix_to_not(ctx, op, 0)) {
return true;
}
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return false;
}

@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)

ctx->z_mask = arg_info(op->args[1])->z_mask
| arg_info(op->args[2])->z_mask;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return fold_masks(ctx, op);
}

--
2.25.1

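The claim in the patch above — bitwise operations preserve the relative quantity of sign repetitions — can be sanity-checked outside of TCG. A standalone C illustration, not QEMU code; sign_reps plays the role of a clrsb-style helper:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Number of leading bits that merely repeat the sign bit. */
    static int sign_reps(int64_t x)
    {
        int n = 0;
        while (n < 63 &&
               (((uint64_t)x >> (62 - n)) & 1) == (((uint64_t)x >> 63) & 1)) {
            n++;
        }
        return n;
    }

    static int min(int a, int b) { return a < b ? a : b; }

    int main(void)
    {
        const int64_t samples[] = {
            0, -1, 1, -2, 0x7f, -0x80, 0x7fff, -0x8000,
            0x12345678, -0x12345678, INT64_MAX, INT64_MIN
        };
        const int n = sizeof(samples) / sizeof(samples[0]);

        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                int64_t a = samples[i], b = samples[j];
                int lo = min(sign_reps(a), sign_reps(b));

                /* The result never has fewer repetitions than either input. */
                assert(sign_reps(a & b) >= lo);
                assert(sign_reps(a | b) >= lo);
                assert(sign_reps(a ^ b) >= lo);
                assert(sign_reps(~a) == sign_reps(a));
            }
        }
        printf("bitwise ops preserve sign repetitions\n");
        return 0;
    }

If the top k+1 bits of both operands are all copies of their respective sign bits, the top k+1 bits of the AND/OR/XOR result are likewise identical, which is exactly why the patch takes the intersection of the two s_mask values.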
We must do this before we adjust tcg_out_movi_i32, lest the
under-the-hood poking that we do for icount be broken.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/gen-icount.h | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -XXX,XX +XXX,XX @@ static inline void gen_io_end(void)

static inline void gen_tb_start(TranslationBlock *tb)
{
- TCGv_i32 count, imm;
+ TCGv_i32 count;

tcg_ctx->exitreq_label = gen_new_label();
if (tb_cflags(tb) & CF_USE_ICOUNT) {
@@ -XXX,XX +XXX,XX @@ static inline void gen_tb_start(TranslationBlock *tb)
offsetof(ArchCPU, env));

if (tb_cflags(tb) & CF_USE_ICOUNT) {
- imm = tcg_temp_new_i32();
- /* We emit a movi with a dummy immediate argument. Keep the insn index
- * of the movi so that we later (when we know the actual insn count)
- * can update the immediate argument with the actual insn count. */
- tcg_gen_movi_i32(imm, 0xdeadbeef);
+ /*
+ * We emit a sub with a dummy immediate argument. Keep the insn index
+ * of the sub so that we later (when we know the actual insn count)
+ * can update the argument with the actual insn count.
+ */
+ tcg_gen_sub_i32(count, count, tcg_constant_i32(0));
icount_start_insn = tcg_last_op();
-
- tcg_gen_sub_i32(count, count, imm);
- tcg_temp_free_i32(imm);
}

tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label);
@@ -XXX,XX +XXX,XX @@ static inline void gen_tb_start(TranslationBlock *tb)
static inline void gen_tb_end(TranslationBlock *tb, int num_insns)
{
if (tb_cflags(tb) & CF_USE_ICOUNT) {
- /* Update the num_insn immediate parameter now that we know
- * the actual insn count. */
- tcg_set_insn_param(icount_start_insn, 1, num_insns);
+ /*
+ * Update the num_insn immediate parameter now that we know
+ * the actual insn count.
+ */
+ tcg_set_insn_param(icount_start_insn, 2,
+ tcgv_i32_arg(tcg_constant_i32(num_insns)));
}

gen_set_label(tcg_ctx->exitreq_label);
--
2.25.1


The result is either 0 or 1, which means that we have
a 2 bit signed result, and thus 62 bits of sign.
For clarity, use the smask_from_zmask function.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
}

ctx->z_mask = 1;
+ ctx->s_mask = smask_from_zmask(1);
return false;
}

@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
}

ctx->z_mask = 1;
+ ctx->s_mask = smask_from_zmask(1);
return false;

do_setcond_const:
--
2.25.1

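The body of smask_from_zmask is not shown in these hunks. The sketch below is an assumed standalone version of what such a helper computes (the QEMU implementation may differ in detail; __builtin_clzll stands in for QEMU's clz64), used only to confirm the "62 bits of sign" figure for a 0/1 setcond result.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * If the top rep+1 bits of a value are known zero (from z_mask),
     * then the top rep bits are known copies of the sign bit.
     */
    static uint64_t smask_from_zmask(uint64_t zmask)
    {
        int rep = zmask ? __builtin_clzll(zmask) : 64;

        if (rep == 0) {
            return 0;                /* msb unknown: no sign information */
        }
        rep -= 1;                    /* the sign bit itself is not a repetition */
        return ~(~0ull >> rep);
    }

    int main(void)
    {
        /* setcond produces 0 or 1: z_mask == 1, hence 62 known sign bits. */
        uint64_t s = smask_from_zmask(1);

        assert(s == ~(~0ull >> 62));            /* top 62 bits set */
        assert(__builtin_popcountll(s) == 62);
        printf("s_mask for setcond: %016llx\n", (unsigned long long)s);
        return 0;
    }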
These interfaces have been replaced by tcg_gen_dupi_vec
and tcg_constant_vec.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op.h | 4 ----
tcg/tcg-op-vec.c | 20 --------------------
2 files changed, 24 deletions(-)

diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec, TCGv_i64);
void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec, TCGv_ptr, tcg_target_long);
-void tcg_gen_dup8i_vec(TCGv_vec, uint32_t);
-void tcg_gen_dup16i_vec(TCGv_vec, uint32_t);
-void tcg_gen_dup32i_vec(TCGv_vec, uint32_t);
-void tcg_gen_dup64i_vec(TCGv_vec, uint64_t);
void tcg_gen_dupi_vec(unsigned vece, TCGv_vec, uint64_t);
void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
return tcg_const_ones_vec(t->base_type);
}

-void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
-{
- tcg_gen_dupi_vec(MO_64, r, a);
-}
-
-void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
-{
- tcg_gen_dupi_vec(MO_32, r, a);
-}
-
-void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
-{
- tcg_gen_dupi_vec(MO_16, r, a);
-}
-
-void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
-{
- tcg_gen_dupi_vec(MO_8, r, a);
-}
-
void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
TCGTemp *rt = tcgv_vec_temp(r);
--
2.25.1


The results are generally 6 bit unsigned values, though
the count leading and trailing bits may produce any value
for a zero input.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
g_assert_not_reached();
}
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
-
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
return false;
}

@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
default:
g_assert_not_reached();
}
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
return false;
}

--
2.25.1

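The caveat about zero inputs is why fold_count_zeros above ORs the second argument's z_mask into the result mask: TCG's clz/ctz operations return their second operand when the input is zero. A standalone C illustration of that semantic (clz64_or is a hypothetical stand-in, not a QEMU function):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* tcg-style count-leading-zeros: return orv for a zero input */
    static uint64_t clz64_or(uint64_t x, uint64_t orv)
    {
        return x ? (uint64_t)__builtin_clzll(x) : orv;
    }

    int main(void)
    {
        /* Non-zero inputs: results are 0..63, i.e. 6-bit values. */
        for (int i = 0; i < 64; i++) {
            uint64_t r = clz64_or(1ull << i, 0xdeadbeef);
            assert(r <= 63 && (r & ~0x3full) == 0);
        }
        /* Zero input: the result is whatever the second operand was. */
        assert(clz64_or(0, 0xdeadbeef) == 0xdeadbeef);
        printf("count-zeros z_mask must include the fallback operand's bits\n");
        return 0;
    }

Once the combined z_mask is known, deriving s_mask from it via smask_from_zmask is the same step already used for setcond.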
Prefer TEMP_CONST over anything else.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 27 ++++++++++++---------------
1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static void init_arg_info(TempOptInfo *infos,

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
- TCGTemp *i;
+ TCGTemp *i, *g, *l;

- /* If this is already a global, we can't do better. */
- if (ts->kind >= TEMP_GLOBAL) {
+ /* If this is already readonly, we can't do better. */
+ if (temp_readonly(ts)) {
return ts;
}

- /* Search for a global first. */
+ g = l = NULL;
for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
- if (i->kind >= TEMP_GLOBAL) {
+ if (temp_readonly(i)) {
return i;
- }
- }
-
- /* If it is a temp, search for a temp local. */
- if (ts->kind == TEMP_NORMAL) {
- for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
- if (i->kind >= TEMP_LOCAL) {
- return i;
+ } else if (i->kind > ts->kind) {
+ if (i->kind == TEMP_GLOBAL) {
+ g = i;
+ } else if (i->kind == TEMP_LOCAL) {
+ l = i;
}
}
}

- /* Failure to find a better representation, return the same temp. */
- return ts;
+ /* If we didn't find a better representation, return the same temp. */
+ return g ? g : l ? l : ts;
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
--
2.25.1


For constant shifts, we can simply shift the s_mask.

For variable shifts, we know that sar does not reduce
the s_mask, which helps for sequences like

ext32s_i64 t, in
sar_i64 t, t, v
ext32s_i64 out, t

allowing the final extend to be eliminated.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static uint64_t smask_from_zmask(uint64_t zmask)
return ~(~0ull >> rep);
}

+/*
+ * Recreate a properly left-aligned smask after manipulation.
+ * Some bit-shuffling, particularly shifts and rotates, may
+ * retain sign bits on the left, but may scatter disconnected
+ * sign bits on the right. Retain only what remains to the left.
+ */
+static uint64_t smask_from_smask(int64_t smask)
+{
+ /* Only the 1 bits are significant for smask */
+ return smask_from_zmask(~smask);
+}
+
static inline TempOptInfo *ts_info(TCGTemp *ts)
{
return ts->state_ptr;
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)

static bool fold_shift(OptContext *ctx, TCGOp *op)
{
+ uint64_t s_mask, z_mask, sign;
+
if (fold_const2(ctx, op) ||
fold_ix_to_i(ctx, op, 0) ||
fold_xi_to_x(ctx, op, 0)) {
return true;
}

+ s_mask = arg_info(op->args[1])->s_mask;
+ z_mask = arg_info(op->args[1])->z_mask;
+
if (arg_is_const(op->args[2])) {
- ctx->z_mask = do_constant_folding(op->opc, ctx->type,
- arg_info(op->args[1])->z_mask,
- arg_info(op->args[2])->val);
+ int sh = arg_info(op->args[2])->val;
+
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
+
+ s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
+ ctx->s_mask = smask_from_smask(s_mask);
+
return fold_masks(ctx, op);
}
+
+ switch (op->opc) {
+ CASE_OP_32_64(sar):
+ /*
+ * Arithmetic right shift will not reduce the number of
+ * input sign repetitions.
+ */
+ ctx->s_mask = s_mask;
+ break;
+ CASE_OP_32_64(shr):
+ /*
+ * If the sign bit is known zero, then logical right shift
+ * will not reduced the number of input sign repetitions.
+ */
+ sign = (s_mask & -s_mask) >> 1;
+ if (!(z_mask & sign)) {
+ ctx->s_mask = s_mask;
+ }
+ break;
+ default:
+ break;
+ }
+
return false;
}

--
2.25.1

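The ext32s/sar/ext32s example in the commit message above can be reproduced directly in C: once a value has been sign-extended from 32 bits, an arithmetic right shift cannot reduce the number of repeated sign bits, so the trailing extension is a no-op. A standalone sketch, not QEMU code (it relies on the usual arithmetic behaviour of >> on signed values):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t samples[] = {
            0, 1, 0x7fffffff, 0x80000000, 0xffffffff,
            0x123456789abcdef0ull, 0xfedcba9876543210ull
        };

        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
            for (int sh = 0; sh < 64; sh++) {
                int64_t t = (int32_t)samples[i];   /* ext32s_i64 t, in   */
                t >>= sh;                          /* sar_i64   t, t, sh */
                int64_t out = (int32_t)t;          /* ext32s_i64 out, t  */

                assert(out == t);                  /* the extend is a no-op */
            }
        }
        printf("sar preserves the 32 repeated sign bits\n");
        return 0;
    }

This is exactly the property the sar case of fold_shift records by carrying the input s_mask through unchanged, letting the optimizer drop the redundant ext32s.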