1
The following changes since commit c52d69e7dbaaed0ffdef8125e79218672c30161d:
1
Note that I have refreshed the expiry of my public key.
2
2
and pushed to keys.openpgp.org.
3
Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20211027' into staging (2021-10-27 11:45:18 -0700)
3
4
5
r~
6
7
8
The following changes since commit 4d5d933bbc7cc52f6cc6b9021f91fa06266222d5:
9
10
Merge tag 'pull-xenfv-20250116' of git://git.infradead.org/users/dwmw2/qemu into staging (2025-01-16 09:03:43 -0500)
4
11
5
are available in the Git repository at:
12
are available in the Git repository at:
6
13
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20211027
14
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20250117
8
15
9
for you to fetch changes up to 820c025f0dcacf2f3c12735b1f162893fbfa7bc6:
16
for you to fetch changes up to db1649823d4f27b924a5aa5f9e0111457accb798:
10
17
11
tcg/optimize: Propagate sign info for shifting (2021-10-27 17:11:23 -0700)
18
softfloat: Constify helpers returning float_status field (2025-01-17 08:29:25 -0800)
12
19
13
----------------------------------------------------------------
20
----------------------------------------------------------------
14
Improvements to qemu/int128
21
tcg:
15
Fixes for 128/64 division.
22
- Add TCGOP_TYPE, TCGOP_FLAGS.
16
Cleanup tcg/optimize.c
23
- Pass type and flags to tcg_op_supported, tcg_target_op_def.
17
Optimize redundant sign extensions
24
- Split out tcg-target-has.h and unexport from tcg.h.
25
- Reorg constraint processing; constify TCGOpDef.
26
- Make extract, sextract, deposit opcodes mandatory.
27
- Merge ext{8,16,32}{s,u} opcodes into {s}extract.
28
tcg/mips: Expand bswap unconditionally
29
tcg/riscv: Use SRAIW, SRLIW for {s}extract_i64
30
tcg/riscv: Use BEXTI for single-bit extractions
31
tcg/sparc64: Use SRA, SRL for {s}extract_i64
32
33
disas/riscv: Guard dec->cfg dereference for host disassemble
34
util/cpuinfo-riscv: Detect Zbs
35
accel/tcg: Call tcg_tb_insert() for one-insn TBs
36
linux-user: Add missing /proc/cpuinfo fields for sparc
18
37
19
----------------------------------------------------------------
38
----------------------------------------------------------------
20
Frédéric Pétrot (1):
39
Helge Deller (1):
21
qemu/int128: Add int128_{not,xor}
40
linux-user: Add missing /proc/cpuinfo fields for sparc
22
41
23
Luis Pires (4):
42
Ilya Leoshkevich (2):
24
host-utils: move checks out of divu128/divs128
43
tcg: Document tb_lookup() and tcg_tb_lookup()
25
host-utils: move udiv_qrnnd() to host-utils
44
accel/tcg: Call tcg_tb_insert() for one-insn TBs
26
host-utils: add 128-bit quotient support to divu128/divs128
45
27
host-utils: add unit tests for divu128/divs128
46
LIU Zhiwei (1):
28
47
disas/riscv: Guard dec->cfg dereference for host disassemble
29
Richard Henderson (51):
48
30
tcg/optimize: Rename "mask" to "z_mask"
49
Philippe Mathieu-Daudé (1):
31
tcg/optimize: Split out OptContext
50
softfloat: Constify helpers returning float_status field
32
tcg/optimize: Remove do_default label
51
33
tcg/optimize: Change tcg_opt_gen_{mov,movi} interface
52
Richard Henderson (63):
34
tcg/optimize: Move prev_mb into OptContext
53
tcg: Move call abi parameters from tcg-target.h to tcg-target.c.inc
35
tcg/optimize: Split out init_arguments
54
tcg: Replace TCGOP_VECL with TCGOP_TYPE
36
tcg/optimize: Split out copy_propagate
55
tcg: Move tcg_op_insert_{after,before} decls to tcg-internal.h
37
tcg/optimize: Split out fold_call
56
tcg: Copy TCGOP_TYPE in tcg_op_insert_{after,before}
38
tcg/optimize: Drop nb_oargs, nb_iargs locals
57
tcg: Add TCGOP_FLAGS
39
tcg/optimize: Change fail return for do_constant_folding_cond*
58
tcg: Add type and flags arguments to tcg_op_supported
40
tcg/optimize: Return true from tcg_opt_gen_{mov,movi}
59
target/arm: Do not test TCG_TARGET_HAS_bitsel_vec
41
tcg/optimize: Split out finish_folding
60
target/arm: Use tcg_op_supported
42
tcg/optimize: Use a boolean to avoid a mass of continues
61
target/tricore: Use tcg_op_supported
43
tcg/optimize: Split out fold_mb, fold_qemu_{ld,st}
62
tcg: Add tcg_op_deposit_valid
44
tcg/optimize: Split out fold_const{1,2}
63
target/i386: Remove TCG_TARGET_extract_tl_valid
45
tcg/optimize: Split out fold_setcond2
64
target/i386: Use tcg_op_deposit_valid
46
tcg/optimize: Split out fold_brcond2
65
target/i386: Use tcg_op_supported
47
tcg/optimize: Split out fold_brcond
66
tcg: Remove TCG_TARGET_NEED_LDST_LABELS and TCG_TARGET_NEED_POOL_LABELS
48
tcg/optimize: Split out fold_setcond
67
tcg: Rename tcg-target.opc.h to tcg-target-opc.h.inc
49
tcg/optimize: Split out fold_mulu2_i32
68
tcg/tci: Move TCI specific opcodes to tcg-target-opc.h.inc
50
tcg/optimize: Split out fold_addsub2_i32
69
tcg: Move fallback tcg_can_emit_vec_op out of line
51
tcg/optimize: Split out fold_movcond
70
tcg/ppc: Remove TCGPowerISA enum
52
tcg/optimize: Split out fold_extract2
71
tcg: Extract default TCG_TARGET_HAS_foo definitions to 'tcg-has.h'
53
tcg/optimize: Split out fold_extract, fold_sextract
72
tcg/aarch64: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
54
tcg/optimize: Split out fold_deposit
73
tcg/arm: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
55
tcg/optimize: Split out fold_count_zeros
74
tcg/i386: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
56
tcg/optimize: Split out fold_bswap
75
tcg/loongarch64: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
57
tcg/optimize: Split out fold_dup, fold_dup2
76
tcg/mips: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
58
tcg/optimize: Split out fold_mov
77
tcg/ppc: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
59
tcg/optimize: Split out fold_xx_to_i
78
tcg/riscv: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
60
tcg/optimize: Split out fold_xx_to_x
79
tcg/s390x: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
61
tcg/optimize: Split out fold_xi_to_i
80
tcg/sparc64: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
62
tcg/optimize: Add type to OptContext
81
tcg/tci: Extract TCG_TARGET_HAS_foo defs to 'tcg-target-has.h'
63
tcg/optimize: Split out fold_to_not
82
tcg: Include 'tcg-target-has.h' once in 'tcg-has.h'
64
tcg/optimize: Split out fold_sub_to_neg
83
tcg: Only include 'tcg-has.h' when necessary
65
tcg/optimize: Split out fold_xi_to_x
84
tcg: Split out tcg-target-mo.h
66
tcg/optimize: Split out fold_ix_to_i
85
tcg: Use C_NotImplemented in tcg_target_op_def
67
tcg/optimize: Split out fold_masks
86
tcg: Change have_vec to has_type in tcg_op_supported
68
tcg/optimize: Expand fold_mulu2_i32 to all 4-arg multiplies
87
tcg: Reorg process_op_defs
69
tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops
88
tcg: Remove args_ct from TCGOpDef
70
tcg/optimize: Sink commutative operand swapping into fold functions
89
tcg: Constify tcg_op_defs
71
tcg/optimize: Stop forcing z_mask to "garbage" for 32-bit values
90
tcg: Validate op supported in opcode_args_ct
72
tcg/optimize: Use fold_xx_to_i for orc
91
tcg: Add TCG_OPF_NOT_PRESENT to opcodes without inputs or outputs
73
tcg/optimize: Use fold_xi_to_x for mul
92
tcg: Pass type and flags to tcg_target_op_def
74
tcg/optimize: Use fold_xi_to_x for div
93
tcg: Add TCGType argument to tcg_out_op
75
tcg/optimize: Use fold_xx_to_i for rem
94
tcg: Remove TCG_OPF_64BIT
76
tcg/optimize: Optimize sign extensions
95
tcg: Drop implementation checks from tcg-opc.h
77
tcg/optimize: Propagate sign info for logical operations
96
tcg: Replace IMPLVEC with TCG_OPF_VECTOR
78
tcg/optimize: Propagate sign info for setcond
97
tcg/mips: Expand bswap unconditionally
79
tcg/optimize: Propagate sign info for bit counting
98
tcg/i386: Handle all 8-bit extensions for i686
80
tcg/optimize: Propagate sign info for shifting
99
tcg/i386: Fold the ext{8,16,32}[us] cases into {s}extract
81
100
tcg/aarch64: Provide TCG_TARGET_{s}extract_valid
82
include/fpu/softfloat-macros.h | 82 --
101
tcg/aarch64: Expand extract with offset 0 with andi
83
include/hw/clock.h | 5 +-
102
tcg/arm: Add full [US]XT[BH] into {s}extract
84
include/qemu/host-utils.h | 121 +-
103
tcg/loongarch64: Fold the ext{8,16,32}[us] cases into {s}extract
85
include/qemu/int128.h | 20 +
104
tcg/mips: Fold the ext{8,16,32}[us] cases into {s}extract
86
target/ppc/int_helper.c | 23 +-
105
tcg/ppc: Fold the ext{8,16,32}[us] cases into {s}extract
87
tcg/optimize.c | 2644 ++++++++++++++++++++++++----------------
106
tcg/riscv64: Fold the ext{8,16,32}[us] cases into {s}extract
88
tests/unit/test-div128.c | 197 +++
107
tcg/riscv: Use SRAIW, SRLIW for {s}extract_i64
89
util/host-utils.c | 147 ++-
108
tcg/s390x: Fold the ext{8,16,32}[us] cases into {s}extract
90
tests/unit/meson.build | 1 +
109
tcg/sparc64: Use SRA, SRL for {s}extract_i64
91
9 files changed, 2053 insertions(+), 1187 deletions(-)
110
tcg/tci: Provide TCG_TARGET_{s}extract_valid
92
create mode 100644 tests/unit/test-div128.c
111
tcg/tci: Remove assertions for deposit and extract
93
112
tcg: Remove TCG_TARGET_HAS_{s}extract_{i32,i64}
113
tcg: Remove TCG_TARGET_HAS_deposit_{i32,i64}
114
util/cpuinfo-riscv: Detect Zbs
115
tcg/riscv: Use BEXTI for single-bit extractions
116
117
accel/tcg/internal-target.h | 1 +
118
host/include/riscv/host/cpuinfo.h | 5 +-
119
include/fpu/softfloat-helpers.h | 25 +-
120
include/tcg/tcg-opc.h | 355 +++++-------
121
include/tcg/tcg.h | 187 ++----
122
linux-user/sparc/target_proc.h | 20 +-
123
tcg/aarch64/tcg-target-has.h | 117 ++++
124
tcg/aarch64/tcg-target-mo.h | 12 +
125
tcg/aarch64/tcg-target.h | 126 ----
126
tcg/arm/tcg-target-has.h | 100 ++++
127
tcg/arm/tcg-target-mo.h | 13 +
128
tcg/arm/tcg-target.h | 86 ---
129
tcg/i386/tcg-target-has.h | 169 ++++++
130
tcg/i386/tcg-target-mo.h | 19 +
131
tcg/i386/tcg-target.h | 162 ------
132
tcg/loongarch64/tcg-target-has.h | 119 ++++
133
tcg/loongarch64/tcg-target-mo.h | 12 +
134
tcg/loongarch64/tcg-target.h | 115 ----
135
tcg/mips/tcg-target-has.h | 135 +++++
136
tcg/mips/tcg-target-mo.h | 13 +
137
tcg/mips/tcg-target.h | 130 -----
138
tcg/ppc/tcg-target-has.h | 131 +++++
139
tcg/ppc/tcg-target-mo.h | 12 +
140
tcg/ppc/tcg-target.h | 126 ----
141
tcg/riscv/tcg-target-has.h | 135 +++++
142
tcg/riscv/tcg-target-mo.h | 12 +
143
tcg/riscv/tcg-target.h | 116 ----
144
tcg/s390x/tcg-target-has.h | 137 +++++
145
tcg/s390x/tcg-target-mo.h | 12 +
146
tcg/s390x/tcg-target.h | 126 ----
147
tcg/sparc64/tcg-target-has.h | 87 +++
148
tcg/sparc64/tcg-target-mo.h | 12 +
149
tcg/sparc64/tcg-target.h | 91 ---
150
tcg/tcg-has.h | 101 ++++
151
tcg/tcg-internal.h | 18 +-
152
tcg/tci/tcg-target-has.h | 81 +++
153
tcg/tci/tcg-target-mo.h | 17 +
154
tcg/tci/tcg-target.h | 94 ---
155
accel/tcg/cpu-exec.c | 15 +-
156
accel/tcg/translate-all.c | 29 +-
157
disas/riscv.c | 23 +-
158
target/arm/tcg/translate-a64.c | 10 +-
159
target/arm/tcg/translate-sve.c | 22 +-
160
target/arm/tcg/translate.c | 2 +-
161
target/tricore/translate.c | 4 +-
162
tcg/optimize.c | 27 +-
163
tcg/tcg-common.c | 5 +-
164
tcg/tcg-op-gvec.c | 1 +
165
tcg/tcg-op-ldst.c | 29 +-
166
tcg/tcg-op-vec.c | 9 +-
167
tcg/tcg-op.c | 149 ++---
168
tcg/tcg.c | 643 ++++++++++++++-------
169
tcg/tci.c | 13 +-
170
util/cpuinfo-riscv.c | 18 +-
171
docs/devel/tcg-ops.rst | 15 +-
172
target/i386/tcg/emit.c.inc | 14 +-
173
.../{tcg-target.opc.h => tcg-target-opc.h.inc} | 4 +-
174
tcg/aarch64/tcg-target.c.inc | 33 +-
175
tcg/arm/{tcg-target.opc.h => tcg-target-opc.h.inc} | 6 +-
176
tcg/arm/tcg-target.c.inc | 71 ++-
177
.../{tcg-target.opc.h => tcg-target-opc.h.inc} | 22 +-
178
tcg/i386/tcg-target.c.inc | 121 +++-
179
.../{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
180
tcg/loongarch64/tcg-target.c.inc | 59 +-
181
tcg/mips/tcg-target-opc.h.inc | 1 +
182
tcg/mips/tcg-target.c.inc | 55 +-
183
tcg/ppc/{tcg-target.opc.h => tcg-target-opc.h.inc} | 12 +-
184
tcg/ppc/tcg-target.c.inc | 39 +-
185
.../{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
186
tcg/riscv/tcg-target.c.inc | 66 ++-
187
.../{tcg-target.opc.h => tcg-target-opc.h.inc} | 6 +-
188
tcg/s390x/tcg-target.c.inc | 59 +-
189
tcg/sparc64/tcg-target-opc.h.inc | 1 +
190
tcg/sparc64/tcg-target.c.inc | 29 +-
191
tcg/tcg-ldst.c.inc | 65 ---
192
tcg/tcg-pool.c.inc | 162 ------
193
tcg/tci/tcg-target-opc.h.inc | 4 +
194
tcg/tci/tcg-target.c.inc | 53 +-
195
78 files changed, 2856 insertions(+), 2269 deletions(-)
196
create mode 100644 tcg/aarch64/tcg-target-has.h
197
create mode 100644 tcg/aarch64/tcg-target-mo.h
198
create mode 100644 tcg/arm/tcg-target-has.h
199
create mode 100644 tcg/arm/tcg-target-mo.h
200
create mode 100644 tcg/i386/tcg-target-has.h
201
create mode 100644 tcg/i386/tcg-target-mo.h
202
create mode 100644 tcg/loongarch64/tcg-target-has.h
203
create mode 100644 tcg/loongarch64/tcg-target-mo.h
204
create mode 100644 tcg/mips/tcg-target-has.h
205
create mode 100644 tcg/mips/tcg-target-mo.h
206
create mode 100644 tcg/ppc/tcg-target-has.h
207
create mode 100644 tcg/ppc/tcg-target-mo.h
208
create mode 100644 tcg/riscv/tcg-target-has.h
209
create mode 100644 tcg/riscv/tcg-target-mo.h
210
create mode 100644 tcg/s390x/tcg-target-has.h
211
create mode 100644 tcg/s390x/tcg-target-mo.h
212
create mode 100644 tcg/sparc64/tcg-target-has.h
213
create mode 100644 tcg/sparc64/tcg-target-mo.h
214
create mode 100644 tcg/tcg-has.h
215
create mode 100644 tcg/tci/tcg-target-has.h
216
create mode 100644 tcg/tci/tcg-target-mo.h
217
rename tcg/aarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} (82%)
218
rename tcg/arm/{tcg-target.opc.h => tcg-target-opc.h.inc} (75%)
219
rename tcg/i386/{tcg-target.opc.h => tcg-target-opc.h.inc} (72%)
220
rename tcg/loongarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
221
create mode 100644 tcg/mips/tcg-target-opc.h.inc
222
rename tcg/ppc/{tcg-target.opc.h => tcg-target-opc.h.inc} (83%)
223
rename tcg/riscv/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
224
rename tcg/s390x/{tcg-target.opc.h => tcg-target-opc.h.inc} (75%)
225
create mode 100644 tcg/sparc64/tcg-target-opc.h.inc
226
delete mode 100644 tcg/tcg-ldst.c.inc
227
delete mode 100644 tcg/tcg-pool.c.inc
228
create mode 100644 tcg/tci/tcg-target-opc.h.inc
229
diff view generated by jsdifflib
New patch
1
From: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
1
2
3
For riscv host, it will set dec->cfg to zero. Thus we shuld guard
4
the dec->cfg deference for riscv host disassemble.
5
6
And in general, we should only use dec->cfg for target in three cases:
7
8
1) For not incompatible encodings, such as zcmp/zcmt/zfinx.
9
2) For maybe-ops encodings, they are better to be disassembled to
10
the "real" extensions, such as zicfiss. The guard of dec->zimop
11
and dec->zcmop is for comment and avoid check for every extension
12
that encoded in maybe-ops area.
13
3) For custom encodings, we have to use dec->cfg to disassemble
14
custom encodings using the same encoding area.
15
16
Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
17
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
18
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
19
Message-ID: <20241206032411.52528-1-zhiwei_liu@linux.alibaba.com>
20
---
21
disas/riscv.c | 23 ++++++++++++-----------
22
1 file changed, 12 insertions(+), 11 deletions(-)
23
24
diff --git a/disas/riscv.c b/disas/riscv.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/disas/riscv.c
27
+++ b/disas/riscv.c
28
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
29
break;
30
case 2: op = rv_op_c_li; break;
31
case 3:
32
- if (dec->cfg->ext_zcmop) {
33
+ if (dec->cfg && dec->cfg->ext_zcmop) {
34
if ((((inst >> 2) & 0b111111) == 0b100000) &&
35
(((inst >> 11) & 0b11) == 0b0)) {
36
unsigned int cmop_code = 0;
37
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
38
op = rv_op_c_sqsp;
39
} else {
40
op = rv_op_c_fsdsp;
41
- if (dec->cfg->ext_zcmp && ((inst >> 12) & 0b01)) {
42
+ if (dec->cfg && dec->cfg->ext_zcmp && ((inst >> 12) & 0b01)) {
43
switch ((inst >> 8) & 0b01111) {
44
case 8:
45
if (((inst >> 4) & 0b01111) >= 4) {
46
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
47
} else {
48
switch ((inst >> 10) & 0b011) {
49
case 0:
50
- if (!dec->cfg->ext_zcmt) {
51
+ if (dec->cfg && !dec->cfg->ext_zcmt) {
52
break;
53
}
54
if (((inst >> 2) & 0xFF) >= 32) {
55
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
56
}
57
break;
58
case 3:
59
- if (!dec->cfg->ext_zcmp) {
60
+ if (dec->cfg && !dec->cfg->ext_zcmp) {
61
break;
62
}
63
switch ((inst >> 5) & 0b011) {
64
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
65
break;
66
case 5:
67
op = rv_op_auipc;
68
- if (dec->cfg->ext_zicfilp &&
69
+ if (dec->cfg && dec->cfg->ext_zicfilp &&
70
(((inst >> 7) & 0b11111) == 0b00000)) {
71
op = rv_op_lpad;
72
}
73
@@ -XXX,XX +XXX,XX @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
74
case 2: op = rv_op_csrrs; break;
75
case 3: op = rv_op_csrrc; break;
76
case 4:
77
- if (dec->cfg->ext_zimop) {
78
+ if (dec->cfg && dec->cfg->ext_zimop) {
79
int imm_mop5, imm_mop3, reg_num;
80
if ((extract32(inst, 22, 10) & 0b1011001111)
81
== 0b1000000111) {
82
@@ -XXX,XX +XXX,XX @@ static GString *format_inst(size_t tab, rv_decode *dec)
83
g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
84
break;
85
case '3':
86
- if (dec->cfg->ext_zfinx) {
87
+ if (dec->cfg && dec->cfg->ext_zfinx) {
88
g_string_append(buf, rv_ireg_name_sym[dec->rd]);
89
} else {
90
g_string_append(buf, rv_freg_name_sym[dec->rd]);
91
}
92
break;
93
case '4':
94
- if (dec->cfg->ext_zfinx) {
95
+ if (dec->cfg && dec->cfg->ext_zfinx) {
96
g_string_append(buf, rv_ireg_name_sym[dec->rs1]);
97
} else {
98
g_string_append(buf, rv_freg_name_sym[dec->rs1]);
99
}
100
break;
101
case '5':
102
- if (dec->cfg->ext_zfinx) {
103
+ if (dec->cfg && dec->cfg->ext_zfinx) {
104
g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
105
} else {
106
g_string_append(buf, rv_freg_name_sym[dec->rs2]);
107
}
108
break;
109
case '6':
110
- if (dec->cfg->ext_zfinx) {
111
+ if (dec->cfg && dec->cfg->ext_zfinx) {
112
g_string_append(buf, rv_ireg_name_sym[dec->rs3]);
113
} else {
114
g_string_append(buf, rv_freg_name_sym[dec->rs3]);
115
@@ -XXX,XX +XXX,XX @@ static GString *disasm_inst(rv_isa isa, uint64_t pc, rv_inst inst,
116
const rv_opcode_data *opcode_data = decoders[i].opcode_data;
117
void (*decode_func)(rv_decode *, rv_isa) = decoders[i].decode_func;
118
119
- if (guard_func(cfg)) {
120
+ /* always_true_p don't dereference cfg */
121
+ if (((i == 0) || cfg) && guard_func(cfg)) {
122
dec.opcode_data = opcode_data;
123
decode_func(&dec, isa);
124
if (dec.op != rv_op_illegal)
125
--
126
2.43.0
diff view generated by jsdifflib
1
This is the final entry in the main switch that was in a
1
These defines are not required outside of tcg/tcg.c,
2
different form. After this, we have the option to convert
2
which includes tcg-target.c.inc before use.
3
the switch into a function dispatch table.
3
Reduces the exported symbol set of tcg-target.h.
4
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 27 ++++++++++++++-------------
8
tcg/aarch64/tcg-target.h | 13 -------------
10
1 file changed, 14 insertions(+), 13 deletions(-)
9
tcg/arm/tcg-target.h | 8 --------
10
tcg/i386/tcg-target.h | 20 --------------------
11
tcg/loongarch64/tcg-target.h | 9 ---------
12
tcg/mips/tcg-target.h | 14 --------------
13
tcg/riscv/tcg-target.h | 9 ---------
14
tcg/s390x/tcg-target.h | 8 --------
15
tcg/sparc64/tcg-target.h | 11 -----------
16
tcg/tci/tcg-target.h | 14 --------------
17
tcg/aarch64/tcg-target.c.inc | 13 +++++++++++++
18
tcg/arm/tcg-target.c.inc | 8 ++++++++
19
tcg/i386/tcg-target.c.inc | 20 ++++++++++++++++++++
20
tcg/loongarch64/tcg-target.c.inc | 9 +++++++++
21
tcg/mips/tcg-target.c.inc | 14 ++++++++++++++
22
tcg/riscv/tcg-target.c.inc | 9 +++++++++
23
tcg/s390x/tcg-target.c.inc | 8 ++++++++
24
tcg/sparc64/tcg-target.c.inc | 10 ++++++++++
25
tcg/tci/tcg-target.c.inc | 14 ++++++++++++++
26
18 files changed, 105 insertions(+), 106 deletions(-)
11
27
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
28
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
30
--- a/tcg/aarch64/tcg-target.h
15
+++ b/tcg/optimize.c
31
+++ b/tcg/aarch64/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
32
@@ -XXX,XX +XXX,XX @@ typedef enum {
17
return true;
33
18
}
34
#define TCG_TARGET_NB_REGS 64
19
35
20
+static bool fold_mov(OptContext *ctx, TCGOp *op)
36
-/* used for function call generation */
21
+{
37
-#define TCG_REG_CALL_STACK TCG_REG_SP
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
38
-#define TCG_TARGET_STACK_ALIGN 16
23
+}
39
-#define TCG_TARGET_CALL_STACK_OFFSET 0
24
+
40
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
25
static bool fold_movcond(OptContext *ctx, TCGOp *op)
41
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
42
-#ifdef CONFIG_DARWIN
43
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
44
-#else
45
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
46
-#endif
47
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
48
-
49
#define have_lse (cpuinfo & CPUINFO_LSE)
50
#define have_lse2 (cpuinfo & CPUINFO_LSE2)
51
52
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tcg/arm/tcg-target.h
55
+++ b/tcg/arm/tcg-target.h
56
@@ -XXX,XX +XXX,XX @@ extern bool use_idiv_instructions;
57
extern bool use_neon_instructions;
58
#endif
59
60
-/* used for function call generation */
61
-#define TCG_TARGET_STACK_ALIGN        8
62
-#define TCG_TARGET_CALL_STACK_OFFSET    0
63
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
64
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
65
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
66
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
67
-
68
/* optional instructions */
69
#define TCG_TARGET_HAS_ext8s_i32 1
70
#define TCG_TARGET_HAS_ext16s_i32 1
71
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
72
index XXXXXXX..XXXXXXX 100644
73
--- a/tcg/i386/tcg-target.h
74
+++ b/tcg/i386/tcg-target.h
75
@@ -XXX,XX +XXX,XX @@ typedef enum {
76
TCG_REG_CALL_STACK = TCG_REG_ESP
77
} TCGReg;
78
79
-/* used for function call generation */
80
-#define TCG_TARGET_STACK_ALIGN 16
81
-#if defined(_WIN64)
82
-#define TCG_TARGET_CALL_STACK_OFFSET 32
83
-#else
84
-#define TCG_TARGET_CALL_STACK_OFFSET 0
85
-#endif
86
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
87
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
88
-#if defined(_WIN64)
89
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
90
-# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_VEC
91
-#elif TCG_TARGET_REG_BITS == 64
92
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
93
-# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
94
-#else
95
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
96
-# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
97
-#endif
98
-
99
#define have_bmi1 (cpuinfo & CPUINFO_BMI1)
100
#define have_popcnt (cpuinfo & CPUINFO_POPCNT)
101
#define have_avx1 (cpuinfo & CPUINFO_AVX1)
102
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
103
index XXXXXXX..XXXXXXX 100644
104
--- a/tcg/loongarch64/tcg-target.h
105
+++ b/tcg/loongarch64/tcg-target.h
106
@@ -XXX,XX +XXX,XX @@ typedef enum {
107
TCG_VEC_TMP0 = TCG_REG_V23,
108
} TCGReg;
109
110
-/* used for function call generation */
111
-#define TCG_REG_CALL_STACK TCG_REG_SP
112
-#define TCG_TARGET_STACK_ALIGN 16
113
-#define TCG_TARGET_CALL_STACK_OFFSET 0
114
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
115
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
116
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
117
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
118
-
119
/* optional instructions */
120
#define TCG_TARGET_HAS_negsetcond_i32 0
121
#define TCG_TARGET_HAS_div_i32 1
122
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
123
index XXXXXXX..XXXXXXX 100644
124
--- a/tcg/mips/tcg-target.h
125
+++ b/tcg/mips/tcg-target.h
126
@@ -XXX,XX +XXX,XX @@ typedef enum {
127
TCG_AREG0 = TCG_REG_S8,
128
} TCGReg;
129
130
-/* used for function call generation */
131
-#define TCG_TARGET_STACK_ALIGN 16
132
-#if _MIPS_SIM == _ABIO32
133
-# define TCG_TARGET_CALL_STACK_OFFSET 16
134
-# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
135
-# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
136
-#else
137
-# define TCG_TARGET_CALL_STACK_OFFSET 0
138
-# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
139
-# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
140
-#endif
141
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
142
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
143
-
144
/* MOVN/MOVZ instructions detection */
145
#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
146
defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
147
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
148
index XXXXXXX..XXXXXXX 100644
149
--- a/tcg/riscv/tcg-target.h
150
+++ b/tcg/riscv/tcg-target.h
151
@@ -XXX,XX +XXX,XX @@ typedef enum {
152
TCG_REG_TMP2 = TCG_REG_T4,
153
} TCGReg;
154
155
-/* used for function call generation */
156
-#define TCG_REG_CALL_STACK TCG_REG_SP
157
-#define TCG_TARGET_STACK_ALIGN 16
158
-#define TCG_TARGET_CALL_STACK_OFFSET 0
159
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
160
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
161
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
162
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
163
-
164
/* optional instructions */
165
#define TCG_TARGET_HAS_negsetcond_i32 1
166
#define TCG_TARGET_HAS_div_i32 1
167
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
168
index XXXXXXX..XXXXXXX 100644
169
--- a/tcg/s390x/tcg-target.h
170
+++ b/tcg/s390x/tcg-target.h
171
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
172
#define TCG_TARGET_HAS_cmpsel_vec 1
173
#define TCG_TARGET_HAS_tst_vec 0
174
175
-/* used for function call generation */
176
-#define TCG_TARGET_STACK_ALIGN        8
177
-#define TCG_TARGET_CALL_STACK_OFFSET    160
178
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
179
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
180
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
181
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
182
-
183
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
184
#define TCG_TARGET_NEED_LDST_LABELS
185
#define TCG_TARGET_NEED_POOL_LABELS
186
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
187
index XXXXXXX..XXXXXXX 100644
188
--- a/tcg/sparc64/tcg-target.h
189
+++ b/tcg/sparc64/tcg-target.h
190
@@ -XXX,XX +XXX,XX @@ typedef enum {
191
TCG_REG_I7,
192
} TCGReg;
193
194
-/* used for function call generation */
195
-#define TCG_REG_CALL_STACK TCG_REG_O6
196
-
197
-#define TCG_TARGET_STACK_BIAS 2047
198
-#define TCG_TARGET_STACK_ALIGN 16
199
-#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS)
200
-#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
201
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
202
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
203
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
204
-
205
#if defined(__VIS__) && __VIS__ >= 0x300
206
#define use_vis3_instructions 1
207
#else
208
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
209
index XXXXXXX..XXXXXXX 100644
210
--- a/tcg/tci/tcg-target.h
211
+++ b/tcg/tci/tcg-target.h
212
@@ -XXX,XX +XXX,XX @@ typedef enum {
213
TCG_REG_CALL_STACK = TCG_REG_R15,
214
} TCGReg;
215
216
-/* Used for function call generation. */
217
-#define TCG_TARGET_CALL_STACK_OFFSET 0
218
-#define TCG_TARGET_STACK_ALIGN 8
219
-#if TCG_TARGET_REG_BITS == 32
220
-# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN
221
-# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
222
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
223
-#else
224
-# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
225
-# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
226
-# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
227
-#endif
228
-#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
229
-
230
#define HAVE_TCG_QEMU_TB_EXEC
231
#define TCG_TARGET_NEED_POOL_LABELS
232
233
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
234
index XXXXXXX..XXXXXXX 100644
235
--- a/tcg/aarch64/tcg-target.c.inc
236
+++ b/tcg/aarch64/tcg-target.c.inc
237
@@ -XXX,XX +XXX,XX @@
238
#include "../tcg-pool.c.inc"
239
#include "qemu/bitops.h"
240
241
+/* Used for function call generation. */
242
+#define TCG_REG_CALL_STACK TCG_REG_SP
243
+#define TCG_TARGET_STACK_ALIGN 16
244
+#define TCG_TARGET_CALL_STACK_OFFSET 0
245
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
246
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
247
+#ifdef CONFIG_DARWIN
248
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
249
+#else
250
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
251
+#endif
252
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
253
+
254
/* We're going to re-use TCGType in setting of the SF bit, which controls
255
the size of the operation performed. If we know the values match, it
256
makes things much cleaner. */
257
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
258
index XXXXXXX..XXXXXXX 100644
259
--- a/tcg/arm/tcg-target.c.inc
260
+++ b/tcg/arm/tcg-target.c.inc
261
@@ -XXX,XX +XXX,XX @@ bool use_idiv_instructions;
262
bool use_neon_instructions;
263
#endif
264
265
+/* Used for function call generation. */
266
+#define TCG_TARGET_STACK_ALIGN 8
267
+#define TCG_TARGET_CALL_STACK_OFFSET 0
268
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
269
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
270
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
271
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
272
+
273
#ifdef CONFIG_DEBUG_TCG
274
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
275
"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
276
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
277
index XXXXXXX..XXXXXXX 100644
278
--- a/tcg/i386/tcg-target.c.inc
279
+++ b/tcg/i386/tcg-target.c.inc
280
@@ -XXX,XX +XXX,XX @@
281
#include "../tcg-ldst.c.inc"
282
#include "../tcg-pool.c.inc"
283
284
+/* Used for function call generation. */
285
+#define TCG_TARGET_STACK_ALIGN 16
286
+#if defined(_WIN64)
287
+#define TCG_TARGET_CALL_STACK_OFFSET 32
288
+#else
289
+#define TCG_TARGET_CALL_STACK_OFFSET 0
290
+#endif
291
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
292
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
293
+#if defined(_WIN64)
294
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
295
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_VEC
296
+#elif TCG_TARGET_REG_BITS == 64
297
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
298
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
299
+#else
300
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
301
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
302
+#endif
303
+
304
#ifdef CONFIG_DEBUG_TCG
305
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
306
#if TCG_TARGET_REG_BITS == 64
307
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
308
index XXXXXXX..XXXXXXX 100644
309
--- a/tcg/loongarch64/tcg-target.c.inc
310
+++ b/tcg/loongarch64/tcg-target.c.inc
311
@@ -XXX,XX +XXX,XX @@
312
#include "../tcg-ldst.c.inc"
313
#include <asm/hwcap.h>
314
315
+/* used for function call generation */
316
+#define TCG_REG_CALL_STACK TCG_REG_SP
317
+#define TCG_TARGET_STACK_ALIGN 16
318
+#define TCG_TARGET_CALL_STACK_OFFSET 0
319
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
320
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
321
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
322
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
323
+
324
#ifdef CONFIG_DEBUG_TCG
325
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
326
"zero",
327
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
328
index XXXXXXX..XXXXXXX 100644
329
--- a/tcg/mips/tcg-target.c.inc
330
+++ b/tcg/mips/tcg-target.c.inc
331
@@ -XXX,XX +XXX,XX @@
332
#include "../tcg-ldst.c.inc"
333
#include "../tcg-pool.c.inc"
334
335
+/* used for function call generation */
336
+#define TCG_TARGET_STACK_ALIGN 16
337
+#if _MIPS_SIM == _ABIO32
338
+# define TCG_TARGET_CALL_STACK_OFFSET 16
339
+# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
340
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
341
+#else
342
+# define TCG_TARGET_CALL_STACK_OFFSET 0
343
+# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
344
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
345
+#endif
346
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
347
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
348
+
349
#if TCG_TARGET_REG_BITS == 32
350
# define LO_OFF (HOST_BIG_ENDIAN * 4)
351
# define HI_OFF (4 - LO_OFF)
352
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
353
index XXXXXXX..XXXXXXX 100644
354
--- a/tcg/riscv/tcg-target.c.inc
355
+++ b/tcg/riscv/tcg-target.c.inc
356
@@ -XXX,XX +XXX,XX @@
357
#include "../tcg-ldst.c.inc"
358
#include "../tcg-pool.c.inc"
359
360
+/* Used for function call generation. */
361
+#define TCG_REG_CALL_STACK TCG_REG_SP
362
+#define TCG_TARGET_STACK_ALIGN 16
363
+#define TCG_TARGET_CALL_STACK_OFFSET 0
364
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
365
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
366
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
367
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
368
+
369
#ifdef CONFIG_DEBUG_TCG
370
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
371
"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2",
372
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
373
index XXXXXXX..XXXXXXX 100644
374
--- a/tcg/s390x/tcg-target.c.inc
375
+++ b/tcg/s390x/tcg-target.c.inc
376
@@ -XXX,XX +XXX,XX @@
377
#include "../tcg-pool.c.inc"
378
#include "elf.h"
379
380
+/* Used for function call generation. */
381
+#define TCG_TARGET_STACK_ALIGN 8
382
+#define TCG_TARGET_CALL_STACK_OFFSET 160
383
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
384
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
385
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
386
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
387
+
388
#define TCG_CT_CONST_S16 (1 << 8)
389
#define TCG_CT_CONST_S32 (1 << 9)
390
#define TCG_CT_CONST_U32 (1 << 10)
391
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
392
index XXXXXXX..XXXXXXX 100644
393
--- a/tcg/sparc64/tcg-target.c.inc
394
+++ b/tcg/sparc64/tcg-target.c.inc
395
@@ -XXX,XX +XXX,XX @@
396
#include "../tcg-ldst.c.inc"
397
#include "../tcg-pool.c.inc"
398
399
+/* Used for function call generation. */
400
+#define TCG_REG_CALL_STACK TCG_REG_O6
401
+#define TCG_TARGET_STACK_BIAS 2047
402
+#define TCG_TARGET_STACK_ALIGN 16
403
+#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6 * 8 + TCG_TARGET_STACK_BIAS)
404
+#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
405
+#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
406
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
407
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
408
+
409
#ifdef CONFIG_DEBUG_TCG
410
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
411
"%g0",
412
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
413
index XXXXXXX..XXXXXXX 100644
414
--- a/tcg/tci/tcg-target.c.inc
415
+++ b/tcg/tci/tcg-target.c.inc
416
@@ -XXX,XX +XXX,XX @@
417
418
#include "../tcg-pool.c.inc"
419
420
+/* Used for function call generation. */
421
+#define TCG_TARGET_CALL_STACK_OFFSET 0
422
+#define TCG_TARGET_STACK_ALIGN 8
423
+#if TCG_TARGET_REG_BITS == 32
424
+# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN
425
+# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
426
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
427
+#else
428
+# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
429
+# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
430
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
431
+#endif
432
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
433
+
434
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
26
{
435
{
27
TCGOpcode opc = op->opc;
436
switch (op) {
28
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
29
break;
30
}
31
32
- /* Propagate constants through copy operations and do constant
33
- folding. Constants will be substituted to arguments by register
34
- allocator where needed and possible. Also detect copies. */
35
+ /*
36
+ * Process each opcode.
37
+ * Sorted alphabetically by opcode as much as possible.
38
+ */
39
switch (opc) {
40
- CASE_OP_32_64_VEC(mov):
41
- done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
42
- break;
43
-
44
- default:
45
- break;
46
-
47
- /* ---------------------------------------------------------- */
48
- /* Sorted alphabetically by opcode as much as possible. */
49
-
50
CASE_OP_32_64_VEC(add):
51
done = fold_add(&ctx, op);
52
break;
53
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
54
case INDEX_op_mb:
55
done = fold_mb(&ctx, op);
56
break;
57
+ CASE_OP_32_64_VEC(mov):
58
+ done = fold_mov(&ctx, op);
59
+ break;
60
CASE_OP_32_64(movcond):
61
done = fold_movcond(&ctx, op);
62
break;
63
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
64
CASE_OP_32_64_VEC(xor):
65
done = fold_xor(&ctx, op);
66
break;
67
+ default:
68
+ break;
69
}
70
71
if (!done) {
72
--
437
--
73
2.25.1
438
2.43.0
74
439
75
440
diff view generated by jsdifflib
1
This will expose the variable to subroutines that
1
In the replacement, drop the TCGType - TCG_TYPE_V64 adjustment,
2
will be broken out of tcg_optimize.
2
except for the call to tcg_out_vec_op. Pass type to tcg_gen_op[1-6],
3
so that all integer opcodes gain the type.
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 11 ++++++-----
8
include/tcg/tcg.h | 2 +-
10
1 file changed, 6 insertions(+), 5 deletions(-)
9
tcg/tcg-internal.h | 13 ++---
10
tcg/optimize.c | 10 +---
11
tcg/tcg-op-ldst.c | 26 ++++++----
12
tcg/tcg-op-vec.c | 8 +--
13
tcg/tcg-op.c | 113 +++++++++++++++++++++++------------------
14
tcg/tcg.c | 11 ++--
15
docs/devel/tcg-ops.rst | 15 +++---
16
8 files changed, 105 insertions(+), 93 deletions(-)
11
17
18
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/tcg/tcg.h
21
+++ b/include/tcg/tcg.h
22
@@ -XXX,XX +XXX,XX @@ struct TCGOp {
23
#define TCGOP_CALLI(X) (X)->param1
24
#define TCGOP_CALLO(X) (X)->param2
25
26
-#define TCGOP_VECL(X) (X)->param1
27
+#define TCGOP_TYPE(X) (X)->param1
28
#define TCGOP_VECE(X) (X)->param2
29
30
/* Make sure operands fit in the bitfields above. */
31
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/tcg-internal.h
34
+++ b/tcg/tcg-internal.h
35
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind);
36
*/
37
TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
38
39
-TCGOp *tcg_gen_op1(TCGOpcode, TCGArg);
40
-TCGOp *tcg_gen_op2(TCGOpcode, TCGArg, TCGArg);
41
-TCGOp *tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg);
42
-TCGOp *tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
43
-TCGOp *tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
44
-TCGOp *tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
45
+TCGOp *tcg_gen_op1(TCGOpcode, TCGType, TCGArg);
46
+TCGOp *tcg_gen_op2(TCGOpcode, TCGType, TCGArg, TCGArg);
47
+TCGOp *tcg_gen_op3(TCGOpcode, TCGType, TCGArg, TCGArg, TCGArg);
48
+TCGOp *tcg_gen_op4(TCGOpcode, TCGType, TCGArg, TCGArg, TCGArg, TCGArg);
49
+TCGOp *tcg_gen_op5(TCGOpcode, TCGType, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
50
+TCGOp *tcg_gen_op6(TCGOpcode, TCGType, TCGArg, TCGArg,
51
+ TCGArg, TCGArg, TCGArg, TCGArg);
52
53
void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg);
54
void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg);
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
55
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
57
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
58
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
59
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
17
60
case TCG_TYPE_V64:
18
typedef struct OptContext {
61
case TCG_TYPE_V128:
19
TCGContext *tcg;
62
case TCG_TYPE_V256:
20
+ TCGOp *prev_mb;
63
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
21
TCGTempSet temps_used;
64
+ /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */
22
} OptContext;
65
new_op = INDEX_op_mov_vec;
23
66
break;
24
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
67
default:
25
void tcg_optimize(TCGContext *s)
26
{
27
int nb_temps, nb_globals, i;
28
- TCGOp *op, *op_next, *prev_mb = NULL;
29
+ TCGOp *op, *op_next;
30
OptContext ctx = { .tcg = s };
31
32
/* Array VALS has an element for each temp.
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
70
71
/* Pre-compute the type of the operation. */
72
- if (def->flags & TCG_OPF_VECTOR) {
73
- ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
74
- } else if (def->flags & TCG_OPF_64BIT) {
75
- ctx.type = TCG_TYPE_I64;
76
- } else {
77
- ctx.type = TCG_TYPE_I32;
78
- }
79
+ ctx.type = TCGOP_TYPE(op);
80
81
/*
82
* Process each opcode.
83
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/tcg/tcg-op-ldst.c
86
+++ b/tcg/tcg-op-ldst.c
87
@@ -XXX,XX +XXX,XX @@ static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
88
return op;
89
}
90
91
-static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh,
92
+static void gen_ldst(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh,
93
TCGTemp *addr, MemOpIdx oi)
94
{
95
if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
96
if (vh) {
97
- tcg_gen_op4(opc, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi);
98
+ tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh),
99
+ temp_arg(addr), oi);
100
} else {
101
- tcg_gen_op3(opc, temp_arg(vl), temp_arg(addr), oi);
102
+ tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi);
34
}
103
}
35
104
} else {
36
/* Eliminate duplicate and redundant fence instructions. */
105
/* See TCGV_LOW/HIGH. */
37
- if (prev_mb) {
106
@@ -XXX,XX +XXX,XX @@ static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh,
38
+ if (ctx.prev_mb) {
107
TCGTemp *ah = addr + !HOST_BIG_ENDIAN;
39
switch (opc) {
108
40
case INDEX_op_mb:
109
if (vh) {
41
/* Merge two barriers of the same type into one,
110
- tcg_gen_op5(opc, temp_arg(vl), temp_arg(vh),
42
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
+ tcg_gen_op5(opc, type, temp_arg(vl), temp_arg(vh),
43
* barrier. This is stricter than specified but for
112
temp_arg(al), temp_arg(ah), oi);
44
* the purposes of TCG is better than not optimizing.
113
} else {
45
*/
114
- tcg_gen_op4(opc, temp_arg(vl), temp_arg(al), temp_arg(ah), oi);
46
- prev_mb->args[0] |= op->args[0];
115
+ tcg_gen_op4(opc, type, temp_arg(vl),
47
+ ctx.prev_mb->args[0] |= op->args[0];
116
+ temp_arg(al), temp_arg(ah), oi);
48
tcg_op_remove(s, op);
49
break;
50
51
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
52
case INDEX_op_qemu_st_i64:
53
case INDEX_op_call:
54
/* Opcodes that touch guest memory stop the optimization. */
55
- prev_mb = NULL;
56
+ ctx.prev_mb = NULL;
57
break;
58
}
59
} else if (opc == INDEX_op_mb) {
60
- prev_mb = op;
61
+ ctx.prev_mb = op;
62
}
117
}
63
}
118
}
64
}
119
}
120
@@ -XXX,XX +XXX,XX @@ static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
121
if (TCG_TARGET_REG_BITS == 32) {
122
TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
123
TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
124
- gen_ldst(opc, vl, vh, addr, oi);
125
+ gen_ldst(opc, TCG_TYPE_I64, vl, vh, addr, oi);
126
} else {
127
- gen_ldst(opc, tcgv_i64_temp(v), NULL, addr, oi);
128
+ gen_ldst(opc, TCG_TYPE_I64, tcgv_i64_temp(v), NULL, addr, oi);
129
}
130
}
131
132
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
133
} else {
134
opc = INDEX_op_qemu_ld_a64_i32;
135
}
136
- gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
137
+ gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi);
138
plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi,
139
QEMU_PLUGIN_MEM_R);
140
141
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
142
opc = INDEX_op_qemu_st_a64_i32;
143
}
144
}
145
- gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
146
+ gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi);
147
plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
148
149
if (swap) {
150
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
151
} else {
152
opc = INDEX_op_qemu_ld_a64_i128;
153
}
154
- gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
155
+ gen_ldst(opc, TCG_TYPE_I128, tcgv_i64_temp(lo),
156
+ tcgv_i64_temp(hi), addr, oi);
157
158
if (need_bswap) {
159
tcg_gen_bswap64_i64(lo, lo);
160
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
161
} else {
162
opc = INDEX_op_qemu_st_a64_i128;
163
}
164
- gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
165
+ gen_ldst(opc, TCG_TYPE_I128, tcgv_i64_temp(lo),
166
+ tcgv_i64_temp(hi), addr, oi);
167
168
if (need_bswap) {
169
tcg_temp_free_i64(lo);
170
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
171
index XXXXXXX..XXXXXXX 100644
172
--- a/tcg/tcg-op-vec.c
173
+++ b/tcg/tcg-op-vec.c
174
@@ -XXX,XX +XXX,XX @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list,
175
void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
176
{
177
TCGOp *op = tcg_emit_op(opc, 2);
178
- TCGOP_VECL(op) = type - TCG_TYPE_V64;
179
+ TCGOP_TYPE(op) = type;
180
TCGOP_VECE(op) = vece;
181
op->args[0] = r;
182
op->args[1] = a;
183
@@ -XXX,XX +XXX,XX @@ void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
184
TCGArg r, TCGArg a, TCGArg b)
185
{
186
TCGOp *op = tcg_emit_op(opc, 3);
187
- TCGOP_VECL(op) = type - TCG_TYPE_V64;
188
+ TCGOP_TYPE(op) = type;
189
TCGOP_VECE(op) = vece;
190
op->args[0] = r;
191
op->args[1] = a;
192
@@ -XXX,XX +XXX,XX @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
193
TCGArg r, TCGArg a, TCGArg b, TCGArg c)
194
{
195
TCGOp *op = tcg_emit_op(opc, 4);
196
- TCGOP_VECL(op) = type - TCG_TYPE_V64;
197
+ TCGOP_TYPE(op) = type;
198
TCGOP_VECE(op) = vece;
199
op->args[0] = r;
200
op->args[1] = a;
201
@@ -XXX,XX +XXX,XX @@ void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
202
TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
203
{
204
TCGOp *op = tcg_emit_op(opc, 6);
205
- TCGOP_VECL(op) = type - TCG_TYPE_V64;
206
+ TCGOP_TYPE(op) = type;
207
TCGOP_VECE(op) = vece;
208
op->args[0] = r;
209
op->args[1] = a;
210
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
211
index XXXXXXX..XXXXXXX 100644
212
--- a/tcg/tcg-op.c
213
+++ b/tcg/tcg-op.c
214
@@ -XXX,XX +XXX,XX @@
215
*/
216
#define NI __attribute__((noinline))
217
218
-TCGOp * NI tcg_gen_op1(TCGOpcode opc, TCGArg a1)
219
+TCGOp * NI tcg_gen_op1(TCGOpcode opc, TCGType type, TCGArg a1)
220
{
221
TCGOp *op = tcg_emit_op(opc, 1);
222
+ TCGOP_TYPE(op) = type;
223
op->args[0] = a1;
224
return op;
225
}
226
227
-TCGOp * NI tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
228
+TCGOp * NI tcg_gen_op2(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2)
229
{
230
TCGOp *op = tcg_emit_op(opc, 2);
231
+ TCGOP_TYPE(op) = type;
232
op->args[0] = a1;
233
op->args[1] = a2;
234
return op;
235
}
236
237
-TCGOp * NI tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
238
+TCGOp * NI tcg_gen_op3(TCGOpcode opc, TCGType type, TCGArg a1,
239
+ TCGArg a2, TCGArg a3)
240
{
241
TCGOp *op = tcg_emit_op(opc, 3);
242
+ TCGOP_TYPE(op) = type;
243
op->args[0] = a1;
244
op->args[1] = a2;
245
op->args[2] = a3;
246
return op;
247
}
248
249
-TCGOp * NI tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2,
250
+TCGOp * NI tcg_gen_op4(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2,
251
TCGArg a3, TCGArg a4)
252
{
253
TCGOp *op = tcg_emit_op(opc, 4);
254
+ TCGOP_TYPE(op) = type;
255
op->args[0] = a1;
256
op->args[1] = a2;
257
op->args[2] = a3;
258
@@ -XXX,XX +XXX,XX @@ TCGOp * NI tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2,
259
return op;
260
}
261
262
-TCGOp * NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2,
263
+TCGOp * NI tcg_gen_op5(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2,
264
TCGArg a3, TCGArg a4, TCGArg a5)
265
{
266
TCGOp *op = tcg_emit_op(opc, 5);
267
+ TCGOP_TYPE(op) = type;
268
op->args[0] = a1;
269
op->args[1] = a2;
270
op->args[2] = a3;
271
@@ -XXX,XX +XXX,XX @@ TCGOp * NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2,
272
return op;
273
}
274
275
-TCGOp * NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
276
- TCGArg a4, TCGArg a5, TCGArg a6)
277
+TCGOp * NI tcg_gen_op6(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2,
278
+ TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6)
279
{
280
TCGOp *op = tcg_emit_op(opc, 6);
281
+ TCGOP_TYPE(op) = type;
282
op->args[0] = a1;
283
op->args[1] = a2;
284
op->args[2] = a3;
285
@@ -XXX,XX +XXX,XX @@ TCGOp * NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
286
# define DNI
287
#endif
288
289
-static void DNI tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1)
290
+static void DNI tcg_gen_op1_i32(TCGOpcode opc, TCGType type, TCGv_i32 a1)
291
{
292
- tcg_gen_op1(opc, tcgv_i32_arg(a1));
293
+ tcg_gen_op1(opc, type, tcgv_i32_arg(a1));
294
}
295
296
-static void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1)
297
+static void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGType type, TCGv_i64 a1)
298
{
299
- tcg_gen_op1(opc, tcgv_i64_arg(a1));
300
+ tcg_gen_op1(opc, type, tcgv_i64_arg(a1));
301
}
302
303
-static TCGOp * DNI tcg_gen_op1i(TCGOpcode opc, TCGArg a1)
304
+static TCGOp * DNI tcg_gen_op1i(TCGOpcode opc, TCGType type, TCGArg a1)
305
{
306
- return tcg_gen_op1(opc, a1);
307
+ return tcg_gen_op1(opc, type, a1);
308
}
309
310
static void DNI tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2)
311
{
312
- tcg_gen_op2(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2));
313
+ tcg_gen_op2(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2));
314
}
315
316
static void DNI tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2)
317
{
318
- tcg_gen_op2(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2));
319
+ tcg_gen_op2(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2));
320
}
321
322
static void DNI tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1,
323
TCGv_i32 a2, TCGv_i32 a3)
324
{
325
- tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3));
326
+ tcg_gen_op3(opc, TCG_TYPE_I32, tcgv_i32_arg(a1),
327
+ tcgv_i32_arg(a2), tcgv_i32_arg(a3));
328
}
329
330
static void DNI tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1,
331
TCGv_i64 a2, TCGv_i64 a3)
332
{
333
- tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3));
334
+ tcg_gen_op3(opc, TCG_TYPE_I64, tcgv_i64_arg(a1),
335
+ tcgv_i64_arg(a2), tcgv_i64_arg(a3));
336
}
337
338
static void DNI tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1,
339
TCGv_i32 a2, TCGArg a3)
340
{
341
- tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3);
342
+ tcg_gen_op3(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3);
343
}
344
345
static void DNI tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1,
346
TCGv_i64 a2, TCGArg a3)
347
{
348
- tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3);
349
+ tcg_gen_op3(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3);
350
}
351
352
static void DNI tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val,
353
TCGv_ptr base, TCGArg offset)
354
{
355
- tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset);
356
+ tcg_gen_op3(opc, TCG_TYPE_I32, tcgv_i32_arg(val),
357
+ tcgv_ptr_arg(base), offset);
358
}
359
360
static void DNI tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val,
361
TCGv_ptr base, TCGArg offset)
362
{
363
- tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_ptr_arg(base), offset);
364
+ tcg_gen_op3(opc, TCG_TYPE_I64, tcgv_i64_arg(val),
365
+ tcgv_ptr_arg(base), offset);
366
}
367
368
static void DNI tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
369
TCGv_i32 a3, TCGv_i32 a4)
370
{
371
- tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
372
+ tcg_gen_op4(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
373
tcgv_i32_arg(a3), tcgv_i32_arg(a4));
374
}
375
376
 static void DNI tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                 TCGv_i64 a3, TCGv_i64 a4)
 {
-    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op4(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), tcgv_i64_arg(a4));
 }
 
 static void DNI tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                  TCGv_i32 a3, TCGArg a4)
 {
-    tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    tcg_gen_op4(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                 tcgv_i32_arg(a3), a4);
 }
 
 static void DNI tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                  TCGv_i64 a3, TCGArg a4)
 {
-    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op4(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), a4);
 }
 
 static TCGOp * DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                      TCGArg a3, TCGArg a4)
 {
-    return tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
+    return tcg_gen_op4(opc, TCG_TYPE_I32,
+                       tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
 }
 
 static TCGOp * DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                      TCGArg a3, TCGArg a4)
 {
-    return tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4);
+    return tcg_gen_op4(opc, TCG_TYPE_I64,
+                       tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4);
 }
 
 static void DNI tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                 TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5)
 {
-    tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    tcg_gen_op5(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                 tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5));
 }
 
 static void DNI tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                 TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5)
 {
-    tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op5(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5));
 }
 
 static void DNI tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                   TCGv_i32 a3, TCGArg a4, TCGArg a5)
 {
-    tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    tcg_gen_op5(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                 tcgv_i32_arg(a3), a4, a5);
 }
 
 static void DNI tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                   TCGv_i64 a3, TCGArg a4, TCGArg a5)
 {
-    tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op5(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), a4, a5);
 }
 
@@ -XXX,XX +XXX,XX @@ static void DNI tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                 TCGv_i32 a3, TCGv_i32 a4,
                                 TCGv_i32 a5, TCGv_i32 a6)
 {
-    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                 tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5),
                 tcgv_i32_arg(a6));
 }
@@ -XXX,XX +XXX,XX @@ static void DNI tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                 TCGv_i64 a3, TCGv_i64 a4,
                                 TCGv_i64 a5, TCGv_i64 a6)
 {
-    tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op6(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5),
                 tcgv_i64_arg(a6));
 }
@@ -XXX,XX +XXX,XX @@ static void DNI tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                  TCGv_i32 a3, TCGv_i32 a4,
                                  TCGv_i32 a5, TCGArg a6)
 {
-    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                 tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), a6);
 }
 
@@ -XXX,XX +XXX,XX @@ static void DNI tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
                                  TCGv_i64 a3, TCGv_i64 a4,
                                  TCGv_i64 a5, TCGArg a6)
 {
-    tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+    tcg_gen_op6(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
                 tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6);
 }
 
@@ -XXX,XX +XXX,XX @@ static TCGOp * DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
                                      TCGv_i32 a3, TCGv_i32 a4,
                                      TCGArg a5, TCGArg a6)
 {
-    return tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+    return tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
                        tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6);
 }
 
@@ -XXX,XX +XXX,XX @@ static TCGOp * DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
 void gen_set_label(TCGLabel *l)
 {
     l->present = 1;
-    tcg_gen_op1(INDEX_op_set_label, label_arg(l));
+    tcg_gen_op1(INDEX_op_set_label, 0, label_arg(l));
 }
 
 static void add_as_label_use(TCGLabel *l, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ static void add_as_label_use(TCGLabel *l, TCGOp *op)
 
 void tcg_gen_br(TCGLabel *l)
 {
-    add_as_label_use(l, tcg_gen_op1(INDEX_op_br, label_arg(l)));
+    add_as_label_use(l, tcg_gen_op1(INDEX_op_br, 0, label_arg(l)));
 }
 
 void tcg_gen_mb(TCGBar mb_type)
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mb(TCGBar mb_type)
 #endif
 
     if (parallel) {
-        tcg_gen_op1(INDEX_op_mb, mb_type);
+        tcg_gen_op1(INDEX_op_mb, 0, mb_type);
     }
 }
 
 void tcg_gen_plugin_cb(unsigned from)
 {
-    tcg_gen_op1(INDEX_op_plugin_cb, from);
+    tcg_gen_op1(INDEX_op_plugin_cb, 0, from);
 }
 
 void tcg_gen_plugin_mem_cb(TCGv_i64 addr, unsigned meminfo)
 {
-    tcg_gen_op2(INDEX_op_plugin_mem_cb, tcgv_i64_arg(addr), meminfo);
+    tcg_gen_op2(INDEX_op_plugin_mem_cb, 0, tcgv_i64_arg(addr), meminfo);
 }
 
 /* 32 bit ops */
 
 void tcg_gen_discard_i32(TCGv_i32 arg)
 {
-    tcg_gen_op1_i32(INDEX_op_discard, arg);
+    tcg_gen_op1_i32(INDEX_op_discard, TCG_TYPE_I32, arg);
 }
 
 void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg)
@@ -XXX,XX +XXX,XX @@ void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset)
 void tcg_gen_discard_i64(TCGv_i64 arg)
 {
     if (TCG_TARGET_REG_BITS == 64) {
-        tcg_gen_op1_i64(INDEX_op_discard, arg);
+        tcg_gen_op1_i64(INDEX_op_discard, TCG_TYPE_I64, arg);
     } else {
         tcg_gen_discard_i32(TCGV_LOW(arg));
         tcg_gen_discard_i32(TCGV_HIGH(arg));
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
     if (TCG_TARGET_REG_BITS == 32) {
         tcg_gen_mov_i32(ret, TCGV_LOW(arg));
     } else if (TCG_TARGET_HAS_extr_i64_i32) {
-        tcg_gen_op2(INDEX_op_extrl_i64_i32,
+        tcg_gen_op2(INDEX_op_extrl_i64_i32, TCG_TYPE_I32,
                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
     } else {
         tcg_gen_mov_i32(ret, (TCGv_i32)arg);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
     if (TCG_TARGET_REG_BITS == 32) {
         tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
     } else if (TCG_TARGET_HAS_extr_i64_i32) {
-        tcg_gen_op2(INDEX_op_extrh_i64_i32,
+        tcg_gen_op2(INDEX_op_extrh_i64_i32, TCG_TYPE_I32,
                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
     } else {
         TCGv_i64 t = tcg_temp_ebb_new_i64();
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
     } else {
-        tcg_gen_op2(INDEX_op_extu_i32_i64,
+        tcg_gen_op2(INDEX_op_extu_i32_i64, TCG_TYPE_I64,
                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
     }
 }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
     } else {
-        tcg_gen_op2(INDEX_op_ext_i32_i64,
+        tcg_gen_op2(INDEX_op_ext_i32_i64, TCG_TYPE_I64,
                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
     }
 }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
     }
 
-    tcg_gen_op1i(INDEX_op_exit_tb, val);
+    tcg_gen_op1i(INDEX_op_exit_tb, 0, val);
 }
 
 void tcg_gen_goto_tb(unsigned idx)
@@ -XXX,XX +XXX,XX @@ void tcg_gen_goto_tb(unsigned idx)
     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
 #endif
     plugin_gen_disable_mem_helpers();
-    tcg_gen_op1i(INDEX_op_goto_tb, idx);
+    tcg_gen_op1i(INDEX_op_goto_tb, 0, idx);
 }
 
 void tcg_gen_lookup_and_goto_ptr(void)
@@ -XXX,XX +XXX,XX @@ void tcg_gen_lookup_and_goto_ptr(void)
     plugin_gen_disable_mem_helpers();
     ptr = tcg_temp_ebb_new_ptr();
     gen_helper_lookup_tb_ptr(ptr, tcg_env);
-    tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
+    tcg_gen_op1i(INDEX_op_goto_ptr, TCG_TYPE_PTR, tcgv_ptr_arg(ptr));
     tcg_temp_free_ptr(ptr);
 }
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
             nb_cargs = def->nb_cargs;
 
             if (def->flags & TCG_OPF_VECTOR) {
-                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
+                col += ne_fprintf(f, "v%d,e%d,",
+                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                   8 << TCGOP_VECE(op));
             }
 
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
 
     itype = its->type;
     vece = TCGOP_VECE(op);
-    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
+    vtype = TCGOP_TYPE(op);
 
     if (its->val_type == TEMP_VAL_CONST) {
         /* Propagate constant via movi -> dupi.  */
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
         break;
     default:
         if (def->flags & TCG_OPF_VECTOR) {
-            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
-                           new_args, const_args);
+            tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64,
+                           TCGOP_VECE(op), new_args, const_args);
         } else {
             tcg_out_op(s, op->opc, new_args, const_args);
         }
@@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
 {
     const TCGLifeData arg_life = op->life;
     TCGTemp *ots, *itsl, *itsh;
-    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
+    TCGType vtype = TCGOP_TYPE(op);
 
     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/devel/tcg-ops.rst
+++ b/docs/devel/tcg-ops.rst
@@ -XXX,XX +XXX,XX @@ QEMU specific operations
 Host vector operations
 ----------------------
 
-All of the vector ops have two parameters, ``TCGOP_VECL`` & ``TCGOP_VECE``.
-The former specifies the length of the vector in log2 64-bit units; the
-latter specifies the length of the element (if applicable) in log2 8-bit units.
-E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
+All of the vector ops have two parameters, ``TCGOP_TYPE`` & ``TCGOP_VECE``.
+The former specifies the length of the vector as a TCGType; the latter
+specifies the length of the element (if applicable) in log2 8-bit units.
 
 .. list-table::
 
@@ -XXX,XX +XXX,XX @@ E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
 
    * - dup_vec *v0*, *r1*
 
-     - | Duplicate the low N bits of *r1* into VECL/VECE copies across *v0*.
+     - | Duplicate the low N bits of *r1* into TYPE/VECE copies across *v0*.
 
    * - dupi_vec *v0*, *c*
 
@@ -XXX,XX +XXX,XX @@ E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
 
    * - dup2_vec *v0*, *r1*, *r2*
 
-     - | Duplicate *r2*:*r1* into VECL/64 copies across *v0*.  This opcode is
+     - | Duplicate *r2*:*r1* into TYPE/64 copies across *v0*.  This opcode is
          only present for 32-bit hosts.
 
    * - add_vec *v0*, *v1*, *v2*
@@ -XXX,XX +XXX,XX @@ E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
 
       .. code-block:: c
 
-         for (i = 0; i < VECL/VECE; ++i) {
+         for (i = 0; i < TYPE/VECE; ++i) {
              v0[i] = v1[i] << s2;
          }
 
@@ -XXX,XX +XXX,XX @@ E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
 
       .. code-block:: c
 
-         for (i = 0; i < VECL/VECE; ++i) {
+         for (i = 0; i < TYPE/VECE; ++i) {
              v0[i] = v1[i] << v2[i];
          }
-- 
2.43.0

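As a worked example of the TYPE/VECE encoding documented in the tcg-ops.rst
hunk above (an illustrative sketch only, not part of the patch):
TCG_TYPE_V128 with VECE = 2 describes a 16-byte vector of 4-byte elements,
i.e. four i32 lanes.

    /* Sketch: lane count implied by TCGOP_TYPE and TCGOP_VECE,
     * assuming QEMU's tcg_type_size() helper. */
    static inline int vector_lanes(TCGType type, unsigned vece)
    {
        return tcg_type_size(type) / (1 << vece);   /* V128, e=2 -> 4 */
    }
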
These are not particularly useful outside of optimization passes.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h  | 4 ----
 tcg/tcg-internal.h | 5 +++++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_call7(void *func, TCGHelperInfo *, TCGTemp *ret,
 
 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs);
 void tcg_op_remove(TCGContext *s, TCGOp *op);
-TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op,
-                            TCGOpcode opc, unsigned nargs);
-TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op,
-                           TCGOpcode opc, unsigned nargs);
 
 /**
  * tcg_remove_ops_after:
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -XXX,XX +XXX,XX @@ void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg);
 void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e);
 
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op,
+                            TCGOpcode opc, unsigned nargs);
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op,
+                           TCGOpcode opc, unsigned nargs);
+
 #endif /* TCG_INTERNAL_H */
-- 
2.43.0

Simplify use within the optimizers by defaulting the
new opcode to the same type as the old opcode.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
                             TCGOpcode opc, unsigned nargs)
 {
     TCGOp *new_op = tcg_op_alloc(opc, nargs);
+
+    TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
     return new_op;
 }
@@ -XXX,XX +XXX,XX @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
                            TCGOpcode opc, unsigned nargs)
 {
     TCGOp *new_op = tcg_op_alloc(opc, nargs);
+
+    TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
     return new_op;
 }
-- 
2.43.0

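A caller's-eye sketch of the effect of the two hunks above (assumed usage,
not code from this series): a pass that splits an op in place no longer
needs to copy the type by hand.

    /* New ops inserted next to 'op' now inherit its type. */
    TCGOp *new_op = tcg_op_insert_after(s, op, INDEX_op_mov_i32, 2);
    /* TCGOP_TYPE(new_op) == TCGOP_TYPE(op) already holds here. */
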
To be used by some integer operations instead of,
or in addition to, a trailing constant argument.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ struct TCGOp {
 #define TCGOP_CALLO(X)    (X)->param2
 
 #define TCGOP_TYPE(X)     (X)->param1
+#define TCGOP_FLAGS(X)    (X)->param2
 #define TCGOP_VECE(X)     (X)->param2
 
 /* Make sure operands fit in the bitfields above. */
-- 
2.43.0

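To make the parameter packing above easier to see (commentary only,
reflecting the macros as they stand after this patch):

    /* struct TCGOp parameter fields after this patch:
     *   param1: TCGOP_CALLI for calls, TCGOP_TYPE otherwise
     *   param2: TCGOP_CALLO for calls, TCGOP_VECE for vector ops,
     *           TCGOP_FLAGS for the integer ops that will carry flags
     * The overlap is safe because the three uses are disjoint by opcode.
     */
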
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |  7 ++++++-
 tcg/tcg.c         | 11 +++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef struct TCGTargetOpDef {
     const char *args_ct_str[TCG_MAX_OP_ARGS];
 } TCGTargetOpDef;
 
-bool tcg_op_supported(TCGOpcode op);
+/*
+ * tcg_op_supported:
+ * Query if @op, for @type and @flags, is supported by the host
+ * on which we are currently executing.
+ */
+bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags);
 
 void tcg_gen_call0(void *func, TCGHelperInfo *, TCGTemp *ret);
 void tcg_gen_call1(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcgv_i32_temp(TCGv_i32 v)
 }
 #endif /* CONFIG_DEBUG_TCG */
 
-/* Return true if OP may appear in the opcode stream.
-   Test the runtime variable that controls each opcode. */
-bool tcg_op_supported(TCGOpcode op)
+/*
+ * Return true if OP may appear in the opcode stream with TYPE.
+ * Test the runtime variable that controls each opcode.
+ */
+bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
 {
     const bool have_vec
         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
             /* fall through */
         default:
             /* Sanity check that we've not introduced any unhandled opcodes. */
-            tcg_debug_assert(tcg_op_supported(opc));
+            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
+                                              TCGOP_FLAGS(op)));
             /* Note: in order to speed up the code, it would be much
                faster to have specialized register allocator functions for
                some common argument patterns */
-- 
2.43.0

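A sketch of the extended query from a code generator's point of view
(assumed usage; the conversions later in this series follow the same shape):

    /* Flags are 0 for opcodes that do not carry any. */
    if (tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0)) {
        tcg_gen_andc_i32(ret, a, b);        /* host has andc */
    } else {
        tcg_gen_not_i32(t, b);              /* fall back to not+and */
        tcg_gen_and_i32(ret, a, t);
    }
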
Pull the "op r, a, i => mov r, a" optimization into a function,
1
Rely on tcg-op-vec.c to expand the opcode if missing.
2
and use them in the outer-most logical operations.
3
2
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
tcg/optimize.c | 61 +++++++++++++++++++++-----------------------------
6
target/arm/tcg/translate-sve.c | 20 ++++----------------
8
1 file changed, 26 insertions(+), 35 deletions(-)
7
1 file changed, 4 insertions(+), 16 deletions(-)
9
8
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
11
--- a/target/arm/tcg/translate-sve.c
13
+++ b/tcg/optimize.c
12
+++ b/target/arm/tcg/translate-sve.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
13
@@ -XXX,XX +XXX,XX @@ static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
15
return false;
14
static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
15
TCGv_vec m, TCGv_vec k)
16
{
17
- if (TCG_TARGET_HAS_bitsel_vec) {
18
- tcg_gen_not_vec(vece, n, n);
19
- tcg_gen_bitsel_vec(vece, d, k, n, m);
20
- } else {
21
- tcg_gen_andc_vec(vece, n, k, n);
22
- tcg_gen_andc_vec(vece, m, m, k);
23
- tcg_gen_or_vec(vece, d, n, m);
24
- }
25
+ tcg_gen_not_vec(vece, n, n);
26
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
16
}
27
}
17
28
18
+/* If the binary operation has second argument @i, fold to identity. */
29
static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
19
+static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
30
@@ -XXX,XX +XXX,XX @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
20
+{
31
static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
21
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
32
TCGv_vec m, TCGv_vec k)
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
23
+ }
24
+ return false;
25
+}
26
+
27
/* If the binary operation has second argument @i, fold to NOT. */
28
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
29
{
33
{
30
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
34
- if (TCG_TARGET_HAS_bitsel_vec) {
31
35
- tcg_gen_not_vec(vece, m, m);
32
static bool fold_add(OptContext *ctx, TCGOp *op)
36
- tcg_gen_bitsel_vec(vece, d, k, n, m);
33
{
37
- } else {
34
- return fold_const2(ctx, op);
38
- tcg_gen_and_vec(vece, n, n, k);
35
+ if (fold_const2(ctx, op) ||
39
- tcg_gen_or_vec(vece, m, m, k);
36
+ fold_xi_to_x(ctx, op, 0)) {
40
- tcg_gen_orc_vec(vece, d, n, m);
37
+ return true;
41
- }
38
+ }
42
+ tcg_gen_not_vec(vece, m, m);
39
+ return false;
43
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
40
}
44
}
41
45
42
static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
46
static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
43
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
44
{
45
if (fold_const2(ctx, op) ||
46
fold_xi_to_i(ctx, op, 0) ||
47
+ fold_xi_to_x(ctx, op, -1) ||
48
fold_xx_to_x(ctx, op)) {
49
return true;
50
}
51
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
52
{
53
if (fold_const2(ctx, op) ||
54
fold_xx_to_i(ctx, op, 0) ||
55
+ fold_xi_to_x(ctx, op, 0) ||
56
fold_ix_to_not(ctx, op, -1)) {
57
return true;
58
}
59
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
60
static bool fold_eqv(OptContext *ctx, TCGOp *op)
61
{
62
if (fold_const2(ctx, op) ||
63
+ fold_xi_to_x(ctx, op, -1) ||
64
fold_xi_to_not(ctx, op, 0)) {
65
return true;
66
}
67
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
68
static bool fold_or(OptContext *ctx, TCGOp *op)
69
{
70
if (fold_const2(ctx, op) ||
71
+ fold_xi_to_x(ctx, op, 0) ||
72
fold_xx_to_x(ctx, op)) {
73
return true;
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
76
static bool fold_orc(OptContext *ctx, TCGOp *op)
77
{
78
if (fold_const2(ctx, op) ||
79
+ fold_xi_to_x(ctx, op, -1) ||
80
fold_ix_to_not(ctx, op, 0)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
84
85
static bool fold_shift(OptContext *ctx, TCGOp *op)
86
{
87
- return fold_const2(ctx, op);
88
+ if (fold_const2(ctx, op) ||
89
+ fold_xi_to_x(ctx, op, 0)) {
90
+ return true;
91
+ }
92
+ return false;
93
}
94
95
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
96
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
97
{
98
if (fold_const2(ctx, op) ||
99
fold_xx_to_i(ctx, op, 0) ||
100
+ fold_xi_to_x(ctx, op, 0) ||
101
fold_sub_to_neg(ctx, op)) {
102
return true;
103
}
104
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
105
{
106
if (fold_const2(ctx, op) ||
107
fold_xx_to_i(ctx, op, 0) ||
108
+ fold_xi_to_x(ctx, op, 0) ||
109
fold_xi_to_not(ctx, op, -1)) {
110
return true;
111
}
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
break;
114
}
115
116
- /* Simplify expression for "op r, a, const => mov r, a" cases */
117
- switch (opc) {
118
- CASE_OP_32_64_VEC(add):
119
- CASE_OP_32_64_VEC(sub):
120
- CASE_OP_32_64_VEC(or):
121
- CASE_OP_32_64_VEC(xor):
122
- CASE_OP_32_64_VEC(andc):
123
- CASE_OP_32_64(shl):
124
- CASE_OP_32_64(shr):
125
- CASE_OP_32_64(sar):
126
- CASE_OP_32_64(rotl):
127
- CASE_OP_32_64(rotr):
128
- if (!arg_is_const(op->args[1])
129
- && arg_is_const(op->args[2])
130
- && arg_info(op->args[2])->val == 0) {
131
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
132
- continue;
133
- }
134
- break;
135
- CASE_OP_32_64_VEC(and):
136
- CASE_OP_32_64_VEC(orc):
137
- CASE_OP_32_64(eqv):
138
- if (!arg_is_const(op->args[1])
139
- && arg_is_const(op->args[2])
140
- && arg_info(op->args[2])->val == -1) {
141
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
142
- continue;
143
- }
144
- break;
145
- default:
146
- break;
147
- }
148
-
149
/* Simplify using known-zero bits. Currently only ops with a single
150
output argument is supported. */
151
z_mask = -1;
152
--
47
--
153
2.25.1
48
2.43.0
154
49
155
50
diff view generated by jsdifflib
1
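For reference, the per-lane identity that the generic expander provides when
bitsel_vec is missing (a sketch of the semantics, not the expander itself):

    /* bitsel(d, c, t, f): d = (c & t) | (~c & f), per lane. */
    static inline uint64_t bitsel64(uint64_t c, uint64_t t, uint64_t f)
    {
        return (c & t) | (~c & f);
    }
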
Do not reference TCG_TARGET_HAS_* directly.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-a64.c | 10 ++++++----
 target/arm/tcg/translate-sve.c |  2 +-
 target/arm/tcg/translate.c     |  2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
     TCGv_i64 tcg_rn, tcg_y;
     DisasCompare c;
     unsigned nzcv;
+    bool has_andc;
 
     /* Set T0 = !COND.  */
     arm_test_cc(&c, a->cond);
@@ -XXX,XX +XXX,XX @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
 
     nzcv = a->nzcv;
+    has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0);
     if (nzcv & 8) { /* N */
         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
     } else {
-        if (TCG_TARGET_HAS_andc_i32) {
+        if (has_andc) {
             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
         } else {
             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
         }
     }
     if (nzcv & 4) { /* Z */
-        if (TCG_TARGET_HAS_andc_i32) {
+        if (has_andc) {
             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
         } else {
             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
@@ -XXX,XX +XXX,XX @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
     if (nzcv & 2) { /* C */
         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
     } else {
-        if (TCG_TARGET_HAS_andc_i32) {
+        if (has_andc) {
             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
         } else {
             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
@@ -XXX,XX +XXX,XX @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
     if (nzcv & 1) { /* V */
         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
     } else {
-        if (TCG_TARGET_HAS_andc_i32) {
+        if (has_andc) {
             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
         } else {
             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -XXX,XX +XXX,XX @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
      *           =         | ~(m | k)
      */
     tcg_gen_and_i64(n, n, k);
-    if (TCG_TARGET_HAS_orc_i64) {
+    if (tcg_op_supported(INDEX_op_orc_i64, TCG_TYPE_I64, 0)) {
         tcg_gen_or_i64(m, m, k);
         tcg_gen_orc_i64(d, n, m);
     } else {
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
-    if (TCG_TARGET_HAS_add2_i32) {
+    if (tcg_op_supported(INDEX_op_add2_i32, TCG_TYPE_I32, 0)) {
         tcg_gen_movi_i32(tmp, 0);
         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
-- 
2.43.0

Do not reference TCG_TARGET_HAS_* directly.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/tricore/translate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -XXX,XX +XXX,XX @@ static void decode_bit_andacc(DisasContext *ctx)
                     pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_and_tl);
         break;
     case OPC2_32_BIT_AND_NOR_T:
-        if (TCG_TARGET_HAS_andc_i32) {
+        if (tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0)) {
             gen_bit_2op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
                         pos1, pos2, &tcg_gen_or_tl, &tcg_gen_andc_tl);
         } else {
@@ -XXX,XX +XXX,XX @@ static void decode_bit_orand(DisasContext *ctx)
                     pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_or_tl);
         break;
     case OPC2_32_BIT_OR_NOR_T:
-        if (TCG_TARGET_HAS_orc_i32) {
+        if (tcg_op_supported(INDEX_op_orc_i32, TCG_TYPE_I32, 0)) {
             gen_bit_2op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
                         pos1, pos2, &tcg_gen_or_tl, &tcg_gen_orc_tl);
         } else {
-- 
2.43.0

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |  6 ++++++
 tcg/tcg.c         | 21 +++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef struct TCGTargetOpDef {
  * on which we are currently executing.
  */
 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags);
+/*
+ * tcg_op_deposit_valid:
+ * Query if a deposit into (ofs, len) is supported for @type by
+ * the host on which we are currently executing.
+ */
+bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len);
 
 void tcg_gen_call0(void *func, TCGHelperInfo *, TCGTemp *ret);
 void tcg_gen_call1(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
     }
 }
 
+bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
+{
+    tcg_debug_assert(len > 0);
+    switch (type) {
+    case TCG_TYPE_I32:
+        tcg_debug_assert(ofs < 32);
+        tcg_debug_assert(len <= 32);
+        tcg_debug_assert(ofs + len <= 32);
+        return TCG_TARGET_HAS_deposit_i32 &&
+               TCG_TARGET_deposit_i32_valid(ofs, len);
+    case TCG_TYPE_I64:
+        tcg_debug_assert(ofs < 64);
+        tcg_debug_assert(len <= 64);
+        tcg_debug_assert(ofs + len <= 64);
+        return TCG_TARGET_HAS_deposit_i64 &&
+               TCG_TARGET_deposit_i64_valid(ofs, len);
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
 
 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
-- 
2.43.0

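A sketch of the intended call pattern (assumed usage, mirroring the i386
conversion later in this series):

    /* Use a real deposit only when the host supports this width. */
    if (tcg_op_deposit_valid(TCG_TYPE_TL, 1, TARGET_LONG_BITS - 1)) {
        tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1);
    } else {
        tcg_gen_shli_tl(high, s->T0, 1);    /* fall back to shift+or */
        tcg_gen_or_tl(high, high, cin);
    }
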
This macro is unused.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/tcg/emit.c.inc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -XXX,XX +XXX,XX @@
 #ifdef TARGET_X86_64
 #define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
 #define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i64_valid
-#define TCG_TARGET_extract_tl_valid TCG_TARGET_extract_i64_valid
 #else
 #define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
 #define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i32_valid
-#define TCG_TARGET_extract_tl_valid TCG_TARGET_extract_i32_valid
 #endif
 
 #define MMX_OFFSET(reg) \
-- 
2.43.0

Avoid direct usage of TCG_TARGET_deposit_*_valid.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/tcg/emit.c.inc | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -XXX,XX +XXX,XX @@
  */
 #ifdef TARGET_X86_64
 #define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
-#define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i64_valid
 #else
 #define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
-#define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i32_valid
 #endif
 
 #define MMX_OFFSET(reg) \
@@ -XXX,XX +XXX,XX @@ static void gen_RCL(DisasContext *s, X86DecodedInsn *decode)
     }
 
     /* Compute high part, including incoming carry. */
-    if (!have_1bit_cin || TCG_TARGET_deposit_tl_valid(1, TARGET_LONG_BITS - 1)) {
+    if (!have_1bit_cin || tcg_op_deposit_valid(TCG_TYPE_TL, 1, TARGET_LONG_BITS - 1)) {
         /* high = (T0 << 1) | cin */
         TCGv cin = have_1bit_cin ? decode->cc_dst : decode->cc_src;
         tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1);
@@ -XXX,XX +XXX,XX @@ static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
     }
 
     /* Save incoming carry into high, it will be shifted later. */
-    if (!have_1bit_cin || TCG_TARGET_deposit_tl_valid(1, TARGET_LONG_BITS - 1)) {
+    if (!have_1bit_cin || tcg_op_deposit_valid(TCG_TYPE_TL, 1, TARGET_LONG_BITS - 1)) {
         TCGv cin = have_1bit_cin ? decode->cc_dst : decode->cc_src;
         tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1);
     } else {
-- 
2.43.0

This "garbage" setting pre-dates the addition of the type
1
Do not reference TCG_TARGET_HAS_* directly.
2
changing opcodes INDEX_op_ext_i32_i64, INDEX_op_extu_i32_i64,
3
and INDEX_op_extr{l,h}_i64_i32.
4
2
5
So now we have a definitive points at which to adjust z_mask
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
to eliminate such bits from the 32-bit operands.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
5
---
12
tcg/optimize.c | 35 ++++++++++++++++-------------------
6
target/i386/tcg/emit.c.inc | 6 +++---
13
1 file changed, 16 insertions(+), 19 deletions(-)
7
1 file changed, 3 insertions(+), 3 deletions(-)
14
8
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
16
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
11
--- a/target/i386/tcg/emit.c.inc
18
+++ b/tcg/optimize.c
12
+++ b/target/i386/tcg/emit.c.inc
19
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
13
@@ -XXX,XX +XXX,XX @@
20
ti->is_const = true;
14
* The exact opcode to check depends on 32- vs. 64-bit.
21
ti->val = ts->val;
15
*/
22
ti->z_mask = ts->val;
16
#ifdef TARGET_X86_64
23
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
17
-#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
24
- /* High bits of a 32-bit quantity are garbage. */
18
+#define INDEX_op_extract2_tl INDEX_op_extract2_i64
25
- ti->z_mask |= ~0xffffffffull;
19
#else
26
- }
20
-#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
27
} else {
21
+#define INDEX_op_extract2_tl INDEX_op_extract2_i32
28
ti->is_const = false;
22
#endif
29
ti->z_mask = -1;
23
30
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
24
#define MMX_OFFSET(reg) \
31
TCGTemp *src_ts = arg_temp(src);
25
@@ -XXX,XX +XXX,XX @@ static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode)
32
TempOptInfo *di;
26
tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1)));
33
TempOptInfo *si;
27
while (vec_len > 8) {
34
- uint64_t z_mask;
28
vec_len -= 8;
35
TCGOpcode new_op;
29
- if (TCG_TARGET_HAS_extract2_tl) {
36
30
+ if (tcg_op_supported(INDEX_op_extract2_tl, TCG_TYPE_TL, 0)) {
37
if (ts_are_copies(dst_ts, src_ts)) {
31
/*
38
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
32
* Load the next byte of the result into the high byte of T.
39
op->args[0] = dst;
33
* TCG does a similar expansion of deposit to shl+extract2; by
40
op->args[1] = src;
41
42
- z_mask = si->z_mask;
43
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
44
- /* High bits of the destination are now garbage. */
45
- z_mask |= ~0xffffffffull;
46
- }
47
- di->z_mask = z_mask;
48
+ di->z_mask = si->z_mask;
49
50
if (src_ts->type == dst_ts->type) {
51
TempOptInfo *ni = ts_info(si->next_copy);
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
53
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
54
TCGArg dst, uint64_t val)
55
{
56
- /* Convert movi to mov with constant temp. */
57
- TCGTemp *tv = tcg_constant_internal(ctx->type, val);
58
+ TCGTemp *tv;
59
60
+ if (ctx->type == TCG_TYPE_I32) {
61
+ val = (int32_t)val;
62
+ }
63
+
64
+ /* Convert movi to mov with constant temp. */
65
+ tv = tcg_constant_internal(ctx->type, val);
66
init_ts_info(ctx, tv);
67
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
68
}
69
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
70
uint64_t z_mask = ctx->z_mask;
71
72
/*
73
- * 32-bit ops generate 32-bit results. For the result is zero test
74
- * below, we can ignore high bits, but for further optimizations we
75
- * need to record that the high bits contain garbage.
76
+ * 32-bit ops generate 32-bit results, which for the purpose of
77
+ * simplifying tcg are sign-extended. Certainly that's how we
78
+ * represent our constants elsewhere. Note that the bits will
79
+ * be reset properly for a 64-bit value when encountering the
80
+ * type changing opcodes.
81
*/
82
if (ctx->type == TCG_TYPE_I32) {
83
- ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
84
- a_mask &= MAKE_64BIT_MASK(0, 32);
85
- z_mask &= MAKE_64BIT_MASK(0, 32);
86
+ a_mask = (int32_t)a_mask;
87
+ z_mask = (int32_t)z_mask;
88
+ ctx->z_mask = z_mask;
89
}
90
91
if (z_mask == 0) {
92
--
34
--
93
2.25.1
35
2.43.0
94
36
95
37
diff view generated by jsdifflib
1
Make these features unconditional, as they're used by most
tcg backends anyway.  Merge tcg-ldst.c.inc and tcg-pool.c.inc
into tcg.c and mark some of the functions unused, so that
when the features are not used we won't get Werrors.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h                |   4 -
 tcg/aarch64/tcg-target.h         |   2 -
 tcg/arm/tcg-target.h             |   2 -
 tcg/i386/tcg-target.h            |   2 -
 tcg/loongarch64/tcg-target.h     |   2 -
 tcg/mips/tcg-target.h            |   2 -
 tcg/ppc/tcg-target.h             |   2 -
 tcg/riscv/tcg-target.h           |   3 -
 tcg/s390x/tcg-target.h           |   2 -
 tcg/sparc64/tcg-target.h         |   2 -
 tcg/tcg.c                        | 211 +++++++++++++++++++++++++++++--
 tcg/aarch64/tcg-target.c.inc     |   2 -
 tcg/arm/tcg-target.c.inc         |   2 -
 tcg/i386/tcg-target.c.inc        |   3 -
 tcg/loongarch64/tcg-target.c.inc |   9 +-
 tcg/mips/tcg-target.c.inc        |   3 -
 tcg/ppc/tcg-target.c.inc         |   2 -
 tcg/riscv/tcg-target.c.inc       |   3 -
 tcg/s390x/tcg-target.c.inc       |   2 -
 tcg/sparc64/tcg-target.c.inc     |   3 -
 tcg/tcg-ldst.c.inc               |  65 ----------
 tcg/tcg-pool.c.inc               | 162 ------------------------
 tcg/tci/tcg-target.c.inc         |  12 +-
 23 files changed, 216 insertions(+), 286 deletions(-)
 delete mode 100644 tcg/tcg-ldst.c.inc
 delete mode 100644 tcg/tcg-pool.c.inc

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
     CPUState *cpu;                      /* *_trans */
 
     /* These structures are private to tcg-target.c.inc. */
-#ifdef TCG_TARGET_NEED_LDST_LABELS
     QSIMPLEQ_HEAD(, TCGLabelQemuLdst) ldst_labels;
-#endif
-#ifdef TCG_TARGET_NEED_POOL_LABELS
     struct TCGLabelPoolData *pool_labels;
-#endif
 
     TCGLabel *exitreq_label;
 
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define TCG_TARGET_HAS_tst_vec 1
 
 #define TCG_TARGET_DEFAULT_MO (0)
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
 #define TCG_TARGET_HAS_tst_vec 1
 
 #define TCG_TARGET_DEFAULT_MO (0)
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #include "tcg/tcg-mo.h"
 
 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 
 #define TCG_TARGET_DEFAULT_MO (0)
 
-#define TCG_TARGET_NEED_LDST_LABELS
-
 #endif /* LOONGARCH_TCG_TARGET_H */
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_tst 0
 
 #define TCG_TARGET_DEFAULT_MO 0
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define TCG_TARGET_HAS_tst_vec 0
 
 #define TCG_TARGET_DEFAULT_MO (0)
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 
 #define TCG_TARGET_DEFAULT_MO (0)
 
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
-
 #endif
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
 #define TCG_TARGET_HAS_tst_vec 0
 
 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
-#define TCG_TARGET_NEED_LDST_LABELS
-#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
 #define TCG_AREG0 TCG_REG_I0
155
#define TCG_TARGET_DEFAULT_MO (0)
156
-#define TCG_TARGET_NEED_LDST_LABELS
157
-#define TCG_TARGET_NEED_POOL_LABELS
158
159
#endif
160
diff --git a/tcg/tcg.c b/tcg/tcg.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/tcg/tcg.c
163
+++ b/tcg/tcg.c
164
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s);
165
static void tcg_target_qemu_prologue(TCGContext *s);
166
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
167
intptr_t value, intptr_t addend);
168
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
169
+
170
+typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
171
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
172
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
173
174
/* The CIE and FDE header definitions will be common to all hosts. */
175
typedef struct {
176
@@ -XXX,XX +XXX,XX @@ typedef struct QEMU_PACKED {
177
DebugFrameFDEHeader fde;
178
} DebugFrameHeader;
179
180
-typedef struct TCGLabelQemuLdst {
181
+struct TCGLabelQemuLdst {
182
bool is_ld; /* qemu_ld: true, qemu_st: false */
183
MemOpIdx oi;
184
TCGType type; /* result type of a load */
185
@@ -XXX,XX +XXX,XX @@ typedef struct TCGLabelQemuLdst {
186
const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */
187
tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
188
QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
189
-} TCGLabelQemuLdst;
190
+};
191
192
static void tcg_register_jit_int(const void *buf, size_t size,
193
const void *debug_frame,
194
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
195
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
196
static bool tcg_target_const_match(int64_t val, int ct,
197
TCGType type, TCGCond cond, int vece);
198
-#ifdef TCG_TARGET_NEED_LDST_LABELS
199
-static int tcg_out_ldst_finalize(TCGContext *s);
200
-#endif
201
202
#ifndef CONFIG_USER_ONLY
203
#define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; })
204
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
205
}
206
}
207
208
+/*
209
+ * Allocate a new TCGLabelQemuLdst entry.
210
+ */
211
+
212
+__attribute__((unused))
213
+static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
214
+{
215
+ TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
216
+
217
+ memset(l, 0, sizeof(*l));
218
+ QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
219
+
220
+ return l;
221
+}
222
+
223
+/*
224
+ * Allocate new constant pool entries.
225
+ */
226
+
227
+typedef struct TCGLabelPoolData {
228
+ struct TCGLabelPoolData *next;
229
+ tcg_insn_unit *label;
230
+ intptr_t addend;
231
+ int rtype;
232
+ unsigned nlong;
233
+ tcg_target_ulong data[];
234
+} TCGLabelPoolData;
235
+
236
+static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
237
+ tcg_insn_unit *label, intptr_t addend)
238
+{
239
+ TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
240
+ + sizeof(tcg_target_ulong) * nlong);
241
+
242
+ n->label = label;
243
+ n->addend = addend;
244
+ n->rtype = rtype;
245
+ n->nlong = nlong;
246
+ return n;
247
+}
248
+
249
+static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
250
+{
251
+ TCGLabelPoolData *i, **pp;
252
+ int nlong = n->nlong;
253
+
254
+ /* Insertion sort on the pool. */
255
+ for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
256
+ if (nlong > i->nlong) {
257
+ break;
258
+ }
259
+ if (nlong < i->nlong) {
260
+ continue;
261
+ }
262
+ if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
263
+ break;
264
+ }
265
+ }
266
+ n->next = *pp;
267
+ *pp = n;
268
+}
269
+
270
+/* The "usual" for generic integer code. */
271
+__attribute__((unused))
272
+static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
273
+ tcg_insn_unit *label, intptr_t addend)
274
+{
275
+ TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
276
+ n->data[0] = d;
277
+ new_pool_insert(s, n);
278
+}
279
+
280
+/* For v64 or v128, depending on the host. */
281
+__attribute__((unused))
282
+static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
283
+ intptr_t addend, tcg_target_ulong d0,
284
+ tcg_target_ulong d1)
285
+{
286
+ TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
287
+ n->data[0] = d0;
288
+ n->data[1] = d1;
289
+ new_pool_insert(s, n);
290
+}
291
+
292
+/* For v128 or v256, depending on the host. */
293
+__attribute__((unused))
294
+static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
295
+ intptr_t addend, tcg_target_ulong d0,
296
+ tcg_target_ulong d1, tcg_target_ulong d2,
297
+ tcg_target_ulong d3)
298
+{
299
+ TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
300
+ n->data[0] = d0;
301
+ n->data[1] = d1;
302
+ n->data[2] = d2;
303
+ n->data[3] = d3;
304
+ new_pool_insert(s, n);
305
+}
306
+
307
+/* For v256, for 32-bit host. */
308
+__attribute__((unused))
309
+static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
310
+ intptr_t addend, tcg_target_ulong d0,
311
+ tcg_target_ulong d1, tcg_target_ulong d2,
312
+ tcg_target_ulong d3, tcg_target_ulong d4,
313
+ tcg_target_ulong d5, tcg_target_ulong d6,
314
+ tcg_target_ulong d7)
315
+{
316
+ TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
317
+ n->data[0] = d0;
318
+ n->data[1] = d1;
319
+ n->data[2] = d2;
320
+ n->data[3] = d3;
321
+ n->data[4] = d4;
322
+ n->data[5] = d5;
323
+ n->data[6] = d6;
324
+ n->data[7] = d7;
325
+ new_pool_insert(s, n);
326
+}
327
+
328
+/*
329
+ * Generate TB finalization at the end of block
330
+ */
331
+
332
+static int tcg_out_ldst_finalize(TCGContext *s)
333
+{
334
+ TCGLabelQemuLdst *lb;
335
+
336
+ /* qemu_ld/st slow paths */
337
+ QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
338
+ if (lb->is_ld
339
+ ? !tcg_out_qemu_ld_slow_path(s, lb)
340
+ : !tcg_out_qemu_st_slow_path(s, lb)) {
341
+ return -2;
342
+ }
343
+
344
+ /*
345
+ * Test for (pending) buffer overflow. The assumption is that any
346
+ * one operation beginning below the high water mark cannot overrun
347
+ * the buffer completely. Thus we can test for overflow after
348
+ * generating code without having to check during generation.
349
+ */
350
+ if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
351
+ return -1;
352
+ }
353
+ }
354
+ return 0;
355
+}
356
+
357
+static int tcg_out_pool_finalize(TCGContext *s)
358
+{
359
+ TCGLabelPoolData *p = s->pool_labels;
360
+ TCGLabelPoolData *l = NULL;
361
+ void *a;
362
+
363
+ if (p == NULL) {
364
+ return 0;
365
+ }
366
+
367
+ /*
368
+ * ??? Round up to qemu_icache_linesize, but then do not round
369
+ * again when allocating the next TranslationBlock structure.
370
+ */
371
+ a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
372
+ sizeof(tcg_target_ulong) * p->nlong);
373
+ tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
374
+ s->data_gen_ptr = a;
375
+
376
+ for (; p != NULL; p = p->next) {
377
+ size_t size = sizeof(tcg_target_ulong) * p->nlong;
378
+ uintptr_t value;
379
+
380
+ if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
381
+ if (unlikely(a > s->code_gen_highwater)) {
382
+ return -1;
383
+ }
384
+ memcpy(a, p->data, size);
385
+ a += size;
386
+ l = p;
387
+ }
388
+
389
+ value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
390
+ if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
391
+ return -2;
392
+ }
393
+ }
394
+
395
+ s->code_ptr = a;
396
+ return 0;
397
+}
398
+
399
#define C_PFX1(P, A) P##A
400
#define C_PFX2(P, A, B) P##A##_##B
401
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
402
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
403
s->code_ptr = s->code_buf;
404
s->data_gen_ptr = NULL;
405
406
-#ifdef TCG_TARGET_NEED_LDST_LABELS
407
QSIMPLEQ_INIT(&s->ldst_labels);
408
-#endif
409
-#ifdef TCG_TARGET_NEED_POOL_LABELS
410
s->pool_labels = NULL;
411
-#endif
412
413
start_words = s->insn_start_words;
414
s->gen_insn_data =
415
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
416
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
417
418
/* Generate TB finalization at the end of block */
419
-#ifdef TCG_TARGET_NEED_LDST_LABELS
420
i = tcg_out_ldst_finalize(s);
421
if (i < 0) {
422
return i;
423
}
424
-#endif
425
-#ifdef TCG_TARGET_NEED_POOL_LABELS
426
i = tcg_out_pool_finalize(s);
427
if (i < 0) {
428
return i;
429
}
430
-#endif
431
if (!tcg_resolve_relocs(s)) {
432
return -2;
433
}
434
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
435
index XXXXXXX..XXXXXXX 100644
436
--- a/tcg/aarch64/tcg-target.c.inc
437
+++ b/tcg/aarch64/tcg-target.c.inc
438
@@ -XXX,XX +XXX,XX @@
439
* See the COPYING file in the top-level directory for details.
29
*/
440
*/
30
441
31
-/* Portions of this work are licensed under the terms of the GNU GPL,
442
-#include "../tcg-ldst.c.inc"
32
- * version 2 or later. See the COPYING file in the top-level directory.
443
-#include "../tcg-pool.c.inc"
444
#include "qemu/bitops.h"
445
446
/* Used for function call generation. */
447
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
448
index XXXXXXX..XXXXXXX 100644
449
--- a/tcg/arm/tcg-target.c.inc
450
+++ b/tcg/arm/tcg-target.c.inc
451
@@ -XXX,XX +XXX,XX @@
452
*/
453
454
#include "elf.h"
455
-#include "../tcg-ldst.c.inc"
456
-#include "../tcg-pool.c.inc"
457
458
int arm_arch = __ARM_ARCH;
459
460
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
461
index XXXXXXX..XXXXXXX 100644
462
--- a/tcg/i386/tcg-target.c.inc
463
+++ b/tcg/i386/tcg-target.c.inc
464
@@ -XXX,XX +XXX,XX @@
465
* THE SOFTWARE.
466
*/
467
468
-#include "../tcg-ldst.c.inc"
469
-#include "../tcg-pool.c.inc"
470
-
471
/* Used for function call generation. */
472
#define TCG_TARGET_STACK_ALIGN 16
473
#if defined(_WIN64)
474
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
475
index XXXXXXX..XXXXXXX 100644
476
--- a/tcg/loongarch64/tcg-target.c.inc
477
+++ b/tcg/loongarch64/tcg-target.c.inc
478
@@ -XXX,XX +XXX,XX @@
479
* THE SOFTWARE.
480
*/
481
482
-#include "../tcg-ldst.c.inc"
483
#include <asm/hwcap.h>
484
485
/* used for function call generation */
486
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tb_start(TCGContext *s)
487
/* nothing to do */
488
}
489
490
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
491
+{
492
+ for (int i = 0; i < count; ++i) {
493
+ /* Canonical nop is andi r0,r0,0 */
494
+ p[i] = OPC_ANDI;
495
+ }
496
+}
497
+
498
static void tcg_target_init(TCGContext *s)
499
{
500
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
501
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
502
index XXXXXXX..XXXXXXX 100644
503
--- a/tcg/mips/tcg-target.c.inc
504
+++ b/tcg/mips/tcg-target.c.inc
505
@@ -XXX,XX +XXX,XX @@
506
* THE SOFTWARE.
507
*/
508
509
-#include "../tcg-ldst.c.inc"
510
-#include "../tcg-pool.c.inc"
511
-
512
/* used for function call generation */
513
#define TCG_TARGET_STACK_ALIGN 16
514
#if _MIPS_SIM == _ABIO32
515
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
516
index XXXXXXX..XXXXXXX 100644
517
--- a/tcg/ppc/tcg-target.c.inc
518
+++ b/tcg/ppc/tcg-target.c.inc
519
@@ -XXX,XX +XXX,XX @@
520
*/
521
522
#include "elf.h"
523
-#include "../tcg-pool.c.inc"
524
-#include "../tcg-ldst.c.inc"
525
526
/*
527
* Standardize on the _CALL_FOO symbols used by GCC:
528
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
529
index XXXXXXX..XXXXXXX 100644
530
--- a/tcg/riscv/tcg-target.c.inc
531
+++ b/tcg/riscv/tcg-target.c.inc
532
@@ -XXX,XX +XXX,XX @@
533
* THE SOFTWARE.
534
*/
535
536
-#include "../tcg-ldst.c.inc"
537
-#include "../tcg-pool.c.inc"
538
-
539
/* Used for function call generation. */
540
#define TCG_REG_CALL_STACK TCG_REG_SP
541
#define TCG_TARGET_STACK_ALIGN 16
542
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
543
index XXXXXXX..XXXXXXX 100644
544
--- a/tcg/s390x/tcg-target.c.inc
545
+++ b/tcg/s390x/tcg-target.c.inc
546
@@ -XXX,XX +XXX,XX @@
547
* THE SOFTWARE.
548
*/
549
550
-#include "../tcg-ldst.c.inc"
551
-#include "../tcg-pool.c.inc"
552
#include "elf.h"
553
554
/* Used for function call generation. */
555
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
556
index XXXXXXX..XXXXXXX 100644
557
--- a/tcg/sparc64/tcg-target.c.inc
558
+++ b/tcg/sparc64/tcg-target.c.inc
559
@@ -XXX,XX +XXX,XX @@
560
#error "unsupported code generation mode"
561
#endif
562
563
-#include "../tcg-ldst.c.inc"
564
-#include "../tcg-pool.c.inc"
565
-
566
/* Used for function call generation. */
567
#define TCG_REG_CALL_STACK TCG_REG_O6
568
#define TCG_TARGET_STACK_BIAS 2047
569
diff --git a/tcg/tcg-ldst.c.inc b/tcg/tcg-ldst.c.inc
570
deleted file mode 100644
571
index XXXXXXX..XXXXXXX
572
--- a/tcg/tcg-ldst.c.inc
573
+++ /dev/null
574
@@ -XXX,XX +XXX,XX @@
575
-/*
576
- * TCG Backend Data: load-store optimization only.
577
- *
578
- * Permission is hereby granted, free of charge, to any person obtaining a copy
579
- * of this software and associated documentation files (the "Software"), to deal
580
- * in the Software without restriction, including without limitation the rights
581
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
582
- * copies of the Software, and to permit persons to whom the Software is
583
- * furnished to do so, subject to the following conditions:
584
- *
585
- * The above copyright notice and this permission notice shall be included in
586
- * all copies or substantial portions of the Software.
587
- *
588
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
589
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
590
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
591
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
592
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
593
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
594
- * THE SOFTWARE.
33
- */
595
- */
34
-
596
-
35
#ifndef FPU_SOFTFLOAT_MACROS_H
597
-/*
36
#define FPU_SOFTFLOAT_MACROS_H
598
- * Generate TB finalization at the end of block
37
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
39
40
}
41
42
-/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
43
- * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
44
- *
45
- * Licensed under the GPLv2/LGPLv3
46
- */
599
- */
47
-static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
600
-
48
- uint64_t n0, uint64_t d)
601
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
602
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
603
-
604
-static int tcg_out_ldst_finalize(TCGContext *s)
49
-{
605
-{
50
-#if defined(__x86_64__)
606
- TCGLabelQemuLdst *lb;
51
- uint64_t q;
607
-
52
- asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
608
- /* qemu_ld/st slow paths */
53
- return q;
609
- QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
54
-#elif defined(__s390x__) && !defined(__clang__)
610
- if (lb->is_ld
55
- /* Need to use a TImode type to get an even register pair for DLGR. */
611
- ? !tcg_out_qemu_ld_slow_path(s, lb)
56
- unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
612
- : !tcg_out_qemu_st_slow_path(s, lb)) {
57
- asm("dlgr %0, %1" : "+r"(n) : "r"(d));
613
- return -2;
58
- *r = n >> 64;
614
- }
59
- return n;
615
-
60
-#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
616
- /* Test for (pending) buffer overflow. The assumption is that any
61
- /* From Power ISA 2.06, programming note for divdeu. */
617
- one operation beginning below the high water mark cannot overrun
62
- uint64_t q1, q2, Q, r1, r2, R;
618
- the buffer completely. Thus we can test for overflow after
63
- asm("divdeu %0,%2,%4; divdu %1,%3,%4"
619
- generating code without having to check during generation. */
64
- : "=&r"(q1), "=r"(q2)
620
- if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
65
- : "r"(n1), "r"(n0), "r"(d));
621
- return -1;
66
- r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
67
- r2 = n0 - (q2 * d);
68
- Q = q1 + q2;
69
- R = r1 + r2;
70
- if (R >= d || R < r2) { /* overflow implies R > d */
71
- Q += 1;
72
- R -= d;
73
- }
74
- *r = R;
75
- return Q;
76
-#else
77
- uint64_t d0, d1, q0, q1, r1, r0, m;
78
-
79
- d0 = (uint32_t)d;
80
- d1 = d >> 32;
81
-
82
- r1 = n1 % d1;
83
- q1 = n1 / d1;
84
- m = q1 * d0;
85
- r1 = (r1 << 32) | (n0 >> 32);
86
- if (r1 < m) {
87
- q1 -= 1;
88
- r1 += d;
89
- if (r1 >= d) {
90
- if (r1 < m) {
91
- q1 -= 1;
92
- r1 += d;
93
- }
94
- }
622
- }
95
- }
623
- }
96
- r1 -= m;
624
- return 0;
97
-
625
-}
98
- r0 = r1 % d1;
626
-
99
- q0 = r1 / d1;
627
-/*
100
- m = q0 * d0;
628
- * Allocate a new TCGLabelQemuLdst entry.
101
- r0 = (r0 << 32) | (uint32_t)n0;
629
- */
102
- if (r0 < m) {
630
-
103
- q0 -= 1;
631
-static inline TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
104
- r0 += d;
632
-{
105
- if (r0 >= d) {
633
- TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
106
- if (r0 < m) {
634
-
107
- q0 -= 1;
635
- memset(l, 0, sizeof(*l));
108
- r0 += d;
636
- QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
109
- }
637
-
638
- return l;
639
-}
640
diff --git a/tcg/tcg-pool.c.inc b/tcg/tcg-pool.c.inc
641
deleted file mode 100644
642
index XXXXXXX..XXXXXXX
643
--- a/tcg/tcg-pool.c.inc
644
+++ /dev/null
645
@@ -XXX,XX +XXX,XX @@
646
-/*
647
- * TCG Backend Data: constant pool.
648
- *
649
- * Permission is hereby granted, free of charge, to any person obtaining a copy
650
- * of this software and associated documentation files (the "Software"), to deal
651
- * in the Software without restriction, including without limitation the rights
652
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
653
- * copies of the Software, and to permit persons to whom the Software is
654
- * furnished to do so, subject to the following conditions:
655
- *
656
- * The above copyright notice and this permission notice shall be included in
657
- * all copies or substantial portions of the Software.
658
- *
659
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
660
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
661
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
662
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
663
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
664
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
665
- * THE SOFTWARE.
666
- */
667
-
668
-typedef struct TCGLabelPoolData {
669
- struct TCGLabelPoolData *next;
670
- tcg_insn_unit *label;
671
- intptr_t addend;
672
- int rtype;
673
- unsigned nlong;
674
- tcg_target_ulong data[];
675
-} TCGLabelPoolData;
676
-
677
-
678
-static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
679
- tcg_insn_unit *label, intptr_t addend)
680
-{
681
- TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
682
- + sizeof(tcg_target_ulong) * nlong);
683
-
684
- n->label = label;
685
- n->addend = addend;
686
- n->rtype = rtype;
687
- n->nlong = nlong;
688
- return n;
689
-}
690
-
691
-static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
692
-{
693
- TCGLabelPoolData *i, **pp;
694
- int nlong = n->nlong;
695
-
696
- /* Insertion sort on the pool. */
697
- for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
698
- if (nlong > i->nlong) {
699
- break;
700
- }
701
- if (nlong < i->nlong) {
702
- continue;
703
- }
704
- if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
705
- break;
110
- }
706
- }
111
- }
707
- }
112
- r0 -= m;
708
- n->next = *pp;
113
-
709
- *pp = n;
114
- *r = r0;
115
- return (q1 << 32) | q0;
116
-#endif
117
-}
710
-}
118
-
711
-
119
/*----------------------------------------------------------------------------
712
-/* The "usual" for generic integer code. */
120
| Returns an approximation to the square root of the 32-bit significand given
713
-static inline void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
121
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
714
- tcg_insn_unit *label, intptr_t addend)
122
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
715
-{
123
index XXXXXXX..XXXXXXX 100644
716
- TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
124
--- a/include/qemu/host-utils.h
717
- n->data[0] = d;
125
+++ b/include/qemu/host-utils.h
718
- new_pool_insert(s, n);
719
-}
720
-
721
-/* For v64 or v128, depending on the host. */
722
-static inline void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
723
- intptr_t addend, tcg_target_ulong d0,
724
- tcg_target_ulong d1)
725
-{
726
- TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
727
- n->data[0] = d0;
728
- n->data[1] = d1;
729
- new_pool_insert(s, n);
730
-}
731
-
732
-/* For v128 or v256, depending on the host. */
733
-static inline void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
734
- intptr_t addend, tcg_target_ulong d0,
735
- tcg_target_ulong d1, tcg_target_ulong d2,
736
- tcg_target_ulong d3)
737
-{
738
- TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
739
- n->data[0] = d0;
740
- n->data[1] = d1;
741
- n->data[2] = d2;
742
- n->data[3] = d3;
743
- new_pool_insert(s, n);
744
-}
745
-
746
-/* For v256, for 32-bit host. */
747
-static inline void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
748
- intptr_t addend, tcg_target_ulong d0,
749
- tcg_target_ulong d1, tcg_target_ulong d2,
750
- tcg_target_ulong d3, tcg_target_ulong d4,
751
- tcg_target_ulong d5, tcg_target_ulong d6,
752
- tcg_target_ulong d7)
753
-{
754
- TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
755
- n->data[0] = d0;
756
- n->data[1] = d1;
757
- n->data[2] = d2;
758
- n->data[3] = d3;
759
- n->data[4] = d4;
760
- n->data[5] = d5;
761
- n->data[6] = d6;
762
- n->data[7] = d7;
763
- new_pool_insert(s, n);
764
-}
765
-
766
-/* To be provided by cpu/tcg-target.c.inc. */
767
-static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
768
-
769
-static int tcg_out_pool_finalize(TCGContext *s)
770
-{
771
- TCGLabelPoolData *p = s->pool_labels;
772
- TCGLabelPoolData *l = NULL;
773
- void *a;
774
-
775
- if (p == NULL) {
776
- return 0;
777
- }
778
-
779
- /* ??? Round up to qemu_icache_linesize, but then do not round
780
- again when allocating the next TranslationBlock structure. */
781
- a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
782
- sizeof(tcg_target_ulong) * p->nlong);
783
- tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
784
- s->data_gen_ptr = a;
785
-
786
- for (; p != NULL; p = p->next) {
787
- size_t size = sizeof(tcg_target_ulong) * p->nlong;
788
- uintptr_t value;
789
-
790
- if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
791
- if (unlikely(a > s->code_gen_highwater)) {
792
- return -1;
793
- }
794
- memcpy(a, p->data, size);
795
- a += size;
796
- l = p;
797
- }
798
-
799
- value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
800
- if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
801
- return -2;
802
- }
803
- }
804
-
805
- s->code_ptr = a;
806
- return 0;
807
-}
808
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
809
index XXXXXXX..XXXXXXX 100644
810
--- a/tcg/tci/tcg-target.c.inc
811
+++ b/tcg/tci/tcg-target.c.inc
126
@@ -XXX,XX +XXX,XX @@
812
@@ -XXX,XX +XXX,XX @@
127
* THE SOFTWARE.
813
* THE SOFTWARE.
128
*/
814
*/
129
815
130
+/* Portions of this work are licensed under the terms of the GNU GPL,
816
-#include "../tcg-pool.c.inc"
131
+ * version 2 or later. See the COPYING file in the top-level directory.
817
-
132
+ */
818
/* Used for function call generation. */
133
+
819
#define TCG_TARGET_CALL_STACK_OFFSET 0
134
#ifndef HOST_UTILS_H
820
#define TCG_TARGET_STACK_ALIGN 8
135
#define HOST_UTILS_H
821
@@ -XXX,XX +XXX,XX @@ bool tcg_target_has_memory_bswap(MemOp memop)
136
822
{
137
@@ -XXX,XX +XXX,XX @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
823
return true;
138
*/
824
}
139
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
825
+
140
826
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
141
+/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
827
+{
142
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
828
+ g_assert_not_reached();
143
+ *
829
+}
144
+ * Licensed under the GPLv2/LGPLv3
830
+
145
+ */
831
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
146
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
832
+{
147
+ uint64_t n0, uint64_t d)
833
+ g_assert_not_reached();
148
+{
834
+}
149
+#if defined(__x86_64__)
150
+ uint64_t q;
151
+ asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
152
+ return q;
153
+#elif defined(__s390x__) && !defined(__clang__)
154
+ /* Need to use a TImode type to get an even register pair for DLGR. */
155
+ unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
156
+ asm("dlgr %0, %1" : "+r"(n) : "r"(d));
157
+ *r = n >> 64;
158
+ return n;
159
+#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
160
+ /* From Power ISA 2.06, programming note for divdeu. */
161
+ uint64_t q1, q2, Q, r1, r2, R;
162
+ asm("divdeu %0,%2,%4; divdu %1,%3,%4"
163
+ : "=&r"(q1), "=r"(q2)
164
+ : "r"(n1), "r"(n0), "r"(d));
165
+ r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
166
+ r2 = n0 - (q2 * d);
167
+ Q = q1 + q2;
168
+ R = r1 + r2;
169
+ if (R >= d || R < r2) { /* overflow implies R > d */
170
+ Q += 1;
171
+ R -= d;
172
+ }
173
+ *r = R;
174
+ return Q;
175
+#else
176
+ uint64_t d0, d1, q0, q1, r1, r0, m;
177
+
178
+ d0 = (uint32_t)d;
179
+ d1 = d >> 32;
180
+
181
+ r1 = n1 % d1;
182
+ q1 = n1 / d1;
183
+ m = q1 * d0;
184
+ r1 = (r1 << 32) | (n0 >> 32);
185
+ if (r1 < m) {
186
+ q1 -= 1;
187
+ r1 += d;
188
+ if (r1 >= d) {
189
+ if (r1 < m) {
190
+ q1 -= 1;
191
+ r1 += d;
192
+ }
193
+ }
194
+ }
195
+ r1 -= m;
196
+
197
+ r0 = r1 % d1;
198
+ q0 = r1 / d1;
199
+ m = q0 * d0;
200
+ r0 = (r0 << 32) | (uint32_t)n0;
201
+ if (r0 < m) {
202
+ q0 -= 1;
203
+ r0 += d;
204
+ if (r0 >= d) {
205
+ if (r0 < m) {
206
+ q0 -= 1;
207
+ r0 += d;
208
+ }
209
+ }
210
+ }
211
+ r0 -= m;
212
+
213
+ *r = r0;
214
+ return (q1 << 32) | q0;
215
+#endif
216
+}
217
+
218
#endif
219
--
835
--
220
2.25.1
836
2.43.0
221
837
222
838
diff view generated by jsdifflib
1
Rename to fold_multiply2, and handle muls2_i32, mulu2_i64,
1
In addition, add empty files for mips, sparc64 and tci.
2
and muls2_i64.
2
Make the include unconditional within tcg-opc.h.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 44 +++++++++++++++++++++++++++++++++++---------
7
include/tcg/tcg-opc.h | 4 +---
9
1 file changed, 35 insertions(+), 9 deletions(-)
8
tcg/aarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
9
tcg/arm/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
10
tcg/i386/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
11
tcg/loongarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
12
tcg/mips/tcg-target-opc.h.inc | 1 +
13
tcg/ppc/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
14
tcg/riscv/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
15
tcg/s390x/{tcg-target.opc.h => tcg-target-opc.h.inc} | 0
16
tcg/sparc64/tcg-target-opc.h.inc | 1 +
17
tcg/tci/tcg-target-opc.h.inc | 1 +
18
11 files changed, 4 insertions(+), 3 deletions(-)
19
rename tcg/aarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
20
rename tcg/arm/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
21
rename tcg/i386/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
22
rename tcg/loongarch64/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
23
create mode 100644 tcg/mips/tcg-target-opc.h.inc
24
rename tcg/ppc/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
25
rename tcg/riscv/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
26
rename tcg/s390x/{tcg-target.opc.h => tcg-target-opc.h.inc} (100%)
27
create mode 100644 tcg/sparc64/tcg-target-opc.h.inc
28
create mode 100644 tcg/tci/tcg-target-opc.h.inc
10
29
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
30
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
12
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
32
--- a/include/tcg/tcg-opc.h
14
+++ b/tcg/optimize.c
33
+++ b/include/tcg/tcg-opc.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
34
@@ -XXX,XX +XXX,XX @@ DEF(cmpsel_vec, 1, 4, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_cmpsel_vec))
16
return false;
35
17
}
36
DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
18
37
19
-static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
38
-#if TCG_TARGET_MAYBE_vec
20
+static bool fold_multiply2(OptContext *ctx, TCGOp *op)
39
-#include "tcg-target.opc.h"
21
{
40
-#endif
22
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
41
+#include "tcg-target-opc.h.inc"
23
- uint32_t a = arg_info(op->args[2])->val;
42
24
- uint32_t b = arg_info(op->args[3])->val;
43
#ifdef TCG_TARGET_INTERPRETER
25
- uint64_t r = (uint64_t)a * b;
44
/* These opcodes are only for use between the tci generator and interpreter. */
26
+ uint64_t a = arg_info(op->args[2])->val;
45
diff --git a/tcg/aarch64/tcg-target.opc.h b/tcg/aarch64/tcg-target-opc.h.inc
27
+ uint64_t b = arg_info(op->args[3])->val;
46
similarity index 100%
28
+ uint64_t h, l;
47
rename from tcg/aarch64/tcg-target.opc.h
29
TCGArg rl, rh;
48
rename to tcg/aarch64/tcg-target-opc.h.inc
30
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
49
diff --git a/tcg/arm/tcg-target.opc.h b/tcg/arm/tcg-target-opc.h.inc
31
+ TCGOp *op2;
50
similarity index 100%
32
+
51
rename from tcg/arm/tcg-target.opc.h
33
+ switch (op->opc) {
52
rename to tcg/arm/tcg-target-opc.h.inc
34
+ case INDEX_op_mulu2_i32:
53
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target-opc.h.inc
35
+ l = (uint64_t)(uint32_t)a * (uint32_t)b;
54
similarity index 100%
36
+ h = (int32_t)(l >> 32);
55
rename from tcg/i386/tcg-target.opc.h
37
+ l = (int32_t)l;
56
rename to tcg/i386/tcg-target-opc.h.inc
38
+ break;
57
diff --git a/tcg/loongarch64/tcg-target.opc.h b/tcg/loongarch64/tcg-target-opc.h.inc
39
+ case INDEX_op_muls2_i32:
58
similarity index 100%
40
+ l = (int64_t)(int32_t)a * (int32_t)b;
59
rename from tcg/loongarch64/tcg-target.opc.h
41
+ h = l >> 32;
60
rename to tcg/loongarch64/tcg-target-opc.h.inc
42
+ l = (int32_t)l;
61
diff --git a/tcg/mips/tcg-target-opc.h.inc b/tcg/mips/tcg-target-opc.h.inc
43
+ break;
62
new file mode 100644
44
+ case INDEX_op_mulu2_i64:
63
index XXXXXXX..XXXXXXX
45
+ mulu64(&l, &h, a, b);
64
--- /dev/null
46
+ break;
65
+++ b/tcg/mips/tcg-target-opc.h.inc
47
+ case INDEX_op_muls2_i64:
66
@@ -0,0 +1 @@
48
+ muls64(&l, &h, a, b);
67
+/* No target specific opcodes. */
49
+ break;
68
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target-opc.h.inc
50
+ default:
69
similarity index 100%
51
+ g_assert_not_reached();
70
rename from tcg/ppc/tcg-target.opc.h
52
+ }
71
rename to tcg/ppc/tcg-target-opc.h.inc
53
72
diff --git a/tcg/riscv/tcg-target.opc.h b/tcg/riscv/tcg-target-opc.h.inc
54
rl = op->args[0];
73
similarity index 100%
55
rh = op->args[1];
74
rename from tcg/riscv/tcg-target.opc.h
56
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
75
rename to tcg/riscv/tcg-target-opc.h.inc
57
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
76
diff --git a/tcg/s390x/tcg-target.opc.h b/tcg/s390x/tcg-target-opc.h.inc
58
+
77
similarity index 100%
59
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
78
rename from tcg/s390x/tcg-target.opc.h
60
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
79
rename to tcg/s390x/tcg-target-opc.h.inc
61
+
80
diff --git a/tcg/sparc64/tcg-target-opc.h.inc b/tcg/sparc64/tcg-target-opc.h.inc
62
+ tcg_opt_gen_movi(ctx, op, rl, l);
81
new file mode 100644
63
+ tcg_opt_gen_movi(ctx, op2, rh, h);
82
index XXXXXXX..XXXXXXX
64
return true;
83
--- /dev/null
65
}
84
+++ b/tcg/sparc64/tcg-target-opc.h.inc
66
return false;
85
@@ -0,0 +1 @@
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
+/* No target specific opcodes. */
68
CASE_OP_32_64(muluh):
87
diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc
69
done = fold_mul_highpart(&ctx, op);
88
new file mode 100644
70
break;
89
index XXXXXXX..XXXXXXX
71
- case INDEX_op_mulu2_i32:
90
--- /dev/null
72
- done = fold_mulu2_i32(&ctx, op);
91
+++ b/tcg/tci/tcg-target-opc.h.inc
73
+ CASE_OP_32_64(muls2):
92
@@ -0,0 +1 @@
74
+ CASE_OP_32_64(mulu2):
93
+/* No target specific opcodes. */
75
+ done = fold_multiply2(&ctx, op);
76
break;
77
CASE_OP_32_64(nand):
78
done = fold_nand(&ctx, op);
79
--
94
--
80
2.25.1
95
2.43.0
81
96
82
97
diff view generated by jsdifflib
1
Pull the "op r, 0, b => movi r, 0" optimization into a function,
1
Now that tcg-target-opc.h.inc is unconditional,
2
and use it in fold_shift.
2
we can move these out of the generic header.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 28 ++++++++++------------------
7
include/tcg/tcg-opc.h | 6 ------
9
1 file changed, 10 insertions(+), 18 deletions(-)
8
tcg/tci/tcg-target-opc.h.inc | 5 ++++-
9
2 files changed, 4 insertions(+), 7 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/include/tcg/tcg-opc.h
14
+++ b/tcg/optimize.c
14
+++ b/include/tcg/tcg-opc.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
15
@@ -XXX,XX +XXX,XX @@ DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
16
return false;
16
17
}
17
#include "tcg-target-opc.h.inc"
18
18
19
+/* If the binary operation has first argument @i, fold to @i. */
19
-#ifdef TCG_TARGET_INTERPRETER
20
+static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
20
-/* These opcodes are only for use between the tci generator and interpreter. */
21
+{
21
-DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
22
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
22
-DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
-#endif
24
+ }
25
+ return false;
26
+}
27
+
28
/* If the binary operation has first argument @i, fold to NOT. */
29
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
30
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
32
static bool fold_shift(OptContext *ctx, TCGOp *op)
33
{
34
if (fold_const2(ctx, op) ||
35
+ fold_ix_to_i(ctx, op, 0) ||
36
fold_xi_to_x(ctx, op, 0)) {
37
return true;
38
}
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
40
break;
41
}
42
43
- /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
44
- and "sub r, 0, a => neg r, a" case. */
45
- switch (opc) {
46
- CASE_OP_32_64(shl):
47
- CASE_OP_32_64(shr):
48
- CASE_OP_32_64(sar):
49
- CASE_OP_32_64(rotl):
50
- CASE_OP_32_64(rotr):
51
- if (arg_is_const(op->args[1])
52
- && arg_info(op->args[1])->val == 0) {
53
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
54
- continue;
55
- }
56
- break;
57
- default:
58
- break;
59
- }
60
-
24
-
61
/* Simplify using known-zero bits. Currently only ops with a single
25
#undef DATA64_ARGS
62
output argument is supported. */
26
#undef IMPL
63
z_mask = -1;
27
#undef IMPL64
28
diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc
29
index XXXXXXX..XXXXXXX 100644
30
--- a/tcg/tci/tcg-target-opc.h.inc
31
+++ b/tcg/tci/tcg-target-opc.h.inc
32
@@ -1 +1,4 @@
33
-/* No target specific opcodes. */
34
+/* SPDX-License-Identifier: MIT */
35
+/* These opcodes for use between the tci generator and interpreter. */
36
+DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
37
+DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
64
--
38
--
65
2.25.1
39
2.43.0
66
40
67
41
diff view generated by jsdifflib
1
Pull the "op r, a, 0 => movi r, 0" optimization into a function,
1
Don't reference TCG_TARGET_MAYBE_vec in a public header.
2
and use it in the outer opcode fold functions.
3
2
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/optimize.c | 38 ++++++++++++++++++++------------------
6
include/tcg/tcg.h | 7 -------
9
1 file changed, 20 insertions(+), 18 deletions(-)
7
tcg/tcg.c | 4 ++++
8
2 files changed, 4 insertions(+), 7 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/include/tcg/tcg.h
14
+++ b/tcg/optimize.c
13
+++ b/include/tcg/tcg.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ extern tcg_prologue_fn *tcg_qemu_tb_exec;
16
return false;
15
16
void tcg_register_jit(const void *buf, size_t buf_size);
17
18
-#if TCG_TARGET_MAYBE_vec
19
/* Return zero if the tuple (opc, type, vece) is unsupportable;
20
return > 0 if it is directly supportable;
21
return < 0 if we must call tcg_expand_vec_op. */
22
int tcg_can_emit_vec_op(TCGOpcode, TCGType, unsigned);
23
-#else
24
-static inline int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
25
-{
26
- return 0;
27
-}
28
-#endif
29
30
/* Expand the tuple (opc, type, vece) on the given arguments. */
31
void tcg_expand_vec_op(TCGOpcode, TCGType, unsigned, TCGArg, ...);
32
diff --git a/tcg/tcg.c b/tcg/tcg.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/tcg.c
35
+++ b/tcg/tcg.c
36
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
37
{
38
g_assert_not_reached();
17
}
39
}
18
40
+int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
19
+/* If the binary operation has second argument @i, fold to @i. */
20
+static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
21
+{
41
+{
22
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
42
+ return 0;
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
25
+ return false;
26
+}
43
+}
27
+
44
#endif
28
/* If the binary operation has both arguments equal, fold to @i. */
45
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
29
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
46
intptr_t arg2);
30
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
32
static bool fold_and(OptContext *ctx, TCGOp *op)
33
{
34
if (fold_const2(ctx, op) ||
35
+ fold_xi_to_i(ctx, op, 0) ||
36
fold_xx_to_x(ctx, op)) {
37
return true;
38
}
39
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
40
41
static bool fold_mul(OptContext *ctx, TCGOp *op)
42
{
43
- return fold_const2(ctx, op);
44
+ if (fold_const2(ctx, op) ||
45
+ fold_xi_to_i(ctx, op, 0)) {
46
+ return true;
47
+ }
48
+ return false;
49
}
50
51
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
52
{
53
- return fold_const2(ctx, op);
54
+ if (fold_const2(ctx, op) ||
55
+ fold_xi_to_i(ctx, op, 0)) {
56
+ return true;
57
+ }
58
+ return false;
59
}
60
61
static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
63
continue;
64
}
65
66
- /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
67
- switch (opc) {
68
- CASE_OP_32_64_VEC(and):
69
- CASE_OP_32_64_VEC(mul):
70
- CASE_OP_32_64(muluh):
71
- CASE_OP_32_64(mulsh):
72
- if (arg_is_const(op->args[2])
73
- && arg_info(op->args[2])->val == 0) {
74
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
75
- continue;
76
- }
77
- break;
78
- default:
79
- break;
80
- }
81
-
82
/*
83
* Process each opcode.
84
* Sorted alphabetically by opcode as much as possible.
85
--
47
--
86
2.25.1
48
2.43.0
87
49
88
50
diff view generated by jsdifflib
New patch
1
Left-over from commit 623d7e3551a ("util: Add cpuinfo-ppc.c").
1
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-ID: <20250108215156.8731-2-philmd@linaro.org>
6
---
7
tcg/ppc/tcg-target.h | 8 --------
8
1 file changed, 8 deletions(-)
9
10
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.h
13
+++ b/tcg/ppc/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@ typedef enum {
15
TCG_AREG0 = TCG_REG_R27
16
} TCGReg;
17
18
-typedef enum {
19
- tcg_isa_base,
20
- tcg_isa_2_06,
21
- tcg_isa_2_07,
22
- tcg_isa_3_00,
23
- tcg_isa_3_10,
24
-} TCGPowerISA;
25
-
26
#define have_isa_2_06 (cpuinfo & CPUINFO_V2_06)
27
#define have_isa_2_07 (cpuinfo & CPUINFO_V2_07)
28
#define have_isa_3_00 (cpuinfo & CPUINFO_V3_0)
29
--
30
2.43.0
31
32
diff view generated by jsdifflib
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Message-ID: <20250108215156.8731-3-philmd@linaro.org>
4
---
5
include/tcg/tcg.h | 105 +-----------------------------------------
6
tcg/tcg-has.h | 115 ++++++++++++++++++++++++++++++++++++++++++++++
7
2 files changed, 116 insertions(+), 104 deletions(-)
8
create mode 100644 tcg/tcg-has.h
1
9
10
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg.h
13
+++ b/include/tcg/tcg.h
14
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
15
#error unsupported
16
#endif
17
18
-#if TCG_TARGET_REG_BITS == 32
19
-/* Turn some undef macros into false macros. */
20
-#define TCG_TARGET_HAS_extr_i64_i32 0
21
-#define TCG_TARGET_HAS_div_i64 0
22
-#define TCG_TARGET_HAS_rem_i64 0
23
-#define TCG_TARGET_HAS_div2_i64 0
24
-#define TCG_TARGET_HAS_rot_i64 0
25
-#define TCG_TARGET_HAS_ext8s_i64 0
26
-#define TCG_TARGET_HAS_ext16s_i64 0
27
-#define TCG_TARGET_HAS_ext32s_i64 0
28
-#define TCG_TARGET_HAS_ext8u_i64 0
29
-#define TCG_TARGET_HAS_ext16u_i64 0
30
-#define TCG_TARGET_HAS_ext32u_i64 0
31
-#define TCG_TARGET_HAS_bswap16_i64 0
32
-#define TCG_TARGET_HAS_bswap32_i64 0
33
-#define TCG_TARGET_HAS_bswap64_i64 0
34
-#define TCG_TARGET_HAS_not_i64 0
35
-#define TCG_TARGET_HAS_andc_i64 0
36
-#define TCG_TARGET_HAS_orc_i64 0
37
-#define TCG_TARGET_HAS_eqv_i64 0
38
-#define TCG_TARGET_HAS_nand_i64 0
39
-#define TCG_TARGET_HAS_nor_i64 0
40
-#define TCG_TARGET_HAS_clz_i64 0
41
-#define TCG_TARGET_HAS_ctz_i64 0
42
-#define TCG_TARGET_HAS_ctpop_i64 0
43
-#define TCG_TARGET_HAS_deposit_i64 0
44
-#define TCG_TARGET_HAS_extract_i64 0
45
-#define TCG_TARGET_HAS_sextract_i64 0
46
-#define TCG_TARGET_HAS_extract2_i64 0
47
-#define TCG_TARGET_HAS_negsetcond_i64 0
48
-#define TCG_TARGET_HAS_add2_i64 0
49
-#define TCG_TARGET_HAS_sub2_i64 0
50
-#define TCG_TARGET_HAS_mulu2_i64 0
51
-#define TCG_TARGET_HAS_muls2_i64 0
52
-#define TCG_TARGET_HAS_muluh_i64 0
53
-#define TCG_TARGET_HAS_mulsh_i64 0
54
-/* Turn some undef macros into true macros. */
55
-#define TCG_TARGET_HAS_add2_i32 1
56
-#define TCG_TARGET_HAS_sub2_i32 1
57
-#endif
58
-
59
-#ifndef TCG_TARGET_deposit_i32_valid
60
-#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
61
-#endif
62
-#ifndef TCG_TARGET_deposit_i64_valid
63
-#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
64
-#endif
65
-#ifndef TCG_TARGET_extract_i32_valid
66
-#define TCG_TARGET_extract_i32_valid(ofs, len) 1
67
-#endif
68
-#ifndef TCG_TARGET_extract_i64_valid
69
-#define TCG_TARGET_extract_i64_valid(ofs, len) 1
70
-#endif
71
-
72
-/* Only one of DIV or DIV2 should be defined. */
73
-#if defined(TCG_TARGET_HAS_div_i32)
74
-#define TCG_TARGET_HAS_div2_i32 0
75
-#elif defined(TCG_TARGET_HAS_div2_i32)
76
-#define TCG_TARGET_HAS_div_i32 0
77
-#define TCG_TARGET_HAS_rem_i32 0
78
-#endif
79
-#if defined(TCG_TARGET_HAS_div_i64)
80
-#define TCG_TARGET_HAS_div2_i64 0
81
-#elif defined(TCG_TARGET_HAS_div2_i64)
82
-#define TCG_TARGET_HAS_div_i64 0
83
-#define TCG_TARGET_HAS_rem_i64 0
84
-#endif
85
-
86
-#if !defined(TCG_TARGET_HAS_v64) \
87
- && !defined(TCG_TARGET_HAS_v128) \
88
- && !defined(TCG_TARGET_HAS_v256)
89
-#define TCG_TARGET_MAYBE_vec 0
90
-#define TCG_TARGET_HAS_abs_vec 0
91
-#define TCG_TARGET_HAS_neg_vec 0
92
-#define TCG_TARGET_HAS_not_vec 0
93
-#define TCG_TARGET_HAS_andc_vec 0
94
-#define TCG_TARGET_HAS_orc_vec 0
95
-#define TCG_TARGET_HAS_nand_vec 0
96
-#define TCG_TARGET_HAS_nor_vec 0
97
-#define TCG_TARGET_HAS_eqv_vec 0
98
-#define TCG_TARGET_HAS_roti_vec 0
99
-#define TCG_TARGET_HAS_rots_vec 0
100
-#define TCG_TARGET_HAS_rotv_vec 0
101
-#define TCG_TARGET_HAS_shi_vec 0
102
-#define TCG_TARGET_HAS_shs_vec 0
103
-#define TCG_TARGET_HAS_shv_vec 0
104
-#define TCG_TARGET_HAS_mul_vec 0
105
-#define TCG_TARGET_HAS_sat_vec 0
106
-#define TCG_TARGET_HAS_minmax_vec 0
107
-#define TCG_TARGET_HAS_bitsel_vec 0
108
-#define TCG_TARGET_HAS_cmpsel_vec 0
109
-#define TCG_TARGET_HAS_tst_vec 0
110
-#else
111
-#define TCG_TARGET_MAYBE_vec 1
112
-#endif
113
-#ifndef TCG_TARGET_HAS_v64
114
-#define TCG_TARGET_HAS_v64 0
115
-#endif
116
-#ifndef TCG_TARGET_HAS_v128
117
-#define TCG_TARGET_HAS_v128 0
118
-#endif
119
-#ifndef TCG_TARGET_HAS_v256
120
-#define TCG_TARGET_HAS_v256 0
121
-#endif
122
+#include "tcg/tcg-has.h"
123
124
typedef enum TCGOpcode {
125
#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
126
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
127
new file mode 100644
128
index XXXXXXX..XXXXXXX
129
--- /dev/null
130
+++ b/tcg/tcg-has.h
131
@@ -XXX,XX +XXX,XX @@
132
+/* SPDX-License-Identifier: MIT */
133
+/*
134
+ * Define target-specific opcode support
135
+ * Copyright (c) 2024 Linaro, Ltd.
136
+ */
137
+
138
+#ifndef TCG_HAS_H
139
+#define TCG_HAS_H
140
+
141
+#if TCG_TARGET_REG_BITS == 32
142
+/* Turn some undef macros into false macros. */
143
+#define TCG_TARGET_HAS_extr_i64_i32 0
144
+#define TCG_TARGET_HAS_div_i64 0
145
+#define TCG_TARGET_HAS_rem_i64 0
146
+#define TCG_TARGET_HAS_div2_i64 0
147
+#define TCG_TARGET_HAS_rot_i64 0
148
+#define TCG_TARGET_HAS_ext8s_i64 0
149
+#define TCG_TARGET_HAS_ext16s_i64 0
150
+#define TCG_TARGET_HAS_ext32s_i64 0
151
+#define TCG_TARGET_HAS_ext8u_i64 0
152
+#define TCG_TARGET_HAS_ext16u_i64 0
153
+#define TCG_TARGET_HAS_ext32u_i64 0
154
+#define TCG_TARGET_HAS_bswap16_i64 0
155
+#define TCG_TARGET_HAS_bswap32_i64 0
156
+#define TCG_TARGET_HAS_bswap64_i64 0
157
+#define TCG_TARGET_HAS_not_i64 0
158
+#define TCG_TARGET_HAS_andc_i64 0
159
+#define TCG_TARGET_HAS_orc_i64 0
160
+#define TCG_TARGET_HAS_eqv_i64 0
161
+#define TCG_TARGET_HAS_nand_i64 0
162
+#define TCG_TARGET_HAS_nor_i64 0
163
+#define TCG_TARGET_HAS_clz_i64 0
164
+#define TCG_TARGET_HAS_ctz_i64 0
165
+#define TCG_TARGET_HAS_ctpop_i64 0
166
+#define TCG_TARGET_HAS_deposit_i64 0
167
+#define TCG_TARGET_HAS_extract_i64 0
168
+#define TCG_TARGET_HAS_sextract_i64 0
169
+#define TCG_TARGET_HAS_extract2_i64 0
170
+#define TCG_TARGET_HAS_negsetcond_i64 0
171
+#define TCG_TARGET_HAS_add2_i64 0
172
+#define TCG_TARGET_HAS_sub2_i64 0
173
+#define TCG_TARGET_HAS_mulu2_i64 0
174
+#define TCG_TARGET_HAS_muls2_i64 0
175
+#define TCG_TARGET_HAS_muluh_i64 0
176
+#define TCG_TARGET_HAS_mulsh_i64 0
177
+/* Turn some undef macros into true macros. */
178
+#define TCG_TARGET_HAS_add2_i32 1
179
+#define TCG_TARGET_HAS_sub2_i32 1
180
+#endif
181
+
182
+#ifndef TCG_TARGET_deposit_i32_valid
183
+#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
184
+#endif
185
+#ifndef TCG_TARGET_deposit_i64_valid
186
+#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
187
+#endif
188
+#ifndef TCG_TARGET_extract_i32_valid
189
+#define TCG_TARGET_extract_i32_valid(ofs, len) 1
190
+#endif
191
+#ifndef TCG_TARGET_extract_i64_valid
192
+#define TCG_TARGET_extract_i64_valid(ofs, len) 1
193
+#endif
194
+
195
+/* Only one of DIV or DIV2 should be defined. */
196
+#if defined(TCG_TARGET_HAS_div_i32)
197
+#define TCG_TARGET_HAS_div2_i32 0
198
+#elif defined(TCG_TARGET_HAS_div2_i32)
199
+#define TCG_TARGET_HAS_div_i32 0
200
+#define TCG_TARGET_HAS_rem_i32 0
201
+#endif
202
+#if defined(TCG_TARGET_HAS_div_i64)
203
+#define TCG_TARGET_HAS_div2_i64 0
204
+#elif defined(TCG_TARGET_HAS_div2_i64)
205
+#define TCG_TARGET_HAS_div_i64 0
206
+#define TCG_TARGET_HAS_rem_i64 0
207
+#endif
208
+
209
+#if !defined(TCG_TARGET_HAS_v64) \
210
+ && !defined(TCG_TARGET_HAS_v128) \
211
+ && !defined(TCG_TARGET_HAS_v256)
212
+#define TCG_TARGET_MAYBE_vec 0
213
+#define TCG_TARGET_HAS_abs_vec 0
214
+#define TCG_TARGET_HAS_neg_vec 0
215
+#define TCG_TARGET_HAS_not_vec 0
216
+#define TCG_TARGET_HAS_andc_vec 0
217
+#define TCG_TARGET_HAS_orc_vec 0
218
+#define TCG_TARGET_HAS_nand_vec 0
219
+#define TCG_TARGET_HAS_nor_vec 0
220
+#define TCG_TARGET_HAS_eqv_vec 0
221
+#define TCG_TARGET_HAS_roti_vec 0
222
+#define TCG_TARGET_HAS_rots_vec 0
223
+#define TCG_TARGET_HAS_rotv_vec 0
224
+#define TCG_TARGET_HAS_shi_vec 0
225
+#define TCG_TARGET_HAS_shs_vec 0
226
+#define TCG_TARGET_HAS_shv_vec 0
227
+#define TCG_TARGET_HAS_mul_vec 0
228
+#define TCG_TARGET_HAS_sat_vec 0
229
+#define TCG_TARGET_HAS_minmax_vec 0
230
+#define TCG_TARGET_HAS_bitsel_vec 0
231
+#define TCG_TARGET_HAS_cmpsel_vec 0
232
+#define TCG_TARGET_HAS_tst_vec 0
233
+#else
234
+#define TCG_TARGET_MAYBE_vec 1
235
+#endif
236
+#ifndef TCG_TARGET_HAS_v64
237
+#define TCG_TARGET_HAS_v64 0
238
+#endif
239
+#ifndef TCG_TARGET_HAS_v128
240
+#define TCG_TARGET_HAS_v128 0
241
+#endif
242
+#ifndef TCG_TARGET_HAS_v256
243
+#define TCG_TARGET_HAS_v256 0
244
+#endif
245
+
246
+#endif
247
--
248
2.43.0
249
250
diff view generated by jsdifflib
New patch
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-4-philmd@linaro.org>
---
tcg/aarch64/tcg-target-has.h | 119 +++++++++++++++++++++++++++++++++++
tcg/aarch64/tcg-target.h | 109 +-------------------------------
2 files changed, 120 insertions(+), 108 deletions(-)
create mode 100644 tcg/aarch64/tcg-target-has.h

diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/aarch64/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#include "host/cpuinfo.h"
+
+#define have_lse (cpuinfo & CPUINFO_LSE)
+#define have_lse2 (cpuinfo & CPUINFO_LSE2)
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
+#define TCG_TARGET_HAS_extract2_i32 1
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_extr_i64_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 1
+#define TCG_TARGET_HAS_extract2_i64 1
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+
+/*
+ * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
+ * which requires writable pages. We must defer to the helper for user-only,
+ * but in system mode all ram is writable for the host.
+ */
+#ifdef CONFIG_USER_ONLY
+#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
+#else
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
+#endif
+
+#define TCG_TARGET_HAS_tst 1
+
+#define TCG_TARGET_HAS_v64 1
+#define TCG_TARGET_HAS_v128 1
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 0
+#define TCG_TARGET_HAS_tst_vec 1
+
+#endif
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#ifndef AARCH64_TCG_TARGET_H
#define AARCH64_TCG_TARGET_H

-#include "host/cpuinfo.h"
-
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

@@ -XXX,XX +XXX,XX @@ typedef enum {

#define TCG_TARGET_NB_REGS 64

-#define have_lse (cpuinfo & CPUINFO_LSE)
-#define have_lse2 (cpuinfo & CPUINFO_LSE2)
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
-#define TCG_TARGET_HAS_eqv_i32 1
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
-#define TCG_TARGET_HAS_extract2_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 0
-#define TCG_TARGET_HAS_muls2_i32 0
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_extr_i64_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_andc_i64 1
-#define TCG_TARGET_HAS_orc_i64 1
-#define TCG_TARGET_HAS_eqv_i64 1
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_clz_i64 1
-#define TCG_TARGET_HAS_ctz_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 0
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
-#define TCG_TARGET_HAS_extract2_i64 1
-#define TCG_TARGET_HAS_negsetcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 0
-#define TCG_TARGET_HAS_muls2_i64 0
-#define TCG_TARGET_HAS_muluh_i64 1
-#define TCG_TARGET_HAS_mulsh_i64 1
-
-/*
- * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
- * which requires writable pages. We must defer to the helper for user-only,
- * but in system mode all ram is writable for the host.
- */
-#ifdef CONFIG_USER_ONLY
-#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
-#else
-#define TCG_TARGET_HAS_qemu_ldst_i128 1
-#endif
-
-#define TCG_TARGET_HAS_tst 1
-
-#define TCG_TARGET_HAS_v64 1
-#define TCG_TARGET_HAS_v128 1
-#define TCG_TARGET_HAS_v256 0
-
-#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec 1
-#define TCG_TARGET_HAS_nand_vec 0
-#define TCG_TARGET_HAS_nor_vec 0
-#define TCG_TARGET_HAS_eqv_vec 0
-#define TCG_TARGET_HAS_not_vec 1
-#define TCG_TARGET_HAS_neg_vec 1
-#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec 0
-#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec 0
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 0
-#define TCG_TARGET_HAS_shv_vec 1
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_sat_vec 1
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 1
-#define TCG_TARGET_HAS_cmpsel_vec 0
-#define TCG_TARGET_HAS_tst_vec 1
+#include "tcg-target-has.h"

#define TCG_TARGET_DEFAULT_MO (0)

--
2.43.0
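
The CONFIG_USER_ONLY split in the aarch64 header above encodes the reasoning from its comment: LDXP+STXP needs writable pages, which only system mode can guarantee, so user-only builds advertise native 128-bit load/store only when FEAT_LSE2 was detected. A compilable sketch of that shape (CONFIG_USER_ONLY and the cpuinfo bit here are stand-ins, not the real build machinery):

#include <stdio.h>

#define CONFIG_USER_ONLY 1          /* pretend this is a user-only build */

static unsigned cpuinfo;            /* filled once by host probing */
#define CPUINFO_LSE2 (1u << 0)      /* hypothetical bit assignment */
#define have_lse2 (cpuinfo & CPUINFO_LSE2)

#ifdef CONFIG_USER_ONLY
#define HAS_QEMU_LDST_I128 have_lse2   /* guest pages may be read-only */
#else
#define HAS_QEMU_LDST_I128 1           /* system-mode RAM is host-writable */
#endif

int main(void)
{
    cpuinfo = CPUINFO_LSE2;         /* pretend the probe found FEAT_LSE2 */
    printf("native i128 ldst: %d\n", HAS_QEMU_LDST_I128 ? 1 : 0);
    return 0;
}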
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-5-philmd@linaro.org>
---
tcg/arm/tcg-target-has.h | 85 ++++++++++++++++++++++++++++++++++++++++
tcg/arm/tcg-target.h | 74 +---------------------------------
2 files changed, 86 insertions(+), 73 deletions(-)
create mode 100644 tcg/arm/tcg-target-has.h

diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/arm/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2008 Fabrice Bellard
+ * Copyright (c) 2008 Andrzej Zaborowski
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+extern int arm_arch;
+
+#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+#define use_idiv_instructions 1
+#else
+extern bool use_idiv_instructions;
+#endif
+#ifdef __ARM_NEON__
+#define use_neon_instructions 1
+#else
+extern bool use_neon_instructions;
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 0 /* and r0, r1, #0xff */
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_extract2_i32 1
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
+#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+
+#define TCG_TARGET_HAS_tst 1
+
+#define TCG_TARGET_HAS_v64 use_neon_instructions
+#define TCG_TARGET_HAS_v128 use_neon_instructions
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 0
+#define TCG_TARGET_HAS_tst_vec 1
+
+#endif
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#ifndef ARM_TCG_TARGET_H
#define ARM_TCG_TARGET_H

-extern int arm_arch;
-
-#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
-
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX

@@ -XXX,XX +XXX,XX @@ typedef enum {

#define TCG_TARGET_NB_REGS 32

-#ifdef __ARM_ARCH_EXT_IDIV__
-#define use_idiv_instructions 1
-#else
-extern bool use_idiv_instructions;
-#endif
-#ifdef __ARM_NEON__
-#define use_neon_instructions 1
-#else
-extern bool use_neon_instructions;
-#endif
-
-/* optional instructions */
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 0 /* and r0, r1, #0xff */
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_orc_i32 0
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_extract2_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 1
-#define TCG_TARGET_HAS_muls2_i32 1
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
-#define TCG_TARGET_HAS_rem_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
-
-#define TCG_TARGET_HAS_tst 1
-
-#define TCG_TARGET_HAS_v64 use_neon_instructions
-#define TCG_TARGET_HAS_v128 use_neon_instructions
-#define TCG_TARGET_HAS_v256 0
-
-#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec 1
-#define TCG_TARGET_HAS_nand_vec 0
-#define TCG_TARGET_HAS_nor_vec 0
-#define TCG_TARGET_HAS_eqv_vec 0
-#define TCG_TARGET_HAS_not_vec 1
-#define TCG_TARGET_HAS_neg_vec 1
-#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec 0
-#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec 0
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 0
-#define TCG_TARGET_HAS_shv_vec 0
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_sat_vec 1
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 1
-#define TCG_TARGET_HAS_cmpsel_vec 0
-#define TCG_TARGET_HAS_tst_vec 1
+#include "tcg-target-has.h"

#define TCG_TARGET_DEFAULT_MO (0)

--
2.43.0
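
The use_armv7_instructions definition in the arm header above folds to a compile-time constant whenever the compiler already targets ARMv7 (__ARM_ARCH >= 7), and otherwise degrades to a runtime test of arm_arch. A standalone sketch of that idiom (the fallback value of __ARM_ARCH is only so the example builds on non-ARM hosts):

#include <stdio.h>

#ifndef __ARM_ARCH
#define __ARM_ARCH 5                /* stand-in when not compiling for ARM */
#endif

static int arm_arch = 7;            /* would be set by runtime probing */

#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)

int main(void)
{
    /* Constant-folds on an ARMv7+ build; otherwise reads arm_arch. */
    printf("armv7 insns: %d\n", use_armv7_instructions ? 1 : 0);
    return 0;
}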
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-6-philmd@linaro.org>
---
tcg/i386/tcg-target-has.h | 139 ++++++++++++++++++++++++++++++++++++++
tcg/i386/tcg-target.h | 129 +----------------------------------
2 files changed, 140 insertions(+), 128 deletions(-)
create mode 100644 tcg/i386/tcg-target-has.h

diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/i386/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#include "host/cpuinfo.h"
+
+#define have_bmi1 (cpuinfo & CPUINFO_BMI1)
+#define have_popcnt (cpuinfo & CPUINFO_POPCNT)
+#define have_avx1 (cpuinfo & CPUINFO_AVX1)
+#define have_avx2 (cpuinfo & CPUINFO_AVX2)
+#define have_movbe (cpuinfo & CPUINFO_MOVBE)
+
+/*
+ * There are interesting instructions in AVX512, so long as we have AVX512VL,
+ * which indicates support for EVEX on sizes smaller than 512 bits.
+ */
+#define have_avx512vl ((cpuinfo & CPUINFO_AVX512VL) && \
+ (cpuinfo & CPUINFO_AVX512F))
+#define have_avx512bw ((cpuinfo & CPUINFO_AVX512BW) && have_avx512vl)
+#define have_avx512dq ((cpuinfo & CPUINFO_AVX512DQ) && have_avx512vl)
+#define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl)
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div2_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 have_bmi1
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
+#define TCG_TARGET_HAS_extract2_i32 1
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+/* Keep 32-bit values zero-extended in a register. */
+#define TCG_TARGET_HAS_extr_i64_i32 1
+#define TCG_TARGET_HAS_div2_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 have_bmi1
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 1
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 1
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+#else
+#define TCG_TARGET_HAS_qemu_st8_i32 1
+#endif
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
+ (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA))
+
+#define TCG_TARGET_HAS_tst 1
+
+/* We do not support older SSE systems, only beginning with AVX1. */
+#define TCG_TARGET_HAS_v64 have_avx1
+#define TCG_TARGET_HAS_v128 have_avx1
+#define TCG_TARGET_HAS_v256 have_avx2
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec have_avx512vl
+#define TCG_TARGET_HAS_nand_vec have_avx512vl
+#define TCG_TARGET_HAS_nor_vec have_avx512vl
+#define TCG_TARGET_HAS_eqv_vec have_avx512vl
+#define TCG_TARGET_HAS_not_vec have_avx512vl
+#define TCG_TARGET_HAS_neg_vec 0
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec have_avx512vl
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec have_avx512vl
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 1
+#define TCG_TARGET_HAS_shv_vec have_avx2
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec have_avx512vl
+#define TCG_TARGET_HAS_cmpsel_vec 1
+#define TCG_TARGET_HAS_tst_vec have_avx512bw
+
+#define TCG_TARGET_deposit_i32_valid(ofs, len) \
+ (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
+ (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
+#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
+
+/* Check for the possibility of high-byte extraction and, for 64-bit,
+ zero-extending 32-bit right-shift. */
+#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
+#define TCG_TARGET_extract_i64_valid(ofs, len) \
+ (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
+
+#endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#ifndef I386_TCG_TARGET_H
#define I386_TCG_TARGET_H

-#include "host/cpuinfo.h"
-
#define TCG_TARGET_INSN_UNIT_SIZE 1

#ifdef __x86_64__
@@ -XXX,XX +XXX,XX @@ typedef enum {
TCG_REG_CALL_STACK = TCG_REG_ESP
} TCGReg;

-#define have_bmi1 (cpuinfo & CPUINFO_BMI1)
-#define have_popcnt (cpuinfo & CPUINFO_POPCNT)
-#define have_avx1 (cpuinfo & CPUINFO_AVX1)
-#define have_avx2 (cpuinfo & CPUINFO_AVX2)
-#define have_movbe (cpuinfo & CPUINFO_MOVBE)
-
-/*
- * There are interesting instructions in AVX512, so long as we have AVX512VL,
- * which indicates support for EVEX on sizes smaller than 512 bits.
- */
-#define have_avx512vl ((cpuinfo & CPUINFO_AVX512VL) && \
- (cpuinfo & CPUINFO_AVX512F))
-#define have_avx512bw ((cpuinfo & CPUINFO_AVX512BW) && have_avx512vl)
-#define have_avx512dq ((cpuinfo & CPUINFO_AVX512DQ) && have_avx512vl)
-#define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl)
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div2_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 have_bmi1
-#define TCG_TARGET_HAS_orc_i32 0
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
-#define TCG_TARGET_HAS_extract2_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 1
-#define TCG_TARGET_HAS_muls2_i32 1
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-
-#if TCG_TARGET_REG_BITS == 64
-/* Keep 32-bit values zero-extended in a register. */
-#define TCG_TARGET_HAS_extr_i64_i32 1
-#define TCG_TARGET_HAS_div2_i64 1
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_andc_i64 have_bmi1
-#define TCG_TARGET_HAS_orc_i64 0
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_clz_i64 1
-#define TCG_TARGET_HAS_ctz_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 1
-#define TCG_TARGET_HAS_negsetcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 1
-#define TCG_TARGET_HAS_muls2_i64 1
-#define TCG_TARGET_HAS_muluh_i64 0
-#define TCG_TARGET_HAS_mulsh_i64 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-#else
-#define TCG_TARGET_HAS_qemu_st8_i32 1
-#endif
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 \
- (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA))
-
-#define TCG_TARGET_HAS_tst 1
-
-/* We do not support older SSE systems, only beginning with AVX1. */
-#define TCG_TARGET_HAS_v64 have_avx1
-#define TCG_TARGET_HAS_v128 have_avx1
-#define TCG_TARGET_HAS_v256 have_avx2
-
-#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec have_avx512vl
-#define TCG_TARGET_HAS_nand_vec have_avx512vl
-#define TCG_TARGET_HAS_nor_vec have_avx512vl
-#define TCG_TARGET_HAS_eqv_vec have_avx512vl
-#define TCG_TARGET_HAS_not_vec have_avx512vl
-#define TCG_TARGET_HAS_neg_vec 0
-#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec have_avx512vl
-#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec have_avx512vl
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 1
-#define TCG_TARGET_HAS_shv_vec have_avx2
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_sat_vec 1
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec have_avx512vl
-#define TCG_TARGET_HAS_cmpsel_vec 1
-#define TCG_TARGET_HAS_tst_vec have_avx512bw
-
-#define TCG_TARGET_deposit_i32_valid(ofs, len) \
- (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
- (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
-#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
-
-/* Check for the possibility of high-byte extraction and, for 64-bit,
- zero-extending 32-bit right-shift. */
-#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
-#define TCG_TARGET_extract_i64_valid(ofs, len) \
- (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
+#include "tcg-target-has.h"

/* This defines the natural memory order supported by this
* architecture before guarantees made by various barrier
--
2.43.0
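
Beyond plain 0/1 macros, the i386 header above also exports validity predicates: TCG_TARGET_deposit_i32_valid accepts only (ofs, len) pairs that map onto x86 partial-register stores. A small check of the quoted expression (the driver around it is illustrative; only the macro body comes from the patch):

#include <stdio.h>

#define TCG_TARGET_REG_BITS 64      /* pretend we build the 64-bit backend */

#define deposit_i32_valid(ofs, len) \
    (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
     (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))

int main(void)
{
    printf("%d\n", deposit_i32_valid(0, 8));   /* 1: byte store, e.g. %al */
    printf("%d\n", deposit_i32_valid(0, 16));  /* 1: word store, e.g. %ax */
    printf("%d\n", deposit_i32_valid(8, 8));   /* 0: %ah only on 32-bit */
    return 0;
}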
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-7-philmd@linaro.org>
---
tcg/loongarch64/tcg-target-has.h | 113 +++++++++++++++++++++++++++++++
tcg/loongarch64/tcg-target.h | 102 +---------------------------
2 files changed, 114 insertions(+), 101 deletions(-)
create mode 100644 tcg/loongarch64/tcg-target-has.h

diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/loongarch64/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2021 WANG Xuerui <git@xen0n.name>
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#include "host/cpuinfo.h"
+
+/* optional instructions */
+#define TCG_TARGET_HAS_negsetcond_i32 0
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_div2_i32 0
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 1
+#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_brcond2 0
+#define TCG_TARGET_HAS_setcond2 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+/* 64-bit operations */
+#define TCG_TARGET_HAS_negsetcond_i64 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_div2_i64 0
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_extr_i64_i32 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX)
+
+#define TCG_TARGET_HAS_tst 0
+
+#define TCG_TARGET_HAS_v64 (cpuinfo & CPUINFO_LSX)
+#define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_LSX)
+#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_LASX)
+
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 1
+#define TCG_TARGET_HAS_eqv_vec 0
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_roti_vec 1
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 0
+#define TCG_TARGET_HAS_tst_vec 0
+
+
+#endif
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#ifndef LOONGARCH_TCG_TARGET_H
#define LOONGARCH_TCG_TARGET_H

-#include "host/cpuinfo.h"
-
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_NB_REGS 64

@@ -XXX,XX +XXX,XX @@ typedef enum {
TCG_VEC_TMP0 = TCG_REG_V23,
} TCGReg;

-/* optional instructions */
-#define TCG_TARGET_HAS_negsetcond_i32 0
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_div2_i32 0
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 0
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_add2_i32 0
-#define TCG_TARGET_HAS_sub2_i32 0
-#define TCG_TARGET_HAS_mulu2_i32 0
-#define TCG_TARGET_HAS_muls2_i32 0
-#define TCG_TARGET_HAS_muluh_i32 1
-#define TCG_TARGET_HAS_mulsh_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_brcond2 0
-#define TCG_TARGET_HAS_setcond2 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-/* 64-bit operations */
-#define TCG_TARGET_HAS_negsetcond_i64 0
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 1
-#define TCG_TARGET_HAS_div2_i64 0
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_extr_i64_i32 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_andc_i64 1
-#define TCG_TARGET_HAS_orc_i64 1
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 1
-#define TCG_TARGET_HAS_clz_i64 1
-#define TCG_TARGET_HAS_ctz_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 0
-#define TCG_TARGET_HAS_add2_i64 0
-#define TCG_TARGET_HAS_sub2_i64 0
-#define TCG_TARGET_HAS_mulu2_i64 0
-#define TCG_TARGET_HAS_muls2_i64 0
-#define TCG_TARGET_HAS_muluh_i64 1
-#define TCG_TARGET_HAS_mulsh_i64 1
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX)
-
-#define TCG_TARGET_HAS_tst 0
-
-#define TCG_TARGET_HAS_v64 (cpuinfo & CPUINFO_LSX)
-#define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_LSX)
-#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_LASX)
-
-#define TCG_TARGET_HAS_not_vec 1
-#define TCG_TARGET_HAS_neg_vec 1
-#define TCG_TARGET_HAS_abs_vec 0
-#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec 1
-#define TCG_TARGET_HAS_nand_vec 0
-#define TCG_TARGET_HAS_nor_vec 1
-#define TCG_TARGET_HAS_eqv_vec 0
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 0
-#define TCG_TARGET_HAS_shv_vec 1
-#define TCG_TARGET_HAS_roti_vec 1
-#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec 1
-#define TCG_TARGET_HAS_sat_vec 1
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 1
-#define TCG_TARGET_HAS_cmpsel_vec 0
-#define TCG_TARGET_HAS_tst_vec 0
+#include "tcg-target-has.h"

#define TCG_TARGET_DEFAULT_MO (0)

--
2.43.0
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-8-philmd@linaro.org>
---
tcg/mips/tcg-target-has.h | 122 ++++++++++++++++++++++++++++++++++++++
tcg/mips/tcg-target.h | 112 +---------------------------------
2 files changed, 123 insertions(+), 111 deletions(-)
create mode 100644 tcg/mips/tcg-target-has.h

diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/mips/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+/* MOVN/MOVZ instructions detection */
+#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
+ defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
+ defined(_MIPS_ARCH_MIPS4)
+#define use_movnz_instructions 1
+#else
+extern bool use_movnz_instructions;
+#endif
+
+/* MIPS32 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1)
+#define use_mips32_instructions 1
+#else
+extern bool use_mips32_instructions;
+#endif
+
+/* MIPS32R2 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+#define use_mips32r2_instructions 1
+#else
+extern bool use_mips32r2_instructions;
+#endif
+
+/* MIPS32R6 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
+#define use_mips32r6_instructions 1
+#else
+#define use_mips32r6_instructions 0
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_andc_i32 0
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muluh_i32 1
+#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_negsetcond_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_extr_i64_i32 1
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_negsetcond_i64 0
+#endif
+
+/* optional instructions detected at runtime */
+#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
+#endif
+
+/* optional instructions automatically implemented */
+#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */
+#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */
+#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
+#endif
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+#define TCG_TARGET_HAS_tst 0
+
+#endif
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
TCG_AREG0 = TCG_REG_S8,
} TCGReg;

-/* MOVN/MOVZ instructions detection */
-#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
- defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
- defined(_MIPS_ARCH_MIPS4)
-#define use_movnz_instructions 1
-#else
-extern bool use_movnz_instructions;
-#endif
-
-/* MIPS32 instruction set detection */
-#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1)
-#define use_mips32_instructions 1
-#else
-extern bool use_mips32_instructions;
-#endif
-
-/* MIPS32R2 instruction set detection */
-#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
-#define use_mips32r2_instructions 1
-#else
-extern bool use_mips32r2_instructions;
-#endif
-
-/* MIPS32R6 instruction set detection */
-#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
-#define use_mips32r6_instructions 1
-#else
-#define use_mips32r6_instructions 0
-#endif
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_andc_i32 0
-#define TCG_TARGET_HAS_orc_i32 0
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions)
-#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
-#define TCG_TARGET_HAS_muluh_i32 1
-#define TCG_TARGET_HAS_mulsh_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 0
-
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_add2_i32 0
-#define TCG_TARGET_HAS_sub2_i32 0
-#define TCG_TARGET_HAS_extr_i64_i32 1
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 1
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_nor_i64 1
-#define TCG_TARGET_HAS_andc_i64 0
-#define TCG_TARGET_HAS_orc_i64 0
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_add2_i64 0
-#define TCG_TARGET_HAS_sub2_i64 0
-#define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions)
-#define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions)
-#define TCG_TARGET_HAS_muluh_i64 1
-#define TCG_TARGET_HAS_mulsh_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_negsetcond_i64 0
-#endif
-
-/* optional instructions detected at runtime */
-#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_sextract_i32 0
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_ctz_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_ctz_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 0
-#endif
-
-/* optional instructions automatically implemented */
-#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */
-#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */
-
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */
-#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
-#endif
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
-
-#define TCG_TARGET_HAS_tst 0
+#include "tcg-target-has.h"

#define TCG_TARGET_DEFAULT_MO 0

--
2.43.0
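
The mips header above resolves each ISA-revision flag at compile time when __mips_isa_rev guarantees it, and otherwise falls back to a variable filled in by runtime probing. A standalone sketch of that pattern (the extern of the original becomes a static here so the example links on its own):

#include <stdbool.h>
#include <stdio.h>

#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
#define use_mips32r2_instructions 1    /* constant: tests fold away */
#else
static bool use_mips32r2_instructions; /* set during backend startup */
#endif

int main(void)
{
#if !(defined(__mips_isa_rev) && (__mips_isa_rev >= 2))
    use_mips32r2_instructions = true;  /* pretend the probe succeeded */
#endif
    printf("bswap16 native: %d\n", use_mips32r2_instructions ? 1 : 0);
    return 0;
}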
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-9-philmd@linaro.org>
---
tcg/ppc/tcg-target-has.h | 124 +++++++++++++++++++++++++++++++++++++++
tcg/ppc/tcg-target.h | 114 +----------------------------
2 files changed, 125 insertions(+), 113 deletions(-)
create mode 100644 tcg/ppc/tcg-target-has.h

diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/ppc/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#include "host/cpuinfo.h"
+
+#define have_isa_2_06 (cpuinfo & CPUINFO_V2_06)
+#define have_isa_2_07 (cpuinfo & CPUINFO_V2_07)
+#define have_isa_3_00 (cpuinfo & CPUINFO_V3_0)
+#define have_isa_3_10 (cpuinfo & CPUINFO_V3_1)
+#define have_altivec (cpuinfo & CPUINFO_ALTIVEC)
+#define have_vsx (cpuinfo & CPUINFO_VSX)
+
+/* optional instructions automatically implemented */
+#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
+#define TCG_TARGET_HAS_ext16u_i32 0
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 have_isa_3_00
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
+#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 1
+#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_extr_i64_i32 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 have_isa_3_00
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 0
+#define TCG_TARGET_HAS_ext16u_i64 0
+#define TCG_TARGET_HAS_ext32u_i64 0
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
+#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#endif
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
+ (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
+
+#define TCG_TARGET_HAS_tst 1
+
+/*
+ * While technically Altivec could support V64, it has no 64-bit store
+ * instruction and substituting two 32-bit stores makes the generated
+ * code quite large.
+ */
+#define TCG_TARGET_HAS_v64 have_vsx
+#define TCG_TARGET_HAS_v128 have_altivec
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec have_isa_2_07
+#define TCG_TARGET_HAS_nand_vec have_isa_2_07
+#define TCG_TARGET_HAS_nor_vec 1
+#define TCG_TARGET_HAS_eqv_vec have_isa_2_07
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec have_isa_3_00
+#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_shi_vec 0
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec have_vsx
+#define TCG_TARGET_HAS_cmpsel_vec 1
+#define TCG_TARGET_HAS_tst_vec 0
+
+#endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#ifndef PPC_TCG_TARGET_H
#define PPC_TCG_TARGET_H

-#include "host/cpuinfo.h"
-
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

#define TCG_TARGET_NB_REGS 64
@@ -XXX,XX +XXX,XX @@ typedef enum {
TCG_AREG0 = TCG_REG_R27
} TCGReg;

-#define have_isa_2_06 (cpuinfo & CPUINFO_V2_06)
-#define have_isa_2_07 (cpuinfo & CPUINFO_V2_07)
-#define have_isa_3_00 (cpuinfo & CPUINFO_V3_0)
-#define have_isa_3_10 (cpuinfo & CPUINFO_V3_1)
-#define have_altivec (cpuinfo & CPUINFO_ALTIVEC)
-#define have_vsx (cpuinfo & CPUINFO_VSX)
-
-/* optional instructions automatically implemented */
-#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
-#define TCG_TARGET_HAS_ext16u_i32 0
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 have_isa_3_00
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
179
-#define TCG_TARGET_HAS_eqv_i32 1
180
-#define TCG_TARGET_HAS_nand_i32 1
181
-#define TCG_TARGET_HAS_nor_i32 1
182
-#define TCG_TARGET_HAS_clz_i32 1
183
-#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
184
-#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
185
-#define TCG_TARGET_HAS_deposit_i32 1
186
-#define TCG_TARGET_HAS_extract_i32 1
187
-#define TCG_TARGET_HAS_sextract_i32 0
188
-#define TCG_TARGET_HAS_extract2_i32 0
189
-#define TCG_TARGET_HAS_negsetcond_i32 1
190
-#define TCG_TARGET_HAS_mulu2_i32 0
191
-#define TCG_TARGET_HAS_muls2_i32 0
192
-#define TCG_TARGET_HAS_muluh_i32 1
193
-#define TCG_TARGET_HAS_mulsh_i32 1
194
-#define TCG_TARGET_HAS_qemu_st8_i32 0
195
-
196
-#if TCG_TARGET_REG_BITS == 64
197
-#define TCG_TARGET_HAS_add2_i32 0
198
-#define TCG_TARGET_HAS_sub2_i32 0
199
-#define TCG_TARGET_HAS_extr_i64_i32 0
200
-#define TCG_TARGET_HAS_div_i64 1
201
-#define TCG_TARGET_HAS_rem_i64 have_isa_3_00
202
-#define TCG_TARGET_HAS_rot_i64 1
203
-#define TCG_TARGET_HAS_ext8s_i64 1
204
-#define TCG_TARGET_HAS_ext16s_i64 1
205
-#define TCG_TARGET_HAS_ext32s_i64 1
206
-#define TCG_TARGET_HAS_ext8u_i64 0
207
-#define TCG_TARGET_HAS_ext16u_i64 0
208
-#define TCG_TARGET_HAS_ext32u_i64 0
209
-#define TCG_TARGET_HAS_bswap16_i64 1
210
-#define TCG_TARGET_HAS_bswap32_i64 1
211
-#define TCG_TARGET_HAS_bswap64_i64 1
212
-#define TCG_TARGET_HAS_not_i64 1
213
-#define TCG_TARGET_HAS_andc_i64 1
214
-#define TCG_TARGET_HAS_orc_i64 1
215
-#define TCG_TARGET_HAS_eqv_i64 1
216
-#define TCG_TARGET_HAS_nand_i64 1
217
-#define TCG_TARGET_HAS_nor_i64 1
218
-#define TCG_TARGET_HAS_clz_i64 1
219
-#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
220
-#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
221
-#define TCG_TARGET_HAS_deposit_i64 1
222
-#define TCG_TARGET_HAS_extract_i64 1
223
-#define TCG_TARGET_HAS_sextract_i64 0
224
-#define TCG_TARGET_HAS_extract2_i64 0
225
-#define TCG_TARGET_HAS_negsetcond_i64 1
226
-#define TCG_TARGET_HAS_add2_i64 1
227
-#define TCG_TARGET_HAS_sub2_i64 1
228
-#define TCG_TARGET_HAS_mulu2_i64 0
229
-#define TCG_TARGET_HAS_muls2_i64 0
230
-#define TCG_TARGET_HAS_muluh_i64 1
231
-#define TCG_TARGET_HAS_mulsh_i64 1
232
-#endif
233
-
234
-#define TCG_TARGET_HAS_qemu_ldst_i128 \
235
- (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
236
-
237
-#define TCG_TARGET_HAS_tst 1
238
-
239
-/*
240
- * While technically Altivec could support V64, it has no 64-bit store
241
- * instruction and substituting two 32-bit stores makes the generated
242
- * code quite large.
243
- */
244
-#define TCG_TARGET_HAS_v64 have_vsx
245
-#define TCG_TARGET_HAS_v128 have_altivec
246
-#define TCG_TARGET_HAS_v256 0
247
-
248
-#define TCG_TARGET_HAS_andc_vec 1
249
-#define TCG_TARGET_HAS_orc_vec have_isa_2_07
250
-#define TCG_TARGET_HAS_nand_vec have_isa_2_07
251
-#define TCG_TARGET_HAS_nor_vec 1
252
-#define TCG_TARGET_HAS_eqv_vec have_isa_2_07
253
-#define TCG_TARGET_HAS_not_vec 1
254
-#define TCG_TARGET_HAS_neg_vec have_isa_3_00
255
-#define TCG_TARGET_HAS_abs_vec 0
256
-#define TCG_TARGET_HAS_roti_vec 0
257
-#define TCG_TARGET_HAS_rots_vec 0
258
-#define TCG_TARGET_HAS_rotv_vec 1
259
-#define TCG_TARGET_HAS_shi_vec 0
260
-#define TCG_TARGET_HAS_shs_vec 0
261
-#define TCG_TARGET_HAS_shv_vec 1
262
-#define TCG_TARGET_HAS_mul_vec 1
263
-#define TCG_TARGET_HAS_sat_vec 1
264
-#define TCG_TARGET_HAS_minmax_vec 1
265
-#define TCG_TARGET_HAS_bitsel_vec have_vsx
266
-#define TCG_TARGET_HAS_cmpsel_vec 1
267
-#define TCG_TARGET_HAS_tst_vec 0
268
+#include "tcg-target-has.h"
269
270
#define TCG_TARGET_DEFAULT_MO (0)
271
272
--
273
2.43.0
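Note that on ppc several of these "macros" are not compile-time constants: have_isa_3_00 and friends read the cpuinfo word probed at startup, so something like TCG_TARGET_HAS_rem_i32 is an ordinary runtime expression. A hedged sketch of the idea in isolation (the bit assignment and probe are illustrative, not the real cpuinfo layout):

    #include <stdbool.h>

    extern unsigned cpuinfo;                /* filled in once at startup */
    #define CPUINFO_V3_0 (1u << 3)          /* illustrative bit assignment */
    #define have_isa_3_00 (cpuinfo & CPUINFO_V3_0)
    #define TCG_TARGET_HAS_rem_i32 have_isa_3_00

    static bool can_emit_rem_i32(void)
    {
        return TCG_TARGET_HAS_rem_i32;      /* evaluated at run time */
    }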
New patch

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-10-philmd@linaro.org>
---
 tcg/riscv/tcg-target-has.h | 112 +++++++++++++++++++++++++++++++++++++
 tcg/riscv/tcg-target.h | 102 +--------------------------------
 2 files changed, 113 insertions(+), 101 deletions(-)
 create mode 100644 tcg/riscv/tcg-target-has.h

diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/riscv/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2018 SiFive, Inc
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#include "host/cpuinfo.h"
+
+/* optional instructions */
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_div2_i32 0
+#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_extract_i32 0
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_orc_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_eqv_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_ctz_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_brcond2 1
+#define TCG_TARGET_HAS_setcond2 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_div2_i64 0
+#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_extract_i64 0
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_extr_i64_i32 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_orc_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_eqv_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_ctz_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB)
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+
+#define TCG_TARGET_HAS_tst 0
+
+/* vector instructions */
+#define TCG_TARGET_HAS_v64 (cpuinfo & CPUINFO_ZVE64X)
+#define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_ZVE64X)
+#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_ZVE64X)
+#define TCG_TARGET_HAS_andc_vec 0
+#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 1
+#define TCG_TARGET_HAS_rots_vec 1
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 1
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 1
+
+#define TCG_TARGET_HAS_tst_vec 0
+
+#endif
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 #ifndef RISCV_TCG_TARGET_H
 #define RISCV_TCG_TARGET_H

-#include "host/cpuinfo.h"
-
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_NB_REGS 64
 #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_REG_TMP2 = TCG_REG_T4,
 } TCGReg;

-/* optional instructions */
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_div2_i32 0
-#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_extract_i32 0
-#define TCG_TARGET_HAS_sextract_i32 0
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 0
-#define TCG_TARGET_HAS_muls2_i32 0
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_orc_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_eqv_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_ctz_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_brcond2 1
-#define TCG_TARGET_HAS_setcond2 1
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#define TCG_TARGET_HAS_negsetcond_i64 1
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 1
-#define TCG_TARGET_HAS_div2_i64 0
-#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_extract_i64 0
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_extr_i64_i32 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_andc_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_orc_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_eqv_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_clz_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_ctz_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 0
-#define TCG_TARGET_HAS_muls2_i64 0
-#define TCG_TARGET_HAS_muluh_i64 1
-#define TCG_TARGET_HAS_mulsh_i64 1
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
-
-#define TCG_TARGET_HAS_tst 0
-
-/* vector instructions */
-#define TCG_TARGET_HAS_v64 (cpuinfo & CPUINFO_ZVE64X)
-#define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_ZVE64X)
-#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_ZVE64X)
-#define TCG_TARGET_HAS_andc_vec 0
-#define TCG_TARGET_HAS_orc_vec 0
-#define TCG_TARGET_HAS_nand_vec 0
-#define TCG_TARGET_HAS_nor_vec 0
-#define TCG_TARGET_HAS_eqv_vec 0
-#define TCG_TARGET_HAS_not_vec 1
-#define TCG_TARGET_HAS_neg_vec 1
-#define TCG_TARGET_HAS_abs_vec 0
-#define TCG_TARGET_HAS_roti_vec 1
-#define TCG_TARGET_HAS_rots_vec 1
-#define TCG_TARGET_HAS_rotv_vec 1
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 1
-#define TCG_TARGET_HAS_shv_vec 1
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_sat_vec 1
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 0
-#define TCG_TARGET_HAS_cmpsel_vec 1
-
-#define TCG_TARGET_HAS_tst_vec 0
+#include "tcg-target-has.h"

 #define TCG_TARGET_DEFAULT_MO (0)

--
2.43.0
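On riscv many entries are gated on Zbb in the same way. When such a macro evaluates to 0 (no Zbb), the middle-end must expand the operation; for TCG's ctz, which carries an explicit result for a zero input, a classic branch-free expansion looks roughly like this (an illustrative sketch, not the exact QEMU expansion):

    #include <stdint.h>

    static uint32_t ctz32_fallback(uint32_t x, uint32_t zero_value)
    {
        if (x == 0) {
            return zero_value;   /* TCG's ctz takes a value for the x == 0 case */
        }
        uint32_t n = 0;
        if (!(x & 0x0000ffffu)) { n += 16; x >>= 16; }
        if (!(x & 0x000000ffu)) { n += 8;  x >>= 8; }
        if (!(x & 0x0000000fu)) { n += 4;  x >>= 4; }
        if (!(x & 0x00000003u)) { n += 2;  x >>= 2; }
        if (!(x & 0x00000001u)) { n += 1; }
        return n;
    }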
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
     return false;
 }

+static bool fold_bswap(OptContext *ctx, TCGOp *op)
+{
+    if (arg_is_const(op->args[1])) {
+        uint64_t t = arg_info(op->args[1])->val;
+
+        t = do_constant_folding(op->opc, t, op->args[2]);
+        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+    }
+    return false;
+}
+
 static bool fold_call(OptContext *ctx, TCGOp *op)
 {
     TCGContext *s = ctx->tcg;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         }
         break;

-        CASE_OP_32_64(bswap16):
-        CASE_OP_32_64(bswap32):
-        case INDEX_op_bswap64_i64:
-            if (arg_is_const(op->args[1])) {
-                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
-                                          op->args[2]);
-                tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
-                continue;
-            }
-            break;
-
         default:
             break;

@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         case INDEX_op_brcond2_i32:
             done = fold_brcond2(&ctx, op);
             break;
+        CASE_OP_32_64(bswap16):
+        CASE_OP_32_64(bswap32):
+        case INDEX_op_bswap64_i64:
+            done = fold_bswap(&ctx, op);
+            break;
         CASE_OP_32_64(clz):
         CASE_OP_32_64(ctz):
             done = fold_count_zeros(&ctx, op);
--
2.25.1

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-11-philmd@linaro.org>
---
 tcg/s390x/tcg-target-has.h | 124 +++++++++++++++++++++++++++++++++++++
 tcg/s390x/tcg-target.h | 114 +---------------------------------
 2 files changed, 125 insertions(+), 113 deletions(-)
 create mode 100644 tcg/s390x/tcg-target-has.h

diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/s390x/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+/* Facilities required for proper operation; checked at startup. */
+
+#define FACILITY_ZARCH_ACTIVE 2
+#define FACILITY_LONG_DISP 18
+#define FACILITY_EXT_IMM 21
+#define FACILITY_GEN_INST_EXT 34
+#define FACILITY_45 45
+
+/* Facilities that are checked at runtime. */
+
+#define FACILITY_LOAD_ON_COND2 53
+#define FACILITY_MISC_INSN_EXT2 58
+#define FACILITY_MISC_INSN_EXT3 61
+#define FACILITY_VECTOR 129
+#define FACILITY_VECTOR_ENH1 135
+
+extern uint64_t s390_facilities[3];
+
+#define HAVE_FACILITY(X) \
+    ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div2_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_andc_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_orc_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_extr_i64_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_div2_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_andc_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_orc_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 1
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 HAVE_FACILITY(MISC_INSN_EXT2)
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
+
+#define TCG_TARGET_HAS_tst 1
+
+#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
+#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_nand_vec HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_nor_vec 1
+#define TCG_TARGET_HAS_eqv_vec HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 1
+#define TCG_TARGET_HAS_rots_vec 1
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 1
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 0
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 1
+#define TCG_TARGET_HAS_tst_vec 0
+
+#endif
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum TCGReg {

 #define TCG_TARGET_NB_REGS 64

-/* Facilities required for proper operation; checked at startup. */
-
-#define FACILITY_ZARCH_ACTIVE 2
-#define FACILITY_LONG_DISP 18
-#define FACILITY_EXT_IMM 21
-#define FACILITY_GEN_INST_EXT 34
-#define FACILITY_45 45
-
-/* Facilities that are checked at runtime. */
-
-#define FACILITY_LOAD_ON_COND2 53
-#define FACILITY_MISC_INSN_EXT2 58
-#define FACILITY_MISC_INSN_EXT3 61
-#define FACILITY_VECTOR 129
-#define FACILITY_VECTOR_ENH1 135
-
-extern uint64_t s390_facilities[3];
-
-#define HAVE_FACILITY(X) \
-    ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div2_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_andc_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_orc_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_clz_i32 0
-#define TCG_TARGET_HAS_ctz_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 1
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 0
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 0
-#define TCG_TARGET_HAS_muls2_i32 0
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_extr_i64_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#define TCG_TARGET_HAS_div2_i64 1
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_andc_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_orc_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_clz_i64 1
-#define TCG_TARGET_HAS_ctz_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 1
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_negsetcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 1
-#define TCG_TARGET_HAS_muls2_i64 HAVE_FACILITY(MISC_INSN_EXT2)
-#define TCG_TARGET_HAS_muluh_i64 0
-#define TCG_TARGET_HAS_mulsh_i64 0
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 1
-
-#define TCG_TARGET_HAS_tst 1
-
-#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
-#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
-#define TCG_TARGET_HAS_v256 0
-
-#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
-#define TCG_TARGET_HAS_nand_vec HAVE_FACILITY(VECTOR_ENH1)
-#define TCG_TARGET_HAS_nor_vec 1
-#define TCG_TARGET_HAS_eqv_vec HAVE_FACILITY(VECTOR_ENH1)
-#define TCG_TARGET_HAS_not_vec 1
-#define TCG_TARGET_HAS_neg_vec 1
-#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec 1
-#define TCG_TARGET_HAS_rots_vec 1
-#define TCG_TARGET_HAS_rotv_vec 1
-#define TCG_TARGET_HAS_shi_vec 1
-#define TCG_TARGET_HAS_shs_vec 1
-#define TCG_TARGET_HAS_shv_vec 1
-#define TCG_TARGET_HAS_mul_vec 1
-#define TCG_TARGET_HAS_sat_vec 0
-#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 1
-#define TCG_TARGET_HAS_cmpsel_vec 1
-#define TCG_TARGET_HAS_tst_vec 0
+#include "tcg-target-has.h"

 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)

--
2.43.0
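A note on the HAVE_FACILITY() macro moved above: the s390x STFLE facility list numbers bits from the most significant bit of each 64-bit word, which is why the shift is `63 - FACILITY_##X % 64`. Worked through as a plain function, for FACILITY_VECTOR (bit 129):

    /* Plain-function equivalent of HAVE_FACILITY(), for illustration. */
    #include <stdint.h>

    static uint64_t s390_facilities[3];

    static int have_facility(int nr)
    {
        /* nr = 129: word 129 / 64 = 2, shift 63 - (129 % 64) = 62,
         * i.e. the second-most-significant bit of s390_facilities[2],
         * matching STFLE's MSB-first bit numbering. */
        return (s390_facilities[nr / 64] >> (63 - nr % 64)) & 1;
    }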
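Looking back at the fold_bswap() change above: it evaluates a bswap whose input is already known to be constant and replaces the op with a movi. What that constant folding computes for a 32-bit value is the usual byte reversal (the sample value below is illustrative):

    #include <stdint.h>

    /* What constant-folding a bswap32 computes at translate time. */
    static uint32_t bswap32_const(uint32_t v)
    {
        return ((v & 0x000000ffu) << 24) |
               ((v & 0x0000ff00u) << 8)  |
               ((v & 0x00ff0000u) >> 8)  |
               ((v & 0xff000000u) >> 24);
    }
    /* e.g. bswap32_const(0x12345678) == 0x78563412, so the optimizer can
     * emit "movi dst, 0x78563412" instead of a runtime byte swap. */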
Most of these are handled by creating a fold_const2_commutative
to handle all of the binary operators.  The rest were already
handled on a case-by-case basis in the switch, and have their
own fold function in which to place the call.

We now have only one major switch on TCGOpcode.

Introduce NO_DEST and a block comment for swap_commutative in
order to make the handling of brcond and movcond opcodes cleaner.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 142 ++++++++++++++++++++++++-------------------------
 1 file changed, 70 insertions(+), 72 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
     return -1;
 }

+/**
+ * swap_commutative:
+ * @dest: TCGArg of the destination argument, or NO_DEST.
+ * @p1: first paired argument
+ * @p2: second paired argument
+ *
+ * If *@p1 is a constant and *@p2 is not, swap.
+ * If *@p2 matches @dest, swap.
+ * Return true if a swap was performed.
+ */
+
+#define NO_DEST temp_arg(NULL)
+
 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 {
     TCGArg a1 = *p1, a2 = *p2;
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
     return false;
 }

+static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
+{
+    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
+    return fold_const2(ctx, op);
+}
+
 static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
     uint64_t a_mask = ctx->a_mask;
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)

 static bool fold_add(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_x(ctx, op, 0)) {
         return true;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)

 static bool fold_add2(OptContext *ctx, TCGOp *op)
 {
+    /* Note that the high and low parts may be independently swapped. */
+    swap_commutative(op->args[0], &op->args[2], &op->args[4]);
+    swap_commutative(op->args[1], &op->args[3], &op->args[5]);
+
     return fold_addsub2(ctx, op, true);
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
 {
     uint64_t z1, z2;

-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_i(ctx, op, 0) ||
         fold_xi_to_x(ctx, op, -1) ||
         fold_xx_to_x(ctx, op)) {
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
 static bool fold_brcond(OptContext *ctx, TCGOp *op)
 {
     TCGCond cond = op->args[2];
-    int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
+    int i;

+    if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
+        op->args[2] = cond = tcg_swap_cond(cond);
+    }
+
+    i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
     if (i == 0) {
         tcg_op_remove(ctx->tcg, op);
         return true;
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
 {
     TCGCond cond = op->args[4];
-    int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
     TCGArg label = op->args[5];
-    int inv = 0;
+    int i, inv = 0;

+    if (swap_commutative2(&op->args[0], &op->args[2])) {
+        op->args[4] = cond = tcg_swap_cond(cond);
+    }
+
+    i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
     if (i >= 0) {
         goto do_brcond_const;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)

 static bool fold_eqv(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_x(ctx, op, -1) ||
         fold_xi_to_not(ctx, op, 0)) {
         return true;
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
 static bool fold_movcond(OptContext *ctx, TCGOp *op)
 {
     TCGCond cond = op->args[5];
-    int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+    int i;

+    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+        op->args[5] = cond = tcg_swap_cond(cond);
+    }
+    /*
+     * Canonicalize the "false" input reg to match the destination reg so
+     * that the tcg backend can implement a "move if true" operation.
+     */
+    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
+        op->args[5] = cond = tcg_invert_cond(cond);
+    }
+
+    i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
     if (i >= 0) {
         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)

 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_i(ctx, op, 0)) {
         return true;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)

 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
 {
+    swap_commutative(op->args[0], &op->args[2], &op->args[3]);
+
     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
         uint64_t a = arg_info(op->args[2])->val;
         uint64_t b = arg_info(op->args[3])->val;
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)

 static bool fold_nand(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_not(ctx, op, -1)) {
         return true;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)

 static bool fold_nor(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_not(ctx, op, 0)) {
         return true;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)

 static bool fold_or(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_x(ctx, op, 0) ||
         fold_xx_to_x(ctx, op)) {
         return true;
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
 static bool fold_setcond(OptContext *ctx, TCGOp *op)
 {
     TCGCond cond = op->args[3];
-    int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+    int i;

+    if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
+        op->args[3] = cond = tcg_swap_cond(cond);
+    }
+
+    i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
     if (i >= 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
 {
     TCGCond cond = op->args[5];
-    int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
-    int inv = 0;
+    int i, inv = 0;

+    if (swap_commutative2(&op->args[1], &op->args[3])) {
+        op->args[5] = cond = tcg_swap_cond(cond);
+    }
+
+    i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
     if (i >= 0) {
         goto do_setcond_const;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)

 static bool fold_xor(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
+    if (fold_const2_commutative(ctx, op) ||
         fold_xx_to_i(ctx, op, 0) ||
         fold_xi_to_x(ctx, op, 0) ||
         fold_xi_to_not(ctx, op, -1)) {
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
             ctx.type = TCG_TYPE_I32;
         }

-        /* For commutative operations make constant second argument */
-        switch (opc) {
-        CASE_OP_32_64_VEC(add):
-        CASE_OP_32_64_VEC(mul):
-        CASE_OP_32_64_VEC(and):
-        CASE_OP_32_64_VEC(or):
-        CASE_OP_32_64_VEC(xor):
-        CASE_OP_32_64(eqv):
-        CASE_OP_32_64(nand):
-        CASE_OP_32_64(nor):
-        CASE_OP_32_64(muluh):
-        CASE_OP_32_64(mulsh):
-            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
-            break;
-        CASE_OP_32_64(brcond):
-            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
-                op->args[2] = tcg_swap_cond(op->args[2]);
-            }
-            break;
-        CASE_OP_32_64(setcond):
-            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
-                op->args[3] = tcg_swap_cond(op->args[3]);
-            }
-            break;
-        CASE_OP_32_64(movcond):
-            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
-                op->args[5] = tcg_swap_cond(op->args[5]);
-            }
-            /* For movcond, we canonicalize the "false" input reg to match
-               the destination reg so that the tcg backend can implement
-               a "move if true" operation. */
-            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
-                op->args[5] = tcg_invert_cond(op->args[5]);
-            }
-            break;
-        CASE_OP_32_64(add2):
-            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
-            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
-            break;
-        CASE_OP_32_64(mulu2):
-        CASE_OP_32_64(muls2):
-            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
-            break;
-        case INDEX_op_brcond2_i32:
-            if (swap_commutative2(&op->args[0], &op->args[2])) {
-                op->args[4] = tcg_swap_cond(op->args[4]);
-            }
-            break;
-        case INDEX_op_setcond2_i32:
-            if (swap_commutative2(&op->args[1], &op->args[3])) {
-                op->args[5] = tcg_swap_cond(op->args[5]);
-            }
-            break;
-        default:
-            break;
-        }
-
         /* Assume all bits affected, and no bits known zero. */
         ctx.a_mask = -1;
         ctx.z_mask = -1;
--
2.25.1

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-12-philmd@linaro.org>
---
 tcg/sparc64/tcg-target-has.h | 86 ++++++++++++++++++++++++++++++++++++
 tcg/sparc64/tcg-target.h | 78 +-------------------------------
 2 files changed, 88 insertions(+), 76 deletions(-)
 create mode 100644 tcg/sparc64/tcg-target-has.h

diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/sparc64/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#if defined(__VIS__) && __VIS__ >= 0x300
+#define use_vis3_instructions 1
+#else
+extern bool use_vis3_instructions;
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_rot_i32 0
+#define TCG_TARGET_HAS_ext8s_i32 0
+#define TCG_TARGET_HAS_ext16s_i32 0
+#define TCG_TARGET_HAS_ext8u_i32 0
+#define TCG_TARGET_HAS_ext16u_i32 0
+#define TCG_TARGET_HAS_bswap16_i32 0
+#define TCG_TARGET_HAS_bswap32_i32 0
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_extract_i32 0
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_negsetcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_extr_i64_i32 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 0
+#define TCG_TARGET_HAS_rot_i64 0
+#define TCG_TARGET_HAS_ext8s_i64 0
+#define TCG_TARGET_HAS_ext16s_i64 0
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 0
+#define TCG_TARGET_HAS_ext16u_i64 0
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 0
+#define TCG_TARGET_HAS_bswap32_i64 0
+#define TCG_TARGET_HAS_bswap64_i64 0
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 0
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_extract_i64 0
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_negsetcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
+#define TCG_TARGET_HAS_mulsh_i64 0
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+
+#define TCG_TARGET_HAS_tst 1
+
+#endif
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_REG_I7,
 } TCGReg;

-#if defined(__VIS__) && __VIS__ >= 0x300
-#define use_vis3_instructions 1
-#else
-extern bool use_vis3_instructions;
-#endif
-
-/* optional instructions */
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 0
-#define TCG_TARGET_HAS_rot_i32 0
-#define TCG_TARGET_HAS_ext8s_i32 0
-#define TCG_TARGET_HAS_ext16s_i32 0
-#define TCG_TARGET_HAS_ext8u_i32 0
-#define TCG_TARGET_HAS_ext16u_i32 0
-#define TCG_TARGET_HAS_bswap16_i32 0
-#define TCG_TARGET_HAS_bswap32_i32 0
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 0
-#define TCG_TARGET_HAS_ctz_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_extract_i32 0
-#define TCG_TARGET_HAS_sextract_i32 0
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_negsetcond_i32 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 1
-#define TCG_TARGET_HAS_muls2_i32 1
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#define TCG_TARGET_HAS_extr_i64_i32 0
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 0
-#define TCG_TARGET_HAS_rot_i64 0
-#define TCG_TARGET_HAS_ext8s_i64 0
-#define TCG_TARGET_HAS_ext16s_i64 0
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 0
-#define TCG_TARGET_HAS_ext16u_i64 0
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 0
-#define TCG_TARGET_HAS_bswap32_i64 0
-#define TCG_TARGET_HAS_bswap64_i64 0
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_andc_i64 1
-#define TCG_TARGET_HAS_orc_i64 1
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_clz_i64 0
-#define TCG_TARGET_HAS_ctz_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 0
-#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_extract_i64 0
-#define TCG_TARGET_HAS_sextract_i64 0
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_negsetcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 0
-#define TCG_TARGET_HAS_muls2_i64 0
-#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
-#define TCG_TARGET_HAS_mulsh_i64 0
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
-
-#define TCG_TARGET_HAS_tst 1
-
 #define TCG_AREG0 TCG_REG_I0

+#include "tcg-target-has.h"
+
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif
--
2.43.0
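The heart of the optimize.c patch above is the canonicalization rule documented on swap_commutative(): put a constant operand second, and for comparisons also swap the condition. A stripped-down sketch of that rule (types simplified; the real code also orders temporaries and handles the NO_DEST case):

    typedef struct { int is_const; long val; } ArgSketch;

    /* Returns 1 when the operands were swapped; for comparison ops the
     * caller must then also swap (or invert) the condition code. */
    static int swap_commutative_sketch(ArgSketch *p1, ArgSketch *p2)
    {
        if (p1->is_const && !p2->is_const) {
            ArgSketch tmp = *p1;
            *p1 = *p2;
            *p2 = tmp;
            return 1;
        }
        return 0;
    }

With every commutative op canonicalized this way, the fold functions only need to look for a constant in the second slot.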
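One detail of the sparc64 header above is worth calling out: use_vis3_instructions is resolved at compile time when the compiler itself targets VIS3, and only becomes a runtime-probed variable otherwise. The pattern in isolation (the probe that sets the variable is elsewhere and not shown):

    #include <stdbool.h>

    #if defined(__VIS__) && __VIS__ >= 0x300
    /* The whole binary already requires VIS3, so the test is free. */
    #define use_vis3_instructions 1
    #else
    /* Otherwise a startup probe sets this once, before code generation. */
    extern bool use_vis3_instructions;
    #endif

so `TCG_TARGET_HAS_muluh_i64 use_vis3_instructions` costs nothing on a VIS3 build and a single load elsewhere.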
From: Luis Pires <luis.pires@eldorado.org.br>

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-5-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/unit/test-div128.c | 197 +++++++++++++++++++++++++++++++++++++++
 tests/unit/meson.build | 1 +
 2 files changed, 198 insertions(+)
 create mode 100644 tests/unit/test-div128.c

diff --git a/tests/unit/test-div128.c b/tests/unit/test-div128.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/unit/test-div128.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * Test 128-bit division functions
+ *
+ * Copyright (c) 2021 Instituto de Pesquisas Eldorado (eldorado.org.br)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+
+typedef struct {
+    uint64_t high;
+    uint64_t low;
+    uint64_t rhigh;
+    uint64_t rlow;
+    uint64_t divisor;
+    uint64_t remainder;
+} test_data_unsigned;
+
+typedef struct {
+    int64_t high;
+    uint64_t low;
+    int64_t rhigh;
+    uint64_t rlow;
+    int64_t divisor;
+    int64_t remainder;
+} test_data_signed;
+
+static const test_data_unsigned test_table_unsigned[] = {
+    /* Dividend fits in 64 bits */
+    { 0x0000000000000000ULL, 0x0000000000000000ULL,
+      0x0000000000000000ULL, 0x0000000000000000ULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL},
+    { 0x0000000000000000ULL, 0x0000000000000001ULL,
+      0x0000000000000000ULL, 0x0000000000000001ULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL},
+    { 0x0000000000000000ULL, 0x0000000000000003ULL,
+      0x0000000000000000ULL, 0x0000000000000001ULL,
+      0x0000000000000002ULL, 0x0000000000000001ULL},
+    { 0x0000000000000000ULL, 0x8000000000000000ULL,
+      0x0000000000000000ULL, 0x8000000000000000ULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL},
+    { 0x0000000000000000ULL, 0xa000000000000000ULL,
+      0x0000000000000000ULL, 0x0000000000000002ULL,
+      0x4000000000000000ULL, 0x2000000000000000ULL},
+    { 0x0000000000000000ULL, 0x8000000000000000ULL,
+      0x0000000000000000ULL, 0x0000000000000001ULL,
+      0x8000000000000000ULL, 0x0000000000000000ULL},
+
+    /* Dividend > 64 bits, with MSB 0 */
+    { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
+      0x123456789abcdefeULL, 0xefedcba987654321ULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL},
+    { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
+      0x0000000000000001ULL, 0x000000000000000dULL,
+      0x123456789abcdefeULL, 0x03456789abcdf03bULL},
+    { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
+      0x0123456789abcdefULL, 0xeefedcba98765432ULL,
+      0x0000000000000010ULL, 0x0000000000000001ULL},
+
+    /* Dividend > 64 bits, with MSB 1 */
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL},
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL,
+      0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x0feeddccbbaa9988ULL, 0x7766554433221100ULL,
+      0x0000000000000010ULL, 0x000000000000000fULL},
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x000000000000000eULL, 0x00f0f0f0f0f0f35aULL,
+      0x123456789abcdefeULL, 0x0f8922bc55ef90c3ULL},
+
+    /**
+     * Divisor == 64 bits, with MSB 1
+     * and high 64 bits of dividend >= divisor
+     * (for testing normalization)
+     */
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x0000000000000001ULL, 0x0000000000000000ULL,
+      0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
+    { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+      0x0000000000000001ULL, 0xfddbb9977553310aULL,
+      0x8000000000000001ULL, 0x78899aabbccddf05ULL},
+
+    /* Dividend > 64 bits, divisor almost as big */
+    { 0x0000000000000001ULL, 0x23456789abcdef01ULL,
+      0x0000000000000000ULL, 0x000000000000000fULL,
+      0x123456789abcdefeULL, 0x123456789abcde1fULL},
+};
+
+static const test_data_signed test_table_signed[] = {
+    /* Positive dividend, positive/negative divisors */
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0x0000000000000000LL, 0x0000000000bc614eULL,
+      0x0000000000000001LL, 0x0000000000000000LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0xffffffffffffffffLL, 0x0000000000000000LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0x0000000000000000LL, 0x00000000005e30a7ULL,
+      0x0000000000000002LL, 0x0000000000000000LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
+      0xfffffffffffffffeLL, 0x0000000000000000LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0x0000000000000000LL, 0x0000000000178c29ULL,
+      0x0000000000000008LL, 0x0000000000000006LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
+      0xfffffffffffffff8LL, 0x0000000000000006LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0x0000000000000000LL, 0x000000000000550dULL,
+      0x0000000000000237LL, 0x0000000000000183LL},
+    { 0x0000000000000000LL, 0x0000000000bc614eULL,
+      0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
+      0xfffffffffffffdc9LL, 0x0000000000000183LL},
+
+    /* Negative dividend, positive/negative divisors */
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0x0000000000000001LL, 0x0000000000000000LL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0x0000000000000000LL, 0x0000000000bc614eULL,
+      0xffffffffffffffffLL, 0x0000000000000000LL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
+      0x0000000000000002LL, 0x0000000000000000LL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0x0000000000000000LL, 0x00000000005e30a7ULL,
+      0xfffffffffffffffeLL, 0x0000000000000000LL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
+      0x0000000000000008LL, 0xfffffffffffffffaLL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0x0000000000000000LL, 0x0000000000178c29ULL,
+      0xfffffffffffffff8LL, 0xfffffffffffffffaLL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
+      0x0000000000000237LL, 0xfffffffffffffe7dLL},
+    { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
+      0x0000000000000000LL, 0x000000000000550dULL,
+      0xfffffffffffffdc9LL, 0xfffffffffffffe7dLL},
+};
+
+static void test_divu128(void)
+{
+    int i;
+    uint64_t rem;
+    test_data_unsigned tmp;
+
+    for (i = 0; i < ARRAY_SIZE(test_table_unsigned); ++i) {
+        tmp = test_table_unsigned[i];
+
+        rem = divu128(&tmp.low, &tmp.high, tmp.divisor);
+        g_assert_cmpuint(tmp.low, ==, tmp.rlow);
+        g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
+        g_assert_cmpuint(rem, ==, tmp.remainder);
+    }
+}
+
+static void test_divs128(void)
+{
+    int i;
+    int64_t rem;
+    test_data_signed tmp;
+
+    for (i = 0; i < ARRAY_SIZE(test_table_signed); ++i) {
+        tmp = test_table_signed[i];
+
+        rem = divs128(&tmp.low, &tmp.high, tmp.divisor);
+        g_assert_cmpuint(tmp.low, ==, tmp.rlow);
+        g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
+        g_assert_cmpuint(rem, ==, tmp.remainder);
+    }
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    g_test_add_func("/host-utils/test_divu128", test_divu128);
+    g_test_add_func("/host-utils/test_divs128", test_divs128);
+    return g_test_run();
+}
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -XXX,XX +XXX,XX @@ tests = {
   # all code tested by test-x86-cpuid is inside topology.h
   'test-x86-cpuid': [],
   'test-cutils': [],
+  'test-div128': [],
   'test-shift128': [],
   'test-mul64': [],
   # all code tested by test-int128 is inside int128.h

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-13-philmd@linaro.org>
---
 tcg/tci/tcg-target-has.h | 83 ++++++++++++++++++++++++++++++++++++++++
 tcg/tci/tcg-target.h | 75 +-----------------------------
 2 files changed, 84 insertions(+), 74 deletions(-)
 create mode 100644 tcg/tci/tcg-target-has.h

diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/tci/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific opcode support
+ * Copyright (c) 2009, 2011 Stefan Weil
+ */
+
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_negsetcond_i32 0
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_extr_i64_i32 0
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 1
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_negsetcond_i64 0
+#define TCG_TARGET_HAS_muls2_i64 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+#else
+#define TCG_TARGET_HAS_mulu2_i32 1
+#endif /* TCG_TARGET_REG_BITS == 64 */
+
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
+
+#define TCG_TARGET_HAS_tst 1
+
+#endif
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

-/* Optional instructions. */
-
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_eqv_i32 1
-#define TCG_TARGET_HAS_nand_i32 1
-#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 0
-#define TCG_TARGET_HAS_muls2_i32 1
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_extr_i64_i32 0
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
142
-#define TCG_TARGET_HAS_deposit_i64 1
143
-#define TCG_TARGET_HAS_extract_i64 1
144
-#define TCG_TARGET_HAS_sextract_i64 1
145
-#define TCG_TARGET_HAS_extract2_i64 0
146
-#define TCG_TARGET_HAS_div_i64 1
147
-#define TCG_TARGET_HAS_rem_i64 1
148
-#define TCG_TARGET_HAS_ext8s_i64 1
149
-#define TCG_TARGET_HAS_ext16s_i64 1
150
-#define TCG_TARGET_HAS_ext32s_i64 1
151
-#define TCG_TARGET_HAS_ext8u_i64 1
152
-#define TCG_TARGET_HAS_ext16u_i64 1
153
-#define TCG_TARGET_HAS_ext32u_i64 1
154
-#define TCG_TARGET_HAS_andc_i64 1
155
-#define TCG_TARGET_HAS_eqv_i64 1
156
-#define TCG_TARGET_HAS_nand_i64 1
157
-#define TCG_TARGET_HAS_nor_i64 1
158
-#define TCG_TARGET_HAS_clz_i64 1
159
-#define TCG_TARGET_HAS_ctz_i64 1
160
-#define TCG_TARGET_HAS_ctpop_i64 1
161
-#define TCG_TARGET_HAS_not_i64 1
162
-#define TCG_TARGET_HAS_orc_i64 1
163
-#define TCG_TARGET_HAS_rot_i64 1
164
-#define TCG_TARGET_HAS_negsetcond_i64 0
165
-#define TCG_TARGET_HAS_muls2_i64 1
166
-#define TCG_TARGET_HAS_add2_i32 1
167
-#define TCG_TARGET_HAS_sub2_i32 1
168
-#define TCG_TARGET_HAS_mulu2_i32 1
169
-#define TCG_TARGET_HAS_add2_i64 1
170
-#define TCG_TARGET_HAS_sub2_i64 1
171
-#define TCG_TARGET_HAS_mulu2_i64 1
172
-#define TCG_TARGET_HAS_muluh_i64 0
173
-#define TCG_TARGET_HAS_mulsh_i64 0
174
-#else
175
-#define TCG_TARGET_HAS_mulu2_i32 1
176
-#endif /* TCG_TARGET_REG_BITS == 64 */
177
-
178
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
179
-
180
-#define TCG_TARGET_HAS_tst 1
181
+#include "tcg-target-has.h"
182
183
/* Number of registers available. */
184
#define TCG_TARGET_NB_REGS 16
228
--
185
--
229
2.25.1
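
Aside (not part of the patch): the contract these tests exercise, assuming the in-place divu128() interface from "qemu/host-utils.h" added earlier in this series. The quotient replaces the dividend halves and the remainder is returned:

    /* Sketch only: divide the 128-bit value high:low by 2 in place. */
    uint64_t low = 0x0000000000bc614eULL;   /* dividend = 12345678 */
    uint64_t high = 0x0000000000000000ULL;
    uint64_t rem = divu128(&low, &high, 2);
    /* now low == 0x5e30a7 (6172839), high == 0, rem == 0 */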
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

-/* Optional instructions. */
-
-#define TCG_TARGET_HAS_bswap16_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rem_i32 1
-#define TCG_TARGET_HAS_ext8s_i32 1
-#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 1
-#define TCG_TARGET_HAS_ext16u_i32 1
-#define TCG_TARGET_HAS_andc_i32 1
-#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
-#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_eqv_i32 1
-#define TCG_TARGET_HAS_nand_i32 1
-#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_clz_i32 1
-#define TCG_TARGET_HAS_ctz_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 1
-#define TCG_TARGET_HAS_not_i32 1
-#define TCG_TARGET_HAS_orc_i32 1
-#define TCG_TARGET_HAS_rot_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 0
-#define TCG_TARGET_HAS_muls2_i32 1
-#define TCG_TARGET_HAS_muluh_i32 0
-#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_qemu_st8_i32 0
-
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_extr_i64_i32 0
-#define TCG_TARGET_HAS_bswap16_i64 1
-#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
-#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rem_i64 1
-#define TCG_TARGET_HAS_ext8s_i64 1
-#define TCG_TARGET_HAS_ext16s_i64 1
-#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 1
-#define TCG_TARGET_HAS_ext16u_i64 1
-#define TCG_TARGET_HAS_ext32u_i64 1
-#define TCG_TARGET_HAS_andc_i64 1
-#define TCG_TARGET_HAS_eqv_i64 1
-#define TCG_TARGET_HAS_nand_i64 1
-#define TCG_TARGET_HAS_nor_i64 1
-#define TCG_TARGET_HAS_clz_i64 1
-#define TCG_TARGET_HAS_ctz_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 1
-#define TCG_TARGET_HAS_not_i64 1
-#define TCG_TARGET_HAS_orc_i64 1
-#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_negsetcond_i64 0
-#define TCG_TARGET_HAS_muls2_i64 1
-#define TCG_TARGET_HAS_add2_i32 1
-#define TCG_TARGET_HAS_sub2_i32 1
-#define TCG_TARGET_HAS_mulu2_i32 1
-#define TCG_TARGET_HAS_add2_i64 1
-#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 1
-#define TCG_TARGET_HAS_muluh_i64 0
-#define TCG_TARGET_HAS_mulsh_i64 0
-#else
-#define TCG_TARGET_HAS_mulu2_i32 1
-#endif /* TCG_TARGET_REG_BITS == 64 */
-
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
-
-#define TCG_TARGET_HAS_tst 1
+#include "tcg-target-has.h"

 /* Number of registers available. */
 #define TCG_TARGET_NB_REGS 16
--
2.43.0
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 32 ++++++++++++++++++--------------
1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
 return true;
 }

+static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[1])) {
+ uint64_t t = arg_info(op->args[1])->val;
+
+ if (t != 0) {
+ t = do_constant_folding(op->opc, t, 0);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ }
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
+ }
+ return false;
+}
+
 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
 {
 return fold_const1(ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 }
 break;

- CASE_OP_32_64(clz):
- CASE_OP_32_64(ctz):
- if (arg_is_const(op->args[1])) {
- TCGArg v = arg_info(op->args[1])->val;
- if (v != 0) {
- tmp = do_constant_folding(opc, v, 0);
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
- } else {
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
- }
- continue;
- }
- break;
-
 default:
 break;

@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 case INDEX_op_brcond2_i32:
 done = fold_brcond2(&ctx, op);
 break;
+ CASE_OP_32_64(clz):
+ CASE_OP_32_64(ctz):
+ done = fold_count_zeros(&ctx, op);
+ break;
 CASE_OP_32_64(ctpop):
 done = fold_ctpop(&ctx, op);
 break;
--
2.25.1
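
Aside: the identity that fold_count_zeros() folds, sketched outside TCG. The only assumption is the TCG definition of clz/ctz, where a zero input yields the value of the second operand:

    /* Sketch only: count leading zeros with a defined result for 0. */
    static uint64_t clz64_example(uint64_t x, uint64_t zero_val)
    {
        return x ? (uint64_t)__builtin_clzll(x) : zero_val;
    }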
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-14-philmd@linaro.org>
---
tcg/aarch64/tcg-target.h | 2 --
tcg/arm/tcg-target.h | 2 --
tcg/i386/tcg-target.h | 2 --
tcg/loongarch64/tcg-target.h | 2 --
tcg/mips/tcg-target.h | 2 --
tcg/ppc/tcg-target.h | 2 --
tcg/riscv/tcg-target.h | 2 --
tcg/s390x/tcg-target.h | 2 --
tcg/sparc64/tcg-target.h | 2 --
tcg/tcg-has.h | 2 ++
tcg/tci/tcg-target.h | 2 --
11 files changed, 2 insertions(+), 20 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_TARGET_NB_REGS 64

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_TARGET_NB_REGS 32

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_REG_CALL_STACK = TCG_REG_ESP
 } TCGReg;

-#include "tcg-target-has.h"
-
 /* This defines the natural memory order supported by this
 * architecture before guarantees made by various barrier
 * instructions.
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_VEC_TMP0 = TCG_REG_V23,
 } TCGReg;

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif /* LOONGARCH_TCG_TARGET_H */
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_AREG0 = TCG_REG_S8,
 } TCGReg;

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO 0

 #endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_AREG0 = TCG_REG_R27
 } TCGReg;

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_REG_TMP2 = TCG_REG_T4,
 } TCGReg;

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum TCGReg {

 #define TCG_TARGET_NB_REGS 64

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)

 #endif
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_AREG0 TCG_REG_I0

-#include "tcg-target-has.h"
-
 #define TCG_TARGET_DEFAULT_MO (0)

 #endif
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-has.h
+++ b/tcg/tcg-has.h
@@ -XXX,XX +XXX,XX @@
 #ifndef TCG_HAS_H
 #define TCG_HAS_H

+#include "tcg-target-has.h"
+
 #if TCG_TARGET_REG_BITS == 32
 /* Turn some undef macros into false macros. */
 #define TCG_TARGET_HAS_extr_i64_i32 0
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

-#include "tcg-target-has.h"
-
 /* Number of registers available. */
 #define TCG_TARGET_NB_REGS 16
--
2.43.0
Rename to fold_addsub2.
Use Int128 to implement the wider operation.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 65 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@
 */

 #include "qemu/osdep.h"
+#include "qemu/int128.h"
 #include "tcg/tcg-op.h"
 #include "tcg-internal.h"

@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
 return false;
 }

-static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 {
 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
 arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
- uint32_t al = arg_info(op->args[2])->val;
- uint32_t ah = arg_info(op->args[3])->val;
- uint32_t bl = arg_info(op->args[4])->val;
- uint32_t bh = arg_info(op->args[5])->val;
- uint64_t a = ((uint64_t)ah << 32) | al;
- uint64_t b = ((uint64_t)bh << 32) | bl;
+ uint64_t al = arg_info(op->args[2])->val;
+ uint64_t ah = arg_info(op->args[3])->val;
+ uint64_t bl = arg_info(op->args[4])->val;
+ uint64_t bh = arg_info(op->args[5])->val;
 TCGArg rl, rh;
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
+ TCGOp *op2;

- if (add) {
- a += b;
+ if (ctx->type == TCG_TYPE_I32) {
+ uint64_t a = deposit64(al, 32, 32, ah);
+ uint64_t b = deposit64(bl, 32, 32, bh);
+
+ if (add) {
+ a += b;
+ } else {
+ a -= b;
+ }
+
+ al = sextract64(a, 0, 32);
+ ah = sextract64(a, 32, 32);
 } else {
- a -= b;
+ Int128 a = int128_make128(al, ah);
+ Int128 b = int128_make128(bl, bh);
+
+ if (add) {
+ a = int128_add(a, b);
+ } else {
+ a = int128_sub(a, b);
+ }
+
+ al = int128_getlo(a);
+ ah = int128_gethi(a);
 }

 rl = op->args[0];
 rh = op->args[1];
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
+
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
+
+ tcg_opt_gen_movi(ctx, op, rl, al);
+ tcg_opt_gen_movi(ctx, op2, rh, ah);
 return true;
 }
 return false;
 }

-static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
+static bool fold_add2(OptContext *ctx, TCGOp *op)
 {
- return fold_addsub2_i32(ctx, op, true);
+ return fold_addsub2(ctx, op, true);
 }

 static bool fold_and(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
 return false;
 }

-static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
 {
- return fold_addsub2_i32(ctx, op, false);
+ return fold_addsub2(ctx, op, false);
 }

 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 CASE_OP_32_64_VEC(add):
 done = fold_add(&ctx, op);
 break;
- case INDEX_op_add2_i32:
- done = fold_add2_i32(&ctx, op);
+ CASE_OP_32_64(add2):
+ done = fold_add2(&ctx, op);
 break;
 CASE_OP_32_64_VEC(and):
 done = fold_and(&ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 CASE_OP_32_64_VEC(sub):
 done = fold_sub(&ctx, op);
 break;
- case INDEX_op_sub2_i32:
- done = fold_sub2_i32(&ctx, op);
+ CASE_OP_32_64(sub2):
+ done = fold_sub2(&ctx, op);
 break;
 CASE_OP_32_64_VEC(xor):
 done = fold_xor(&ctx, op);
--
2.25.1
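
Aside: the arithmetic being folded above, sketched with the compiler's unsigned __int128 standing in for QEMU's Int128 helpers (a host-dependence the patch avoids by using int128_add/int128_sub):

    /* Sketch only: constant-fold add2 on two 64-bit halves per operand. */
    static void fold_add2_example(uint64_t al, uint64_t ah,
                                  uint64_t bl, uint64_t bh,
                                  uint64_t *rl, uint64_t *rh)
    {
        unsigned __int128 a = ((unsigned __int128)ah << 64) | al;
        unsigned __int128 b = ((unsigned __int128)bh << 64) | bl;

        a += b;                     /* int128_add() in the patch */
        *rl = (uint64_t)a;          /* int128_getlo() */
        *rh = (uint64_t)(a >> 64);  /* int128_gethi() */
    }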
TCG_TARGET_HAS_* definitions don't need to be exposed
by "tcg/tcg.h". Only include 'tcg-has.h' when necessary.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250108215156.8731-15-philmd@linaro.org>
---
include/tcg/tcg.h | 2 --
tcg/optimize.c | 1 +
tcg/tcg-common.c | 1 +
tcg/tcg-op-gvec.c | 1 +
tcg/tcg-op-ldst.c | 2 +-
tcg/tcg-op-vec.c | 1 +
tcg/tcg-op.c | 2 +-
tcg/tcg.c | 1 +
tcg/tci.c | 1 +
9 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
 #error unsupported
 #endif

-#include "tcg/tcg-has.h"
-
 typedef enum TCGOpcode {
 #define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
 #include "tcg/tcg-opc.h"
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/interval-tree.h"
 #include "tcg/tcg-op-common.h"
 #include "tcg-internal.h"
+#include "tcg-has.h"

 #define CASE_OP_32_64(x) \
 glue(glue(case INDEX_op_, x), _i32): \
diff --git a/tcg/tcg-common.c b/tcg/tcg-common.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-common.c
+++ b/tcg/tcg-common.c
@@ -XXX,XX +XXX,XX @@

 #include "qemu/osdep.h"
 #include "tcg/tcg.h"
+#include "tcg-has.h"

 TCGOpDef tcg_op_defs[] = {
 #define DEF(s, oargs, iargs, cargs, flags) \
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg/tcg-op-common.h"
 #include "tcg/tcg-op-gvec-common.h"
 #include "tcg/tcg-gvec-desc.h"
+#include "tcg-has.h"

 #define MAX_UNROLL 4

diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -XXX,XX +XXX,XX @@
 #include "exec/translation-block.h"
 #include "exec/plugin-gen.h"
 #include "tcg-internal.h"
-
+#include "tcg-has.h"

 static void check_max_alignment(unsigned a_bits)
 {
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg/tcg-op-common.h"
 #include "tcg/tcg-mo.h"
 #include "tcg-internal.h"
+#include "tcg-has.h"

 /*
 * Vector optional opcode tracking.
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@
 #include "exec/translation-block.h"
 #include "exec/plugin-gen.h"
 #include "tcg-internal.h"
-
+#include "tcg-has.h"

 /*
 * Encourage the compiler to tail-call to a function, rather than inlining.
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg/tcg-temp-internal.h"
 #include "tcg-internal.h"
 #include "tcg/perf.h"
+#include "tcg-has.h"
 #ifdef CONFIG_USER_ONLY
 #include "user/guest-base.h"
 #endif
diff --git a/tcg/tci.c b/tcg/tci.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg/helper-info.h"
 #include "tcg/tcg-ldst.h"
 #include "disas/dis-asm.h"
+#include "tcg-has.h"
 #include <ffi.h>

--
2.43.0
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
 return fold_const1(ctx, op);
 }

+static bool fold_deposit(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
+ uint64_t t1 = arg_info(op->args[1])->val;
+ uint64_t t2 = arg_info(op->args[2])->val;
+
+ t1 = deposit64(t1, op->args[3], op->args[4], t2);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
+ }
+ return false;
+}
+
 static bool fold_divide(OptContext *ctx, TCGOp *op)
 {
 return fold_const2(ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 }
 break;

- CASE_OP_32_64(deposit):
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- tmp = deposit64(arg_info(op->args[1])->val,
- op->args[3], op->args[4],
- arg_info(op->args[2])->val);
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
- continue;
- }
- break;
-
 default:
 break;

@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
 CASE_OP_32_64(ctpop):
 done = fold_ctpop(&ctx, op);
 break;
+ CASE_OP_32_64(deposit):
+ done = fold_deposit(&ctx, op);
+ break;
 CASE_OP_32_64(div):
 CASE_OP_32_64(divu):
 done = fold_divide(&ctx, op);
--
2.25.1
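
Aside: for readers unfamiliar with deposit64(), it inserts the low LEN bits of the second value into the first at position POS. A behavioral sketch, assuming 0 < len and pos + len <= 64 as QEMU's bitops.h requires:

    /* Sketch only: behavior of deposit64(t1, pos, len, t2). */
    static uint64_t deposit64_example(uint64_t t1, int pos, int len,
                                      uint64_t t2)
    {
        uint64_t mask = (~0ULL >> (64 - len)) << pos;
        return (t1 & ~mask) | ((t2 << pos) & mask);
    }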
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/internal-target.h | 1 +
tcg/aarch64/tcg-target-mo.h | 12 ++++++++++++
tcg/aarch64/tcg-target.h | 2 --
tcg/arm/tcg-target-mo.h | 13 +++++++++++++
tcg/arm/tcg-target.h | 2 --
tcg/i386/tcg-target-mo.h | 19 +++++++++++++++++++
tcg/i386/tcg-target.h | 11 -----------
tcg/loongarch64/tcg-target-mo.h | 12 ++++++++++++
tcg/loongarch64/tcg-target.h | 2 --
tcg/mips/tcg-target-mo.h | 13 +++++++++++++
tcg/mips/tcg-target.h | 2 --
tcg/ppc/tcg-target-mo.h | 12 ++++++++++++
tcg/ppc/tcg-target.h | 2 --
tcg/riscv/tcg-target-mo.h | 12 ++++++++++++
tcg/riscv/tcg-target.h | 2 --
tcg/s390x/tcg-target-mo.h | 12 ++++++++++++
tcg/s390x/tcg-target.h | 2 --
tcg/sparc64/tcg-target-mo.h | 12 ++++++++++++
tcg/sparc64/tcg-target.h | 2 --
tcg/tci/tcg-target-mo.h | 17 +++++++++++++++++
tcg/tci/tcg-target.h | 5 -----
tcg/tcg-op-ldst.c | 1 +
22 files changed, 136 insertions(+), 32 deletions(-)
create mode 100644 tcg/aarch64/tcg-target-mo.h
create mode 100644 tcg/arm/tcg-target-mo.h
create mode 100644 tcg/i386/tcg-target-mo.h
create mode 100644 tcg/loongarch64/tcg-target-mo.h
create mode 100644 tcg/mips/tcg-target-mo.h
create mode 100644 tcg/ppc/tcg-target-mo.h
create mode 100644 tcg/riscv/tcg-target-mo.h
create mode 100644 tcg/s390x/tcg-target-mo.h
create mode 100644 tcg/sparc64/tcg-target-mo.h
create mode 100644 tcg/tci/tcg-target-mo.h

diff --git a/accel/tcg/internal-target.h b/accel/tcg/internal-target.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/internal-target.h
+++ b/accel/tcg/internal-target.h
@@ -XXX,XX +XXX,XX @@
 #include "exec/exec-all.h"
 #include "exec/translation-block.h"
 #include "tb-internal.h"
+#include "tcg-target-mo.h"

 /*
 * Access to the various translations structures need to be serialised
diff --git a/tcg/aarch64/tcg-target-mo.h b/tcg/aarch64/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/aarch64/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_TARGET_NB_REGS 64

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target-mo.h b/tcg/arm/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/arm/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2008 Fabrice Bellard
+ * Copyright (c) 2008 Andrzej Zaborowski
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_TARGET_NB_REGS 32

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif
diff --git a/tcg/i386/tcg-target-mo.h b/tcg/i386/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/i386/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+/*
+ * This defines the natural memory order supported by this architecture
+ * before guarantees made by various barrier instructions.
+ *
+ * The x86 has a pretty strong memory ordering which only really
+ * allows for some stores to be re-ordered after loads.
+ */
+#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+
+#endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_REG_CALL_STACK = TCG_REG_ESP
 } TCGReg;

-/* This defines the natural memory order supported by this
- * architecture before guarantees made by various barrier
- * instructions.
- *
- * The x86 has a pretty strong memory ordering which only really
- * allows for some stores to be re-ordered after loads.
- */
-#include "tcg/tcg-mo.h"
-
-#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
-
 #endif
diff --git a/tcg/loongarch64/tcg-target-mo.h b/tcg/loongarch64/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/loongarch64/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2021 WANG Xuerui <git@xen0n.name>
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_VEC_TMP0 = TCG_REG_V23,
 } TCGReg;

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif /* LOONGARCH_TCG_TARGET_H */
diff --git a/tcg/mips/tcg-target-mo.h b/tcg/mips/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/mips/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_AREG0 = TCG_REG_S8,
 } TCGReg;

-#define TCG_TARGET_DEFAULT_MO 0
-
 #endif
diff --git a/tcg/ppc/tcg-target-mo.h b/tcg/ppc/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/ppc/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_AREG0 = TCG_REG_R27
 } TCGReg;

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif
diff --git a/tcg/riscv/tcg-target-mo.h b/tcg/riscv/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/riscv/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2018 SiFive, Inc
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 TCG_REG_TMP2 = TCG_REG_T4,
 } TCGReg;

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif
diff --git a/tcg/s390x/tcg-target-mo.h b/tcg/s390x/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/s390x/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+
+#endif
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum TCGReg {

 #define TCG_TARGET_NB_REGS 64

-#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
-
 #endif
diff --git a/tcg/sparc64/tcg-target-mo.h b/tcg/sparc64/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/sparc64/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

 #define TCG_AREG0 TCG_REG_I0

-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif
diff --git a/tcg/tci/tcg-target-mo.h b/tcg/tci/tcg-target-mo.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/tci/tcg-target-mo.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific memory model
+ * Copyright (c) 2009, 2011 Stefan Weil
+ */
+
+#ifndef TCG_TARGET_MO_H
+#define TCG_TARGET_MO_H
+
+/*
+ * We could notice __i386__ or __s390x__ and reduce the barriers depending
+ * on the host. But if you want performance, you use the normal backend.
+ * We prefer consistency across hosts on this.
+ */
+#define TCG_TARGET_DEFAULT_MO 0
+
+#endif
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define HAVE_TCG_QEMU_TB_EXEC
 #define TCG_TARGET_NEED_POOL_LABELS

-/* We could notice __i386__ or __s390x__ and reduce the barriers depending
- on the host. But if you want performance, you use the normal backend.
- We prefer consistency across hosts on this. */
-#define TCG_TARGET_DEFAULT_MO (0)
-
 #endif /* TCG_TARGET_H */
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -XXX,XX +XXX,XX @@
 #include "exec/plugin-gen.h"
 #include "tcg-internal.h"
 #include "tcg-has.h"
+#include "tcg-target-mo.h"

 static void check_max_alignment(unsigned a_bits)
 {
--
2.43.0
Sign repetitions are perforce all identical, whether they are 1 or 0.
Bitwise operations preserve the relative quantity of the repetitions.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
 z2 = arg_info(op->args[2])->z_mask;
 ctx->z_mask = z1 & z2;

+ /*
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
+ * Bitwise operations preserve the relative quantity of the repetitions.
+ */
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+
 /*
 * Known-zeros does not imply known-ones. Therefore unless
 * arg2 is constant, we can't infer affected bits from it.
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
 }
 ctx->z_mask = z1;

+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return fold_masks(ctx, op);
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
 fold_xi_to_not(ctx, op, 0)) {
 return true;
 }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return false;
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)

 ctx->z_mask = arg_info(op->args[3])->z_mask
 | arg_info(op->args[4])->z_mask;
+ ctx->s_mask = arg_info(op->args[3])->s_mask
+ & arg_info(op->args[4])->s_mask;

 if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
 uint64_t tv = arg_info(op->args[3])->val;
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
 fold_xi_to_not(ctx, op, -1)) {
 return true;
 }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return false;
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_nor(OptContext *ctx, TCGOp *op)
 fold_xi_to_not(ctx, op, 0)) {
 return true;
 }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return false;
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
 return true;
 }

+ ctx->s_mask = arg_info(op->args[1])->s_mask;
+
 /* Because of fold_to_not, we want to always return true, via finish. */
 finish_folding(ctx, op);
 return true;
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)

 ctx->z_mask = arg_info(op->args[1])->z_mask
 | arg_info(op->args[2])->z_mask;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return fold_masks(ctx, op);
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
 fold_ix_to_not(ctx, op, 0)) {
 return true;
 }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return false;
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)

 ctx->z_mask = arg_info(op->args[1])->z_mask
 | arg_info(op->args[2])->z_mask;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
 return fold_masks(ctx, op);
 }

--
2.25.1
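
Aside: a concrete instance of the sign-repetition argument. If the top bits of both inputs repeat their sign bit, the AND keeps at least the smaller run of repetitions, which is what intersecting the two s_mask values records:

    /* Sketch only: x sign-repeats down to bit 4, y down to bit 7;
     * x & y still sign-repeats down to bit 7, the smaller run. */
    int64_t x = -16;       /* 0xfffffffffffffff0 */
    int64_t y = 0x7f;      /* 0x000000000000007f */
    int64_t r = x & y;     /* 0x0000000000000070 */
    assert(r >> 7 == 0);   /* bits 63..7 all equal the sign bit */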
Return C_NotImplemented instead of asserting for opcodes
not implemented by the backend. For now, the assertion
moves to process_op_defs.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.c | 10 ++++++----
tcg/aarch64/tcg-target.c.inc | 2 +-
tcg/arm/tcg-target.c.inc | 2 +-
tcg/i386/tcg-target.c.inc | 2 +-
tcg/loongarch64/tcg-target.c.inc | 2 +-
tcg/mips/tcg-target.c.inc | 2 +-
tcg/ppc/tcg-target.c.inc | 2 +-
tcg/riscv/tcg-target.c.inc | 2 +-
tcg/s390x/tcg-target.c.inc | 2 +-
tcg/sparc64/tcg-target.c.inc | 2 +-
tcg/tci/tcg-target.c.inc | 2 +-
11 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static int tcg_out_pool_finalize(TCGContext *s)
 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

 typedef enum {
+ C_NotImplemented = -1,
 #include "tcg-target-con-set.h"
 } TCGConstraintSetIndex;

@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
 const TCGTargetOpDef *tdefs;
 bool saw_alias_pair = false;
 int i, o, i2, o2, nb_args;
+ TCGConstraintSetIndex con_set;

 if (def->flags & TCG_OPF_NOT_PRESENT) {
 continue;
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)

 /*
 * Macro magic should make it impossible, but double-check that
- * the array index is in range. Since the signness of an enum
- * is implementation defined, force the result to unsigned.
+ * the array index is in range. At the same time, double-check
+ * that the opcode is implemented, i.e. not C_NotImplemented.
 */
- unsigned con_set = tcg_target_op_def(op);
- tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
+ con_set = tcg_target_op_def(op);
+ tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
 tdefs = &constraint_sets[con_set];

 for (i = 0; i < nb_args; i++) {
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return C_O1_I2(w, 0, w);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 case INDEX_op_bitsel_vec:
 return C_O1_I3(w, w, w, w);
 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return C_O1_I4(x, x, x, xO, x);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return C_O1_I3(w, w, w, w);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 : C_O0_I4(rZ, rZ, r, r));

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return C_O1_I4(v, v, v, vZM, v);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 case INDEX_op_cmpsel_vec:
 return C_O1_I4(v, v, vL, vK, vK);
 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 : C_O1_I4(v, v, v, vZ, v));

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return C_O1_I2(r, r, r);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);

 default:
- g_assert_not_reached();
+ return C_NotImplemented;
 }
 }

--
2.43.0
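
Aside: the shape of the new backend contract, sketched with a hypothetical backend. The opcode and constraint-set names follow tcg-opc.h and tcg-target-con-set.h, but the function below is illustrative, not any real backend:

    /* Sketch only: report unhandled opcodes with the sentinel instead of
     * asserting; the single assertion now lives in process_op_defs(). */
    static TCGConstraintSetIndex example_op_def(TCGOpcode op)
    {
        switch (op) {
        case INDEX_op_ld_i32:
            return C_O1_I1(r, r);
        default:
            return C_NotImplemented;
        }
    }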
1
Compute the type of the operation early.
1
Test each vector type, not just lumping them all together.
2
Add tests for I32 (always true) and I64 (64-bit hosts).
2
3
3
There are at least 4 places that used a def->flags ladder
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
to determine the type of the operation being optimized.
5
6
There were two places that assumed !TCG_OPF_64BIT means
7
TCG_TYPE_I32, and so could potentially compute incorrect
8
results for vector operations.
9
10
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
6
---
13
tcg/optimize.c | 149 +++++++++++++++++++++++++++++--------------------
7
tcg/tcg.c | 66 ++++++++++++++++++++++++++++++++++++-------------------
14
1 file changed, 89 insertions(+), 60 deletions(-)
8
1 file changed, 43 insertions(+), 23 deletions(-)
15
9
16
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
17
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/optimize.c
12
--- a/tcg/tcg.c
19
+++ b/tcg/optimize.c
13
+++ b/tcg/tcg.c
20
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
14
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcgv_i32_temp(TCGv_i32 v)
21
15
*/
22
/* In flight values from optimization. */
16
bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
23
uint64_t z_mask;
24
+ TCGType type;
25
} OptContext;
26
27
static inline TempOptInfo *ts_info(TCGTemp *ts)
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
29
{
17
{
30
TCGTemp *dst_ts = arg_temp(dst);
18
- const bool have_vec
31
TCGTemp *src_ts = arg_temp(src);
19
- = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
32
- const TCGOpDef *def;
20
+ bool has_type;
33
TempOptInfo *di;
34
TempOptInfo *si;
35
uint64_t z_mask;
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
37
reset_ts(dst_ts);
38
di = ts_info(dst_ts);
39
si = ts_info(src_ts);
40
- def = &tcg_op_defs[op->opc];
41
- if (def->flags & TCG_OPF_VECTOR) {
42
- new_op = INDEX_op_mov_vec;
43
- } else if (def->flags & TCG_OPF_64BIT) {
44
- new_op = INDEX_op_mov_i64;
45
- } else {
46
+
21
+
47
+ switch (ctx->type) {
22
+ switch (type) {
48
+ case TCG_TYPE_I32:
23
+ case TCG_TYPE_I32:
49
new_op = INDEX_op_mov_i32;
24
+ has_type = true;
50
+ break;
25
+ break;
51
+ case TCG_TYPE_I64:
26
+ case TCG_TYPE_I64:
52
+ new_op = INDEX_op_mov_i64;
27
+ has_type = TCG_TARGET_REG_BITS == 64;
53
+ break;
28
+ break;
54
+ case TCG_TYPE_V64:
29
+ case TCG_TYPE_V64:
30
+ has_type = TCG_TARGET_HAS_v64;
31
+ break;
55
+ case TCG_TYPE_V128:
32
+ case TCG_TYPE_V128:
33
+ has_type = TCG_TARGET_HAS_v128;
34
+ break;
56
+ case TCG_TYPE_V256:
35
+ case TCG_TYPE_V256:
57
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
36
+ has_type = TCG_TARGET_HAS_v256;
58
+ new_op = INDEX_op_mov_vec;
59
+ break;
37
+ break;
60
+ default:
38
+ default:
61
+ g_assert_not_reached();
39
+ has_type = false;
62
}
40
+ break;
63
op->opc = new_op;
41
+ }
64
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
42
65
op->args[0] = dst;
43
switch (op) {
66
op->args[1] = src;
44
case INDEX_op_discard:
67
45
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
46
case INDEX_op_or_vec:
69
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
47
case INDEX_op_xor_vec:
70
TCGArg dst, uint64_t val)
48
case INDEX_op_cmp_vec:
71
{
49
- return have_vec;
72
- const TCGOpDef *def = &tcg_op_defs[op->opc];
50
+ return has_type;
73
- TCGType type;
51
case INDEX_op_dup2_vec:
74
- TCGTemp *tv;
52
- return have_vec && TCG_TARGET_REG_BITS == 32;
75
-
53
+ return has_type && TCG_TARGET_REG_BITS == 32;
76
- if (def->flags & TCG_OPF_VECTOR) {
54
case INDEX_op_not_vec:
77
- type = TCGOP_VECL(op) + TCG_TYPE_V64;
55
- return have_vec && TCG_TARGET_HAS_not_vec;
78
- } else if (def->flags & TCG_OPF_64BIT) {
56
+ return has_type && TCG_TARGET_HAS_not_vec;
79
- type = TCG_TYPE_I64;
57
case INDEX_op_neg_vec:
80
- } else {
58
- return have_vec && TCG_TARGET_HAS_neg_vec;
81
- type = TCG_TYPE_I32;
59
+ return has_type && TCG_TARGET_HAS_neg_vec;
82
- }
60
case INDEX_op_abs_vec:
83
-
61
- return have_vec && TCG_TARGET_HAS_abs_vec;
84
/* Convert movi to mov with constant temp. */
62
+ return has_type && TCG_TARGET_HAS_abs_vec;
85
- tv = tcg_constant_internal(type, val);
63
case INDEX_op_andc_vec:
86
+ TCGTemp *tv = tcg_constant_internal(ctx->type, val);
64
- return have_vec && TCG_TARGET_HAS_andc_vec;
87
+
65
+ return has_type && TCG_TARGET_HAS_andc_vec;
88
init_ts_info(ctx, tv);
66
case INDEX_op_orc_vec:
89
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
67
- return have_vec && TCG_TARGET_HAS_orc_vec;
90
}
68
+ return has_type && TCG_TARGET_HAS_orc_vec;
91
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
69
case INDEX_op_nand_vec:
92
}
70
- return have_vec && TCG_TARGET_HAS_nand_vec;
93
}
71
+ return has_type && TCG_TARGET_HAS_nand_vec;
94
72
case INDEX_op_nor_vec:
95
-static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
73
- return have_vec && TCG_TARGET_HAS_nor_vec;
96
+static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
74
+ return has_type && TCG_TARGET_HAS_nor_vec;
97
+ uint64_t x, uint64_t y)
75
case INDEX_op_eqv_vec:
98
{
76
- return have_vec && TCG_TARGET_HAS_eqv_vec;
99
- const TCGOpDef *def = &tcg_op_defs[op];
77
+ return has_type && TCG_TARGET_HAS_eqv_vec;
100
uint64_t res = do_constant_folding_2(op, x, y);
78
case INDEX_op_mul_vec:
101
- if (!(def->flags & TCG_OPF_64BIT)) {
79
- return have_vec && TCG_TARGET_HAS_mul_vec;
102
+ if (type == TCG_TYPE_I32) {
80
+ return has_type && TCG_TARGET_HAS_mul_vec;
103
res = (int32_t)res;
81
case INDEX_op_shli_vec:
104
}
82
case INDEX_op_shri_vec:
105
return res;
83
case INDEX_op_sari_vec:
106
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
84
- return have_vec && TCG_TARGET_HAS_shi_vec;
107
* Return -1 if the condition can't be simplified,
85
+ return has_type && TCG_TARGET_HAS_shi_vec;
108
* and the result of the condition (0 or 1) if it can.
86
case INDEX_op_shls_vec:
109
*/
87
case INDEX_op_shrs_vec:
110
-static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
88
case INDEX_op_sars_vec:
111
+static int do_constant_folding_cond(TCGType type, TCGArg x,
89
- return have_vec && TCG_TARGET_HAS_shs_vec;
112
TCGArg y, TCGCond c)
90
+ return has_type && TCG_TARGET_HAS_shs_vec;
113
{
91
case INDEX_op_shlv_vec:
114
uint64_t xv = arg_info(x)->val;
92
case INDEX_op_shrv_vec:
115
uint64_t yv = arg_info(y)->val;
93
case INDEX_op_sarv_vec:
116
94
- return have_vec && TCG_TARGET_HAS_shv_vec;
117
if (arg_is_const(x) && arg_is_const(y)) {
95
+ return has_type && TCG_TARGET_HAS_shv_vec;
118
- const TCGOpDef *def = &tcg_op_defs[op];
96
case INDEX_op_rotli_vec:
119
- tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
97
- return have_vec && TCG_TARGET_HAS_roti_vec;
120
- if (def->flags & TCG_OPF_64BIT) {
98
+ return has_type && TCG_TARGET_HAS_roti_vec;
121
- return do_constant_folding_cond_64(xv, yv, c);
99
case INDEX_op_rotls_vec:
122
- } else {
100
- return have_vec && TCG_TARGET_HAS_rots_vec;
123
+ switch (type) {
101
+ return has_type && TCG_TARGET_HAS_rots_vec;
124
+ case TCG_TYPE_I32:
102
case INDEX_op_rotlv_vec:
125
return do_constant_folding_cond_32(xv, yv, c);
103
case INDEX_op_rotrv_vec:
126
+ case TCG_TYPE_I64:
104
- return have_vec && TCG_TARGET_HAS_rotv_vec;
127
+ return do_constant_folding_cond_64(xv, yv, c);
105
+ return has_type && TCG_TARGET_HAS_rotv_vec;
128
+ default:
106
case INDEX_op_ssadd_vec:
129
+ /* Only scalar comparisons are optimizable */
107
case INDEX_op_usadd_vec:
130
+ return -1;
108
case INDEX_op_sssub_vec:
131
}
109
case INDEX_op_ussub_vec:
132
} else if (args_are_copies(x, y)) {
110
- return have_vec && TCG_TARGET_HAS_sat_vec;
133
return do_constant_folding_cond_eq(c);
111
+ return has_type && TCG_TARGET_HAS_sat_vec;
134
@@ -XXX,XX +XXX,XX @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
112
case INDEX_op_smin_vec:
135
uint64_t t;
113
case INDEX_op_umin_vec:
136
114
case INDEX_op_smax_vec:
137
t = arg_info(op->args[1])->val;
115
case INDEX_op_umax_vec:
138
- t = do_constant_folding(op->opc, t, 0);
116
- return have_vec && TCG_TARGET_HAS_minmax_vec;
139
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
117
+ return has_type && TCG_TARGET_HAS_minmax_vec;
140
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
118
case INDEX_op_bitsel_vec:
141
}
119
- return have_vec && TCG_TARGET_HAS_bitsel_vec;
142
return false;
120
+ return has_type && TCG_TARGET_HAS_bitsel_vec;
143
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
121
case INDEX_op_cmpsel_vec:
144
uint64_t t1 = arg_info(op->args[1])->val;
122
- return have_vec && TCG_TARGET_HAS_cmpsel_vec;
145
uint64_t t2 = arg_info(op->args[2])->val;
123
+ return has_type && TCG_TARGET_HAS_cmpsel_vec;
146
124
147
- t1 = do_constant_folding(op->opc, t1, t2);
125
default:
148
+ t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
126
tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
149
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
150
}
151
return false;
152
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
153
static bool fold_brcond(OptContext *ctx, TCGOp *op)
154
{
155
TCGCond cond = op->args[2];
156
- int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
157
+ int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
158
159
if (i == 0) {
160
tcg_op_remove(ctx->tcg, op);
161
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
162
* Simplify EQ/NE comparisons where one of the pairs
163
* can be simplified.
164
*/
165
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
166
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
167
op->args[2], cond);
168
switch (i ^ inv) {
169
case 0:
170
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
171
goto do_brcond_high;
172
}
173
174
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
175
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
176
op->args[3], cond);
177
switch (i ^ inv) {
178
case 0:
179
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
180
if (arg_is_const(op->args[1])) {
181
uint64_t t = arg_info(op->args[1])->val;
182
183
- t = do_constant_folding(op->opc, t, op->args[2]);
184
+ t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
185
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
186
}
187
return false;
188
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
189
uint64_t t = arg_info(op->args[1])->val;
190
191
if (t != 0) {
- t = do_constant_folding(op->opc, t, 0);
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)

static bool fold_movcond(OptContext *ctx, TCGOp *op)
{
- TCGOpcode opc = op->opc;
TCGCond cond = op->args[5];
- int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);

if (i >= 0) {
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
uint64_t tv = arg_info(op->args[3])->val;
uint64_t fv = arg_info(op->args[4])->val;
+ TCGOpcode opc;

- opc = (opc == INDEX_op_movcond_i32
- ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ opc = INDEX_op_setcond_i32;
+ break;
+ case TCG_TYPE_I64:
+ opc = INDEX_op_setcond_i64;
+ break;
+ default:
+ g_assert_not_reached();
+ }

if (tv == 1 && fv == 0) {
op->opc = opc;
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
static bool fold_setcond(OptContext *ctx, TCGOp *op)
{
TCGCond cond = op->args[3];
- int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);

if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
* Simplify EQ/NE comparisons where one of the pairs
* can be simplified.
*/
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
op->args[3], cond);
switch (i ^ inv) {
case 0:
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
goto do_setcond_high;
}

- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
op->args[4], cond);
switch (i ^ inv) {
case 0:
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);

+ /* Pre-compute the type of the operation. */
+ if (def->flags & TCG_OPF_VECTOR) {
+ ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
+ } else if (def->flags & TCG_OPF_64BIT) {
+ ctx.type = TCG_TYPE_I64;
+ } else {
+ ctx.type = TCG_TYPE_I32;
+ }
+
/* For commutative operations make constant second argument */
switch (opc) {
CASE_OP_32_64_VEC(add):
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
/* Proceed with possible constant folding. */
break;
}
- if (opc == INDEX_op_sub_i32) {
+ switch (ctx.type) {
+ case TCG_TYPE_I32:
neg_op = INDEX_op_neg_i32;
have_neg = TCG_TARGET_HAS_neg_i32;
- } else if (opc == INDEX_op_sub_i64) {
+ break;
+ case TCG_TYPE_I64:
neg_op = INDEX_op_neg_i64;
have_neg = TCG_TARGET_HAS_neg_i64;
- } else if (TCG_TARGET_HAS_neg_vec) {
- TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
- unsigned vece = TCGOP_VECE(op);
- neg_op = INDEX_op_neg_vec;
- have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
- } else {
break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ neg_op = INDEX_op_neg_vec;
+ have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
+ TCGOP_VECE(op)) > 0;
+ break;
+ default:
+ g_assert_not_reached();
}
if (!have_neg) {
break;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
TCGOpcode not_op;
bool have_not;

- if (def->flags & TCG_OPF_VECTOR) {
- not_op = INDEX_op_not_vec;
- have_not = TCG_TARGET_HAS_not_vec;
- } else if (def->flags & TCG_OPF_64BIT) {
- not_op = INDEX_op_not_i64;
- have_not = TCG_TARGET_HAS_not_i64;
- } else {
+ switch (ctx.type) {
+ case TCG_TYPE_I32:
not_op = INDEX_op_not_i32;
have_not = TCG_TARGET_HAS_not_i32;
+ break;
+ case TCG_TYPE_I64:
+ not_op = INDEX_op_not_i64;
+ have_not = TCG_TARGET_HAS_not_i64;
+ break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ not_op = INDEX_op_not_vec;
+ have_not = TCG_TARGET_HAS_not_vec;
+ break;
+ default:
+ g_assert_not_reached();
}
if (!have_not) {
break;
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
below, we can ignore high bits, but for further optimizations we
need to record that the high bits contain garbage. */
partmask = z_mask;
- if (!(def->flags & TCG_OPF_64BIT)) {
+ if (ctx.type == TCG_TYPE_I32) {
z_mask |= ~(tcg_target_ulong)0xffffffffu;
partmask &= 0xffffffffu;
affected &= 0xffffffffu;
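The effect of this 32-bit case can be checked with a self-contained
sketch (plain C with made-up values; not QEMU code, and uint64_t
stands in for tcg_target_ulong):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Suppose folding proved only the low byte can be nonzero. */
    uint64_t z_mask = 0xff;     /* zero bits are known-zero */
    uint64_t partmask = z_mask; /* used only for the result-zero test */
    int op_is_32bit = 1;

    if (op_is_32bit) {
        /* High bits are garbage, so they are no longer known-zero... */
        z_mask |= ~(uint64_t)0xffffffffu;
        /* ...but they may be ignored when testing the result for zero. */
        partmask &= 0xffffffffu;
    }
    printf("z_mask=%#" PRIx64 " partmask=%#" PRIx64 "\n", z_mask, partmask);
    return 0;
}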
--
2.25.1

Adjust the interface to take the OptContext parameter instead
of TCGContext or both.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 67 +++++++++++++++++++++++++-------------------------
 1 file changed, 34 insertions(+), 33 deletions(-)

Process each TCGConstraintSetIndex first. Allocate TCGArgConstraint
arrays based on those. Only afterward process the TCGOpcodes and
share those TCGArgConstraint arrays.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |   7 +-
 tcg/tcg.c         | 272 +++++++++++++++++++++++-----------------------
 2 files changed, 136 insertions(+), 143 deletions(-)

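The sharing described in the second message can be tried as a
standalone sketch (simplified stand-in types and tables, not the
actual TCG definitions):

#include <stdio.h>

typedef struct { const char *ct_str; } ArgConstraint;

/* Pass 1: build one constraint array per *set*. */
static const ArgConstraint all_cts[2][3] = {
    { { "r" }, { "r" } },           /* like C_O1_I1(r, r)     */
    { { "r" }, { "r" }, { "ri" } }, /* like C_O1_I2(r, r, ri) */
};

/* Pass 2: map opcodes onto the sets; many opcodes share one array. */
static const int op_to_set[] = { 0, 1, 1 }; /* neg, add, sub */

int main(void)
{
    printf("add and sub share one array: %s\n",
           op_to_set[1] == op_to_set[2] ? "yes" : "no");
    printf("second input constraint of add: %s\n",
           all_cts[op_to_set[1]][2].ct_str);
    return 0;
}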
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
--- a/include/tcg/tcg.h
14
+++ b/tcg/optimize.c
15
+++ b/include/tcg/tcg.h
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
16
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
16
} TempOptInfo;
17
const char *name;
17
18
uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
18
typedef struct OptContext {
19
uint8_t flags;
19
+ TCGContext *tcg;
20
- TCGArgConstraint *args_ct;
20
TCGTempSet temps_used;
21
+ const TCGArgConstraint *args_ct;
21
} OptContext;
22
} TCGOpDef;
22
23
23
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
24
extern TCGOpDef tcg_op_defs[];
24
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
25
extern const size_t tcg_op_defs_max;
26
27
-typedef struct TCGTargetOpDef {
28
- TCGOpcode op;
29
- const char *args_ct_str[TCG_MAX_OP_ARGS];
30
-} TCGTargetOpDef;
31
-
32
/*
33
* tcg_op_supported:
34
* Query if @op, for @type and @flags, is supported by the host
35
diff --git a/tcg/tcg.c b/tcg/tcg.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/tcg/tcg.c
38
+++ b/tcg/tcg.c
39
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
40
41
/* Put all of the constraint sets into an array, indexed by the enum. */
42
43
-#define C_O0_I1(I1) { .args_ct_str = { #I1 } },
44
-#define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
45
-#define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
46
-#define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
47
+typedef struct TCGConstraintSet {
48
+ uint8_t nb_oargs, nb_iargs;
49
+ const char *args_ct_str[TCG_MAX_OP_ARGS];
50
+} TCGConstraintSet;
51
52
-#define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
53
-#define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
54
-#define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
55
-#define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
56
+#define C_O0_I1(I1) { 0, 1, { #I1 } },
57
+#define C_O0_I2(I1, I2) { 0, 2, { #I1, #I2 } },
58
+#define C_O0_I3(I1, I2, I3) { 0, 3, { #I1, #I2, #I3 } },
59
+#define C_O0_I4(I1, I2, I3, I4) { 0, 4, { #I1, #I2, #I3, #I4 } },
60
61
-#define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
62
-#define C_N1O1_I1(O1, O2, I1) { .args_ct_str = { "&" #O1, #O2, #I1 } },
63
-#define C_N2_I1(O1, O2, I1) { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },
64
+#define C_O1_I1(O1, I1) { 1, 1, { #O1, #I1 } },
65
+#define C_O1_I2(O1, I1, I2) { 1, 2, { #O1, #I1, #I2 } },
66
+#define C_O1_I3(O1, I1, I2, I3) { 1, 3, { #O1, #I1, #I2, #I3 } },
67
+#define C_O1_I4(O1, I1, I2, I3, I4) { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
68
69
-#define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
70
-#define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
71
-#define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
72
-#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
73
-#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
74
+#define C_N1_I2(O1, I1, I2) { 1, 2, { "&" #O1, #I1, #I2 } },
75
+#define C_N1O1_I1(O1, O2, I1) { 2, 1, { "&" #O1, #O2, #I1 } },
76
+#define C_N2_I1(O1, O2, I1) { 2, 1, { "&" #O1, "&" #O2, #I1 } },
77
78
-static const TCGTargetOpDef constraint_sets[] = {
79
+#define C_O2_I1(O1, O2, I1) { 2, 1, { #O1, #O2, #I1 } },
80
+#define C_O2_I2(O1, O2, I1, I2) { 2, 2, { #O1, #O2, #I1, #I2 } },
81
+#define C_O2_I3(O1, O2, I1, I2, I3) { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
82
+#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
83
+#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
84
+
85
+static const TCGConstraintSet constraint_sets[] = {
86
#include "tcg-target-con-set.h"
87
};
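The include-driven table above relies on macro stringification; the
technique can be exercised standalone (illustrative names, with a
local list in place of the included header):

#include <stdio.h>

typedef struct {
    unsigned char nb_oargs, nb_iargs;
    const char *args_ct_str[4];
} ConstraintSet;

/* Each C_* macro expands to one row; # turns letters into strings. */
#define C_O1_I1(O1, I1)     { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2) { 1, 2, { #O1, #I1, #I2 } },

static const ConstraintSet constraint_sets[] = {
    C_O1_I1(r, r)
    C_O1_I2(r, r, ri)
};

int main(void)
{
    printf("%u sets; set 1, input 2: %s\n",
           (unsigned)(sizeof(constraint_sets) / sizeof(constraint_sets[0])),
           constraint_sets[1].args_ct_str[2]);
    return 0;
}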
88
89
-
90
#undef C_O0_I1
91
#undef C_O0_I2
92
#undef C_O0_I3
93
@@ -XXX,XX +XXX,XX @@ static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
94
static void tcg_context_init(unsigned max_cpus)
95
{
96
TCGContext *s = &tcg_init_ctx;
97
- int op, total_args, n, i;
98
- TCGOpDef *def;
99
- TCGArgConstraint *args_ct;
100
+ int n, i;
101
TCGTemp *ts;
102
103
memset(s, 0, sizeof(*s));
104
s->nb_globals = 0;
105
106
- /* Count total number of arguments and allocate the corresponding
107
- space */
108
- total_args = 0;
109
- for(op = 0; op < NB_OPS; op++) {
110
- def = &tcg_op_defs[op];
111
- n = def->nb_iargs + def->nb_oargs;
112
- total_args += n;
113
- }
114
-
115
- args_ct = g_new0(TCGArgConstraint, total_args);
116
-
117
- for(op = 0; op < NB_OPS; op++) {
118
- def = &tcg_op_defs[op];
119
- def->args_ct = args_ct;
120
- n = def->nb_iargs + def->nb_oargs;
121
- args_ct += n;
122
- }
123
-
124
init_call_layout(&info_helper_ld32_mmu);
125
init_call_layout(&info_helper_ld64_mmu);
126
init_call_layout(&info_helper_ld128_mmu);
127
@@ -XXX,XX +XXX,XX @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
25
}
128
}
26
129
27
-static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
130
/* we give more priority to constraints with less registers */
28
+static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
131
-static int get_constraint_priority(const TCGOpDef *def, int k)
132
+static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
29
{
133
{
30
TCGTemp *dst_ts = arg_temp(dst);
134
- const TCGArgConstraint *arg_ct = &def->args_ct[k];
31
TCGTemp *src_ts = arg_temp(src);
135
- int n = ctpop64(arg_ct->regs);
32
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
136
+ int n;
33
TCGOpcode new_op;
137
+
34
138
+ arg_ct += k;
35
if (ts_are_copies(dst_ts, src_ts)) {
139
+ n = ctpop64(arg_ct->regs);
36
- tcg_op_remove(s, op);
140
37
+ tcg_op_remove(ctx->tcg, op);
141
/*
38
return;
142
* Sort constraints of a single register first, which includes output
143
@@ -XXX,XX +XXX,XX @@ static int get_constraint_priority(const TCGOpDef *def, int k)
144
}
145
146
/* sort from highest priority to lowest */
147
-static void sort_constraints(TCGOpDef *def, int start, int n)
148
+static void sort_constraints(TCGArgConstraint *a, int start, int n)
149
{
150
int i, j;
151
- TCGArgConstraint *a = def->args_ct;
152
153
for (i = 0; i < n; i++) {
154
a[start + i].sort_index = start + i;
155
@@ -XXX,XX +XXX,XX @@ static void sort_constraints(TCGOpDef *def, int start, int n)
39
}
156
}
40
157
for (i = 0; i < n - 1; i++) {
41
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
158
for (j = i + 1; j < n; j++) {
159
- int p1 = get_constraint_priority(def, a[start + i].sort_index);
160
- int p2 = get_constraint_priority(def, a[start + j].sort_index);
161
+ int p1 = get_constraint_priority(a, a[start + i].sort_index);
162
+ int p2 = get_constraint_priority(a, a[start + j].sort_index);
163
if (p1 < p2) {
164
int tmp = a[start + i].sort_index;
165
a[start + i].sort_index = a[start + j].sort_index;
166
@@ -XXX,XX +XXX,XX @@ static void sort_constraints(TCGOpDef *def, int start, int n)
42
}
167
}
43
}
168
}
44
169
45
-static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
170
+static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
46
- TCGOp *op, TCGArg dst, uint64_t val)
171
+static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
47
+static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
172
+
48
+ TCGArg dst, uint64_t val)
173
static void process_op_defs(TCGContext *s)
49
{
174
{
50
const TCGOpDef *def = &tcg_op_defs[op->opc];
175
- TCGOpcode op;
51
TCGType type;
176
-
52
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
177
- for (op = 0; op < NB_OPS; op++) {
53
/* Convert movi to mov with constant temp. */
178
- TCGOpDef *def = &tcg_op_defs[op];
54
tv = tcg_constant_internal(type, val);
179
- const TCGTargetOpDef *tdefs;
55
init_ts_info(ctx, tv);
180
+ for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
56
- tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
181
+ const TCGConstraintSet *tdefs = &constraint_sets[c];
57
+ tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
182
+ TCGArgConstraint *args_ct = all_cts[c];
58
}
183
+ int nb_oargs = tdefs->nb_oargs;
59
184
+ int nb_iargs = tdefs->nb_iargs;
60
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
185
+ int nb_args = nb_oargs + nb_iargs;
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
186
bool saw_alias_pair = false;
62
{
187
- int i, o, i2, o2, nb_args;
63
int nb_temps, nb_globals, i;
188
- TCGConstraintSetIndex con_set;
64
TCGOp *op, *op_next, *prev_mb = NULL;
189
65
- OptContext ctx = {};
190
- if (def->flags & TCG_OPF_NOT_PRESENT) {
66
+ OptContext ctx = { .tcg = s };
191
- continue;
67
192
- }
68
/* Array VALS has an element for each temp.
193
-
69
If this temp holds a constant then its value is kept in VALS' element.
194
- nb_args = def->nb_iargs + def->nb_oargs;
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
195
- if (nb_args == 0) {
71
CASE_OP_32_64(rotr):
196
- continue;
72
if (arg_is_const(op->args[1])
197
- }
73
&& arg_info(op->args[1])->val == 0) {
198
-
74
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
199
- /*
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
200
- * Macro magic should make it impossible, but double-check that
201
- * the array index is in range. At the same time, double-check
202
- * that the opcode is implemented, i.e. not C_NotImplemented.
203
- */
204
- con_set = tcg_target_op_def(op);
205
- tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
206
- tdefs = &constraint_sets[con_set];
207
-
208
- for (i = 0; i < nb_args; i++) {
209
+ for (int i = 0; i < nb_args; i++) {
210
const char *ct_str = tdefs->args_ct_str[i];
211
- bool input_p = i >= def->nb_oargs;
212
-
213
- /* Incomplete TCGTargetOpDef entry. */
214
- tcg_debug_assert(ct_str != NULL);
215
+ bool input_p = i >= nb_oargs;
216
+ int o;
217
218
switch (*ct_str) {
219
case '0' ... '9':
220
o = *ct_str - '0';
221
tcg_debug_assert(input_p);
222
- tcg_debug_assert(o < def->nb_oargs);
223
- tcg_debug_assert(def->args_ct[o].regs != 0);
224
- tcg_debug_assert(!def->args_ct[o].oalias);
225
- def->args_ct[i] = def->args_ct[o];
226
+ tcg_debug_assert(o < nb_oargs);
227
+ tcg_debug_assert(args_ct[o].regs != 0);
228
+ tcg_debug_assert(!args_ct[o].oalias);
229
+ args_ct[i] = args_ct[o];
230
/* The output sets oalias. */
231
- def->args_ct[o].oalias = 1;
232
- def->args_ct[o].alias_index = i;
233
+ args_ct[o].oalias = 1;
234
+ args_ct[o].alias_index = i;
235
/* The input sets ialias. */
236
- def->args_ct[i].ialias = 1;
237
- def->args_ct[i].alias_index = o;
238
- if (def->args_ct[i].pair) {
239
+ args_ct[i].ialias = 1;
240
+ args_ct[i].alias_index = o;
241
+ if (args_ct[i].pair) {
242
saw_alias_pair = true;
243
}
244
tcg_debug_assert(ct_str[1] == '\0');
245
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
246
247
case '&':
248
tcg_debug_assert(!input_p);
249
- def->args_ct[i].newreg = true;
250
+ args_ct[i].newreg = true;
251
ct_str++;
252
break;
253
254
case 'p': /* plus */
255
/* Allocate to the register after the previous. */
256
- tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
257
+ tcg_debug_assert(i > (input_p ? nb_oargs : 0));
258
o = i - 1;
259
- tcg_debug_assert(!def->args_ct[o].pair);
260
- tcg_debug_assert(!def->args_ct[o].ct);
261
- def->args_ct[i] = (TCGArgConstraint){
262
+ tcg_debug_assert(!args_ct[o].pair);
263
+ tcg_debug_assert(!args_ct[o].ct);
264
+ args_ct[i] = (TCGArgConstraint){
265
.pair = 2,
266
.pair_index = o,
267
- .regs = def->args_ct[o].regs << 1,
268
- .newreg = def->args_ct[o].newreg,
269
+ .regs = args_ct[o].regs << 1,
270
+ .newreg = args_ct[o].newreg,
271
};
272
- def->args_ct[o].pair = 1;
273
- def->args_ct[o].pair_index = i;
274
+ args_ct[o].pair = 1;
275
+ args_ct[o].pair_index = i;
276
tcg_debug_assert(ct_str[1] == '\0');
277
continue;
278
279
case 'm': /* minus */
280
/* Allocate to the register before the previous. */
281
- tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
282
+ tcg_debug_assert(i > (input_p ? nb_oargs : 0));
283
o = i - 1;
284
- tcg_debug_assert(!def->args_ct[o].pair);
285
- tcg_debug_assert(!def->args_ct[o].ct);
286
- def->args_ct[i] = (TCGArgConstraint){
287
+ tcg_debug_assert(!args_ct[o].pair);
288
+ tcg_debug_assert(!args_ct[o].ct);
289
+ args_ct[i] = (TCGArgConstraint){
290
.pair = 1,
291
.pair_index = o,
292
- .regs = def->args_ct[o].regs >> 1,
293
- .newreg = def->args_ct[o].newreg,
294
+ .regs = args_ct[o].regs >> 1,
295
+ .newreg = args_ct[o].newreg,
296
};
297
- def->args_ct[o].pair = 2;
298
- def->args_ct[o].pair_index = i;
299
+ args_ct[o].pair = 2;
300
+ args_ct[o].pair_index = i;
301
tcg_debug_assert(ct_str[1] == '\0');
76
continue;
302
continue;
77
}
303
}
78
break;
304
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
305
do {
80
if (!arg_is_const(op->args[1])
306
switch (*ct_str) {
81
&& arg_is_const(op->args[2])
307
case 'i':
82
&& arg_info(op->args[2])->val == 0) {
308
- def->args_ct[i].ct |= TCG_CT_CONST;
83
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
309
+ args_ct[i].ct |= TCG_CT_CONST;
84
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
310
break;
85
continue;
311
86
}
312
/* Include all of the target-specific constraints. */
87
break;
313
88
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
314
#undef CONST
89
if (!arg_is_const(op->args[1])
315
#define CONST(CASE, MASK) \
90
&& arg_is_const(op->args[2])
316
- case CASE: def->args_ct[i].ct |= MASK; break;
91
&& arg_info(op->args[2])->val == -1) {
317
+ case CASE: args_ct[i].ct |= MASK; break;
92
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
318
#define REGS(CASE, MASK) \
93
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
319
- case CASE: def->args_ct[i].regs |= MASK; break;
94
continue;
320
+ case CASE: args_ct[i].regs |= MASK; break;
95
}
321
96
break;
322
#include "tcg-target-con-str.h"
97
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
323
98
324
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
99
if (partmask == 0) {
325
case '&':
100
tcg_debug_assert(nb_oargs == 1);
326
case 'p':
101
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
327
case 'm':
102
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
328
- /* Typo in TCGTargetOpDef constraint. */
103
continue;
329
+ /* Typo in TCGConstraintSet constraint. */
330
g_assert_not_reached();
331
}
332
} while (*++ct_str != '\0');
104
}
333
}
105
if (affected == 0) {
334
106
tcg_debug_assert(nb_oargs == 1);
335
- /* TCGTargetOpDef entry with too much information? */
107
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
336
- tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
108
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
337
-
109
continue;
338
/*
339
* Fix up output pairs that are aliased with inputs.
340
* When we created the alias, we copied pair from the output.
341
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
342
* first output to pair=3, and the pair_index'es to match.
343
*/
344
if (saw_alias_pair) {
345
- for (i = def->nb_oargs; i < nb_args; i++) {
346
+ for (int i = nb_oargs; i < nb_args; i++) {
347
+ int o, o2, i2;
348
+
349
/*
350
* Since [0-9pm] must be alone in the constraint string,
351
* the only way they can both be set is if the pair comes
352
* from the output alias.
353
*/
354
- if (!def->args_ct[i].ialias) {
355
+ if (!args_ct[i].ialias) {
356
continue;
357
}
358
- switch (def->args_ct[i].pair) {
359
+ switch (args_ct[i].pair) {
360
case 0:
361
break;
362
case 1:
363
- o = def->args_ct[i].alias_index;
364
- o2 = def->args_ct[o].pair_index;
365
- tcg_debug_assert(def->args_ct[o].pair == 1);
366
- tcg_debug_assert(def->args_ct[o2].pair == 2);
367
- if (def->args_ct[o2].oalias) {
368
+ o = args_ct[i].alias_index;
369
+ o2 = args_ct[o].pair_index;
370
+ tcg_debug_assert(args_ct[o].pair == 1);
371
+ tcg_debug_assert(args_ct[o2].pair == 2);
372
+ if (args_ct[o2].oalias) {
373
/* Case 1a */
374
- i2 = def->args_ct[o2].alias_index;
375
- tcg_debug_assert(def->args_ct[i2].pair == 2);
376
- def->args_ct[i2].pair_index = i;
377
- def->args_ct[i].pair_index = i2;
378
+ i2 = args_ct[o2].alias_index;
379
+ tcg_debug_assert(args_ct[i2].pair == 2);
380
+ args_ct[i2].pair_index = i;
381
+ args_ct[i].pair_index = i2;
382
} else {
383
/* Case 1b */
384
- def->args_ct[i].pair_index = i;
385
+ args_ct[i].pair_index = i;
386
}
387
break;
388
case 2:
389
- o = def->args_ct[i].alias_index;
390
- o2 = def->args_ct[o].pair_index;
391
- tcg_debug_assert(def->args_ct[o].pair == 2);
392
- tcg_debug_assert(def->args_ct[o2].pair == 1);
393
- if (def->args_ct[o2].oalias) {
394
+ o = args_ct[i].alias_index;
395
+ o2 = args_ct[o].pair_index;
396
+ tcg_debug_assert(args_ct[o].pair == 2);
397
+ tcg_debug_assert(args_ct[o2].pair == 1);
398
+ if (args_ct[o2].oalias) {
399
/* Case 1a */
400
- i2 = def->args_ct[o2].alias_index;
401
- tcg_debug_assert(def->args_ct[i2].pair == 1);
402
- def->args_ct[i2].pair_index = i;
403
- def->args_ct[i].pair_index = i2;
404
+ i2 = args_ct[o2].alias_index;
405
+ tcg_debug_assert(args_ct[i2].pair == 1);
406
+ args_ct[i2].pair_index = i;
407
+ args_ct[i].pair_index = i2;
408
} else {
409
/* Case 2 */
410
- def->args_ct[i].pair = 3;
411
- def->args_ct[o2].pair = 3;
412
- def->args_ct[i].pair_index = o2;
413
- def->args_ct[o2].pair_index = i;
414
+ args_ct[i].pair = 3;
415
+ args_ct[o2].pair = 3;
416
+ args_ct[i].pair_index = o2;
417
+ args_ct[o2].pair_index = i;
418
}
419
break;
420
default:
421
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
110
}
422
}
111
423
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
424
/* sort the constraints (XXX: this is just an heuristic) */
113
CASE_OP_32_64(mulsh):
425
- sort_constraints(def, 0, def->nb_oargs);
114
if (arg_is_const(op->args[2])
426
- sort_constraints(def, def->nb_oargs, def->nb_iargs);
115
&& arg_info(op->args[2])->val == 0) {
427
+ sort_constraints(args_ct, 0, nb_oargs);
116
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
428
+ sort_constraints(args_ct, nb_oargs, nb_iargs);
117
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
429
+ }
118
continue;
430
+
119
}
431
+ for (TCGOpcode op = 0; op < NB_OPS; op++) {
120
break;
432
+ TCGOpDef *def = &tcg_op_defs[op];
121
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
433
+ const TCGConstraintSet *tdefs;
122
CASE_OP_32_64_VEC(or):
434
+ TCGConstraintSetIndex con_set;
123
CASE_OP_32_64_VEC(and):
435
+ int nb_args;
124
if (args_are_copies(op->args[1], op->args[2])) {
436
+
125
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
437
+ nb_args = def->nb_iargs + def->nb_oargs;
126
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
438
+ if (nb_args == 0) {
127
continue;
439
+ continue;
128
}
440
+ }
129
break;
441
+
130
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
442
+ if (def->flags & TCG_OPF_NOT_PRESENT) {
131
CASE_OP_32_64_VEC(sub):
443
+ def->args_ct = empty_cts;
132
CASE_OP_32_64_VEC(xor):
444
+ continue;
133
if (args_are_copies(op->args[1], op->args[2])) {
445
+ }
134
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
446
+
135
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
447
+ /*
136
continue;
448
+ * Macro magic should make it impossible, but double-check that
137
}
449
+ * the array index is in range. At the same time, double-check
138
break;
450
+ * that the opcode is implemented, i.e. not C_NotImplemented.
139
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
451
+ */
140
allocator where needed and possible. Also detect copies. */
452
+ con_set = tcg_target_op_def(op);
141
switch (opc) {
453
+ tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
142
CASE_OP_32_64_VEC(mov):
454
+
143
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
455
+ /* The constraint arguments must match TCGOpcode arguments. */
144
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
456
+ tdefs = &constraint_sets[con_set];
145
continue;
457
+ tcg_debug_assert(tdefs->nb_oargs == def->nb_oargs);
146
458
+ tcg_debug_assert(tdefs->nb_iargs == def->nb_iargs);
147
case INDEX_op_dup_vec:
459
+
148
if (arg_is_const(op->args[1])) {
460
+ def->args_ct = all_cts[con_set];
149
tmp = arg_info(op->args[1])->val;
461
}
150
tmp = dup_const(TCGOP_VECE(op), tmp);
462
}
151
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
463
152
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
153
continue;
154
}
155
break;
156
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
157
case INDEX_op_dup2_vec:
158
assert(TCG_TARGET_REG_BITS == 32);
159
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
160
- tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
+ tcg_opt_gen_movi(&ctx, op, op->args[0],
162
deposit64(arg_info(op->args[1])->val, 32, 32,
163
arg_info(op->args[2])->val));
164
continue;
165
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
166
case INDEX_op_extrh_i64_i32:
167
if (arg_is_const(op->args[1])) {
168
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
169
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
171
continue;
172
}
173
break;
174
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
175
if (arg_is_const(op->args[1])) {
176
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
177
op->args[2]);
178
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
180
continue;
181
}
182
break;
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
185
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
186
arg_info(op->args[2])->val);
187
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
189
continue;
190
}
191
break;
192
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
193
TCGArg v = arg_info(op->args[1])->val;
194
if (v != 0) {
195
tmp = do_constant_folding(opc, v, 0);
196
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
198
} else {
199
- tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
200
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
201
}
202
continue;
203
}
204
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
205
tmp = deposit64(arg_info(op->args[1])->val,
206
op->args[3], op->args[4],
207
arg_info(op->args[2])->val);
208
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
209
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
210
continue;
211
}
212
break;
213
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
214
if (arg_is_const(op->args[1])) {
215
tmp = extract64(arg_info(op->args[1])->val,
216
op->args[2], op->args[3]);
217
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
218
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
219
continue;
220
}
221
break;
222
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
223
if (arg_is_const(op->args[1])) {
224
tmp = sextract64(arg_info(op->args[1])->val,
225
op->args[2], op->args[3]);
226
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
227
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
228
continue;
229
}
230
break;
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
tmp = (int32_t)(((uint32_t)v1 >> shr) |
233
((uint32_t)v2 << (32 - shr)));
234
}
235
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
236
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
237
continue;
238
}
239
break;
240
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
241
tmp = do_constant_folding_cond(opc, op->args[1],
242
op->args[2], op->args[3]);
243
if (tmp != 2) {
244
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
245
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
246
continue;
247
}
248
break;
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
tmp = do_constant_folding_cond(opc, op->args[1],
251
op->args[2], op->args[5]);
252
if (tmp != 2) {
253
- tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
254
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
255
continue;
256
}
257
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
258
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
259
260
rl = op->args[0];
261
rh = op->args[1];
262
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
263
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
264
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
265
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
266
continue;
267
}
268
break;
269
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
270
271
rl = op->args[0];
272
rh = op->args[1];
273
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
274
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
275
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
276
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
277
continue;
278
}
279
break;
280
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
281
op->args[5]);
282
if (tmp != 2) {
283
do_setcond_const:
284
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
285
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
286
continue;
287
}
288
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
289
--
2.25.1

Prepare for tracking different masks by renaming this one.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 142 +++++++++++++++++++++++++------------------------
 1 file changed, 72 insertions(+), 70 deletions(-)

Introduce a new function, opcode_args_ct, to look up the argument
set for an opcode. We lose the ability to assert the correctness
of the map from TCGOpcode to constraint sets at startup, but we can
still validate at runtime upon lookup.

Rename process_op_defs to process_constraint_sets, as it now does
nothing to TCGOpDef.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h |  1 -
 tcg/tcg-common.c  |  2 +-
 tcg/tcg.c         | 82 ++++++++++++++++++++++-------------------
 3 files changed, 40 insertions(+), 45 deletions(-)
16
17
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
19
--- a/include/tcg/tcg.h
14
+++ b/tcg/optimize.c
20
+++ b/include/tcg/tcg.h
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
21
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
16
TCGTemp *prev_copy;
22
const char *name;
17
TCGTemp *next_copy;
23
uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
18
uint64_t val;
24
uint8_t flags;
19
- uint64_t mask;
25
- const TCGArgConstraint *args_ct;
20
+ uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
26
} TCGOpDef;
21
} TempOptInfo;
27
22
28
extern TCGOpDef tcg_op_defs[];
23
static inline TempOptInfo *ts_info(TCGTemp *ts)
29
diff --git a/tcg/tcg-common.c b/tcg/tcg-common.c
24
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
30
index XXXXXXX..XXXXXXX 100644
25
ti->next_copy = ts;
31
--- a/tcg/tcg-common.c
26
ti->prev_copy = ts;
32
+++ b/tcg/tcg-common.c
27
ti->is_const = false;
33
@@ -XXX,XX +XXX,XX @@
28
- ti->mask = -1;
34
29
+ ti->z_mask = -1;
35
TCGOpDef tcg_op_defs[] = {
36
#define DEF(s, oargs, iargs, cargs, flags) \
37
- { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags, NULL },
38
+ { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
39
#include "tcg/tcg-opc.h"
40
#undef DEF
41
};
42
diff --git a/tcg/tcg.c b/tcg/tcg.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/tcg.c
45
+++ b/tcg/tcg.c
46
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
30
}
47
}
31
48
32
static void reset_temp(TCGArg arg)
49
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
33
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
50
-static void process_op_defs(TCGContext *s);
34
if (ts->kind == TEMP_CONST) {
51
+static void process_constraint_sets(void);
35
ti->is_const = true;
52
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
36
ti->val = ts->val;
53
TCGReg reg, const char *name);
37
- ti->mask = ts->val;
54
38
+ ti->z_mask = ts->val;
55
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
39
if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
56
init_call_layout(&info_helper_st128_mmu);
40
/* High bits of a 32-bit quantity are garbage. */
57
41
- ti->mask |= ~0xffffffffull;
58
tcg_target_init(s);
42
+ ti->z_mask |= ~0xffffffffull;
59
- process_op_defs(s);
43
}
60
+ process_constraint_sets();
44
} else {
61
45
ti->is_const = false;
62
/* Reverse the order of the saved registers, assuming they're all at
46
- ti->mask = -1;
63
the start of tcg_target_reg_alloc_order. */
47
+ ti->z_mask = -1;
64
@@ -XXX,XX +XXX,XX @@ static void sort_constraints(TCGArgConstraint *a, int start, int n)
48
}
65
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
66
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
67
68
-static void process_op_defs(TCGContext *s)
69
+static void process_constraint_sets(void)
70
{
71
for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
72
const TCGConstraintSet *tdefs = &constraint_sets[c];
73
@@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s)
74
sort_constraints(args_ct, 0, nb_oargs);
75
sort_constraints(args_ct, nb_oargs, nb_iargs);
76
}
77
+}
78
79
- for (TCGOpcode op = 0; op < NB_OPS; op++) {
80
- TCGOpDef *def = &tcg_op_defs[op];
81
- const TCGConstraintSet *tdefs;
82
- TCGConstraintSetIndex con_set;
83
- int nb_args;
84
+static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
85
+{
86
+ TCGOpDef *def = &tcg_op_defs[op->opc];
87
+ TCGConstraintSetIndex con_set;
88
89
- nb_args = def->nb_iargs + def->nb_oargs;
90
- if (nb_args == 0) {
91
- continue;
92
- }
93
-
94
- if (def->flags & TCG_OPF_NOT_PRESENT) {
95
- def->args_ct = empty_cts;
96
- continue;
97
- }
98
-
99
- /*
100
- * Macro magic should make it impossible, but double-check that
101
- * the array index is in range. At the same time, double-check
102
- * that the opcode is implemented, i.e. not C_NotImplemented.
103
- */
104
- con_set = tcg_target_op_def(op);
105
- tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
106
-
107
- /* The constraint arguments must match TCGOpcode arguments. */
108
- tdefs = &constraint_sets[con_set];
109
- tcg_debug_assert(tdefs->nb_oargs == def->nb_oargs);
110
- tcg_debug_assert(tdefs->nb_iargs == def->nb_iargs);
111
-
112
- def->args_ct = all_cts[con_set];
113
+ if (def->nb_iargs + def->nb_oargs == 0) {
114
+ return NULL;
115
}
116
+ if (def->flags & TCG_OPF_NOT_PRESENT) {
117
+ return empty_cts;
118
+ }
119
+
120
+ con_set = tcg_target_op_def(op->opc);
121
+ tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
122
+
123
+ /* The constraint arguments must match TCGOpcode arguments. */
124
+ tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
125
+ tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
126
+
127
+ return all_cts[con_set];
49
}
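The shape of the lookup-time validation can be shown in isolation
(illustrative names and a trivial backend mapping, not the QEMU API):

#include <assert.h>

typedef struct { int nb_oargs, nb_iargs; } ConstraintSet;

enum { NUM_SETS = 2 };
static const ConstraintSet constraint_sets[NUM_SETS] = { { 1, 1 }, { 1, 2 } };

/* Stand-in for the backend's per-opcode mapping. */
static int target_op_def(int opc) { return opc == 0 ? 0 : 1; }

static const ConstraintSet *opcode_args_ct(int opc)
{
    int con_set = target_op_def(opc);

    /* The startup-time assertion moves here, to each lookup. */
    assert(con_set >= 0 && con_set < NUM_SETS);
    return &constraint_sets[con_set];
}

int main(void)
{
    return opcode_args_ct(1)->nb_iargs == 2 ? 0 : 1;
}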
128
}
50
129
51
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
130
static void remove_label_use(TCGOp *op, int idx)
52
const TCGOpDef *def;
131
@@ -XXX,XX +XXX,XX @@ liveness_pass_1(TCGContext *s)
53
TempOptInfo *di;
132
TCGTemp *ts;
54
TempOptInfo *si;
55
- uint64_t mask;
56
+ uint64_t z_mask;
57
TCGOpcode new_op;
58
59
if (ts_are_copies(dst_ts, src_ts)) {
60
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
61
op->args[0] = dst;
62
op->args[1] = src;
63
64
- mask = si->mask;
65
+ z_mask = si->z_mask;
66
if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
67
/* High bits of the destination are now garbage. */
68
- mask |= ~0xffffffffull;
69
+ z_mask |= ~0xffffffffull;
70
}
71
- di->mask = mask;
72
+ di->z_mask = z_mask;
73
74
if (src_ts->type == dst_ts->type) {
75
TempOptInfo *ni = ts_info(si->next_copy);
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
}
78
79
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
80
- uint64_t mask, partmask, affected, tmp;
81
+ uint64_t z_mask, partmask, affected, tmp;
82
int nb_oargs, nb_iargs;
83
TCGOpcode opc = op->opc;
133
TCGOpcode opc = op->opc;
84
const TCGOpDef *def = &tcg_op_defs[opc];
134
const TCGOpDef *def = &tcg_op_defs[opc];
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
135
+ const TCGArgConstraint *args_ct;
86
136
87
/* Simplify using known-zero bits. Currently only ops with a single
88
output argument is supported. */
89
- mask = -1;
90
+ z_mask = -1;
91
affected = -1;
92
switch (opc) {
137
switch (opc) {
93
CASE_OP_32_64(ext8s):
138
case INDEX_op_call:
94
- if ((arg_info(op->args[1])->mask & 0x80) != 0) {
139
@@ -XXX,XX +XXX,XX @@ liveness_pass_1(TCGContext *s)
95
+ if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
96
break;
140
break;
97
}
141
98
QEMU_FALLTHROUGH;
142
default:
99
CASE_OP_32_64(ext8u):
143
+ args_ct = opcode_args_ct(op);
100
- mask = 0xff;
144
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
101
+ z_mask = 0xff;
145
- const TCGArgConstraint *ct = &def->args_ct[i];
102
goto and_const;
146
+ const TCGArgConstraint *ct = &args_ct[i];
103
CASE_OP_32_64(ext16s):
147
TCGRegSet set, *pset;
104
- if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
148
105
+ if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
149
ts = arg_temp(op->args[i]);
106
break;
150
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
107
}
151
{
108
QEMU_FALLTHROUGH;
152
const TCGLifeData arg_life = op->life;
109
CASE_OP_32_64(ext16u):
153
TCGRegSet dup_out_regs, dup_in_regs;
110
- mask = 0xffff;
154
+ const TCGArgConstraint *dup_args_ct;
111
+ z_mask = 0xffff;
155
TCGTemp *its, *ots;
112
goto and_const;
156
TCGType itype, vtype;
113
case INDEX_op_ext32s_i64:
157
unsigned vece;
114
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
158
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
115
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
159
return;
116
break;
160
}
117
}
161
118
QEMU_FALLTHROUGH;
162
- dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
119
case INDEX_op_ext32u_i64:
163
- dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
120
- mask = 0xffffffffU;
164
+ dup_args_ct = opcode_args_ct(op);
121
+ z_mask = 0xffffffffU;
165
+ dup_out_regs = dup_args_ct[0].regs;
122
goto and_const;
166
+ dup_in_regs = dup_args_ct[1].regs;
123
167
124
CASE_OP_32_64(and):
168
/* Allocate the output register now. */
125
- mask = arg_info(op->args[2])->mask;
169
if (ots->val_type != TEMP_VAL_REG) {
126
+ z_mask = arg_info(op->args[2])->z_mask;
170
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
127
if (arg_is_const(op->args[2])) {
171
int i, k, nb_iargs, nb_oargs;
128
and_const:
172
TCGReg reg;
129
- affected = arg_info(op->args[1])->mask & ~mask;
173
TCGArg arg;
130
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
174
+ const TCGArgConstraint *args_ct;
131
}
175
const TCGArgConstraint *arg_ct;
132
- mask = arg_info(op->args[1])->mask & mask;
176
TCGTemp *ts;
133
+ z_mask = arg_info(op->args[1])->z_mask & z_mask;
177
TCGArg new_args[TCG_MAX_OP_ARGS];
134
break;
178
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
135
179
break;
136
case INDEX_op_ext_i32_i64:
180
}
137
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
181
138
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
182
+ args_ct = opcode_args_ct(op);
139
break;
183
+
140
}
184
/* satisfy input constraints */
141
QEMU_FALLTHROUGH;
185
for (k = 0; k < nb_iargs; k++) {
142
case INDEX_op_extu_i32_i64:
186
TCGRegSet i_preferred_regs, i_required_regs;
143
/* We do not compute affected as it is a size changing op. */
187
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
144
- mask = (uint32_t)arg_info(op->args[1])->mask;
188
TCGTemp *ts2;
145
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
189
int i1, i2;
146
break;
190
147
191
- i = def->args_ct[nb_oargs + k].sort_index;
148
CASE_OP_32_64(andc):
192
+ i = args_ct[nb_oargs + k].sort_index;
149
/* Known-zeros does not imply known-ones. Therefore unless
193
arg = op->args[i];
150
op->args[2] is constant, we can't infer anything from it. */
194
- arg_ct = &def->args_ct[i];
151
if (arg_is_const(op->args[2])) {
195
+ arg_ct = &args_ct[i];
152
- mask = ~arg_info(op->args[2])->mask;
196
ts = arg_temp(arg);
153
+ z_mask = ~arg_info(op->args[2])->z_mask;
197
154
goto and_const;
198
if (ts->val_type == TEMP_VAL_CONST
155
}
199
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
156
/* But we certainly know nothing outside args[1] may be set. */
200
* register and move it.
157
- mask = arg_info(op->args[1])->mask;
201
*/
158
+ z_mask = arg_info(op->args[1])->z_mask;
202
if (temp_readonly(ts) || !IS_DEAD_ARG(i)
159
break;
203
- || def->args_ct[arg_ct->alias_index].newreg) {
160
204
+ || args_ct[arg_ct->alias_index].newreg) {
161
case INDEX_op_sar_i32:
205
allocate_new_reg = true;
162
if (arg_is_const(op->args[2])) {
206
} else if (ts->val_type == TEMP_VAL_REG) {
163
tmp = arg_info(op->args[2])->val & 31;
207
/*
164
- mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
208
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
165
+ z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
166
}
167
break;
168
case INDEX_op_sar_i64:
169
if (arg_is_const(op->args[2])) {
170
tmp = arg_info(op->args[2])->val & 63;
171
- mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
172
+ z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
173
}
174
break;
175
176
case INDEX_op_shr_i32:
177
if (arg_is_const(op->args[2])) {
178
tmp = arg_info(op->args[2])->val & 31;
179
- mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
180
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
181
}
182
break;
183
case INDEX_op_shr_i64:
184
if (arg_is_const(op->args[2])) {
185
tmp = arg_info(op->args[2])->val & 63;
186
- mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
187
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
188
}
189
break;
190
191
case INDEX_op_extrl_i64_i32:
192
- mask = (uint32_t)arg_info(op->args[1])->mask;
193
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
194
break;
195
case INDEX_op_extrh_i64_i32:
196
- mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
197
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
198
break;
199
200
CASE_OP_32_64(shl):
201
if (arg_is_const(op->args[2])) {
202
tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
203
- mask = arg_info(op->args[1])->mask << tmp;
204
+ z_mask = arg_info(op->args[1])->z_mask << tmp;
205
}
206
break;
207
208
CASE_OP_32_64(neg):
209
/* Set to 1 all bits to the left of the rightmost. */
210
- mask = -(arg_info(op->args[1])->mask
211
- & -arg_info(op->args[1])->mask);
212
+ z_mask = -(arg_info(op->args[1])->z_mask
213
+ & -arg_info(op->args[1])->z_mask);
214
break;
215
216
CASE_OP_32_64(deposit):
217
- mask = deposit64(arg_info(op->args[1])->mask,
218
- op->args[3], op->args[4],
219
- arg_info(op->args[2])->mask);
220
+ z_mask = deposit64(arg_info(op->args[1])->z_mask,
221
+ op->args[3], op->args[4],
222
+ arg_info(op->args[2])->z_mask);
223
break;
224
225
CASE_OP_32_64(extract):
226
- mask = extract64(arg_info(op->args[1])->mask,
227
- op->args[2], op->args[3]);
228
+ z_mask = extract64(arg_info(op->args[1])->z_mask,
229
+ op->args[2], op->args[3]);
230
if (op->args[2] == 0) {
231
- affected = arg_info(op->args[1])->mask & ~mask;
232
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
233
}
234
break;
235
CASE_OP_32_64(sextract):
236
- mask = sextract64(arg_info(op->args[1])->mask,
237
- op->args[2], op->args[3]);
238
- if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
239
- affected = arg_info(op->args[1])->mask & ~mask;
240
+ z_mask = sextract64(arg_info(op->args[1])->z_mask,
241
+ op->args[2], op->args[3]);
242
+ if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
243
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
244
}
245
break;
246
247
CASE_OP_32_64(or):
248
CASE_OP_32_64(xor):
249
- mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
250
+ z_mask = arg_info(op->args[1])->z_mask
251
+ | arg_info(op->args[2])->z_mask;
252
break;
253
254
case INDEX_op_clz_i32:
255
case INDEX_op_ctz_i32:
256
- mask = arg_info(op->args[2])->mask | 31;
257
+ z_mask = arg_info(op->args[2])->z_mask | 31;
258
break;
259
260
case INDEX_op_clz_i64:
261
case INDEX_op_ctz_i64:
262
- mask = arg_info(op->args[2])->mask | 63;
263
+ z_mask = arg_info(op->args[2])->z_mask | 63;
264
break;
265
266
case INDEX_op_ctpop_i32:
267
- mask = 32 | 31;
268
+ z_mask = 32 | 31;
269
break;
270
case INDEX_op_ctpop_i64:
271
- mask = 64 | 63;
272
+ z_mask = 64 | 63;
273
break;
274
275
CASE_OP_32_64(setcond):
276
case INDEX_op_setcond2_i32:
277
- mask = 1;
278
+ z_mask = 1;
279
break;
280
281
CASE_OP_32_64(movcond):
282
- mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
283
+ z_mask = arg_info(op->args[3])->z_mask
284
+ | arg_info(op->args[4])->z_mask;
285
break;
286
287
CASE_OP_32_64(ld8u):
288
- mask = 0xff;
289
+ z_mask = 0xff;
290
break;
291
CASE_OP_32_64(ld16u):
292
- mask = 0xffff;
293
+ z_mask = 0xffff;
294
break;
295
case INDEX_op_ld32u_i64:
296
- mask = 0xffffffffu;
297
+ z_mask = 0xffffffffu;
298
break;
299
300
CASE_OP_32_64(qemu_ld):
301
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
302
MemOpIdx oi = op->args[nb_oargs + nb_iargs];
303
MemOp mop = get_memop(oi);
304
if (!(mop & MO_SIGN)) {
305
- mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
306
+ z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
307
}
308
}
309
break;
310
311
CASE_OP_32_64(bswap16):
312
- mask = arg_info(op->args[1])->mask;
313
- if (mask <= 0xffff) {
314
+ z_mask = arg_info(op->args[1])->z_mask;
315
+ if (z_mask <= 0xffff) {
316
op->args[2] |= TCG_BSWAP_IZ;
317
}
318
- mask = bswap16(mask);
319
+ z_mask = bswap16(z_mask);
320
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
321
case TCG_BSWAP_OZ:
322
break;
323
case TCG_BSWAP_OS:
324
- mask = (int16_t)mask;
325
+ z_mask = (int16_t)z_mask;
326
break;
327
default: /* undefined high bits */
328
- mask |= MAKE_64BIT_MASK(16, 48);
329
+ z_mask |= MAKE_64BIT_MASK(16, 48);
330
break;
331
}
332
break;
333
334
case INDEX_op_bswap32_i64:
335
- mask = arg_info(op->args[1])->mask;
336
- if (mask <= 0xffffffffu) {
337
+ z_mask = arg_info(op->args[1])->z_mask;
338
+ if (z_mask <= 0xffffffffu) {
339
op->args[2] |= TCG_BSWAP_IZ;
340
}
341
- mask = bswap32(mask);
342
+ z_mask = bswap32(z_mask);
343
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
344
case TCG_BSWAP_OZ:
345
break;
346
case TCG_BSWAP_OS:
347
- mask = (int32_t)mask;
348
+ z_mask = (int32_t)z_mask;
349
break;
350
default: /* undefined high bits */
351
- mask |= MAKE_64BIT_MASK(32, 32);
352
+ z_mask |= MAKE_64BIT_MASK(32, 32);
353
break;
354
}
355
break;
356
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
357
/* 32-bit ops generate 32-bit results. For the result is zero test
358
below, we can ignore high bits, but for further optimizations we
359
need to record that the high bits contain garbage. */
360
- partmask = mask;
361
+ partmask = z_mask;
362
if (!(def->flags & TCG_OPF_64BIT)) {
363
- mask |= ~(tcg_target_ulong)0xffffffffu;
364
+ z_mask |= ~(tcg_target_ulong)0xffffffffu;
365
partmask &= 0xffffffffu;
366
affected &= 0xffffffffu;
367
}
209
}
368
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
369
vs the high word of the input. */
211
/* satisfy the output constraints */
370
do_setcond_high:
212
- for(k = 0; k < nb_oargs; k++) {
371
reset_temp(op->args[0]);
213
- i = def->args_ct[k].sort_index;
372
- arg_info(op->args[0])->mask = 1;
214
+ for (k = 0; k < nb_oargs; k++) {
373
+ arg_info(op->args[0])->z_mask = 1;
215
+ i = args_ct[k].sort_index;
374
op->opc = INDEX_op_setcond_i32;
216
arg = op->args[i];
375
op->args[1] = op->args[2];
217
- arg_ct = &def->args_ct[i];
376
op->args[2] = op->args[4];
218
+ arg_ct = &args_ct[i];
377
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
219
ts = arg_temp(arg);
378
}
220
379
do_setcond_low:
221
/* ENV should not be modified. */
380
reset_temp(op->args[0]);
222
@@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
381
- arg_info(op->args[0])->mask = 1;
223
/* Allocate the output register now. */
382
+ arg_info(op->args[0])->z_mask = 1;
224
if (ots->val_type != TEMP_VAL_REG) {
383
op->opc = INDEX_op_setcond_i32;
225
TCGRegSet allocated_regs = s->reserved_regs;
384
op->args[2] = op->args[3];
226
- TCGRegSet dup_out_regs =
385
op->args[3] = op->args[5];
227
- tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
386
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
228
+ TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
387
/* Default case: we know nothing about operation (or were unable
229
TCGReg oreg;
388
to compute the operation result) so no propagation is done.
230
389
We trash everything if the operation is the end of a basic
231
/* Make sure to not spill the input registers. */
390
- block, otherwise we only trash the output args. "mask" is
391
+ block, otherwise we only trash the output args. "z_mask" is
392
the non-zero bits mask for the first output arg. */
393
if (def->flags & TCG_OPF_BB_END) {
394
memset(&temps_used, 0, sizeof(temps_used));
395
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
396
/* Save the corresponding known-zero bits mask for the
397
first output argument (only one supported so far). */
398
if (i == 0) {
399
- arg_info(op->args[i])->mask = mask;
400
+ arg_info(op->args[i])->z_mask = z_mask;
401
}
402
}
403
}
404
--
2.25.1

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

Now that we're no longer assigning to TCGOpDef.args_ct,
we can make the array constant.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 2 +-
 tcg/tcg-common.c  | 2 +-
 tcg/tcg.c         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
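The point of the change can be seen in a minimal sketch (illustrative
types, not the TCG definitions): once nothing writes to the table at
run time, const-qualifying it turns any leftover store into a compile
error.

typedef struct { const char *name; int nb_args; } OpDef;

static const OpDef op_defs[] = {
    { "mov", 2 },
    { "add", 3 },
};

int main(void)
{
    /* op_defs[0].nb_args = 0;  <- would now fail to compile */
    return op_defs[1].nb_args == 3 ? 0 : 1;
}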
7
11
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
9
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
14
--- a/include/tcg/tcg.h
11
+++ b/tcg/optimize.c
15
+++ b/include/tcg/tcg.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ typedef struct TCGOpDef {
13
return fold_const2(ctx, op);
17
uint8_t flags;
14
}
18
} TCGOpDef;
15
19
16
+static bool fold_extract2(OptContext *ctx, TCGOp *op)
20
-extern TCGOpDef tcg_op_defs[];
17
+{
21
+extern const TCGOpDef tcg_op_defs[];
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
22
extern const size_t tcg_op_defs_max;
19
+ uint64_t v1 = arg_info(op->args[1])->val;
23
20
+ uint64_t v2 = arg_info(op->args[2])->val;
24
/*
21
+ int shr = op->args[3];
25
diff --git a/tcg/tcg-common.c b/tcg/tcg-common.c
22
+
26
index XXXXXXX..XXXXXXX 100644
23
+ if (op->opc == INDEX_op_extract2_i64) {
27
--- a/tcg/tcg-common.c
24
+ v1 >>= shr;
28
+++ b/tcg/tcg-common.c
25
+ v2 <<= 64 - shr;
29
@@ -XXX,XX +XXX,XX @@
26
+ } else {
30
#include "tcg/tcg.h"
27
+ v1 = (uint32_t)v1 >> shr;
31
#include "tcg-has.h"
28
+ v2 = (int32_t)v2 << (32 - shr);
32
29
+ }
33
-TCGOpDef tcg_op_defs[] = {
30
+ return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
34
+const TCGOpDef tcg_op_defs[] = {
31
+ }
35
#define DEF(s, oargs, iargs, cargs, flags) \
32
+ return false;
36
{ #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
33
+}
37
#include "tcg/tcg-opc.h"
34
+
38
diff --git a/tcg/tcg.c b/tcg/tcg.c
35
static bool fold_exts(OptContext *ctx, TCGOp *op)
39
index XXXXXXX..XXXXXXX 100644
40
--- a/tcg/tcg.c
41
+++ b/tcg/tcg.c
42
@@ -XXX,XX +XXX,XX @@ static void process_constraint_sets(void)
43
44
static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
36
{
45
{
37
return fold_const1(ctx, op);
46
- TCGOpDef *def = &tcg_op_defs[op->opc];
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
47
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
39
}
48
TCGConstraintSetIndex con_set;
40
break;
49
41
50
if (def->nb_iargs + def->nb_oargs == 0) {
42
- CASE_OP_32_64(extract2):
43
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
44
- uint64_t v1 = arg_info(op->args[1])->val;
45
- uint64_t v2 = arg_info(op->args[2])->val;
46
- int shr = op->args[3];
47
-
48
- if (opc == INDEX_op_extract2_i64) {
49
- tmp = (v1 >> shr) | (v2 << (64 - shr));
50
- } else {
51
- tmp = (int32_t)(((uint32_t)v1 >> shr) |
52
- ((uint32_t)v2 << (32 - shr)));
53
- }
54
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
55
- continue;
56
- }
57
- break;
58
-
59
default:
60
break;
61
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
63
CASE_OP_32_64(eqv):
64
done = fold_eqv(&ctx, op);
65
break;
66
+ CASE_OP_32_64(extract2):
67
+ done = fold_extract2(&ctx, op);
68
+ break;
69
CASE_OP_32_64(ext8s):
70
CASE_OP_32_64(ext16s):
71
case INDEX_op_ext32s_i64:
72
--
2.25.1

Add two additional helpers, fold_add2_i32 and fold_sub2_i32
which will not be simple wrappers forever.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 70 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 26 deletions(-)

We should have checked that the op is supported before
emitting it. The backend cannot be expected to have a
constraint set for unsupported ops.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 4 ++++
 1 file changed, 4 insertions(+)
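The double-word fold added by the first patch boils down to widening,
one 64-bit add or subtract, and splitting; a standalone rendering in
plain C (not the QEMU helper itself):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void addsub2_i32(uint32_t *rl, uint32_t *rh,
                        uint32_t al, uint32_t ah,
                        uint32_t bl, uint32_t bh, int add)
{
    uint64_t a = ((uint64_t)ah << 32) | al;
    uint64_t b = ((uint64_t)bh << 32) | bl;

    a = add ? a + b : a - b;
    *rl = (uint32_t)a;
    *rh = (uint32_t)(a >> 32);
}

int main(void)
{
    uint32_t lo, hi;

    /* 0x00000001_FFFFFFFF + 1 carries into the high half. */
    addsub2_i32(&lo, &hi, 0xFFFFFFFFu, 1, 1, 0, 1);
    printf("hi=%" PRIu32 " lo=%" PRIu32 "\n", hi, lo); /* hi=2 lo=0 */
    return 0;
}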
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/tcg.c b/tcg/tcg.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/tcg.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/tcg.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
16
return fold_const2(ctx, op);
16
const TCGOpDef *def = &tcg_op_defs[op->opc];
17
}
17
TCGConstraintSetIndex con_set;
18
18
19
+static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
19
+#ifdef CONFIG_DEBUG_TCG
20
+{
20
+ assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)));
21
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
21
+#endif
22
+ arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
23
+ uint32_t al = arg_info(op->args[2])->val;
24
+ uint32_t ah = arg_info(op->args[3])->val;
25
+ uint32_t bl = arg_info(op->args[4])->val;
26
+ uint32_t bh = arg_info(op->args[5])->val;
27
+ uint64_t a = ((uint64_t)ah << 32) | al;
28
+ uint64_t b = ((uint64_t)bh << 32) | bl;
29
+ TCGArg rl, rh;
30
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
31
+
22
+
32
+ if (add) {
23
if (def->nb_iargs + def->nb_oargs == 0) {
33
+ a += b;
24
return NULL;
34
+ } else {
25
}
35
+ a -= b;
36
+ }
37
+
38
+ rl = op->args[0];
39
+ rh = op->args[1];
40
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
41
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
42
+ return true;
43
+ }
44
+ return false;
45
+}
46
+
47
+static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
48
+{
49
+ return fold_addsub2_i32(ctx, op, true);
50
+}
51
+
52
static bool fold_and(OptContext *ctx, TCGOp *op)
53
{
54
return fold_const2(ctx, op);
55
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
56
return fold_const2(ctx, op);
57
}
58
59
+static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
60
+{
61
+ return fold_addsub2_i32(ctx, op, false);
62
+}
63
+
64
static bool fold_xor(OptContext *ctx, TCGOp *op)
65
{
66
return fold_const2(ctx, op);
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
}
69
break;
70
71
- case INDEX_op_add2_i32:
72
- case INDEX_op_sub2_i32:
73
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
74
- && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
75
- uint32_t al = arg_info(op->args[2])->val;
76
- uint32_t ah = arg_info(op->args[3])->val;
77
- uint32_t bl = arg_info(op->args[4])->val;
78
- uint32_t bh = arg_info(op->args[5])->val;
79
- uint64_t a = ((uint64_t)ah << 32) | al;
80
- uint64_t b = ((uint64_t)bh << 32) | bl;
81
- TCGArg rl, rh;
82
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
83
-
84
- if (opc == INDEX_op_add2_i32) {
85
- a += b;
86
- } else {
87
- a -= b;
88
- }
89
-
90
- rl = op->args[0];
91
- rh = op->args[1];
92
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
93
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
94
- continue;
95
- }
96
- break;
97
98
default:
99
break;
100
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
101
CASE_OP_32_64_VEC(add):
102
done = fold_add(&ctx, op);
103
break;
104
+ case INDEX_op_add2_i32:
105
+ done = fold_add2_i32(&ctx, op);
106
+ break;
107
CASE_OP_32_64_VEC(and):
108
done = fold_and(&ctx, op);
109
break;
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64_VEC(sub):
112
done = fold_sub(&ctx, op);
113
break;
114
+ case INDEX_op_sub2_i32:
115
+ done = fold_sub2_i32(&ctx, op);
116
+ break;
117
CASE_OP_32_64_VEC(xor):
118
done = fold_xor(&ctx, op);
119
break;
120
--
26
--
121
2.25.1
27
2.43.0
122
28
123
29
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
The br, mb, goto_tb and exit_tb opcodes do not have
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
register operands, only constants, flags, or labels.
3
Remove the special case in opcode_args_ct by including
4
TCG_OPF_NOT_PRESENT in the flags for these opcodes.
5
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
8
---
5
tcg/optimize.c | 37 +++++++++++++++++++++----------------
9
include/tcg/tcg-opc.h | 8 ++++----
6
1 file changed, 21 insertions(+), 16 deletions(-)
10
tcg/tcg.c | 3 ---
11
2 files changed, 4 insertions(+), 7 deletions(-)
7
12
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
9
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
15
--- a/include/tcg/tcg-opc.h
11
+++ b/tcg/optimize.c
16
+++ b/include/tcg/tcg-opc.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
17
@@ -XXX,XX +XXX,XX @@ DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
13
return fold_const2(ctx, op);
18
/* variable number of parameters */
14
}
19
DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
15
20
16
+static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
21
-DEF(br, 0, 0, 1, TCG_OPF_BB_END)
17
+{
22
+DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
18
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
23
19
+ uint32_t a = arg_info(op->args[2])->val;
24
#define IMPL(X) (__builtin_constant_p(X) && (X) <= 0 ? TCG_OPF_NOT_PRESENT : 0)
20
+ uint32_t b = arg_info(op->args[3])->val;
25
#if TCG_TARGET_REG_BITS == 32
21
+ uint64_t r = (uint64_t)a * b;
26
@@ -XXX,XX +XXX,XX @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END)
22
+ TCGArg rl, rh;
27
# define IMPL64 TCG_OPF_64BIT
23
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
28
#endif
24
+
29
25
+ rl = op->args[0];
30
-DEF(mb, 0, 0, 1, 0)
26
+ rh = op->args[1];
31
+DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
27
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
32
28
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
33
DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
29
+ return true;
34
DEF(setcond_i32, 1, 2, 1, 0)
30
+ }
35
@@ -XXX,XX +XXX,XX @@ DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
31
+ return false;
36
/* There are tcg_ctx->insn_start_words here, not just one. */
32
+}
37
DEF(insn_start, 0, 0, DATA64_ARGS, TCG_OPF_NOT_PRESENT)
33
+
38
34
static bool fold_nand(OptContext *ctx, TCGOp *op)
39
-DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
35
{
40
-DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
36
return fold_const2(ctx, op);
41
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
37
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
42
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
38
}
43
DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
39
break;
44
40
45
DEF(plugin_cb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
41
- case INDEX_op_mulu2_i32:
46
diff --git a/tcg/tcg.c b/tcg/tcg.c
42
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
47
index XXXXXXX..XXXXXXX 100644
43
- uint32_t a = arg_info(op->args[2])->val;
48
--- a/tcg/tcg.c
44
- uint32_t b = arg_info(op->args[3])->val;
49
+++ b/tcg/tcg.c
45
- uint64_t r = (uint64_t)a * b;
50
@@ -XXX,XX +XXX,XX @@ static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
46
- TCGArg rl, rh;
51
assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)));
47
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
52
#endif
48
-
53
49
- rl = op->args[0];
54
- if (def->nb_iargs + def->nb_oargs == 0) {
50
- rh = op->args[1];
55
- return NULL;
51
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
56
- }
52
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
57
if (def->flags & TCG_OPF_NOT_PRESENT) {
53
- continue;
58
return empty_cts;
54
- }
59
}
55
- break;
56
-
57
default:
58
break;
59
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
CASE_OP_32_64(muluh):
62
done = fold_mul_highpart(&ctx, op);
63
break;
64
+ case INDEX_op_mulu2_i32:
65
+ done = fold_mulu2_i32(&ctx, op);
66
+ break;
67
CASE_OP_32_64(nand):
68
done = fold_nand(&ctx, op);
69
break;
70
--
60
--
71
2.25.1
61
2.43.0
72
62
73
63
diff view generated by jsdifflib
1
Split out the conditional conversion from a more complex logical
1
Allow the backend to make constraint choices based on more parameters.
2
operation to a simple NOT. Create a couple more helpers to make
3
this easy for the outer-most logical operations.
4
2
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/optimize.c | 158 +++++++++++++++++++++++++++----------------------
6
tcg/tcg.c | 4 ++--
9
1 file changed, 86 insertions(+), 72 deletions(-)
7
tcg/aarch64/tcg-target.c.inc | 3 ++-
8
tcg/arm/tcg-target.c.inc | 3 ++-
9
tcg/i386/tcg-target.c.inc | 3 ++-
10
tcg/loongarch64/tcg-target.c.inc | 3 ++-
11
tcg/mips/tcg-target.c.inc | 3 ++-
12
tcg/ppc/tcg-target.c.inc | 3 ++-
13
tcg/riscv/tcg-target.c.inc | 3 ++-
14
tcg/s390x/tcg-target.c.inc | 3 ++-
15
tcg/sparc64/tcg-target.c.inc | 3 ++-
16
tcg/tci/tcg-target.c.inc | 3 ++-
17
11 files changed, 22 insertions(+), 12 deletions(-)
10
18
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
19
diff --git a/tcg/tcg.c b/tcg/tcg.c
12
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
21
--- a/tcg/tcg.c
14
+++ b/tcg/optimize.c
22
+++ b/tcg/tcg.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
23
@@ -XXX,XX +XXX,XX @@ typedef enum {
16
return false;
24
#include "tcg-target-con-set.h"
25
} TCGConstraintSetIndex;
26
27
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
28
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
29
30
#undef C_O0_I1
31
#undef C_O0_I2
32
@@ -XXX,XX +XXX,XX @@ static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
33
return empty_cts;
34
}
35
36
- con_set = tcg_target_op_def(op->opc);
37
+ con_set = tcg_target_op_def(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op));
38
tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
39
40
/* The constraint arguments must match TCGOpcode arguments. */
41
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
42
index XXXXXXX..XXXXXXX 100644
43
--- a/tcg/aarch64/tcg-target.c.inc
44
+++ b/tcg/aarch64/tcg-target.c.inc
45
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
46
}
17
}
47
}
18
48
19
+/*
49
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
20
+ * Convert @op to NOT, if NOT is supported by the host.
50
+static TCGConstraintSetIndex
21
+ * Return true f the conversion is successful, which will still
51
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
22
+ * indicate that the processing is complete.
23
+ */
24
+static bool fold_not(OptContext *ctx, TCGOp *op);
25
+static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
26
+{
27
+ TCGOpcode not_op;
28
+ bool have_not;
29
+
30
+ switch (ctx->type) {
31
+ case TCG_TYPE_I32:
32
+ not_op = INDEX_op_not_i32;
33
+ have_not = TCG_TARGET_HAS_not_i32;
34
+ break;
35
+ case TCG_TYPE_I64:
36
+ not_op = INDEX_op_not_i64;
37
+ have_not = TCG_TARGET_HAS_not_i64;
38
+ break;
39
+ case TCG_TYPE_V64:
40
+ case TCG_TYPE_V128:
41
+ case TCG_TYPE_V256:
42
+ not_op = INDEX_op_not_vec;
43
+ have_not = TCG_TARGET_HAS_not_vec;
44
+ break;
45
+ default:
46
+ g_assert_not_reached();
47
+ }
48
+ if (have_not) {
49
+ op->opc = not_op;
50
+ op->args[1] = op->args[idx];
51
+ return fold_not(ctx, op);
52
+ }
53
+ return false;
54
+}
55
+
56
+/* If the binary operation has first argument @i, fold to NOT. */
57
+static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
58
+{
59
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
60
+ return fold_to_not(ctx, op, 2);
61
+ }
62
+ return false;
63
+}
64
+
65
/* If the binary operation has second argument @i, fold to @i. */
66
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
67
{
52
{
68
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
53
switch (op) {
69
return false;
54
case INDEX_op_goto_ptr:
55
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
56
index XXXXXXX..XXXXXXX 100644
57
--- a/tcg/arm/tcg-target.c.inc
58
+++ b/tcg/arm/tcg-target.c.inc
59
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
60
}
70
}
61
}
71
62
72
+/* If the binary operation has second argument @i, fold to NOT. */
63
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
73
+static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
64
+static TCGConstraintSetIndex
74
+{
65
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
75
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
76
+ return fold_to_not(ctx, op, 1);
77
+ }
78
+ return false;
79
+}
80
+
81
/* If the binary operation has both arguments equal, fold to @i. */
82
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
83
{
66
{
84
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
67
switch (op) {
85
static bool fold_andc(OptContext *ctx, TCGOp *op)
68
case INDEX_op_goto_ptr:
69
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
70
index XXXXXXX..XXXXXXX 100644
71
--- a/tcg/i386/tcg-target.c.inc
72
+++ b/tcg/i386/tcg-target.c.inc
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
74
}
75
}
76
77
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
78
+static TCGConstraintSetIndex
79
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
86
{
80
{
87
if (fold_const2(ctx, op) ||
81
switch (op) {
88
- fold_xx_to_i(ctx, op, 0)) {
82
case INDEX_op_goto_ptr:
89
+ fold_xx_to_i(ctx, op, 0) ||
83
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
90
+ fold_ix_to_not(ctx, op, -1)) {
84
index XXXXXXX..XXXXXXX 100644
91
return true;
85
--- a/tcg/loongarch64/tcg-target.c.inc
86
+++ b/tcg/loongarch64/tcg-target.c.inc
87
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
88
g_assert_not_reached();
89
}
90
91
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
92
+static TCGConstraintSetIndex
93
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
94
{
95
switch (op) {
96
case INDEX_op_goto_ptr:
97
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
98
index XXXXXXX..XXXXXXX 100644
99
--- a/tcg/mips/tcg-target.c.inc
100
+++ b/tcg/mips/tcg-target.c.inc
101
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
92
}
102
}
93
return false;
103
}
94
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
104
95
105
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
96
static bool fold_eqv(OptContext *ctx, TCGOp *op)
106
+static TCGConstraintSetIndex
107
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
97
{
108
{
98
- return fold_const2(ctx, op);
109
switch (op) {
99
+ if (fold_const2(ctx, op) ||
110
case INDEX_op_goto_ptr:
100
+ fold_xi_to_not(ctx, op, 0)) {
111
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
101
+ return true;
112
index XXXXXXX..XXXXXXX 100644
102
+ }
113
--- a/tcg/ppc/tcg-target.c.inc
103
+ return false;
114
+++ b/tcg/ppc/tcg-target.c.inc
115
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
116
va_end(va);
104
}
117
}
105
118
106
static bool fold_extract(OptContext *ctx, TCGOp *op)
119
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
107
@@ -XXX,XX +XXX,XX @@ static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
120
+static TCGConstraintSetIndex
108
121
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
109
static bool fold_nand(OptContext *ctx, TCGOp *op)
110
{
122
{
111
- return fold_const2(ctx, op);
123
switch (op) {
112
+ if (fold_const2(ctx, op) ||
124
case INDEX_op_goto_ptr:
113
+ fold_xi_to_not(ctx, op, -1)) {
125
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
114
+ return true;
126
index XXXXXXX..XXXXXXX 100644
115
+ }
127
--- a/tcg/riscv/tcg-target.c.inc
116
+ return false;
128
+++ b/tcg/riscv/tcg-target.c.inc
129
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
130
}
117
}
131
}
118
132
119
static bool fold_neg(OptContext *ctx, TCGOp *op)
133
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
120
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
134
+static TCGConstraintSetIndex
121
135
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
122
static bool fold_nor(OptContext *ctx, TCGOp *op)
123
{
136
{
124
- return fold_const2(ctx, op);
137
switch (op) {
125
+ if (fold_const2(ctx, op) ||
138
case INDEX_op_goto_ptr:
126
+ fold_xi_to_not(ctx, op, 0)) {
139
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
127
+ return true;
140
index XXXXXXX..XXXXXXX 100644
128
+ }
141
--- a/tcg/s390x/tcg-target.c.inc
129
+ return false;
142
+++ b/tcg/s390x/tcg-target.c.inc
143
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
144
va_end(va);
130
}
145
}
131
146
132
static bool fold_not(OptContext *ctx, TCGOp *op)
147
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
148
+static TCGConstraintSetIndex
149
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
133
{
150
{
134
- return fold_const1(ctx, op);
151
switch (op) {
135
+ if (fold_const1(ctx, op)) {
152
case INDEX_op_goto_ptr:
136
+ return true;
153
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
137
+ }
154
index XXXXXXX..XXXXXXX 100644
138
+
155
--- a/tcg/sparc64/tcg-target.c.inc
139
+ /* Because of fold_to_not, we want to always return true, via finish. */
156
+++ b/tcg/sparc64/tcg-target.c.inc
140
+ finish_folding(ctx, op);
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
141
+ return true;
158
}
142
}
159
}
143
160
144
static bool fold_or(OptContext *ctx, TCGOp *op)
161
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
145
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
162
+static TCGConstraintSetIndex
146
163
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
147
static bool fold_orc(OptContext *ctx, TCGOp *op)
148
{
164
{
149
- return fold_const2(ctx, op);
165
switch (op) {
150
+ if (fold_const2(ctx, op) ||
166
case INDEX_op_goto_ptr:
151
+ fold_ix_to_not(ctx, op, 0)) {
167
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
152
+ return true;
168
index XXXXXXX..XXXXXXX 100644
153
+ }
169
--- a/tcg/tci/tcg-target.c.inc
154
+ return false;
170
+++ b/tcg/tci/tcg-target.c.inc
155
}
171
@@ -XXX,XX +XXX,XX @@
156
172
#endif
157
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
173
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
158
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
174
159
static bool fold_xor(OptContext *ctx, TCGOp *op)
175
-static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
176
+static TCGConstraintSetIndex
177
+tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
160
{
178
{
161
if (fold_const2(ctx, op) ||
179
switch (op) {
162
- fold_xx_to_i(ctx, op, 0)) {
180
case INDEX_op_goto_ptr:
163
+ fold_xx_to_i(ctx, op, 0) ||
164
+ fold_xi_to_not(ctx, op, -1)) {
165
return true;
166
}
167
return false;
168
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
169
}
170
}
171
break;
172
- CASE_OP_32_64_VEC(xor):
173
- CASE_OP_32_64(nand):
174
- if (!arg_is_const(op->args[1])
175
- && arg_is_const(op->args[2])
176
- && arg_info(op->args[2])->val == -1) {
177
- i = 1;
178
- goto try_not;
179
- }
180
- break;
181
- CASE_OP_32_64(nor):
182
- if (!arg_is_const(op->args[1])
183
- && arg_is_const(op->args[2])
184
- && arg_info(op->args[2])->val == 0) {
185
- i = 1;
186
- goto try_not;
187
- }
188
- break;
189
- CASE_OP_32_64_VEC(andc):
190
- if (!arg_is_const(op->args[2])
191
- && arg_is_const(op->args[1])
192
- && arg_info(op->args[1])->val == -1) {
193
- i = 2;
194
- goto try_not;
195
- }
196
- break;
197
- CASE_OP_32_64_VEC(orc):
198
- CASE_OP_32_64(eqv):
199
- if (!arg_is_const(op->args[2])
200
- && arg_is_const(op->args[1])
201
- && arg_info(op->args[1])->val == 0) {
202
- i = 2;
203
- goto try_not;
204
- }
205
- break;
206
- try_not:
207
- {
208
- TCGOpcode not_op;
209
- bool have_not;
210
-
211
- switch (ctx.type) {
212
- case TCG_TYPE_I32:
213
- not_op = INDEX_op_not_i32;
214
- have_not = TCG_TARGET_HAS_not_i32;
215
- break;
216
- case TCG_TYPE_I64:
217
- not_op = INDEX_op_not_i64;
218
- have_not = TCG_TARGET_HAS_not_i64;
219
- break;
220
- case TCG_TYPE_V64:
221
- case TCG_TYPE_V128:
222
- case TCG_TYPE_V256:
223
- not_op = INDEX_op_not_vec;
224
- have_not = TCG_TARGET_HAS_not_vec;
225
- break;
226
- default:
227
- g_assert_not_reached();
228
- }
229
- if (!have_not) {
230
- break;
231
- }
232
- op->opc = not_op;
233
- reset_temp(op->args[0]);
234
- op->args[1] = op->args[i];
235
- continue;
236
- }
237
default:
238
break;
239
}
240
--
181
--
241
2.25.1
182
2.43.0
242
183
243
184
diff view generated by jsdifflib
New patch
1
1
Pass TCGOp.type to the output function.
2
For aarch64 and tci, use this instead of testing TCG_OPF_64BIT.
3
For s390x, use this instead of testing INDEX_op_deposit_i64.
4
For i386, use this to initialize rexw.
5
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/tcg.c | 4 ++--
10
tcg/aarch64/tcg-target.c.inc | 6 +-----
11
tcg/arm/tcg-target.c.inc | 2 +-
12
tcg/i386/tcg-target.c.inc | 10 +++++-----
13
tcg/loongarch64/tcg-target.c.inc | 2 +-
14
tcg/mips/tcg-target.c.inc | 2 +-
15
tcg/ppc/tcg-target.c.inc | 2 +-
16
tcg/riscv/tcg-target.c.inc | 2 +-
17
tcg/s390x/tcg-target.c.inc | 7 +++----
18
tcg/sparc64/tcg-target.c.inc | 2 +-
19
tcg/tci/tcg-target.c.inc | 4 ++--
20
11 files changed, 19 insertions(+), 24 deletions(-)
21
22
diff --git a/tcg/tcg.c b/tcg/tcg.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/tcg/tcg.c
25
+++ b/tcg/tcg.c
26
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
27
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
28
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
29
static void tcg_out_goto_tb(TCGContext *s, int which);
30
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
31
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
32
const TCGArg args[TCG_MAX_OP_ARGS],
33
const int const_args[TCG_MAX_OP_ARGS]);
34
#if TCG_TARGET_MAYBE_vec
35
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
36
tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64,
37
TCGOP_VECE(op), new_args, const_args);
38
} else {
39
- tcg_out_op(s, op->opc, new_args, const_args);
40
+ tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args);
41
}
42
break;
43
}
44
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
45
index XXXXXXX..XXXXXXX 100644
46
--- a/tcg/aarch64/tcg-target.c.inc
47
+++ b/tcg/aarch64/tcg-target.c.inc
48
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
49
flush_idcache_range(jmp_rx, jmp_rw, 4);
50
}
51
52
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
53
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
54
const TCGArg args[TCG_MAX_OP_ARGS],
55
const int const_args[TCG_MAX_OP_ARGS])
56
{
57
- /* 99% of the time, we can signal the use of extension registers
58
- by looking to see if the opcode handles 64-bit data. */
59
- TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
60
-
61
/* Hoist the loads of the most common arguments. */
62
TCGArg a0 = args[0];
63
TCGArg a1 = args[1];
64
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
65
index XXXXXXX..XXXXXXX 100644
66
--- a/tcg/arm/tcg-target.c.inc
67
+++ b/tcg/arm/tcg-target.c.inc
68
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
69
flush_idcache_range(jmp_rx, jmp_rw, 4);
70
}
71
72
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
73
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
74
const TCGArg args[TCG_MAX_OP_ARGS],
75
const int const_args[TCG_MAX_OP_ARGS])
76
{
77
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
78
index XXXXXXX..XXXXXXX 100644
79
--- a/tcg/i386/tcg-target.c.inc
80
+++ b/tcg/i386/tcg-target.c.inc
81
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
82
/* no need to flush icache explicitly */
83
}
84
85
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
86
- const TCGArg args[TCG_MAX_OP_ARGS],
87
- const int const_args[TCG_MAX_OP_ARGS])
88
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
89
+ const TCGArg args[TCG_MAX_OP_ARGS],
90
+ const int const_args[TCG_MAX_OP_ARGS])
91
{
92
TCGArg a0, a1, a2;
93
- int c, const_a2, vexop, rexw = 0;
94
+ int c, const_a2, vexop, rexw;
95
96
#if TCG_TARGET_REG_BITS == 64
97
# define OP_32_64(x) \
98
case glue(glue(INDEX_op_, x), _i64): \
99
- rexw = P_REXW; /* FALLTHRU */ \
100
case glue(glue(INDEX_op_, x), _i32)
101
#else
102
# define OP_32_64(x) \
103
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
104
a1 = args[1];
105
a2 = args[2];
106
const_a2 = const_args[2];
107
+ rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
108
109
switch (opc) {
110
case INDEX_op_goto_ptr:
111
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
112
index XXXXXXX..XXXXXXX 100644
113
--- a/tcg/loongarch64/tcg-target.c.inc
114
+++ b/tcg/loongarch64/tcg-target.c.inc
115
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
116
flush_idcache_range(jmp_rx, jmp_rw, 4);
117
}
118
119
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
120
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
121
const TCGArg args[TCG_MAX_OP_ARGS],
122
const int const_args[TCG_MAX_OP_ARGS])
123
{
124
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
125
index XXXXXXX..XXXXXXX 100644
126
--- a/tcg/mips/tcg-target.c.inc
127
+++ b/tcg/mips/tcg-target.c.inc
128
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
129
/* Always indirect, nothing to do */
130
}
131
132
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
133
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
134
const TCGArg args[TCG_MAX_OP_ARGS],
135
const int const_args[TCG_MAX_OP_ARGS])
136
{
137
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
138
index XXXXXXX..XXXXXXX 100644
139
--- a/tcg/ppc/tcg-target.c.inc
140
+++ b/tcg/ppc/tcg-target.c.inc
141
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
142
flush_idcache_range(jmp_rx, jmp_rw, 4);
143
}
144
145
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
146
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
147
const TCGArg args[TCG_MAX_OP_ARGS],
148
const int const_args[TCG_MAX_OP_ARGS])
149
{
150
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
151
index XXXXXXX..XXXXXXX 100644
152
--- a/tcg/riscv/tcg-target.c.inc
153
+++ b/tcg/riscv/tcg-target.c.inc
154
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
155
flush_idcache_range(jmp_rx, jmp_rw, 4);
156
}
157
158
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
159
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
160
const TCGArg args[TCG_MAX_OP_ARGS],
161
const int const_args[TCG_MAX_OP_ARGS])
162
{
163
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
164
index XXXXXXX..XXXXXXX 100644
165
--- a/tcg/s390x/tcg-target.c.inc
166
+++ b/tcg/s390x/tcg-target.c.inc
167
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
168
case glue(glue(INDEX_op_,x),_i32): \
169
case glue(glue(INDEX_op_,x),_i64)
170
171
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
172
- const TCGArg args[TCG_MAX_OP_ARGS],
173
- const int const_args[TCG_MAX_OP_ARGS])
174
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
175
+ const TCGArg args[TCG_MAX_OP_ARGS],
176
+ const int const_args[TCG_MAX_OP_ARGS])
177
{
178
S390Opcode op, op2;
179
TCGArg a0, a1, a2;
180
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
181
/* Since we can't support "0Z" as a constraint, we allow a1 in
182
any register. Fix things up as if a matching constraint. */
183
if (a0 != a1) {
184
- TCGType type = (opc == INDEX_op_deposit_i64);
185
if (a0 == a2) {
186
tcg_out_mov(s, type, TCG_TMP0, a2);
187
a2 = TCG_TMP0;
188
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
189
index XXXXXXX..XXXXXXX 100644
190
--- a/tcg/sparc64/tcg-target.c.inc
191
+++ b/tcg/sparc64/tcg-target.c.inc
192
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
193
{
194
}
195
196
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
197
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
198
const TCGArg args[TCG_MAX_OP_ARGS],
199
const int const_args[TCG_MAX_OP_ARGS])
200
{
201
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
202
index XXXXXXX..XXXXXXX 100644
203
--- a/tcg/tci/tcg-target.c.inc
204
+++ b/tcg/tci/tcg-target.c.inc
205
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
206
/* Always indirect, nothing to do */
207
}
208
209
-static void tcg_out_op(TCGContext *s, TCGOpcode opc,
210
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
211
const TCGArg args[TCG_MAX_OP_ARGS],
212
const int const_args[TCG_MAX_OP_ARGS])
213
{
214
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
215
CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */
216
{
217
TCGArg pos = args[2], len = args[3];
218
- TCGArg max = tcg_op_defs[opc].flags & TCG_OPF_64BIT ? 64 : 32;
219
+ TCGArg max = type == TCG_TYPE_I32 ? 32 : 64;
220
221
tcg_debug_assert(pos < max);
222
tcg_debug_assert(pos + len <= max);
223
--
224
2.43.0
225
226
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
This flag is no longer used.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
tcg/optimize.c | 23 ++++++++++++++---------
6
include/tcg/tcg-opc.h | 22 +++++++++++-----------
6
1 file changed, 14 insertions(+), 9 deletions(-)
7
include/tcg/tcg.h | 2 --
8
2 files changed, 11 insertions(+), 13 deletions(-)
7
9
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
9
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
12
--- a/include/tcg/tcg-opc.h
11
+++ b/tcg/optimize.c
13
+++ b/include/tcg/tcg-opc.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
13
return fold_const2(ctx, op);
15
14
}
16
#define IMPL(X) (__builtin_constant_p(X) && (X) <= 0 ? TCG_OPF_NOT_PRESENT : 0)
15
17
#if TCG_TARGET_REG_BITS == 32
16
+static bool fold_setcond(OptContext *ctx, TCGOp *op)
18
-# define IMPL64 TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT
17
+{
19
+# define IMPL64 TCG_OPF_NOT_PRESENT
18
+ TCGCond cond = op->args[3];
20
#else
19
+ int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
21
-# define IMPL64 TCG_OPF_64BIT
20
+
22
+# define IMPL64 0
21
+ if (i >= 0) {
23
#endif
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
23
+ }
25
DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
24
+ return false;
26
@@ -XXX,XX +XXX,XX @@ DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32))
25
+}
27
DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
26
+
28
DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
27
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
29
28
{
30
-DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
29
TCGCond cond = op->args[5];
31
+DEF(mov_i64, 1, 1, 0, TCG_OPF_NOT_PRESENT)
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
32
DEF(setcond_i64, 1, 2, 1, IMPL64)
31
}
33
DEF(negsetcond_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_negsetcond_i64))
32
break;
34
DEF(movcond_i64, 1, 4, 1, IMPL64)
33
35
@@ -XXX,XX +XXX,XX @@ DEF(qemu_ld_a32_i32, 1, 1, 1,
34
- CASE_OP_32_64(setcond):
36
DEF(qemu_st_a32_i32, 0, 1 + 1, 1,
35
- i = do_constant_folding_cond(opc, op->args[1],
37
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
36
- op->args[2], op->args[3]);
38
DEF(qemu_ld_a32_i64, DATA64_ARGS, 1, 1,
37
- if (i >= 0) {
39
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
38
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
40
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
39
- continue;
41
DEF(qemu_st_a32_i64, 0, DATA64_ARGS + 1, 1,
40
- }
42
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
41
- break;
43
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
42
-
44
43
CASE_OP_32_64(movcond):
45
DEF(qemu_ld_a64_i32, 1, DATA64_ARGS, 1,
44
i = do_constant_folding_cond(opc, op->args[1],
46
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
45
op->args[2], op->args[5]);
47
DEF(qemu_st_a64_i32, 0, 1 + DATA64_ARGS, 1,
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
47
CASE_OP_32_64(shr):
49
DEF(qemu_ld_a64_i64, DATA64_ARGS, DATA64_ARGS, 1,
48
done = fold_shift(&ctx, op);
50
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
49
break;
51
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
50
+ CASE_OP_32_64(setcond):
52
DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
51
+ done = fold_setcond(&ctx, op);
53
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
52
+ break;
54
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
53
case INDEX_op_setcond2_i32:
55
54
done = fold_setcond2(&ctx, op);
56
/* Only used by i386 to cope with stupid register constraints. */
55
break;
57
DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
58
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
59
60
/* Only for 64-bit hosts at the moment. */
61
DEF(qemu_ld_a32_i128, 2, 1, 1,
62
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
63
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
64
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
65
DEF(qemu_ld_a64_i128, 2, 1, 1,
66
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
67
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
68
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
69
DEF(qemu_st_a32_i128, 0, 3, 1,
70
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
71
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
72
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
73
DEF(qemu_st_a64_i128, 0, 3, 1,
74
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
75
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
76
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
77
78
/* Host vector support. */
79
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
80
index XXXXXXX..XXXXXXX 100644
81
--- a/include/tcg/tcg.h
82
+++ b/include/tcg/tcg.h
83
@@ -XXX,XX +XXX,XX @@ enum {
84
/* Instruction has side effects: it cannot be removed if its outputs
85
are not used, and might trigger exceptions. */
86
TCG_OPF_SIDE_EFFECTS = 0x08,
87
- /* Instruction operands are 64-bits (otherwise 32-bits). */
88
- TCG_OPF_64BIT = 0x10,
89
/* Instruction is optional and not implemented by the host, or insn
90
is generic and should not be implemented by the host. */
91
TCG_OPF_NOT_PRESENT = 0x20,
56
--
92
--
57
2.25.1
93
2.43.0
58
94
59
95
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Now that we use a functional interface to query whether the opcode
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
is supported, we can drop the TCG_OPF_NOT_PRESENT bit mapping from
3
TCG_TARGET_HAS_foo in tcg-opc.h
4
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
7
---
5
tcg/optimize.c | 33 +++++++++++++++++++--------------
8
include/tcg/tcg-opc.h | 306 +++++++++++++++++++-----------------------
6
1 file changed, 19 insertions(+), 14 deletions(-)
9
1 file changed, 141 insertions(+), 165 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/include/tcg/tcg-opc.h
11
+++ b/tcg/optimize.c
14
+++ b/include/tcg/tcg-opc.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
13
return fold_const2(ctx, op);
16
14
}
17
DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
15
18
16
+static bool fold_brcond(OptContext *ctx, TCGOp *op)
19
-#define IMPL(X) (__builtin_constant_p(X) && (X) <= 0 ? TCG_OPF_NOT_PRESENT : 0)
17
+{
20
-#if TCG_TARGET_REG_BITS == 32
18
+ TCGCond cond = op->args[2];
21
-# define IMPL64 TCG_OPF_NOT_PRESENT
19
+ int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
22
-#else
20
+
23
-# define IMPL64 0
21
+ if (i == 0) {
24
-#endif
22
+ tcg_op_remove(ctx->tcg, op);
23
+ return true;
24
+ }
25
+ if (i > 0) {
26
+ op->opc = INDEX_op_br;
27
+ op->args[0] = op->args[3];
28
+ }
29
+ return false;
30
+}
31
+
32
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
33
{
34
TCGCond cond = op->args[4];
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
36
}
37
break;
38
39
- CASE_OP_32_64(brcond):
40
- i = do_constant_folding_cond(opc, op->args[0],
41
- op->args[1], op->args[2]);
42
- if (i == 0) {
43
- tcg_op_remove(s, op);
44
- continue;
45
- } else if (i > 0) {
46
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
47
- op->opc = opc = INDEX_op_br;
48
- op->args[0] = op->args[3];
49
- break;
50
- }
51
- break;
52
-
25
-
53
CASE_OP_32_64(movcond):
26
DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
54
i = do_constant_folding_cond(opc, op->args[1],
27
55
op->args[2], op->args[5]);
28
DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
56
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
29
DEF(setcond_i32, 1, 2, 1, 0)
57
CASE_OP_32_64_VEC(andc):
30
-DEF(negsetcond_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_negsetcond_i32))
58
done = fold_andc(&ctx, op);
31
+DEF(negsetcond_i32, 1, 2, 1, 0)
59
break;
32
DEF(movcond_i32, 1, 4, 1, 0)
60
+ CASE_OP_32_64(brcond):
33
/* load/store */
61
+ done = fold_brcond(&ctx, op);
34
DEF(ld8u_i32, 1, 1, 1, 0)
62
+ break;
35
@@ -XXX,XX +XXX,XX @@ DEF(st_i32, 0, 2, 1, 0)
63
case INDEX_op_brcond2_i32:
36
DEF(add_i32, 1, 2, 0, 0)
64
done = fold_brcond2(&ctx, op);
37
DEF(sub_i32, 1, 2, 0, 0)
65
break;
38
DEF(mul_i32, 1, 2, 0, 0)
39
-DEF(div_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32))
40
-DEF(divu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32))
41
-DEF(rem_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32))
42
-DEF(remu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32))
43
-DEF(div2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32))
44
-DEF(divu2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32))
45
+DEF(div_i32, 1, 2, 0, 0)
46
+DEF(divu_i32, 1, 2, 0, 0)
47
+DEF(rem_i32, 1, 2, 0, 0)
48
+DEF(remu_i32, 1, 2, 0, 0)
49
+DEF(div2_i32, 2, 3, 0, 0)
50
+DEF(divu2_i32, 2, 3, 0, 0)
51
DEF(and_i32, 1, 2, 0, 0)
52
DEF(or_i32, 1, 2, 0, 0)
53
DEF(xor_i32, 1, 2, 0, 0)
54
@@ -XXX,XX +XXX,XX @@ DEF(xor_i32, 1, 2, 0, 0)
55
DEF(shl_i32, 1, 2, 0, 0)
56
DEF(shr_i32, 1, 2, 0, 0)
57
DEF(sar_i32, 1, 2, 0, 0)
58
-DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
59
-DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
60
-DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
61
-DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32))
62
-DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32))
63
-DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32))
64
+DEF(rotl_i32, 1, 2, 0, 0)
65
+DEF(rotr_i32, 1, 2, 0, 0)
66
+DEF(deposit_i32, 1, 2, 2, 0)
67
+DEF(extract_i32, 1, 1, 2, 0)
68
+DEF(sextract_i32, 1, 1, 2, 0)
69
+DEF(extract2_i32, 1, 2, 1, 0)
70
71
DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
72
73
-DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
74
-DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
75
-DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
76
-DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
77
-DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
78
-DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
79
-DEF(brcond2_i32, 0, 4, 2,
80
- TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL(TCG_TARGET_REG_BITS == 32))
81
-DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
82
+DEF(add2_i32, 2, 4, 0, 0)
83
+DEF(sub2_i32, 2, 4, 0, 0)
84
+DEF(mulu2_i32, 2, 2, 0, 0)
85
+DEF(muls2_i32, 2, 2, 0, 0)
86
+DEF(muluh_i32, 1, 2, 0, 0)
87
+DEF(mulsh_i32, 1, 2, 0, 0)
88
+DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
89
+DEF(setcond2_i32, 1, 4, 1, 0)
90
91
-DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
92
-DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32))
93
-DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32))
94
-DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32))
95
-DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32))
96
-DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32))
97
-DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32))
98
+DEF(ext8s_i32, 1, 1, 0, 0)
99
+DEF(ext16s_i32, 1, 1, 0, 0)
100
+DEF(ext8u_i32, 1, 1, 0, 0)
101
+DEF(ext16u_i32, 1, 1, 0, 0)
102
+DEF(bswap16_i32, 1, 1, 1, 0)
103
+DEF(bswap32_i32, 1, 1, 1, 0)
104
+DEF(not_i32, 1, 1, 0, 0)
105
DEF(neg_i32, 1, 1, 0, 0)
106
-DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32))
107
-DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32))
108
-DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32))
109
-DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32))
110
-DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
111
-DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32))
112
-DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
113
-DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
114
+DEF(andc_i32, 1, 2, 0, 0)
115
+DEF(orc_i32, 1, 2, 0, 0)
116
+DEF(eqv_i32, 1, 2, 0, 0)
117
+DEF(nand_i32, 1, 2, 0, 0)
118
+DEF(nor_i32, 1, 2, 0, 0)
119
+DEF(clz_i32, 1, 2, 0, 0)
120
+DEF(ctz_i32, 1, 2, 0, 0)
121
+DEF(ctpop_i32, 1, 1, 0, 0)
122
123
DEF(mov_i64, 1, 1, 0, TCG_OPF_NOT_PRESENT)
124
-DEF(setcond_i64, 1, 2, 1, IMPL64)
125
-DEF(negsetcond_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_negsetcond_i64))
126
-DEF(movcond_i64, 1, 4, 1, IMPL64)
127
+DEF(setcond_i64, 1, 2, 1, 0)
128
+DEF(negsetcond_i64, 1, 2, 1, 0)
129
+DEF(movcond_i64, 1, 4, 1, 0)
130
/* load/store */
131
-DEF(ld8u_i64, 1, 1, 1, IMPL64)
132
-DEF(ld8s_i64, 1, 1, 1, IMPL64)
133
-DEF(ld16u_i64, 1, 1, 1, IMPL64)
134
-DEF(ld16s_i64, 1, 1, 1, IMPL64)
135
-DEF(ld32u_i64, 1, 1, 1, IMPL64)
136
-DEF(ld32s_i64, 1, 1, 1, IMPL64)
137
-DEF(ld_i64, 1, 1, 1, IMPL64)
138
-DEF(st8_i64, 0, 2, 1, IMPL64)
139
-DEF(st16_i64, 0, 2, 1, IMPL64)
140
-DEF(st32_i64, 0, 2, 1, IMPL64)
141
-DEF(st_i64, 0, 2, 1, IMPL64)
142
+DEF(ld8u_i64, 1, 1, 1, 0)
143
+DEF(ld8s_i64, 1, 1, 1, 0)
144
+DEF(ld16u_i64, 1, 1, 1, 0)
145
+DEF(ld16s_i64, 1, 1, 1, 0)
146
+DEF(ld32u_i64, 1, 1, 1, 0)
147
+DEF(ld32s_i64, 1, 1, 1, 0)
148
+DEF(ld_i64, 1, 1, 1, 0)
149
+DEF(st8_i64, 0, 2, 1, 0)
150
+DEF(st16_i64, 0, 2, 1, 0)
151
+DEF(st32_i64, 0, 2, 1, 0)
152
+DEF(st_i64, 0, 2, 1, 0)
153
/* arith */
154
-DEF(add_i64, 1, 2, 0, IMPL64)
155
-DEF(sub_i64, 1, 2, 0, IMPL64)
156
-DEF(mul_i64, 1, 2, 0, IMPL64)
157
-DEF(div_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64))
158
-DEF(divu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64))
159
-DEF(rem_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64))
160
-DEF(remu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64))
161
-DEF(div2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64))
162
-DEF(divu2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64))
163
-DEF(and_i64, 1, 2, 0, IMPL64)
164
-DEF(or_i64, 1, 2, 0, IMPL64)
165
-DEF(xor_i64, 1, 2, 0, IMPL64)
166
+DEF(add_i64, 1, 2, 0, 0)
167
+DEF(sub_i64, 1, 2, 0, 0)
168
+DEF(mul_i64, 1, 2, 0, 0)
169
+DEF(div_i64, 1, 2, 0, 0)
170
+DEF(divu_i64, 1, 2, 0, 0)
171
+DEF(rem_i64, 1, 2, 0, 0)
172
+DEF(remu_i64, 1, 2, 0, 0)
173
+DEF(div2_i64, 2, 3, 0, 0)
174
+DEF(divu2_i64, 2, 3, 0, 0)
175
+DEF(and_i64, 1, 2, 0, 0)
176
+DEF(or_i64, 1, 2, 0, 0)
177
+DEF(xor_i64, 1, 2, 0, 0)
178
/* shifts/rotates */
179
-DEF(shl_i64, 1, 2, 0, IMPL64)
180
-DEF(shr_i64, 1, 2, 0, IMPL64)
181
-DEF(sar_i64, 1, 2, 0, IMPL64)
182
-DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
183
-DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
184
-DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
185
-DEF(extract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_extract_i64))
186
-DEF(sextract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_sextract_i64))
187
-DEF(extract2_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_extract2_i64))
188
+DEF(shl_i64, 1, 2, 0, 0)
189
+DEF(shr_i64, 1, 2, 0, 0)
190
+DEF(sar_i64, 1, 2, 0, 0)
191
+DEF(rotl_i64, 1, 2, 0, 0)
192
+DEF(rotr_i64, 1, 2, 0, 0)
193
+DEF(deposit_i64, 1, 2, 2, 0)
194
+DEF(extract_i64, 1, 1, 2, 0)
195
+DEF(sextract_i64, 1, 1, 2, 0)
196
+DEF(extract2_i64, 1, 2, 1, 0)
197
198
/* size changing ops */
199
-DEF(ext_i32_i64, 1, 1, 0, IMPL64)
200
-DEF(extu_i32_i64, 1, 1, 0, IMPL64)
201
-DEF(extrl_i64_i32, 1, 1, 0,
202
- IMPL(TCG_TARGET_HAS_extr_i64_i32)
203
- | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
204
-DEF(extrh_i64_i32, 1, 1, 0,
205
- IMPL(TCG_TARGET_HAS_extr_i64_i32)
206
- | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
207
+DEF(ext_i32_i64, 1, 1, 0, 0)
208
+DEF(extu_i32_i64, 1, 1, 0, 0)
209
+DEF(extrl_i64_i32, 1, 1, 0, 0)
210
+DEF(extrh_i64_i32, 1, 1, 0, 0)
211
212
-DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL64)
213
-DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64))
214
-DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64))
215
-DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
216
-DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64))
217
-DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64))
218
-DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64))
219
-DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))
220
-DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))
221
-DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))
222
-DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64))
223
-DEF(neg_i64, 1, 1, 0, IMPL64)
224
-DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64))
225
-DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64))
226
-DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64))
227
-DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
228
-DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
229
-DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64))
230
-DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64))
231
-DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64))
232
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
233
+DEF(ext8s_i64, 1, 1, 0, 0)
234
+DEF(ext16s_i64, 1, 1, 0, 0)
235
+DEF(ext32s_i64, 1, 1, 0, 0)
236
+DEF(ext8u_i64, 1, 1, 0, 0)
237
+DEF(ext16u_i64, 1, 1, 0, 0)
238
+DEF(ext32u_i64, 1, 1, 0, 0)
239
+DEF(bswap16_i64, 1, 1, 1, 0)
240
+DEF(bswap32_i64, 1, 1, 1, 0)
241
+DEF(bswap64_i64, 1, 1, 1, 0)
242
+DEF(not_i64, 1, 1, 0, 0)
243
+DEF(neg_i64, 1, 1, 0, 0)
244
+DEF(andc_i64, 1, 2, 0, 0)
245
+DEF(orc_i64, 1, 2, 0, 0)
246
+DEF(eqv_i64, 1, 2, 0, 0)
247
+DEF(nand_i64, 1, 2, 0, 0)
248
+DEF(nor_i64, 1, 2, 0, 0)
249
+DEF(clz_i64, 1, 2, 0, 0)
250
+DEF(ctz_i64, 1, 2, 0, 0)
251
+DEF(ctpop_i64, 1, 1, 0, 0)
252
253
-DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
254
-DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
255
-DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
256
-DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
257
-DEF(muluh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muluh_i64))
258
-DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
259
+DEF(add2_i64, 2, 4, 0, 0)
260
+DEF(sub2_i64, 2, 4, 0, 0)
261
+DEF(mulu2_i64, 2, 2, 0, 0)
262
+DEF(muls2_i64, 2, 2, 0, 0)
263
+DEF(muluh_i64, 1, 2, 0, 0)
264
+DEF(mulsh_i64, 1, 2, 0, 0)
265
266
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
267
268
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
269
270
/* Only used by i386 to cope with stupid register constraints. */
271
DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
272
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
273
- IMPL(TCG_TARGET_HAS_qemu_st8_i32))
274
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
275
DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
276
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
277
- IMPL(TCG_TARGET_HAS_qemu_st8_i32))
278
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
279
280
/* Only for 64-bit hosts at the moment. */
281
-DEF(qemu_ld_a32_i128, 2, 1, 1,
282
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
283
- IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
284
-DEF(qemu_ld_a64_i128, 2, 1, 1,
285
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
286
- IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
287
-DEF(qemu_st_a32_i128, 0, 3, 1,
288
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
289
- IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
290
-DEF(qemu_st_a64_i128, 0, 3, 1,
291
- TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
292
- IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
293
+DEF(qemu_ld_a32_i128, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
294
+DEF(qemu_ld_a64_i128, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
295
+DEF(qemu_st_a32_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
296
+DEF(qemu_st_a64_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
297
298
/* Host vector support. */
299
300
-#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
301
+#define IMPLVEC TCG_OPF_VECTOR
302
303
DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
304
305
DEF(dup_vec, 1, 1, 0, IMPLVEC)
306
-DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
307
+DEF(dup2_vec, 1, 2, 0, IMPLVEC)
308
309
DEF(ld_vec, 1, 1, 1, IMPLVEC)
310
DEF(st_vec, 0, 2, 1, IMPLVEC)
311
@@ -XXX,XX +XXX,XX @@ DEF(dupm_vec, 1, 1, 1, IMPLVEC)
312
313
DEF(add_vec, 1, 2, 0, IMPLVEC)
314
DEF(sub_vec, 1, 2, 0, IMPLVEC)
315
-DEF(mul_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_mul_vec))
316
-DEF(neg_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
317
-DEF(abs_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_abs_vec))
318
-DEF(ssadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
319
-DEF(usadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
320
-DEF(sssub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
321
-DEF(ussub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
322
-DEF(smin_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
323
-DEF(umin_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
324
-DEF(smax_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
325
-DEF(umax_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
326
+DEF(mul_vec, 1, 2, 0, IMPLVEC)
327
+DEF(neg_vec, 1, 1, 0, IMPLVEC)
328
+DEF(abs_vec, 1, 1, 0, IMPLVEC)
329
+DEF(ssadd_vec, 1, 2, 0, IMPLVEC)
330
+DEF(usadd_vec, 1, 2, 0, IMPLVEC)
331
+DEF(sssub_vec, 1, 2, 0, IMPLVEC)
332
+DEF(ussub_vec, 1, 2, 0, IMPLVEC)
333
+DEF(smin_vec, 1, 2, 0, IMPLVEC)
334
+DEF(umin_vec, 1, 2, 0, IMPLVEC)
335
+DEF(smax_vec, 1, 2, 0, IMPLVEC)
336
+DEF(umax_vec, 1, 2, 0, IMPLVEC)
337
338
DEF(and_vec, 1, 2, 0, IMPLVEC)
339
DEF(or_vec, 1, 2, 0, IMPLVEC)
340
DEF(xor_vec, 1, 2, 0, IMPLVEC)
341
-DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
342
-DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
343
-DEF(nand_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nand_vec))
344
-DEF(nor_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nor_vec))
345
-DEF(eqv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_eqv_vec))
346
-DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
347
+DEF(andc_vec, 1, 2, 0, IMPLVEC)
348
+DEF(orc_vec, 1, 2, 0, IMPLVEC)
349
+DEF(nand_vec, 1, 2, 0, IMPLVEC)
350
+DEF(nor_vec, 1, 2, 0, IMPLVEC)
351
+DEF(eqv_vec, 1, 2, 0, IMPLVEC)
352
+DEF(not_vec, 1, 1, 0, IMPLVEC)
353
354
-DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
355
-DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
356
-DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
357
-DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
358
+DEF(shli_vec, 1, 1, 1, IMPLVEC)
359
+DEF(shri_vec, 1, 1, 1, IMPLVEC)
360
+DEF(sari_vec, 1, 1, 1, IMPLVEC)
361
+DEF(rotli_vec, 1, 1, 1, IMPLVEC)
362
363
-DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
364
-DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
365
-DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
366
-DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec))
367
+DEF(shls_vec, 1, 2, 0, IMPLVEC)
368
+DEF(shrs_vec, 1, 2, 0, IMPLVEC)
369
+DEF(sars_vec, 1, 2, 0, IMPLVEC)
370
+DEF(rotls_vec, 1, 2, 0, IMPLVEC)
371
372
-DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
373
-DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
374
-DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
375
-DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
376
-DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
377
+DEF(shlv_vec, 1, 2, 0, IMPLVEC)
378
+DEF(shrv_vec, 1, 2, 0, IMPLVEC)
379
+DEF(sarv_vec, 1, 2, 0, IMPLVEC)
380
+DEF(rotlv_vec, 1, 2, 0, IMPLVEC)
381
+DEF(rotrv_vec, 1, 2, 0, IMPLVEC)
382
383
DEF(cmp_vec, 1, 2, 1, IMPLVEC)
384
385
-DEF(bitsel_vec, 1, 3, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_bitsel_vec))
386
-DEF(cmpsel_vec, 1, 4, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_cmpsel_vec))
387
+DEF(bitsel_vec, 1, 3, 0, IMPLVEC)
388
+DEF(cmpsel_vec, 1, 4, 1, IMPLVEC)
389
390
DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
391
392
#include "tcg-target-opc.h.inc"
393
394
#undef DATA64_ARGS
395
-#undef IMPL
396
-#undef IMPL64
397
#undef IMPLVEC
398
#undef DEF
66
--
399
--
67
2.25.1
400
2.43.0
68
401
69
402
diff view generated by jsdifflib
New patch
1
1
This is now a direct replacement.
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
include/tcg/tcg-opc.h | 89 +++++++++++++++-----------------
7
tcg/aarch64/tcg-target-opc.h.inc | 4 +-
8
tcg/arm/tcg-target-opc.h.inc | 6 +--
9
tcg/i386/tcg-target-opc.h.inc | 22 ++++----
10
tcg/ppc/tcg-target-opc.h.inc | 12 ++---
11
tcg/s390x/tcg-target-opc.h.inc | 6 +--
12
6 files changed, 68 insertions(+), 71 deletions(-)
13
14
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/tcg/tcg-opc.h
17
+++ b/include/tcg/tcg-opc.h
18
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st_a64_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
19
20
/* Host vector support. */
21
22
-#define IMPLVEC TCG_OPF_VECTOR
23
-
24
DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
25
26
-DEF(dup_vec, 1, 1, 0, IMPLVEC)
27
-DEF(dup2_vec, 1, 2, 0, IMPLVEC)
28
+DEF(dup_vec, 1, 1, 0, TCG_OPF_VECTOR)
29
+DEF(dup2_vec, 1, 2, 0, TCG_OPF_VECTOR)
30
31
-DEF(ld_vec, 1, 1, 1, IMPLVEC)
32
-DEF(st_vec, 0, 2, 1, IMPLVEC)
33
-DEF(dupm_vec, 1, 1, 1, IMPLVEC)
34
+DEF(ld_vec, 1, 1, 1, TCG_OPF_VECTOR)
35
+DEF(st_vec, 0, 2, 1, TCG_OPF_VECTOR)
36
+DEF(dupm_vec, 1, 1, 1, TCG_OPF_VECTOR)
37
38
-DEF(add_vec, 1, 2, 0, IMPLVEC)
39
-DEF(sub_vec, 1, 2, 0, IMPLVEC)
40
-DEF(mul_vec, 1, 2, 0, IMPLVEC)
41
-DEF(neg_vec, 1, 1, 0, IMPLVEC)
42
-DEF(abs_vec, 1, 1, 0, IMPLVEC)
43
-DEF(ssadd_vec, 1, 2, 0, IMPLVEC)
44
-DEF(usadd_vec, 1, 2, 0, IMPLVEC)
45
-DEF(sssub_vec, 1, 2, 0, IMPLVEC)
46
-DEF(ussub_vec, 1, 2, 0, IMPLVEC)
47
-DEF(smin_vec, 1, 2, 0, IMPLVEC)
48
-DEF(umin_vec, 1, 2, 0, IMPLVEC)
49
-DEF(smax_vec, 1, 2, 0, IMPLVEC)
50
-DEF(umax_vec, 1, 2, 0, IMPLVEC)
51
+DEF(add_vec, 1, 2, 0, TCG_OPF_VECTOR)
52
+DEF(sub_vec, 1, 2, 0, TCG_OPF_VECTOR)
53
+DEF(mul_vec, 1, 2, 0, TCG_OPF_VECTOR)
54
+DEF(neg_vec, 1, 1, 0, TCG_OPF_VECTOR)
55
+DEF(abs_vec, 1, 1, 0, TCG_OPF_VECTOR)
56
+DEF(ssadd_vec, 1, 2, 0, TCG_OPF_VECTOR)
57
+DEF(usadd_vec, 1, 2, 0, TCG_OPF_VECTOR)
58
+DEF(sssub_vec, 1, 2, 0, TCG_OPF_VECTOR)
59
+DEF(ussub_vec, 1, 2, 0, TCG_OPF_VECTOR)
60
+DEF(smin_vec, 1, 2, 0, TCG_OPF_VECTOR)
61
+DEF(umin_vec, 1, 2, 0, TCG_OPF_VECTOR)
62
+DEF(smax_vec, 1, 2, 0, TCG_OPF_VECTOR)
63
+DEF(umax_vec, 1, 2, 0, TCG_OPF_VECTOR)
64
65
-DEF(and_vec, 1, 2, 0, IMPLVEC)
66
-DEF(or_vec, 1, 2, 0, IMPLVEC)
67
-DEF(xor_vec, 1, 2, 0, IMPLVEC)
68
-DEF(andc_vec, 1, 2, 0, IMPLVEC)
69
-DEF(orc_vec, 1, 2, 0, IMPLVEC)
70
-DEF(nand_vec, 1, 2, 0, IMPLVEC)
71
-DEF(nor_vec, 1, 2, 0, IMPLVEC)
72
-DEF(eqv_vec, 1, 2, 0, IMPLVEC)
73
-DEF(not_vec, 1, 1, 0, IMPLVEC)
74
+DEF(and_vec, 1, 2, 0, TCG_OPF_VECTOR)
75
+DEF(or_vec, 1, 2, 0, TCG_OPF_VECTOR)
76
+DEF(xor_vec, 1, 2, 0, TCG_OPF_VECTOR)
77
+DEF(andc_vec, 1, 2, 0, TCG_OPF_VECTOR)
78
+DEF(orc_vec, 1, 2, 0, TCG_OPF_VECTOR)
79
+DEF(nand_vec, 1, 2, 0, TCG_OPF_VECTOR)
80
+DEF(nor_vec, 1, 2, 0, TCG_OPF_VECTOR)
81
+DEF(eqv_vec, 1, 2, 0, TCG_OPF_VECTOR)
82
+DEF(not_vec, 1, 1, 0, TCG_OPF_VECTOR)
83
84
-DEF(shli_vec, 1, 1, 1, IMPLVEC)
85
-DEF(shri_vec, 1, 1, 1, IMPLVEC)
86
-DEF(sari_vec, 1, 1, 1, IMPLVEC)
87
-DEF(rotli_vec, 1, 1, 1, IMPLVEC)
88
+DEF(shli_vec, 1, 1, 1, TCG_OPF_VECTOR)
89
+DEF(shri_vec, 1, 1, 1, TCG_OPF_VECTOR)
90
+DEF(sari_vec, 1, 1, 1, TCG_OPF_VECTOR)
91
+DEF(rotli_vec, 1, 1, 1, TCG_OPF_VECTOR)
92
93
-DEF(shls_vec, 1, 2, 0, IMPLVEC)
94
-DEF(shrs_vec, 1, 2, 0, IMPLVEC)
95
-DEF(sars_vec, 1, 2, 0, IMPLVEC)
96
-DEF(rotls_vec, 1, 2, 0, IMPLVEC)
97
+DEF(shls_vec, 1, 2, 0, TCG_OPF_VECTOR)
98
+DEF(shrs_vec, 1, 2, 0, TCG_OPF_VECTOR)
99
+DEF(sars_vec, 1, 2, 0, TCG_OPF_VECTOR)
100
+DEF(rotls_vec, 1, 2, 0, TCG_OPF_VECTOR)
101
102
-DEF(shlv_vec, 1, 2, 0, IMPLVEC)
103
-DEF(shrv_vec, 1, 2, 0, IMPLVEC)
104
-DEF(sarv_vec, 1, 2, 0, IMPLVEC)
105
-DEF(rotlv_vec, 1, 2, 0, IMPLVEC)
106
-DEF(rotrv_vec, 1, 2, 0, IMPLVEC)
107
+DEF(shlv_vec, 1, 2, 0, TCG_OPF_VECTOR)
108
+DEF(shrv_vec, 1, 2, 0, TCG_OPF_VECTOR)
109
+DEF(sarv_vec, 1, 2, 0, TCG_OPF_VECTOR)
110
+DEF(rotlv_vec, 1, 2, 0, TCG_OPF_VECTOR)
111
+DEF(rotrv_vec, 1, 2, 0, TCG_OPF_VECTOR)
112
113
-DEF(cmp_vec, 1, 2, 1, IMPLVEC)
114
+DEF(cmp_vec, 1, 2, 1, TCG_OPF_VECTOR)
115
116
-DEF(bitsel_vec, 1, 3, 0, IMPLVEC)
117
-DEF(cmpsel_vec, 1, 4, 1, IMPLVEC)
118
+DEF(bitsel_vec, 1, 3, 0, TCG_OPF_VECTOR)
119
+DEF(cmpsel_vec, 1, 4, 1, TCG_OPF_VECTOR)
120
121
DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
122
123
#include "tcg-target-opc.h.inc"
124
125
#undef DATA64_ARGS
126
-#undef IMPLVEC
127
#undef DEF
128
diff --git a/tcg/aarch64/tcg-target-opc.h.inc b/tcg/aarch64/tcg-target-opc.h.inc
129
index XXXXXXX..XXXXXXX 100644
130
--- a/tcg/aarch64/tcg-target-opc.h.inc
131
+++ b/tcg/aarch64/tcg-target-opc.h.inc
132
@@ -XXX,XX +XXX,XX @@
133
* consider these to be UNSPEC with names.
134
*/
135
136
-DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC)
137
-DEF(aa64_sli_vec, 1, 2, 1, IMPLVEC)
138
+DEF(aa64_sshl_vec, 1, 2, 0, TCG_OPF_VECTOR)
139
+DEF(aa64_sli_vec, 1, 2, 1, TCG_OPF_VECTOR)
140
diff --git a/tcg/arm/tcg-target-opc.h.inc b/tcg/arm/tcg-target-opc.h.inc
141
index XXXXXXX..XXXXXXX 100644
142
--- a/tcg/arm/tcg-target-opc.h.inc
143
+++ b/tcg/arm/tcg-target-opc.h.inc
144
@@ -XXX,XX +XXX,XX @@
145
* consider these to be UNSPEC with names.
146
*/
147
148
-DEF(arm_sli_vec, 1, 2, 1, IMPLVEC)
149
-DEF(arm_sshl_vec, 1, 2, 0, IMPLVEC)
150
-DEF(arm_ushl_vec, 1, 2, 0, IMPLVEC)
151
+DEF(arm_sli_vec, 1, 2, 1, TCG_OPF_VECTOR)
152
+DEF(arm_sshl_vec, 1, 2, 0, TCG_OPF_VECTOR)
153
+DEF(arm_ushl_vec, 1, 2, 0, TCG_OPF_VECTOR)
154
diff --git a/tcg/i386/tcg-target-opc.h.inc b/tcg/i386/tcg-target-opc.h.inc
155
index XXXXXXX..XXXXXXX 100644
156
--- a/tcg/i386/tcg-target-opc.h.inc
157
+++ b/tcg/i386/tcg-target-opc.h.inc
158
@@ -XXX,XX +XXX,XX @@
159
* consider these to be UNSPEC with names.
160
*/
161
162
-DEF(x86_shufps_vec, 1, 2, 1, IMPLVEC)
163
-DEF(x86_blend_vec, 1, 2, 1, IMPLVEC)
164
-DEF(x86_packss_vec, 1, 2, 0, IMPLVEC)
165
-DEF(x86_packus_vec, 1, 2, 0, IMPLVEC)
166
-DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
167
-DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
168
-DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
169
-DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
170
-DEF(x86_vpshldi_vec, 1, 2, 1, IMPLVEC)
171
-DEF(x86_vpshldv_vec, 1, 3, 0, IMPLVEC)
172
-DEF(x86_vpshrdv_vec, 1, 3, 0, IMPLVEC)
173
+DEF(x86_shufps_vec, 1, 2, 1, TCG_OPF_VECTOR)
174
+DEF(x86_blend_vec, 1, 2, 1, TCG_OPF_VECTOR)
175
+DEF(x86_packss_vec, 1, 2, 0, TCG_OPF_VECTOR)
176
+DEF(x86_packus_vec, 1, 2, 0, TCG_OPF_VECTOR)
177
+DEF(x86_psrldq_vec, 1, 1, 1, TCG_OPF_VECTOR)
178
+DEF(x86_vperm2i128_vec, 1, 2, 1, TCG_OPF_VECTOR)
179
+DEF(x86_punpckl_vec, 1, 2, 0, TCG_OPF_VECTOR)
180
+DEF(x86_punpckh_vec, 1, 2, 0, TCG_OPF_VECTOR)
181
+DEF(x86_vpshldi_vec, 1, 2, 1, TCG_OPF_VECTOR)
182
+DEF(x86_vpshldv_vec, 1, 3, 0, TCG_OPF_VECTOR)
183
+DEF(x86_vpshrdv_vec, 1, 3, 0, TCG_OPF_VECTOR)
184
diff --git a/tcg/ppc/tcg-target-opc.h.inc b/tcg/ppc/tcg-target-opc.h.inc
185
index XXXXXXX..XXXXXXX 100644
186
--- a/tcg/ppc/tcg-target-opc.h.inc
187
+++ b/tcg/ppc/tcg-target-opc.h.inc
188
@@ -XXX,XX +XXX,XX @@
189
* consider these to be UNSPEC with names.
190
*/
191
192
-DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC)
193
-DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC)
194
-DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
195
-DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
196
-DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
197
-DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
198
+DEF(ppc_mrgh_vec, 1, 2, 0, TCG_OPF_VECTOR)
199
+DEF(ppc_mrgl_vec, 1, 2, 0, TCG_OPF_VECTOR)
200
+DEF(ppc_msum_vec, 1, 3, 0, TCG_OPF_VECTOR)
201
+DEF(ppc_muleu_vec, 1, 2, 0, TCG_OPF_VECTOR)
202
+DEF(ppc_mulou_vec, 1, 2, 0, TCG_OPF_VECTOR)
203
+DEF(ppc_pkum_vec, 1, 2, 0, TCG_OPF_VECTOR)
204
diff --git a/tcg/s390x/tcg-target-opc.h.inc b/tcg/s390x/tcg-target-opc.h.inc
205
index XXXXXXX..XXXXXXX 100644
206
--- a/tcg/s390x/tcg-target-opc.h.inc
207
+++ b/tcg/s390x/tcg-target-opc.h.inc
208
@@ -XXX,XX +XXX,XX @@
209
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
210
* consider these to be UNSPEC with names.
211
*/
212
-DEF(s390_vuph_vec, 1, 1, 0, IMPLVEC)
213
-DEF(s390_vupl_vec, 1, 1, 0, IMPLVEC)
214
-DEF(s390_vpks_vec, 1, 2, 0, IMPLVEC)
215
+DEF(s390_vuph_vec, 1, 1, 0, TCG_OPF_VECTOR)
216
+DEF(s390_vupl_vec, 1, 1, 0, TCG_OPF_VECTOR)
217
+DEF(s390_vpks_vec, 1, 2, 0, TCG_OPF_VECTOR)
218
--
219
2.43.0
220
221
diff view generated by jsdifflib
1
From: Luis Pires <luis.pires@eldorado.org.br>
1
We always provide bswap subroutines, whether they are optimized
2
using mips32r2 when available or not.
2
3
3
In preparation for changing the divu128/divs128 implementations
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
to allow for quotients larger than 64 bits, move the div-by-zero
5
and overflow checks to the callers.
6
7
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-Id: <20211025191154.350831-2-luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
6
---
12
include/hw/clock.h | 5 +++--
7
tcg/mips/tcg-target-has.h | 8 ++++----
13
include/qemu/host-utils.h | 34 ++++++++++++---------------------
8
1 file changed, 4 insertions(+), 4 deletions(-)
14
target/ppc/int_helper.c | 14 +++++++++-----
15
util/host-utils.c | 40 ++++++++++++++++++---------------------
16
4 files changed, 42 insertions(+), 51 deletions(-)
17
9
18
diff --git a/include/hw/clock.h b/include/hw/clock.h
10
diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h
19
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/clock.h
12
--- a/tcg/mips/tcg-target-has.h
21
+++ b/include/hw/clock.h
13
+++ b/tcg/mips/tcg-target-has.h
22
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
14
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
23
return 0;
15
#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
24
}
16
#define TCG_TARGET_HAS_muluh_i32 1
25
/*
17
#define TCG_TARGET_HAS_mulsh_i32 1
26
- * Ignore divu128() return value as we've caught div-by-zero and don't
18
+#define TCG_TARGET_HAS_bswap16_i32 1
27
- * need different behaviour for overflow.
19
#define TCG_TARGET_HAS_bswap32_i32 1
28
+ * BUG: when CONFIG_INT128 is not defined, the current implementation of
20
#define TCG_TARGET_HAS_negsetcond_i32 0
29
+ * divu128 does not return a valid truncated quotient, so the result will
21
30
+ * be wrong.
22
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
31
*/
32
divu128(&lo, &hi, clk->period);
33
return lo;
34
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/qemu/host-utils.h
37
+++ b/include/qemu/host-utils.h
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
39
return (__int128_t)a * b / c;
40
}
41
42
-static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
43
+static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
44
{
45
- if (divisor == 0) {
46
- return 1;
47
- } else {
48
- __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
49
- __uint128_t result = dividend / divisor;
50
- *plow = result;
51
- *phigh = dividend % divisor;
52
- return result > UINT64_MAX;
53
- }
54
+ __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
55
+ __uint128_t result = dividend / divisor;
56
+ *plow = result;
57
+ *phigh = dividend % divisor;
58
}
59
60
-static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
61
+static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
62
{
63
- if (divisor == 0) {
64
- return 1;
65
- } else {
66
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
67
- __int128_t result = dividend / divisor;
68
- *plow = result;
69
- *phigh = dividend % divisor;
70
- return result != *plow;
71
- }
72
+ __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
73
+ __int128_t result = dividend / divisor;
74
+ *plow = result;
75
+ *phigh = dividend % divisor;
76
}
77
#else
78
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
79
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
80
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
81
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
82
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
83
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
84
85
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
86
{
87
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/ppc/int_helper.c
90
+++ b/target/ppc/int_helper.c
91
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
92
uint64_t rt = 0;
93
int overflow = 0;
94
95
- overflow = divu128(&rt, &ra, rb);
96
-
97
- if (unlikely(overflow)) {
98
+ if (unlikely(rb == 0 || ra >= rb)) {
99
+ overflow = 1;
100
rt = 0; /* Undefined */
101
+ } else {
102
+ divu128(&rt, &ra, rb);
103
}
104
105
if (oe) {
106
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
107
int64_t rt = 0;
108
int64_t ra = (int64_t)rau;
109
int64_t rb = (int64_t)rbu;
110
- int overflow = divs128(&rt, &ra, rb);
111
+ int overflow = 0;
112
113
- if (unlikely(overflow)) {
114
+ if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
115
+ overflow = 1;
116
rt = 0; /* Undefined */
117
+ } else {
118
+ divs128(&rt, &ra, rb);
119
}
120
121
if (oe) {
122
diff --git a/util/host-utils.c b/util/host-utils.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/util/host-utils.c
125
+++ b/util/host-utils.c
126
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
127
*phigh = rh;
128
}
129
130
-/* Unsigned 128x64 division. Returns 1 if overflow (divide by zero or */
131
-/* quotient exceeds 64 bits). Otherwise returns quotient via plow and */
132
-/* remainder via phigh. */
133
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
134
+/*
135
+ * Unsigned 128-by-64 division. Returns quotient via plow and
136
+ * remainder via phigh.
137
+ * The result must fit in 64 bits (plow) - otherwise, the result
138
+ * is undefined.
139
+ * This function will cause a division by zero if passed a zero divisor.
140
+ */
141
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
142
{
143
uint64_t dhi = *phigh;
144
uint64_t dlo = *plow;
145
unsigned i;
146
uint64_t carry = 0;
147
148
- if (divisor == 0) {
149
- return 1;
150
- } else if (dhi == 0) {
151
+ if (divisor == 0 || dhi == 0) {
152
*plow = dlo / divisor;
153
*phigh = dlo % divisor;
154
- return 0;
155
- } else if (dhi >= divisor) {
156
- return 1;
157
} else {
158
159
for (i = 0; i < 64; i++) {
160
@@ -XXX,XX +XXX,XX @@ int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
161
162
*plow = dlo;
163
*phigh = dhi;
164
- return 0;
165
}
166
}
167
168
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
169
+/*
170
+ * Signed 128-by-64 division. Returns quotient via plow and
171
+ * remainder via phigh.
172
+ * The result must fit in 64 bits (plow) - otherwise, the result
173
+ * is undefined.
174
+ * This function will cause a division by zero if passed a zero divisor.
175
+ */
176
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
177
{
178
int sgn_dvdnd = *phigh < 0;
179
int sgn_divsr = divisor < 0;
180
- int overflow = 0;
181
182
if (sgn_dvdnd) {
183
*plow = ~(*plow);
184
@@ -XXX,XX +XXX,XX @@ int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
185
divisor = 0 - divisor;
186
}
187
188
- overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
189
+ divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
190
191
if (sgn_dvdnd ^ sgn_divsr) {
192
*plow = 0 - *plow;
193
}
194
-
195
- if (!overflow) {
196
- if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
197
- overflow = 1;
198
- }
199
- }
200
-
201
- return overflow;
202
}
203
#endif
23
#endif
204
24
25
/* optional instructions detected at runtime */
26
-#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
27
#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
28
#define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions
29
#define TCG_TARGET_HAS_sextract_i32 0
30
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
31
#define TCG_TARGET_HAS_qemu_st8_i32 0
32
33
#if TCG_TARGET_REG_BITS == 64
34
-#define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions
35
-#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions
36
-#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions
37
+#define TCG_TARGET_HAS_bswap16_i64 1
38
+#define TCG_TARGET_HAS_bswap32_i64 1
39
+#define TCG_TARGET_HAS_bswap64_i64 1
40
#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
41
#define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions
42
#define TCG_TARGET_HAS_sextract_i64 0
205
--
43
--
206
2.25.1
44
2.43.0
207
45
208
46
diff view generated by jsdifflib
1
The results are generally 6 bit unsigned values, though
1
When we generalize {s}extract_i32, we'll lose the
2
the count leading and trailing bits may produce any value
2
specific register constraints on ext8u and ext8s.
3
for a zero input.
3
It's just as easy to emit a couple of insns instead.
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 3 ++-
8
tcg/i386/tcg-target.c.inc | 23 +++++++++++++++++++----
10
1 file changed, 2 insertions(+), 1 deletion(-)
9
1 file changed, 19 insertions(+), 4 deletions(-)
11
10
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
13
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/tcg/optimize.c
14
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_rolw_8(TCGContext *s, int reg)
17
g_assert_not_reached();
16
18
}
17
static void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src)
19
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
18
{
20
-
19
- /* movzbl */
21
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
20
- tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
22
return false;
21
+ if (TCG_TARGET_REG_BITS == 32 && src >= 4) {
22
+ tcg_out_mov(s, TCG_TYPE_I32, dest, src);
23
+ if (dest >= 4) {
24
+ tcg_out_modrm(s, OPC_ARITH_EvIz, ARITH_AND, dest);
25
+ tcg_out32(s, 0xff);
26
+ return;
27
+ }
28
+ src = dest;
29
+ }
30
tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
23
}
31
}
24
32
25
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
33
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
26
default:
34
{
27
g_assert_not_reached();
35
int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
28
}
36
- /* movsbl */
29
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
37
- tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
30
return false;
38
+
39
+ if (TCG_TARGET_REG_BITS == 32 && src >= 4) {
40
+ tcg_out_mov(s, TCG_TYPE_I32, dest, src);
41
+ if (dest >= 4) {
42
+ tcg_out_shifti(s, SHIFT_SHL, dest, 24);
43
+ tcg_out_shifti(s, SHIFT_SAR, dest, 24);
44
+ return;
45
+ }
46
+ src = dest;
47
+ }
48
tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
31
}
49
}
32
50
33
--
51
--
34
2.25.1
52
2.43.0
35
53
36
54
diff view generated by jsdifflib
1
Break the final cleanup clause out of the main switch
1
Accept byte and word extensions with the extract opcodes.
2
statement. When fully folding an opcode to mov/movi,
2
This is preparatory to removing the specialized extracts.
3
use "continue" to process the next opcode, else break
3
4
to fall into the final cleanup.
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
6
---
11
tcg/optimize.c | 190 ++++++++++++++++++++++++-------------------------
7
tcg/i386/tcg-target-has.h | 49 +++++++++++++++++++++++++++----
12
1 file changed, 94 insertions(+), 96 deletions(-)
8
tcg/tcg-has.h | 12 +++++---
13
9
tcg/optimize.c | 8 +++--
10
tcg/tcg-op.c | 12 +++-----
11
tcg/i386/tcg-target.c.inc | 62 +++++++++++++++++++++++++++++----------
12
5 files changed, 107 insertions(+), 36 deletions(-)
13
14
diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/i386/tcg-target-has.h
17
+++ b/tcg/i386/tcg-target-has.h
18
@@ -XXX,XX +XXX,XX @@
19
#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
20
#define TCG_TARGET_HAS_deposit_i64 1
21
#define TCG_TARGET_HAS_extract_i64 1
22
-#define TCG_TARGET_HAS_sextract_i64 0
23
+#define TCG_TARGET_HAS_sextract_i64 1
24
#define TCG_TARGET_HAS_extract2_i64 1
25
#define TCG_TARGET_HAS_negsetcond_i64 1
26
#define TCG_TARGET_HAS_add2_i64 1
27
@@ -XXX,XX +XXX,XX @@
28
(TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
29
#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
30
31
-/* Check for the possibility of high-byte extraction and, for 64-bit,
32
- zero-extending 32-bit right-shift. */
33
-#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
34
-#define TCG_TARGET_extract_i64_valid(ofs, len) \
35
- (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
36
+/*
37
+ * Check for the possibility of low byte/word extraction, high-byte extraction
38
+ * and zero-extending 32-bit right-shift.
39
+ *
40
+ * We cannot sign-extend from high byte to 64-bits without using the
41
+ * REX prefix that explicitly excludes access to the high-byte registers.
42
+ */
43
+static inline bool
44
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
45
+{
46
+ switch (ofs) {
47
+ case 0:
48
+ switch (len) {
49
+ case 8:
50
+ case 16:
51
+ return true;
52
+ case 32:
53
+ return type == TCG_TYPE_I64;
54
+ }
55
+ return false;
56
+ case 8:
57
+ return len == 8 && type == TCG_TYPE_I32;
58
+ }
59
+ return false;
60
+}
61
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
62
+
63
+static inline bool
64
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
65
+{
66
+ if (type == TCG_TYPE_I64 && ofs + len == 32) {
67
+ return true;
68
+ }
69
+ switch (ofs) {
70
+ case 0:
71
+ return len == 8 || len == 16;
72
+ case 8:
73
+ return len == 8;
74
+ }
75
+ return false;
76
+}
77
+#define TCG_TARGET_extract_valid tcg_target_extract_valid
78
79
#endif
80
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/tcg-has.h
83
+++ b/tcg/tcg-has.h
84
@@ -XXX,XX +XXX,XX @@
85
#ifndef TCG_TARGET_deposit_i64_valid
86
#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
87
#endif
88
-#ifndef TCG_TARGET_extract_i32_valid
89
-#define TCG_TARGET_extract_i32_valid(ofs, len) 1
90
+#ifndef TCG_TARGET_extract_valid
91
+#define TCG_TARGET_extract_valid(type, ofs, len) \
92
+ ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_extract_i32 \
93
+ : TCG_TARGET_HAS_extract_i64)
94
#endif
95
-#ifndef TCG_TARGET_extract_i64_valid
96
-#define TCG_TARGET_extract_i64_valid(ofs, len) 1
97
+#ifndef TCG_TARGET_sextract_valid
98
+#define TCG_TARGET_sextract_valid(type, ofs, len) \
99
+ ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_sextract_i32 \
100
+ : TCG_TARGET_HAS_sextract_i64)
101
#endif
102
103
/* Only one of DIV or DIV2 should be defined. */
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
104
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
105
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
106
--- a/tcg/optimize.c
17
+++ b/tcg/optimize.c
107
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
108
@@ -XXX,XX +XXX,XX @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
19
switch (opc) {
109
xor_opc = INDEX_op_xor_i32;
20
CASE_OP_32_64_VEC(mov):
110
shr_opc = INDEX_op_shr_i32;
21
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
111
neg_opc = INDEX_op_neg_i32;
22
- break;
112
- if (TCG_TARGET_extract_i32_valid(sh, 1)) {
23
+ continue;
113
+ if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) {
24
114
uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
25
case INDEX_op_dup_vec:
115
+ }
26
if (arg_is_const(op->args[1])) {
116
+ if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) {
27
tmp = arg_info(op->args[1])->val;
117
sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
28
tmp = dup_const(TCGOP_VECE(op), tmp);
118
}
29
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
119
break;
30
- break;
120
@@ -XXX,XX +XXX,XX @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
31
+ continue;
121
xor_opc = INDEX_op_xor_i64;
32
}
122
shr_opc = INDEX_op_shr_i64;
33
- goto do_default;
123
neg_opc = INDEX_op_neg_i64;
34
+ break;
124
- if (TCG_TARGET_extract_i64_valid(sh, 1)) {
35
125
+ if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) {
36
case INDEX_op_dup2_vec:
126
uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
37
assert(TCG_TARGET_REG_BITS == 32);
127
+ }
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
128
+ if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) {
39
tcg_opt_gen_movi(s, &ctx, op, op->args[0],
129
sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
40
deposit64(arg_info(op->args[1])->val, 32, 32,
130
}
41
arg_info(op->args[2])->val));
131
break;
42
- break;
132
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
43
+ continue;
133
index XXXXXXX..XXXXXXX 100644
44
} else if (args_are_copies(op->args[1], op->args[2])) {
134
--- a/tcg/tcg-op.c
45
op->opc = INDEX_op_dup_vec;
135
+++ b/tcg/tcg-op.c
46
TCGOP_VECE(op) = MO_32;
136
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
47
nb_iargs = 1;
137
return;
48
}
138
}
49
- goto do_default;
139
50
+ break;
140
- if (TCG_TARGET_HAS_extract_i32
51
141
- && TCG_TARGET_extract_i32_valid(ofs, len)) {
52
CASE_OP_32_64(not):
142
+ if (TCG_TARGET_extract_valid(TCG_TYPE_I32, ofs, len)) {
53
CASE_OP_32_64(neg):
143
tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
144
return;
55
if (arg_is_const(op->args[1])) {
145
}
56
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
146
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
57
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
147
}
58
- break;
148
}
59
+ continue;
149
60
}
150
- if (TCG_TARGET_HAS_sextract_i32
61
- goto do_default;
151
- && TCG_TARGET_extract_i32_valid(ofs, len)) {
62
+ break;
152
+ if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, ofs, len)) {
63
153
tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
64
CASE_OP_32_64(bswap16):
154
return;
65
CASE_OP_32_64(bswap32):
155
}
66
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
156
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
67
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
157
goto do_shift_and;
68
op->args[2]);
158
}
69
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
159
70
- break;
160
- if (TCG_TARGET_HAS_extract_i64
71
+ continue;
161
- && TCG_TARGET_extract_i64_valid(ofs, len)) {
72
}
162
+ if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) {
73
- goto do_default;
163
tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
74
+ break;
164
return;
75
165
}
76
CASE_OP_32_64(add):
166
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
77
CASE_OP_32_64(sub):
167
return;
78
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
168
}
79
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
169
80
arg_info(op->args[2])->val);
170
- if (TCG_TARGET_HAS_sextract_i64
81
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
171
- && TCG_TARGET_extract_i64_valid(ofs, len)) {
82
- break;
172
+ if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, ofs, len)) {
83
+ continue;
173
tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
84
}
174
return;
85
- goto do_default;
175
}
86
+ break;
176
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
87
177
index XXXXXXX..XXXXXXX 100644
88
CASE_OP_32_64(clz):
178
--- a/tcg/i386/tcg-target.c.inc
89
CASE_OP_32_64(ctz):
179
+++ b/tcg/i386/tcg-target.c.inc
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
180
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
91
} else {
181
92
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
182
case INDEX_op_extract_i64:
93
}
183
if (a2 + args[3] == 32) {
94
- break;
184
+ if (a2 == 0) {
95
+ continue;
185
+ tcg_out_ext32u(s, a0, a1);
96
}
97
- goto do_default;
98
+ break;
99
100
CASE_OP_32_64(deposit):
101
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
102
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
103
op->args[3], op->args[4],
104
arg_info(op->args[2])->val);
105
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
106
- break;
107
+ continue;
108
}
109
- goto do_default;
110
+ break;
111
112
CASE_OP_32_64(extract):
113
if (arg_is_const(op->args[1])) {
114
tmp = extract64(arg_info(op->args[1])->val,
115
op->args[2], op->args[3]);
116
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
117
- break;
118
+ continue;
119
}
120
- goto do_default;
121
+ break;
122
123
CASE_OP_32_64(sextract):
124
if (arg_is_const(op->args[1])) {
125
tmp = sextract64(arg_info(op->args[1])->val,
126
op->args[2], op->args[3]);
127
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
128
- break;
129
+ continue;
130
}
131
- goto do_default;
132
+ break;
133
134
CASE_OP_32_64(extract2):
135
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
136
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
137
((uint32_t)v2 << (32 - shr)));
138
}
139
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
140
- break;
141
+ continue;
142
}
143
- goto do_default;
144
+ break;
145
146
CASE_OP_32_64(setcond):
147
tmp = do_constant_folding_cond(opc, op->args[1],
148
op->args[2], op->args[3]);
149
if (tmp != 2) {
150
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
151
- break;
152
+ continue;
153
}
154
- goto do_default;
155
+ break;
156
157
CASE_OP_32_64(brcond):
158
tmp = do_constant_folding_cond(opc, op->args[0],
159
op->args[1], op->args[2]);
160
- if (tmp != 2) {
161
- if (tmp) {
162
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
163
- op->opc = INDEX_op_br;
164
- op->args[0] = op->args[3];
165
- } else {
166
- tcg_op_remove(s, op);
167
- }
168
+ switch (tmp) {
169
+ case 0:
170
+ tcg_op_remove(s, op);
171
+ continue;
172
+ case 1:
173
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
174
+ op->opc = opc = INDEX_op_br;
175
+ op->args[0] = op->args[3];
176
break;
177
}
178
- goto do_default;
179
+ break;
180
181
CASE_OP_32_64(movcond):
182
tmp = do_constant_folding_cond(opc, op->args[1],
183
op->args[2], op->args[5]);
184
if (tmp != 2) {
185
tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
186
- break;
187
+ continue;
188
}
189
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
190
uint64_t tv = arg_info(op->args[3])->val;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
if (fv == 1 && tv == 0) {
193
cond = tcg_invert_cond(cond);
194
} else if (!(tv == 1 && fv == 0)) {
195
- goto do_default;
196
+ break;
197
}
198
op->args[3] = cond;
199
op->opc = opc = (opc == INDEX_op_movcond_i32
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
: INDEX_op_setcond_i64);
202
nb_iargs = 2;
203
}
204
- goto do_default;
205
+ break;
206
207
case INDEX_op_add2_i32:
208
case INDEX_op_sub2_i32:
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
rh = op->args[1];
211
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
212
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
213
- break;
214
+ continue;
215
}
216
- goto do_default;
217
+ break;
218
219
case INDEX_op_mulu2_i32:
220
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
221
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
222
rh = op->args[1];
223
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
224
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
225
- break;
226
+ continue;
227
}
228
- goto do_default;
229
+ break;
230
231
case INDEX_op_brcond2_i32:
232
tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
233
op->args[4]);
234
- if (tmp != 2) {
235
- if (tmp) {
236
- do_brcond_true:
237
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
238
- op->opc = INDEX_op_br;
239
- op->args[0] = op->args[5];
240
- } else {
241
+ if (tmp == 0) {
242
do_brcond_false:
243
- tcg_op_remove(s, op);
244
- }
245
- } else if ((op->args[4] == TCG_COND_LT
246
- || op->args[4] == TCG_COND_GE)
247
- && arg_is_const(op->args[2])
248
- && arg_info(op->args[2])->val == 0
249
- && arg_is_const(op->args[3])
250
- && arg_info(op->args[3])->val == 0) {
251
+ tcg_op_remove(s, op);
252
+ continue;
253
+ }
254
+ if (tmp == 1) {
255
+ do_brcond_true:
256
+ op->opc = opc = INDEX_op_br;
257
+ op->args[0] = op->args[5];
258
+ break;
186
+ break;
259
+ }
187
+ }
260
+ if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
188
/* This is a 32-bit zero-extending right shift. */
261
+ && arg_is_const(op->args[2])
189
tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
262
+ && arg_info(op->args[2])->val == 0
190
tcg_out_shifti(s, SHIFT_SHR, a0, a2);
263
+ && arg_is_const(op->args[3])
191
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
264
+ && arg_info(op->args[3])->val == 0) {
192
}
265
/* Simplify LT/GE comparisons vs zero to a single compare
193
/* FALLTHRU */
266
vs the high word of the input. */
194
case INDEX_op_extract_i32:
267
do_brcond_high:
195
- /* On the off-chance that we can use the high-byte registers.
268
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
196
- Otherwise we emit the same ext16 + shift pattern that we
269
- op->opc = INDEX_op_brcond_i32;
197
- would have gotten from the normal tcg-op.c expansion. */
270
+ op->opc = opc = INDEX_op_brcond_i32;
198
- tcg_debug_assert(a2 == 8 && args[3] == 8);
271
op->args[0] = op->args[1];
199
- if (a1 < 4 && a0 < 8) {
272
op->args[1] = op->args[3];
200
- tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
273
op->args[2] = op->args[4];
201
- } else {
274
op->args[3] = op->args[5];
202
+ if (a2 == 0 && args[3] == 8) {
275
- } else if (op->args[4] == TCG_COND_EQ) {
203
+ tcg_out_ext8u(s, a0, a1);
276
+ break;
204
+ } else if (a2 == 0 && args[3] == 16) {
205
tcg_out_ext16u(s, a0, a1);
206
- tcg_out_shifti(s, SHIFT_SHR, a0, 8);
207
+ } else if (a2 == 8 && args[3] == 8) {
208
+ /*
209
+ * On the off-chance that we can use the high-byte registers.
210
+ * Otherwise we emit the same ext16 + shift pattern that we
211
+ * would have gotten from the normal tcg-op.c expansion.
212
+ */
213
+ if (a1 < 4 && a0 < 8) {
214
+ tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
215
+ } else {
216
+ tcg_out_ext16u(s, a0, a1);
217
+ tcg_out_shifti(s, SHIFT_SHR, a0, 8);
277
+ }
218
+ }
278
+ if (op->args[4] == TCG_COND_EQ) {
219
+ } else {
279
/* Simplify EQ comparisons where one of the pairs
220
+ g_assert_not_reached();
280
can be simplified. */
221
+ }
281
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
222
+ break;
282
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
223
+
283
if (tmp == 0) {
224
+ case INDEX_op_sextract_i64:
284
goto do_brcond_false;
225
+ if (a2 == 0 && args[3] == 8) {
285
} else if (tmp != 1) {
226
+ tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1);
286
- goto do_default;
227
+ } else if (a2 == 0 && args[3] == 16) {
287
+ break;
228
+ tcg_out_ext16s(s, TCG_TYPE_I64, a0, a1);
288
}
229
+ } else if (a2 == 0 && args[3] == 32) {
289
do_brcond_low:
230
+ tcg_out_ext32s(s, a0, a1);
290
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
231
+ } else {
291
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
+ g_assert_not_reached();
292
op->args[1] = op->args[2];
233
}
293
op->args[2] = op->args[4];
234
break;
294
op->args[3] = op->args[5];
235
295
- } else if (op->args[4] == TCG_COND_NE) {
236
case INDEX_op_sextract_i32:
296
+ break;
237
- /* We don't implement sextract_i64, as we cannot sign-extend to
238
- 64-bits without using the REX prefix that explicitly excludes
239
- access to the high-byte registers. */
240
- tcg_debug_assert(a2 == 8 && args[3] == 8);
241
- if (a1 < 4 && a0 < 8) {
242
- tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
243
- } else {
244
+ if (a2 == 0 && args[3] == 8) {
245
+ tcg_out_ext8s(s, TCG_TYPE_I32, a0, a1);
246
+ } else if (a2 == 0 && args[3] == 16) {
247
tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
248
- tcg_out_shifti(s, SHIFT_SAR, a0, 8);
249
+ } else if (a2 == 8 && args[3] == 8) {
250
+ if (a1 < 4 && a0 < 8) {
251
+ tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
252
+ } else {
253
+ tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
254
+ tcg_out_shifti(s, SHIFT_SAR, a0, 8);
297
+ }
255
+ }
298
+ if (op->args[4] == TCG_COND_NE) {
299
/* Simplify NE comparisons where one of the pairs
300
can be simplified. */
301
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
302
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
303
} else if (tmp == 1) {
304
goto do_brcond_true;
305
}
306
- goto do_default;
307
- } else {
308
- goto do_default;
309
}
310
break;
311
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (tmp != 2) {
314
do_setcond_const:
315
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
316
- } else if ((op->args[5] == TCG_COND_LT
317
- || op->args[5] == TCG_COND_GE)
318
- && arg_is_const(op->args[3])
319
- && arg_info(op->args[3])->val == 0
320
- && arg_is_const(op->args[4])
321
- && arg_info(op->args[4])->val == 0) {
322
+ continue;
323
+ }
324
+ if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
325
+ && arg_is_const(op->args[3])
326
+ && arg_info(op->args[3])->val == 0
327
+ && arg_is_const(op->args[4])
328
+ && arg_info(op->args[4])->val == 0) {
329
/* Simplify LT/GE comparisons vs zero to a single compare
330
vs the high word of the input. */
331
do_setcond_high:
332
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
333
op->args[1] = op->args[2];
334
op->args[2] = op->args[4];
335
op->args[3] = op->args[5];
336
- } else if (op->args[5] == TCG_COND_EQ) {
337
+ break;
338
+ }
339
+ if (op->args[5] == TCG_COND_EQ) {
340
/* Simplify EQ comparisons where one of the pairs
341
can be simplified. */
342
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
343
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
344
if (tmp == 0) {
345
goto do_setcond_high;
346
} else if (tmp != 1) {
347
- goto do_default;
348
+ break;
349
}
350
do_setcond_low:
351
reset_temp(op->args[0]);
352
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
353
op->opc = INDEX_op_setcond_i32;
354
op->args[2] = op->args[3];
355
op->args[3] = op->args[5];
356
- } else if (op->args[5] == TCG_COND_NE) {
357
+ break;
358
+ }
359
+ if (op->args[5] == TCG_COND_NE) {
360
/* Simplify NE comparisons where one of the pairs
361
can be simplified. */
362
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
363
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
364
} else if (tmp == 1) {
365
goto do_setcond_const;
366
}
367
- goto do_default;
368
- } else {
369
- goto do_default;
370
}
371
break;
372
373
- case INDEX_op_call:
374
- if (!(tcg_call_flags(op)
375
+ default:
376
+ break;
377
+ }
378
+
379
+ /* Some of the folding above can change opc. */
380
+ opc = op->opc;
381
+ def = &tcg_op_defs[opc];
382
+ if (def->flags & TCG_OPF_BB_END) {
383
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
384
+ } else {
256
+ } else {
385
+ if (opc == INDEX_op_call &&
257
+ g_assert_not_reached();
386
+ !(tcg_call_flags(op)
258
}
387
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
259
break;
388
for (i = 0; i < nb_globals; i++) {
260
389
if (test_bit(i, ctx.temps_used.l)) {
261
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
390
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
262
case INDEX_op_extract_i32:
391
}
263
case INDEX_op_extract_i64:
392
}
264
case INDEX_op_sextract_i32:
393
}
265
+ case INDEX_op_sextract_i64:
394
- goto do_reset_output;
266
case INDEX_op_ctpop_i32:
395
267
case INDEX_op_ctpop_i64:
396
- default:
268
return C_O1_I1(r, r);
397
- do_default:
398
- /* Default case: we know nothing about operation (or were unable
399
- to compute the operation result) so no propagation is done.
400
- We trash everything if the operation is the end of a basic
401
- block, otherwise we only trash the output args. "z_mask" is
402
- the non-zero bits mask for the first output arg. */
403
- if (def->flags & TCG_OPF_BB_END) {
404
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
405
- } else {
406
- do_reset_output:
407
- for (i = 0; i < nb_oargs; i++) {
408
- reset_temp(op->args[i]);
409
- /* Save the corresponding known-zero bits mask for the
410
- first output argument (only one supported so far). */
411
- if (i == 0) {
412
- arg_info(op->args[i])->z_mask = z_mask;
413
- }
414
+ for (i = 0; i < nb_oargs; i++) {
415
+ reset_temp(op->args[i]);
416
+ /* Save the corresponding known-zero bits mask for the
417
+ first output argument (only one supported so far). */
418
+ if (i == 0) {
419
+ arg_info(op->args[i])->z_mask = z_mask;
420
}
421
}
422
- break;
423
}
424
425
/* Eliminate duplicate and redundant fence instructions. */
426
--
269
--
427
2.25.1
270
2.43.0
428
271
429
272
diff view generated by jsdifflib
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
1
Trivially mirrors TCG_TARGET_HAS_{s}extract_*.
2
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
tcg/optimize.c | 9 ++++++---
6
tcg/aarch64/tcg-target-has.h | 3 +++
7
1 file changed, 6 insertions(+), 3 deletions(-)
7
1 file changed, 3 insertions(+)
8
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h
10
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
11
--- a/tcg/aarch64/tcg-target-has.h
12
+++ b/tcg/optimize.c
12
+++ b/tcg/aarch64/tcg-target-has.h
13
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
13
@@ -XXX,XX +XXX,XX @@
14
uint64_t z_mask, partmask, affected, tmp;
14
#define TCG_TARGET_HAS_cmpsel_vec 0
15
TCGOpcode opc = op->opc;
15
#define TCG_TARGET_HAS_tst_vec 1
16
const TCGOpDef *def;
16
17
+ bool done = false;
17
+#define TCG_TARGET_extract_valid(type, ofs, len) 1
18
18
+#define TCG_TARGET_sextract_valid(type, ofs, len) 1
19
/* Calls are special. */
19
+
20
if (opc == INDEX_op_call) {
20
#endif
21
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
22
allocator where needed and possible. Also detect copies. */
23
switch (opc) {
24
CASE_OP_32_64_VEC(mov):
25
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
26
- continue;
27
+ done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
28
+ break;
29
30
case INDEX_op_dup_vec:
31
if (arg_is_const(op->args[1])) {
32
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
33
break;
34
}
35
36
- finish_folding(&ctx, op);
37
+ if (!done) {
38
+ finish_folding(&ctx, op);
39
+ }
40
41
/* Eliminate duplicate and redundant fence instructions. */
42
if (ctx.prev_mb) {
43
--
21
--
44
2.25.1
22
2.43.0
45
23
46
24
diff view generated by jsdifflib
1
Copy z_mask into OptContext, for writeback to the
1
We're about to change canonicalization of masks as extract
2
first output within the new function.
2
instead of and. Retain the andi expansion here.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 49 +++++++++++++++++++++++++++++++++----------------
7
tcg/aarch64/tcg-target.c.inc | 7 ++++++-
9
1 file changed, 33 insertions(+), 16 deletions(-)
8
1 file changed, 6 insertions(+), 1 deletion(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/optimize.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
16
TCGContext *tcg;
15
17
TCGOp *prev_mb;
16
case INDEX_op_extract_i64:
18
TCGTempSet temps_used;
17
case INDEX_op_extract_i32:
19
+
18
- tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
20
+ /* In flight values from optimization. */
19
+ if (a2 == 0) {
21
+ uint64_t z_mask;
20
+ uint64_t mask = MAKE_64BIT_MASK(0, args[3]);
22
} OptContext;
21
+ tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, mask);
23
22
+ } else {
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
23
+ tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
25
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
26
}
27
}
28
29
+static void finish_folding(OptContext *ctx, TCGOp *op)
30
+{
31
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
32
+ int i, nb_oargs;
33
+
34
+ /*
35
+ * For an opcode that ends a BB, reset all temp data.
36
+ * We do no cross-BB optimization.
37
+ */
38
+ if (def->flags & TCG_OPF_BB_END) {
39
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
40
+ ctx->prev_mb = NULL;
41
+ return;
42
+ }
43
+
44
+ nb_oargs = def->nb_oargs;
45
+ for (i = 0; i < nb_oargs; i++) {
46
+ reset_temp(op->args[i]);
47
+ /*
48
+ * Save the corresponding known-zero bits mask for the
49
+ * first output argument (only one supported so far).
50
+ */
51
+ if (i == 0) {
52
+ arg_info(op->args[i])->z_mask = ctx->z_mask;
53
+ }
24
+ }
54
+ }
25
break;
55
+}
26
56
+
27
case INDEX_op_sextract_i64:
57
static bool fold_call(OptContext *ctx, TCGOp *op)
58
{
59
TCGContext *s = ctx->tcg;
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
partmask &= 0xffffffffu;
62
affected &= 0xffffffffu;
63
}
64
+ ctx.z_mask = z_mask;
65
66
if (partmask == 0) {
67
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
break;
70
}
71
72
- /* Some of the folding above can change opc. */
73
- opc = op->opc;
74
- def = &tcg_op_defs[opc];
75
- if (def->flags & TCG_OPF_BB_END) {
76
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
77
- } else {
78
- int nb_oargs = def->nb_oargs;
79
- for (i = 0; i < nb_oargs; i++) {
80
- reset_temp(op->args[i]);
81
- /* Save the corresponding known-zero bits mask for the
82
- first output argument (only one supported so far). */
83
- if (i == 0) {
84
- arg_info(op->args[i])->z_mask = z_mask;
85
- }
86
- }
87
- }
88
+ finish_folding(&ctx, op);
89
90
/* Eliminate duplicate and redundant fence instructions. */
91
if (ctx.prev_mb) {
92
--
28
--
93
2.25.1
29
2.43.0
94
30
95
31
diff view generated by jsdifflib
1
Pull the "op r, a, a => mov r, a" optimization into a function,
1
The armv6 uxt and sxt opcodes have a 2-bit rotate field
2
and use it in the outer opcode fold functions.
2
which supports extractions from ofs = {0,8,16,24}.
3
Special case ofs = 0, len <= 8 as AND.
3
4
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 39 ++++++++++++++++++++++++---------------
8
tcg/arm/tcg-target-has.h | 21 ++++++++++++++--
9
1 file changed, 24 insertions(+), 15 deletions(-)
9
tcg/arm/tcg-target.c.inc | 54 +++++++++++++++++++++++++++++++++++-----
10
2 files changed, 67 insertions(+), 8 deletions(-)
10
11
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
--- a/tcg/arm/tcg-target-has.h
14
+++ b/tcg/optimize.c
15
+++ b/tcg/arm/tcg-target-has.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
16
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
16
return false;
17
#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
17
}
18
#define TCG_TARGET_HAS_ctpop_i32 0
18
19
#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
19
+/* If the binary operation has both arguments equal, fold to identity. */
20
-#define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
20
+static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
21
-#define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
22
+#define TCG_TARGET_HAS_extract_i32 1
23
+#define TCG_TARGET_HAS_sextract_i32 1
24
#define TCG_TARGET_HAS_extract2_i32 1
25
#define TCG_TARGET_HAS_negsetcond_i32 1
26
#define TCG_TARGET_HAS_mulu2_i32 1
27
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
28
#define TCG_TARGET_HAS_cmpsel_vec 0
29
#define TCG_TARGET_HAS_tst_vec 1
30
31
+static inline bool
32
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
21
+{
33
+{
22
+ if (args_are_copies(op->args[1], op->args[2])) {
34
+ if (use_armv7_instructions) {
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
35
+ return true; /* SBFX or UBFX */
36
+ }
37
+ switch (len) {
38
+ case 8: /* SXTB or UXTB */
39
+ case 16: /* SXTH or UXTH */
40
+ return (ofs % 8) == 0;
24
+ }
41
+ }
25
+ return false;
42
+ return false;
26
+}
43
+}
27
+
44
+
28
/*
45
+#define TCG_TARGET_extract_valid tcg_target_extract_valid
29
* These outermost fold_<op> functions are sorted alphabetically.
46
+#define TCG_TARGET_sextract_valid tcg_target_extract_valid
30
+ *
47
+
31
+ * The ordering of the transformations should be:
48
#endif
32
+ * 1) those that produce a constant
49
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
33
+ * 2) those that produce a copy
50
index XXXXXXX..XXXXXXX 100644
34
+ * 3) those that produce information about the result value.
51
--- a/tcg/arm/tcg-target.c.inc
35
*/
52
+++ b/tcg/arm/tcg-target.c.inc
36
53
@@ -XXX,XX +XXX,XX @@ static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd,
37
static bool fold_add(OptContext *ctx, TCGOp *op)
54
static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd,
38
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
55
TCGReg rn, int ofs, int len)
39
40
static bool fold_and(OptContext *ctx, TCGOp *op)
41
{
56
{
42
- return fold_const2(ctx, op);
57
- /* ubfx */
43
+ if (fold_const2(ctx, op) ||
58
- tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
44
+ fold_xx_to_x(ctx, op)) {
59
- | (ofs << 7) | ((len - 1) << 16));
45
+ return true;
60
+ /* According to gcc, AND can be faster. */
61
+ if (ofs == 0 && len <= 8) {
62
+ tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn,
63
+ encode_imm_nofail((1 << len) - 1));
64
+ return;
46
+ }
65
+ }
47
+ return false;
66
+
67
+ if (use_armv7_instructions) {
68
+ /* ubfx */
69
+ tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
70
+ | (ofs << 7) | ((len - 1) << 16));
71
+ return;
72
+ }
73
+
74
+ assert(ofs % 8 == 0);
75
+ switch (len) {
76
+ case 8:
77
+ /* uxtb */
78
+ tcg_out32(s, 0x06ef0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
79
+ break;
80
+ case 16:
81
+ /* uxth */
82
+ tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
83
+ break;
84
+ default:
85
+ g_assert_not_reached();
86
+ }
48
}
87
}
49
88
50
static bool fold_andc(OptContext *ctx, TCGOp *op)
89
static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd,
51
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
90
TCGReg rn, int ofs, int len)
52
53
static bool fold_or(OptContext *ctx, TCGOp *op)
54
{
91
{
55
- return fold_const2(ctx, op);
92
- /* sbfx */
56
+ if (fold_const2(ctx, op) ||
93
- tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
57
+ fold_xx_to_x(ctx, op)) {
94
- | (ofs << 7) | ((len - 1) << 16));
58
+ return true;
95
+ if (use_armv7_instructions) {
96
+ /* sbfx */
97
+ tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
98
+ | (ofs << 7) | ((len - 1) << 16));
99
+ return;
59
+ }
100
+ }
60
+ return false;
101
+
102
+ assert(ofs % 8 == 0);
103
+ switch (len) {
104
+ case 8:
105
+ /* sxtb */
106
+ tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
107
+ break;
108
+ case 16:
109
+ /* sxth */
110
+ tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
111
+ break;
112
+ default:
113
+ g_assert_not_reached();
114
+ }
61
}
115
}
62
116
63
static bool fold_orc(OptContext *ctx, TCGOp *op)
117
+
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
118
static void tcg_out_ld32u(TCGContext *s, ARMCond cond,
65
break;
119
TCGReg rd, TCGReg rn, int32_t offset)
66
}
120
{
67
68
- /* Simplify expression for "op r, a, a => mov r, a" cases */
69
- switch (opc) {
70
- CASE_OP_32_64_VEC(or):
71
- CASE_OP_32_64_VEC(and):
72
- if (args_are_copies(op->args[1], op->args[2])) {
73
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
74
- continue;
75
- }
76
- break;
77
- default:
78
- break;
79
- }
80
-
81
/*
82
* Process each opcode.
83
* Sorted alphabetically by opcode as much as possible.
84
--
121
--
85
2.25.1
122
2.43.0
86
123
87
124
diff view generated by jsdifflib
1
Reduce some code duplication by folding the NE and EQ cases.
1
Accept byte and word extensions with the extract opcodes.
2
This is preparatory to removing the specialized extracts.
2
3
3
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
tcg/optimize.c | 159 +++++++++++++++++++++++++------------------------
7
tcg/loongarch64/tcg-target-has.h | 15 ++++++++++++--
7
1 file changed, 81 insertions(+), 78 deletions(-)
8
tcg/loongarch64/tcg-target.c.inc | 34 ++++++++++++++++++++++++++++++--
9
2 files changed, 45 insertions(+), 4 deletions(-)
8
10
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h
10
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
13
--- a/tcg/loongarch64/tcg-target-has.h
12
+++ b/tcg/optimize.c
14
+++ b/tcg/loongarch64/tcg-target-has.h
13
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@
14
return fold_const2(ctx, op);
16
#define TCG_TARGET_HAS_rot_i32 1
15
}
17
#define TCG_TARGET_HAS_deposit_i32 1
16
18
#define TCG_TARGET_HAS_extract_i32 1
17
+static bool fold_brcond2(OptContext *ctx, TCGOp *op)
19
-#define TCG_TARGET_HAS_sextract_i32 0
20
+#define TCG_TARGET_HAS_sextract_i32 1
21
#define TCG_TARGET_HAS_extract2_i32 0
22
#define TCG_TARGET_HAS_add2_i32 0
23
#define TCG_TARGET_HAS_sub2_i32 0
24
@@ -XXX,XX +XXX,XX @@
25
#define TCG_TARGET_HAS_rot_i64 1
26
#define TCG_TARGET_HAS_deposit_i64 1
27
#define TCG_TARGET_HAS_extract_i64 1
28
-#define TCG_TARGET_HAS_sextract_i64 0
29
+#define TCG_TARGET_HAS_sextract_i64 1
30
#define TCG_TARGET_HAS_extract2_i64 0
31
#define TCG_TARGET_HAS_extr_i64_i32 1
32
#define TCG_TARGET_HAS_ext8s_i64 1
33
@@ -XXX,XX +XXX,XX @@
34
#define TCG_TARGET_HAS_cmpsel_vec 0
35
#define TCG_TARGET_HAS_tst_vec 0
36
37
+#define TCG_TARGET_extract_valid(type, ofs, len) 1
38
+
39
+static inline bool
40
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
18
+{
41
+{
19
+ TCGCond cond = op->args[4];
42
+ if (type == TCG_TYPE_I64 && ofs + len == 32) {
20
+ int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
43
+ return true;
21
+ TCGArg label = op->args[5];
22
+ int inv = 0;
23
+
24
+ if (i >= 0) {
25
+ goto do_brcond_const;
26
+ }
44
+ }
27
+
45
+ return ofs == 0 && (len == 8 || len == 16);
28
+ switch (cond) {
46
+}
29
+ case TCG_COND_LT:
47
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
30
+ case TCG_COND_GE:
48
31
+ /*
49
#endif
32
+ * Simplify LT/GE comparisons vs zero to a single compare
50
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
33
+ * vs the high word of the input.
51
index XXXXXXX..XXXXXXX 100644
34
+ */
52
--- a/tcg/loongarch64/tcg-target.c.inc
35
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
53
+++ b/tcg/loongarch64/tcg-target.c.inc
36
+ arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
54
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
37
+ goto do_brcond_high;
55
break;
56
57
case INDEX_op_extract_i32:
58
- tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1);
59
+ if (a2 == 0 && args[3] <= 12) {
60
+ tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1);
61
+ } else {
62
+ tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1);
63
+ }
64
break;
65
case INDEX_op_extract_i64:
66
- tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1);
67
+ if (a2 == 0 && args[3] <= 12) {
68
+ tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1);
69
+ } else {
70
+ tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1);
38
+ }
71
+ }
39
+ break;
72
+ break;
40
+
73
+
41
+ case TCG_COND_NE:
74
+ case INDEX_op_sextract_i64:
42
+ inv = 1;
75
+ if (a2 + args[3] == 32) {
43
+ QEMU_FALLTHROUGH;
76
+ if (a2 == 0) {
44
+ case TCG_COND_EQ:
77
+ tcg_out_ext32s(s, a0, a1);
45
+ /*
78
+ } else {
46
+ * Simplify EQ/NE comparisons where one of the pairs
79
+ tcg_out_opc_srai_w(s, a0, a1, a2);
47
+ * can be simplified.
80
+ }
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
50
+ op->args[2], cond);
51
+ switch (i ^ inv) {
52
+ case 0:
53
+ goto do_brcond_const;
54
+ case 1:
55
+ goto do_brcond_high;
56
+ }
57
+
58
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
59
+ op->args[3], cond);
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_brcond_const;
63
+ case 1:
64
+ op->opc = INDEX_op_brcond_i32;
65
+ op->args[1] = op->args[2];
66
+ op->args[2] = cond;
67
+ op->args[3] = label;
68
+ break;
81
+ break;
69
+ }
82
+ }
70
+ break;
83
+ /* FALLTHRU */
71
+
84
+ case INDEX_op_sextract_i32:
72
+ default:
85
+ if (a2 == 0 && args[3] == 8) {
73
+ break;
86
+ tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1);
74
+
87
+ } else if (a2 == 0 && args[3] == 16) {
75
+ do_brcond_high:
88
+ tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1);
76
+ op->opc = INDEX_op_brcond_i32;
89
+ } else {
77
+ op->args[0] = op->args[1];
90
+ g_assert_not_reached();
78
+ op->args[1] = op->args[3];
79
+ op->args[2] = cond;
80
+ op->args[3] = label;
81
+ break;
82
+
83
+ do_brcond_const:
84
+ if (i == 0) {
85
+ tcg_op_remove(ctx->tcg, op);
86
+ return true;
87
+ }
91
+ }
88
+ op->opc = INDEX_op_br;
92
break;
89
+ op->args[0] = label;
93
90
+ break;
94
case INDEX_op_deposit_i32:
91
+ }
95
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
92
+ return false;
96
case INDEX_op_not_i64:
93
+}
97
case INDEX_op_extract_i32:
94
+
98
case INDEX_op_extract_i64:
95
static bool fold_call(OptContext *ctx, TCGOp *op)
99
+ case INDEX_op_sextract_i32:
96
{
100
+ case INDEX_op_sextract_i64:
97
TCGContext *s = ctx->tcg;
101
case INDEX_op_bswap16_i32:
98
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
102
case INDEX_op_bswap16_i64:
99
}
103
case INDEX_op_bswap32_i32:
100
break;
101
102
- case INDEX_op_brcond2_i32:
103
- i = do_constant_folding_cond2(&op->args[0], &op->args[2],
104
- op->args[4]);
105
- if (i == 0) {
106
- do_brcond_false:
107
- tcg_op_remove(s, op);
108
- continue;
109
- }
110
- if (i > 0) {
111
- do_brcond_true:
112
- op->opc = opc = INDEX_op_br;
113
- op->args[0] = op->args[5];
114
- break;
115
- }
116
- if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
117
- && arg_is_const(op->args[2])
118
- && arg_info(op->args[2])->val == 0
119
- && arg_is_const(op->args[3])
120
- && arg_info(op->args[3])->val == 0) {
121
- /* Simplify LT/GE comparisons vs zero to a single compare
122
- vs the high word of the input. */
123
- do_brcond_high:
124
- op->opc = opc = INDEX_op_brcond_i32;
125
- op->args[0] = op->args[1];
126
- op->args[1] = op->args[3];
127
- op->args[2] = op->args[4];
128
- op->args[3] = op->args[5];
129
- break;
130
- }
131
- if (op->args[4] == TCG_COND_EQ) {
132
- /* Simplify EQ comparisons where one of the pairs
133
- can be simplified. */
134
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
135
- op->args[0], op->args[2],
136
- TCG_COND_EQ);
137
- if (i == 0) {
138
- goto do_brcond_false;
139
- } else if (i > 0) {
140
- goto do_brcond_high;
141
- }
142
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
143
- op->args[1], op->args[3],
144
- TCG_COND_EQ);
145
- if (i == 0) {
146
- goto do_brcond_false;
147
- } else if (i < 0) {
148
- break;
149
- }
150
- do_brcond_low:
151
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
152
- op->opc = INDEX_op_brcond_i32;
153
- op->args[1] = op->args[2];
154
- op->args[2] = op->args[4];
155
- op->args[3] = op->args[5];
156
- break;
157
- }
158
- if (op->args[4] == TCG_COND_NE) {
159
- /* Simplify NE comparisons where one of the pairs
160
- can be simplified. */
161
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
162
- op->args[0], op->args[2],
163
- TCG_COND_NE);
164
- if (i == 0) {
165
- goto do_brcond_high;
166
- } else if (i > 0) {
167
- goto do_brcond_true;
168
- }
169
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
170
- op->args[1], op->args[3],
171
- TCG_COND_NE);
172
- if (i == 0) {
173
- goto do_brcond_low;
174
- } else if (i > 0) {
175
- goto do_brcond_true;
176
- }
177
- }
178
- break;
179
-
180
default:
181
break;
182
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
CASE_OP_32_64_VEC(andc):
185
done = fold_andc(&ctx, op);
186
break;
187
+ case INDEX_op_brcond2_i32:
188
+ done = fold_brcond2(&ctx, op);
189
+ break;
190
CASE_OP_32_64(ctpop):
191
done = fold_ctpop(&ctx, op);
192
break;
193
--
104
--
194
2.25.1
105
2.43.0
195
106
196
107
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Accept AND, ext32u, ext32s extensions with the extract opcodes.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
This is preparatory to removing the specialized extracts.
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
tcg/optimize.c | 56 ++++++++++++++++++++++++++++----------------------
7
tcg/mips/tcg-target-has.h | 26 ++++++++++++++++++++++----
6
1 file changed, 31 insertions(+), 25 deletions(-)
8
tcg/mips/tcg-target.c.inc | 33 ++++++++++++++++++++++++++++++---
9
2 files changed, 52 insertions(+), 7 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/tcg/mips/tcg-target-has.h
11
+++ b/tcg/optimize.c
14
+++ b/tcg/mips/tcg-target-has.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
13
return true;
16
14
}
17
/* optional instructions detected at runtime */
15
18
#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
16
+static bool fold_movcond(OptContext *ctx, TCGOp *op)
19
-#define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions
20
-#define TCG_TARGET_HAS_sextract_i32 0
21
+#define TCG_TARGET_HAS_extract_i32 1
22
+#define TCG_TARGET_HAS_sextract_i32 1
23
#define TCG_TARGET_HAS_extract2_i32 0
24
#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
25
#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
26
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
27
#define TCG_TARGET_HAS_bswap32_i64 1
28
#define TCG_TARGET_HAS_bswap64_i64 1
29
#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
30
-#define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions
31
-#define TCG_TARGET_HAS_sextract_i64 0
32
+#define TCG_TARGET_HAS_extract_i64 1
33
+#define TCG_TARGET_HAS_sextract_i64 1
34
#define TCG_TARGET_HAS_extract2_i64 0
35
#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
36
#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
37
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
38
#define TCG_TARGET_HAS_qemu_ldst_i128 0
39
#define TCG_TARGET_HAS_tst 0
40
41
+#define TCG_TARGET_extract_valid(type, ofs, len) use_mips32r2_instructions
42
+
43
+static inline bool
44
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
17
+{
45
+{
18
+ TCGOpcode opc = op->opc;
46
+ if (ofs == 0) {
19
+ TCGCond cond = op->args[5];
47
+ switch (len) {
20
+ int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
48
+ case 8:
21
+
49
+ case 16:
22
+ if (i >= 0) {
50
+ return use_mips32r2_instructions;
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
51
+ case 32:
24
+ }
52
+ return type == TCG_TYPE_I64;
25
+
26
+ if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
27
+ uint64_t tv = arg_info(op->args[3])->val;
28
+ uint64_t fv = arg_info(op->args[4])->val;
29
+
30
+ opc = (opc == INDEX_op_movcond_i32
31
+ ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
32
+
33
+ if (tv == 1 && fv == 0) {
34
+ op->opc = opc;
35
+ op->args[3] = cond;
36
+ } else if (fv == 1 && tv == 0) {
37
+ op->opc = opc;
38
+ op->args[3] = tcg_invert_cond(cond);
39
+ }
53
+ }
40
+ }
54
+ }
41
+ return false;
55
+ return false;
42
+}
56
+}
57
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
43
+
58
+
44
static bool fold_mul(OptContext *ctx, TCGOp *op)
59
#endif
45
{
60
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
46
return fold_const2(ctx, op);
61
index XXXXXXX..XXXXXXX 100644
47
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
--- a/tcg/mips/tcg-target.c.inc
48
}
63
+++ b/tcg/mips/tcg-target.c.inc
49
break;
64
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
50
65
tcg_out_opc_bf64(s, OPC_DINS, OPC_DINSM, OPC_DINSU, a0, a2,
51
- CASE_OP_32_64(movcond):
66
args[3] + args[4] - 1, args[3]);
52
- i = do_constant_folding_cond(opc, op->args[1],
67
break;
53
- op->args[2], op->args[5]);
68
+
54
- if (i >= 0) {
69
case INDEX_op_extract_i32:
55
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
70
- tcg_out_opc_bf(s, OPC_EXT, a0, a1, args[3] - 1, a2);
56
- continue;
71
+ if (a2 == 0 && args[3] <= 16) {
57
- }
72
+ tcg_out_opc_imm(s, OPC_ANDI, a0, a1, (1 << args[3]) - 1);
58
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
73
+ } else {
59
- uint64_t tv = arg_info(op->args[3])->val;
74
+ tcg_out_opc_bf(s, OPC_EXT, a0, a1, args[3] - 1, a2);
60
- uint64_t fv = arg_info(op->args[4])->val;
75
+ }
61
- TCGCond cond = op->args[5];
76
break;
62
-
77
case INDEX_op_extract_i64:
63
- if (fv == 1 && tv == 0) {
78
- tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1,
64
- cond = tcg_invert_cond(cond);
79
- args[3] - 1, a2);
65
- } else if (!(tv == 1 && fv == 0)) {
80
+ if (a2 == 0 && args[3] <= 16) {
66
- break;
81
+ tcg_out_opc_imm(s, OPC_ANDI, a0, a1, (1 << args[3]) - 1);
67
- }
82
+ } else {
68
- op->args[3] = cond;
83
+ tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU,
69
- op->opc = opc = (opc == INDEX_op_movcond_i32
84
+ a0, a1, args[3] - 1, a2);
70
- ? INDEX_op_setcond_i32
85
+ }
71
- : INDEX_op_setcond_i64);
86
+ break;
72
- }
87
+
73
- break;
88
+ case INDEX_op_sextract_i64:
74
-
89
+ if (a2 == 0 && args[3] == 32) {
75
-
90
+ tcg_out_ext32s(s, a0, a1);
76
default:
77
break;
78
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
case INDEX_op_mb:
81
done = fold_mb(&ctx, op);
82
break;
83
+ CASE_OP_32_64(movcond):
84
+ done = fold_movcond(&ctx, op);
85
+ break;
91
+ break;
86
CASE_OP_32_64(mul):
92
+ }
87
done = fold_mul(&ctx, op);
93
+ /* FALLTHRU */
88
break;
94
+ case INDEX_op_sextract_i32:
95
+ if (a2 == 0 && args[3] == 8) {
96
+ tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1);
97
+ } else if (a2 == 0 && args[3] == 16) {
98
+ tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1);
99
+ } else {
100
+ g_assert_not_reached();
101
+ }
102
break;
103
104
case INDEX_op_brcond_i32:
105
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
106
case INDEX_op_ext8s_i32:
107
case INDEX_op_ext16s_i32:
108
case INDEX_op_extract_i32:
109
+ case INDEX_op_sextract_i32:
110
case INDEX_op_ld8u_i64:
111
case INDEX_op_ld8s_i64:
112
case INDEX_op_ld16u_i64:
113
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
114
case INDEX_op_extrl_i64_i32:
115
case INDEX_op_extrh_i64_i32:
116
case INDEX_op_extract_i64:
117
+ case INDEX_op_sextract_i64:
118
return C_O1_I1(r, r);
119
120
case INDEX_op_st8_i32:
89
--
121
--
90
2.25.1
122
2.43.0
91
123
92
124
diff view generated by jsdifflib
1
Reduce some code duplication by folding the NE and EQ cases.
1
Accept byte and word extensions with the extract opcodes.
2
This is preparatory to removing the specialized extracts.
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/optimize.c | 145 ++++++++++++++++++++++++-------------------------
7
tcg/ppc/tcg-target-has.h | 16 ++++++++++++++--
8
1 file changed, 72 insertions(+), 73 deletions(-)
8
tcg/ppc/tcg-target.c.inc | 30 ++++++++++++++++++++++++++++++
9
2 files changed, 44 insertions(+), 2 deletions(-)
9
10
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
--- a/tcg/ppc/tcg-target-has.h
13
+++ b/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target-has.h
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@
15
return fold_const2(ctx, op);
16
#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
16
}
17
#define TCG_TARGET_HAS_deposit_i32 1
17
18
#define TCG_TARGET_HAS_extract_i32 1
18
+static bool fold_setcond2(OptContext *ctx, TCGOp *op)
19
-#define TCG_TARGET_HAS_sextract_i32 0
20
+#define TCG_TARGET_HAS_sextract_i32 1
21
#define TCG_TARGET_HAS_extract2_i32 0
22
#define TCG_TARGET_HAS_negsetcond_i32 1
23
#define TCG_TARGET_HAS_mulu2_i32 0
24
@@ -XXX,XX +XXX,XX @@
25
#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
26
#define TCG_TARGET_HAS_deposit_i64 1
27
#define TCG_TARGET_HAS_extract_i64 1
28
-#define TCG_TARGET_HAS_sextract_i64 0
29
+#define TCG_TARGET_HAS_sextract_i64 1
30
#define TCG_TARGET_HAS_extract2_i64 0
31
#define TCG_TARGET_HAS_negsetcond_i64 1
32
#define TCG_TARGET_HAS_add2_i64 1
33
@@ -XXX,XX +XXX,XX @@
34
#define TCG_TARGET_HAS_cmpsel_vec 1
35
#define TCG_TARGET_HAS_tst_vec 0
36
37
+#define TCG_TARGET_extract_valid(type, ofs, len) 1
38
+
39
+static inline bool
40
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
19
+{
41
+{
20
+ TCGCond cond = op->args[5];
42
+ if (type == TCG_TYPE_I64 && ofs + len == 32) {
21
+ int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
43
+ return true;
22
+ int inv = 0;
44
+ }
45
+ return ofs == 0 && (len == 8 || len == 16);
46
+}
47
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
23
+
48
+
24
+ if (i >= 0) {
49
#endif
25
+ goto do_setcond_const;
50
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
26
+ }
51
index XXXXXXX..XXXXXXX 100644
27
+
52
--- a/tcg/ppc/tcg-target.c.inc
28
+ switch (cond) {
53
+++ b/tcg/ppc/tcg-target.c.inc
29
+ case TCG_COND_LT:
54
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
30
+ case TCG_COND_GE:
55
break;
31
+ /*
56
32
+ * Simplify LT/GE comparisons vs zero to a single compare
57
case INDEX_op_extract_i32:
33
+ * vs the high word of the input.
58
+ if (args[2] == 0 && args[3] <= 16) {
34
+ */
59
+ tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1));
35
+ if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
60
+ break;
36
+ arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
61
+ }
37
+ goto do_setcond_high;
62
tcg_out_rlw(s, RLWINM, args[0], args[1],
63
32 - args[2], 32 - args[3], 31);
64
break;
65
case INDEX_op_extract_i64:
66
+ if (args[2] == 0 && args[3] <= 16) {
67
+ tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1));
68
+ break;
69
+ }
70
tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
71
break;
72
73
+ case INDEX_op_sextract_i64:
74
+ if (args[2] + args[3] == 32) {
75
+ if (args[2] == 0) {
76
+ tcg_out_ext32s(s, args[0], args[1]);
77
+ } else {
78
+ tcg_out_sari32(s, args[0], args[1], args[2]);
79
+ }
80
+ break;
81
+ }
82
+ /* FALLTHRU */
83
+ case INDEX_op_sextract_i32:
84
+ if (args[2] == 0 && args[3] == 8) {
85
+ tcg_out_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
86
+ } else if (args[2] == 0 && args[3] == 16) {
87
+ tcg_out_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
88
+ } else {
89
+ g_assert_not_reached();
38
+ }
90
+ }
39
+ break;
91
+ break;
40
+
92
+
41
+ case TCG_COND_NE:
93
case INDEX_op_movcond_i32:
42
+ inv = 1;
94
tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
43
+ QEMU_FALLTHROUGH;
95
args[3], args[4], const_args[2]);
44
+ case TCG_COND_EQ:
96
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
45
+ /*
97
case INDEX_op_bswap16_i32:
46
+ * Simplify EQ/NE comparisons where one of the pairs
98
case INDEX_op_bswap32_i32:
47
+ * can be simplified.
99
case INDEX_op_extract_i32:
48
+ */
100
+ case INDEX_op_sextract_i32:
49
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
101
case INDEX_op_ld8u_i64:
50
+ op->args[3], cond);
102
case INDEX_op_ld8s_i64:
51
+ switch (i ^ inv) {
103
case INDEX_op_ld16u_i64:
52
+ case 0:
104
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
53
+ goto do_setcond_const;
105
case INDEX_op_bswap32_i64:
54
+ case 1:
106
case INDEX_op_bswap64_i64:
55
+ goto do_setcond_high;
107
case INDEX_op_extract_i64:
56
+ }
108
+ case INDEX_op_sextract_i64:
57
+
109
return C_O1_I1(r, r);
58
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
110
59
+ op->args[4], cond);
111
case INDEX_op_st8_i32:
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_setcond_const;
63
+ case 1:
64
+ op->args[2] = op->args[3];
65
+ op->args[3] = cond;
66
+ op->opc = INDEX_op_setcond_i32;
67
+ break;
68
+ }
69
+ break;
70
+
71
+ default:
72
+ break;
73
+
74
+ do_setcond_high:
75
+ op->args[1] = op->args[2];
76
+ op->args[2] = op->args[4];
77
+ op->args[3] = cond;
78
+ op->opc = INDEX_op_setcond_i32;
79
+ break;
80
+ }
81
+ return false;
82
+
83
+ do_setcond_const:
84
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
85
+}
86
+
87
static bool fold_shift(OptContext *ctx, TCGOp *op)
88
{
89
return fold_const2(ctx, op);
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
}
92
break;
93
94
- case INDEX_op_setcond2_i32:
95
- i = do_constant_folding_cond2(&op->args[1], &op->args[3],
96
- op->args[5]);
97
- if (i >= 0) {
98
- do_setcond_const:
99
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
100
- continue;
101
- }
102
- if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
103
- && arg_is_const(op->args[3])
104
- && arg_info(op->args[3])->val == 0
105
- && arg_is_const(op->args[4])
106
- && arg_info(op->args[4])->val == 0) {
107
- /* Simplify LT/GE comparisons vs zero to a single compare
108
- vs the high word of the input. */
109
- do_setcond_high:
110
- reset_temp(op->args[0]);
111
- arg_info(op->args[0])->z_mask = 1;
112
- op->opc = INDEX_op_setcond_i32;
113
- op->args[1] = op->args[2];
114
- op->args[2] = op->args[4];
115
- op->args[3] = op->args[5];
116
- break;
117
- }
118
- if (op->args[5] == TCG_COND_EQ) {
119
- /* Simplify EQ comparisons where one of the pairs
120
- can be simplified. */
121
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
122
- op->args[1], op->args[3],
123
- TCG_COND_EQ);
124
- if (i == 0) {
125
- goto do_setcond_const;
126
- } else if (i > 0) {
127
- goto do_setcond_high;
128
- }
129
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
130
- op->args[2], op->args[4],
131
- TCG_COND_EQ);
132
- if (i == 0) {
133
- goto do_setcond_high;
134
- } else if (i < 0) {
135
- break;
136
- }
137
- do_setcond_low:
138
- reset_temp(op->args[0]);
139
- arg_info(op->args[0])->z_mask = 1;
140
- op->opc = INDEX_op_setcond_i32;
141
- op->args[2] = op->args[3];
142
- op->args[3] = op->args[5];
143
- break;
144
- }
145
- if (op->args[5] == TCG_COND_NE) {
146
- /* Simplify NE comparisons where one of the pairs
147
- can be simplified. */
148
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
149
- op->args[1], op->args[3],
150
- TCG_COND_NE);
151
- if (i == 0) {
152
- goto do_setcond_high;
153
- } else if (i > 0) {
154
- goto do_setcond_const;
155
- }
156
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
157
- op->args[2], op->args[4],
158
- TCG_COND_NE);
159
- if (i == 0) {
160
- goto do_setcond_low;
161
- } else if (i > 0) {
162
- goto do_setcond_const;
163
- }
164
- }
165
- break;
166
-
167
default:
168
break;
169
170
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
171
CASE_OP_32_64(shr):
172
done = fold_shift(&ctx, op);
173
break;
174
+ case INDEX_op_setcond2_i32:
175
+ done = fold_setcond2(&ctx, op);
176
+ break;
177
CASE_OP_32_64_VEC(sub):
178
done = fold_sub(&ctx, op);
179
break;
180
--
112
--
181
2.25.1
113
2.43.0
182
114
183
115
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Accept byte and word extensions with the extract opcodes.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
This is preparatory to removing the specialized extracts.
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
tcg/optimize.c | 53 +++++++++++++++++++++++++++++---------------------
7
tcg/riscv/tcg-target-has.h | 39 ++++++++++++++++++++++++++++++++++----
6
1 file changed, 31 insertions(+), 22 deletions(-)
8
tcg/riscv/tcg-target.c.inc | 34 +++++++++++++++++++++++++++++++++
9
2 files changed, 69 insertions(+), 4 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/tcg/riscv/tcg-target-has.h
11
+++ b/tcg/optimize.c
14
+++ b/tcg/riscv/tcg-target-has.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@
13
return fold_const2(ctx, op);
16
#define TCG_TARGET_HAS_div2_i32 0
14
}
17
#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB)
15
18
#define TCG_TARGET_HAS_deposit_i32 0
16
+static bool fold_dup(OptContext *ctx, TCGOp *op)
19
-#define TCG_TARGET_HAS_extract_i32 0
20
-#define TCG_TARGET_HAS_sextract_i32 0
21
+#define TCG_TARGET_HAS_extract_i32 1
22
+#define TCG_TARGET_HAS_sextract_i32 1
23
#define TCG_TARGET_HAS_extract2_i32 0
24
#define TCG_TARGET_HAS_add2_i32 1
25
#define TCG_TARGET_HAS_sub2_i32 1
26
@@ -XXX,XX +XXX,XX @@
27
#define TCG_TARGET_HAS_div2_i64 0
28
#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB)
29
#define TCG_TARGET_HAS_deposit_i64 0
30
-#define TCG_TARGET_HAS_extract_i64 0
31
-#define TCG_TARGET_HAS_sextract_i64 0
32
+#define TCG_TARGET_HAS_extract_i64 1
33
+#define TCG_TARGET_HAS_sextract_i64 1
34
#define TCG_TARGET_HAS_extract2_i64 0
35
#define TCG_TARGET_HAS_extr_i64_i32 1
36
#define TCG_TARGET_HAS_ext8s_i64 1
37
@@ -XXX,XX +XXX,XX @@
38
39
#define TCG_TARGET_HAS_tst_vec 0
40
41
+static inline bool
42
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
17
+{
43
+{
18
+ if (arg_is_const(op->args[1])) {
44
+ if (ofs == 0) {
19
+ uint64_t t = arg_info(op->args[1])->val;
45
+ switch (len) {
20
+ t = dup_const(TCGOP_VECE(op), t);
46
+ case 16:
21
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
47
+ return cpuinfo & CPUINFO_ZBB;
48
+ case 32:
49
+ return (cpuinfo & CPUINFO_ZBA) && type == TCG_TYPE_I64;
50
+ }
22
+ }
51
+ }
23
+ return false;
52
+ return false;
24
+}
53
+}
54
+#define TCG_TARGET_extract_valid tcg_target_extract_valid
25
+
55
+
26
+static bool fold_dup2(OptContext *ctx, TCGOp *op)
56
+static inline bool
57
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
27
+{
58
+{
28
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
59
+ if (ofs == 0) {
29
+ uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
60
+ switch (len) {
30
+ arg_info(op->args[2])->val);
61
+ case 8:
31
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
62
+ case 16:
32
+ }
63
+ return cpuinfo & CPUINFO_ZBB;
33
+
64
+ case 32:
34
+ if (args_are_copies(op->args[1], op->args[2])) {
65
+ return type == TCG_TYPE_I64;
35
+ op->opc = INDEX_op_dup_vec;
66
+ }
36
+ TCGOP_VECE(op) = MO_32;
37
+ }
67
+ }
38
+ return false;
68
+ return false;
39
+}
69
+}
70
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
40
+
71
+
41
static bool fold_eqv(OptContext *ctx, TCGOp *op)
72
#endif
42
{
73
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
43
return fold_const2(ctx, op);
74
index XXXXXXX..XXXXXXX 100644
44
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
75
--- a/tcg/riscv/tcg-target.c.inc
45
done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
76
+++ b/tcg/riscv/tcg-target.c.inc
46
break;
77
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
47
78
tcg_out_mb(s, a0);
48
- case INDEX_op_dup_vec:
79
break;
49
- if (arg_is_const(op->args[1])) {
80
50
- tmp = arg_info(op->args[1])->val;
81
+ case INDEX_op_extract_i64:
51
- tmp = dup_const(TCGOP_VECE(op), tmp);
82
+ if (a2 == 0 && args[3] == 32) {
52
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
83
+ tcg_out_ext32u(s, a0, a1);
53
- continue;
54
- }
55
- break;
56
-
57
- case INDEX_op_dup2_vec:
58
- assert(TCG_TARGET_REG_BITS == 32);
59
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
60
- tcg_opt_gen_movi(&ctx, op, op->args[0],
61
- deposit64(arg_info(op->args[1])->val, 32, 32,
62
- arg_info(op->args[2])->val));
63
- continue;
64
- } else if (args_are_copies(op->args[1], op->args[2])) {
65
- op->opc = INDEX_op_dup_vec;
66
- TCGOP_VECE(op) = MO_32;
67
- }
68
- break;
69
-
70
default:
71
break;
72
73
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
74
CASE_OP_32_64(divu):
75
done = fold_divide(&ctx, op);
76
break;
77
+ case INDEX_op_dup_vec:
78
+ done = fold_dup(&ctx, op);
79
+ break;
84
+ break;
80
+ case INDEX_op_dup2_vec:
85
+ }
81
+ done = fold_dup2(&ctx, op);
86
+ /* FALLTHRU */
87
+ case INDEX_op_extract_i32:
88
+ if (a2 == 0 && args[3] == 16) {
89
+ tcg_out_ext16u(s, a0, a1);
90
+ } else {
91
+ g_assert_not_reached();
92
+ }
93
+ break;
94
+
95
+ case INDEX_op_sextract_i64:
96
+ if (a2 == 0 && args[3] == 32) {
97
+ tcg_out_ext32s(s, a0, a1);
82
+ break;
98
+ break;
83
CASE_OP_32_64(eqv):
99
+ }
84
done = fold_eqv(&ctx, op);
100
+ /* FALLTHRU */
85
break;
101
+ case INDEX_op_sextract_i32:
102
+ if (a2 == 0 && args[3] == 8) {
103
+ tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1);
104
+ } else if (a2 == 0 && args[3] == 16) {
105
+ tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1);
106
+ } else {
107
+ g_assert_not_reached();
108
+ }
109
+ break;
110
+
111
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
112
case INDEX_op_mov_i64:
113
case INDEX_op_call: /* Always emitted via tcg_out_call. */
114
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
115
case INDEX_op_extrl_i64_i32:
116
case INDEX_op_extrh_i64_i32:
117
case INDEX_op_ext_i32_i64:
118
+ case INDEX_op_extract_i32:
119
+ case INDEX_op_extract_i64:
120
+ case INDEX_op_sextract_i32:
121
+ case INDEX_op_sextract_i64:
122
case INDEX_op_bswap16_i32:
123
case INDEX_op_bswap32_i32:
124
case INDEX_op_bswap16_i64:
86
--
125
--
87
2.25.1
126
2.43.0
88
127
89
128
diff view generated by jsdifflib
1
This will allow callers to tail call to these functions
1
Extracts which abut bit 32 may use 32-bit shifts.
2
and return true indicating processing complete.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
tcg/optimize.c | 9 +++++----
6
tcg/riscv/tcg-target-has.h | 24 +++++++-----------------
10
1 file changed, 5 insertions(+), 4 deletions(-)
7
tcg/riscv/tcg-target.c.inc | 16 ++++++++++++----
8
2 files changed, 19 insertions(+), 21 deletions(-)
11
9
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
12
--- a/tcg/riscv/tcg-target-has.h
15
+++ b/tcg/optimize.c
13
+++ b/tcg/riscv/tcg-target-has.h
16
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
14
@@ -XXX,XX +XXX,XX @@
17
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
15
static inline bool
16
tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
17
{
18
- if (ofs == 0) {
19
- switch (len) {
20
- case 16:
21
- return cpuinfo & CPUINFO_ZBB;
22
- case 32:
23
- return (cpuinfo & CPUINFO_ZBA) && type == TCG_TYPE_I64;
24
- }
25
+ if (type == TCG_TYPE_I64 && ofs + len == 32) {
26
+ /* ofs > 0 uses SRLIW; ofs == 0 uses add.uw. */
27
+ return ofs || (cpuinfo & CPUINFO_ZBA);
28
}
29
- return false;
30
+ return (cpuinfo & CPUINFO_ZBB) && ofs == 0 && len == 16;
18
}
31
}
19
32
#define TCG_TARGET_extract_valid tcg_target_extract_valid
20
-static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
33
21
+static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
34
static inline bool
35
tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
22
{
36
{
23
TCGTemp *dst_ts = arg_temp(dst);
37
- if (ofs == 0) {
24
TCGTemp *src_ts = arg_temp(src);
38
- switch (len) {
25
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
39
- case 8:
26
40
- case 16:
27
if (ts_are_copies(dst_ts, src_ts)) {
41
- return cpuinfo & CPUINFO_ZBB;
28
tcg_op_remove(ctx->tcg, op);
42
- case 32:
29
- return;
43
- return type == TCG_TYPE_I64;
44
- }
45
+ if (type == TCG_TYPE_I64 && ofs + len == 32) {
30
+ return true;
46
+ return true;
31
}
47
}
32
48
- return false;
33
reset_ts(dst_ts);
49
+ return (cpuinfo & CPUINFO_ZBB) && ofs == 0 && (len == 8 || len == 16);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
35
di->is_const = si->is_const;
36
di->val = si->val;
37
}
38
+ return true;
39
}
50
}
40
51
#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
41
-static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
52
42
+static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
53
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
43
TCGArg dst, uint64_t val)
54
index XXXXXXX..XXXXXXX 100644
44
{
55
--- a/tcg/riscv/tcg-target.c.inc
45
const TCGOpDef *def = &tcg_op_defs[op->opc];
56
+++ b/tcg/riscv/tcg-target.c.inc
46
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
57
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
47
/* Convert movi to mov with constant temp. */
58
break;
48
tv = tcg_constant_internal(type, val);
59
49
init_ts_info(ctx, tv);
60
case INDEX_op_extract_i64:
50
- tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
61
- if (a2 == 0 && args[3] == 32) {
51
+ return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
62
- tcg_out_ext32u(s, a0, a1);
52
}
63
+ if (a2 + args[3] == 32) {
53
64
+ if (a2 == 0) {
54
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
65
+ tcg_out_ext32u(s, a0, a1);
66
+ } else {
67
+ tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2);
68
+ }
69
break;
70
}
71
/* FALLTHRU */
72
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
73
break;
74
75
case INDEX_op_sextract_i64:
76
- if (a2 == 0 && args[3] == 32) {
77
- tcg_out_ext32s(s, a0, a1);
78
+ if (a2 + args[3] == 32) {
79
+ if (a2 == 0) {
80
+ tcg_out_ext32s(s, a0, a1);
81
+ } else {
82
+ tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2);
83
+ }
84
break;
85
}
86
/* FALLTHRU */
55
--
87
--
56
2.25.1
88
2.43.0
57
89
58
90
diff view generated by jsdifflib
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Accept byte and word extensions with the extract opcodes.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
This is preparatory to removing the specialized extracts.
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
tcg/optimize.c | 48 ++++++++++++++++++++++++++++++------------------
7
tcg/s390x/tcg-target-has.h | 22 ++++++++++++++++++++--
6
1 file changed, 30 insertions(+), 18 deletions(-)
8
tcg/s390x/tcg-target.c.inc | 37 +++++++++++++++++++++++++++++++++++++
9
2 files changed, 57 insertions(+), 2 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/tcg/s390x/tcg-target-has.h
11
+++ b/tcg/optimize.c
14
+++ b/tcg/s390x/tcg-target-has.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
13
return fold_const2(ctx, op);
16
#define TCG_TARGET_HAS_ctpop_i32 1
14
}
17
#define TCG_TARGET_HAS_deposit_i32 1
15
18
#define TCG_TARGET_HAS_extract_i32 1
16
+static bool fold_extract(OptContext *ctx, TCGOp *op)
19
-#define TCG_TARGET_HAS_sextract_i32 0
20
+#define TCG_TARGET_HAS_sextract_i32 1
21
#define TCG_TARGET_HAS_extract2_i32 0
22
#define TCG_TARGET_HAS_negsetcond_i32 1
23
#define TCG_TARGET_HAS_add2_i32 1
24
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
25
#define TCG_TARGET_HAS_ctpop_i64 1
26
#define TCG_TARGET_HAS_deposit_i64 1
27
#define TCG_TARGET_HAS_extract_i64 1
28
-#define TCG_TARGET_HAS_sextract_i64 0
29
+#define TCG_TARGET_HAS_sextract_i64 1
30
#define TCG_TARGET_HAS_extract2_i64 0
31
#define TCG_TARGET_HAS_negsetcond_i64 1
32
#define TCG_TARGET_HAS_add2_i64 1
33
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
34
#define TCG_TARGET_HAS_cmpsel_vec 1
35
#define TCG_TARGET_HAS_tst_vec 0
36
37
+#define TCG_TARGET_extract_valid(type, ofs, len) 1
38
+
39
+static inline bool
40
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
17
+{
41
+{
18
+ if (arg_is_const(op->args[1])) {
42
+ if (ofs == 0) {
19
+ uint64_t t;
43
+ switch (len) {
20
+
44
+ case 8:
21
+ t = arg_info(op->args[1])->val;
45
+ case 16:
22
+ t = extract64(t, op->args[2], op->args[3]);
46
+ return true;
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
47
+ case 32:
48
+ return type == TCG_TYPE_I64;
49
+ }
24
+ }
50
+ }
25
+ return false;
51
+ return false;
26
+}
52
+}
53
+#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
27
+
54
+
28
static bool fold_extract2(OptContext *ctx, TCGOp *op)
55
#endif
56
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
57
index XXXXXXX..XXXXXXX 100644
58
--- a/tcg/s390x/tcg-target.c.inc
59
+++ b/tcg/s390x/tcg-target.c.inc
60
@@ -XXX,XX +XXX,XX @@ static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
61
static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
62
int ofs, int len)
29
{
63
{
30
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
64
+ if (ofs == 0) {
31
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
65
+ switch (len) {
32
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
66
+ case 8:
67
+ tcg_out_ext8u(s, dest, src);
68
+ return;
69
+ case 16:
70
+ tcg_out_ext16u(s, dest, src);
71
+ return;
72
+ case 32:
73
+ tcg_out_ext32u(s, dest, src);
74
+ return;
75
+ }
76
+ }
77
tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
33
}
78
}
34
79
35
+static bool fold_sextract(OptContext *ctx, TCGOp *op)
80
+static void tgen_sextract(TCGContext *s, TCGReg dest, TCGReg src,
81
+ int ofs, int len)
36
+{
82
+{
37
+ if (arg_is_const(op->args[1])) {
83
+ if (ofs == 0) {
38
+ uint64_t t;
84
+ switch (len) {
39
+
85
+ case 8:
40
+ t = arg_info(op->args[1])->val;
86
+ tcg_out_ext8s(s, TCG_TYPE_REG, dest, src);
41
+ t = sextract64(t, op->args[2], op->args[3]);
87
+ return;
42
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
88
+ case 16:
89
+ tcg_out_ext16s(s, TCG_TYPE_REG, dest, src);
90
+ return;
91
+ case 32:
92
+ tcg_out_ext32s(s, dest, src);
93
+ return;
94
+ }
43
+ }
95
+ }
44
+ return false;
96
+ g_assert_not_reached();
45
+}
97
+}
46
+
98
+
47
static bool fold_shift(OptContext *ctx, TCGOp *op)
99
static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
48
{
100
{
49
return fold_const2(ctx, op);
101
ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
50
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
102
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
51
}
103
OP_32_64(extract):
52
break;
104
tgen_extract(s, args[0], args[1], args[2], args[3]);
53
105
break;
54
- CASE_OP_32_64(extract):
106
+ OP_32_64(sextract):
55
- if (arg_is_const(op->args[1])) {
107
+ tgen_sextract(s, args[0], args[1], args[2], args[3]);
56
- tmp = extract64(arg_info(op->args[1])->val,
108
+ break;
57
- op->args[2], op->args[3]);
109
58
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
110
case INDEX_op_clz_i64:
59
- continue;
111
tgen_clz(s, args[0], args[1], args[2], const_args[2]);
60
- }
112
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
61
- break;
113
case INDEX_op_extu_i32_i64:
62
-
114
case INDEX_op_extract_i32:
63
- CASE_OP_32_64(sextract):
115
case INDEX_op_extract_i64:
64
- if (arg_is_const(op->args[1])) {
116
+ case INDEX_op_sextract_i32:
65
- tmp = sextract64(arg_info(op->args[1])->val,
117
+ case INDEX_op_sextract_i64:
66
- op->args[2], op->args[3]);
118
case INDEX_op_ctpop_i32:
67
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
119
case INDEX_op_ctpop_i64:
68
- continue;
120
return C_O1_I1(r, r);
69
- }
70
- break;
71
-
72
default:
73
break;
74
75
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
76
CASE_OP_32_64(eqv):
77
done = fold_eqv(&ctx, op);
78
break;
79
+ CASE_OP_32_64(extract):
80
+ done = fold_extract(&ctx, op);
81
+ break;
82
CASE_OP_32_64(extract2):
83
done = fold_extract2(&ctx, op);
84
break;
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
case INDEX_op_setcond2_i32:
87
done = fold_setcond2(&ctx, op);
88
break;
89
+ CASE_OP_32_64(sextract):
90
+ done = fold_sextract(&ctx, op);
91
+ break;
92
CASE_OP_32_64_VEC(sub):
93
done = fold_sub(&ctx, op);
94
break;
95
--
121
--
96
2.25.1
122
2.43.0
97
123
98
124
diff view generated by jsdifflib
1
Continue splitting tcg_optimize.
1
Extracts which abut bit 32 may use 32-bit shifts.
2
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/optimize.c | 22 ++++++++++++++--------
6
tcg/sparc64/tcg-target-has.h | 13 +++++++++----
9
1 file changed, 14 insertions(+), 8 deletions(-)
7
tcg/sparc64/tcg-target.c.inc | 11 +++++++++++
8
2 files changed, 20 insertions(+), 4 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/tcg/sparc64/tcg-target-has.h
14
+++ b/tcg/optimize.c
13
+++ b/tcg/sparc64/tcg-target-has.h
15
@@ -XXX,XX +XXX,XX @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
14
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
16
}
15
#define TCG_TARGET_HAS_ctz_i32 0
17
}
16
#define TCG_TARGET_HAS_ctpop_i32 0
18
17
#define TCG_TARGET_HAS_deposit_i32 0
19
+static void copy_propagate(OptContext *ctx, TCGOp *op,
18
-#define TCG_TARGET_HAS_extract_i32 0
20
+ int nb_oargs, int nb_iargs)
19
-#define TCG_TARGET_HAS_sextract_i32 0
21
+{
20
+#define TCG_TARGET_HAS_extract_i32 1
22
+ TCGContext *s = ctx->tcg;
21
+#define TCG_TARGET_HAS_sextract_i32 1
22
#define TCG_TARGET_HAS_extract2_i32 0
23
#define TCG_TARGET_HAS_negsetcond_i32 1
24
#define TCG_TARGET_HAS_add2_i32 1
25
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
26
#define TCG_TARGET_HAS_ctz_i64 0
27
#define TCG_TARGET_HAS_ctpop_i64 0
28
#define TCG_TARGET_HAS_deposit_i64 0
29
-#define TCG_TARGET_HAS_extract_i64 0
30
-#define TCG_TARGET_HAS_sextract_i64 0
31
+#define TCG_TARGET_HAS_extract_i64 1
32
+#define TCG_TARGET_HAS_sextract_i64 1
33
#define TCG_TARGET_HAS_extract2_i64 0
34
#define TCG_TARGET_HAS_negsetcond_i64 1
35
#define TCG_TARGET_HAS_add2_i64 1
36
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
37
38
#define TCG_TARGET_HAS_tst 1
39
40
+#define TCG_TARGET_extract_valid(type, ofs, len) \
41
+ ((type) == TCG_TYPE_I64 && (ofs) + (len) == 32)
23
+
42
+
24
+ for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
43
+#define TCG_TARGET_sextract_valid TCG_TARGET_extract_valid
25
+ TCGTemp *ts = arg_temp(op->args[i]);
26
+ if (ts && ts_is_copy(ts)) {
27
+ op->args[i] = temp_arg(find_better_copy(s, ts));
28
+ }
29
+ }
30
+}
31
+
44
+
32
/* Propagate constants and copies, fold constant expressions. */
45
#endif
33
void tcg_optimize(TCGContext *s)
46
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
34
{
47
index XXXXXXX..XXXXXXX 100644
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
--- a/tcg/sparc64/tcg-target.c.inc
36
nb_iargs = def->nb_iargs;
49
+++ b/tcg/sparc64/tcg-target.c.inc
37
}
50
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
38
init_arguments(&ctx, op, nb_oargs + nb_iargs);
51
tcg_out_mb(s, a0);
39
-
52
break;
40
- /* Do copy propagation */
53
41
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
54
+ case INDEX_op_extract_i64:
42
- TCGTemp *ts = arg_temp(op->args[i]);
55
+ tcg_debug_assert(a2 + args[3] == 32);
43
- if (ts && ts_is_copy(ts)) {
56
+ tcg_out_arithi(s, a0, a1, a2, SHIFT_SRL);
44
- op->args[i] = temp_arg(find_better_copy(s, ts));
57
+ break;
45
- }
58
+ case INDEX_op_sextract_i64:
46
- }
59
+ tcg_debug_assert(a2 + args[3] == 32);
47
+ copy_propagate(&ctx, op, nb_oargs, nb_iargs);
60
+ tcg_out_arithi(s, a0, a1, a2, SHIFT_SRA);
48
61
+ break;
49
/* For commutative operations make constant second argument */
62
+
50
switch (opc) {
63
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
64
case INDEX_op_mov_i64:
65
case INDEX_op_call: /* Always emitted via tcg_out_call. */
66
@@ -XXX,XX +XXX,XX @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
67
case INDEX_op_ext32u_i64:
68
case INDEX_op_ext_i32_i64:
69
case INDEX_op_extu_i32_i64:
70
+ case INDEX_op_extract_i64:
71
+ case INDEX_op_sextract_i64:
72
case INDEX_op_qemu_ld_a32_i32:
73
case INDEX_op_qemu_ld_a64_i32:
74
case INDEX_op_qemu_ld_a32_i64:
51
--
75
--
52
2.25.1
76
2.43.0
53
77
54
78
diff view generated by jsdifflib
1
From: Luis Pires <luis.pires@eldorado.org.br>
1
Trivially mirrors TCG_TARGET_HAS_{s}extract_*.
2
2
3
These will be used to implement new decimal floating point
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
instructions from Power ISA 3.1.
5
6
The remainder is now returned directly by divu128/divs128,
7
freeing up phigh to receive the high 64 bits of the quotient.
8
9
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-Id: <20211025191154.350831-4-luis.pires@eldorado.org.br>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
5
---
14
include/hw/clock.h | 6 +-
6
tcg/tci/tcg-target-has.h | 3 +++
15
include/qemu/host-utils.h | 20 ++++--
7
1 file changed, 3 insertions(+)
16
target/ppc/int_helper.c | 9 +--
17
util/host-utils.c | 133 +++++++++++++++++++++++++-------------
18
4 files changed, 108 insertions(+), 60 deletions(-)
19
8
20
diff --git a/include/hw/clock.h b/include/hw/clock.h
9
diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h
21
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/clock.h
11
--- a/tcg/tci/tcg-target-has.h
23
+++ b/include/hw/clock.h
12
+++ b/tcg/tci/tcg-target-has.h
24
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
13
@@ -XXX,XX +XXX,XX @@
25
if (clk->period == 0) {
14
26
return 0;
15
#define TCG_TARGET_HAS_tst 1
27
}
16
28
- /*
17
+#define TCG_TARGET_extract_valid(type, ofs, len) 1
29
- * BUG: when CONFIG_INT128 is not defined, the current implementation of
18
+#define TCG_TARGET_sextract_valid(type, ofs, len) 1
30
- * divu128 does not return a valid truncated quotient, so the result will
31
- * be wrong.
32
- */
33
+
19
+
34
divu128(&lo, &hi, clk->period);
35
return lo;
36
}
37
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/include/qemu/host-utils.h
40
+++ b/include/qemu/host-utils.h
41
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
42
return (__int128_t)a * b / c;
43
}
44
45
-static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
46
+static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
47
+ uint64_t divisor)
48
{
49
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
50
__uint128_t result = dividend / divisor;
51
+
52
*plow = result;
53
- *phigh = dividend % divisor;
54
+ *phigh = result >> 64;
55
+ return dividend % divisor;
56
}
57
58
-static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
59
+static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
60
+ int64_t divisor)
61
{
62
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
63
+ __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
64
__int128_t result = dividend / divisor;
65
+
66
*plow = result;
67
- *phigh = dividend % divisor;
68
+ *phigh = result >> 64;
69
+ return dividend % divisor;
70
}
71
#else
72
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
73
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
74
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
75
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
76
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
77
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
78
79
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
80
{
81
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/ppc/int_helper.c
84
+++ b/target/ppc/int_helper.c
85
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
86
87
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
88
{
89
- int64_t rt = 0;
90
+ uint64_t rt = 0;
91
int64_t ra = (int64_t)rau;
92
int64_t rb = (int64_t)rbu;
93
int overflow = 0;
94
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
95
int cr;
96
uint64_t lo_value;
97
uint64_t hi_value;
98
+ uint64_t rem;
99
ppc_avr_t ret = { .u64 = { 0, 0 } };
100
101
if (b->VsrSD(0) < 0) {
102
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
103
* In that case, we leave r unchanged.
104
*/
105
} else {
106
- divu128(&lo_value, &hi_value, 1000000000000000ULL);
107
+ rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
108
109
- for (i = 1; i < 16; hi_value /= 10, i++) {
110
- bcd_put_digit(&ret, hi_value % 10, i);
111
+ for (i = 1; i < 16; rem /= 10, i++) {
112
+ bcd_put_digit(&ret, rem % 10, i);
113
}
114
115
for (; i < 32; lo_value /= 10, i++) {
116
diff --git a/util/host-utils.c b/util/host-utils.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/util/host-utils.c
119
+++ b/util/host-utils.c
120
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
121
}
122
123
/*
124
- * Unsigned 128-by-64 division. Returns quotient via plow and
125
- * remainder via phigh.
126
- * The result must fit in 64 bits (plow) - otherwise, the result
127
- * is undefined.
128
- * This function will cause a division by zero if passed a zero divisor.
129
+ * Unsigned 128-by-64 division.
130
+ * Returns the remainder.
131
+ * Returns quotient via plow and phigh.
132
+ * Also returns the remainder via the function return value.
133
*/
134
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
135
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
136
{
137
uint64_t dhi = *phigh;
138
uint64_t dlo = *plow;
139
- unsigned i;
140
- uint64_t carry = 0;
141
+ uint64_t rem, dhighest;
142
+ int sh;
143
144
if (divisor == 0 || dhi == 0) {
145
*plow = dlo / divisor;
146
- *phigh = dlo % divisor;
147
+ *phigh = 0;
148
+ return dlo % divisor;
149
} else {
150
+ sh = clz64(divisor);
151
152
- for (i = 0; i < 64; i++) {
153
- carry = dhi >> 63;
154
- dhi = (dhi << 1) | (dlo >> 63);
155
- if (carry || (dhi >= divisor)) {
156
- dhi -= divisor;
157
- carry = 1;
158
- } else {
159
- carry = 0;
160
+ if (dhi < divisor) {
161
+ if (sh != 0) {
162
+ /* normalize the divisor, shifting the dividend accordingly */
163
+ divisor <<= sh;
164
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
165
+ dlo <<= sh;
166
}
167
- dlo = (dlo << 1) | carry;
168
+
169
+ *phigh = 0;
170
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
171
+ } else {
172
+ if (sh != 0) {
173
+ /* normalize the divisor, shifting the dividend accordingly */
174
+ divisor <<= sh;
175
+ dhighest = dhi >> (64 - sh);
176
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
177
+ dlo <<= sh;
178
+
179
+ *phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
180
+ } else {
181
+ /**
182
+ * dhi >= divisor
183
+ * Since the MSB of divisor is set (sh == 0),
184
+ * (dhi - divisor) < divisor
185
+ *
186
+ * Thus, the high part of the quotient is 1, and we can
187
+ * calculate the low part with a single call to udiv_qrnnd
188
+ * after subtracting divisor from dhi
189
+ */
190
+ dhi -= divisor;
191
+ *phigh = 1;
192
+ }
193
+
194
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
195
}
196
197
- *plow = dlo;
198
- *phigh = dhi;
199
+ /*
200
+ * since the dividend/divisor might have been normalized,
201
+ * the remainder might also have to be shifted back
202
+ */
203
+ return rem >> sh;
204
}
205
}
206
207
/*
208
- * Signed 128-by-64 division. Returns quotient via plow and
209
- * remainder via phigh.
210
- * The result must fit in 64 bits (plow) - otherwise, the result
211
- * is undefined.
212
- * This function will cause a division by zero if passed a zero divisor.
213
+ * Signed 128-by-64 division.
214
+ * Returns quotient via plow and phigh.
215
+ * Also returns the remainder via the function return value.
216
*/
217
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
218
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
219
{
220
- int sgn_dvdnd = *phigh < 0;
221
- int sgn_divsr = divisor < 0;
222
+ bool neg_quotient = false, neg_remainder = false;
223
+ uint64_t unsig_hi = *phigh, unsig_lo = *plow;
224
+ uint64_t rem;
225
226
- if (sgn_dvdnd) {
227
- *plow = ~(*plow);
228
- *phigh = ~(*phigh);
229
- if (*plow == (int64_t)-1) {
230
+ if (*phigh < 0) {
231
+ neg_quotient = !neg_quotient;
232
+ neg_remainder = !neg_remainder;
233
+
234
+ if (unsig_lo == 0) {
235
+ unsig_hi = -unsig_hi;
236
+ } else {
237
+ unsig_hi = ~unsig_hi;
238
+ unsig_lo = -unsig_lo;
239
+ }
240
+ }
241
+
242
+ if (divisor < 0) {
243
+ neg_quotient = !neg_quotient;
244
+
245
+ divisor = -divisor;
246
+ }
247
+
248
+ rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
249
+
250
+ if (neg_quotient) {
251
+ if (unsig_lo == 0) {
252
+ *phigh = -unsig_hi;
253
*plow = 0;
254
- (*phigh)++;
255
- } else {
256
- (*plow)++;
257
- }
258
+ } else {
259
+ *phigh = ~unsig_hi;
260
+ *plow = -unsig_lo;
261
+ }
262
+ } else {
263
+ *phigh = unsig_hi;
264
+ *plow = unsig_lo;
265
}
266
267
- if (sgn_divsr) {
268
- divisor = 0 - divisor;
269
- }
270
-
271
- divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
272
-
273
- if (sgn_dvdnd ^ sgn_divsr) {
274
- *plow = 0 - *plow;
275
+ if (neg_remainder) {
276
+ return -rem;
277
+ } else {
278
+ return rem;
279
}
280
}
281
#endif
20
#endif
282
--
21
--
283
2.25.1
22
2.43.0
284
23
285
24
diff view generated by jsdifflib
1
There was no real reason for calls to have separate code here.
1
We already have these assertions during opcode creation.
2
Unify init for calls vs non-calls using the call path, which
3
handles TCG_CALL_DUMMY_ARG.
4
2
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
5
---
10
tcg/optimize.c | 25 +++++++++++--------------
6
tcg/tci/tcg-target.c.inc | 20 ++------------------
11
1 file changed, 11 insertions(+), 14 deletions(-)
7
1 file changed, 2 insertions(+), 18 deletions(-)
12
8
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
14
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/optimize.c
11
--- a/tcg/tci/tcg-target.c.inc
16
+++ b/tcg/optimize.c
12
+++ b/tcg/tci/tcg-target.c.inc
17
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
18
}
14
break;
19
}
15
20
16
CASE_32_64(deposit) /* Optional (TCG_TARGET_HAS_deposit_*). */
21
-static void init_arg_info(OptContext *ctx, TCGArg arg)
17
- {
22
-{
18
- TCGArg pos = args[3], len = args[4];
23
- init_ts_info(ctx, arg_temp(arg));
19
- TCGArg max = opc == INDEX_op_deposit_i32 ? 32 : 64;
24
-}
25
-
20
-
26
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
21
- tcg_debug_assert(pos < max);
27
{
22
- tcg_debug_assert(pos + len <= max);
28
TCGTemp *i, *g, *l;
23
-
29
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
24
- tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], pos, len);
30
return false;
25
- }
31
}
26
+ tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], args[3], args[4]);
32
27
break;
33
+static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
28
34
+{
29
CASE_32_64(extract) /* Optional (TCG_TARGET_HAS_extract_*). */
35
+ for (int i = 0; i < nb_args; i++) {
30
CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */
36
+ TCGTemp *ts = arg_temp(op->args[i]);
31
- {
37
+ if (ts) {
32
- TCGArg pos = args[2], len = args[3];
38
+ init_ts_info(ctx, ts);
33
- TCGArg max = type == TCG_TYPE_I32 ? 32 : 64;
39
+ }
34
-
40
+ }
35
- tcg_debug_assert(pos < max);
41
+}
36
- tcg_debug_assert(pos + len <= max);
42
+
37
-
43
/* Propagate constants and copies, fold constant expressions. */
38
- tcg_out_op_rrbb(s, opc, args[0], args[1], pos, len);
44
void tcg_optimize(TCGContext *s)
39
- }
45
{
40
+ tcg_out_op_rrbb(s, opc, args[0], args[1], args[2], args[3]);
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
41
break;
47
if (opc == INDEX_op_call) {
42
48
nb_oargs = TCGOP_CALLO(op);
43
CASE_32_64(brcond)
49
nb_iargs = TCGOP_CALLI(op);
50
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
51
- TCGTemp *ts = arg_temp(op->args[i]);
52
- if (ts) {
53
- init_ts_info(&ctx, ts);
54
- }
55
- }
56
} else {
57
nb_oargs = def->nb_oargs;
58
nb_iargs = def->nb_iargs;
59
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
60
- init_arg_info(&ctx, op->args[i]);
61
- }
62
}
63
+ init_arguments(&ctx, op, nb_oargs + nb_iargs);
64
65
/* Do copy propagation */
66
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
67
--
44
--
68
2.25.1
45
2.43.0
69
46
70
47
diff view generated by jsdifflib
1
Certain targets, like riscv, produce signed 32-bit results.
1
Make extract and sextract "unconditional" in the sense
2
This can lead to lots of redundant extensions as values are
2
that the opcodes are always present. Rely instead on
3
manipulated.
3
TCG_TARGET_HAS_{s}extract_valid, now always defined.
4
4
5
Begin by tracking only the obvious sign-extensions, and
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
converting them to simple copies when possible.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
7
---
12
tcg/optimize.c | 123 ++++++++++++++++++++++++++++++++++++++++---------
8
tcg/aarch64/tcg-target-has.h | 4 ----
13
1 file changed, 102 insertions(+), 21 deletions(-)
9
tcg/arm/tcg-target-has.h | 2 --
10
tcg/i386/tcg-target-has.h | 4 ----
11
tcg/loongarch64/tcg-target-has.h | 4 ----
12
tcg/mips/tcg-target-has.h | 4 ----
13
tcg/ppc/tcg-target-has.h | 4 ----
14
tcg/riscv/tcg-target-has.h | 4 ----
15
tcg/s390x/tcg-target-has.h | 4 ----
16
tcg/sparc64/tcg-target-has.h | 4 ----
17
tcg/tcg-has.h | 12 ------------
18
tcg/tci/tcg-target-has.h | 4 ----
19
tcg/optimize.c | 8 ++++----
20
tcg/tcg.c | 12 ++++--------
21
tcg/tci.c | 8 --------
22
14 files changed, 8 insertions(+), 70 deletions(-)
14
23
24
diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/aarch64/tcg-target-has.h
27
+++ b/tcg/aarch64/tcg-target-has.h
28
@@ -XXX,XX +XXX,XX @@
29
#define TCG_TARGET_HAS_ctz_i32 1
30
#define TCG_TARGET_HAS_ctpop_i32 0
31
#define TCG_TARGET_HAS_deposit_i32 1
32
-#define TCG_TARGET_HAS_extract_i32 1
33
-#define TCG_TARGET_HAS_sextract_i32 1
34
#define TCG_TARGET_HAS_extract2_i32 1
35
#define TCG_TARGET_HAS_negsetcond_i32 1
36
#define TCG_TARGET_HAS_add2_i32 1
37
@@ -XXX,XX +XXX,XX @@
38
#define TCG_TARGET_HAS_ctz_i64 1
39
#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 1
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target-has.h
+++ b/tcg/arm/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target-has.h
+++ b/tcg/i386/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ctz_i32 1
#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ctz_i64 1
#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 1
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target-has.h
+++ b/tcg/loongarch64/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_div2_i32 0
#define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_div2_i64 0
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_extr_i64_i32 1
#define TCG_TARGET_HAS_ext8s_i64 1
diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target-has.h
+++ b/tcg/mips/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;

/* optional instructions detected at runtime */
#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target-has.h
+++ b/tcg/ppc/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target-has.h
+++ b/tcg/riscv/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_div2_i32 0
#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB)
#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_add2_i32 1
#define TCG_TARGET_HAS_sub2_i32 1
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_div2_i64 0
#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB)
#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_extr_i64_i32 1
#define TCG_TARGET_HAS_ext8s_i64 1
diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target-has.h
+++ b/tcg/s390x/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_ctz_i32 0
#define TCG_TARGET_HAS_ctpop_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_ctz_i64 0
#define TCG_TARGET_HAS_ctpop_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target-has.h
+++ b/tcg/sparc64/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_ctz_i32 0
#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_ctz_i64 0
#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-has.h
+++ b/tcg/tcg-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ctz_i64 0
#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_extract_i64 0
-#define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 0
#define TCG_TARGET_HAS_add2_i64 0
@@ -XXX,XX +XXX,XX @@
#ifndef TCG_TARGET_deposit_i64_valid
#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
#endif
-#ifndef TCG_TARGET_extract_valid
-#define TCG_TARGET_extract_valid(type, ofs, len) \
- ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_extract_i32 \
- : TCG_TARGET_HAS_extract_i64)
-#endif
-#ifndef TCG_TARGET_sextract_valid
-#define TCG_TARGET_sextract_valid(type, ofs, len) \
- ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_sextract_i32 \
- : TCG_TARGET_HAS_sextract_i64)
-#endif

/* Only one of DIV or DIV2 should be defined. */
#if defined(TCG_TARGET_HAS_div_i32)
diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target-has.h
+++ b/tcg/tci/tcg-target-has.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_ext16u_i32 1
#define TCG_TARGET_HAS_andc_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_extract_i32 1
-#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_eqv_i32 1
#define TCG_TARGET_HAS_nand_i32 1
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_extract_i64 1
-#define TCG_TARGET_HAS_sextract_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rem_i64 1
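With extract and sextract now unconditionally provided by every backend, it helps to keep their reference semantics at hand. A minimal stand-alone sketch in plain C (the *_ref names are illustrative; QEMU's own helpers are the extract64/sextract64 family):

#include <stdint.h>

/* Zero-extended field extraction: LEN bits of VALUE starting at POS.
 * Assumes 0 < len && pos + len <= 64, as the opcodes require. */
static inline uint64_t extract64_ref(uint64_t value, unsigned pos, unsigned len)
{
    return (value >> pos) & (~0ull >> (64 - len));
}

/* Sign-extended field extraction: the same field, with its top bit
 * replicated into all higher result bits. */
static inline int64_t sextract64_ref(uint64_t value, unsigned pos, unsigned len)
{
    return (int64_t)(value << (64 - len - pos)) >> (64 - len);
}

For example, sextract64_ref(0x80, 0, 8) yields -128, which is exactly what an ext8s operation computes.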
diff --git a/tcg/optimize.c b/tcg/optimize.c
265
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
266
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
267
--- a/tcg/optimize.c
18
+++ b/tcg/optimize.c
268
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
269
@@ -XXX,XX +XXX,XX @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
20
TCGTemp *next_copy;
270
shr_opc = INDEX_op_shr_i32;
21
uint64_t val;
271
neg_opc = INDEX_op_neg_i32;
22
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
272
if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) {
23
+ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
273
- uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
24
} TempOptInfo;
274
+ uext_opc = INDEX_op_extract_i32;
25
26
typedef struct OptContext {
27
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
28
/* In flight values from optimization. */
29
uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
30
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
31
+ uint64_t s_mask; /* mask of clrsb(value) bits */
32
TCGType type;
33
} OptContext;
34
35
+/* Calculate the smask for a specific value. */
36
+static uint64_t smask_from_value(uint64_t value)
37
+{
38
+ int rep = clrsb64(value);
39
+ return ~(~0ull >> rep);
40
+}
41
+
42
+/*
43
+ * Calculate the smask for a given set of known-zeros.
44
+ * If there are lots of zeros on the left, we can consider the remainder
45
+ * an unsigned field, and thus the corresponding signed field is one bit
46
+ * larger.
47
+ */
48
+static uint64_t smask_from_zmask(uint64_t zmask)
49
+{
50
+ /*
51
+ * Only the 0 bits are significant for zmask, thus the msb itself
52
+ * must be zero, else we have no sign information.
53
+ */
54
+ int rep = clz64(zmask);
55
+ if (rep == 0) {
56
+ return 0;
57
+ }
58
+ rep -= 1;
59
+ return ~(~0ull >> rep);
60
+}
61
+
62
static inline TempOptInfo *ts_info(TCGTemp *ts)
63
{
64
return ts->state_ptr;
65
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
66
ti->prev_copy = ts;
67
ti->is_const = false;
68
ti->z_mask = -1;
69
+ ti->s_mask = 0;
70
}
71
72
static void reset_temp(TCGArg arg)
73
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
74
ti->is_const = true;
75
ti->val = ts->val;
76
ti->z_mask = ts->val;
77
+ ti->s_mask = smask_from_value(ts->val);
78
} else {
79
ti->is_const = false;
80
ti->z_mask = -1;
81
+ ti->s_mask = 0;
82
}
83
}
84
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
86
op->args[1] = src;
87
88
di->z_mask = si->z_mask;
89
+ di->s_mask = si->s_mask;
90
91
if (src_ts->type == dst_ts->type) {
92
TempOptInfo *ni = ts_info(si->next_copy);
93
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
94
95
nb_oargs = def->nb_oargs;
96
for (i = 0; i < nb_oargs; i++) {
97
- reset_temp(op->args[i]);
98
+ TCGTemp *ts = arg_temp(op->args[i]);
99
+ reset_ts(ts);
100
/*
101
- * Save the corresponding known-zero bits mask for the
102
+ * Save the corresponding known-zero/sign bits mask for the
103
* first output argument (only one supported so far).
104
*/
105
if (i == 0) {
106
- arg_info(op->args[i])->z_mask = ctx->z_mask;
107
+ ts_info(ts)->z_mask = ctx->z_mask;
108
+ ts_info(ts)->s_mask = ctx->s_mask;
109
}
275
}
110
}
276
if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) {
111
}
277
- sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
112
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
278
+ sext_opc = INDEX_op_sextract_i32;
113
{
279
}
114
uint64_t a_mask = ctx->a_mask;
280
break;
115
uint64_t z_mask = ctx->z_mask;
281
case TCG_TYPE_I64:
116
+ uint64_t s_mask = ctx->s_mask;
282
@@ -XXX,XX +XXX,XX @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
117
283
shr_opc = INDEX_op_shr_i64;
118
/*
284
neg_opc = INDEX_op_neg_i64;
119
* 32-bit ops generate 32-bit results, which for the purpose of
285
if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) {
120
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
286
- uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
121
if (ctx->type == TCG_TYPE_I32) {
287
+ uext_opc = INDEX_op_extract_i64;
122
a_mask = (int32_t)a_mask;
288
}
123
z_mask = (int32_t)z_mask;
289
if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) {
124
+ s_mask |= MAKE_64BIT_MASK(32, 32);
290
- sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
125
ctx->z_mask = z_mask;
291
+ sext_opc = INDEX_op_sextract_i64;
126
+ ctx->s_mask = s_mask;
127
}
128
129
if (z_mask == 0) {
130
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
131
132
static bool fold_bswap(OptContext *ctx, TCGOp *op)
133
{
134
- uint64_t z_mask, sign;
135
+ uint64_t z_mask, s_mask, sign;
136
137
if (arg_is_const(op->args[1])) {
138
uint64_t t = arg_info(op->args[1])->val;
139
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
140
}
141
142
z_mask = arg_info(op->args[1])->z_mask;
143
+
144
switch (op->opc) {
145
case INDEX_op_bswap16_i32:
146
case INDEX_op_bswap16_i64:
147
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
148
default:
149
g_assert_not_reached();
150
}
151
+ s_mask = smask_from_zmask(z_mask);
152
153
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
154
case TCG_BSWAP_OZ:
155
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
156
/* If the sign bit may be 1, force all the bits above to 1. */
157
if (z_mask & sign) {
158
z_mask |= sign;
159
+ s_mask = sign << 1;
160
}
292
}
161
break;
293
break;
162
default:
294
default:
163
/* The high bits are undefined: force all bits above the sign to 1. */
295
diff --git a/tcg/tcg.c b/tcg/tcg.c
164
z_mask |= sign << 1;
296
index XXXXXXX..XXXXXXX 100644
165
+ s_mask = 0;
297
--- a/tcg/tcg.c
166
break;
298
+++ b/tcg/tcg.c
167
}
299
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
168
ctx->z_mask = z_mask;
300
case INDEX_op_shl_i32:
169
+ ctx->s_mask = s_mask;
301
case INDEX_op_shr_i32:
170
302
case INDEX_op_sar_i32:
171
return fold_masks(ctx, op);
303
+ case INDEX_op_extract_i32:
172
}
304
+ case INDEX_op_sextract_i32:
173
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
174
static bool fold_extract(OptContext *ctx, TCGOp *op)
175
{
176
uint64_t z_mask_old, z_mask;
177
+ int pos = op->args[2];
178
+ int len = op->args[3];
179
180
if (arg_is_const(op->args[1])) {
181
uint64_t t;
182
183
t = arg_info(op->args[1])->val;
184
- t = extract64(t, op->args[2], op->args[3]);
185
+ t = extract64(t, pos, len);
186
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
187
}
188
189
z_mask_old = arg_info(op->args[1])->z_mask;
190
- z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
191
- if (op->args[2] == 0) {
192
+ z_mask = extract64(z_mask_old, pos, len);
193
+ if (pos == 0) {
194
ctx->a_mask = z_mask_old ^ z_mask;
195
}
196
ctx->z_mask = z_mask;
197
+ ctx->s_mask = smask_from_zmask(z_mask);
198
199
return fold_masks(ctx, op);
200
}
201
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
202
203
static bool fold_exts(OptContext *ctx, TCGOp *op)
204
{
205
- uint64_t z_mask_old, z_mask, sign;
206
+ uint64_t s_mask_old, s_mask, z_mask, sign;
207
bool type_change = false;
208
209
if (fold_const1(ctx, op)) {
210
return true;
305
return true;
211
}
306
212
307
case INDEX_op_negsetcond_i32:
213
- z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
308
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
214
+ z_mask = arg_info(op->args[1])->z_mask;
309
return TCG_TARGET_HAS_rot_i32;
215
+ s_mask = arg_info(op->args[1])->s_mask;
310
case INDEX_op_deposit_i32:
216
+ s_mask_old = s_mask;
311
return TCG_TARGET_HAS_deposit_i32;
217
312
- case INDEX_op_extract_i32:
218
switch (op->opc) {
313
- return TCG_TARGET_HAS_extract_i32;
219
CASE_OP_32_64(ext8s):
314
- case INDEX_op_sextract_i32:
220
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
315
- return TCG_TARGET_HAS_sextract_i32;
221
316
case INDEX_op_extract2_i32:
222
if (z_mask & sign) {
317
return TCG_TARGET_HAS_extract2_i32;
223
z_mask |= sign;
318
case INDEX_op_add2_i32:
224
- } else if (!type_change) {
319
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
225
- ctx->a_mask = z_mask_old ^ z_mask;
320
case INDEX_op_sar_i64:
226
}
321
case INDEX_op_ext_i32_i64:
227
+ s_mask |= sign << 1;
322
case INDEX_op_extu_i32_i64:
228
+
323
+ case INDEX_op_extract_i64:
229
ctx->z_mask = z_mask;
324
+ case INDEX_op_sextract_i64:
230
+ ctx->s_mask = s_mask;
325
return TCG_TARGET_REG_BITS == 64;
231
+ if (!type_change) {
326
232
+ ctx->a_mask = s_mask & ~s_mask_old;
327
case INDEX_op_negsetcond_i64:
233
+ }
328
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
234
329
return TCG_TARGET_HAS_rot_i64;
235
return fold_masks(ctx, op);
330
case INDEX_op_deposit_i64:
236
}
331
return TCG_TARGET_HAS_deposit_i64;
237
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
332
- case INDEX_op_extract_i64:
238
}
333
- return TCG_TARGET_HAS_extract_i64;
239
334
- case INDEX_op_sextract_i64:
240
ctx->z_mask = z_mask;
335
- return TCG_TARGET_HAS_sextract_i64;
241
+ ctx->s_mask = smask_from_zmask(z_mask);
336
case INDEX_op_extract2_i64:
242
if (!type_change) {
337
return TCG_TARGET_HAS_extract2_i64;
243
ctx->a_mask = z_mask_old ^ z_mask;
338
case INDEX_op_extrl_i64_i32:
244
}
339
diff --git a/tcg/tci.c b/tcg/tci.c
245
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
340
index XXXXXXX..XXXXXXX 100644
246
MemOp mop = get_memop(oi);
341
--- a/tcg/tci.c
247
int width = 8 * memop_size(mop);
342
+++ b/tcg/tci.c
248
343
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
249
- if (!(mop & MO_SIGN) && width < 64) {
344
regs[r0] = deposit32(regs[r1], pos, len, regs[r2]);
250
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
345
break;
251
+ if (width < 64) {
346
#endif
252
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
347
-#if TCG_TARGET_HAS_extract_i32
253
+ if (!(mop & MO_SIGN)) {
348
case INDEX_op_extract_i32:
254
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
349
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
255
+ ctx->s_mask <<= 1;
350
regs[r0] = extract32(regs[r1], pos, len);
256
+ }
351
break;
257
}
352
-#endif
258
353
-#if TCG_TARGET_HAS_sextract_i32
259
/* Opcodes that touch guest memory stop the mb optimization. */
354
case INDEX_op_sextract_i32:
260
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
355
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
261
356
regs[r0] = sextract32(regs[r1], pos, len);
262
static bool fold_sextract(OptContext *ctx, TCGOp *op)
357
break;
263
{
358
-#endif
264
- int64_t z_mask_old, z_mask;
359
case INDEX_op_brcond_i32:
265
+ uint64_t z_mask, s_mask, s_mask_old;
360
tci_args_rl(insn, tb_ptr, &r0, &ptr);
266
+ int pos = op->args[2];
361
if ((uint32_t)regs[r0]) {
267
+ int len = op->args[3];
362
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
268
363
regs[r0] = deposit64(regs[r1], pos, len, regs[r2]);
269
if (arg_is_const(op->args[1])) {
364
break;
270
uint64_t t;
365
#endif
271
366
-#if TCG_TARGET_HAS_extract_i64
272
t = arg_info(op->args[1])->val;
367
case INDEX_op_extract_i64:
273
- t = sextract64(t, op->args[2], op->args[3]);
368
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
274
+ t = sextract64(t, pos, len);
369
regs[r0] = extract64(regs[r1], pos, len);
275
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
370
break;
276
}
371
-#endif
277
372
-#if TCG_TARGET_HAS_sextract_i64
278
- z_mask_old = arg_info(op->args[1])->z_mask;
373
case INDEX_op_sextract_i64:
279
- z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
374
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
280
- if (op->args[2] == 0 && z_mask >= 0) {
375
regs[r0] = sextract64(regs[r1], pos, len);
281
- ctx->a_mask = z_mask_old ^ z_mask;
376
break;
282
- }
377
-#endif
283
+ z_mask = arg_info(op->args[1])->z_mask;
378
case INDEX_op_brcond_i64:
284
+ z_mask = sextract64(z_mask, pos, len);
379
tci_args_rl(insn, tb_ptr, &r0, &ptr);
285
ctx->z_mask = z_mask;
380
if (regs[r0]) {
286
287
+ s_mask_old = arg_info(op->args[1])->s_mask;
288
+ s_mask = sextract64(s_mask_old, pos, len);
289
+ s_mask |= MAKE_64BIT_MASK(len, 64 - len);
290
+ ctx->s_mask = s_mask;
291
+
292
+ if (pos == 0) {
293
+ ctx->a_mask = s_mask & ~s_mask_old;
294
+ }
295
+
296
return fold_masks(ctx, op);
297
}
298
299
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
300
{
301
/* We can't do any folding with a load, but we can record bits. */
302
switch (op->opc) {
303
+ CASE_OP_32_64(ld8s):
304
+ ctx->s_mask = MAKE_64BIT_MASK(8, 56);
305
+ break;
306
CASE_OP_32_64(ld8u):
307
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
308
+ ctx->s_mask = MAKE_64BIT_MASK(9, 55);
309
+ break;
310
+ CASE_OP_32_64(ld16s):
311
+ ctx->s_mask = MAKE_64BIT_MASK(16, 48);
312
break;
313
CASE_OP_32_64(ld16u):
314
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
315
+ ctx->s_mask = MAKE_64BIT_MASK(17, 47);
316
+ break;
317
+ case INDEX_op_ld32s_i64:
318
+ ctx->s_mask = MAKE_64BIT_MASK(32, 32);
319
break;
320
case INDEX_op_ld32u_i64:
321
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
322
+ ctx->s_mask = MAKE_64BIT_MASK(33, 31);
323
break;
324
default:
325
g_assert_not_reached();
326
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
327
ctx.type = TCG_TYPE_I32;
328
}
329
330
- /* Assume all bits affected, and no bits known zero. */
331
+ /* Assume all bits affected, no bits known zero, no sign reps. */
332
ctx.a_mask = -1;
333
ctx.z_mask = -1;
334
+ ctx.s_mask = 0;
335
336
/*
337
* Process each opcode.
338
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
339
case INDEX_op_extrh_i64_i32:
340
done = fold_extu(&ctx, op);
341
break;
342
+ CASE_OP_32_64(ld8s):
343
CASE_OP_32_64(ld8u):
344
+ CASE_OP_32_64(ld16s):
345
CASE_OP_32_64(ld16u):
346
+ case INDEX_op_ld32s_i64:
347
case INDEX_op_ld32u_i64:
348
done = fold_tcg_ld(&ctx, op);
349
break;
350
--
2.25.1

--
2.43.0
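As a companion to the s_mask bookkeeping added to tcg/optimize.c above, here is a self-contained sketch of the two mask helpers; clrsb64_ref stands in for QEMU's clrsb64, and main() is only an illustrative check:

#include <stdint.h>
#include <stdio.h>

/* Leading redundant sign bits, not counting the sign bit itself. */
static int clrsb64_ref(uint64_t v)
{
    uint64_t t = v ^ (uint64_t)((int64_t)v >> 1);
    return t ? __builtin_clzll(t) - 1 : 63;
}

/* A left-aligned mask of the bits known to replicate the sign bit. */
static uint64_t smask_from_value(uint64_t value)
{
    return ~(~0ull >> clrsb64_ref(value));
}

/* If the msb of zmask is known-zero, the value fits in an unsigned
 * field, so the corresponding signed field is one bit wider. */
static uint64_t smask_from_zmask(uint64_t zmask)
{
    int rep = zmask ? __builtin_clzll(zmask) : 64;
    return rep ? ~(~0ull >> (rep - 1)) : 0;
}

int main(void)
{
    /* 0xff: 55 redundant sign bits, so the top 55 bits are sign bits. */
    printf("%016llx\n", (unsigned long long)smask_from_value(0xff));
    /* zmask 0xff: 56 known-zero leading bits, i.e. a 9-bit signed field. */
    printf("%016llx\n", (unsigned long long)smask_from_zmask(0xff));
    return 0;
}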
Provide what will become a larger context for splitting
the very large tcg_optimize function.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 77 ++++++++++++++++++++++++++------------------------
1 file changed, 40 insertions(+), 37 deletions(-)

Make deposit "unconditional" in the sense that the opcode is
always present. Rely instead on TCG_TARGET_deposit_valid,
now always defined.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/aarch64/tcg-target-has.h | 3 +--
tcg/arm/tcg-target-has.h | 2 +-
tcg/i386/tcg-target-has.h | 5 +----
tcg/loongarch64/tcg-target-has.h | 3 +--
tcg/mips/tcg-target-has.h | 3 +--
tcg/ppc/tcg-target-has.h | 3 +--
tcg/riscv/tcg-target-has.h | 4 ++--
tcg/s390x/tcg-target-has.h | 3 +--
tcg/sparc64/tcg-target-has.h | 4 ++--
tcg/tcg-has.h | 8 --------
tcg/tci/tcg-target-has.h | 3 +--
tcg/tcg-op.c | 22 +++++++++++-----------
tcg/tcg.c | 31 +++++++++++--------------------
tcg/tci.c | 4 ----
tcg/tci/tcg-target.c.inc | 2 +-
15 files changed, 35 insertions(+), 65 deletions(-)
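To make the deposit contract concrete: when TCG_TARGET_deposit_valid() reports false, tcg-op.c synthesizes the operation from shift/and/or. A reference model of the semantics a backend must match, as a sketch (illustrative name, not the QEMU API):

#include <stdint.h>

/* Replace LEN bits of DST starting at OFS with the low LEN bits of SRC.
 * This is what the shift/and/or fallback in tcg-op.c computes when the
 * backend cannot do it in one instruction. */
static uint64_t deposit64_ref(uint64_t dst, unsigned ofs, unsigned len,
                              uint64_t src)
{
    uint64_t mask = (len == 64 ? ~0ull : (1ull << len) - 1) << ofs;
    return (dst & ~mask) | ((src << ofs) & mask);
}

Note that tcg_op_deposit_valid() in the tcg.c hunk below asserts exactly the preconditions this model assumes: ofs < width, len > 0 and len <= width - ofs.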
diff --git a/tcg/optimize.c b/tcg/optimize.c
25
diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h
13
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
27
--- a/tcg/aarch64/tcg-target-has.h
15
+++ b/tcg/optimize.c
28
+++ b/tcg/aarch64/tcg-target-has.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
29
@@ -XXX,XX +XXX,XX @@
17
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
30
#define TCG_TARGET_HAS_clz_i32 1
18
} TempOptInfo;
31
#define TCG_TARGET_HAS_ctz_i32 1
19
32
#define TCG_TARGET_HAS_ctpop_i32 0
20
+typedef struct OptContext {
33
-#define TCG_TARGET_HAS_deposit_i32 1
21
+ TCGTempSet temps_used;
34
#define TCG_TARGET_HAS_extract2_i32 1
22
+} OptContext;
35
#define TCG_TARGET_HAS_negsetcond_i32 1
36
#define TCG_TARGET_HAS_add2_i32 1
37
@@ -XXX,XX +XXX,XX @@
38
#define TCG_TARGET_HAS_clz_i64 1
39
#define TCG_TARGET_HAS_ctz_i64 1
40
#define TCG_TARGET_HAS_ctpop_i64 0
41
-#define TCG_TARGET_HAS_deposit_i64 1
42
#define TCG_TARGET_HAS_extract2_i64 1
43
#define TCG_TARGET_HAS_negsetcond_i64 1
44
#define TCG_TARGET_HAS_add2_i64 1
45
@@ -XXX,XX +XXX,XX @@
46
47
#define TCG_TARGET_extract_valid(type, ofs, len) 1
48
#define TCG_TARGET_sextract_valid(type, ofs, len) 1
49
+#define TCG_TARGET_deposit_valid(type, ofs, len) 1
50
51
#endif
52
diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tcg/arm/tcg-target-has.h
55
+++ b/tcg/arm/tcg-target-has.h
56
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
57
#define TCG_TARGET_HAS_clz_i32 1
58
#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
59
#define TCG_TARGET_HAS_ctpop_i32 0
60
-#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
61
#define TCG_TARGET_HAS_extract2_i32 1
62
#define TCG_TARGET_HAS_negsetcond_i32 1
63
#define TCG_TARGET_HAS_mulu2_i32 1
64
@@ -XXX,XX +XXX,XX @@ tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
65
66
#define TCG_TARGET_extract_valid tcg_target_extract_valid
67
#define TCG_TARGET_sextract_valid tcg_target_extract_valid
68
+#define TCG_TARGET_deposit_valid(type, ofs, len) use_armv7_instructions
69
70
#endif
71
diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
72
index XXXXXXX..XXXXXXX 100644
73
--- a/tcg/i386/tcg-target-has.h
74
+++ b/tcg/i386/tcg-target-has.h
75
@@ -XXX,XX +XXX,XX @@
76
#define TCG_TARGET_HAS_clz_i32 1
77
#define TCG_TARGET_HAS_ctz_i32 1
78
#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
79
-#define TCG_TARGET_HAS_deposit_i32 1
80
#define TCG_TARGET_HAS_extract2_i32 1
81
#define TCG_TARGET_HAS_negsetcond_i32 1
82
#define TCG_TARGET_HAS_add2_i32 1
83
@@ -XXX,XX +XXX,XX @@
84
#define TCG_TARGET_HAS_clz_i64 1
85
#define TCG_TARGET_HAS_ctz_i64 1
86
#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
87
-#define TCG_TARGET_HAS_deposit_i64 1
88
#define TCG_TARGET_HAS_extract2_i64 1
89
#define TCG_TARGET_HAS_negsetcond_i64 1
90
#define TCG_TARGET_HAS_add2_i64 1
91
@@ -XXX,XX +XXX,XX @@
92
#define TCG_TARGET_HAS_cmpsel_vec 1
93
#define TCG_TARGET_HAS_tst_vec have_avx512bw
94
95
-#define TCG_TARGET_deposit_i32_valid(ofs, len) \
96
+#define TCG_TARGET_deposit_valid(type, ofs, len) \
97
(((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
98
(TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
99
-#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
100
101
/*
102
* Check for the possibility of low byte/word extraction, high-byte extraction
103
diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h
104
index XXXXXXX..XXXXXXX 100644
105
--- a/tcg/loongarch64/tcg-target-has.h
106
+++ b/tcg/loongarch64/tcg-target-has.h
107
@@ -XXX,XX +XXX,XX @@
108
#define TCG_TARGET_HAS_rem_i32 1
109
#define TCG_TARGET_HAS_div2_i32 0
110
#define TCG_TARGET_HAS_rot_i32 1
111
-#define TCG_TARGET_HAS_deposit_i32 1
112
#define TCG_TARGET_HAS_extract2_i32 0
113
#define TCG_TARGET_HAS_add2_i32 0
114
#define TCG_TARGET_HAS_sub2_i32 0
115
@@ -XXX,XX +XXX,XX @@
116
#define TCG_TARGET_HAS_rem_i64 1
117
#define TCG_TARGET_HAS_div2_i64 0
118
#define TCG_TARGET_HAS_rot_i64 1
119
-#define TCG_TARGET_HAS_deposit_i64 1
120
#define TCG_TARGET_HAS_extract2_i64 0
121
#define TCG_TARGET_HAS_extr_i64_i32 1
122
#define TCG_TARGET_HAS_ext8s_i64 1
123
@@ -XXX,XX +XXX,XX @@
124
#define TCG_TARGET_HAS_tst_vec 0
125
126
#define TCG_TARGET_extract_valid(type, ofs, len) 1
127
+#define TCG_TARGET_deposit_valid(type, ofs, len) 1
128
129
static inline bool
130
tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
131
diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/tcg/mips/tcg-target-has.h
134
+++ b/tcg/mips/tcg-target-has.h
135
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
136
#endif
137
138
/* optional instructions detected at runtime */
139
-#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
140
#define TCG_TARGET_HAS_extract2_i32 0
141
#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
142
#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
143
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
144
#define TCG_TARGET_HAS_bswap16_i64 1
145
#define TCG_TARGET_HAS_bswap32_i64 1
146
#define TCG_TARGET_HAS_bswap64_i64 1
147
-#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
148
#define TCG_TARGET_HAS_extract2_i64 0
149
#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
150
#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
151
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
152
#define TCG_TARGET_HAS_tst 0
153
154
#define TCG_TARGET_extract_valid(type, ofs, len) use_mips32r2_instructions
155
+#define TCG_TARGET_deposit_valid(type, ofs, len) use_mips32r2_instructions
156
157
static inline bool
158
tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
159
diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h
160
index XXXXXXX..XXXXXXX 100644
161
--- a/tcg/ppc/tcg-target-has.h
162
+++ b/tcg/ppc/tcg-target-has.h
163
@@ -XXX,XX +XXX,XX @@
164
#define TCG_TARGET_HAS_clz_i32 1
165
#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
166
#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
167
-#define TCG_TARGET_HAS_deposit_i32 1
168
#define TCG_TARGET_HAS_extract2_i32 0
169
#define TCG_TARGET_HAS_negsetcond_i32 1
170
#define TCG_TARGET_HAS_mulu2_i32 0
171
@@ -XXX,XX +XXX,XX @@
172
#define TCG_TARGET_HAS_clz_i64 1
173
#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
174
#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
175
-#define TCG_TARGET_HAS_deposit_i64 1
176
#define TCG_TARGET_HAS_extract2_i64 0
177
#define TCG_TARGET_HAS_negsetcond_i64 1
178
#define TCG_TARGET_HAS_add2_i64 1
179
@@ -XXX,XX +XXX,XX @@
180
#define TCG_TARGET_HAS_tst_vec 0
181
182
#define TCG_TARGET_extract_valid(type, ofs, len) 1
183
+#define TCG_TARGET_deposit_valid(type, ofs, len) 1
184
185
static inline bool
186
tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
187
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
188
index XXXXXXX..XXXXXXX 100644
189
--- a/tcg/riscv/tcg-target-has.h
190
+++ b/tcg/riscv/tcg-target-has.h
191
@@ -XXX,XX +XXX,XX @@
192
#define TCG_TARGET_HAS_rem_i32 1
193
#define TCG_TARGET_HAS_div2_i32 0
194
#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB)
195
-#define TCG_TARGET_HAS_deposit_i32 0
196
#define TCG_TARGET_HAS_extract2_i32 0
197
#define TCG_TARGET_HAS_add2_i32 1
198
#define TCG_TARGET_HAS_sub2_i32 1
199
@@ -XXX,XX +XXX,XX @@
200
#define TCG_TARGET_HAS_rem_i64 1
201
#define TCG_TARGET_HAS_div2_i64 0
202
#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB)
203
-#define TCG_TARGET_HAS_deposit_i64 0
204
#define TCG_TARGET_HAS_extract2_i64 0
205
#define TCG_TARGET_HAS_extr_i64_i32 1
206
#define TCG_TARGET_HAS_ext8s_i64 1
207
@@ -XXX,XX +XXX,XX @@ tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
208
}
209
#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
210
211
+#define TCG_TARGET_deposit_valid(type, ofs, len) 0
23
+
212
+
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
213
#endif
25
{
214
diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h
26
return ts->state_ptr;
215
index XXXXXXX..XXXXXXX 100644
27
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
216
--- a/tcg/s390x/tcg-target-has.h
28
}
217
+++ b/tcg/s390x/tcg-target-has.h
29
218
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
30
/* Initialize and activate a temporary. */
219
#define TCG_TARGET_HAS_clz_i32 0
31
-static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
220
#define TCG_TARGET_HAS_ctz_i32 0
32
+static void init_ts_info(OptContext *ctx, TCGTemp *ts)
221
#define TCG_TARGET_HAS_ctpop_i32 1
33
{
222
-#define TCG_TARGET_HAS_deposit_i32 1
34
size_t idx = temp_idx(ts);
223
#define TCG_TARGET_HAS_extract2_i32 0
35
TempOptInfo *ti;
224
#define TCG_TARGET_HAS_negsetcond_i32 1
36
225
#define TCG_TARGET_HAS_add2_i32 1
37
- if (test_bit(idx, temps_used->l)) {
226
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
38
+ if (test_bit(idx, ctx->temps_used.l)) {
227
#define TCG_TARGET_HAS_clz_i64 1
228
#define TCG_TARGET_HAS_ctz_i64 0
229
#define TCG_TARGET_HAS_ctpop_i64 1
230
-#define TCG_TARGET_HAS_deposit_i64 1
231
#define TCG_TARGET_HAS_extract2_i64 0
232
#define TCG_TARGET_HAS_negsetcond_i64 1
233
#define TCG_TARGET_HAS_add2_i64 1
234
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
235
#define TCG_TARGET_HAS_tst_vec 0
236
237
#define TCG_TARGET_extract_valid(type, ofs, len) 1
238
+#define TCG_TARGET_deposit_valid(type, ofs, len) 1
239
240
static inline bool
241
tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
242
diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h
243
index XXXXXXX..XXXXXXX 100644
244
--- a/tcg/sparc64/tcg-target-has.h
245
+++ b/tcg/sparc64/tcg-target-has.h
246
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
247
#define TCG_TARGET_HAS_clz_i32 0
248
#define TCG_TARGET_HAS_ctz_i32 0
249
#define TCG_TARGET_HAS_ctpop_i32 0
250
-#define TCG_TARGET_HAS_deposit_i32 0
251
#define TCG_TARGET_HAS_extract2_i32 0
252
#define TCG_TARGET_HAS_negsetcond_i32 1
253
#define TCG_TARGET_HAS_add2_i32 1
254
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
255
#define TCG_TARGET_HAS_clz_i64 0
256
#define TCG_TARGET_HAS_ctz_i64 0
257
#define TCG_TARGET_HAS_ctpop_i64 0
258
-#define TCG_TARGET_HAS_deposit_i64 0
259
#define TCG_TARGET_HAS_extract2_i64 0
260
#define TCG_TARGET_HAS_negsetcond_i64 1
261
#define TCG_TARGET_HAS_add2_i64 1
262
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
263
264
#define TCG_TARGET_sextract_valid TCG_TARGET_extract_valid
265
266
+#define TCG_TARGET_deposit_valid(type, ofs, len) 0
267
+
268
#endif
269
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
270
index XXXXXXX..XXXXXXX 100644
271
--- a/tcg/tcg-has.h
272
+++ b/tcg/tcg-has.h
273
@@ -XXX,XX +XXX,XX @@
274
#define TCG_TARGET_HAS_clz_i64 0
275
#define TCG_TARGET_HAS_ctz_i64 0
276
#define TCG_TARGET_HAS_ctpop_i64 0
277
-#define TCG_TARGET_HAS_deposit_i64 0
278
#define TCG_TARGET_HAS_extract2_i64 0
279
#define TCG_TARGET_HAS_negsetcond_i64 0
280
#define TCG_TARGET_HAS_add2_i64 0
281
@@ -XXX,XX +XXX,XX @@
282
#define TCG_TARGET_HAS_sub2_i32 1
283
#endif
284
285
-#ifndef TCG_TARGET_deposit_i32_valid
286
-#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
287
-#endif
288
-#ifndef TCG_TARGET_deposit_i64_valid
289
-#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
290
-#endif
291
-
292
/* Only one of DIV or DIV2 should be defined. */
293
#if defined(TCG_TARGET_HAS_div_i32)
294
#define TCG_TARGET_HAS_div2_i32 0
295
diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h
296
index XXXXXXX..XXXXXXX 100644
297
--- a/tcg/tci/tcg-target-has.h
298
+++ b/tcg/tci/tcg-target-has.h
299
@@ -XXX,XX +XXX,XX @@
300
#define TCG_TARGET_HAS_ext8u_i32 1
301
#define TCG_TARGET_HAS_ext16u_i32 1
302
#define TCG_TARGET_HAS_andc_i32 1
303
-#define TCG_TARGET_HAS_deposit_i32 1
304
#define TCG_TARGET_HAS_extract2_i32 0
305
#define TCG_TARGET_HAS_eqv_i32 1
306
#define TCG_TARGET_HAS_nand_i32 1
307
@@ -XXX,XX +XXX,XX @@
308
#define TCG_TARGET_HAS_bswap16_i64 1
309
#define TCG_TARGET_HAS_bswap32_i64 1
310
#define TCG_TARGET_HAS_bswap64_i64 1
311
-#define TCG_TARGET_HAS_deposit_i64 1
312
#define TCG_TARGET_HAS_extract2_i64 0
313
#define TCG_TARGET_HAS_div_i64 1
314
#define TCG_TARGET_HAS_rem_i64 1
315
@@ -XXX,XX +XXX,XX @@
316
317
#define TCG_TARGET_extract_valid(type, ofs, len) 1
318
#define TCG_TARGET_sextract_valid(type, ofs, len) 1
319
+#define TCG_TARGET_deposit_valid(type, ofs, len) 1
320
321
#endif
322
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
323
index XXXXXXX..XXXXXXX 100644
324
--- a/tcg/tcg-op.c
325
+++ b/tcg/tcg-op.c
326
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
327
tcg_gen_mov_i32(ret, arg2);
39
return;
328
return;
40
}
329
}
41
- set_bit(idx, temps_used->l);
330
- if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
42
+ set_bit(idx, ctx->temps_used.l);
331
+ if (TCG_TARGET_deposit_valid(TCG_TYPE_I32, ofs, len)) {
43
332
tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
44
ti = ts->state_ptr;
333
return;
45
if (ti == NULL) {
46
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
47
}
334
}
335
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
336
tcg_gen_shli_i32(ret, arg, ofs);
337
} else if (ofs == 0) {
338
tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
339
- } else if (TCG_TARGET_HAS_deposit_i32
340
- && TCG_TARGET_deposit_i32_valid(ofs, len)) {
341
+ } else if (TCG_TARGET_deposit_valid(TCG_TYPE_I32, ofs, len)) {
342
TCGv_i32 zero = tcg_constant_i32(0);
343
tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
344
} else {
345
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
346
tcg_gen_mov_i64(ret, arg2);
347
return;
348
}
349
- if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
350
- tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
351
- return;
352
- }
353
354
- if (TCG_TARGET_REG_BITS == 32) {
355
+ if (TCG_TARGET_REG_BITS == 64) {
356
+ if (TCG_TARGET_deposit_valid(TCG_TYPE_I64, ofs, len)) {
357
+ tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
358
+ return;
359
+ }
360
+ } else {
361
if (ofs >= 32) {
362
tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
363
TCGV_LOW(arg2), ofs - 32, len);
364
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
365
tcg_gen_shli_i64(ret, arg, ofs);
366
} else if (ofs == 0) {
367
tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
368
- } else if (TCG_TARGET_HAS_deposit_i64
369
- && TCG_TARGET_deposit_i64_valid(ofs, len)) {
370
+ } else if (TCG_TARGET_REG_BITS == 64 &&
371
+ TCG_TARGET_deposit_valid(TCG_TYPE_I64, ofs, len)) {
372
TCGv_i64 zero = tcg_constant_i64(0);
373
tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
374
} else {
375
@@ -XXX,XX +XXX,XX @@ void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
376
tcg_gen_extu_i32_i64(dest, low);
377
/* If deposit is available, use it. Otherwise use the extra
378
knowledge that we have of the zero-extensions above. */
379
- if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
380
+ if (TCG_TARGET_deposit_valid(TCG_TYPE_I64, 32, 32)) {
381
tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
382
} else {
383
tcg_gen_shli_i64(tmp, tmp, 32);
384
diff --git a/tcg/tcg.c b/tcg/tcg.c
385
index XXXXXXX..XXXXXXX 100644
386
--- a/tcg/tcg.c
387
+++ b/tcg/tcg.c
388
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
389
case INDEX_op_sar_i32:
390
case INDEX_op_extract_i32:
391
case INDEX_op_sextract_i32:
392
+ case INDEX_op_deposit_i32:
393
return true;
394
395
case INDEX_op_negsetcond_i32:
396
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
397
case INDEX_op_rotl_i32:
398
case INDEX_op_rotr_i32:
399
return TCG_TARGET_HAS_rot_i32;
400
- case INDEX_op_deposit_i32:
401
- return TCG_TARGET_HAS_deposit_i32;
402
case INDEX_op_extract2_i32:
403
return TCG_TARGET_HAS_extract2_i32;
404
case INDEX_op_add2_i32:
405
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
406
case INDEX_op_extu_i32_i64:
407
case INDEX_op_extract_i64:
408
case INDEX_op_sextract_i64:
409
+ case INDEX_op_deposit_i64:
410
return TCG_TARGET_REG_BITS == 64;
411
412
case INDEX_op_negsetcond_i64:
413
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
414
case INDEX_op_rotl_i64:
415
case INDEX_op_rotr_i64:
416
return TCG_TARGET_HAS_rot_i64;
417
- case INDEX_op_deposit_i64:
418
- return TCG_TARGET_HAS_deposit_i64;
419
case INDEX_op_extract2_i64:
420
return TCG_TARGET_HAS_extract2_i64;
421
case INDEX_op_extrl_i64_i32:
422
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
423
424
bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
425
{
426
+ unsigned width;
427
+
428
+ tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
429
+ width = (type == TCG_TYPE_I32 ? 32 : 64);
430
+
431
+ tcg_debug_assert(ofs < width);
432
tcg_debug_assert(len > 0);
433
- switch (type) {
434
- case TCG_TYPE_I32:
435
- tcg_debug_assert(ofs < 32);
436
- tcg_debug_assert(len <= 32);
437
- tcg_debug_assert(ofs + len <= 32);
438
- return TCG_TARGET_HAS_deposit_i32 &&
439
- TCG_TARGET_deposit_i32_valid(ofs, len);
440
- case TCG_TYPE_I64:
441
- tcg_debug_assert(ofs < 64);
442
- tcg_debug_assert(len <= 64);
443
- tcg_debug_assert(ofs + len <= 64);
444
- return TCG_TARGET_HAS_deposit_i64 &&
445
- TCG_TARGET_deposit_i64_valid(ofs, len);
446
- default:
447
- g_assert_not_reached();
448
- }
449
+ tcg_debug_assert(len <= width - ofs);
450
+
451
+ return TCG_TARGET_deposit_valid(type, ofs, len);
48
}
452
}
49
453
50
-static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
454
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
51
+static void init_arg_info(OptContext *ctx, TCGArg arg)
455
diff --git a/tcg/tci.c b/tcg/tci.c
52
{
456
index XXXXXXX..XXXXXXX 100644
53
- init_ts_info(temps_used, arg_temp(arg));
457
--- a/tcg/tci.c
54
+ init_ts_info(ctx, arg_temp(arg));
458
+++ b/tcg/tci.c
55
}
459
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
56
460
regs[r0] = ror32(regs[r1], regs[r2] & 31);
57
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
58
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
59
}
60
}
61
62
-static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
63
+static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
64
TCGOp *op, TCGArg dst, uint64_t val)
65
{
66
const TCGOpDef *def = &tcg_op_defs[op->opc];
67
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
68
69
/* Convert movi to mov with constant temp. */
70
tv = tcg_constant_internal(type, val);
71
- init_ts_info(temps_used, tv);
72
+ init_ts_info(ctx, tv);
73
tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
74
}
75
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
{
78
int nb_temps, nb_globals, i;
79
TCGOp *op, *op_next, *prev_mb = NULL;
80
- TCGTempSet temps_used;
81
+ OptContext ctx = {};
82
83
/* Array VALS has an element for each temp.
84
If this temp holds a constant then its value is kept in VALS' element.
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
nb_temps = s->nb_temps;
87
nb_globals = s->nb_globals;
88
89
- memset(&temps_used, 0, sizeof(temps_used));
90
for (i = 0; i < nb_temps; ++i) {
91
s->temps[i].state_ptr = NULL;
92
}
93
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
94
for (i = 0; i < nb_oargs + nb_iargs; i++) {
95
TCGTemp *ts = arg_temp(op->args[i]);
96
if (ts) {
97
- init_ts_info(&temps_used, ts);
98
+ init_ts_info(&ctx, ts);
99
}
100
}
101
} else {
102
nb_oargs = def->nb_oargs;
103
nb_iargs = def->nb_iargs;
104
for (i = 0; i < nb_oargs + nb_iargs; i++) {
105
- init_arg_info(&temps_used, op->args[i]);
106
+ init_arg_info(&ctx, op->args[i]);
107
}
108
}
109
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64(rotr):
112
if (arg_is_const(op->args[1])
113
&& arg_info(op->args[1])->val == 0) {
114
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
115
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
116
continue;
117
}
118
break;
461
break;
119
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
462
#endif
120
463
-#if TCG_TARGET_HAS_deposit_i32
121
if (partmask == 0) {
464
case INDEX_op_deposit_i32:
122
tcg_debug_assert(nb_oargs == 1);
465
tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
123
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
466
regs[r0] = deposit32(regs[r1], pos, len, regs[r2]);
124
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
125
continue;
126
}
127
if (affected == 0) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
129
CASE_OP_32_64(mulsh):
130
if (arg_is_const(op->args[2])
131
&& arg_info(op->args[2])->val == 0) {
132
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
133
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
134
continue;
135
}
136
break;
467
break;
137
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
468
-#endif
138
CASE_OP_32_64_VEC(sub):
469
case INDEX_op_extract_i32:
139
CASE_OP_32_64_VEC(xor):
470
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
140
if (args_are_copies(op->args[1], op->args[2])) {
471
regs[r0] = extract32(regs[r1], pos, len);
141
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
472
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
142
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
473
regs[r0] = ror64(regs[r1], regs[r2] & 63);
143
continue;
144
}
145
break;
474
break;
146
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
475
#endif
147
if (arg_is_const(op->args[1])) {
476
-#if TCG_TARGET_HAS_deposit_i64
148
tmp = arg_info(op->args[1])->val;
477
case INDEX_op_deposit_i64:
149
tmp = dup_const(TCGOP_VECE(op), tmp);
478
tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
150
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
479
regs[r0] = deposit64(regs[r1], pos, len, regs[r2]);
151
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
480
break;
152
break;
481
-#endif
153
}
482
case INDEX_op_extract_i64:
154
goto do_default;
483
tci_args_rrbb(insn, &r0, &r1, &pos, &len);
155
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
484
regs[r0] = extract64(regs[r1], pos, len);
156
case INDEX_op_dup2_vec:
485
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
157
assert(TCG_TARGET_REG_BITS == 32);
486
index XXXXXXX..XXXXXXX 100644
158
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
487
--- a/tcg/tci/tcg-target.c.inc
159
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
488
+++ b/tcg/tci/tcg-target.c.inc
160
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0],
489
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
161
deposit64(arg_info(op->args[1])->val, 32, 32,
490
tcg_out_op_rrr(s, opc, args[0], args[1], args[2]);
162
arg_info(op->args[2])->val));
491
break;
163
break;
492
164
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
493
- CASE_32_64(deposit) /* Optional (TCG_TARGET_HAS_deposit_*). */
165
case INDEX_op_extrh_i64_i32:
494
+ CASE_32_64(deposit)
166
if (arg_is_const(op->args[1])) {
495
tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], args[3], args[4]);
167
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
496
break;
168
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
497
169
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
break;
171
}
172
goto do_default;
173
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
174
if (arg_is_const(op->args[1])) {
175
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
176
op->args[2]);
177
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
178
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
break;
180
}
181
goto do_default;
182
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
183
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
184
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
185
arg_info(op->args[2])->val);
186
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
187
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
break;
189
}
190
goto do_default;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
TCGArg v = arg_info(op->args[1])->val;
193
if (v != 0) {
194
tmp = do_constant_folding(opc, v, 0);
195
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
196
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
} else {
198
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
199
}
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
tmp = deposit64(arg_info(op->args[1])->val,
202
op->args[3], op->args[4],
203
arg_info(op->args[2])->val);
204
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
205
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
206
break;
207
}
208
goto do_default;
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
if (arg_is_const(op->args[1])) {
211
tmp = extract64(arg_info(op->args[1])->val,
212
op->args[2], op->args[3]);
213
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
214
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
215
break;
216
}
217
goto do_default;
218
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
219
if (arg_is_const(op->args[1])) {
220
tmp = sextract64(arg_info(op->args[1])->val,
221
op->args[2], op->args[3]);
222
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
223
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
224
break;
225
}
226
goto do_default;
227
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
228
tmp = (int32_t)(((uint32_t)v1 >> shr) |
229
((uint32_t)v2 << (32 - shr)));
230
}
231
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
232
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
233
break;
234
}
235
goto do_default;
236
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
237
tmp = do_constant_folding_cond(opc, op->args[1],
238
op->args[2], op->args[3]);
239
if (tmp != 2) {
240
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
241
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
242
break;
243
}
244
goto do_default;
245
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
246
op->args[1], op->args[2]);
247
if (tmp != 2) {
248
if (tmp) {
249
- memset(&temps_used, 0, sizeof(temps_used));
250
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
251
op->opc = INDEX_op_br;
252
op->args[0] = op->args[3];
253
} else {
254
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
255
256
rl = op->args[0];
257
rh = op->args[1];
258
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
259
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
260
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
261
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
262
break;
263
}
264
goto do_default;
265
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
266
267
rl = op->args[0];
268
rh = op->args[1];
269
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
270
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
271
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
272
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
273
break;
274
}
275
goto do_default;
276
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
277
if (tmp != 2) {
278
if (tmp) {
279
do_brcond_true:
280
- memset(&temps_used, 0, sizeof(temps_used));
281
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
282
op->opc = INDEX_op_br;
283
op->args[0] = op->args[5];
284
} else {
285
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
286
/* Simplify LT/GE comparisons vs zero to a single compare
287
vs the high word of the input. */
288
do_brcond_high:
289
- memset(&temps_used, 0, sizeof(temps_used));
290
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
op->opc = INDEX_op_brcond_i32;
292
op->args[0] = op->args[1];
293
op->args[1] = op->args[3];
294
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
295
goto do_default;
296
}
297
do_brcond_low:
298
- memset(&temps_used, 0, sizeof(temps_used));
299
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
300
op->opc = INDEX_op_brcond_i32;
301
op->args[1] = op->args[2];
302
op->args[2] = op->args[4];
303
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
304
op->args[5]);
305
if (tmp != 2) {
306
do_setcond_const:
307
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
308
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
309
} else if ((op->args[5] == TCG_COND_LT
310
|| op->args[5] == TCG_COND_GE)
311
&& arg_is_const(op->args[3])
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (!(tcg_call_flags(op)
314
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
315
for (i = 0; i < nb_globals; i++) {
316
- if (test_bit(i, temps_used.l)) {
317
+ if (test_bit(i, ctx.temps_used.l)) {
318
reset_ts(&s->temps[i]);
319
}
320
}
321
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
322
block, otherwise we only trash the output args. "z_mask" is
323
the non-zero bits mask for the first output arg. */
324
if (def->flags & TCG_OPF_BB_END) {
325
- memset(&temps_used, 0, sizeof(temps_used));
326
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
327
} else {
328
do_reset_output:
329
for (i = 0; i < nb_oargs; i++) {
330
--
2.25.1

--
2.43.0
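The tcg_gen_concat_i32_i64() hunk above chooses between a deposit and a shift/or fallback that exploits the known zero-extension; both reduce to the same computation, sketched here in plain C (illustrative name):

#include <stdint.h>

/* Concatenate two 32-bit halves into a 64-bit value: the deposit form
 * deposit(low, 32, 32, high) and the shli+or fallback agree on this. */
static uint64_t concat32_ref(uint32_t low, uint32_t high)
{
    return (uint64_t)low | ((uint64_t)high << 32);
}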
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20250102181601.1421059-2-richard.henderson@linaro.org>
---
host/include/riscv/host/cpuinfo.h | 5 +++--
util/cpuinfo-riscv.c | 18 ++++++++++++++++--
2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/host/include/riscv/host/cpuinfo.h b/host/include/riscv/host/cpuinfo.h

Rather than try to keep these up-to-date across folding,
re-read nb_oargs at the end, after re-reading the opcode.

A couple of asserts need dropping, but that will take care
of itself as we split the function further.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
12
--- a/host/include/riscv/host/cpuinfo.h
17
+++ b/tcg/optimize.c
13
+++ b/host/include/riscv/host/cpuinfo.h
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
14
@@ -XXX,XX +XXX,XX @@
19
15
#define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */
20
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
16
#define CPUINFO_ZBA (1u << 1)
21
uint64_t z_mask, partmask, affected, tmp;
17
#define CPUINFO_ZBB (1u << 2)
22
- int nb_oargs, nb_iargs;
18
-#define CPUINFO_ZICOND (1u << 3)
23
TCGOpcode opc = op->opc;
19
-#define CPUINFO_ZVE64X (1u << 4)
24
const TCGOpDef *def;
20
+#define CPUINFO_ZBS (1u << 3)
25
21
+#define CPUINFO_ZICOND (1u << 4)
26
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
22
+#define CPUINFO_ZVE64X (1u << 5)
23
24
/* Initialized with a constructor. */
25
extern unsigned cpuinfo;
26
diff --git a/util/cpuinfo-riscv.c b/util/cpuinfo-riscv.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/util/cpuinfo-riscv.c
29
+++ b/util/cpuinfo-riscv.c
30
@@ -XXX,XX +XXX,XX @@ static void sigill_handler(int signo, siginfo_t *si, void *data)
31
/* Called both as constructor and (possibly) via other constructors. */
32
unsigned __attribute__((constructor)) cpuinfo_init(void)
33
{
34
- unsigned left = CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZICOND | CPUINFO_ZVE64X;
35
+ unsigned left = CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZBS
36
+ | CPUINFO_ZICOND | CPUINFO_ZVE64X;
37
unsigned info = cpuinfo;
38
39
if (info) {
40
@@ -XXX,XX +XXX,XX @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
41
#if defined(__riscv_arch_test) && defined(__riscv_zbb)
42
info |= CPUINFO_ZBB;
43
#endif
44
+#if defined(__riscv_arch_test) && defined(__riscv_zbs)
45
+ info |= CPUINFO_ZBS;
46
+#endif
47
#if defined(__riscv_arch_test) && defined(__riscv_zicond)
48
info |= CPUINFO_ZICOND;
49
#endif
50
@@ -XXX,XX +XXX,XX @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
51
&& pair.key >= 0) {
52
info |= pair.value & RISCV_HWPROBE_EXT_ZBA ? CPUINFO_ZBA : 0;
53
info |= pair.value & RISCV_HWPROBE_EXT_ZBB ? CPUINFO_ZBB : 0;
54
- left &= ~(CPUINFO_ZBA | CPUINFO_ZBB);
55
+ info |= pair.value & RISCV_HWPROBE_EXT_ZBS ? CPUINFO_ZBS : 0;
56
+ left &= ~(CPUINFO_ZBA | CPUINFO_ZBB | CPUINFO_ZBS);
57
#ifdef RISCV_HWPROBE_EXT_ZICOND
58
info |= pair.value & RISCV_HWPROBE_EXT_ZICOND ? CPUINFO_ZICOND : 0;
59
left &= ~CPUINFO_ZICOND;
60
@@ -XXX,XX +XXX,XX @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
61
left &= ~CPUINFO_ZBB;
27
}
62
}
28
63
29
def = &tcg_op_defs[opc];
64
+ if (left & CPUINFO_ZBS) {
30
- nb_oargs = def->nb_oargs;
65
+ /* Probe for Zbs: bext zero,zero,zero. */
31
- nb_iargs = def->nb_iargs;
66
+ got_sigill = 0;
32
- init_arguments(&ctx, op, nb_oargs + nb_iargs);
67
+ asm volatile(".insn r 0x33, 5, 0x24, zero, zero, zero"
33
- copy_propagate(&ctx, op, nb_oargs, nb_iargs);
68
+ : : : "memory");
34
+ init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
69
+ info |= got_sigill ? 0 : CPUINFO_ZBS;
35
+ copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
70
+ left &= ~CPUINFO_ZBS;
36
71
+ }
37
/* For commutative operations make constant second argument */
72
+
38
switch (opc) {
73
if (left & CPUINFO_ZICOND) {
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
74
/* Probe for Zicond: czero.eqz zero,zero,zero. */
40
75
got_sigill = 0;
41
CASE_OP_32_64(qemu_ld):
42
{
43
- MemOpIdx oi = op->args[nb_oargs + nb_iargs];
44
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
45
MemOp mop = get_memop(oi);
46
if (!(mop & MO_SIGN)) {
47
z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
}
50
51
if (partmask == 0) {
52
- tcg_debug_assert(nb_oargs == 1);
53
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
54
continue;
55
}
56
if (affected == 0) {
57
- tcg_debug_assert(nb_oargs == 1);
58
tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
59
continue;
60
}
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
} else if (args_are_copies(op->args[1], op->args[2])) {
63
op->opc = INDEX_op_dup_vec;
64
TCGOP_VECE(op) = MO_32;
65
- nb_iargs = 1;
66
}
67
break;
68
69
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
70
op->opc = opc = (opc == INDEX_op_movcond_i32
71
? INDEX_op_setcond_i32
72
: INDEX_op_setcond_i64);
73
- nb_iargs = 2;
74
}
75
break;
76
77
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
78
if (def->flags & TCG_OPF_BB_END) {
79
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
80
} else {
81
+ int nb_oargs = def->nb_oargs;
82
for (i = 0; i < nb_oargs; i++) {
83
reset_temp(op->args[i]);
84
/* Save the corresponding known-zero bits mask for the
85
--
76
--
86
2.25.1
77
2.43.0
87
78
88
79
diff view generated by jsdifflib
1
Pull the "op r, a, a => movi r, 0" optimization into a function,
1
Acked-by: Alistair Francis <alistair.francis@wdc.com>
2
and use it in the outer opcode fold functions.
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Message-ID: <20250102181601.1421059-3-richard.henderson@linaro.org>
4
---
5
tcg/riscv/tcg-target-has.h | 8 +++++++-
6
tcg/riscv/tcg-target.c.inc | 11 +++++++++--
7
2 files changed, 16 insertions(+), 3 deletions(-)
3
8
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 41 ++++++++++++++++++++++++-----------------
9
1 file changed, 24 insertions(+), 17 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
11
--- a/tcg/riscv/tcg-target-has.h
14
+++ b/tcg/optimize.c
12
+++ b/tcg/riscv/tcg-target-has.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
16
return false;
14
/* ofs > 0 uses SRLIW; ofs == 0 uses add.uw. */
17
}
15
return ofs || (cpuinfo & CPUINFO_ZBA);
18
16
}
19
+/* If the binary operation has both arguments equal, fold to @i. */
17
- return (cpuinfo & CPUINFO_ZBB) && ofs == 0 && len == 16;
20
+static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
18
+ switch (len) {
21
+{
19
+ case 1:
22
+ if (args_are_copies(op->args[1], op->args[2])) {
20
+ return (cpuinfo & CPUINFO_ZBS) && ofs != 0;
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
21
+ case 16:
24
+ }
22
+ return (cpuinfo & CPUINFO_ZBB) && ofs == 0;
25
+ return false;
26
+}
27
+
28
/*
29
* These outermost fold_<op> functions are sorted alphabetically.
30
*/
31
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
32
33
static bool fold_andc(OptContext *ctx, TCGOp *op)
34
{
35
- return fold_const2(ctx, op);
36
+ if (fold_const2(ctx, op) ||
37
+ fold_xx_to_i(ctx, op, 0)) {
38
+ return true;
39
+ }
23
+ }
40
+ return false;
24
+ return false;
41
}
25
}
42
26
#define TCG_TARGET_extract_valid tcg_target_extract_valid
43
static bool fold_brcond(OptContext *ctx, TCGOp *op)
27
44
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
28
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
45
29
index XXXXXXX..XXXXXXX 100644
46
static bool fold_sub(OptContext *ctx, TCGOp *op)
30
--- a/tcg/riscv/tcg-target.c.inc
47
{
31
+++ b/tcg/riscv/tcg-target.c.inc
48
- return fold_const2(ctx, op);
32
@@ -XXX,XX +XXX,XX @@ typedef enum {
49
+ if (fold_const2(ctx, op) ||
33
OPC_ANDI = 0x7013,
50
+ fold_xx_to_i(ctx, op, 0)) {
34
OPC_AUIPC = 0x17,
51
+ return true;
35
OPC_BEQ = 0x63,
52
+ }
36
+ OPC_BEXTI = 0x48005013,
53
+ return false;
37
OPC_BGE = 0x5063,
54
}
38
OPC_BGEU = 0x7063,
55
39
OPC_BLT = 0x4063,
56
static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
40
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
57
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
58
59
static bool fold_xor(OptContext *ctx, TCGOp *op)
60
{
61
- return fold_const2(ctx, op);
62
+ if (fold_const2(ctx, op) ||
63
+ fold_xx_to_i(ctx, op, 0)) {
64
+ return true;
65
+ }
66
+ return false;
67
}
68
69
/* Propagate constants and copies, fold constant expressions. */
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
71
break;
72
}
41
}
73
42
/* FALLTHRU */
74
- /* Simplify expression for "op r, a, a => movi r, 0" cases */
43
case INDEX_op_extract_i32:
75
- switch (opc) {
44
- if (a2 == 0 && args[3] == 16) {
76
- CASE_OP_32_64_VEC(andc):
45
+ switch (args[3]) {
77
- CASE_OP_32_64_VEC(sub):
46
+ case 1:
78
- CASE_OP_32_64_VEC(xor):
47
+ tcg_out_opc_imm(s, OPC_BEXTI, a0, a1, a2);
79
- if (args_are_copies(op->args[1], op->args[2])) {
48
+ break;
80
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
49
+ case 16:
81
- continue;
50
+ tcg_debug_assert(a2 == 0);
82
- }
51
tcg_out_ext16u(s, a0, a1);
83
- break;
52
- } else {
84
- default:
53
+ break;
85
- break;
54
+ default:
86
- }
55
g_assert_not_reached();
87
-
56
}
88
/*
57
break;
89
* Process each opcode.
90
* Sorted alphabetically by opcode as much as possible.
91
--
58
--
92
2.25.1
59
2.43.0
93
94
diff view generated by jsdifflib
1
Calls are special in that they have a variable number
1
From: Helge Deller <deller@kernel.org>
2
of arguments, and need to be able to clobber globals.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Add some missing fields which may be parsed by userspace applications.
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
5
Signed-off-by: Helge Deller <deller@gmx.de>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-ID: <Z39B1wzNNpndmOxZ@p100>
7
---
9
---
8
tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++------------------
10
linux-user/sparc/target_proc.h | 20 +++++++++++++++++++-
9
1 file changed, 41 insertions(+), 22 deletions(-)
11
1 file changed, 19 insertions(+), 1 deletion(-)
10
12
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/linux-user/sparc/target_proc.h b/linux-user/sparc/target_proc.h
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
15
--- a/linux-user/sparc/target_proc.h
14
+++ b/tcg/optimize.c
16
+++ b/linux-user/sparc/target_proc.h
15
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
17
@@ -XXX,XX +XXX,XX @@
16
}
18
17
}
19
static int open_cpuinfo(CPUArchState *cpu_env, int fd)
18
20
{
19
+static bool fold_call(OptContext *ctx, TCGOp *op)
21
- dprintf(fd, "type\t\t: sun4u\n");
20
+{
22
+ int i, num_cpus;
21
+ TCGContext *s = ctx->tcg;
23
+ const char *cpu_type;
22
+ int nb_oargs = TCGOP_CALLO(op);
23
+ int nb_iargs = TCGOP_CALLI(op);
24
+ int flags, i;
25
+
24
+
26
+ init_arguments(ctx, op, nb_oargs + nb_iargs);
25
+ num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
27
+ copy_propagate(ctx, op, nb_oargs, nb_iargs);
26
+ if (cpu_env->def.features & CPU_FEATURE_HYPV) {
28
+
27
+ cpu_type = "sun4v";
29
+ /* If the function reads or writes globals, reset temp data. */
28
+ } else {
30
+ flags = tcg_call_flags(op);
29
+ cpu_type = "sun4u";
31
+ if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
32
+ int nb_globals = s->nb_globals;
33
+
34
+ for (i = 0; i < nb_globals; i++) {
35
+ if (test_bit(i, ctx->temps_used.l)) {
36
+ reset_ts(&ctx->tcg->temps[i]);
37
+ }
38
+ }
39
+ }
30
+ }
40
+
31
+
41
+ /* Reset temp data for outputs. */
32
+ dprintf(fd, "cpu\t\t: %s (QEMU)\n", cpu_env->def.name);
42
+ for (i = 0; i < nb_oargs; i++) {
33
+ dprintf(fd, "type\t\t: %s\n", cpu_type);
43
+ reset_temp(op->args[i]);
34
+ dprintf(fd, "ncpus probed\t: %d\n", num_cpus);
35
+ dprintf(fd, "ncpus active\t: %d\n", num_cpus);
36
+ dprintf(fd, "State:\n");
37
+ for (i = 0; i < num_cpus; i++) {
38
+ dprintf(fd, "CPU%d:\t\t: online\n", i);
44
+ }
39
+ }
45
+
40
+
46
+ /* Stop optimizing MB across calls. */
41
return 0;
47
+ ctx->prev_mb = NULL;
42
}
48
+ return true;
43
#define HAVE_ARCH_PROC_CPUINFO
49
+}
50
+
51
/* Propagate constants and copies, fold constant expressions. */
52
void tcg_optimize(TCGContext *s)
53
{
54
- int nb_temps, nb_globals, i;
55
+ int nb_temps, i;
56
TCGOp *op, *op_next;
57
OptContext ctx = { .tcg = s };
58
59
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
60
available through the doubly linked circular list. */
61
62
nb_temps = s->nb_temps;
63
- nb_globals = s->nb_globals;
64
-
65
for (i = 0; i < nb_temps; ++i) {
66
s->temps[i].state_ptr = NULL;
67
}
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
uint64_t z_mask, partmask, affected, tmp;
70
int nb_oargs, nb_iargs;
71
TCGOpcode opc = op->opc;
72
- const TCGOpDef *def = &tcg_op_defs[opc];
73
+ const TCGOpDef *def;
74
75
- /* Count the arguments, and initialize the temps that are
76
- going to be used */
77
+ /* Calls are special. */
78
if (opc == INDEX_op_call) {
79
- nb_oargs = TCGOP_CALLO(op);
80
- nb_iargs = TCGOP_CALLI(op);
81
- } else {
82
- nb_oargs = def->nb_oargs;
83
- nb_iargs = def->nb_iargs;
84
+ fold_call(&ctx, op);
85
+ continue;
86
}
87
+
88
+ def = &tcg_op_defs[opc];
89
+ nb_oargs = def->nb_oargs;
90
+ nb_iargs = def->nb_iargs;
91
init_arguments(&ctx, op, nb_oargs + nb_iargs);
92
copy_propagate(&ctx, op, nb_oargs, nb_iargs);
93
94
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
95
if (def->flags & TCG_OPF_BB_END) {
96
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
97
} else {
98
- if (opc == INDEX_op_call &&
99
- !(tcg_call_flags(op)
100
- & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
101
- for (i = 0; i < nb_globals; i++) {
102
- if (test_bit(i, ctx.temps_used.l)) {
103
- reset_ts(&s->temps[i]);
104
- }
105
- }
106
- }
107
-
108
for (i = 0; i < nb_oargs; i++) {
109
reset_temp(op->args[i]);
110
/* Save the corresponding known-zero bits mask for the
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
case INDEX_op_qemu_st_i32:
113
case INDEX_op_qemu_st8_i32:
114
case INDEX_op_qemu_st_i64:
115
- case INDEX_op_call:
116
/* Opcodes that touch guest memory stop the optimization. */
117
ctx.prev_mb = NULL;
118
break;
119
--
44
--
120
2.25.1
45
2.43.0
121
122
diff view generated by jsdifflib
1
From: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
2
2
3
Addition of not and xor on 128-bit integers.
3
These similarly named functions serve different purposes; add
4
docstrings to highlight them.
4
5
5
Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
6
Suggested-by: Alex Bennée <alex.bennee@linaro.org>
6
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
7
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
7
Message-Id: <20211025122818.168890-3-frederic.petrot@univ-grenoble-alpes.fr>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
[rth: Split out logical operations.]
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-ID: <20250116213214.5695-1-iii@linux.ibm.com>
11
---
12
---
12
include/qemu/int128.h | 20 ++++++++++++++++++++
13
include/tcg/tcg.h | 41 +++++++++++++++++++++++++++++++++++++++++
13
1 file changed, 20 insertions(+)
14
accel/tcg/cpu-exec.c | 15 ++++++++++++++-
15
2 files changed, 55 insertions(+), 1 deletion(-)
14
16
15
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
17
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/include/qemu/int128.h
19
--- a/include/tcg/tcg.h
18
+++ b/include/qemu/int128.h
20
+++ b/include/tcg/tcg.h
19
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
21
@@ -XXX,XX +XXX,XX @@ void tcg_region_reset_all(void);
20
return a;
22
size_t tcg_code_size(void);
23
size_t tcg_code_capacity(void);
24
25
+/**
26
+ * tcg_tb_insert:
27
+ * @tb: translation block to insert
28
+ *
29
+ * Insert @tb into the region trees.
30
+ */
31
void tcg_tb_insert(TranslationBlock *tb);
32
+
33
+/**
34
+ * tcg_tb_remove:
35
+ * @tb: translation block to remove
36
+ *
37
+ * Remove @tb from the region trees.
38
+ */
39
void tcg_tb_remove(TranslationBlock *tb);
40
+
41
+/**
42
+ * tcg_tb_lookup:
43
+ * @tc_ptr: host PC to look up
44
+ *
45
+ * Look up a translation block inside the region trees by @tc_ptr. This is
46
+ * useful for exception handling, but must not be used for the purposes of
47
+ * executing the returned translation block. See struct tb_tc for more
48
+ * information.
49
+ *
50
+ * Returns: a translation block previously inserted into the region trees,
51
+ * such that @tc_ptr points anywhere inside the code generated for it, or
52
+ * NULL.
53
+ */
54
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr);
55
+
56
+/**
57
+ * tcg_tb_foreach:
58
+ * @func: callback
59
+ * @user_data: opaque value to pass to @callback
60
+ *
61
+ * Call @func for each translation block inserted into the region trees.
62
+ */
63
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data);
64
+
65
+/**
66
+ * tcg_nb_tbs:
67
+ *
68
+ * Returns: the number of translation blocks inserted into the region trees.
69
+ */
70
size_t tcg_nb_tbs(void);
71
72
/* user-mode: Called with mmap_lock held. */
73
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/accel/tcg/cpu-exec.c
76
+++ b/accel/tcg/cpu-exec.c
77
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
78
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
21
}
79
}
22
80
23
+static inline Int128 int128_not(Int128 a)
81
-/* Might cause an exception, so have a longjmp destination ready */
24
+{
82
+/**
25
+ return ~a;
83
+ * tb_lookup:
26
+}
84
+ * @cpu: CPU that will execute the returned translation block
27
+
85
+ * @pc: guest PC
28
static inline Int128 int128_and(Int128 a, Int128 b)
86
+ * @cs_base: arch-specific value associated with translation block
29
{
87
+ * @flags: arch-specific translation block flags
30
return a & b;
88
+ * @cflags: CF_* flags
31
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
89
+ *
32
return a | b;
90
+ * Look up a translation block inside the QHT using @pc, @cs_base, @flags and
33
}
91
+ * @cflags. Uses @cpu's tb_jmp_cache. Might cause an exception, so have a
34
92
+ * longjmp destination ready.
35
+static inline Int128 int128_xor(Int128 a, Int128 b)
93
+ *
36
+{
94
+ * Returns: an existing translation block or NULL.
37
+ return a ^ b;
95
+ */
38
+}
96
static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
39
+
97
uint64_t cs_base, uint32_t flags,
40
static inline Int128 int128_rshift(Int128 a, int n)
98
uint32_t cflags)
41
{
42
return a >> n;
43
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
44
return int128_make128(a, (a < 0) ? -1 : 0);
45
}
46
47
+static inline Int128 int128_not(Int128 a)
48
+{
49
+ return int128_make128(~a.lo, ~a.hi);
50
+}
51
+
52
static inline Int128 int128_and(Int128 a, Int128 b)
53
{
54
return int128_make128(a.lo & b.lo, a.hi & b.hi);
55
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
56
return int128_make128(a.lo | b.lo, a.hi | b.hi);
57
}
58
59
+static inline Int128 int128_xor(Int128 a, Int128 b)
60
+{
61
+ return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
62
+}
63
+
64
static inline Int128 int128_rshift(Int128 a, int n)
65
{
66
int64_t h;
67
--
99
--
68
2.25.1
100
2.43.0
69
101
70
102
diff view generated by jsdifflib
1
For constant shifts, we can simply shift the s_mask.
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
2
2
3
For variable shifts, we know that sar does not reduce
3
Currently one-insn TBs created from I/O memory are not added to
4
the s_mask, which helps for sequences like
4
region_trees. Therefore, when they generate exceptions, they are not
5
handled by cpu_restore_state_from_tb().
5
6
6
ext32s_i64 t, in
7
For x86 this is not a problem, because x86_restore_state_to_opc() only
7
sar_i64 t, t, v
8
restores pc and cc, which already have the correct values if the first
8
ext32s_i64 out, t
9
TB instruction causes an exception. However, on several other
10
architectures, restore_state_to_opc() is not stricly limited to state
11
restoration and affects some exception-related registers, where guests
12
can notice incorrect values, for example:
9
13
10
allowing the final extend to be eliminated.
14
- arm's exception.syndrome;
15
- hppa's unwind_breg;
16
- riscv's excp_uw2;
17
- s390x's int_pgm_ilen.
11
18
19
Fix by always calling tcg_tb_insert(). This may increase the size of
20
region_trees, but tcg_region_reset_all() clears it once code_gen_buffer
21
fills up, so it will not grow uncontrollably.
22
23
Do not call tb_link_page(), which would add such TBs to the QHT, to
24
prevent tb_lookup() from finding them. These TBs are single-use, since
25
subsequent reads from I/O memory may return different values; they are
26
not removed from code_gen_buffer only in order to keep things simple.
27
28
Co-developed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
29
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
30
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
12
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
31
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
13
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
32
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
33
Message-ID: <20250116213214.5695-2-iii@linux.ibm.com>
15
---
34
---
16
tcg/optimize.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
35
accel/tcg/translate-all.c | 29 +++++++++++++++++++----------
17
1 file changed, 47 insertions(+), 3 deletions(-)
36
1 file changed, 19 insertions(+), 10 deletions(-)
18
37
19
diff --git a/tcg/optimize.c b/tcg/optimize.c
38
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
20
index XXXXXXX..XXXXXXX 100644
39
index XXXXXXX..XXXXXXX 100644
21
--- a/tcg/optimize.c
40
--- a/accel/tcg/translate-all.c
22
+++ b/tcg/optimize.c
41
+++ b/accel/tcg/translate-all.c
23
@@ -XXX,XX +XXX,XX @@ static uint64_t smask_from_zmask(uint64_t zmask)
42
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
24
return ~(~0ull >> rep);
43
tb_reset_jump(tb, 1);
25
}
26
27
+/*
28
+ * Recreate a properly left-aligned smask after manipulation.
29
+ * Some bit-shuffling, particularly shifts and rotates, may
30
+ * retain sign bits on the left, but may scatter disconnected
31
+ * sign bits on the right. Retain only what remains to the left.
32
+ */
33
+static uint64_t smask_from_smask(int64_t smask)
34
+{
35
+ /* Only the 1 bits are significant for smask */
36
+ return smask_from_zmask(~smask);
37
+}
38
+
39
static inline TempOptInfo *ts_info(TCGTemp *ts)
40
{
41
return ts->state_ptr;
42
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
43
44
static bool fold_shift(OptContext *ctx, TCGOp *op)
45
{
46
+ uint64_t s_mask, z_mask, sign;
47
+
48
if (fold_const2(ctx, op) ||
49
fold_ix_to_i(ctx, op, 0) ||
50
fold_xi_to_x(ctx, op, 0)) {
51
return true;
52
}
44
}
53
45
54
+ s_mask = arg_info(op->args[1])->s_mask;
46
- /*
55
+ z_mask = arg_info(op->args[1])->z_mask;
47
- * If the TB is not associated with a physical RAM page then it must be
56
+
48
- * a temporary one-insn TB, and we have nothing left to do. Return early
57
if (arg_is_const(op->args[2])) {
49
- * before attempting to link to other TBs or add to the lookup table.
58
- ctx->z_mask = do_constant_folding(op->opc, ctx->type,
50
- */
59
- arg_info(op->args[1])->z_mask,
51
- if (tb_page_addr0(tb) == -1) {
60
- arg_info(op->args[2])->val);
52
- assert_no_pages_locked();
61
+ int sh = arg_info(op->args[2])->val;
53
- return tb;
62
+
54
- }
63
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
55
-
64
+
56
/*
65
+ s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
57
* Insert TB into the corresponding region tree before publishing it
66
+ ctx->s_mask = smask_from_smask(s_mask);
58
* through QHT. Otherwise rewinding happened in the TB might fail to
67
+
59
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
68
return fold_masks(ctx, op);
60
*/
69
}
61
tcg_tb_insert(tb);
70
+
62
71
+ switch (op->opc) {
63
+ /*
72
+ CASE_OP_32_64(sar):
64
+ * If the TB is not associated with a physical RAM page then it must be
73
+ /*
65
+ * a temporary one-insn TB.
74
+ * Arithmetic right shift will not reduce the number of
66
+ *
75
+ * input sign repetitions.
67
+ * Such TBs must be added to region trees in order to make sure that
76
+ */
68
+ * restore_state_to_opc() - which on some architectures is not limited to
77
+ ctx->s_mask = s_mask;
69
+ * rewinding, but also affects exception handling! - is called when such a
78
+ break;
70
+ * TB causes an exception.
79
+ CASE_OP_32_64(shr):
71
+ *
80
+ /*
72
+ * At the same time, temporary one-insn TBs must be executed at most once,
81
+ * If the sign bit is known zero, then logical right shift
73
+ * because subsequent reads from, e.g., I/O memory may return different
82
+ * will not reduced the number of input sign repetitions.
74
+ * values. So return early before attempting to link to other TBs or add
83
+ */
75
+ * to the QHT.
84
+ sign = (s_mask & -s_mask) >> 1;
76
+ */
85
+ if (!(z_mask & sign)) {
77
+ if (tb_page_addr0(tb) == -1) {
86
+ ctx->s_mask = s_mask;
78
+ assert_no_pages_locked();
87
+ }
79
+ return tb;
88
+ break;
89
+ default:
90
+ break;
91
+ }
80
+ }
92
+
81
+
93
return false;
82
/*
94
}
83
* No explicit memory barrier is required -- tb_link_page() makes the
95
84
* TB visible in a consistent state.
96
--
85
--
97
2.25.1
86
2.43.0
98
87
99
88
diff view generated by jsdifflib
1
Move all of the known-zero optimizations into the per-opcode
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
functions. Use fold_masks when there is a possibility of the
3
result being determined, and simply set ctx->z_mask otherwise.
4
2
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
These helpers don't alter float_status. Make it const.
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-ID: <20250116214359.67295-1-philmd@linaro.org>
8
---
9
---
9
tcg/optimize.c | 545 ++++++++++++++++++++++++++-----------------------
10
include/fpu/softfloat-helpers.h | 25 ++++++++++++++-----------
10
1 file changed, 294 insertions(+), 251 deletions(-)
11
1 file changed, 14 insertions(+), 11 deletions(-)
11
12
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
--- a/include/fpu/softfloat-helpers.h
15
+++ b/tcg/optimize.c
16
+++ b/include/fpu/softfloat-helpers.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
17
@@ -XXX,XX +XXX,XX @@ static inline void set_no_signaling_nans(bool val, float_status *status)
17
TCGTempSet temps_used;
18
status->no_signaling_nans = val;
18
19
/* In flight values from optimization. */
20
- uint64_t z_mask;
21
+ uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
22
+ uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
23
TCGType type;
24
} OptContext;
25
26
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
27
return false;
28
}
19
}
29
20
30
+static bool fold_masks(OptContext *ctx, TCGOp *op)
21
-static inline bool get_float_detect_tininess(float_status *status)
31
+{
22
+static inline bool get_float_detect_tininess(const float_status *status)
32
+ uint64_t a_mask = ctx->a_mask;
33
+ uint64_t z_mask = ctx->z_mask;
34
+
35
+ /*
36
+ * 32-bit ops generate 32-bit results. For the result is zero test
37
+ * below, we can ignore high bits, but for further optimizations we
38
+ * need to record that the high bits contain garbage.
39
+ */
40
+ if (ctx->type == TCG_TYPE_I32) {
41
+ ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
42
+ a_mask &= MAKE_64BIT_MASK(0, 32);
43
+ z_mask &= MAKE_64BIT_MASK(0, 32);
44
+ }
45
+
46
+ if (z_mask == 0) {
47
+ return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
48
+ }
49
+ if (a_mask == 0) {
50
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
51
+ }
52
+ return false;
53
+}
54
+
55
/*
56
* Convert @op to NOT, if NOT is supported by the host.
57
* Return true f the conversion is successful, which will still
58
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
59
60
static bool fold_and(OptContext *ctx, TCGOp *op)
61
{
23
{
62
+ uint64_t z1, z2;
24
return status->tininess_before_rounding;
63
+
64
if (fold_const2(ctx, op) ||
65
fold_xi_to_i(ctx, op, 0) ||
66
fold_xi_to_x(ctx, op, -1) ||
67
fold_xx_to_x(ctx, op)) {
68
return true;
69
}
70
- return false;
71
+
72
+ z1 = arg_info(op->args[1])->z_mask;
73
+ z2 = arg_info(op->args[2])->z_mask;
74
+ ctx->z_mask = z1 & z2;
75
+
76
+ /*
77
+ * Known-zeros does not imply known-ones. Therefore unless
78
+ * arg2 is constant, we can't infer affected bits from it.
79
+ */
80
+ if (arg_is_const(op->args[2])) {
81
+ ctx->a_mask = z1 & ~z2;
82
+ }
83
+
84
+ return fold_masks(ctx, op);
85
}
25
}
86
26
87
static bool fold_andc(OptContext *ctx, TCGOp *op)
27
-static inline FloatRoundMode get_float_rounding_mode(float_status *status)
28
+static inline FloatRoundMode get_float_rounding_mode(const float_status *status)
88
{
29
{
89
+ uint64_t z1;
30
return status->float_rounding_mode;
90
+
91
if (fold_const2(ctx, op) ||
92
fold_xx_to_i(ctx, op, 0) ||
93
fold_xi_to_x(ctx, op, 0) ||
94
fold_ix_to_not(ctx, op, -1)) {
95
return true;
96
}
97
- return false;
98
+
99
+ z1 = arg_info(op->args[1])->z_mask;
100
+
101
+ /*
102
+ * Known-zeros does not imply known-ones. Therefore unless
103
+ * arg2 is constant, we can't infer anything from it.
104
+ */
105
+ if (arg_is_const(op->args[2])) {
106
+ uint64_t z2 = ~arg_info(op->args[2])->z_mask;
107
+ ctx->a_mask = z1 & ~z2;
108
+ z1 &= z2;
109
+ }
110
+ ctx->z_mask = z1;
111
+
112
+ return fold_masks(ctx, op);
113
}
31
}
114
32
115
static bool fold_brcond(OptContext *ctx, TCGOp *op)
33
-static inline int get_float_exception_flags(float_status *status)
116
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
34
+static inline int get_float_exception_flags(const float_status *status)
117
118
static bool fold_bswap(OptContext *ctx, TCGOp *op)
119
{
35
{
120
+ uint64_t z_mask, sign;
36
return status->float_exception_flags;
121
+
122
if (arg_is_const(op->args[1])) {
123
uint64_t t = arg_info(op->args[1])->val;
124
125
t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
126
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
127
}
128
- return false;
129
+
130
+ z_mask = arg_info(op->args[1])->z_mask;
131
+ switch (op->opc) {
132
+ case INDEX_op_bswap16_i32:
133
+ case INDEX_op_bswap16_i64:
134
+ z_mask = bswap16(z_mask);
135
+ sign = INT16_MIN;
136
+ break;
137
+ case INDEX_op_bswap32_i32:
138
+ case INDEX_op_bswap32_i64:
139
+ z_mask = bswap32(z_mask);
140
+ sign = INT32_MIN;
141
+ break;
142
+ case INDEX_op_bswap64_i64:
143
+ z_mask = bswap64(z_mask);
144
+ sign = INT64_MIN;
145
+ break;
146
+ default:
147
+ g_assert_not_reached();
148
+ }
149
+
150
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
151
+ case TCG_BSWAP_OZ:
152
+ break;
153
+ case TCG_BSWAP_OS:
154
+ /* If the sign bit may be 1, force all the bits above to 1. */
155
+ if (z_mask & sign) {
156
+ z_mask |= sign;
157
+ }
158
+ break;
159
+ default:
160
+ /* The high bits are undefined: force all bits above the sign to 1. */
161
+ z_mask |= sign << 1;
162
+ break;
163
+ }
164
+ ctx->z_mask = z_mask;
165
+
166
+ return fold_masks(ctx, op);
167
}
37
}
168
38
169
static bool fold_call(OptContext *ctx, TCGOp *op)
39
static inline FloatX80RoundPrec
170
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
40
-get_floatx80_rounding_precision(float_status *status)
171
41
+get_floatx80_rounding_precision(const float_status *status)
172
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
173
{
42
{
174
+ uint64_t z_mask;
43
return status->floatx80_rounding_precision;
175
+
176
if (arg_is_const(op->args[1])) {
177
uint64_t t = arg_info(op->args[1])->val;
178
179
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
180
}
181
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
182
}
183
+
184
+ switch (ctx->type) {
185
+ case TCG_TYPE_I32:
186
+ z_mask = 31;
187
+ break;
188
+ case TCG_TYPE_I64:
189
+ z_mask = 63;
190
+ break;
191
+ default:
192
+ g_assert_not_reached();
193
+ }
194
+ ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
195
+
196
return false;
197
}
44
}
198
45
199
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
46
-static inline Float2NaNPropRule get_float_2nan_prop_rule(float_status *status)
47
+static inline Float2NaNPropRule
48
+get_float_2nan_prop_rule(const float_status *status)
200
{
49
{
201
- return fold_const1(ctx, op);
50
return status->float_2nan_prop_rule;
202
+ if (fold_const1(ctx, op)) {
203
+ return true;
204
+ }
205
+
206
+ switch (ctx->type) {
207
+ case TCG_TYPE_I32:
208
+ ctx->z_mask = 32 | 31;
209
+ break;
210
+ case TCG_TYPE_I64:
211
+ ctx->z_mask = 64 | 63;
212
+ break;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ return false;
217
}
51
}
218
52
219
static bool fold_deposit(OptContext *ctx, TCGOp *op)
53
-static inline Float3NaNPropRule get_float_3nan_prop_rule(float_status *status)
220
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
54
+static inline Float3NaNPropRule
221
t1 = deposit64(t1, op->args[3], op->args[4], t2);
55
+get_float_3nan_prop_rule(const float_status *status)
222
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
56
{
223
}
57
return status->float_3nan_prop_rule;
224
+
225
+ ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
226
+ op->args[3], op->args[4],
227
+ arg_info(op->args[2])->z_mask);
228
return false;
229
}
58
}
230
59
231
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
60
-static inline FloatInfZeroNaNRule get_float_infzeronan_rule(float_status *status)
232
61
+static inline FloatInfZeroNaNRule
233
static bool fold_extract(OptContext *ctx, TCGOp *op)
62
+get_float_infzeronan_rule(const float_status *status)
234
{
63
{
235
+ uint64_t z_mask_old, z_mask;
64
return status->float_infzeronan_rule;
236
+
237
if (arg_is_const(op->args[1])) {
238
uint64_t t;
239
240
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
241
t = extract64(t, op->args[2], op->args[3]);
242
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
243
}
244
- return false;
245
+
246
+ z_mask_old = arg_info(op->args[1])->z_mask;
247
+ z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
248
+ if (op->args[2] == 0) {
249
+ ctx->a_mask = z_mask_old ^ z_mask;
250
+ }
251
+ ctx->z_mask = z_mask;
252
+
253
+ return fold_masks(ctx, op);
254
}
65
}
255
66
256
static bool fold_extract2(OptContext *ctx, TCGOp *op)
67
-static inline uint8_t get_float_default_nan_pattern(float_status *status)
257
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
68
+static inline uint8_t get_float_default_nan_pattern(const float_status *status)
258
259
static bool fold_exts(OptContext *ctx, TCGOp *op)
260
{
69
{
261
- return fold_const1(ctx, op);
70
return status->default_nan_pattern;
262
+ uint64_t z_mask_old, z_mask, sign;
263
+ bool type_change = false;
264
+
265
+ if (fold_const1(ctx, op)) {
266
+ return true;
267
+ }
268
+
269
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
270
+
271
+ switch (op->opc) {
272
+ CASE_OP_32_64(ext8s):
273
+ sign = INT8_MIN;
274
+ z_mask = (uint8_t)z_mask;
275
+ break;
276
+ CASE_OP_32_64(ext16s):
277
+ sign = INT16_MIN;
278
+ z_mask = (uint16_t)z_mask;
279
+ break;
280
+ case INDEX_op_ext_i32_i64:
281
+ type_change = true;
282
+ QEMU_FALLTHROUGH;
283
+ case INDEX_op_ext32s_i64:
284
+ sign = INT32_MIN;
285
+ z_mask = (uint32_t)z_mask;
286
+ break;
287
+ default:
288
+ g_assert_not_reached();
289
+ }
290
+
291
+ if (z_mask & sign) {
292
+ z_mask |= sign;
293
+ } else if (!type_change) {
294
+ ctx->a_mask = z_mask_old ^ z_mask;
295
+ }
296
+ ctx->z_mask = z_mask;
297
+
298
+ return fold_masks(ctx, op);
299
}
71
}
300
72
301
static bool fold_extu(OptContext *ctx, TCGOp *op)
73
-static inline bool get_flush_to_zero(float_status *status)
74
+static inline bool get_flush_to_zero(const float_status *status)
302
{
75
{
303
- return fold_const1(ctx, op);
76
return status->flush_to_zero;
304
+ uint64_t z_mask_old, z_mask;
305
+ bool type_change = false;
306
+
307
+ if (fold_const1(ctx, op)) {
308
+ return true;
309
+ }
310
+
311
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
312
+
313
+ switch (op->opc) {
314
+ CASE_OP_32_64(ext8u):
315
+ z_mask = (uint8_t)z_mask;
316
+ break;
317
+ CASE_OP_32_64(ext16u):
318
+ z_mask = (uint16_t)z_mask;
319
+ break;
320
+ case INDEX_op_extrl_i64_i32:
321
+ case INDEX_op_extu_i32_i64:
322
+ type_change = true;
323
+ QEMU_FALLTHROUGH;
324
+ case INDEX_op_ext32u_i64:
325
+ z_mask = (uint32_t)z_mask;
326
+ break;
327
+ case INDEX_op_extrh_i64_i32:
328
+ type_change = true;
329
+ z_mask >>= 32;
330
+ break;
331
+ default:
332
+ g_assert_not_reached();
333
+ }
334
+
335
+ ctx->z_mask = z_mask;
336
+ if (!type_change) {
337
+ ctx->a_mask = z_mask_old ^ z_mask;
338
+ }
339
+ return fold_masks(ctx, op);
340
}
77
}
341
78
342
static bool fold_mb(OptContext *ctx, TCGOp *op)
79
-static inline bool get_flush_inputs_to_zero(float_status *status)
343
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
80
+static inline bool get_flush_inputs_to_zero(const float_status *status)
344
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
345
}
346
347
+ ctx->z_mask = arg_info(op->args[3])->z_mask
348
+ | arg_info(op->args[4])->z_mask;
349
+
350
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
351
uint64_t tv = arg_info(op->args[3])->val;
352
uint64_t fv = arg_info(op->args[4])->val;
353
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
354
355
static bool fold_neg(OptContext *ctx, TCGOp *op)
356
{
81
{
357
+ uint64_t z_mask;
82
return status->flush_inputs_to_zero;
358
+
359
if (fold_const1(ctx, op)) {
360
return true;
361
}
362
+
363
+ /* Set to 1 all bits to the left of the rightmost. */
364
+ z_mask = arg_info(op->args[1])->z_mask;
365
+ ctx->z_mask = -(z_mask & -z_mask);
366
+
367
/*
368
* Because of fold_sub_to_neg, we want to always return true,
369
* via finish_folding.
370
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
371
fold_xx_to_x(ctx, op)) {
372
return true;
373
}
374
- return false;
375
+
376
+ ctx->z_mask = arg_info(op->args[1])->z_mask
377
+ | arg_info(op->args[2])->z_mask;
378
+ return fold_masks(ctx, op);
379
}
83
}
380
84
381
static bool fold_orc(OptContext *ctx, TCGOp *op)
85
-static inline bool get_default_nan_mode(float_status *status)
382
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
86
+static inline bool get_default_nan_mode(const float_status *status)
383
384
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
385
{
87
{
386
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
88
return status->default_nan_mode;
387
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
388
+ MemOp mop = get_memop(oi);
389
+ int width = 8 * memop_size(mop);
390
+
391
+ if (!(mop & MO_SIGN) && width < 64) {
392
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
393
+ }
394
+
395
/* Opcodes that touch guest memory stop the mb optimization. */
396
ctx->prev_mb = NULL;
397
return false;
398
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
399
if (i >= 0) {
400
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
401
}
402
+
403
+ ctx->z_mask = 1;
404
return false;
405
}
89
}
406
407
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
408
op->opc = INDEX_op_setcond_i32;
409
break;
410
}
411
+
412
+ ctx->z_mask = 1;
413
return false;
414
415
do_setcond_const:
416
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
417
418
static bool fold_sextract(OptContext *ctx, TCGOp *op)
419
{
420
+ int64_t z_mask_old, z_mask;
421
+
422
if (arg_is_const(op->args[1])) {
423
uint64_t t;
424
425
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
426
t = sextract64(t, op->args[2], op->args[3]);
427
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
428
}
429
- return false;
430
+
431
+ z_mask_old = arg_info(op->args[1])->z_mask;
432
+ z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
433
+ if (op->args[2] == 0 && z_mask >= 0) {
434
+ ctx->a_mask = z_mask_old ^ z_mask;
435
+ }
436
+ ctx->z_mask = z_mask;
437
+
438
+ return fold_masks(ctx, op);
439
}
440
441
static bool fold_shift(OptContext *ctx, TCGOp *op)
442
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
443
fold_xi_to_x(ctx, op, 0)) {
444
return true;
445
}
446
+
447
+ if (arg_is_const(op->args[2])) {
448
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type,
449
+ arg_info(op->args[1])->z_mask,
450
+ arg_info(op->args[2])->val);
451
+ return fold_masks(ctx, op);
452
+ }
453
return false;
454
}
455
456
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
457
return fold_addsub2_i32(ctx, op, false);
458
}
459
460
+static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
461
+{
462
+ /* We can't do any folding with a load, but we can record bits. */
463
+ switch (op->opc) {
464
+ CASE_OP_32_64(ld8u):
465
+ ctx->z_mask = MAKE_64BIT_MASK(0, 8);
466
+ break;
467
+ CASE_OP_32_64(ld16u):
468
+ ctx->z_mask = MAKE_64BIT_MASK(0, 16);
469
+ break;
470
+ case INDEX_op_ld32u_i64:
471
+ ctx->z_mask = MAKE_64BIT_MASK(0, 32);
472
+ break;
473
+ default:
474
+ g_assert_not_reached();
475
+ }
476
+ return false;
477
+}
478
+
479
static bool fold_xor(OptContext *ctx, TCGOp *op)
480
{
481
if (fold_const2(ctx, op) ||
482
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
483
fold_xi_to_not(ctx, op, -1)) {
484
return true;
485
}
486
- return false;
487
+
488
+ ctx->z_mask = arg_info(op->args[1])->z_mask
489
+ | arg_info(op->args[2])->z_mask;
490
+ return fold_masks(ctx, op);
491
}
492
493
/* Propagate constants and copies, fold constant expressions. */
494
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
495
}
496
497
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
498
- uint64_t z_mask, partmask, affected, tmp;
499
TCGOpcode opc = op->opc;
500
const TCGOpDef *def;
501
bool done = false;
502
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
503
break;
504
}
505
506
- /* Simplify using known-zero bits. Currently only ops with a single
507
- output argument is supported. */
508
- z_mask = -1;
509
- affected = -1;
510
- switch (opc) {
511
- CASE_OP_32_64(ext8s):
512
- if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
513
- break;
514
- }
515
- QEMU_FALLTHROUGH;
516
- CASE_OP_32_64(ext8u):
517
- z_mask = 0xff;
518
- goto and_const;
519
- CASE_OP_32_64(ext16s):
520
- if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
521
- break;
522
- }
523
- QEMU_FALLTHROUGH;
524
- CASE_OP_32_64(ext16u):
525
- z_mask = 0xffff;
526
- goto and_const;
527
- case INDEX_op_ext32s_i64:
528
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
529
- break;
530
- }
531
- QEMU_FALLTHROUGH;
532
- case INDEX_op_ext32u_i64:
533
- z_mask = 0xffffffffU;
534
- goto and_const;
535
-
536
- CASE_OP_32_64(and):
537
- z_mask = arg_info(op->args[2])->z_mask;
538
- if (arg_is_const(op->args[2])) {
539
- and_const:
540
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
541
- }
542
- z_mask = arg_info(op->args[1])->z_mask & z_mask;
543
- break;
544
-
545
- case INDEX_op_ext_i32_i64:
546
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
547
- break;
548
- }
549
- QEMU_FALLTHROUGH;
550
- case INDEX_op_extu_i32_i64:
551
- /* We do not compute affected as it is a size changing op. */
552
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
553
- break;
554
-
555
- CASE_OP_32_64(andc):
556
- /* Known-zeros does not imply known-ones. Therefore unless
557
- op->args[2] is constant, we can't infer anything from it. */
558
- if (arg_is_const(op->args[2])) {
559
- z_mask = ~arg_info(op->args[2])->z_mask;
560
- goto and_const;
561
- }
562
- /* But we certainly know nothing outside args[1] may be set. */
563
- z_mask = arg_info(op->args[1])->z_mask;
564
- break;
565
-
566
- case INDEX_op_sar_i32:
567
- if (arg_is_const(op->args[2])) {
568
- tmp = arg_info(op->args[2])->val & 31;
569
- z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
570
- }
571
- break;
572
- case INDEX_op_sar_i64:
573
- if (arg_is_const(op->args[2])) {
574
- tmp = arg_info(op->args[2])->val & 63;
575
- z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
576
- }
577
- break;
578
-
579
- case INDEX_op_shr_i32:
580
- if (arg_is_const(op->args[2])) {
581
- tmp = arg_info(op->args[2])->val & 31;
582
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
583
- }
584
- break;
585
- case INDEX_op_shr_i64:
586
- if (arg_is_const(op->args[2])) {
587
- tmp = arg_info(op->args[2])->val & 63;
588
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
589
- }
590
- break;
591
-
592
- case INDEX_op_extrl_i64_i32:
593
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
594
- break;
595
- case INDEX_op_extrh_i64_i32:
596
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
597
- break;
598
-
599
- CASE_OP_32_64(shl):
600
- if (arg_is_const(op->args[2])) {
601
- tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
602
- z_mask = arg_info(op->args[1])->z_mask << tmp;
603
- }
604
- break;
605
-
606
- CASE_OP_32_64(neg):
607
- /* Set to 1 all bits to the left of the rightmost. */
608
- z_mask = -(arg_info(op->args[1])->z_mask
609
- & -arg_info(op->args[1])->z_mask);
610
- break;
611
-
612
- CASE_OP_32_64(deposit):
613
- z_mask = deposit64(arg_info(op->args[1])->z_mask,
614
- op->args[3], op->args[4],
615
- arg_info(op->args[2])->z_mask);
616
- break;
617
-
618
- CASE_OP_32_64(extract):
619
- z_mask = extract64(arg_info(op->args[1])->z_mask,
620
- op->args[2], op->args[3]);
621
- if (op->args[2] == 0) {
622
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
623
- }
624
- break;
625
- CASE_OP_32_64(sextract):
626
- z_mask = sextract64(arg_info(op->args[1])->z_mask,
627
- op->args[2], op->args[3]);
628
- if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
629
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
630
- }
631
- break;
632
-
633
- CASE_OP_32_64(or):
634
- CASE_OP_32_64(xor):
635
- z_mask = arg_info(op->args[1])->z_mask
636
- | arg_info(op->args[2])->z_mask;
637
- break;
638
-
639
- case INDEX_op_clz_i32:
640
- case INDEX_op_ctz_i32:
641
- z_mask = arg_info(op->args[2])->z_mask | 31;
642
- break;
643
-
644
- case INDEX_op_clz_i64:
645
- case INDEX_op_ctz_i64:
646
- z_mask = arg_info(op->args[2])->z_mask | 63;
647
- break;
648
-
649
- case INDEX_op_ctpop_i32:
650
- z_mask = 32 | 31;
651
- break;
652
- case INDEX_op_ctpop_i64:
653
- z_mask = 64 | 63;
654
- break;
655
-
656
- CASE_OP_32_64(setcond):
657
- case INDEX_op_setcond2_i32:
658
- z_mask = 1;
659
- break;
660
-
661
- CASE_OP_32_64(movcond):
662
- z_mask = arg_info(op->args[3])->z_mask
663
- | arg_info(op->args[4])->z_mask;
664
- break;
665
-
666
- CASE_OP_32_64(ld8u):
667
- z_mask = 0xff;
668
- break;
669
- CASE_OP_32_64(ld16u):
670
- z_mask = 0xffff;
671
- break;
672
- case INDEX_op_ld32u_i64:
673
- z_mask = 0xffffffffu;
674
- break;
675
-
676
- CASE_OP_32_64(qemu_ld):
677
- {
678
- MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
679
- MemOp mop = get_memop(oi);
680
- if (!(mop & MO_SIGN)) {
681
- z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
682
- }
683
- }
684
- break;
685
-
686
- CASE_OP_32_64(bswap16):
687
- z_mask = arg_info(op->args[1])->z_mask;
688
- if (z_mask <= 0xffff) {
689
- op->args[2] |= TCG_BSWAP_IZ;
690
- }
691
- z_mask = bswap16(z_mask);
692
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
693
- case TCG_BSWAP_OZ:
694
- break;
695
- case TCG_BSWAP_OS:
696
- z_mask = (int16_t)z_mask;
697
- break;
698
- default: /* undefined high bits */
699
- z_mask |= MAKE_64BIT_MASK(16, 48);
700
- break;
701
- }
702
- break;
703
-
704
- case INDEX_op_bswap32_i64:
705
- z_mask = arg_info(op->args[1])->z_mask;
706
- if (z_mask <= 0xffffffffu) {
707
- op->args[2] |= TCG_BSWAP_IZ;
708
- }
709
- z_mask = bswap32(z_mask);
710
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
711
- case TCG_BSWAP_OZ:
712
- break;
713
- case TCG_BSWAP_OS:
714
- z_mask = (int32_t)z_mask;
715
- break;
716
- default: /* undefined high bits */
717
- z_mask |= MAKE_64BIT_MASK(32, 32);
718
- break;
719
- }
720
- break;
721
-
722
- default:
723
- break;
724
- }
725
-
726
- /* 32-bit ops generate 32-bit results. For the result is zero test
727
- below, we can ignore high bits, but for further optimizations we
728
- need to record that the high bits contain garbage. */
729
- partmask = z_mask;
730
- if (ctx.type == TCG_TYPE_I32) {
731
- z_mask |= ~(tcg_target_ulong)0xffffffffu;
732
- partmask &= 0xffffffffu;
733
- affected &= 0xffffffffu;
734
- }
735
- ctx.z_mask = z_mask;
736
-
737
- if (partmask == 0) {
738
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
739
- continue;
740
- }
741
- if (affected == 0) {
742
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
743
- continue;
744
- }
745
+ /* Assume all bits affected, and no bits known zero. */
746
+ ctx.a_mask = -1;
747
+ ctx.z_mask = -1;
748
749
/*
750
* Process each opcode.
751
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
752
case INDEX_op_extrh_i64_i32:
753
done = fold_extu(&ctx, op);
754
break;
755
+ CASE_OP_32_64(ld8u):
756
+ CASE_OP_32_64(ld16u):
757
+ case INDEX_op_ld32u_i64:
758
+ done = fold_tcg_ld(&ctx, op);
759
+ break;
760
case INDEX_op_mb:
761
done = fold_mb(&ctx, op);
762
break;
763
--
90
--
764
2.25.1
91
2.43.0
765
92
766
93
diff view generated by jsdifflib