The following changes since commit aa3a285b5bc56a4208b3b57d4a55291e9c260107:

  Merge tag 'mem-2024-12-21' of https://github.com/davidhildenbrand/qemu into staging (2024-12-22 14:33:27 -0500)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241224

for you to fetch changes up to e4a8e093dc74be049f4829831dce76e5edab0003:

  accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core (2024-12-24 08:32:15 -0800)

----------------------------------------------------------------
tcg/optimize: Remove in-flight mask data from OptContext
fpu: Add float*_muladd_scalbn
fpu: Remove float_muladd_halve_result
fpu: Add float_round_nearest_even_max
fpu: Add float_muladd_suppress_add_product_zero
target/hexagon: Use float32_muladd
accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

----------------------------------------------------------------
Ilya Leoshkevich (1):
      tests/tcg: Do not use inttypes.h in multiarch/system/memory.c

Pierrick Bouvier (1):
      plugins: optimize cpu_index code generation

Richard Henderson (70):
      tcg/optimize: Split out finish_bb, finish_ebb
      tcg/optimize: Split out fold_affected_mask
      tcg/optimize: Copy mask writeback to fold_masks
      tcg/optimize: Split out fold_masks_zs
      tcg/optimize: Augment s_mask from z_mask in fold_masks_zs
      tcg/optimize: Change representation of s_mask
      tcg/optimize: Use finish_folding in fold_add, fold_add_vec, fold_addsub2
      tcg/optimize: Introduce const value accessors for TempOptInfo
      tcg/optimize: Use fold_masks_zs in fold_and
      tcg/optimize: Use fold_masks_zs in fold_andc
      tcg/optimize: Use fold_masks_zs in fold_bswap
      tcg/optimize: Use fold_masks_zs in fold_count_zeros
      tcg/optimize: Use fold_masks_z in fold_ctpop
      tcg/optimize: Use fold_and and fold_masks_z in fold_deposit
      tcg/optimize: Compute sign mask in fold_deposit
      tcg/optimize: Use finish_folding in fold_divide
      tcg/optimize: Use finish_folding in fold_dup, fold_dup2
      tcg/optimize: Use fold_masks_s in fold_eqv
      tcg/optimize: Use fold_masks_z in fold_extract
      tcg/optimize: Use finish_folding in fold_extract2
      tcg/optimize: Use fold_masks_zs in fold_exts
      tcg/optimize: Use fold_masks_z in fold_extu
      tcg/optimize: Use fold_masks_zs in fold_movcond
      tcg/optimize: Use finish_folding in fold_mul*
      tcg/optimize: Use fold_masks_s in fold_nand
      tcg/optimize: Use fold_masks_z in fold_neg_no_const
      tcg/optimize: Use fold_masks_s in fold_nor
      tcg/optimize: Use fold_masks_s in fold_not
      tcg/optimize: Use fold_masks_zs in fold_or
      tcg/optimize: Use fold_masks_zs in fold_orc
      tcg/optimize: Use fold_masks_zs in fold_qemu_ld
      tcg/optimize: Return true from fold_qemu_st, fold_tcg_st
      tcg/optimize: Use finish_folding in fold_remainder
      tcg/optimize: Distinguish simplification in fold_setcond_zmask
      tcg/optimize: Use fold_masks_z in fold_setcond
      tcg/optimize: Use fold_masks_s in fold_negsetcond
      tcg/optimize: Use fold_masks_z in fold_setcond2
      tcg/optimize: Use finish_folding in fold_cmp_vec
      tcg/optimize: Use finish_folding in fold_cmpsel_vec
      tcg/optimize: Use fold_masks_zs in fold_sextract
      tcg/optimize: Use fold_masks_zs, fold_masks_s in fold_shift
      tcg/optimize: Simplify sign bit test in fold_shift
      tcg/optimize: Use finish_folding in fold_sub, fold_sub_vec
      tcg/optimize: Use fold_masks_zs in fold_tcg_ld
      tcg/optimize: Use finish_folding in fold_tcg_ld_memcopy
      tcg/optimize: Use fold_masks_zs in fold_xor
      tcg/optimize: Use finish_folding in fold_bitsel_vec
      tcg/optimize: Use finish_folding as default in tcg_optimize
      tcg/optimize: Remove z_mask, s_mask from OptContext
      tcg/optimize: Re-enable sign-mask optimizations
      tcg/optimize: Move fold_bitsel_vec into alphabetic sort
      tcg/optimize: Move fold_cmp_vec, fold_cmpsel_vec into alphabetic sort
      softfloat: Add float{16,32,64}_muladd_scalbn
      target/arm: Use float*_muladd_scalbn
      target/sparc: Use float*_muladd_scalbn
      softfloat: Remove float_muladd_halve_result
      softfloat: Add float_round_nearest_even_max
      softfloat: Add float_muladd_suppress_add_product_zero
      target/hexagon: Use float32_mul in helper_sfmpy
      target/hexagon: Use float32_muladd for helper_sffma
      target/hexagon: Use float32_muladd for helper_sffms
      target/hexagon: Use float32_muladd_scalbn for helper_sffma_sc
      target/hexagon: Use float32_muladd for helper_sffm[as]_lib
      target/hexagon: Remove internal_fmafx
      target/hexagon: Expand GEN_XF_ROUND
      target/hexagon: Remove Float
      target/hexagon: Remove Double
      target/hexagon: Use mulu64 for int128_mul_6464
      target/hexagon: Simplify internal_mpyhh setup
      accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

 include/exec/translator.h | 14 -
 include/fpu/softfloat-types.h | 2 +
 include/fpu/softfloat.h | 14 +-
 include/hw/core/tcg-cpu-ops.h | 13 +
 target/alpha/cpu.h | 2 +
 target/arm/internals.h | 2 +
 target/avr/cpu.h | 2 +
 target/hexagon/cpu.h | 2 +
 target/hexagon/fma_emu.h | 3 -
 target/hppa/cpu.h | 2 +
 target/i386/tcg/helper-tcg.h | 2 +
 target/loongarch/internals.h | 2 +
 target/m68k/cpu.h | 2 +
 target/microblaze/cpu.h | 2 +
 target/mips/tcg/tcg-internal.h | 2 +
 target/openrisc/cpu.h | 2 +
 target/ppc/cpu.h | 2 +
 target/riscv/cpu.h | 3 +
 target/rx/cpu.h | 2 +
 target/s390x/s390x-internal.h | 2 +
 target/sh4/cpu.h | 2 +
 target/sparc/cpu.h | 2 +
 target/sparc/helper.h | 4 +-
 target/tricore/cpu.h | 2 +
 target/xtensa/cpu.h | 2 +
 accel/tcg/cpu-exec.c | 8 +-
 accel/tcg/plugin-gen.c | 9 +
 accel/tcg/translate-all.c | 8 +-
 fpu/softfloat.c | 63 +--
 target/alpha/cpu.c | 1 +
 target/alpha/translate.c | 4 +-
 target/arm/cpu.c | 1 +
 target/arm/tcg/cpu-v7m.c | 1 +
 target/arm/tcg/helper-a64.c | 6 +-
 target/arm/tcg/translate.c | 5 +-
 target/avr/cpu.c | 1 +
 target/avr/translate.c | 6 +-
 target/hexagon/cpu.c | 1 +
 target/hexagon/fma_emu.c | 496 ++++++---------
 target/hexagon/op_helper.c | 125 ++----
 target/hexagon/translate.c | 4 +-
 target/hppa/cpu.c | 1 +
 target/hppa/translate.c | 4 +-
 target/i386/tcg/tcg-cpu.c | 1 +
 target/i386/tcg/translate.c | 5 +-
 target/loongarch/cpu.c | 1 +
 target/loongarch/tcg/translate.c | 4 +-
 target/m68k/cpu.c | 1 +
 target/m68k/translate.c | 4 +-
 target/microblaze/cpu.c | 1 +
 target/microblaze/translate.c | 4 +-
 target/mips/cpu.c | 1 +
 target/mips/tcg/translate.c | 4 +-
 target/openrisc/cpu.c | 1 +
 target/openrisc/translate.c | 4 +-
 target/ppc/cpu_init.c | 1 +
 target/ppc/translate.c | 4 +-
 target/riscv/tcg/tcg-cpu.c | 1 +
 target/riscv/translate.c | 4 +-
 target/rx/cpu.c | 1 +
 target/rx/translate.c | 4 +-
 target/s390x/cpu.c | 1 +
 target/s390x/tcg/translate.c | 4 +-
 target/sh4/cpu.c | 1 +
 target/sh4/translate.c | 4 +-
 target/sparc/cpu.c | 1 +
 target/sparc/fop_helper.c | 8 +-
 target/sparc/translate.c | 84 ++--
 target/tricore/cpu.c | 1 +
 target/tricore/translate.c | 5 +-
 target/xtensa/cpu.c | 1 +
 target/xtensa/translate.c | 4 +-
 tcg/optimize.c | 857 +++++++++++++++++++-----------
 tests/tcg/multiarch/system/memory.c | 9 +-
 fpu/softfloat-parts.c.inc | 16 +-
 75 files changed, 866 insertions(+), 1009 deletions(-)

From: Ilya Leoshkevich <iii@linux.ibm.com>

make check-tcg fails on Fedora with the following error message:

  alpha-linux-gnu-gcc [...] qemu/tests/tcg/multiarch/system/memory.c -o memory [...]
  qemu/tests/tcg/multiarch/system/memory.c:17:10: fatal error: inttypes.h: No such file or directory
     17 | #include <inttypes.h>
        |          ^~~~~~~~~~~~
  compilation terminated.

The reason is that Fedora has cross-compilers, but no cross-glibc
headers. Fix by hardcoding the format specifiers and dropping the
include.

An alternative fix would be to introduce a configure check for
inttypes.h. But this would make it impossible to use Fedora
cross-compilers for softmmu tests, which used to work so far.

Fixes: ecbcc9ead2f8 ("tests/tcg: add a system test to check memory instrumentation")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010085906.226249-1-iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/tcg/multiarch/system/memory.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/tcg/multiarch/system/memory.c b/tests/tcg/multiarch/system/memory.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/tcg/multiarch/system/memory.c
+++ b/tests/tcg/multiarch/system/memory.c
@@ -XXX,XX +XXX,XX @@
 
 #include <stdint.h>
 #include <stdbool.h>
-#include <inttypes.h>
 #include <minilib.h>
 
 #ifndef CHECK_UNALIGNED
@@ -XXX,XX +XXX,XX @@ int main(void)
     int i;
     bool ok = true;
 
-    ml_printf("Test data start: 0x%"PRIxPTR"\n", &test_data[0]);
-    ml_printf("Test data end: 0x%"PRIxPTR"\n", &test_data[TEST_SIZE]);
+    ml_printf("Test data start: 0x%lx\n", (unsigned long)&test_data[0]);
+    ml_printf("Test data end: 0x%lx\n", (unsigned long)&test_data[TEST_SIZE]);
 
     /* Run through the unsigned tests first */
     for (i = 0; i < ARRAY_SIZE(init_ufns) && ok; i++) {
@@ -XXX,XX +XXX,XX @@ int main(void)
         ok = do_signed_reads(true);
     }
 
-    ml_printf("Test data read: %"PRId32"\n", test_read_count);
-    ml_printf("Test data write: %"PRId32"\n", test_write_count);
+    ml_printf("Test data read: %lu\n", (unsigned long)test_read_count);
+    ml_printf("Test data write: %lu\n", (unsigned long)test_write_count);
     ml_printf("Test complete: %s\n", ok ? "PASSED" : "FAILED");
     return ok ? 0 : -1;
 }
--
2.43.0

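A quick aside on the pattern used in the fix above (an illustrative sketch, not part of the series, and using hosted stdio rather than the test's minilib): casting to a type with a guaranteed printf length modifier removes the dependence on <inttypes.h>, under the same assumption the patch makes, namely that the values fit in unsigned long.

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t count = 1234;
    uintptr_t addr = (uintptr_t)&count;

    /* Instead of "%"PRIxPTR / "%"PRId32 from <inttypes.h>, cast to
     * unsigned long, whose "%lx" / "%lu" specifiers are always available. */
    printf("addr:  0x%lx\n", (unsigned long)addr);
    printf("count: %lu\n", (unsigned long)count);
    return 0;
}
```
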
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>

When running with a single vcpu, we can return a constant instead of a
load when accessing cpu_index.
A side effect is that all tcg operations using it are optimized, most
notably scoreboard access.
When running a simple loop in user-mode, the speedup is around 20%.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20241128213843.1023080-1-pierrick.bouvier@linaro.org>
---
 accel/tcg/plugin-gen.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -XXX,XX +XXX,XX @@ static void gen_disable_mem_helper(void)
 
 static TCGv_i32 gen_cpu_index(void)
 {
+    /*
+     * Optimize when we run with a single vcpu. All values using cpu_index,
+     * including scoreboard index, will be optimized out.
+     * User-mode calls tb_flush when setting this flag. In system-mode, all
+     * vcpus are created before generating code.
+     */
+    if (!tcg_cflags_has(current_cpu, CF_PARALLEL)) {
+        return tcg_constant_i32(current_cpu->cpu_index);
+    }
     TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
     tcg_gen_ld_i32(cpu_index, tcg_env,
                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
--
2.43.0

Call them directly from the opcode switch statement in tcg_optimize,
rather than in finish_folding based on opcode flags. Adjust folding
of conditional branches to match.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 47 +++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
     }
 }
 
+static void finish_bb(OptContext *ctx)
+{
+    /* We only optimize memory barriers across basic blocks. */
+    ctx->prev_mb = NULL;
+}
+
+static void finish_ebb(OptContext *ctx)
+{
+    finish_bb(ctx);
+    /* We only optimize across extended basic blocks. */
+    memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+    remove_mem_copy_all(ctx);
+}
+
 static void finish_folding(OptContext *ctx, TCGOp *op)
 {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     int i, nb_oargs;
 
-    /*
-     * We only optimize extended basic blocks.  If the opcode ends a BB
-     * and is not a conditional branch, reset all temp data.
-     */
-    if (def->flags & TCG_OPF_BB_END) {
-        ctx->prev_mb = NULL;
-        if (!(def->flags & TCG_OPF_COND_BRANCH)) {
-            memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
-            remove_mem_copy_all(ctx);
-        }
-        return;
-    }
-
     nb_oargs = def->nb_oargs;
     for (i = 0; i < nb_oargs; i++) {
         TCGTemp *ts = arg_temp(op->args[i]);
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
     if (i > 0) {
         op->opc = INDEX_op_br;
         op->args[0] = op->args[3];
+        finish_ebb(ctx);
+    } else {
+        finish_bb(ctx);
     }
-    return false;
+    return true;
 }
 
 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
         }
         op->opc = INDEX_op_br;
         op->args[0] = label;
-        break;
+        finish_ebb(ctx);
+        return true;
     }
-    return false;
+
+    finish_bb(ctx);
+    return true;
 }
 
 static bool fold_bswap(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64_VEC(xor):
             done = fold_xor(&ctx, op);
             break;
+        case INDEX_op_set_label:
+        case INDEX_op_br:
+        case INDEX_op_exit_tb:
+        case INDEX_op_goto_tb:
+        case INDEX_op_goto_ptr:
+            finish_ebb(&ctx);
+            done = true;
+            break;
         default:
             break;
         }
--
2.43.0

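To make the basic-block versus extended-basic-block distinction above concrete, here is a minimal, self-contained analogy (hypothetical code, not QEMU's): per-temp facts may survive a conditional branch within one extended basic block, but must be forgotten at a label or other control-flow merge point, while the pending-barrier state is dropped at every basic-block end.

```c
#include <stdbool.h>
#include <string.h>

/* Hypothetical fold state, loosely mirroring what OptContext tracks. */
typedef struct {
    bool  value_known[16];   /* constant-propagation facts, per temp */
    long  value[16];
    void *prev_barrier;      /* last mergeable memory barrier, if any */
} FoldState;

static void end_basic_block(FoldState *s)
{
    /* A barrier can only be merged with an earlier barrier in the same BB. */
    s->prev_barrier = NULL;
}

static void end_extended_basic_block(FoldState *s)
{
    end_basic_block(s);
    /* Per-temp facts are only valid within one EBB. */
    memset(s->value_known, 0, sizeof(s->value_known));
}
```
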
There are only a few logical operations which can compute
an "affected" mask. Split out handling of this optimization
to a separate function, only to be called when applicable.

Remove the a_mask field from OptContext, as the mask is
no longer stored anywhere.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
     QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
 
     /* In flight values from optimization. */
-    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
     uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
     uint64_t s_mask;  /* mask of clrsb(value) bits */
     TCGType type;
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 
 static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
-    uint64_t a_mask = ctx->a_mask;
     uint64_t z_mask = ctx->z_mask;
     uint64_t s_mask = ctx->s_mask;
 
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
      * type changing opcodes.
      */
     if (ctx->type == TCG_TYPE_I32) {
-        a_mask = (int32_t)a_mask;
         z_mask = (int32_t)z_mask;
         s_mask |= MAKE_64BIT_MASK(32, 32);
         ctx->z_mask = z_mask;
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     if (z_mask == 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
     }
+    return false;
+}
+
+/*
+ * An "affected" mask bit is 0 if and only if the result is identical
+ * to the first input.  Thus if the entire mask is 0, the operation
+ * is equivalent to a copy.
+ */
+static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
+{
+    if (ctx->type == TCG_TYPE_I32) {
+        a_mask = (uint32_t)a_mask;
+    }
     if (a_mask == 0) {
         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
      * Known-zeros does not imply known-ones.  Therefore unless
      * arg2 is constant, we can't infer affected bits from it.
      */
-    if (arg_is_const(op->args[2])) {
-        ctx->a_mask = z1 & ~z2;
+    if (arg_is_const(op->args[2]) &&
+        fold_affected_mask(ctx, op, z1 & ~z2)) {
+        return true;
     }
 
     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
      */
     if (arg_is_const(op->args[2])) {
         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
-        ctx->a_mask = z1 & ~z2;
+        if (fold_affected_mask(ctx, op, z1 & ~z2)) {
+            return true;
+        }
         z1 &= z2;
     }
     ctx->z_mask = z1;
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
 
     z_mask_old = arg_info(op->args[1])->z_mask;
     z_mask = extract64(z_mask_old, pos, len);
-    if (pos == 0) {
-        ctx->a_mask = z_mask_old ^ z_mask;
+    if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
+        return true;
     }
     ctx->z_mask = z_mask;
     ctx->s_mask = smask_from_zmask(z_mask);
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
 
     ctx->z_mask = z_mask;
     ctx->s_mask = s_mask;
-    if (!type_change) {
-        ctx->a_mask = s_mask & ~s_mask_old;
+    if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+        return true;
     }
 
     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
 
     ctx->z_mask = z_mask;
     ctx->s_mask = smask_from_zmask(z_mask);
-    if (!type_change) {
-        ctx->a_mask = z_mask_old ^ z_mask;
+    if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
+        return true;
     }
     return fold_masks(ctx, op);
 }
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
     ctx->s_mask = s_mask;
 
-    if (pos == 0) {
-        ctx->a_mask = s_mask & ~s_mask_old;
+    if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+        return true;
     }
 
     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         }
 
         /* Assume all bits affected, no bits known zero, no sign reps. */
-        ctx.a_mask = -1;
         ctx.z_mask = -1;
         ctx.s_mask = 0;
 
--
2.43.0

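As a concrete illustration of the "affected" mask above (a standalone sketch, not part of the patch): for res = x & m with a constant m, a bit of the result can differ from x only where x may be nonzero (its z_mask) and m clears that bit; if no such bit exists, the AND is just a copy of x.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Suppose prior folding proved x has only bits 0..7 possibly set. */
    uint64_t z1 = 0xff;        /* z_mask of x: a 0 bit is known-zero   */
    uint64_t m  = 0xffff;      /* constant second operand              */
    uint64_t z2 = m;           /* z_mask of a constant is the value    */

    /* Bits where the result could differ from x itself. */
    uint64_t a_mask = z1 & ~z2;

    if (a_mask == 0) {
        /* Every possibly-set bit of x survives the mask: x & m == x. */
        printf("fold to mov\n");
    } else {
        printf("keep the and, affected bits 0x%llx\n",
               (unsigned long long)a_mask);
    }
    return 0;
}
```
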
Use of fold_masks should be restricted to those opcodes that
can reliably make use of it -- those with a single output,
and from higher-level folders that set up the masks.
Prepare for conversion of each folder in turn.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
     uint64_t z_mask = ctx->z_mask;
     uint64_t s_mask = ctx->s_mask;
+    const TCGOpDef *def = &tcg_op_defs[op->opc];
+    TCGTemp *ts;
+    TempOptInfo *ti;
+
+    /* Only single-output opcodes are supported here. */
+    tcg_debug_assert(def->nb_oargs == 1);
 
     /*
      * 32-bit ops generate 32-bit results, which for the purpose of
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     if (ctx->type == TCG_TYPE_I32) {
         z_mask = (int32_t)z_mask;
         s_mask |= MAKE_64BIT_MASK(32, 32);
-        ctx->z_mask = z_mask;
-        ctx->s_mask = s_mask;
     }
 
     if (z_mask == 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
     }
-    return false;
+
+    ts = arg_temp(op->args[0]);
+    reset_ts(ctx, ts);
+
+    ti = ts_info(ts);
+    ti->z_mask = z_mask;
+    ti->s_mask = s_mask;
+    return true;
 }
 
 /*
--
2.43.0

Add a routine to which masks can be passed directly, rather than
storing them into OptContext.  To be used in upcoming patches.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
     return fold_const2(ctx, op);
 }
 
-static bool fold_masks(OptContext *ctx, TCGOp *op)
+/*
+ * Record "zero" and "sign" masks for the single output of @op.
+ * See TempOptInfo definition of z_mask and s_mask.
+ * If z_mask allows, fold the output to constant zero.
+ */
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
+                          uint64_t z_mask, uint64_t s_mask)
 {
-    uint64_t z_mask = ctx->z_mask;
-    uint64_t s_mask = ctx->s_mask;
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     TCGTemp *ts;
     TempOptInfo *ti;
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     return true;
 }
 
+static bool fold_masks(OptContext *ctx, TCGOp *op)
+{
+    return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
+}
+
 /*
  * An "affected" mask bit is 0 if and only if the result is identical
  * to the first input.  Thus if the entire mask is 0, the operation
--
2.43.0

Consider the passed s_mask to be a minimum deduced from
either existing s_mask or from a sign-extension operation.
We may be able to deduce more from the set of known zeros.
Remove identical logic from several opcode folders.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
  * Record "zero" and "sign" masks for the single output of @op.
  * See TempOptInfo definition of z_mask and s_mask.
  * If z_mask allows, fold the output to constant zero.
+ * The passed s_mask may be augmented by z_mask.
  */
 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                           uint64_t z_mask, uint64_t s_mask)
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
 
     ti = ts_info(ts);
     ti->z_mask = z_mask;
-    ti->s_mask = s_mask;
+    ti->s_mask = s_mask | smask_from_zmask(z_mask);
     return true;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
     default:
         g_assert_not_reached();
     }
-    s_mask = smask_from_zmask(z_mask);
 
+    s_mask = 0;
     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
     case TCG_BSWAP_OZ:
         break;
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
     default:
         /* The high bits are undefined: force all bits above the sign to 1. */
         z_mask |= sign << 1;
-        s_mask = 0;
         break;
     }
     ctx->z_mask = z_mask;
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
         g_assert_not_reached();
     }
     ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
-    ctx->s_mask = smask_from_zmask(ctx->z_mask);
     return false;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
     default:
         g_assert_not_reached();
     }
-    ctx->s_mask = smask_from_zmask(ctx->z_mask);
     return false;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
         return true;
     }
     ctx->z_mask = z_mask;
-    ctx->s_mask = smask_from_zmask(z_mask);
 
     return fold_masks(ctx, op);
 }
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
     }
 
     ctx->z_mask = z_mask;
-    ctx->s_mask = smask_from_zmask(z_mask);
     if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
         return true;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
     int width = 8 * memop_size(mop);
 
     if (width < 64) {
-        ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
-        if (!(mop & MO_SIGN)) {
+        if (mop & MO_SIGN) {
+            ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
+        } else {
             ctx->z_mask = MAKE_64BIT_MASK(0, width);
-            ctx->s_mask <<= 1;
         }
     }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
     fold_setcond_tst_pow2(ctx, op, false);
 
     ctx->z_mask = 1;
-    ctx->s_mask = smask_from_zmask(1);
     return false;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
     }
 
     ctx->z_mask = 1;
-    ctx->s_mask = smask_from_zmask(1);
     return false;
 
 do_setcond_const:
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
         break;
     CASE_OP_32_64(ld8u):
         ctx->z_mask = MAKE_64BIT_MASK(0, 8);
-        ctx->s_mask = MAKE_64BIT_MASK(9, 55);
         break;
     CASE_OP_32_64(ld16s):
         ctx->s_mask = MAKE_64BIT_MASK(16, 48);
         break;
     CASE_OP_32_64(ld16u):
         ctx->z_mask = MAKE_64BIT_MASK(0, 16);
-        ctx->s_mask = MAKE_64BIT_MASK(17, 47);
         break;
     case INDEX_op_ld32s_i64:
         ctx->s_mask = MAKE_64BIT_MASK(32, 32);
         break;
     case INDEX_op_ld32u_i64:
         ctx->z_mask = MAKE_64BIT_MASK(0, 32);
-        ctx->s_mask = MAKE_64BIT_MASK(33, 31);
         break;
     default:
         g_assert_not_reached();
--
2.43.0

Change the representation from sign bit repetitions to all bits equal
to the sign bit, including the sign bit itself.

The previous format has a problem in that it is difficult to recreate
a valid sign mask after a shift operation: the "repetitions" part of
the previous format meant that applying the same shift as for the value
lead to an off-by-one value.

The new format, including the sign bit itself, means that the sign mask
can be manipulated in exactly the same way as the value, canonicalization
is easier.

Canonicalize the s_mask in fold_masks_zs, rather than requiring callers
to do so.  Treat 0 as a non-canonical but typeless input for no sign
information, which will be reset as appropriate for the data type.
We can easily fold in the data from z_mask while canonicalizing.

Temporarily disable optimizations using s_mask while each operation is
converted to use fold_masks_zs and to the new form.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 64 ++++++++++++--------------------------------------
 1 file changed, 15 insertions(+), 49 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
     QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
     uint64_t val;
     uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
-    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
+    uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
 } TempOptInfo;
 
 typedef struct OptContext {
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
 
     /* In flight values from optimization. */
     uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
-    uint64_t s_mask;  /* mask of clrsb(value) bits */
+    uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
     TCGType type;
 } OptContext;
 
-/* Calculate the smask for a specific value. */
-static uint64_t smask_from_value(uint64_t value)
-{
-    int rep = clrsb64(value);
-    return ~(~0ull >> rep);
-}
-
-/*
- * Calculate the smask for a given set of known-zeros.
- * If there are lots of zeros on the left, we can consider the remainder
- * an unsigned field, and thus the corresponding signed field is one bit
- * larger.
- */
-static uint64_t smask_from_zmask(uint64_t zmask)
-{
-    /*
-     * Only the 0 bits are significant for zmask, thus the msb itself
-     * must be zero, else we have no sign information.
-     */
-    int rep = clz64(zmask);
-    if (rep == 0) {
-        return 0;
-    }
-    rep -= 1;
-    return ~(~0ull >> rep);
-}
-
-/*
- * Recreate a properly left-aligned smask after manipulation.
- * Some bit-shuffling, particularly shifts and rotates, may
- * retain sign bits on the left, but may scatter disconnected
- * sign bits on the right.  Retain only what remains to the left.
- */
-static uint64_t smask_from_smask(int64_t smask)
-{
-    /* Only the 1 bits are significant for smask */
-    return smask_from_zmask(~smask);
-}
-
 static inline TempOptInfo *ts_info(TCGTemp *ts)
 {
     return ts->state_ptr;
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
         ti->is_const = true;
         ti->val = ts->val;
         ti->z_mask = ts->val;
-        ti->s_mask = smask_from_value(ts->val);
+        ti->s_mask = INT64_MIN >> clrsb64(ts->val);
     } else {
         ti->is_const = false;
         ti->z_mask = -1;
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
          */
         if (i == 0) {
             ts_info(ts)->z_mask = ctx->z_mask;
-            ts_info(ts)->s_mask = ctx->s_mask;
         }
     }
 }
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
  * The passed s_mask may be augmented by z_mask.
  */
 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
-                          uint64_t z_mask, uint64_t s_mask)
+                          uint64_t z_mask, int64_t s_mask)
 {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     TCGTemp *ts;
     TempOptInfo *ti;
+    int rep;
 
     /* Only single-output opcodes are supported here. */
     tcg_debug_assert(def->nb_oargs == 1);
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
      */
     if (ctx->type == TCG_TYPE_I32) {
         z_mask = (int32_t)z_mask;
-        s_mask |= MAKE_64BIT_MASK(32, 32);
+        s_mask |= INT32_MIN;
     }
 
     if (z_mask == 0) {
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
 
     ti = ts_info(ts);
     ti->z_mask = z_mask;
-    ti->s_mask = s_mask | smask_from_zmask(z_mask);
+
+    /* Canonicalize s_mask and incorporate data from z_mask. */
+    rep = clz64(~s_mask);
+    rep = MAX(rep, clz64(z_mask));
+    rep = MAX(rep - 1, 0);
+    ti->s_mask = INT64_MIN >> rep;
+
     return true;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
 
     ctx->z_mask = z_mask;
     ctx->s_mask = s_mask;
-    if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+    if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
         return true;
     }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
     ctx->s_mask = s_mask;
 
-    if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+    if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
         return true;
     }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
         ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
 
         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
-        ctx->s_mask = smask_from_smask(s_mask);
 
         return fold_masks(ctx, op);
     }
--
2.43.0

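A small worked example of the representation change described above (a standalone sketch; QEMU's clrsb64() corresponds to the GCC/Clang __builtin_clrsbll used here, and, like the patch, this assumes arithmetic right shift of signed values): for the value -128, the old format marks only the 56 redundant copies of the sign bit, while the new format also includes the sign bit itself, i.e. every bit that equals the msb.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int64_t val = -128;                   /* 0xffffffffffffff80          */
    int rep = __builtin_clrsbll(val);     /* 56 redundant sign bits      */

    uint64_t old_s_mask = ~(~0ull >> rep);               /* ...ff00 */
    uint64_t new_s_mask = (uint64_t)(INT64_MIN >> rep);  /* ...ff80 */

    printf("clrsb      = %d\n", rep);
    printf("old s_mask = 0x%016llx\n", (unsigned long long)old_s_mask);
    printf("new s_mask = 0x%016llx\n", (unsigned long long)new_s_mask);
    return 0;
}
```
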
1
From: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
3
probe_access_flags() as it is today uses probe_access_full(), which in
4
turn uses probe_access_internal() with size = 0. probe_access_internal()
5
then uses the size to call the tlb_fill() callback for the given CPU.
6
This size param ('fault_size' as probe_access_internal() calls it) is
7
ignored by most existing .tlb_fill callback implementations, e.g.
8
arm_cpu_tlb_fill(), ppc_cpu_tlb_fill(), x86_cpu_tlb_fill() and
9
mips_cpu_tlb_fill() to name a few.
10
11
But RISC-V riscv_cpu_tlb_fill() actually uses it. The 'size' parameter
12
is used to check for PMP (Physical Memory Protection) access. This is
13
necessary because PMP does not make any guarantees about all the bytes
14
of the same page having the same permissions, i.e. the same page can
15
have different PMP properties, so we're forced to make sub-page range
16
checks. To allow RISC-V emulation to do a probe_acess_flags() that
17
covers PMP, we need to either add a 'size' param to the existing
18
probe_acess_flags() or create a new interface (e.g.
19
probe_access_range_flags).
20
21
There are quite a few probe_* APIs already, so let's add a 'size' param
22
to probe_access_flags() and re-use this API. This is done by open coding
23
what probe_access_full() does inside probe_acess_flags() and passing the
24
'size' param to probe_acess_internal(). Existing probe_access_flags()
25
callers use size = 0 to not change their current API usage. 'size' is
26
asserted to enforce single page access like probe_access() already does.
27
28
No behavioral changes intended.
29
30
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
31
Message-Id: <20230223234427.521114-2-dbarboza@ventanamicro.com>
32
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
33
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
34
---
3
---
35
include/exec/exec-all.h | 3 ++-
4
tcg/optimize.c | 9 +++++----
36
accel/stubs/tcg-stub.c | 2 +-
5
1 file changed, 5 insertions(+), 4 deletions(-)
37
accel/tcg/cputlb.c | 17 ++++++++++++++---
38
accel/tcg/user-exec.c | 5 +++--
39
semihosting/uaccess.c | 2 +-
40
target/arm/ptw.c | 2 +-
41
target/arm/tcg/sve_helper.c | 2 +-
42
target/s390x/tcg/mem_helper.c | 2 +-
43
8 files changed, 24 insertions(+), 11 deletions(-)
44
6
45
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
46
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
47
--- a/include/exec/exec-all.h
9
--- a/tcg/optimize.c
48
+++ b/include/exec/exec-all.h
10
+++ b/tcg/optimize.c
49
@@ -XXX,XX +XXX,XX @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size,
11
@@ -XXX,XX +XXX,XX @@ static void finish_ebb(OptContext *ctx)
50
* probe_access_flags:
12
remove_mem_copy_all(ctx);
51
* @env: CPUArchState
13
}
52
* @addr: guest virtual address to look up
14
53
+ * @size: size of the access
15
-static void finish_folding(OptContext *ctx, TCGOp *op)
54
* @access_type: read, write or execute permission
16
+static bool finish_folding(OptContext *ctx, TCGOp *op)
55
* @mmu_idx: MMU index to use for lookup
56
* @nonfault: suppress the fault
57
@@ -XXX,XX +XXX,XX @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size,
58
* Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags.
59
* For simplicity, all "mmio-like" flags are folded to TLB_MMIO.
60
*/
61
-int probe_access_flags(CPUArchState *env, target_ulong addr,
62
+int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
63
MMUAccessType access_type, int mmu_idx,
64
bool nonfault, void **phost, uintptr_t retaddr);
65
66
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
67
index XXXXXXX..XXXXXXX 100644
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -XXX,XX +XXX,XX @@ void tcg_flush_jmp_cache(CPUState *cpu)
 {
 }
 
-int probe_access_flags(CPUArchState *env, target_ulong addr,
+int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
                        MMUAccessType access_type, int mmu_idx,
                        bool nonfault, void **phost, uintptr_t retaddr)
 {
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ int probe_access_full(CPUArchState *env, target_ulong addr,
     return flags;
 }
 
-int probe_access_flags(CPUArchState *env, target_ulong addr,
+int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
                        MMUAccessType access_type, int mmu_idx,
                        bool nonfault, void **phost, uintptr_t retaddr)
 {
     CPUTLBEntryFull *full;
+    int flags;
 
-    return probe_access_full(env, addr, access_type, mmu_idx,
-                             nonfault, phost, &full, retaddr);
+    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
+
+    flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
+                                  nonfault, phost, &full, retaddr);
+
+    /* Handle clean RAM pages. */
+    if (unlikely(flags & TLB_NOTDIRTY)) {
+        notdirty_write(env_cpu(env), addr, 1, full, retaddr);
+        flags &= ~TLB_NOTDIRTY;
+    }
+
+    return flags;
 }
 
 void *probe_access(CPUArchState *env, target_ulong addr, int size,
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
 }
 
-int probe_access_flags(CPUArchState *env, target_ulong addr,
+int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
                        MMUAccessType access_type, int mmu_idx,
                        bool nonfault, void **phost, uintptr_t ra)
 {
     int flags;
 
-    flags = probe_access_internal(env, addr, 0, access_type, nonfault, ra);
+    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
+    flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
     *phost = flags ? NULL : g2h(env_cpu(env), addr);
     return flags;
 }
diff --git a/semihosting/uaccess.c b/semihosting/uaccess.c
index XXXXXXX..XXXXXXX 100644
--- a/semihosting/uaccess.c
+++ b/semihosting/uaccess.c
@@ -XXX,XX +XXX,XX @@ ssize_t softmmu_strlen_user(CPUArchState *env, target_ulong addr)
         /* Find the number of bytes remaining in the page. */
         left_in_page = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK);
 
-        flags = probe_access_flags(env, addr, MMU_DATA_LOAD,
+        flags = probe_access_flags(env, addr, 0, MMU_DATA_LOAD,
                                    mmu_idx, true, &h, 0);
         if (flags & TLB_INVALID_MASK) {
             return -1;
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -XXX,XX +XXX,XX @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val,
         void *discard;
 
         env->tlb_fi = fi;
-        flags = probe_access_flags(env, ptw->out_virt, MMU_DATA_STORE,
+        flags = probe_access_flags(env, ptw->out_virt, 0, MMU_DATA_STORE,
                                    arm_to_core_mmu_idx(ptw->in_ptw_idx),
                                    true, &discard, 0);
         env->tlb_fi = NULL;
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -XXX,XX +XXX,XX @@ bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
     addr = useronly_clean_ptr(addr);
 
 #ifdef CONFIG_USER_ONLY
-    flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault,
+    flags = probe_access_flags(env, addr, 0, access_type, mmu_idx, nofault,
                                &info->host, retaddr);
 #else
     CPUTLBEntryFull *full;
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -XXX,XX +XXX,XX @@ static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                     int mmu_idx, bool nonfault,
                                     void **phost, uintptr_t ra)
 {
-    int flags = probe_access_flags(env, addr, access_type, mmu_idx,
+    int flags = probe_access_flags(env, addr, 0, access_type, mmu_idx,
                                    nonfault, phost, ra);
 
     if (unlikely(flags & TLB_INVALID_MASK)) {
--
2.34.1

 {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     int i, nb_oargs;
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
             ts_info(ts)->z_mask = ctx->z_mask;
         }
     }
+    return true;
 }
 
 /*
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
         fold_xi_to_x(ctx, op, 0)) {
         return true;
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 /* We cannot as yet do_constant_folding with vectors. */
@@ -XXX,XX +XXX,XX @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op)
         fold_xi_to_x(ctx, op, 0)) {
         return true;
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
         op->args[4] = arg_new_constant(ctx, bl);
         op->args[5] = arg_new_constant(ctx, bh);
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_add2(OptContext *ctx, TCGOp *op)
--
2.43.0
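
For the probe_access_flags() conversion above, callers that only need to probe a
single byte pass 0 for the new size argument, which trivially satisfies the
page-crossing assertion.  A representative call site after this change, sketched
with a hypothetical mmu_idx and ra already in scope:

    void *host;
    int flags;

    flags = probe_access_flags(env, addr, 0, MMU_DATA_LOAD, mmu_idx,
                               true, &host, ra);
    if (flags & TLB_INVALID_MASK) {
        /* no usable mapping; fall back or fail */
    }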
diff view generated by jsdifflib
1
TCG internals will want to be able to allocate and reuse
1
Introduce ti_is_const, ti_const_val, ti_is_const_val.
2
explicitly life-limited temporaries.
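
The ti_* accessors introduced here are thin wrappers over TempOptInfo; the later
fold_* conversions use them in roughly this shape (a sketch, not a complete
function):

    TempOptInfo *t2 = arg_info(op->args[2]);

    if (ti_is_const(t2)) {
        uint64_t v2 = ti_const_val(t2);
        /* fold using the known constant value v2 */
    }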
3
2
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
4
---
7
include/tcg/tcg.h | 28 ++++++++++++++++++++++++++++
5
tcg/optimize.c | 20 +++++++++++++++++---
8
1 file changed, 28 insertions(+)
6
1 file changed, 17 insertions(+), 3 deletions(-)
9
7
10
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg.h
10
--- a/tcg/optimize.c
13
+++ b/include/tcg/tcg.h
11
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset,
12
@@ -XXX,XX +XXX,XX @@ static inline TempOptInfo *arg_info(TCGArg arg)
15
return temp_tcgv_i32(t);
13
return ts_info(arg_temp(arg));
16
}
14
}
17
15
18
+/* Used only by tcg infrastructure: tcg-op.c or plugin-gen.c */
16
+static inline bool ti_is_const(TempOptInfo *ti)
19
+static inline TCGv_i32 tcg_temp_ebb_new_i32(void)
20
+{
17
+{
21
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB);
18
+ return ti->is_const;
22
+ return temp_tcgv_i32(t);
23
+}
19
+}
24
+
20
+
25
static inline TCGv_i32 tcg_temp_new_i32(void)
21
+static inline uint64_t ti_const_val(TempOptInfo *ti)
26
{
27
TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB);
28
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t offset,
29
return temp_tcgv_i64(t);
30
}
31
32
+/* Used only by tcg infrastructure: tcg-op.c or plugin-gen.c */
33
+static inline TCGv_i64 tcg_temp_ebb_new_i64(void)
34
+{
22
+{
35
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB);
23
+ return ti->val;
36
+ return temp_tcgv_i64(t);
37
+}
24
+}
38
+
25
+
39
static inline TCGv_i64 tcg_temp_new_i64(void)
26
+static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
40
{
41
TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB);
42
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
43
return temp_tcgv_i64(t);
44
}
45
46
+/* Used only by tcg infrastructure: tcg-op.c or plugin-gen.c */
47
+static inline TCGv_i128 tcg_temp_ebb_new_i128(void)
48
+{
27
+{
49
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB);
28
+ return ti_is_const(ti) && ti_const_val(ti) == val;
50
+ return temp_tcgv_i128(t);
51
+}
29
+}
52
+
30
+
53
static inline TCGv_i128 tcg_temp_new_i128(void)
31
static inline bool ts_is_const(TCGTemp *ts)
54
{
32
{
55
TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB);
33
- return ts_info(ts)->is_const;
56
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t offset,
34
+ return ti_is_const(ts_info(ts));
57
return temp_tcgv_ptr(t);
58
}
35
}
59
36
60
+/* Used only by tcg infrastructure: tcg-op.c or plugin-gen.c */
37
static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
61
+static inline TCGv_ptr tcg_temp_ebb_new_ptr(void)
62
+{
63
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB);
64
+ return temp_tcgv_ptr(t);
65
+}
66
+
67
static inline TCGv_ptr tcg_temp_new_ptr(void)
68
{
38
{
69
TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB);
39
- TempOptInfo *ti = ts_info(ts);
40
- return ti->is_const && ti->val == val;
41
+ return ti_is_const_val(ts_info(ts), val);
42
}
43
44
static inline bool arg_is_const(TCGArg arg)
70
--
45
--
71
2.34.1
46
2.43.0
72
73
diff view generated by jsdifflib
Avoid the use of the OptContext slots.  Find TempOptInfo once.
Sink mask computation below fold_affected_mask early exit.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_add2(OptContext *ctx, TCGOp *op)
 
 static bool fold_and(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z1, z2;
+    uint64_t z1, z2, z_mask, s_mask;
+    TempOptInfo *t1, *t2;
 
     if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_i(ctx, op, 0) ||
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
         return true;
     }
 
-    z1 = arg_info(op->args[1])->z_mask;
-    z2 = arg_info(op->args[2])->z_mask;
-    ctx->z_mask = z1 & z2;
-
-    /*
-     * Sign repetitions are perforce all identical, whether they are 1 or 0.
-     * Bitwise operations preserve the relative quantity of the repetitions.
-     */
-    ctx->s_mask = arg_info(op->args[1])->s_mask
-                & arg_info(op->args[2])->s_mask;
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+    z1 = t1->z_mask;
+    z2 = t2->z_mask;
 
     /*
      * Known-zeros does not imply known-ones.  Therefore unless
      * arg2 is constant, we can't infer affected bits from it.
      */
-    if (arg_is_const(op->args[2]) &&
-        fold_affected_mask(ctx, op, z1 & ~z2)) {
+    if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
         return true;
     }
 
-    return fold_masks(ctx, op);
+    z_mask = z1 & z2;
+
+    /*
+     * Sign repetitions are perforce all identical, whether they are 1 or 0.
+     * Bitwise operations preserve the relative quantity of the repetitions.
+     */
+    s_mask = t1->s_mask & t2->s_mask;
+
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_andc(OptContext *ctx, TCGOp *op)
--
2.43.0
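
The conversion pattern repeated through this part of the series, sketched side by
side (not a literal hunk):

    /* before: masks are parked in OptContext and consumed later */
    ctx->z_mask = z_mask;
    ctx->s_mask = s_mask;
    return fold_masks(ctx, op);

    /* after: masks are handed to the fold directly; nothing stays in flight */
    return fold_masks_zs(ctx, op, z_mask, s_mask);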
diff view generated by jsdifflib
Avoid the use of the OptContext slots.  Find TempOptInfo once.
Avoid double inversion of the value of second const operand.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
 
 static bool fold_andc(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z1;
+    uint64_t z_mask, s_mask;
+    TempOptInfo *t1, *t2;
 
     if (fold_const2(ctx, op) ||
         fold_xx_to_i(ctx, op, 0) ||
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
         return true;
     }
 
-    z1 = arg_info(op->args[1])->z_mask;
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+    z_mask = t1->z_mask;
 
     /*
      * Known-zeros does not imply known-ones.  Therefore unless
      * arg2 is constant, we can't infer anything from it.
      */
-    if (arg_is_const(op->args[2])) {
-        uint64_t z2 = ~arg_info(op->args[2])->z_mask;
-        if (fold_affected_mask(ctx, op, z1 & ~z2)) {
+    if (ti_is_const(t2)) {
+        uint64_t v2 = ti_const_val(t2);
+        if (fold_affected_mask(ctx, op, z_mask & v2)) {
             return true;
         }
-        z1 &= z2;
+        z_mask &= ~v2;
     }
-    ctx->z_mask = z1;
 
-    ctx->s_mask = arg_info(op->args[1])->s_mask
-                & arg_info(op->args[2])->s_mask;
-    return fold_masks(ctx, op);
+    s_mask = t1->s_mask & t2->s_mask;
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_brcond(OptContext *ctx, TCGOp *op)
--
2.43.0
diff view generated by jsdifflib
Avoid the use of the OptContext slots.  Find TempOptInfo once.
Always set s_mask along the BSWAP_OS path, since the result is
being explicitly sign-extended.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
 static bool fold_bswap(OptContext *ctx, TCGOp *op)
 {
     uint64_t z_mask, s_mask, sign;
+    TempOptInfo *t1 = arg_info(op->args[1]);
 
-    if (arg_is_const(op->args[1])) {
-        uint64_t t = arg_info(op->args[1])->val;
-
-        t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
-        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+    if (ti_is_const(t1)) {
+        return tcg_opt_gen_movi(ctx, op, op->args[0],
+                                do_constant_folding(op->opc, ctx->type,
+                                                    ti_const_val(t1),
+                                                    op->args[2]));
     }
 
-    z_mask = arg_info(op->args[1])->z_mask;
-
+    z_mask = t1->z_mask;
     switch (op->opc) {
     case INDEX_op_bswap16_i32:
     case INDEX_op_bswap16_i64:
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
         /* If the sign bit may be 1, force all the bits above to 1. */
         if (z_mask & sign) {
             z_mask |= sign;
-            s_mask = sign << 1;
         }
+        /* The value and therefore s_mask is explicitly sign-extended. */
+        s_mask = sign;
         break;
     default:
         /* The high bits are undefined: force all bits above the sign to 1. */
         z_mask |= sign << 1;
         break;
     }
-    ctx->z_mask = z_mask;
-    ctx->s_mask = s_mask;
 
-    return fold_masks(ctx, op);
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_call(OptContext *ctx, TCGOp *op)
--
2.43.0
diff view generated by jsdifflib
Avoid the use of the OptContext slots.  Find TempOptInfo once.
Compute s_mask from the union of the maximum count and the
op2 fallback for op1 being zero.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
 
 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask;
+    uint64_t z_mask, s_mask;
+    TempOptInfo *t1 = arg_info(op->args[1]);
+    TempOptInfo *t2 = arg_info(op->args[2]);
 
-    if (arg_is_const(op->args[1])) {
-        uint64_t t = arg_info(op->args[1])->val;
+    if (ti_is_const(t1)) {
+        uint64_t t = ti_const_val(t1);
 
         if (t != 0) {
             t = do_constant_folding(op->opc, ctx->type, t, 0);
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
     default:
         g_assert_not_reached();
     }
-    ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
-    return false;
+    s_mask = ~z_mask;
+    z_mask |= t2->z_mask;
+    s_mask &= t2->s_mask;
+
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
--
2.43.0
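
A worked example of the new mask computation for the 64-bit case, assuming clz/ctz
semantics as above:

    /* The count is at most 64, so only bits [6:0] can be set. */
    z_mask = 64 | 63;                  /* 0x7f */
    /* Bits above that replicate the (zero) sign bit when op1 != 0. */
    s_mask = ~z_mask;                  /* 0xffffffffffffff80 */
    /* A zero op1 yields op2 instead, so keep only what both cases guarantee. */
    z_mask |= t2->z_mask;
    s_mask &= t2->s_mask;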
diff view generated by jsdifflib
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
1
Add fold_masks_z as a trivial wrapper around fold_masks_zs.
2
Avoid the use of the OptContext slots.
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
6
---
4
include/tcg/tcg-op.h | 5 +++++
7
tcg/optimize.c | 13 ++++++++++---
5
1 file changed, 5 insertions(+)
8
1 file changed, 10 insertions(+), 3 deletions(-)
6
9
7
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
9
--- a/include/tcg/tcg-op.h
12
--- a/tcg/optimize.c
10
+++ b/include/tcg/tcg-op.h
13
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_mov_ptr(TCGv_ptr d, TCGv_ptr s)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
12
glue(tcg_gen_mov_,PTR)((NAT)d, (NAT)s);
15
return true;
13
}
16
}
14
17
15
+static inline void tcg_gen_movi_ptr(TCGv_ptr d, intptr_t s)
18
+static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
16
+{
19
+{
17
+ glue(tcg_gen_movi_,PTR)((NAT)d, s);
20
+ return fold_masks_zs(ctx, op, z_mask, 0);
18
+}
21
+}
19
+
22
+
20
static inline void tcg_gen_brcondi_ptr(TCGCond cond, TCGv_ptr a,
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
21
intptr_t b, TCGLabel *label)
22
{
24
{
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
26
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
27
28
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
29
{
30
+ uint64_t z_mask;
31
+
32
if (fold_const1(ctx, op)) {
33
return true;
34
}
35
36
switch (ctx->type) {
37
case TCG_TYPE_I32:
38
- ctx->z_mask = 32 | 31;
39
+ z_mask = 32 | 31;
40
break;
41
case TCG_TYPE_I64:
42
- ctx->z_mask = 64 | 63;
43
+ z_mask = 64 | 63;
44
break;
45
default:
46
g_assert_not_reached();
47
}
48
- return false;
49
+ return fold_masks_z(ctx, op, z_mask);
50
}
51
52
static bool fold_deposit(OptContext *ctx, TCGOp *op)
23
--
53
--
24
2.34.1
54
2.43.0
25
26
diff view generated by jsdifflib
1
All of these have obvious and quite local scope.
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
When we fold to and, use fold_and.
2
3
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
tcg/tcg-op-gvec.c | 186 ++++++++++++++++-----------------
7
tcg/optimize.c | 35 +++++++++++++++++------------------
7
tcg/tcg-op.c | 258 +++++++++++++++++++++++-----------------------
8
1 file changed, 17 insertions(+), 18 deletions(-)
8
tcg/tcg.c | 2 +-
9
3 files changed, 223 insertions(+), 223 deletions(-)
10
9
11
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/tcg-op-gvec.c
12
--- a/tcg/optimize.c
14
+++ b/tcg/tcg-op-gvec.c
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
14
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
16
TCGv_ptr a0, a1;
15
17
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
16
static bool fold_deposit(OptContext *ctx, TCGOp *op)
18
17
{
19
- a0 = tcg_temp_new_ptr();
18
+ TempOptInfo *t1 = arg_info(op->args[1]);
20
- a1 = tcg_temp_new_ptr();
19
+ TempOptInfo *t2 = arg_info(op->args[2]);
21
+ a0 = tcg_temp_ebb_new_ptr();
20
+ int ofs = op->args[3];
22
+ a1 = tcg_temp_ebb_new_ptr();
21
+ int len = op->args[4];
23
22
TCGOpcode and_opc;
24
tcg_gen_addi_ptr(a0, cpu_env, dofs);
23
+ uint64_t z_mask;
25
tcg_gen_addi_ptr(a1, cpu_env, aofs);
24
26
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
25
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
27
TCGv_ptr a0, a1;
26
- uint64_t t1 = arg_info(op->args[1])->val;
28
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
27
- uint64_t t2 = arg_info(op->args[2])->val;
29
28
-
30
- a0 = tcg_temp_new_ptr();
29
- t1 = deposit64(t1, op->args[3], op->args[4], t2);
31
- a1 = tcg_temp_new_ptr();
30
- return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
32
+ a0 = tcg_temp_ebb_new_ptr();
31
+ if (ti_is_const(t1) && ti_is_const(t2)) {
33
+ a1 = tcg_temp_ebb_new_ptr();
32
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
34
33
+ deposit64(ti_const_val(t1), ofs, len,
35
tcg_gen_addi_ptr(a0, cpu_env, dofs);
34
+ ti_const_val(t2)));
36
tcg_gen_addi_ptr(a1, cpu_env, aofs);
37
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
38
TCGv_ptr a0, a1, a2;
39
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
40
41
- a0 = tcg_temp_new_ptr();
42
- a1 = tcg_temp_new_ptr();
43
- a2 = tcg_temp_new_ptr();
44
+ a0 = tcg_temp_ebb_new_ptr();
45
+ a1 = tcg_temp_ebb_new_ptr();
46
+ a2 = tcg_temp_ebb_new_ptr();
47
48
tcg_gen_addi_ptr(a0, cpu_env, dofs);
49
tcg_gen_addi_ptr(a1, cpu_env, aofs);
50
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
51
TCGv_ptr a0, a1, a2, a3;
52
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
53
54
- a0 = tcg_temp_new_ptr();
55
- a1 = tcg_temp_new_ptr();
56
- a2 = tcg_temp_new_ptr();
57
- a3 = tcg_temp_new_ptr();
58
+ a0 = tcg_temp_ebb_new_ptr();
59
+ a1 = tcg_temp_ebb_new_ptr();
60
+ a2 = tcg_temp_ebb_new_ptr();
61
+ a3 = tcg_temp_ebb_new_ptr();
62
63
tcg_gen_addi_ptr(a0, cpu_env, dofs);
64
tcg_gen_addi_ptr(a1, cpu_env, aofs);
65
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
66
TCGv_ptr a0, a1, a2, a3, a4;
67
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
68
69
- a0 = tcg_temp_new_ptr();
70
- a1 = tcg_temp_new_ptr();
71
- a2 = tcg_temp_new_ptr();
72
- a3 = tcg_temp_new_ptr();
73
- a4 = tcg_temp_new_ptr();
74
+ a0 = tcg_temp_ebb_new_ptr();
75
+ a1 = tcg_temp_ebb_new_ptr();
76
+ a2 = tcg_temp_ebb_new_ptr();
77
+ a3 = tcg_temp_ebb_new_ptr();
78
+ a4 = tcg_temp_ebb_new_ptr();
79
80
tcg_gen_addi_ptr(a0, cpu_env, dofs);
81
tcg_gen_addi_ptr(a1, cpu_env, aofs);
82
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
83
TCGv_ptr a0, a1;
84
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
85
86
- a0 = tcg_temp_new_ptr();
87
- a1 = tcg_temp_new_ptr();
88
+ a0 = tcg_temp_ebb_new_ptr();
89
+ a1 = tcg_temp_ebb_new_ptr();
90
91
tcg_gen_addi_ptr(a0, cpu_env, dofs);
92
tcg_gen_addi_ptr(a1, cpu_env, aofs);
93
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
94
TCGv_ptr a0, a1, a2;
95
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
96
97
- a0 = tcg_temp_new_ptr();
98
- a1 = tcg_temp_new_ptr();
99
- a2 = tcg_temp_new_ptr();
100
+ a0 = tcg_temp_ebb_new_ptr();
101
+ a1 = tcg_temp_ebb_new_ptr();
102
+ a2 = tcg_temp_ebb_new_ptr();
103
104
tcg_gen_addi_ptr(a0, cpu_env, dofs);
105
tcg_gen_addi_ptr(a1, cpu_env, aofs);
106
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
107
TCGv_ptr a0, a1, a2, a3;
108
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
109
110
- a0 = tcg_temp_new_ptr();
111
- a1 = tcg_temp_new_ptr();
112
- a2 = tcg_temp_new_ptr();
113
- a3 = tcg_temp_new_ptr();
114
+ a0 = tcg_temp_ebb_new_ptr();
115
+ a1 = tcg_temp_ebb_new_ptr();
116
+ a2 = tcg_temp_ebb_new_ptr();
117
+ a3 = tcg_temp_ebb_new_ptr();
118
119
tcg_gen_addi_ptr(a0, cpu_env, dofs);
120
tcg_gen_addi_ptr(a1, cpu_env, aofs);
121
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
122
TCGv_ptr a0, a1, a2, a3, a4;
123
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
124
125
- a0 = tcg_temp_new_ptr();
126
- a1 = tcg_temp_new_ptr();
127
- a2 = tcg_temp_new_ptr();
128
- a3 = tcg_temp_new_ptr();
129
- a4 = tcg_temp_new_ptr();
130
+ a0 = tcg_temp_ebb_new_ptr();
131
+ a1 = tcg_temp_ebb_new_ptr();
132
+ a2 = tcg_temp_ebb_new_ptr();
133
+ a3 = tcg_temp_ebb_new_ptr();
134
+ a4 = tcg_temp_ebb_new_ptr();
135
136
tcg_gen_addi_ptr(a0, cpu_env, dofs);
137
tcg_gen_addi_ptr(a1, cpu_env, aofs);
138
@@ -XXX,XX +XXX,XX @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
139
be simple enough. */
140
if (TCG_TARGET_REG_BITS == 64
141
&& (vece != MO_32 || !check_size_impl(oprsz, 4))) {
142
- t_64 = tcg_temp_new_i64();
143
+ t_64 = tcg_temp_ebb_new_i64();
144
tcg_gen_extu_i32_i64(t_64, in_32);
145
tcg_gen_dup_i64(vece, t_64, t_64);
146
} else {
147
- t_32 = tcg_temp_new_i32();
148
+ t_32 = tcg_temp_ebb_new_i32();
149
tcg_gen_dup_i32(vece, t_32, in_32);
150
}
151
} else if (in_64) {
152
/* We are given a 64-bit variable input. */
153
- t_64 = tcg_temp_new_i64();
154
+ t_64 = tcg_temp_ebb_new_i64();
155
tcg_gen_dup_i64(vece, t_64, in_64);
156
} else {
157
/* We are given a constant input. */
158
@@ -XXX,XX +XXX,XX @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
159
}
35
}
160
36
161
/* Otherwise implement out of line. */
37
switch (ctx->type) {
162
- t_ptr = tcg_temp_new_ptr();
38
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
163
+ t_ptr = tcg_temp_ebb_new_ptr();
164
tcg_gen_addi_ptr(t_ptr, cpu_env, dofs);
165
166
/*
167
@@ -XXX,XX +XXX,XX @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
168
if (in_32) {
169
t_val = in_32;
170
} else if (in_64) {
171
- t_val = tcg_temp_new_i32();
172
+ t_val = tcg_temp_ebb_new_i32();
173
tcg_gen_extrl_i64_i32(t_val, in_64);
174
} else {
175
t_val = tcg_constant_i32(in_c);
176
@@ -XXX,XX +XXX,XX @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
177
if (in_32) {
178
fns[vece](t_ptr, t_desc, in_32);
179
} else if (in_64) {
180
- t_32 = tcg_temp_new_i32();
181
+ t_32 = tcg_temp_ebb_new_i32();
182
tcg_gen_extrl_i64_i32(t_32, in_64);
183
fns[vece](t_ptr, t_desc, t_32);
184
tcg_temp_free_i32(t_32);
185
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
186
do_dup_store(type, dofs, oprsz, maxsz, t_vec);
187
tcg_temp_free_vec(t_vec);
188
} else if (vece <= MO_32) {
189
- TCGv_i32 in = tcg_temp_new_i32();
190
+ TCGv_i32 in = tcg_temp_ebb_new_i32();
191
switch (vece) {
192
case MO_8:
193
tcg_gen_ld8u_i32(in, cpu_env, aofs);
194
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
195
do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0);
196
tcg_temp_free_i32(in);
197
} else {
198
- TCGv_i64 in = tcg_temp_new_i64();
199
+ TCGv_i64 in = tcg_temp_ebb_new_i64();
200
tcg_gen_ld_i64(in, cpu_env, aofs);
201
do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0);
202
tcg_temp_free_i64(in);
203
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
204
}
205
tcg_temp_free_vec(in);
206
} else {
207
- TCGv_i64 in0 = tcg_temp_new_i64();
208
- TCGv_i64 in1 = tcg_temp_new_i64();
209
+ TCGv_i64 in0 = tcg_temp_ebb_new_i64();
210
+ TCGv_i64 in1 = tcg_temp_ebb_new_i64();
211
212
tcg_gen_ld_i64(in0, cpu_env, aofs);
213
tcg_gen_ld_i64(in1, cpu_env, aofs + 8);
214
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
215
int j;
216
217
for (j = 0; j < 4; ++j) {
218
- in[j] = tcg_temp_new_i64();
219
+ in[j] = tcg_temp_ebb_new_i64();
220
tcg_gen_ld_i64(in[j], cpu_env, aofs + j * 8);
221
}
222
for (i = (aofs == dofs) * 32; i < oprsz; i += 32) {
223
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
224
the 64-bit operation. */
225
static void gen_addv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
226
{
227
- TCGv_i64 t1 = tcg_temp_new_i64();
228
- TCGv_i64 t2 = tcg_temp_new_i64();
229
- TCGv_i64 t3 = tcg_temp_new_i64();
230
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
231
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
232
+ TCGv_i64 t3 = tcg_temp_ebb_new_i64();
233
234
tcg_gen_andc_i64(t1, a, m);
235
tcg_gen_andc_i64(t2, b, m);
236
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
237
void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
238
{
239
TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
240
- TCGv_i32 t1 = tcg_temp_new_i32();
241
- TCGv_i32 t2 = tcg_temp_new_i32();
242
- TCGv_i32 t3 = tcg_temp_new_i32();
243
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
244
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
245
+ TCGv_i32 t3 = tcg_temp_ebb_new_i32();
246
247
tcg_gen_andc_i32(t1, a, m);
248
tcg_gen_andc_i32(t2, b, m);
249
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
250
251
void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
252
{
253
- TCGv_i32 t1 = tcg_temp_new_i32();
254
- TCGv_i32 t2 = tcg_temp_new_i32();
255
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
256
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
257
258
tcg_gen_andi_i32(t1, a, ~0xffff);
259
tcg_gen_add_i32(t2, a, b);
260
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
261
262
void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
263
{
264
- TCGv_i64 t1 = tcg_temp_new_i64();
265
- TCGv_i64 t2 = tcg_temp_new_i64();
266
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
267
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
268
269
tcg_gen_andi_i64(t1, a, ~0xffffffffull);
270
tcg_gen_add_i64(t2, a, b);
271
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs,
272
Compare gen_addv_mask above. */
273
static void gen_subv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
274
{
275
- TCGv_i64 t1 = tcg_temp_new_i64();
276
- TCGv_i64 t2 = tcg_temp_new_i64();
277
- TCGv_i64 t3 = tcg_temp_new_i64();
278
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
279
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
280
+ TCGv_i64 t3 = tcg_temp_ebb_new_i64();
281
282
tcg_gen_or_i64(t1, a, m);
283
tcg_gen_andc_i64(t2, b, m);
284
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
285
void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
286
{
287
TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
288
- TCGv_i32 t1 = tcg_temp_new_i32();
289
- TCGv_i32 t2 = tcg_temp_new_i32();
290
- TCGv_i32 t3 = tcg_temp_new_i32();
291
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
292
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
293
+ TCGv_i32 t3 = tcg_temp_ebb_new_i32();
294
295
tcg_gen_or_i32(t1, a, m);
296
tcg_gen_andc_i32(t2, b, m);
297
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
298
299
void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
300
{
301
- TCGv_i32 t1 = tcg_temp_new_i32();
302
- TCGv_i32 t2 = tcg_temp_new_i32();
303
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
304
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
305
306
tcg_gen_andi_i32(t1, b, ~0xffff);
307
tcg_gen_sub_i32(t2, a, b);
308
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
309
310
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
311
{
312
- TCGv_i64 t1 = tcg_temp_new_i64();
313
- TCGv_i64 t2 = tcg_temp_new_i64();
314
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
315
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
316
317
tcg_gen_andi_i64(t1, b, ~0xffffffffull);
318
tcg_gen_sub_i64(t2, a, b);
319
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_umax(unsigned vece, uint32_t dofs, uint32_t aofs,
320
Compare gen_subv_mask above. */
321
static void gen_negv_mask(TCGv_i64 d, TCGv_i64 b, TCGv_i64 m)
322
{
323
- TCGv_i64 t2 = tcg_temp_new_i64();
324
- TCGv_i64 t3 = tcg_temp_new_i64();
325
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
326
+ TCGv_i64 t3 = tcg_temp_ebb_new_i64();
327
328
tcg_gen_andc_i64(t3, m, b);
329
tcg_gen_andc_i64(t2, b, m);
330
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 b)
331
332
void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 b)
333
{
334
- TCGv_i64 t1 = tcg_temp_new_i64();
335
- TCGv_i64 t2 = tcg_temp_new_i64();
336
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
337
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
338
339
tcg_gen_andi_i64(t1, b, ~0xffffffffull);
340
tcg_gen_neg_i64(t2, b);
341
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
342
343
static void gen_absv_mask(TCGv_i64 d, TCGv_i64 b, unsigned vece)
344
{
345
- TCGv_i64 t = tcg_temp_new_i64();
346
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
347
int nbit = 8 << vece;
348
349
/* Create -1 for each negative element. */
350
@@ -XXX,XX +XXX,XX @@ static const GVecGen2s gop_ands = {
351
void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
352
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
353
{
354
- TCGv_i64 tmp = tcg_temp_new_i64();
355
+ TCGv_i64 tmp = tcg_temp_ebb_new_i64();
356
tcg_gen_dup_i64(vece, tmp, c);
357
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
358
tcg_temp_free_i64(tmp);
359
@@ -XXX,XX +XXX,XX @@ static const GVecGen2s gop_xors = {
360
void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
361
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
362
{
363
- TCGv_i64 tmp = tcg_temp_new_i64();
364
+ TCGv_i64 tmp = tcg_temp_ebb_new_i64();
365
tcg_gen_dup_i64(vece, tmp, c);
366
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
367
tcg_temp_free_i64(tmp);
368
@@ -XXX,XX +XXX,XX @@ static const GVecGen2s gop_ors = {
369
void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
370
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
371
{
372
- TCGv_i64 tmp = tcg_temp_new_i64();
373
+ TCGv_i64 tmp = tcg_temp_ebb_new_i64();
374
tcg_gen_dup_i64(vece, tmp, c);
375
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
376
tcg_temp_free_i64(tmp);
377
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
378
{
379
uint64_t s_mask = dup_const(MO_8, 0x80 >> c);
380
uint64_t c_mask = dup_const(MO_8, 0xff >> c);
381
- TCGv_i64 s = tcg_temp_new_i64();
382
+ TCGv_i64 s = tcg_temp_ebb_new_i64();
383
384
tcg_gen_shri_i64(d, a, c);
385
tcg_gen_andi_i64(s, d, s_mask); /* isolate (shifted) sign bit */
386
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
387
{
388
uint64_t s_mask = dup_const(MO_16, 0x8000 >> c);
389
uint64_t c_mask = dup_const(MO_16, 0xffff >> c);
390
- TCGv_i64 s = tcg_temp_new_i64();
391
+ TCGv_i64 s = tcg_temp_ebb_new_i64();
392
393
tcg_gen_shri_i64(d, a, c);
394
tcg_gen_andi_i64(s, d, s_mask); /* isolate (shifted) sign bit */
395
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
396
{
397
uint32_t s_mask = dup_const(MO_8, 0x80 >> c);
398
uint32_t c_mask = dup_const(MO_8, 0xff >> c);
399
- TCGv_i32 s = tcg_temp_new_i32();
400
+ TCGv_i32 s = tcg_temp_ebb_new_i32();
401
402
tcg_gen_shri_i32(d, a, c);
403
tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */
404
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
405
{
406
uint32_t s_mask = dup_const(MO_16, 0x8000 >> c);
407
uint32_t c_mask = dup_const(MO_16, 0xffff >> c);
408
- TCGv_i32 s = tcg_temp_new_i32();
409
+ TCGv_i32 s = tcg_temp_ebb_new_i32();
410
411
tcg_gen_shri_i32(d, a, c);
412
tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */
413
@@ -XXX,XX +XXX,XX @@ do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift,
414
TCGv_vec v_shift = tcg_temp_new_vec(type);
415
416
if (vece == MO_64) {
417
- TCGv_i64 sh64 = tcg_temp_new_i64();
418
+ TCGv_i64 sh64 = tcg_temp_ebb_new_i64();
419
tcg_gen_extu_i32_i64(sh64, shift);
420
tcg_gen_dup_i64_vec(MO_64, v_shift, sh64);
421
tcg_temp_free_i64(sh64);
422
@@ -XXX,XX +XXX,XX @@ do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift,
423
if (vece == MO_32 && check_size_impl(oprsz, 4)) {
424
expand_2s_i32(dofs, aofs, oprsz, shift, false, g->fni4);
425
} else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
426
- TCGv_i64 sh64 = tcg_temp_new_i64();
427
+ TCGv_i64 sh64 = tcg_temp_ebb_new_i64();
428
tcg_gen_extu_i32_i64(sh64, shift);
429
expand_2s_i64(dofs, aofs, oprsz, sh64, false, g->fni8);
430
tcg_temp_free_i64(sh64);
431
} else {
432
- TCGv_ptr a0 = tcg_temp_new_ptr();
433
- TCGv_ptr a1 = tcg_temp_new_ptr();
434
- TCGv_i32 desc = tcg_temp_new_i32();
435
+ TCGv_ptr a0 = tcg_temp_ebb_new_ptr();
436
+ TCGv_ptr a1 = tcg_temp_ebb_new_ptr();
437
+ TCGv_i32 desc = tcg_temp_ebb_new_i32();
438
439
tcg_gen_shli_i32(desc, shift, SIMD_DATA_SHIFT);
440
tcg_gen_ori_i32(desc, desc, simd_desc(oprsz, maxsz, 0));
441
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_shlv_mod_vec(unsigned vece, TCGv_vec d,
442
443
static void tcg_gen_shl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
444
{
445
- TCGv_i32 t = tcg_temp_new_i32();
446
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
447
448
tcg_gen_andi_i32(t, b, 31);
449
tcg_gen_shl_i32(d, a, t);
450
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_shl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
451
452
static void tcg_gen_shl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
453
{
454
- TCGv_i64 t = tcg_temp_new_i64();
455
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
456
457
tcg_gen_andi_i64(t, b, 63);
458
tcg_gen_shl_i64(d, a, t);
459
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_shrv_mod_vec(unsigned vece, TCGv_vec d,
460
461
static void tcg_gen_shr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
462
{
463
- TCGv_i32 t = tcg_temp_new_i32();
464
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
465
466
tcg_gen_andi_i32(t, b, 31);
467
tcg_gen_shr_i32(d, a, t);
468
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_shr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
469
470
static void tcg_gen_shr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
471
{
472
- TCGv_i64 t = tcg_temp_new_i64();
473
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
474
475
tcg_gen_andi_i64(t, b, 63);
476
tcg_gen_shr_i64(d, a, t);
477
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_sarv_mod_vec(unsigned vece, TCGv_vec d,
478
479
static void tcg_gen_sar_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
480
{
481
- TCGv_i32 t = tcg_temp_new_i32();
482
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
483
484
tcg_gen_andi_i32(t, b, 31);
485
tcg_gen_sar_i32(d, a, t);
486
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_sar_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
487
488
static void tcg_gen_sar_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
489
{
490
- TCGv_i64 t = tcg_temp_new_i64();
491
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
492
493
tcg_gen_andi_i64(t, b, 63);
494
tcg_gen_sar_i64(d, a, t);
495
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
496
497
static void tcg_gen_rotl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
498
{
499
- TCGv_i32 t = tcg_temp_new_i32();
500
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
501
502
tcg_gen_andi_i32(t, b, 31);
503
tcg_gen_rotl_i32(d, a, t);
504
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_rotl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
505
506
static void tcg_gen_rotl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
507
{
508
- TCGv_i64 t = tcg_temp_new_i64();
509
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
510
511
tcg_gen_andi_i64(t, b, 63);
512
tcg_gen_rotl_i64(d, a, t);
513
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_rotrv_mod_vec(unsigned vece, TCGv_vec d,
514
515
static void tcg_gen_rotr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
516
{
517
- TCGv_i32 t = tcg_temp_new_i32();
518
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
519
520
tcg_gen_andi_i32(t, b, 31);
521
tcg_gen_rotr_i32(d, a, t);
522
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_rotr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
523
524
static void tcg_gen_rotr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
525
{
526
- TCGv_i64 t = tcg_temp_new_i64();
527
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
528
529
tcg_gen_andi_i64(t, b, 63);
530
tcg_gen_rotr_i64(d, a, t);
531
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
532
static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
533
uint32_t oprsz, TCGCond cond)
534
{
535
- TCGv_i32 t0 = tcg_temp_new_i32();
536
- TCGv_i32 t1 = tcg_temp_new_i32();
537
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
538
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
539
uint32_t i;
540
541
for (i = 0; i < oprsz; i += 4) {
542
@@ -XXX,XX +XXX,XX @@ static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
543
static void expand_cmp_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
544
uint32_t oprsz, TCGCond cond)
545
{
546
- TCGv_i64 t0 = tcg_temp_new_i64();
547
- TCGv_i64 t1 = tcg_temp_new_i64();
548
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
549
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
550
uint32_t i;
551
552
for (i = 0; i < oprsz; i += 8) {
553
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
554
555
static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
556
{
557
- TCGv_i64 t = tcg_temp_new_i64();
558
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
559
560
tcg_gen_and_i64(t, b, a);
561
tcg_gen_andc_i64(d, c, a);
562
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
563
index XXXXXXX..XXXXXXX 100644
564
--- a/tcg/tcg-op.c
565
+++ b/tcg/tcg-op.c
566
@@ -XXX,XX +XXX,XX @@ void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
567
if (TCG_TARGET_HAS_div_i32) {
568
tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
569
} else if (TCG_TARGET_HAS_div2_i32) {
570
- TCGv_i32 t0 = tcg_temp_new_i32();
571
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
572
tcg_gen_sari_i32(t0, arg1, 31);
573
tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
574
tcg_temp_free_i32(t0);
575
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
576
if (TCG_TARGET_HAS_rem_i32) {
577
tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
578
} else if (TCG_TARGET_HAS_div_i32) {
579
- TCGv_i32 t0 = tcg_temp_new_i32();
580
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
581
tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
582
tcg_gen_mul_i32(t0, t0, arg2);
583
tcg_gen_sub_i32(ret, arg1, t0);
584
tcg_temp_free_i32(t0);
585
} else if (TCG_TARGET_HAS_div2_i32) {
586
- TCGv_i32 t0 = tcg_temp_new_i32();
587
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
588
tcg_gen_sari_i32(t0, arg1, 31);
589
tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
590
tcg_temp_free_i32(t0);
591
@@ -XXX,XX +XXX,XX @@ void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
592
if (TCG_TARGET_HAS_div_i32) {
593
tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
594
} else if (TCG_TARGET_HAS_div2_i32) {
595
- TCGv_i32 t0 = tcg_temp_new_i32();
596
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
597
tcg_gen_movi_i32(t0, 0);
598
tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
599
tcg_temp_free_i32(t0);
600
@@ -XXX,XX +XXX,XX @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
601
if (TCG_TARGET_HAS_rem_i32) {
602
tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
603
} else if (TCG_TARGET_HAS_div_i32) {
604
- TCGv_i32 t0 = tcg_temp_new_i32();
605
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
606
tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
607
tcg_gen_mul_i32(t0, t0, arg2);
608
tcg_gen_sub_i32(ret, arg1, t0);
609
tcg_temp_free_i32(t0);
610
} else if (TCG_TARGET_HAS_div2_i32) {
611
- TCGv_i32 t0 = tcg_temp_new_i32();
612
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
613
tcg_gen_movi_i32(t0, 0);
614
tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
615
tcg_temp_free_i32(t0);
616
@@ -XXX,XX +XXX,XX @@ void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
617
if (TCG_TARGET_HAS_andc_i32) {
618
tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
619
} else {
620
- TCGv_i32 t0 = tcg_temp_new_i32();
621
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
622
tcg_gen_not_i32(t0, arg2);
623
tcg_gen_and_i32(ret, arg1, t0);
624
tcg_temp_free_i32(t0);
625
@@ -XXX,XX +XXX,XX @@ void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
626
if (TCG_TARGET_HAS_orc_i32) {
627
tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
628
} else {
629
- TCGv_i32 t0 = tcg_temp_new_i32();
630
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
631
tcg_gen_not_i32(t0, arg2);
632
tcg_gen_or_i32(ret, arg1, t0);
633
tcg_temp_free_i32(t0);
634
@@ -XXX,XX +XXX,XX @@ void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
635
if (TCG_TARGET_HAS_clz_i32) {
636
tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
637
} else if (TCG_TARGET_HAS_clz_i64) {
638
- TCGv_i64 t1 = tcg_temp_new_i64();
639
- TCGv_i64 t2 = tcg_temp_new_i64();
640
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
641
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
642
tcg_gen_extu_i32_i64(t1, arg1);
643
tcg_gen_extu_i32_i64(t2, arg2);
644
tcg_gen_addi_i64(t2, t2, 32);
645
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
646
if (TCG_TARGET_HAS_ctz_i32) {
647
tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
648
} else if (TCG_TARGET_HAS_ctz_i64) {
649
- TCGv_i64 t1 = tcg_temp_new_i64();
650
- TCGv_i64 t2 = tcg_temp_new_i64();
651
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
652
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
653
tcg_gen_extu_i32_i64(t1, arg1);
654
tcg_gen_extu_i32_i64(t2, arg2);
655
tcg_gen_ctz_i64(t1, t1, t2);
656
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
657
|| TCG_TARGET_HAS_ctpop_i64
658
|| TCG_TARGET_HAS_clz_i32
659
|| TCG_TARGET_HAS_clz_i64) {
660
- TCGv_i32 z, t = tcg_temp_new_i32();
661
+ TCGv_i32 z, t = tcg_temp_ebb_new_i32();
662
663
if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
664
tcg_gen_subi_i32(t, arg1, 1);
665
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
666
{
667
if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
668
/* This equivalence has the advantage of not requiring a fixup. */
669
- TCGv_i32 t = tcg_temp_new_i32();
670
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
671
tcg_gen_subi_i32(t, arg1, 1);
672
tcg_gen_andc_i32(t, t, arg1);
673
tcg_gen_ctpop_i32(ret, t);
674
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
675
void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
676
{
677
if (TCG_TARGET_HAS_clz_i32) {
678
- TCGv_i32 t = tcg_temp_new_i32();
679
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
680
tcg_gen_sari_i32(t, arg, 31);
681
tcg_gen_xor_i32(t, t, arg);
682
tcg_gen_clzi_i32(t, t, 32);
683
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
684
if (TCG_TARGET_HAS_ctpop_i32) {
685
tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
686
} else if (TCG_TARGET_HAS_ctpop_i64) {
687
- TCGv_i64 t = tcg_temp_new_i64();
688
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
689
tcg_gen_extu_i32_i64(t, arg1);
690
tcg_gen_ctpop_i64(t, t);
691
tcg_gen_extrl_i64_i32(ret, t);
692
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
693
} else {
694
TCGv_i32 t0, t1;
695
696
- t0 = tcg_temp_new_i32();
697
- t1 = tcg_temp_new_i32();
698
+ t0 = tcg_temp_ebb_new_i32();
699
+ t1 = tcg_temp_ebb_new_i32();
700
tcg_gen_shl_i32(t0, arg1, arg2);
701
tcg_gen_subfi_i32(t1, 32, arg2);
702
tcg_gen_shr_i32(t1, arg1, t1);
703
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
704
tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2));
705
} else {
706
TCGv_i32 t0, t1;
707
- t0 = tcg_temp_new_i32();
708
- t1 = tcg_temp_new_i32();
709
+ t0 = tcg_temp_ebb_new_i32();
710
+ t1 = tcg_temp_ebb_new_i32();
711
tcg_gen_shli_i32(t0, arg1, arg2);
712
tcg_gen_shri_i32(t1, arg1, 32 - arg2);
713
tcg_gen_or_i32(ret, t0, t1);
714
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
715
} else {
716
TCGv_i32 t0, t1;
717
718
- t0 = tcg_temp_new_i32();
719
- t1 = tcg_temp_new_i32();
720
+ t0 = tcg_temp_ebb_new_i32();
721
+ t1 = tcg_temp_ebb_new_i32();
722
tcg_gen_shr_i32(t0, arg1, arg2);
723
tcg_gen_subfi_i32(t1, 32, arg2);
724
tcg_gen_shl_i32(t1, arg1, t1);
725
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
726
return;
727
}
39
}
728
40
729
- t1 = tcg_temp_new_i32();
41
/* Inserting a value into zero at offset 0. */
730
+ t1 = tcg_temp_ebb_new_i32();
42
- if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
731
43
- uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
732
if (TCG_TARGET_HAS_extract2_i32) {
44
+ if (ti_is_const_val(t1, 0) && ofs == 0) {
733
if (ofs + len == 32) {
45
+ uint64_t mask = MAKE_64BIT_MASK(0, len);
734
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
46
735
} else if (TCG_TARGET_HAS_extract2_i32) {
47
op->opc = and_opc;
736
tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs);
48
op->args[1] = op->args[2];
737
} else {
49
op->args[2] = arg_new_constant(ctx, mask);
738
- TCGv_i32 t0 = tcg_temp_new_i32();
50
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
739
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
51
- return false;
740
tcg_gen_shri_i32(t0, al, ofs);
52
+ return fold_and(ctx, op);
741
tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
742
tcg_temp_free_i32(t0);
743
@@ -XXX,XX +XXX,XX @@ void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
744
} else if (TCG_TARGET_HAS_movcond_i32) {
745
tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
746
} else {
747
- TCGv_i32 t0 = tcg_temp_new_i32();
748
- TCGv_i32 t1 = tcg_temp_new_i32();
749
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
750
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
751
tcg_gen_setcond_i32(cond, t0, c1, c2);
752
tcg_gen_neg_i32(t0, t0);
753
tcg_gen_and_i32(t1, v1, t0);
754
@@ -XXX,XX +XXX,XX @@ void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
755
if (TCG_TARGET_HAS_add2_i32) {
756
tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
757
} else {
758
- TCGv_i64 t0 = tcg_temp_new_i64();
759
- TCGv_i64 t1 = tcg_temp_new_i64();
760
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
761
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
762
tcg_gen_concat_i32_i64(t0, al, ah);
763
tcg_gen_concat_i32_i64(t1, bl, bh);
764
tcg_gen_add_i64(t0, t0, t1);
765
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
766
if (TCG_TARGET_HAS_sub2_i32) {
767
tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
768
} else {
769
- TCGv_i64 t0 = tcg_temp_new_i64();
770
- TCGv_i64 t1 = tcg_temp_new_i64();
771
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
772
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
773
tcg_gen_concat_i32_i64(t0, al, ah);
774
tcg_gen_concat_i32_i64(t1, bl, bh);
775
tcg_gen_sub_i64(t0, t0, t1);
776
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
777
if (TCG_TARGET_HAS_mulu2_i32) {
778
tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
779
} else if (TCG_TARGET_HAS_muluh_i32) {
780
- TCGv_i32 t = tcg_temp_new_i32();
781
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
782
tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
783
tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
784
tcg_gen_mov_i32(rl, t);
785
tcg_temp_free_i32(t);
786
} else if (TCG_TARGET_REG_BITS == 64) {
787
- TCGv_i64 t0 = tcg_temp_new_i64();
788
- TCGv_i64 t1 = tcg_temp_new_i64();
789
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
790
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
791
tcg_gen_extu_i32_i64(t0, arg1);
792
tcg_gen_extu_i32_i64(t1, arg2);
793
tcg_gen_mul_i64(t0, t0, t1);
794
@@ -XXX,XX +XXX,XX @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
795
if (TCG_TARGET_HAS_muls2_i32) {
796
tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
797
} else if (TCG_TARGET_HAS_mulsh_i32) {
798
- TCGv_i32 t = tcg_temp_new_i32();
799
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
800
tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
801
tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
802
tcg_gen_mov_i32(rl, t);
803
tcg_temp_free_i32(t);
804
} else if (TCG_TARGET_REG_BITS == 32) {
805
- TCGv_i32 t0 = tcg_temp_new_i32();
806
- TCGv_i32 t1 = tcg_temp_new_i32();
807
- TCGv_i32 t2 = tcg_temp_new_i32();
808
- TCGv_i32 t3 = tcg_temp_new_i32();
809
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
810
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
811
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
812
+ TCGv_i32 t3 = tcg_temp_ebb_new_i32();
813
tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
814
/* Adjust for negative inputs. */
815
tcg_gen_sari_i32(t2, arg1, 31);
816
@@ -XXX,XX +XXX,XX @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
817
tcg_temp_free_i32(t2);
818
tcg_temp_free_i32(t3);
819
} else {
820
- TCGv_i64 t0 = tcg_temp_new_i64();
821
- TCGv_i64 t1 = tcg_temp_new_i64();
822
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
823
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
824
tcg_gen_ext_i32_i64(t0, arg1);
825
tcg_gen_ext_i32_i64(t1, arg2);
826
tcg_gen_mul_i64(t0, t0, t1);
827
@@ -XXX,XX +XXX,XX @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
828
void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
829
{
830
if (TCG_TARGET_REG_BITS == 32) {
831
- TCGv_i32 t0 = tcg_temp_new_i32();
832
- TCGv_i32 t1 = tcg_temp_new_i32();
833
- TCGv_i32 t2 = tcg_temp_new_i32();
834
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
835
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
836
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
837
tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
838
/* Adjust for negative input for the signed arg1. */
839
tcg_gen_sari_i32(t2, arg1, 31);
840
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
841
tcg_temp_free_i32(t1);
842
tcg_temp_free_i32(t2);
843
} else {
844
- TCGv_i64 t0 = tcg_temp_new_i64();
845
- TCGv_i64 t1 = tcg_temp_new_i64();
846
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
847
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
848
tcg_gen_ext_i32_i64(t0, arg1);
849
tcg_gen_extu_i32_i64(t1, arg2);
850
tcg_gen_mul_i64(t0, t0, t1);
851
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
852
if (TCG_TARGET_HAS_bswap16_i32) {
853
tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
854
} else {
855
- TCGv_i32 t0 = tcg_temp_new_i32();
856
- TCGv_i32 t1 = tcg_temp_new_i32();
857
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
858
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
859
860
tcg_gen_shri_i32(t0, arg, 8);
861
if (!(flags & TCG_BSWAP_IZ)) {
862
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
863
if (TCG_TARGET_HAS_bswap32_i32) {
864
tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0);
865
} else {
866
- TCGv_i32 t0 = tcg_temp_new_i32();
867
- TCGv_i32 t1 = tcg_temp_new_i32();
868
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
869
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
870
TCGv_i32 t2 = tcg_constant_i32(0x00ff00ff);
871
872
/* arg = abcd */
873
@@ -XXX,XX +XXX,XX @@ void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
874
875
void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a)
876
{
877
- TCGv_i32 t = tcg_temp_new_i32();
878
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
879
880
tcg_gen_sari_i32(t, a, 31);
881
tcg_gen_xor_i32(ret, a, t);
882
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
883
TCGv_i64 t0;
884
TCGv_i32 t1;
885
886
- t0 = tcg_temp_new_i64();
887
- t1 = tcg_temp_new_i32();
888
+ t0 = tcg_temp_ebb_new_i64();
889
+ t1 = tcg_temp_ebb_new_i32();
890
891
tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0),
892
TCGV_LOW(arg1), TCGV_LOW(arg2));
893
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
894
tcg_gen_extract2_i32(TCGV_HIGH(ret),
895
TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c);
896
} else {
897
- TCGv_i32 t0 = tcg_temp_new_i32();
898
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
899
tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
900
tcg_gen_deposit_i32(TCGV_HIGH(ret), t0,
901
TCGV_HIGH(arg1), c, 32 - c);
902
@@ -XXX,XX +XXX,XX @@ void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
903
if (TCG_TARGET_HAS_div_i64) {
904
tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
905
} else if (TCG_TARGET_HAS_div2_i64) {
906
- TCGv_i64 t0 = tcg_temp_new_i64();
907
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
908
tcg_gen_sari_i64(t0, arg1, 63);
909
tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
910
tcg_temp_free_i64(t0);
911
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
912
if (TCG_TARGET_HAS_rem_i64) {
913
tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
914
} else if (TCG_TARGET_HAS_div_i64) {
915
- TCGv_i64 t0 = tcg_temp_new_i64();
916
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
917
tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
918
tcg_gen_mul_i64(t0, t0, arg2);
919
tcg_gen_sub_i64(ret, arg1, t0);
920
tcg_temp_free_i64(t0);
921
} else if (TCG_TARGET_HAS_div2_i64) {
922
- TCGv_i64 t0 = tcg_temp_new_i64();
923
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
924
tcg_gen_sari_i64(t0, arg1, 63);
925
tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
926
tcg_temp_free_i64(t0);
927
@@ -XXX,XX +XXX,XX @@ void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
928
if (TCG_TARGET_HAS_div_i64) {
929
tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
930
} else if (TCG_TARGET_HAS_div2_i64) {
931
- TCGv_i64 t0 = tcg_temp_new_i64();
932
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
933
tcg_gen_movi_i64(t0, 0);
934
tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
935
tcg_temp_free_i64(t0);
936
@@ -XXX,XX +XXX,XX @@ void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
937
if (TCG_TARGET_HAS_rem_i64) {
938
tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
939
} else if (TCG_TARGET_HAS_div_i64) {
940
- TCGv_i64 t0 = tcg_temp_new_i64();
941
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
942
tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
943
tcg_gen_mul_i64(t0, t0, arg2);
944
tcg_gen_sub_i64(ret, arg1, t0);
945
tcg_temp_free_i64(t0);
946
} else if (TCG_TARGET_HAS_div2_i64) {
947
- TCGv_i64 t0 = tcg_temp_new_i64();
948
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
949
tcg_gen_movi_i64(t0, 0);
950
tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
951
tcg_temp_free_i64(t0);
952
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
953
} else if (TCG_TARGET_HAS_bswap16_i64) {
954
tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags);
955
} else {
956
- TCGv_i64 t0 = tcg_temp_new_i64();
957
- TCGv_i64 t1 = tcg_temp_new_i64();
958
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
959
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
960
961
tcg_gen_shri_i64(t0, arg, 8);
962
if (!(flags & TCG_BSWAP_IZ)) {
963
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
964
} else if (TCG_TARGET_HAS_bswap32_i64) {
965
tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags);
966
} else {
967
- TCGv_i64 t0 = tcg_temp_new_i64();
968
- TCGv_i64 t1 = tcg_temp_new_i64();
969
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
970
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
971
TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
972
973
/* arg = xxxxabcd */
974
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
975
{
976
if (TCG_TARGET_REG_BITS == 32) {
977
TCGv_i32 t0, t1;
978
- t0 = tcg_temp_new_i32();
979
- t1 = tcg_temp_new_i32();
980
+ t0 = tcg_temp_ebb_new_i32();
981
+ t1 = tcg_temp_ebb_new_i32();
982
983
tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
984
tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
985
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
986
} else if (TCG_TARGET_HAS_bswap64_i64) {
987
tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0);
988
} else {
989
- TCGv_i64 t0 = tcg_temp_new_i64();
990
- TCGv_i64 t1 = tcg_temp_new_i64();
991
- TCGv_i64 t2 = tcg_temp_new_i64();
992
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
993
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
994
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
995
996
/* arg = abcdefgh */
997
tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull);
998
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
999
void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg)
1000
{
1001
uint64_t m = 0x0000ffff0000ffffull;
1002
- TCGv_i64 t0 = tcg_temp_new_i64();
1003
- TCGv_i64 t1 = tcg_temp_new_i64();
1004
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1005
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1006
1007
/* See include/qemu/bitops.h, hswap64. */
1008
tcg_gen_rotli_i64(t1, arg, 32);
1009
@@ -XXX,XX +XXX,XX @@ void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1010
} else if (TCG_TARGET_HAS_andc_i64) {
1011
tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
1012
} else {
1013
- TCGv_i64 t0 = tcg_temp_new_i64();
1014
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1015
tcg_gen_not_i64(t0, arg2);
1016
tcg_gen_and_i64(ret, arg1, t0);
1017
tcg_temp_free_i64(t0);
1018
@@ -XXX,XX +XXX,XX @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1019
} else if (TCG_TARGET_HAS_orc_i64) {
1020
tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
1021
} else {
1022
- TCGv_i64 t0 = tcg_temp_new_i64();
1023
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1024
tcg_gen_not_i64(t0, arg2);
1025
tcg_gen_or_i64(ret, arg1, t0);
1026
tcg_temp_free_i64(t0);
1027
@@ -XXX,XX +XXX,XX @@ void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1028
if (TCG_TARGET_REG_BITS == 32
1029
&& TCG_TARGET_HAS_clz_i32
1030
&& arg2 <= 0xffffffffu) {
1031
- TCGv_i32 t = tcg_temp_new_i32();
1032
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
1033
tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32);
1034
tcg_gen_addi_i32(t, t, 32);
1035
tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
1036
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1037
if (TCG_TARGET_HAS_ctz_i64) {
1038
tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
1039
} else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
1040
- TCGv_i64 z, t = tcg_temp_new_i64();
1041
+ TCGv_i64 z, t = tcg_temp_ebb_new_i64();
1042
1043
if (TCG_TARGET_HAS_ctpop_i64) {
1044
tcg_gen_subi_i64(t, arg1, 1);
1045
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1046
if (TCG_TARGET_REG_BITS == 32
1047
&& TCG_TARGET_HAS_ctz_i32
1048
&& arg2 <= 0xffffffffu) {
1049
- TCGv_i32 t32 = tcg_temp_new_i32();
1050
+ TCGv_i32 t32 = tcg_temp_ebb_new_i32();
1051
tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32);
1052
tcg_gen_addi_i32(t32, t32, 32);
1053
tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
1054
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1055
&& TCG_TARGET_HAS_ctpop_i64
1056
&& arg2 == 64) {
1057
/* This equivalence has the advantage of not requiring a fixup. */
1058
- TCGv_i64 t = tcg_temp_new_i64();
1059
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
1060
tcg_gen_subi_i64(t, arg1, 1);
1061
tcg_gen_andc_i64(t, t, arg1);
1062
tcg_gen_ctpop_i64(ret, t);
1063
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1064
void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
1065
{
1066
if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) {
1067
- TCGv_i64 t = tcg_temp_new_i64();
1068
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
1069
tcg_gen_sari_i64(t, arg, 63);
1070
tcg_gen_xor_i64(t, t, arg);
1071
tcg_gen_clzi_i64(t, t, 64);
1072
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1073
tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
1074
} else {
1075
TCGv_i64 t0, t1;
1076
- t0 = tcg_temp_new_i64();
1077
- t1 = tcg_temp_new_i64();
1078
+ t0 = tcg_temp_ebb_new_i64();
1079
+ t1 = tcg_temp_ebb_new_i64();
1080
tcg_gen_shl_i64(t0, arg1, arg2);
1081
tcg_gen_subfi_i64(t1, 64, arg2);
1082
tcg_gen_shr_i64(t1, arg1, t1);
1083
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1084
tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2));
1085
} else {
1086
TCGv_i64 t0, t1;
1087
- t0 = tcg_temp_new_i64();
1088
- t1 = tcg_temp_new_i64();
1089
+ t0 = tcg_temp_ebb_new_i64();
1090
+ t1 = tcg_temp_ebb_new_i64();
1091
tcg_gen_shli_i64(t0, arg1, arg2);
1092
tcg_gen_shri_i64(t1, arg1, 64 - arg2);
1093
tcg_gen_or_i64(ret, t0, t1);
1094
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1095
tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
1096
} else {
1097
TCGv_i64 t0, t1;
1098
- t0 = tcg_temp_new_i64();
1099
- t1 = tcg_temp_new_i64();
1100
+ t0 = tcg_temp_ebb_new_i64();
1101
+ t1 = tcg_temp_ebb_new_i64();
1102
tcg_gen_shr_i64(t0, arg1, arg2);
1103
tcg_gen_subfi_i64(t1, 64, arg2);
1104
tcg_gen_shl_i64(t1, arg1, t1);
1105
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
1106
}
1107
}
53
}
1108
54
1109
- t1 = tcg_temp_new_i64();
55
/* Inserting zero into a value. */
1110
+ t1 = tcg_temp_ebb_new_i64();
56
- if (arg_is_const_val(op->args[2], 0)) {
1111
57
- uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
1112
if (TCG_TARGET_HAS_extract2_i64) {
58
+ if (ti_is_const_val(t2, 0)) {
1113
if (ofs + len == 64) {
59
+ uint64_t mask = deposit64(-1, ofs, len, 0);
1114
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
60
1115
tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
61
op->opc = and_opc;
1116
return;
62
op->args[2] = arg_new_constant(ctx, mask);
1117
} else if (len > 32) {
63
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1118
- TCGv_i32 t = tcg_temp_new_i32();
64
- return false;
1119
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
65
+ return fold_and(ctx, op);
1120
/* Extract the bits for the high word normally. */
1121
tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs + 32, len - 32);
1122
/* Shift the field down for the low part. */
1123
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
1124
} else if (TCG_TARGET_HAS_extract2_i64) {
1125
tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs);
1126
} else {
1127
- TCGv_i64 t0 = tcg_temp_new_i64();
1128
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1129
tcg_gen_shri_i64(t0, al, ofs);
1130
tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs);
1131
tcg_temp_free_i64(t0);
1132
@@ -XXX,XX +XXX,XX @@ void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
1133
} else if (cond == TCG_COND_NEVER) {
1134
tcg_gen_mov_i64(ret, v2);
1135
} else if (TCG_TARGET_REG_BITS == 32) {
1136
- TCGv_i32 t0 = tcg_temp_new_i32();
1137
- TCGv_i32 t1 = tcg_temp_new_i32();
1138
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
1139
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1140
tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
1141
TCGV_LOW(c1), TCGV_HIGH(c1),
1142
TCGV_LOW(c2), TCGV_HIGH(c2), cond);
1143
@@ -XXX,XX +XXX,XX @@ void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
1144
} else if (TCG_TARGET_HAS_movcond_i64) {
1145
tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
1146
} else {
1147
- TCGv_i64 t0 = tcg_temp_new_i64();
1148
- TCGv_i64 t1 = tcg_temp_new_i64();
1149
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1150
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1151
tcg_gen_setcond_i64(cond, t0, c1, c2);
1152
tcg_gen_neg_i64(t0, t0);
1153
tcg_gen_and_i64(t1, v1, t0);
1154
@@ -XXX,XX +XXX,XX @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
1155
if (TCG_TARGET_HAS_add2_i64) {
1156
tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
1157
} else {
1158
- TCGv_i64 t0 = tcg_temp_new_i64();
1159
- TCGv_i64 t1 = tcg_temp_new_i64();
1160
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1161
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1162
tcg_gen_add_i64(t0, al, bl);
1163
tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
1164
tcg_gen_add_i64(rh, ah, bh);
1165
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
1166
if (TCG_TARGET_HAS_sub2_i64) {
1167
tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
1168
} else {
1169
- TCGv_i64 t0 = tcg_temp_new_i64();
1170
- TCGv_i64 t1 = tcg_temp_new_i64();
1171
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1172
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1173
tcg_gen_sub_i64(t0, al, bl);
1174
tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
1175
tcg_gen_sub_i64(rh, ah, bh);
1176
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
1177
if (TCG_TARGET_HAS_mulu2_i64) {
1178
tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
1179
} else if (TCG_TARGET_HAS_muluh_i64) {
1180
- TCGv_i64 t = tcg_temp_new_i64();
1181
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
1182
tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
1183
tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
1184
tcg_gen_mov_i64(rl, t);
1185
tcg_temp_free_i64(t);
1186
} else {
1187
- TCGv_i64 t0 = tcg_temp_new_i64();
1188
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1189
tcg_gen_mul_i64(t0, arg1, arg2);
1190
gen_helper_muluh_i64(rh, arg1, arg2);
1191
tcg_gen_mov_i64(rl, t0);
1192
@@ -XXX,XX +XXX,XX @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
1193
if (TCG_TARGET_HAS_muls2_i64) {
1194
tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
1195
} else if (TCG_TARGET_HAS_mulsh_i64) {
1196
- TCGv_i64 t = tcg_temp_new_i64();
1197
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
1198
tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
1199
tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
1200
tcg_gen_mov_i64(rl, t);
1201
tcg_temp_free_i64(t);
1202
} else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
1203
- TCGv_i64 t0 = tcg_temp_new_i64();
1204
- TCGv_i64 t1 = tcg_temp_new_i64();
1205
- TCGv_i64 t2 = tcg_temp_new_i64();
1206
- TCGv_i64 t3 = tcg_temp_new_i64();
1207
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1208
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1209
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
1210
+ TCGv_i64 t3 = tcg_temp_ebb_new_i64();
1211
tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
1212
/* Adjust for negative inputs. */
1213
tcg_gen_sari_i64(t2, arg1, 63);
1214
@@ -XXX,XX +XXX,XX @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
1215
tcg_temp_free_i64(t2);
1216
tcg_temp_free_i64(t3);
1217
} else {
1218
- TCGv_i64 t0 = tcg_temp_new_i64();
1219
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1220
tcg_gen_mul_i64(t0, arg1, arg2);
1221
gen_helper_mulsh_i64(rh, arg1, arg2);
1222
tcg_gen_mov_i64(rl, t0);
1223
@@ -XXX,XX +XXX,XX @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
1224
1225
void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
1226
{
1227
- TCGv_i64 t0 = tcg_temp_new_i64();
1228
- TCGv_i64 t1 = tcg_temp_new_i64();
1229
- TCGv_i64 t2 = tcg_temp_new_i64();
1230
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1231
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1232
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
1233
tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
1234
/* Adjust for negative input for the signed arg1. */
1235
tcg_gen_sari_i64(t2, arg1, 63);
1236
@@ -XXX,XX +XXX,XX @@ void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
1237
1238
void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a)
1239
{
1240
- TCGv_i64 t = tcg_temp_new_i64();
1241
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
1242
1243
tcg_gen_sari_i64(t, a, 63);
1244
tcg_gen_xor_i64(ret, a, t);
1245
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
1246
tcg_gen_op2(INDEX_op_extrh_i64_i32,
1247
tcgv_i32_arg(ret), tcgv_i64_arg(arg));
1248
} else {
1249
- TCGv_i64 t = tcg_temp_new_i64();
1250
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
1251
tcg_gen_shri_i64(t, arg, 32);
1252
tcg_gen_mov_i32(ret, (TCGv_i32)t);
1253
tcg_temp_free_i64(t);
1254
@@ -XXX,XX +XXX,XX @@ void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
1255
return;
1256
}
66
}
1257
67
1258
- tmp = tcg_temp_new_i64();
68
- ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1259
+ tmp = tcg_temp_ebb_new_i64();
69
- op->args[3], op->args[4],
1260
/* These extensions are only needed for type correctness.
70
- arg_info(op->args[2])->z_mask);
1261
We may be able to do better given target specific information. */
71
- return false;
1262
tcg_gen_extu_i32_i64(tmp, high);
72
+ z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
1263
@@ -XXX,XX +XXX,XX @@ void tcg_gen_lookup_and_goto_ptr(void)
73
+ return fold_masks_z(ctx, op, z_mask);
1264
}
74
}
1265
75
1266
plugin_gen_disable_mem_helpers();
76
static bool fold_divide(OptContext *ctx, TCGOp *op)
1267
- ptr = tcg_temp_new_ptr();
1268
+ ptr = tcg_temp_ebb_new_ptr();
1269
gen_helper_lookup_tb_ptr(ptr, cpu_env);
1270
tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
1271
tcg_temp_free_ptr(ptr);
1272
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
1273
oi = make_memop_idx(memop, idx);
1274
1275
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
1276
- swap = tcg_temp_new_i32();
1277
+ swap = tcg_temp_ebb_new_i32();
1278
switch (memop & MO_SIZE) {
1279
case MO_16:
1280
tcg_gen_bswap16_i32(swap, val, 0);
1281
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
1282
oi = make_memop_idx(memop, idx);
1283
1284
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
1285
- swap = tcg_temp_new_i64();
1286
+ swap = tcg_temp_ebb_new_i64();
1287
switch (memop & MO_SIZE) {
1288
case MO_16:
1289
tcg_gen_bswap16_i64(swap, val, 0);
1290
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
1291
1292
addr_p8 = tcg_temp_new();
1293
if ((mop[0] ^ memop) & MO_BSWAP) {
1294
- TCGv_i64 t = tcg_temp_new_i64();
1295
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
1296
1297
tcg_gen_bswap64_i64(t, x);
1298
gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
1299
@@ -XXX,XX +XXX,XX @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
1300
void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
1301
TCGv_i32 newv, TCGArg idx, MemOp memop)
1302
{
1303
- TCGv_i32 t1 = tcg_temp_new_i32();
1304
- TCGv_i32 t2 = tcg_temp_new_i32();
1305
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1306
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1307
1308
tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
1309
1310
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
1311
return;
1312
}
1313
1314
- t1 = tcg_temp_new_i64();
1315
- t2 = tcg_temp_new_i64();
1316
+ t1 = tcg_temp_ebb_new_i64();
1317
+ t2 = tcg_temp_ebb_new_i64();
1318
1319
tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
1320
1321
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
1322
tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
1323
}
1324
} else {
1325
- TCGv_i32 c32 = tcg_temp_new_i32();
1326
- TCGv_i32 n32 = tcg_temp_new_i32();
1327
- TCGv_i32 r32 = tcg_temp_new_i32();
1328
+ TCGv_i32 c32 = tcg_temp_ebb_new_i32();
1329
+ TCGv_i32 n32 = tcg_temp_ebb_new_i32();
1330
+ TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1331
1332
tcg_gen_extrl_i64_i32(c32, cmpv);
1333
tcg_gen_extrl_i64_i32(n32, newv);
1334
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
1335
1336
gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1337
} else {
1338
- TCGv_i128 oldv = tcg_temp_new_i128();
1339
- TCGv_i128 tmpv = tcg_temp_new_i128();
1340
- TCGv_i64 t0 = tcg_temp_new_i64();
1341
- TCGv_i64 t1 = tcg_temp_new_i64();
1342
+ TCGv_i128 oldv = tcg_temp_ebb_new_i128();
1343
+ TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
1344
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1345
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1346
TCGv_i64 z = tcg_constant_i64(0);
1347
1348
tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
1349
@@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
1350
TCGArg idx, MemOp memop, bool new_val,
1351
void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
1352
{
1353
- TCGv_i32 t1 = tcg_temp_new_i32();
1354
- TCGv_i32 t2 = tcg_temp_new_i32();
1355
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1356
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1357
1358
memop = tcg_canonicalize_memop(memop, 0, 0);
1359
1360
@@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
1361
TCGArg idx, MemOp memop, bool new_val,
1362
void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
1363
{
1364
- TCGv_i64 t1 = tcg_temp_new_i64();
1365
- TCGv_i64 t2 = tcg_temp_new_i64();
1366
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1367
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
1368
1369
memop = tcg_canonicalize_memop(memop, 1, 0);
1370
1371
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
1372
tcg_gen_movi_i64(ret, 0);
1373
#endif /* CONFIG_ATOMIC64 */
1374
} else {
1375
- TCGv_i32 v32 = tcg_temp_new_i32();
1376
- TCGv_i32 r32 = tcg_temp_new_i32();
1377
+ TCGv_i32 v32 = tcg_temp_ebb_new_i32();
1378
+ TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1379
1380
tcg_gen_extrl_i64_i32(v32, val);
1381
do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
1382
diff --git a/tcg/tcg.c b/tcg/tcg.c
1383
index XXXXXXX..XXXXXXX 100644
1384
--- a/tcg/tcg.c
1385
+++ b/tcg/tcg.c
1386
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1387
case TCG_CALL_ARG_EXTEND_U:
1388
case TCG_CALL_ARG_EXTEND_S:
1389
{
1390
- TCGv_i64 temp = tcg_temp_new_i64();
1391
+ TCGv_i64 temp = tcg_temp_ebb_new_i64();
1392
TCGv_i32 orig = temp_tcgv_i32(ts);
1393
1394
if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
1395
--
77
--
1396
2.34.1
78
2.43.0
1397
1398
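The conversions above work because each of these temporaries is only live between its definition and a use later in the same extended basic block. A rough sketch of the pattern, with an invented helper name (gen_masked_add is not part of the series):

    static void gen_masked_add(TCGv_i32 dst, TCGv_i32 src)
    {
        /* Produced and consumed with no label or branch in between,
         * so an EBB-lifetime temporary is enough. */
        TCGv_i32 tmp = tcg_temp_ebb_new_i32();

        tcg_gen_andi_i32(tmp, src, 0xff);
        tcg_gen_add_i32(dst, dst, tmp);
        tcg_temp_free_i32(tmp);
    }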
New patch
The input which overlaps the sign bit of the output can
have its input s_mask propagated to the output s_mask.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     TempOptInfo *t2 = arg_info(op->args[2]);
     int ofs = op->args[3];
     int len = op->args[4];
+    int width;
     TCGOpcode and_opc;
-    uint64_t z_mask;
+    uint64_t z_mask, s_mask;
 
     if (ti_is_const(t1) && ti_is_const(t2)) {
         return tcg_opt_gen_movi(ctx, op, op->args[0],
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     switch (ctx->type) {
     case TCG_TYPE_I32:
         and_opc = INDEX_op_and_i32;
+        width = 32;
         break;
     case TCG_TYPE_I64:
         and_opc = INDEX_op_and_i64;
+        width = 64;
         break;
     default:
         g_assert_not_reached();
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
         return fold_and(ctx, op);
     }
 
+    /* The s_mask from the top portion of the deposit is still valid. */
+    if (ofs + len == width) {
+        s_mask = t2->s_mask << ofs;
+    } else {
+        s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
+    }
+
     z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
-    return fold_masks_z(ctx, op, z_mask);
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_divide(OptContext *ctx, TCGOp *op)
--
2.43.0
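The rule added above can be restated as a standalone sketch (the function name deposit_s_mask is invented; s_mask is assumed to mark high bits known to repeat the sign bit, as in TempOptInfo):

    #include <stdint.h>

    static uint64_t deposit_s_mask(uint64_t s1, uint64_t s2,
                                   int ofs, int len, int width)
    {
        if (ofs + len == width) {
            /* The field reaches the top: operand 2 supplies the sign bits. */
            return s2 << ofs;
        }
        /* Otherwise only operand 1's bits above the field still repeat. */
        return s1 & ~(((uint64_t)1 << (ofs + len)) - 1);
    }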
New patch
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
         fold_xi_to_x(ctx, op, 1)) {
         return true;
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_dup(OptContext *ctx, TCGOp *op)
--
2.43.0
1
Rewrite the sections which talked about 'local temporaries'.
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Remove some assumptions which no longer hold.
3
4
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
3
---
7
docs/devel/tcg-ops.rst | 230 +++++++++++++++++++++++------------------
4
tcg/optimize.c | 4 ++--
8
1 file changed, 129 insertions(+), 101 deletions(-)
5
1 file changed, 2 insertions(+), 2 deletions(-)
9
6
10
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
12
--- a/docs/devel/tcg-ops.rst
9
--- a/tcg/optimize.c
13
+++ b/docs/devel/tcg-ops.rst
10
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ TCG Intermediate Representation
11
@@ -XXX,XX +XXX,XX @@ static bool fold_dup(OptContext *ctx, TCGOp *op)
15
Introduction
12
t = dup_const(TCGOP_VECE(op), t);
16
============
13
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
17
14
}
18
-TCG (Tiny Code Generator) began as a generic backend for a C
15
- return false;
19
-compiler. It was simplified to be used in QEMU. It also has its roots
16
+ return finish_folding(ctx, op);
20
-in the QOP code generator written by Paul Brook.
17
}
21
+TCG (Tiny Code Generator) began as a generic backend for a C compiler.
18
22
+It was simplified to be used in QEMU. It also has its roots in the
19
static bool fold_dup2(OptContext *ctx, TCGOp *op)
23
+QOP code generator written by Paul Brook.
20
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
24
21
op->opc = INDEX_op_dup_vec;
25
Definitions
22
TCGOP_VECE(op) = MO_32;
26
===========
23
}
27
24
- return false;
28
-TCG receives RISC-like *TCG ops* and performs some optimizations on them,
25
+ return finish_folding(ctx, op);
29
-including liveness analysis and trivial constant expression
26
}
30
-evaluation. TCG ops are then implemented in the host CPU back end,
27
31
-also known as the TCG target.
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
32
-
33
-The TCG *target* is the architecture for which we generate the
34
-code. It is of course not the same as the "target" of QEMU which is
35
-the emulated architecture. As TCG started as a generic C backend used
36
-for cross compiling, it is assumed that the TCG target is different
37
-from the host, although it is never the case for QEMU.
38
+The TCG *target* is the architecture for which we generate the code.
39
+It is of course not the same as the "target" of QEMU which is the
40
+emulated architecture. As TCG started as a generic C backend used
41
+for cross compiling, the assumption was that TCG target might be
42
+different from the host, although this is never the case for QEMU.
43
44
In this document, we use *guest* to specify what architecture we are
45
emulating; *target* always means the TCG target, the machine on which
46
we are running QEMU.
47
48
-A TCG *function* corresponds to a QEMU Translated Block (TB).
49
-
50
-A TCG *temporary* is a variable only live in a basic block. Temporaries are allocated explicitly in each function.
51
-
52
-A TCG *local temporary* is a variable only live in a function. Local temporaries are allocated explicitly in each function.
53
-
54
-A TCG *global* is a variable which is live in all the functions
55
-(equivalent of a C global variable). They are defined before the
56
-functions defined. A TCG global can be a memory location (e.g. a QEMU
57
-CPU register), a fixed host register (e.g. the QEMU CPU state pointer)
58
-or a memory location which is stored in a register outside QEMU TBs
59
-(not implemented yet).
60
-
61
-A TCG *basic block* corresponds to a list of instructions terminated
62
-by a branch instruction.
63
-
64
An operation with *undefined behavior* may result in a crash.
65
66
An operation with *unspecified behavior* shall not crash. However,
67
the result may be one of several possibilities so may be considered
68
an *undefined result*.
69
70
-Intermediate representation
71
-===========================
72
+Basic Blocks
73
+============
74
75
-Introduction
76
-------------
77
+A TCG *basic block* is a single entry, multiple exit region which
78
+corresponds to a list of instructions terminated by a label, or
79
+any branch instruction.
80
81
-TCG instructions operate on variables which are temporaries, local
82
-temporaries or globals. TCG instructions and variables are strongly
83
-typed. Two types are supported: 32 bit integers and 64 bit
84
-integers. Pointers are defined as an alias to 32 bit or 64 bit
85
-integers depending on the TCG target word size.
86
+A TCG *extended basic block* is a single entry, multiple exit region
87
+which corresponds to a list of instructions terminated by a label or
88
+an unconditional branch. Specifically, an extended basic block is
89
+a sequence of basic blocks connected by the fall-through paths of
90
+zero or more conditional branch instructions.
91
92
-Each instruction has a fixed number of output variable operands, input
93
-variable operands and always constant operands.
94
+Operations
95
+==========
96
97
-The notable exception is the call instruction which has a variable
98
-number of outputs and inputs.
99
+TCG instructions or *ops* operate on TCG *variables*, both of which
100
+are strongly typed. Each instruction has a fixed number of output
101
+variable operands, input variable operands and constant operands.
102
+Vector instructions have a field specifying the element size within
103
+the vector. The notable exception is the call instruction which has
104
+a variable number of outputs and inputs.
105
106
In the textual form, output operands usually come first, followed by
107
input operands, followed by constant operands. The output type is
108
@@ -XXX,XX +XXX,XX @@ included in the instruction name. Constants are prefixed with a '$'.
109
110
add_i32 t0, t1, t2 /* (t0 <- t1 + t2) */
111
112
+Variables
113
+=========
114
115
-Assumptions
116
------------
117
+* ``TEMP_FIXED``
118
119
-Basic blocks
120
-^^^^^^^^^^^^
121
+ There is one TCG *fixed global* variable, ``cpu_env``, which is
122
+ live in all translation blocks, and holds a pointer to ``CPUArchState``.
123
+ This variable is held in a host cpu register at all times in all
124
+ translation blocks.
125
126
-* Basic blocks end after branches (e.g. brcond_i32 instruction),
127
- goto_tb and exit_tb instructions.
128
+* ``TEMP_GLOBAL``
129
130
-* Basic blocks start after the end of a previous basic block, or at a
131
- set_label instruction.
132
+ A TCG *global* is a variable which is live in all translation blocks,
133
+ and corresponds to memory location that is within ``CPUArchState``.
134
+ These may be specified as an offset from ``cpu_env``, in which case
135
+ they are called *direct globals*, or may be specified as an offset
136
+ from a direct global, in which case they are called *indirect globals*.
137
+ Even indirect globals should still reference memory within
138
+ ``CPUArchState``. All TCG globals are defined during
139
+ ``TCGCPUOps.initialize``, before any translation blocks are generated.
140
141
-After the end of a basic block, the content of temporaries is
142
-destroyed, but local temporaries and globals are preserved.
143
+* ``TEMP_CONST``
144
145
-Floating point types
146
-^^^^^^^^^^^^^^^^^^^^
147
+ A TCG *constant* is a variable which is live throughout the entire
148
+ translation block, and contains a constant value. These variables
149
+ are allocated on demand during translation and are hashed so that
150
+ there is exactly one variable holding a given value.
151
152
-* Floating point types are not supported yet
153
+* ``TEMP_TB``
154
155
-Pointers
156
-^^^^^^^^
157
+ A TCG *translation block temporary* is a variable which is live
158
+ throughout the entire translation block, but dies on any exit.
159
+ These temporaries are allocated explicitly during translation.
160
161
-* Depending on the TCG target, pointer size is 32 bit or 64
162
- bit. The type ``TCG_TYPE_PTR`` is an alias to ``TCG_TYPE_I32`` or
163
- ``TCG_TYPE_I64``.
164
+* ``TEMP_EBB``
165
+
166
+ A TCG *extended basic block temporary* is a variable which is live
167
+ throughout an extended basic block, but dies on any exit.
168
+ These temporaries are allocated explicitly during translation.
169
+
170
+Types
171
+=====
172
+
173
+* ``TCG_TYPE_I32``
174
+
175
+ A 32-bit integer.
176
+
177
+* ``TCG_TYPE_I64``
178
+
179
+ A 64-bit integer. For 32-bit hosts, such variables are split into a pair
180
+ of variables with ``type=TCG_TYPE_I32`` and ``base_type=TCG_TYPE_I64``.
181
+ The ``temp_subindex`` for each indicates where it falls within the
182
+ host-endian representation.
183
+
184
+* ``TCG_TYPE_PTR``
185
+
186
+ An alias for ``TCG_TYPE_I32`` or ``TCG_TYPE_I64``, depending on the size
187
+ of a pointer for the host.
188
+
189
+* ``TCG_TYPE_REG``
190
+
191
+ An alias for ``TCG_TYPE_I32`` or ``TCG_TYPE_I64``, depending on the size
192
+ of the integer registers for the host. This may be larger
193
+ than ``TCG_TYPE_PTR`` depending on the host ABI.
194
+
195
+* ``TCG_TYPE_I128``
196
+
197
+ A 128-bit integer. For all hosts, such variables are split into a number
198
+ of variables with ``type=TCG_TYPE_REG`` and ``base_type=TCG_TYPE_I128``.
199
+ The ``temp_subindex`` for each indicates where it falls within the
200
+ host-endian representation.
201
+
202
+* ``TCG_TYPE_V64``
203
+
204
+ A 64-bit vector. This type is valid only if the TCG target
205
+ sets ``TCG_TARGET_HAS_v64``.
206
+
207
+* ``TCG_TYPE_V128``
208
+
209
+ A 128-bit vector. This type is valid only if the TCG target
210
+ sets ``TCG_TARGET_HAS_v128``.
211
+
212
+* ``TCG_TYPE_V256``
213
+
214
+ A 256-bit vector. This type is valid only if the TCG target
215
+ sets ``TCG_TARGET_HAS_v256``.
216
217
Helpers
218
-^^^^^^^
219
+=======
220
221
-* Using the tcg_gen_helper_x_y it is possible to call any function
222
- taking i32, i64 or pointer types. By default, before calling a helper,
223
- all globals are stored at their canonical location and it is assumed
224
- that the function can modify them. By default, the helper is allowed to
225
- modify the CPU state or raise an exception.
226
+Helpers are registered in a guest-specific ``helper.h``,
227
+which is processed to generate ``tcg_gen_helper_*`` functions.
228
+With these functions it is possible to call a function taking
229
+i32, i64, i128 or pointer types.
230
231
- This can be overridden using the following function modifiers:
232
+By default, before calling a helper, all globals are stored at their
233
+canonical location. By default, the helper is allowed to modify the
234
+CPU state (including the state represented by tcg globals)
235
+or may raise an exception. This default can be overridden using the
236
+following function modifiers:
237
238
- - ``TCG_CALL_NO_READ_GLOBALS`` means that the helper does not read globals,
239
- either directly or via an exception. They will not be saved to their
240
- canonical locations before calling the helper.
241
+* ``TCG_CALL_NO_WRITE_GLOBALS``
242
243
- - ``TCG_CALL_NO_WRITE_GLOBALS`` means that the helper does not modify any globals.
244
- They will only be saved to their canonical location before calling helpers,
245
- but they won't be reloaded afterwards.
246
+ The helper does not modify any globals, but may read them.
247
+ Globals will be saved to their canonical location before calling helpers,
248
+ but need not be reloaded afterwards.
249
250
- - ``TCG_CALL_NO_SIDE_EFFECTS`` means that the call to the function is removed if
251
- the return value is not used.
252
+* ``TCG_CALL_NO_READ_GLOBALS``
253
254
- Note that ``TCG_CALL_NO_READ_GLOBALS`` implies ``TCG_CALL_NO_WRITE_GLOBALS``.
255
+ The helper does not read globals, either directly or via an exception.
256
+ They will not be saved to their canonical locations before calling
257
+ the helper. This implies ``TCG_CALL_NO_WRITE_GLOBALS``.
258
259
- On some TCG targets (e.g. x86), several calling conventions are
260
- supported.
261
+* ``TCG_CALL_NO_SIDE_EFFECTS``
262
263
-Branches
264
-^^^^^^^^
265
-
266
-* Use the instruction 'br' to jump to a label.
267
+ The call to the helper function may be removed if the return value is
268
+ not used. This means that it may not modify any CPU state nor may it
269
+ raise an exception.
270
271
Code Optimizations
272
-------------------
273
+==================
274
275
When generating instructions, you can count on at least the following
276
optimizations:
277
@@ -XXX,XX +XXX,XX @@ Recommended coding rules for best performance
278
often modified, e.g. the integer registers and the condition
279
codes. TCG will be able to use host registers to store them.
280
281
-- Avoid globals stored in fixed registers. They must be used only to
282
- store the pointer to the CPU state and possibly to store a pointer
283
- to a register window.
284
-
285
-- Use temporaries. Use local temporaries only when really needed,
286
- e.g. when you need to use a value after a jump. Local temporaries
287
- introduce a performance hit in the current TCG implementation: their
288
- content is saved to memory at end of each basic block.
289
-
290
-- Free temporaries and local temporaries when they are no longer used
291
- (tcg_temp_free). Since tcg_const_x() also creates a temporary, you
292
- should free it after it is used. Freeing temporaries does not yield
293
- a better generated code, but it reduces the memory usage of TCG and
294
- the speed of the translation.
295
+- Free temporaries when they are no longer used (``tcg_temp_free``).
296
+ Since ``tcg_const_x`` also creates a temporary, you should free it
297
+ after it is used.
298
299
- Don't hesitate to use helpers for complicated or seldom used guest
300
instructions. There is little performance advantage in using TCG to
301
@@ -XXX,XX +XXX,XX @@ Recommended coding rules for best performance
302
the instruction is mostly doing loads and stores, and in those cases
303
inline TCG may still be faster for longer sequences.
304
305
-- The hard limit on the number of TCG instructions you can generate
306
- per guest instruction is set by ``MAX_OP_PER_INSTR`` in ``exec-all.h`` --
307
- you cannot exceed this without risking a buffer overrun.
308
-
309
- Use the 'discard' instruction if you know that TCG won't be able to
310
prove that a given global is "dead" at a given program point. The
311
x86 guest uses it to improve the condition codes optimisation.
312
--
29
--
313
2.34.1
30
2.43.0
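For the TCG_CALL_* modifiers documented in the tcg-ops.rst change above, a guest's helper.h entry typically combines them with the DEF_HELPER_FLAGS_* macros. A hedged example with an invented helper name:

    /* Hypothetical helper declaration; "myguest_clz32" does not exist.
     * TCG_CALL_NO_RWG_SE combines the no-read-globals and
     * no-side-effects modifiers. */
    DEF_HELPER_FLAGS_2(myguest_clz32, TCG_CALL_NO_RWG_SE, i32, i32, i32)

The generated call wrapper can then be emitted from a translator like any other op.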
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Add fold_masks_s as a trivial wrapper around fold_masks_zs.
2
Avoid the use of the OptContext slots.
2
3
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-6-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
include/exec/exec-all.h | 27 +++++++++++----------------
7
tcg/optimize.c | 13 ++++++++++---
9
1 file changed, 11 insertions(+), 16 deletions(-)
8
1 file changed, 10 insertions(+), 3 deletions(-)
10
9
11
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/include/exec/exec-all.h
12
--- a/tcg/optimize.c
14
+++ b/include/exec/exec-all.h
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ struct tb_tc {
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
16
};
15
return fold_masks_zs(ctx, op, z_mask, 0);
17
18
struct TranslationBlock {
19
-#if !TARGET_TB_PCREL
20
/*
21
* Guest PC corresponding to this block. This must be the true
22
* virtual address. Therefore e.g. x86 stores EIP + CS_BASE, and
23
* targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or
24
* privilege, must store those bits elsewhere.
25
*
26
- * If TARGET_TB_PCREL, the opcodes for the TranslationBlock are
27
- * written such that the TB is associated only with the physical
28
- * page and may be run in any virtual address context. In this case,
29
- * PC must always be taken from ENV in a target-specific manner.
30
+ * If CF_PCREL, the opcodes for the TranslationBlock are written
31
+ * such that the TB is associated only with the physical page and
32
+ * may be run in any virtual address context. In this case, PC
33
+ * must always be taken from ENV in a target-specific manner.
34
* Unwind information is taken as offsets from the page, to be
35
* deposited into the "current" PC.
36
*/
37
target_ulong pc;
38
-#endif
39
40
/*
41
* Target-specific data associated with the TranslationBlock, e.g.:
42
@@ -XXX,XX +XXX,XX @@ struct TranslationBlock {
43
uintptr_t jmp_dest[2];
44
};
45
46
-/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
47
-static inline target_ulong tb_pc(const TranslationBlock *tb)
48
-{
49
-#if TARGET_TB_PCREL
50
- qemu_build_not_reached();
51
-#else
52
- return tb->pc;
53
-#endif
54
-}
55
-
56
/* Hide the qatomic_read to make code a little easier on the eyes */
57
static inline uint32_t tb_cflags(const TranslationBlock *tb)
58
{
59
return qatomic_read(&tb->cflags);
60
}
16
}
61
17
62
+/* Hide the read to avoid ifdefs for CF_PCREL. */
18
+static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
63
+static inline target_ulong tb_pc(const TranslationBlock *tb)
64
+{
19
+{
65
+ assert(!(tb_cflags(tb) & CF_PCREL));
20
+ return fold_masks_zs(ctx, op, -1, s_mask);
66
+ return tb->pc;
67
+}
21
+}
68
+
22
+
69
static inline tb_page_addr_t tb_page_addr0(const TranslationBlock *tb)
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
70
{
24
{
71
#ifdef CONFIG_USER_ONLY
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
26
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
27
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
29
{
30
+ uint64_t s_mask;
31
+
32
if (fold_const2_commutative(ctx, op) ||
33
fold_xi_to_x(ctx, op, -1) ||
34
fold_xi_to_not(ctx, op, 0)) {
35
return true;
36
}
37
38
- ctx->s_mask = arg_info(op->args[1])->s_mask
39
- & arg_info(op->args[2])->s_mask;
40
- return false;
41
+ s_mask = arg_info(op->args[1])->s_mask
42
+ & arg_info(op->args[2])->s_mask;
43
+ return fold_masks_s(ctx, op, s_mask);
44
}
45
46
static bool fold_extract(OptContext *ctx, TCGOp *op)
72
--
47
--
73
2.34.1
48
2.43.0
74
75
1
Since tcg_temp_new_i32 is now identical, use that.
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
include/exec/gen-icount.h | 8 +-------
6
tcg/optimize.c | 15 ++++++---------
7
1 file changed, 1 insertion(+), 7 deletions(-)
7
1 file changed, 6 insertions(+), 9 deletions(-)
8
8
9
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
11
--- a/include/exec/gen-icount.h
11
--- a/tcg/optimize.c
12
+++ b/include/exec/gen-icount.h
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static inline void gen_io_start(void)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
14
14
static bool fold_extract(OptContext *ctx, TCGOp *op)
15
static inline void gen_tb_start(const TranslationBlock *tb)
16
{
15
{
17
- TCGv_i32 count;
16
uint64_t z_mask_old, z_mask;
17
+ TempOptInfo *t1 = arg_info(op->args[1]);
18
int pos = op->args[2];
19
int len = op->args[3];
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t;
18
-
23
-
19
- if (tb_cflags(tb) & CF_USE_ICOUNT) {
24
- t = arg_info(op->args[1])->val;
20
- count = tcg_temp_local_new_i32();
25
- t = extract64(t, pos, len);
21
- } else {
26
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
22
- count = tcg_temp_new_i32();
27
+ if (ti_is_const(t1)) {
23
- }
28
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
24
+ TCGv_i32 count = tcg_temp_new_i32();
29
+ extract64(ti_const_val(t1), pos, len));
25
30
}
26
tcg_gen_ld_i32(count, cpu_env,
31
27
offsetof(ArchCPU, neg.icount_decr.u32) -
32
- z_mask_old = arg_info(op->args[1])->z_mask;
33
+ z_mask_old = t1->z_mask;
34
z_mask = extract64(z_mask_old, pos, len);
35
if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
36
return true;
37
}
38
- ctx->z_mask = z_mask;
39
40
- return fold_masks(ctx, op);
41
+ return fold_masks_z(ctx, op, z_mask);
42
}
43
44
static bool fold_extract2(OptContext *ctx, TCGOp *op)
28
--
45
--
29
2.34.1
46
2.43.0
30
31
1
Since tcg_temp_new is now identical, use that.
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
In some cases we can avoid a copy from A0 or T0.
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
3
---
7
target/i386/tcg/translate.c | 27 +++++++++------------------
4
tcg/optimize.c | 2 +-
8
1 file changed, 9 insertions(+), 18 deletions(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
9
6
10
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
12
--- a/target/i386/tcg/translate.c
9
--- a/tcg/optimize.c
13
+++ b/target/i386/tcg/translate.c
10
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
11
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
15
if (mod == 3) {
12
}
16
goto illegal_op;
13
return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
17
}
14
}
18
- a0 = tcg_temp_local_new();
15
- return false;
19
- t0 = tcg_temp_local_new();
16
+ return finish_folding(ctx, op);
20
+ a0 = s->A0;
21
+ t0 = s->T0;
22
label1 = gen_new_label();
23
24
- tcg_gen_mov_tl(a0, s->A0);
25
- tcg_gen_mov_tl(t0, s->T0);
26
-
27
gen_set_label(label1);
28
t1 = tcg_temp_new();
29
t2 = tcg_temp_new();
30
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
31
tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
32
33
tcg_temp_free(t2);
34
- tcg_temp_free(a0);
35
tcg_gen_neg_tl(s->T0, t0);
36
- tcg_temp_free(t0);
37
} else {
38
tcg_gen_neg_tl(s->T0, s->T0);
39
if (mod != 3) {
40
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
41
#endif
42
{
43
TCGLabel *label1;
44
- TCGv t0, t1, t2, a0;
45
+ TCGv t0, t1, t2;
46
47
if (!PE(s) || VM86(s))
48
goto illegal_op;
49
- t0 = tcg_temp_local_new();
50
- t1 = tcg_temp_local_new();
51
- t2 = tcg_temp_local_new();
52
+ t0 = tcg_temp_new();
53
+ t1 = tcg_temp_new();
54
+ t2 = tcg_temp_new();
55
ot = MO_16;
56
modrm = x86_ldub_code(env, s);
57
reg = (modrm >> 3) & 7;
58
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
59
if (mod != 3) {
60
gen_lea_modrm(env, s, modrm);
61
gen_op_ld_v(s, ot, t0, s->A0);
62
- a0 = tcg_temp_local_new();
63
- tcg_gen_mov_tl(a0, s->A0);
64
} else {
65
gen_op_mov_v_reg(s, ot, t0, rm);
66
- a0 = NULL;
67
}
68
gen_op_mov_v_reg(s, ot, t1, reg);
69
tcg_gen_andi_tl(s->tmp0, t0, 3);
70
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
71
tcg_gen_movi_tl(t2, CC_Z);
72
gen_set_label(label1);
73
if (mod != 3) {
74
- gen_op_st_v(s, ot, t0, a0);
75
- tcg_temp_free(a0);
76
+ gen_op_st_v(s, ot, t0, s->A0);
77
} else {
78
gen_op_mov_reg_v(s, ot, rm, t0);
79
}
80
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
81
modrm = x86_ldub_code(env, s);
82
reg = ((modrm >> 3) & 7) | REX_R(s);
83
gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
84
- t0 = tcg_temp_local_new();
85
+ t0 = tcg_temp_new();
86
gen_update_cc_op(s);
87
if (b == 0x102) {
88
gen_helper_lar(t0, cpu_env, s->T0);
89
@@ -XXX,XX +XXX,XX @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
90
dc->tmp2_i32 = tcg_temp_new_i32();
91
dc->tmp3_i32 = tcg_temp_new_i32();
92
dc->tmp4 = tcg_temp_new();
93
- dc->cc_srcT = tcg_temp_local_new();
94
+ dc->cc_srcT = tcg_temp_new();
95
}
17
}
96
18
97
static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
19
static bool fold_exts(OptContext *ctx, TCGOp *op)
98
--
20
--
99
2.34.1
21
2.43.0
100
101
1
Since tcg_temp_new_* is now identical, use those.
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Explicitly sign-extend z_mask instead of doing that manually.
2
3
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
target/arm/tcg/translate-sve.c | 6 +++---
7
tcg/optimize.c | 29 ++++++++++++-----------------
7
target/arm/tcg/translate.c | 6 +++---
8
1 file changed, 12 insertions(+), 17 deletions(-)
8
2 files changed, 6 insertions(+), 6 deletions(-)
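The "explicitly sign-extend z_mask" remark above can be checked with two small values; a standalone sketch assuming z_mask is the mask of bits that may be nonzero:

    #include <stdint.h>

    /* Mirrors "z_mask = (int8_t)z_mask" for ext8s in the patch below. */
    static uint64_t ext8s_z_mask(uint64_t z_mask)
    {
        /* 0x7f -> 0x7f (high bits stay known zero),
         * 0x80 -> 0xffffffffffffff80 (high bits become unknown). */
        return (uint64_t)(int8_t)z_mask;
    }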
9
9
10
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/arm/tcg/translate-sve.c
12
--- a/tcg/optimize.c
13
+++ b/target/arm/tcg/translate-sve.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
15
16
static bool fold_exts(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t s_mask_old, s_mask, z_mask, sign;
19
+ uint64_t s_mask_old, s_mask, z_mask;
20
bool type_change = false;
21
+ TempOptInfo *t1;
22
23
if (fold_const1(ctx, op)) {
15
return true;
24
return true;
16
}
25
}
17
26
18
- last = tcg_temp_local_new_i32();
27
- z_mask = arg_info(op->args[1])->z_mask;
19
+ last = tcg_temp_new_i32();
28
- s_mask = arg_info(op->args[1])->s_mask;
20
over = gen_new_label();
29
+ t1 = arg_info(op->args[1]);
21
30
+ z_mask = t1->z_mask;
22
find_last_active(s, last, esz, a->pg);
31
+ s_mask = t1->s_mask;
23
@@ -XXX,XX +XXX,XX @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
32
s_mask_old = s_mask;
24
tcg_temp_free_i64(t0);
33
25
} else {
34
switch (op->opc) {
26
TCGLabel *loop = gen_new_label();
35
CASE_OP_32_64(ext8s):
27
- TCGv_ptr tp, i = tcg_const_local_ptr(0);
36
- sign = INT8_MIN;
28
+ TCGv_ptr tp, i = tcg_const_ptr(0);
37
- z_mask = (uint8_t)z_mask;
29
38
+ s_mask |= INT8_MIN;
30
gen_set_label(loop);
39
+ z_mask = (int8_t)z_mask;
31
40
break;
32
@@ -XXX,XX +XXX,XX @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
41
CASE_OP_32_64(ext16s):
33
tcg_temp_free_i64(t0);
42
- sign = INT16_MIN;
34
} else {
43
- z_mask = (uint16_t)z_mask;
35
TCGLabel *loop = gen_new_label();
44
+ s_mask |= INT16_MIN;
36
- TCGv_ptr tp, i = tcg_const_local_ptr(0);
45
+ z_mask = (int16_t)z_mask;
37
+ TCGv_ptr tp, i = tcg_const_ptr(0);
46
break;
38
47
case INDEX_op_ext_i32_i64:
39
gen_set_label(loop);
48
type_change = true;
40
49
QEMU_FALLTHROUGH;
41
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
50
case INDEX_op_ext32s_i64:
42
index XXXXXXX..XXXXXXX 100644
51
- sign = INT32_MIN;
43
--- a/target/arm/tcg/translate.c
52
- z_mask = (uint32_t)z_mask;
44
+++ b/target/arm/tcg/translate.c
53
+ s_mask |= INT32_MIN;
45
@@ -XXX,XX +XXX,XX @@ static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
54
+ z_mask = (int32_t)z_mask;
46
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
55
break;
56
default:
57
g_assert_not_reached();
47
}
58
}
48
59
49
- addr = tcg_temp_local_new_i32();
60
- if (z_mask & sign) {
50
+ addr = tcg_temp_new_i32();
61
- z_mask |= sign;
51
load_reg_var(s, addr, a->rn);
62
- }
52
tcg_gen_addi_i32(addr, addr, a->imm);
63
- s_mask |= sign << 1;
53
64
-
54
@@ -XXX,XX +XXX,XX @@ static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
65
- ctx->z_mask = z_mask;
66
- ctx->s_mask = s_mask;
67
if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
55
return true;
68
return true;
56
}
69
}
57
70
58
- addr = tcg_temp_local_new_i32();
71
- return fold_masks(ctx, op);
59
+ addr = tcg_temp_new_i32();
72
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
60
load_reg_var(s, addr, a->rn);
73
}
61
tcg_gen_addi_i32(addr, addr, a->imm);
74
62
75
static bool fold_extu(OptContext *ctx, TCGOp *op)
63
@@ -XXX,XX +XXX,XX @@ static bool trans_LE(DisasContext *s, arg_LE *a)
64
* Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
65
* so that decr stays live after the brcondi.
66
*/
67
- TCGv_i32 decr = tcg_temp_local_new_i32();
68
+ TCGv_i32 decr = tcg_temp_new_i32();
69
TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
70
tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
71
tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
72
--
76
--
73
2.34.1
77
2.43.0
74
75
1
Change the temps_in_use check to use assert not fprintf.
1
Avoid the use of the OptContext slots.
2
Move the assert for double-free before the check for count,
3
since that is the more immediate problem.
4
2
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/tcg.c | 12 +++++-------
6
tcg/optimize.c | 4 ++--
9
1 file changed, 5 insertions(+), 7 deletions(-)
7
1 file changed, 2 insertions(+), 2 deletions(-)
10
8
11
diff --git a/tcg/tcg.c b/tcg/tcg.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/tcg.c
11
--- a/tcg/optimize.c
14
+++ b/tcg/tcg.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
16
g_assert_not_reached();
14
g_assert_not_reached();
17
}
15
}
18
16
19
-#if defined(CONFIG_DEBUG_TCG)
17
- ctx->z_mask = z_mask;
20
- s->temps_in_use--;
18
if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
21
- if (s->temps_in_use < 0) {
19
return true;
22
- fprintf(stderr, "More temporaries freed than allocated!\n");
20
}
23
- }
21
- return fold_masks(ctx, op);
24
-#endif
25
-
26
tcg_debug_assert(ts->temp_allocated != 0);
27
ts->temp_allocated = 0;
28
29
+#if defined(CONFIG_DEBUG_TCG)
30
+ assert(s->temps_in_use > 0);
31
+ s->temps_in_use--;
32
+#endif
33
+
22
+
34
idx = temp_idx(ts);
23
+ return fold_masks_z(ctx, op, z_mask);
35
k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
24
}
36
set_bit(idx, s->free_temps[k].l);
25
26
static bool fold_mb(OptContext *ctx, TCGOp *op)
37
--
27
--
38
2.34.1
28
2.43.0
39
40
1
Here we are creating a temp whose value needs to be replaced,
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
but always storing NULL into CPUState.plugin_mem_cbs.
3
Use tcg_constant_ptr(0) explicitly.
4
2
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
accel/tcg/plugin-gen.c | 8 ++------
6
tcg/optimize.c | 19 +++++++++++--------
9
1 file changed, 2 insertions(+), 6 deletions(-)
7
1 file changed, 11 insertions(+), 8 deletions(-)
10
8
11
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/plugin-gen.c
11
--- a/tcg/optimize.c
14
+++ b/accel/tcg/plugin-gen.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void inject_mem_disable_helper(struct qemu_plugin_insn *plugin_insn,
13
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
16
/* called before finishing a TB with exit_tb, goto_tb or goto_ptr */
14
17
void plugin_gen_disable_mem_helpers(void)
15
static bool fold_movcond(OptContext *ctx, TCGOp *op)
18
{
16
{
19
- TCGv_ptr ptr;
17
+ uint64_t z_mask, s_mask;
20
-
18
+ TempOptInfo *tt, *ft;
21
/*
19
int i;
22
* We could emit the clearing unconditionally and be done. However, this can
20
23
* be wasteful if for instance plugins don't track memory accesses, or if
21
/* If true and false values are the same, eliminate the cmp. */
24
@@ -XXX,XX +XXX,XX @@ void plugin_gen_disable_mem_helpers(void)
22
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
25
if (!tcg_ctx->plugin_tb->mem_helper) {
23
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
26
return;
27
}
24
}
28
- ptr = tcg_const_ptr(NULL);
25
29
- tcg_gen_st_ptr(ptr, cpu_env, offsetof(CPUState, plugin_mem_cbs) -
26
- ctx->z_mask = arg_info(op->args[3])->z_mask
30
- offsetof(ArchCPU, env));
27
- | arg_info(op->args[4])->z_mask;
31
- tcg_temp_free_ptr(ptr);
28
- ctx->s_mask = arg_info(op->args[3])->s_mask
32
+ tcg_gen_st_ptr(tcg_constant_ptr(NULL), cpu_env,
29
- & arg_info(op->args[4])->s_mask;
33
+ offsetof(CPUState, plugin_mem_cbs) - offsetof(ArchCPU, env));
30
+ tt = arg_info(op->args[3]);
31
+ ft = arg_info(op->args[4]);
32
+ z_mask = tt->z_mask | ft->z_mask;
33
+ s_mask = tt->s_mask & ft->s_mask;
34
35
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
36
- uint64_t tv = arg_info(op->args[3])->val;
37
- uint64_t fv = arg_info(op->args[4])->val;
38
+ if (ti_is_const(tt) && ti_is_const(ft)) {
39
+ uint64_t tv = ti_const_val(tt);
40
+ uint64_t fv = ti_const_val(ft);
41
TCGOpcode opc, negopc = 0;
42
TCGCond cond = op->args[5];
43
44
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
45
}
46
}
47
}
48
- return false;
49
+
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
34
}
51
}
35
52
36
static void plugin_gen_tb_udata(const struct qemu_plugin_tb *ptb,
53
static bool fold_mul(OptContext *ctx, TCGOp *op)
37
--
54
--
38
2.34.1
55
2.43.0
39
40
1
Since tcg_temp_new_* is now identical, use those.
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
3
Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
3
---
7
target/hexagon/idef-parser/README.rst | 4 ++--
4
tcg/optimize.c | 6 +++---
8
target/hexagon/gen_tcg.h | 4 ++--
5
1 file changed, 3 insertions(+), 3 deletions(-)
9
target/hexagon/genptr.c | 16 ++++++++--------
10
target/hexagon/idef-parser/parser-helpers.c | 4 ++--
11
target/hexagon/translate.c | 2 +-
12
target/hexagon/README | 8 ++++----
13
target/hexagon/gen_tcg_funcs.py | 18 +++++++-----------
14
7 files changed, 26 insertions(+), 30 deletions(-)
15
6
16
diff --git a/target/hexagon/idef-parser/README.rst b/target/hexagon/idef-parser/README.rst
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
18
--- a/target/hexagon/idef-parser/README.rst
9
--- a/tcg/optimize.c
19
+++ b/target/hexagon/idef-parser/README.rst
10
+++ b/tcg/optimize.c
20
@@ -XXX,XX +XXX,XX @@ generators the previous declarations are mapped to
11
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
21
12
fold_xi_to_x(ctx, op, 1)) {
22
::
13
return true;
23
14
}
24
- int var1; -> TCGv_i32 var1 = tcg_temp_local_new_i32();
15
- return false;
25
+ int var1; -> TCGv_i32 var1 = tcg_temp_new_i32();
16
+ return finish_folding(ctx, op);
26
27
- int var2 = 0; -> TCGv_i32 var1 = tcg_temp_local_new_i32();
28
+ int var2 = 0; -> TCGv_i32 var1 = tcg_temp_new_i32();
29
tcg_gen_movi_i32(j, ((int64_t) 0ULL));
30
31
which are later automatically freed at the end of the function they're declared
32
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/hexagon/gen_tcg.h
35
+++ b/target/hexagon/gen_tcg.h
36
@@ -XXX,XX +XXX,XX @@
37
*/
38
#define fGEN_TCG_PRED_LOAD(GET_EA, PRED, SIZE, SIGN) \
39
do { \
40
- TCGv LSB = tcg_temp_local_new(); \
41
+ TCGv LSB = tcg_temp_new(); \
42
TCGLabel *label = gen_new_label(); \
43
tcg_gen_movi_tl(EA, 0); \
44
PRED; \
45
@@ -XXX,XX +XXX,XX @@
46
/* Predicated loads into a register pair */
47
#define fGEN_TCG_PRED_LOAD_PAIR(GET_EA, PRED) \
48
do { \
49
- TCGv LSB = tcg_temp_local_new(); \
50
+ TCGv LSB = tcg_temp_new(); \
51
TCGLabel *label = gen_new_label(); \
52
tcg_gen_movi_tl(EA, 0); \
53
PRED; \
54
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/target/hexagon/genptr.c
57
+++ b/target/hexagon/genptr.c
58
@@ -XXX,XX +XXX,XX @@ static void gen_cond_call(DisasContext *ctx, TCGv pred,
59
TCGCond cond, int pc_off)
60
{
61
TCGv next_PC;
62
- TCGv lsb = tcg_temp_local_new();
63
+ TCGv lsb = tcg_temp_new();
64
TCGLabel *skip = gen_new_label();
65
tcg_gen_andi_tl(lsb, pred, 1);
66
gen_write_new_pc_pcrel(ctx, pc_off, cond, lsb);
67
@@ -XXX,XX +XXX,XX @@ static void gen_cond_call(DisasContext *ctx, TCGv pred,
68
69
static void gen_endloop0(DisasContext *ctx)
70
{
71
- TCGv lpcfg = tcg_temp_local_new();
72
+ TCGv lpcfg = tcg_temp_new();
73
74
GET_USR_FIELD(USR_LPCFG, lpcfg);
75
76
@@ -XXX,XX +XXX,XX @@ static void gen_sar(TCGv dst, TCGv src, TCGv shift_amt)
77
/* Bidirectional shift right with saturation */
78
static void gen_asr_r_r_sat(TCGv RdV, TCGv RsV, TCGv RtV)
79
{
80
- TCGv shift_amt = tcg_temp_local_new();
81
+ TCGv shift_amt = tcg_temp_new();
82
TCGLabel *positive = gen_new_label();
83
TCGLabel *done = gen_new_label();
84
85
@@ -XXX,XX +XXX,XX @@ static void gen_asr_r_r_sat(TCGv RdV, TCGv RsV, TCGv RtV)
86
/* Bidirectional shift left with saturation */
87
static void gen_asl_r_r_sat(TCGv RdV, TCGv RsV, TCGv RtV)
88
{
89
- TCGv shift_amt = tcg_temp_local_new();
90
+ TCGv shift_amt = tcg_temp_new();
91
TCGLabel *positive = gen_new_label();
92
TCGLabel *done = gen_new_label();
93
94
@@ -XXX,XX +XXX,XX @@ static void gen_log_vreg_write(DisasContext *ctx, intptr_t srcoff, int num,
95
intptr_t dstoff;
96
97
if (is_predicated) {
98
- TCGv cancelled = tcg_temp_local_new();
99
+ TCGv cancelled = tcg_temp_new();
100
label_end = gen_new_label();
101
102
/* Don't do anything if the slot was cancelled */
103
@@ -XXX,XX +XXX,XX @@ static void gen_log_qreg_write(intptr_t srcoff, int num, int vnew,
104
intptr_t dstoff;
105
106
if (is_predicated) {
107
- TCGv cancelled = tcg_temp_local_new();
108
+ TCGv cancelled = tcg_temp_new();
109
label_end = gen_new_label();
110
111
/* Don't do anything if the slot was cancelled */
112
@@ -XXX,XX +XXX,XX @@ void gen_satu_i64_ovfl(TCGv ovfl, TCGv_i64 dest, TCGv_i64 source, int width)
113
/* Implements the fADDSAT64 macro in TCG */
114
void gen_add_sat_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
115
{
116
- TCGv_i64 sum = tcg_temp_local_new_i64();
117
+ TCGv_i64 sum = tcg_temp_new_i64();
118
TCGv_i64 xor = tcg_temp_new_i64();
119
TCGv_i64 cond1 = tcg_temp_new_i64();
120
- TCGv_i64 cond2 = tcg_temp_local_new_i64();
121
+ TCGv_i64 cond2 = tcg_temp_new_i64();
122
TCGv_i64 cond3 = tcg_temp_new_i64();
123
TCGv_i64 mask = tcg_constant_i64(0x8000000000000000ULL);
124
TCGv_i64 max_pos = tcg_constant_i64(0x7FFFFFFFFFFFFFFFLL);
125
diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/target/hexagon/idef-parser/parser-helpers.c
128
+++ b/target/hexagon/idef-parser/parser-helpers.c
129
@@ -XXX,XX +XXX,XX @@ HexValue gen_tmp_local(Context *c,
130
rvalue.is_manual = false;
131
rvalue.tmp.index = c->inst.tmp_count;
132
OUT(c, locp, "TCGv_i", &bit_width, " tmp_", &c->inst.tmp_count,
133
- " = tcg_temp_local_new_i", &bit_width, "();\n");
134
+ " = tcg_temp_new_i", &bit_width, "();\n");
135
c->inst.tmp_count++;
136
return rvalue;
137
}
17
}
138
@@ -XXX,XX +XXX,XX @@ void gen_varid_allocate(Context *c,
18
139
new_var.signedness = signedness;
19
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
140
20
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
141
EMIT_HEAD(c, "TCGv_%s %s", bit_suffix, varid->var.name->str);
21
fold_xi_to_i(ctx, op, 0)) {
142
- EMIT_HEAD(c, " = tcg_temp_local_new_%s();\n", bit_suffix);
22
return true;
143
+ EMIT_HEAD(c, " = tcg_temp_new_%s();\n", bit_suffix);
23
}
144
g_array_append_val(c->inst.allocated, new_var);
24
- return false;
25
+ return finish_folding(ctx, op);
145
}
26
}
146
27
147
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
28
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
148
index XXXXXXX..XXXXXXX 100644
29
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
149
--- a/target/hexagon/translate.c
30
tcg_opt_gen_movi(ctx, op2, rh, h);
150
+++ b/target/hexagon/translate.c
31
return true;
151
@@ -XXX,XX +XXX,XX @@ void process_store(DisasContext *ctx, int slot_num)
152
tcg_temp_free(cancelled);
153
}
32
}
154
{
33
- return false;
155
- TCGv address = tcg_temp_local_new();
34
+ return finish_folding(ctx, op);
156
+ TCGv address = tcg_temp_new();
35
}
157
tcg_gen_mov_tl(address, hex_store_addr[slot_num]);
36
158
37
static bool fold_nand(OptContext *ctx, TCGOp *op)
159
/*
160
diff --git a/target/hexagon/README b/target/hexagon/README
161
index XXXXXXX..XXXXXXX 100644
162
--- a/target/hexagon/README
163
+++ b/target/hexagon/README
164
@@ -XXX,XX +XXX,XX @@ tcg_funcs_generated.c.inc
165
Insn *insn,
166
Packet *pkt)
167
{
168
- TCGv RdV = tcg_temp_local_new();
169
+ TCGv RdV = tcg_temp_new();
170
const int RdN = insn->regno[0];
171
TCGv RsV = hex_gpr[insn->regno[1]];
172
TCGv RtV = hex_gpr[insn->regno[2]];
173
@@ -XXX,XX +XXX,XX @@ istruction.
174
const int VdN = insn->regno[0];
175
const intptr_t VdV_off =
176
ctx_future_vreg_off(ctx, VdN, 1, true);
177
- TCGv_ptr VdV = tcg_temp_local_new_ptr();
178
+ TCGv_ptr VdV = tcg_temp_new_ptr();
179
tcg_gen_addi_ptr(VdV, cpu_env, VdV_off);
180
const int VuN = insn->regno[1];
181
const intptr_t VuV_off =
182
vreg_src_off(ctx, VuN);
183
- TCGv_ptr VuV = tcg_temp_local_new_ptr();
184
+ TCGv_ptr VuV = tcg_temp_new_ptr();
185
const int VvN = insn->regno[2];
186
const intptr_t VvV_off =
187
vreg_src_off(ctx, VvN);
188
- TCGv_ptr VvV = tcg_temp_local_new_ptr();
189
+ TCGv_ptr VvV = tcg_temp_new_ptr();
190
tcg_gen_addi_ptr(VuV, cpu_env, VuV_off);
191
tcg_gen_addi_ptr(VvV, cpu_env, VvV_off);
192
TCGv slot = tcg_constant_tl(insn->slot);
193
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
194
index XXXXXXX..XXXXXXX 100755
195
--- a/target/hexagon/gen_tcg_funcs.py
196
+++ b/target/hexagon/gen_tcg_funcs.py
197
@@ -XXX,XX +XXX,XX @@
198
## Helpers for gen_tcg_func
199
##
200
def gen_decl_ea_tcg(f, tag):
201
- if ('A_CONDEXEC' in hex_common.attribdict[tag] or
202
- 'A_LOAD' in hex_common.attribdict[tag]):
203
- f.write(" TCGv EA = tcg_temp_local_new();\n")
204
- else:
205
- f.write(" TCGv EA = tcg_temp_new();\n")
206
+ f.write(" TCGv EA = tcg_temp_new();\n")
207
208
def gen_free_ea_tcg(f):
209
f.write(" tcg_temp_free(EA);\n")
210
211
def genptr_decl_pair_writable(f, tag, regtype, regid, regno):
212
regN="%s%sN" % (regtype,regid)
213
- f.write(" TCGv_i64 %s%sV = tcg_temp_local_new_i64();\n" % \
214
+ f.write(" TCGv_i64 %s%sV = tcg_temp_new_i64();\n" % \
215
(regtype, regid))
216
if (regtype == "C"):
217
f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
218
@@ -XXX,XX +XXX,XX @@ def genptr_decl_pair_writable(f, tag, regtype, regid, regno):
219
220
def genptr_decl_writable(f, tag, regtype, regid, regno):
221
regN="%s%sN" % (regtype,regid)
222
- f.write(" TCGv %s%sV = tcg_temp_local_new();\n" % \
223
+ f.write(" TCGv %s%sV = tcg_temp_new();\n" % \
224
(regtype, regid))
225
if (regtype == "C"):
226
f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
227
@@ -XXX,XX +XXX,XX @@ def genptr_decl(f, tag, regtype, regid, regno):
228
regN="%s%sN" % (regtype,regid)
229
if (regtype == "R"):
230
if (regid in {"ss", "tt"}):
231
- f.write(" TCGv_i64 %s%sV = tcg_temp_local_new_i64();\n" % \
232
+ f.write(" TCGv_i64 %s%sV = tcg_temp_new_i64();\n" % \
233
(regtype, regid))
234
f.write(" const int %s = insn->regno[%d];\n" % \
235
(regN, regno))
236
@@ -XXX,XX +XXX,XX @@ def genptr_decl(f, tag, regtype, regid, regno):
237
print("Bad register parse: ", regtype, regid)
238
elif (regtype == "C"):
239
if (regid == "ss"):
240
- f.write(" TCGv_i64 %s%sV = tcg_temp_local_new_i64();\n" % \
241
+ f.write(" TCGv_i64 %s%sV = tcg_temp_new_i64();\n" % \
242
(regtype, regid))
243
f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
244
(regN, regno))
245
elif (regid == "dd"):
246
genptr_decl_pair_writable(f, tag, regtype, regid, regno)
247
elif (regid == "s"):
248
- f.write(" TCGv %s%sV = tcg_temp_local_new();\n" % \
249
+ f.write(" TCGv %s%sV = tcg_temp_new();\n" % \
250
(regtype, regid))
251
f.write(" const int %s%sN = insn->regno[%d] + HEX_REG_SA0;\n" % \
252
(regtype, regid, regno))
253
@@ -XXX,XX +XXX,XX @@ def genptr_dst_write_opn(f,regtype, regid, tag):
254
## We produce:
255
## static void generate_A2_add(DisasContext *ctx)
256
## {
257
-## TCGv RdV = tcg_temp_local_new();
258
+## TCGv RdV = tcg_temp_new();
259
## const int RdN = insn->regno[0];
260
## TCGv RsV = hex_gpr[insn->regno[1]];
261
## TCGv RtV = hex_gpr[insn->regno[2]];
262
--
38
--
263
2.34.1
39
2.43.0
264
265
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
1
Avoid the use of the OptContext slots.
2
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
include/exec/gen-icount.h | 4 +---
6
tcg/optimize.c | 8 +++++---
6
1 file changed, 1 insertion(+), 3 deletions(-)
7
1 file changed, 5 insertions(+), 3 deletions(-)
7
8
8
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
10
--- a/include/exec/gen-icount.h
11
--- a/tcg/optimize.c
11
+++ b/include/exec/gen-icount.h
12
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static TCGOp *icount_start_insn;
13
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
13
14
14
static inline void gen_io_start(void)
15
static bool fold_nand(OptContext *ctx, TCGOp *op)
15
{
16
{
16
- TCGv_i32 tmp = tcg_const_i32(1);
17
+ uint64_t s_mask;
17
- tcg_gen_st_i32(tmp, cpu_env,
18
+
18
+ tcg_gen_st_i32(tcg_constant_i32(1), cpu_env,
19
if (fold_const2_commutative(ctx, op) ||
19
offsetof(ArchCPU, parent_obj.can_do_io) -
20
fold_xi_to_not(ctx, op, -1)) {
20
offsetof(ArchCPU, env));
21
return true;
21
- tcg_temp_free_i32(tmp);
22
}
23
24
- ctx->s_mask = arg_info(op->args[1])->s_mask
25
- & arg_info(op->args[2])->s_mask;
26
- return false;
27
+ s_mask = arg_info(op->args[1])->s_mask
28
+ & arg_info(op->args[2])->s_mask;
29
+ return fold_masks_s(ctx, op, s_mask);
22
}
30
}
23
31
24
static inline void gen_tb_start(const TranslationBlock *tb)
32
static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
25
--
33
--
26
2.34.1
34
2.43.0
27
28
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Avoid the use of the OptContext slots.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-26-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/avr/cpu.c | 3 ++-
6
tcg/optimize.c | 9 ++-------
9
1 file changed, 2 insertions(+), 1 deletion(-)
7
1 file changed, 2 insertions(+), 7 deletions(-)
10
8
11
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/avr/cpu.c
11
--- a/tcg/optimize.c
14
+++ b/target/avr/cpu.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_synchronize_from_tb(CPUState *cs,
13
@@ -XXX,XX +XXX,XX @@ static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
16
AVRCPU *cpu = AVR_CPU(cs);
14
{
17
CPUAVRState *env = &cpu->env;
15
/* Set to 1 all bits to the left of the rightmost. */
18
16
uint64_t z_mask = arg_info(op->args[1])->z_mask;
19
- env->pc_w = tb_pc(tb) / 2; /* internally PC points to words */
17
- ctx->z_mask = -(z_mask & -z_mask);
20
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
18
+ z_mask = -(z_mask & -z_mask);
21
+ env->pc_w = tb->pc / 2; /* internally PC points to words */
19
20
- /*
21
- * Because of fold_sub_to_neg, we want to always return true,
22
- * via finish_folding.
23
- */
24
- finish_folding(ctx, op);
25
- return true;
26
+ return fold_masks_z(ctx, op, z_mask);
22
}
27
}
23
28
24
static void avr_restore_state_to_opc(CPUState *cs,
29
static bool fold_neg(OptContext *ctx, TCGOp *op)
25
--
30
--
26
2.34.1
31
2.43.0
27
28
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Avoid the use of the OptContext slots.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
6
Message-Id: <20230227135202.9710-25-anjo@rev.ng>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
target/hexagon/cpu.c | 4 +++-
6
tcg/optimize.c | 8 +++++---
10
1 file changed, 3 insertions(+), 1 deletion(-)
7
1 file changed, 5 insertions(+), 3 deletions(-)
11
8
12
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/target/hexagon/cpu.c
11
--- a/tcg/optimize.c
15
+++ b/target/hexagon/cpu.c
12
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
17
#include "qapi/error.h"
14
18
#include "hw/qdev-properties.h"
15
static bool fold_nor(OptContext *ctx, TCGOp *op)
19
#include "fpu/softfloat-helpers.h"
20
+#include "tcg/tcg.h"
21
22
static void hexagon_v67_cpu_init(Object *obj)
23
{
16
{
24
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
17
+ uint64_t s_mask;
25
{
18
+
26
HexagonCPU *cpu = HEXAGON_CPU(cs);
19
if (fold_const2_commutative(ctx, op) ||
27
CPUHexagonState *env = &cpu->env;
20
fold_xi_to_not(ctx, op, 0)) {
28
- env->gpr[HEX_REG_PC] = tb_pc(tb);
21
return true;
29
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
22
}
30
+ env->gpr[HEX_REG_PC] = tb->pc;
23
24
- ctx->s_mask = arg_info(op->args[1])->s_mask
25
- & arg_info(op->args[2])->s_mask;
26
- return false;
27
+ s_mask = arg_info(op->args[1])->s_mask
28
+ & arg_info(op->args[2])->s_mask;
29
+ return fold_masks_s(ctx, op, s_mask);
31
}
30
}
32
31
33
static bool hexagon_cpu_has_work(CPUState *cs)
32
static bool fold_not(OptContext *ctx, TCGOp *op)
34
--
33
--
35
2.34.1
34
2.43.0
36
37
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Avoid the use of the OptContext slots.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-22-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/loongarch/cpu.c | 6 ++++--
6
tcg/optimize.c | 7 +------
9
1 file changed, 4 insertions(+), 2 deletions(-)
7
1 file changed, 1 insertion(+), 6 deletions(-)
10
8
11
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/loongarch/cpu.c
11
--- a/tcg/optimize.c
14
+++ b/target/loongarch/cpu.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
16
#include "fpu/softfloat-helpers.h"
14
if (fold_const1(ctx, op)) {
17
#include "cpu-csr.h"
15
return true;
18
#include "sysemu/reset.h"
16
}
19
+#include "tcg/tcg.h"
17
-
20
18
- ctx->s_mask = arg_info(op->args[1])->s_mask;
21
const char * const regnames[32] = {
19
-
22
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
20
- /* Because of fold_to_not, we want to always return true, via finish. */
23
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
21
- finish_folding(ctx, op);
24
LoongArchCPU *cpu = LOONGARCH_CPU(cs);
22
- return true;
25
CPULoongArchState *env = &cpu->env;
23
+ return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
26
27
- env->pc = tb_pc(tb);
28
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
29
+ env->pc = tb->pc;
30
}
24
}
31
25
32
static void loongarch_restore_state_to_opc(CPUState *cs,
26
static bool fold_or(OptContext *ctx, TCGOp *op)
33
@@ -XXX,XX +XXX,XX @@ static ObjectClass *loongarch_cpu_class_by_name(const char *cpu_model)
34
35
oc = object_class_by_name(cpu_model);
36
if (!oc) {
37
- g_autofree char *typename
38
+ g_autofree char *typename
39
= g_strdup_printf(LOONGARCH_CPU_TYPE_NAME("%s"), cpu_model);
40
oc = object_class_by_name(typename);
41
if (!oc) {
42
--
27
--
43
2.34.1
28
2.43.0
44
45
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-21-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/microblaze/cpu.c | 4 +++-
6
tcg/optimize.c | 13 ++++++++-----
9
1 file changed, 3 insertions(+), 1 deletion(-)
7
1 file changed, 8 insertions(+), 5 deletions(-)
10
8
11
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/microblaze/cpu.c
11
--- a/tcg/optimize.c
14
+++ b/target/microblaze/cpu.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
16
#include "exec/exec-all.h"
14
17
#include "exec/gdbstub.h"
15
static bool fold_or(OptContext *ctx, TCGOp *op)
18
#include "fpu/softfloat-helpers.h"
19
+#include "tcg/tcg.h"
20
21
static const struct {
22
const char *name;
23
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_synchronize_from_tb(CPUState *cs,
24
{
16
{
25
MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
17
+ uint64_t z_mask, s_mask;
26
18
+ TempOptInfo *t1, *t2;
27
- cpu->env.pc = tb_pc(tb);
19
+
28
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
20
if (fold_const2_commutative(ctx, op) ||
29
+ cpu->env.pc = tb->pc;
21
fold_xi_to_x(ctx, op, 0) ||
30
cpu->env.iflags = tb->flags & IFLAGS_TB_MASK;
22
fold_xx_to_x(ctx, op)) {
23
return true;
24
}
25
26
- ctx->z_mask = arg_info(op->args[1])->z_mask
27
- | arg_info(op->args[2])->z_mask;
28
- ctx->s_mask = arg_info(op->args[1])->s_mask
29
- & arg_info(op->args[2])->s_mask;
30
- return fold_masks(ctx, op);
31
+ t1 = arg_info(op->args[1]);
32
+ t2 = arg_info(op->args[2]);
33
+ z_mask = t1->z_mask | t2->z_mask;
34
+ s_mask = t1->s_mask & t2->s_mask;
35
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
31
}
36
}
32
37
38
static bool fold_orc(OptContext *ctx, TCGOp *op)
33
--
39
--
34
2.34.1
40
2.43.0
35
36
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Avoid the use of the OptContext slots.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-24-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/hppa/cpu.c | 8 +++++---
6
tcg/optimize.c | 8 +++++---
9
1 file changed, 5 insertions(+), 3 deletions(-)
7
1 file changed, 5 insertions(+), 3 deletions(-)
10
8
11
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/hppa/cpu.c
11
--- a/tcg/optimize.c
14
+++ b/target/hppa/cpu.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
16
#include "qemu/module.h"
14
17
#include "exec/exec-all.h"
15
static bool fold_orc(OptContext *ctx, TCGOp *op)
18
#include "fpu/softfloat.h"
19
-
20
+#include "tcg/tcg.h"
21
22
static void hppa_cpu_set_pc(CPUState *cs, vaddr value)
23
{
16
{
24
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
17
+ uint64_t s_mask;
25
{
26
HPPACPU *cpu = HPPA_CPU(cs);
27
28
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
29
+
18
+
30
#ifdef CONFIG_USER_ONLY
19
if (fold_const2(ctx, op) ||
31
- cpu->env.iaoq_f = tb_pc(tb);
20
fold_xx_to_i(ctx, op, -1) ||
32
+ cpu->env.iaoq_f = tb->pc;
21
fold_xi_to_x(ctx, op, -1) ||
33
cpu->env.iaoq_b = tb->cs_base;
22
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
34
#else
23
return true;
35
/* Recover the IAOQ values from the GVA + PRIV. */
36
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
37
int32_t diff = cs_base;
38
39
cpu->env.iasq_f = iasq_f;
40
- cpu->env.iaoq_f = (tb_pc(tb) & ~iasq_f) + priv;
41
+ cpu->env.iaoq_f = (tb->pc & ~iasq_f) + priv;
42
if (diff) {
43
cpu->env.iaoq_b = cpu->env.iaoq_f + diff;
44
}
24
}
25
26
- ctx->s_mask = arg_info(op->args[1])->s_mask
27
- & arg_info(op->args[2])->s_mask;
28
- return false;
29
+ s_mask = arg_info(op->args[1])->s_mask
30
+ & arg_info(op->args[2])->s_mask;
31
+ return fold_masks_s(ctx, op, s_mask);
32
}
33
34
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
45
--
35
--
46
2.34.1
36
2.43.0
47
48
diff view generated by jsdifflib
1
Reusing TEMP_TB interferes with detecting whether the
1
Avoid the use of the OptContext slots.
2
temp can be adjusted to TEMP_EBB.
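
A condensed sketch of the tcg.c change below (not the literal hunks): only
TEMP_EBB temps are recycled through the free list, so the bitmap no longer
needs a second bank for TEMP_TB:

    -    TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
    +    TCGTempSet free_temps[TCG_TYPE_COUNT];

         /* on free: */
         if (ts->kind == TEMP_EBB) {
             set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
         }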
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Be careful not to call fold_masks_zs when the memory operation
4
is wide enough to require multiple outputs, so split into two
5
functions: fold_qemu_ld_1reg and fold_qemu_ld_2reg.
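
A condensed sketch of the resulting dispatch in tcg_optimize() (see the hunk
below for the exact code):

        case INDEX_op_qemu_ld_a32_i32:
        case INDEX_op_qemu_ld_a64_i32:
            done = fold_qemu_ld_1reg(&ctx, op);     /* one output register */
            break;
        case INDEX_op_qemu_ld_a32_i64:
        case INDEX_op_qemu_ld_a64_i64:
            if (TCG_TARGET_REG_BITS == 64) {
                done = fold_qemu_ld_1reg(&ctx, op);
                break;
            }
            QEMU_FALLTHROUGH;       /* needs two host registers on 32-bit hosts */
        case INDEX_op_qemu_ld_a32_i128:
        case INDEX_op_qemu_ld_a64_i128:
            done = fold_qemu_ld_2reg(&ctx, op);     /* multiple outputs, no fold_masks_zs */
            break;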
6
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
9
---
7
include/tcg/tcg.h | 2 +-
10
tcg/optimize.c | 26 +++++++++++++++++++++-----
8
tcg/tcg.c | 101 ++++++++++++++++++++++++----------------------
11
1 file changed, 21 insertions(+), 5 deletions(-)
9
2 files changed, 53 insertions(+), 50 deletions(-)
10
12
11
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/include/tcg/tcg.h
15
--- a/tcg/optimize.c
14
+++ b/include/tcg/tcg.h
16
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
17
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
16
#endif
18
return fold_masks_s(ctx, op, s_mask);
17
19
}
18
GHashTable *const_table[TCG_TYPE_COUNT];
20
19
- TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
21
-static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
20
+ TCGTempSet free_temps[TCG_TYPE_COUNT];
22
+static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
21
TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
22
23
QTAILQ_HEAD(, TCGOp) ops, free_ops;
24
diff --git a/tcg/tcg.c b/tcg/tcg.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/tcg.c
27
+++ b/tcg/tcg.c
28
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
29
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
30
{
23
{
31
TCGContext *s = tcg_ctx;
24
const TCGOpDef *def = &tcg_op_defs[op->opc];
32
- bool temp_local = kind == TEMP_TB;
25
MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
33
TCGTemp *ts;
26
MemOp mop = get_memop(oi);
34
- int idx, k;
27
int width = 8 * memop_size(mop);
35
+ int n;
28
+ uint64_t z_mask = -1, s_mask = 0;
36
29
37
- k = type + (temp_local ? TCG_TYPE_COUNT : 0);
30
if (width < 64) {
38
- idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
31
if (mop & MO_SIGN) {
39
- if (idx < TCG_MAX_TEMPS) {
32
- ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
40
- /* There is already an available temp with the right type. */
33
+ s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
41
- clear_bit(idx, s->free_temps[k].l);
34
} else {
42
+ if (kind == TEMP_EBB) {
35
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
43
+ int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
36
+ z_mask = MAKE_64BIT_MASK(0, width);
44
45
- ts = &s->temps[idx];
46
- ts->temp_allocated = 1;
47
- tcg_debug_assert(ts->base_type == type);
48
- tcg_debug_assert(ts->kind == kind);
49
- } else {
50
- int i, n;
51
+ if (idx < TCG_MAX_TEMPS) {
52
+ /* There is already an available temp with the right type. */
53
+ clear_bit(idx, s->free_temps[type].l);
54
55
- switch (type) {
56
- case TCG_TYPE_I32:
57
- case TCG_TYPE_V64:
58
- case TCG_TYPE_V128:
59
- case TCG_TYPE_V256:
60
- n = 1;
61
- break;
62
- case TCG_TYPE_I64:
63
- n = 64 / TCG_TARGET_REG_BITS;
64
- break;
65
- case TCG_TYPE_I128:
66
- n = 128 / TCG_TARGET_REG_BITS;
67
- break;
68
- default:
69
- g_assert_not_reached();
70
+ ts = &s->temps[idx];
71
+ ts->temp_allocated = 1;
72
+ tcg_debug_assert(ts->base_type == type);
73
+ tcg_debug_assert(ts->kind == kind);
74
+ goto done;
75
}
76
+ } else {
77
+ tcg_debug_assert(kind == TEMP_TB);
78
+ }
79
80
- ts = tcg_temp_alloc(s);
81
- ts->base_type = type;
82
- ts->temp_allocated = 1;
83
- ts->kind = kind;
84
+ switch (type) {
85
+ case TCG_TYPE_I32:
86
+ case TCG_TYPE_V64:
87
+ case TCG_TYPE_V128:
88
+ case TCG_TYPE_V256:
89
+ n = 1;
90
+ break;
91
+ case TCG_TYPE_I64:
92
+ n = 64 / TCG_TARGET_REG_BITS;
93
+ break;
94
+ case TCG_TYPE_I128:
95
+ n = 128 / TCG_TARGET_REG_BITS;
96
+ break;
97
+ default:
98
+ g_assert_not_reached();
99
+ }
100
101
- if (n == 1) {
102
- ts->type = type;
103
- } else {
104
- ts->type = TCG_TYPE_REG;
105
+ ts = tcg_temp_alloc(s);
106
+ ts->base_type = type;
107
+ ts->temp_allocated = 1;
108
+ ts->kind = kind;
109
110
- for (i = 1; i < n; ++i) {
111
- TCGTemp *ts2 = tcg_temp_alloc(s);
112
+ if (n == 1) {
113
+ ts->type = type;
114
+ } else {
115
+ ts->type = TCG_TYPE_REG;
116
117
- tcg_debug_assert(ts2 == ts + i);
118
- ts2->base_type = type;
119
- ts2->type = TCG_TYPE_REG;
120
- ts2->temp_allocated = 1;
121
- ts2->temp_subindex = i;
122
- ts2->kind = kind;
123
- }
124
+ for (int i = 1; i < n; ++i) {
125
+ TCGTemp *ts2 = tcg_temp_alloc(s);
126
+
127
+ tcg_debug_assert(ts2 == ts + i);
128
+ ts2->base_type = type;
129
+ ts2->type = TCG_TYPE_REG;
130
+ ts2->temp_allocated = 1;
131
+ ts2->temp_subindex = i;
132
+ ts2->kind = kind;
133
}
37
}
134
}
38
}
135
39
136
+ done:
40
/* Opcodes that touch guest memory stop the mb optimization. */
137
#if defined(CONFIG_DEBUG_TCG)
41
ctx->prev_mb = NULL;
138
s->temps_in_use++;
42
- return false;
139
#endif
43
+
140
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
44
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
141
void tcg_temp_free_internal(TCGTemp *ts)
45
+}
142
{
46
+
143
TCGContext *s = tcg_ctx;
47
+static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
144
- int k, idx;
48
+{
145
49
+ /* Opcodes that touch guest memory stop the mb optimization. */
146
switch (ts->kind) {
50
+ ctx->prev_mb = NULL;
147
case TEMP_CONST:
51
+ return finish_folding(ctx, op);
148
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
149
s->temps_in_use--;
150
#endif
151
152
- idx = temp_idx(ts);
153
- k = ts->base_type + (ts->kind == TEMP_EBB ? 0 : TCG_TYPE_COUNT);
154
- set_bit(idx, s->free_temps[k].l);
155
+ if (ts->kind == TEMP_EBB) {
156
+ int idx = temp_idx(ts);
157
+ set_bit(idx, s->free_temps[ts->base_type].l);
158
+ }
159
}
52
}
160
53
161
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
54
static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
55
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
56
break;
57
case INDEX_op_qemu_ld_a32_i32:
58
case INDEX_op_qemu_ld_a64_i32:
59
+ done = fold_qemu_ld_1reg(&ctx, op);
60
+ break;
61
case INDEX_op_qemu_ld_a32_i64:
62
case INDEX_op_qemu_ld_a64_i64:
63
+ if (TCG_TARGET_REG_BITS == 64) {
64
+ done = fold_qemu_ld_1reg(&ctx, op);
65
+ break;
66
+ }
67
+ QEMU_FALLTHROUGH;
68
case INDEX_op_qemu_ld_a32_i128:
69
case INDEX_op_qemu_ld_a64_i128:
70
- done = fold_qemu_ld(&ctx, op);
71
+ done = fold_qemu_ld_2reg(&ctx, op);
72
break;
73
case INDEX_op_qemu_st8_a32_i32:
74
case INDEX_op_qemu_st8_a64_i32:
162
--
75
--
163
2.34.1
76
2.43.0
164
165
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Stores have no output operands, and so need no further work.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-5-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
accel/tcg/internal.h | 10 ++++----
6
tcg/optimize.c | 11 +++++------
9
accel/tcg/tb-jmp-cache.h | 48 +++++++++++++++++++--------------------
7
1 file changed, 5 insertions(+), 6 deletions(-)
10
accel/tcg/cpu-exec.c | 8 +++----
11
accel/tcg/perf.c | 2 +-
12
accel/tcg/tb-maint.c | 8 +++----
13
accel/tcg/translate-all.c | 14 ++++++------
14
6 files changed, 44 insertions(+), 46 deletions(-)
15
8
16
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
18
--- a/accel/tcg/internal.h
11
--- a/tcg/optimize.c
19
+++ b/accel/tcg/internal.h
12
+++ b/tcg/optimize.c
20
@@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
13
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
21
/* Return the current PC from CPU, which may be cached in TB. */
22
static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
23
{
14
{
24
-#if TARGET_TB_PCREL
15
/* Opcodes that touch guest memory stop the mb optimization. */
25
- return cpu->cc->get_pc(cpu);
16
ctx->prev_mb = NULL;
26
-#else
17
- return false;
27
- return tb_pc(tb);
18
+ return true;
28
-#endif
29
+ if (tb_cflags(tb) & CF_PCREL) {
30
+ return cpu->cc->get_pc(cpu);
31
+ } else {
32
+ return tb_pc(tb);
33
+ }
34
}
19
}
35
20
36
extern int64_t max_delay;
21
static bool fold_remainder(OptContext *ctx, TCGOp *op)
37
diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
22
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
38
index XXXXXXX..XXXXXXX 100644
23
39
--- a/accel/tcg/tb-jmp-cache.h
24
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
40
+++ b/accel/tcg/tb-jmp-cache.h
25
remove_mem_copy_all(ctx);
41
@@ -XXX,XX +XXX,XX @@
26
- return false;
42
27
+ return true;
43
/*
28
}
44
* Accessed in parallel; all accesses to 'tb' must be atomic.
29
45
- * For TARGET_TB_PCREL, accesses to 'pc' must be protected by
30
switch (op->opc) {
46
- * a load_acquire/store_release to 'tb'.
31
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
47
+ * For CF_PCREL, accesses to 'pc' must be protected by a
32
g_assert_not_reached();
48
+ * load_acquire/store_release to 'tb'.
33
}
49
*/
34
remove_mem_copy_in(ctx, ofs, ofs + lm1);
50
struct CPUJumpCache {
35
- return false;
51
struct rcu_head rcu;
36
+ return true;
52
struct {
53
TranslationBlock *tb;
54
-#if TARGET_TB_PCREL
55
target_ulong pc;
56
-#endif
57
} array[TB_JMP_CACHE_SIZE];
58
};
59
60
static inline TranslationBlock *
61
-tb_jmp_cache_get_tb(CPUJumpCache *jc, uint32_t hash)
62
+tb_jmp_cache_get_tb(CPUJumpCache *jc, uint32_t cflags, uint32_t hash)
63
{
64
-#if TARGET_TB_PCREL
65
- /* Use acquire to ensure current load of pc from jc. */
66
- return qatomic_load_acquire(&jc->array[hash].tb);
67
-#else
68
- /* Use rcu_read to ensure current load of pc from *tb. */
69
- return qatomic_rcu_read(&jc->array[hash].tb);
70
-#endif
71
+ if (cflags & CF_PCREL) {
72
+ /* Use acquire to ensure current load of pc from jc. */
73
+ return qatomic_load_acquire(&jc->array[hash].tb);
74
+ } else {
75
+ /* Use rcu_read to ensure current load of pc from *tb. */
76
+ return qatomic_rcu_read(&jc->array[hash].tb);
77
+ }
78
}
37
}
79
38
80
static inline target_ulong
39
static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
81
tb_jmp_cache_get_pc(CPUJumpCache *jc, uint32_t hash, TranslationBlock *tb)
40
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
82
{
41
TCGType type;
83
-#if TARGET_TB_PCREL
42
84
- return jc->array[hash].pc;
43
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
85
-#else
44
- fold_tcg_st(ctx, op);
86
- return tb_pc(tb);
45
- return false;
87
-#endif
46
+ return fold_tcg_st(ctx, op);
88
+ if (tb_cflags(tb) & CF_PCREL) {
47
}
89
+ return jc->array[hash].pc;
48
90
+ } else {
49
src = arg_temp(op->args[0]);
91
+ return tb_pc(tb);
50
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
92
+ }
51
last = ofs + tcg_type_size(type) - 1;
52
remove_mem_copy_in(ctx, ofs, last);
53
record_mem_copy(ctx, type, src, ofs, last);
54
- return false;
55
+ return true;
93
}
56
}
94
57
95
static inline void
58
static bool fold_xor(OptContext *ctx, TCGOp *op)
96
tb_jmp_cache_set(CPUJumpCache *jc, uint32_t hash,
97
TranslationBlock *tb, target_ulong pc)
98
{
99
-#if TARGET_TB_PCREL
100
- jc->array[hash].pc = pc;
101
- /* Use store_release on tb to ensure pc is written first. */
102
- qatomic_store_release(&jc->array[hash].tb, tb);
103
-#else
104
- /* Use the pc value already stored in tb->pc. */
105
- qatomic_set(&jc->array[hash].tb, tb);
106
-#endif
107
+ if (tb_cflags(tb) & CF_PCREL) {
108
+ jc->array[hash].pc = pc;
109
+ /* Use store_release on tb to ensure pc is written first. */
110
+ qatomic_store_release(&jc->array[hash].tb, tb);
111
+ } else{
112
+ /* Use the pc value already stored in tb->pc. */
113
+ qatomic_set(&jc->array[hash].tb, tb);
114
+ }
115
}
116
117
#endif /* ACCEL_TCG_TB_JMP_CACHE_H */
118
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
119
index XXXXXXX..XXXXXXX 100644
120
--- a/accel/tcg/cpu-exec.c
121
+++ b/accel/tcg/cpu-exec.c
122
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
123
const TranslationBlock *tb = p;
124
const struct tb_desc *desc = d;
125
126
- if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) &&
127
+ if ((tb_cflags(tb) & CF_PCREL || tb_pc(tb) == desc->pc) &&
128
tb_page_addr0(tb) == desc->page_addr0 &&
129
tb->cs_base == desc->cs_base &&
130
tb->flags == desc->flags &&
131
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
132
return NULL;
133
}
134
desc.page_addr0 = phys_pc;
135
- h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : pc),
136
+ h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
137
flags, cflags, *cpu->trace_dstate);
138
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
139
}
140
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
141
142
hash = tb_jmp_cache_hash_func(pc);
143
jc = cpu->tb_jmp_cache;
144
- tb = tb_jmp_cache_get_tb(jc, hash);
145
+ tb = tb_jmp_cache_get_tb(jc, cflags, hash);
146
147
if (likely(tb &&
148
tb_jmp_cache_get_pc(jc, hash, tb) == pc &&
149
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
150
if (cc->tcg_ops->synchronize_from_tb) {
151
cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
152
} else {
153
- assert(!TARGET_TB_PCREL);
154
+ tcg_debug_assert(!(tb_cflags(last_tb) & CF_PCREL));
155
assert(cc->set_pc);
156
cc->set_pc(cpu, tb_pc(last_tb));
157
}
158
diff --git a/accel/tcg/perf.c b/accel/tcg/perf.c
159
index XXXXXXX..XXXXXXX 100644
160
--- a/accel/tcg/perf.c
161
+++ b/accel/tcg/perf.c
162
@@ -XXX,XX +XXX,XX @@ void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
163
for (insn = 0; insn < tb->icount; insn++) {
164
/* FIXME: This replicates the restore_state_to_opc() logic. */
165
q[insn].address = tcg_ctx->gen_insn_data[insn][0];
166
- if (TARGET_TB_PCREL) {
167
+ if (tb_cflags(tb) & CF_PCREL) {
168
q[insn].address |= (guest_pc & TARGET_PAGE_MASK);
169
} else {
170
#if defined(TARGET_I386)
171
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
172
index XXXXXXX..XXXXXXX 100644
173
--- a/accel/tcg/tb-maint.c
174
+++ b/accel/tcg/tb-maint.c
175
@@ -XXX,XX +XXX,XX @@ static bool tb_cmp(const void *ap, const void *bp)
176
const TranslationBlock *a = ap;
177
const TranslationBlock *b = bp;
178
179
- return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
180
+ return ((tb_cflags(a) & CF_PCREL || tb_pc(a) == tb_pc(b)) &&
181
a->cs_base == b->cs_base &&
182
a->flags == b->flags &&
183
(tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
184
@@ -XXX,XX +XXX,XX @@ static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
185
{
186
CPUState *cpu;
187
188
- if (TARGET_TB_PCREL) {
189
+ if (tb_cflags(tb) & CF_PCREL) {
190
/* A TB may be at any virtual address */
191
CPU_FOREACH(cpu) {
192
tcg_flush_jmp_cache(cpu);
193
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
194
195
/* remove the TB from the hash list */
196
phys_pc = tb_page_addr0(tb);
197
- h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
198
+ h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb_pc(tb)),
199
tb->flags, orig_cflags, tb->trace_vcpu_dstate);
200
if (!qht_remove(&tb_ctx.htable, tb, h)) {
201
return;
202
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
203
tb_record(tb, p, p2);
204
205
/* add in the hash table */
206
- h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
207
+ h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb_pc(tb)),
208
tb->flags, tb->cflags, tb->trace_vcpu_dstate);
209
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
210
211
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
212
index XXXXXXX..XXXXXXX 100644
213
--- a/accel/tcg/translate-all.c
214
+++ b/accel/tcg/translate-all.c
215
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
216
217
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
218
if (i == 0) {
219
- prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
220
+ prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb_pc(tb) : 0);
221
} else {
222
prev = tcg_ctx->gen_insn_data[i - 1][j];
223
}
224
@@ -XXX,XX +XXX,XX @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
225
}
226
227
memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
228
- if (!TARGET_TB_PCREL) {
229
+ if (!(tb_cflags(tb) & CF_PCREL)) {
230
data[0] = tb_pc(tb);
231
}
232
233
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
234
235
gen_code_buf = tcg_ctx->code_gen_ptr;
236
tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
237
-#if !TARGET_TB_PCREL
238
- tb->pc = pc;
239
-#endif
240
+ if (!(cflags & CF_PCREL)) {
241
+ tb->pc = pc;
242
+ }
243
tb->cs_base = cs_base;
244
tb->flags = flags;
245
tb->cflags = cflags;
246
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
247
tb->tc.size = gen_code_size;
248
249
/*
250
- * For TARGET_TB_PCREL, attribute all executions of the generated
251
- * code to its first mapping.
252
+ * For CF_PCREL, attribute all executions of the generated code
253
+ * to its first mapping.
254
*/
255
perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
256
257
--
59
--
258
2.34.1
60
2.43.0
259
260
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-23-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
target/i386/tcg/tcg-cpu.c | 2 +-
4
tcg/optimize.c | 2 +-
9
1 file changed, 1 insertion(+), 1 deletion(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
10
6
11
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
13
--- a/target/i386/tcg/tcg-cpu.c
9
--- a/tcg/optimize.c
14
+++ b/target/i386/tcg/tcg-cpu.c
10
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_synchronize_from_tb(CPUState *cs,
11
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
16
/* The instruction pointer is always up to date with CF_PCREL. */
12
fold_xx_to_i(ctx, op, 0)) {
17
if (!(tb_cflags(tb) & CF_PCREL)) {
13
return true;
18
CPUX86State *env = cs->env_ptr;
19
- env->eip = tb_pc(tb) - tb->cs_base;
20
+ env->eip = tb->pc - tb->cs_base;
21
}
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
22
}
17
}
23
18
19
static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
24
--
20
--
25
2.34.1
21
2.43.0
26
27
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Change return from bool to int; distinguish between
2
complete folding, simplification, and no change.
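
In short (illustrative, condensed from the hunks below):

    /*
     * fold_setcond_zmask() now returns:
     *    1 -> fully folded, the caller is done
     *   -1 -> simplified in place, skip fold_setcond_tst_pow2()
     *    0 -> unchanged, keep trying further simplifications
     */
    i = fold_setcond_zmask(ctx, op, false);
    if (i > 0) {
        return true;
    }
    if (i == 0) {
        fold_setcond_tst_pow2(ctx, op, false);
    }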
2
3
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-13-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
accel/tcg/internal.h | 2 +-
7
tcg/optimize.c | 22 ++++++++++++++--------
9
accel/tcg/cpu-exec.c | 6 +++---
8
1 file changed, 14 insertions(+), 8 deletions(-)
10
accel/tcg/tb-maint.c | 8 ++++----
11
accel/tcg/translate-all.c | 4 ++--
12
4 files changed, 10 insertions(+), 10 deletions(-)
13
9
14
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/accel/tcg/internal.h
12
--- a/tcg/optimize.c
17
+++ b/accel/tcg/internal.h
13
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
19
if (tb_cflags(tb) & CF_PCREL) {
15
return finish_folding(ctx, op);
20
return cpu->cc->get_pc(cpu);
16
}
21
} else {
17
22
- return tb_pc(tb);
18
-static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
23
+ return tb->pc;
19
+/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
20
+static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
21
{
22
uint64_t a_zmask, b_val;
23
TCGCond cond;
24
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
25
op->opc = xor_opc;
26
op->args[2] = arg_new_constant(ctx, 1);
27
}
28
- return false;
29
+ return -1;
30
}
24
}
31
}
32
-
33
- return false;
34
+ return 0;
25
}
35
}
26
36
27
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
37
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
28
index XXXXXXX..XXXXXXX 100644
38
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
29
--- a/accel/tcg/cpu-exec.c
39
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
30
+++ b/accel/tcg/cpu-exec.c
31
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
32
const TranslationBlock *tb = p;
33
const struct tb_desc *desc = d;
34
35
- if ((tb_cflags(tb) & CF_PCREL || tb_pc(tb) == desc->pc) &&
36
+ if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->pc) &&
37
tb_page_addr0(tb) == desc->page_addr0 &&
38
tb->cs_base == desc->cs_base &&
39
tb->flags == desc->flags &&
40
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
41
tb = qatomic_rcu_read(&jc->array[hash].tb);
42
43
if (likely(tb &&
44
- tb_pc(tb) == pc &&
45
+ tb->pc == pc &&
46
tb->cs_base == cs_base &&
47
tb->flags == flags &&
48
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
49
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
50
} else {
51
tcg_debug_assert(!(tb_cflags(last_tb) & CF_PCREL));
52
assert(cc->set_pc);
53
- cc->set_pc(cpu, tb_pc(last_tb));
54
+ cc->set_pc(cpu, last_tb->pc);
55
}
56
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
57
target_ulong pc = log_pc(cpu, last_tb);
58
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/accel/tcg/tb-maint.c
61
+++ b/accel/tcg/tb-maint.c
62
@@ -XXX,XX +XXX,XX @@ static bool tb_cmp(const void *ap, const void *bp)
63
const TranslationBlock *a = ap;
64
const TranslationBlock *b = bp;
65
66
- return ((tb_cflags(a) & CF_PCREL || tb_pc(a) == tb_pc(b)) &&
67
+ return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
68
a->cs_base == b->cs_base &&
69
a->flags == b->flags &&
70
(tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
71
@@ -XXX,XX +XXX,XX @@ static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
72
tcg_flush_jmp_cache(cpu);
73
}
74
} else {
75
- uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb));
76
+ uint32_t h = tb_jmp_cache_hash_func(tb->pc);
77
78
CPU_FOREACH(cpu) {
79
CPUJumpCache *jc = cpu->tb_jmp_cache;
80
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
81
82
/* remove the TB from the hash list */
83
phys_pc = tb_page_addr0(tb);
84
- h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb_pc(tb)),
85
+ h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
86
tb->flags, orig_cflags, tb->trace_vcpu_dstate);
87
if (!qht_remove(&tb_ctx.htable, tb, h)) {
88
return;
89
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
90
tb_record(tb, p, p2);
91
92
/* add in the hash table */
93
- h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb_pc(tb)),
94
+ h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb->pc),
95
tb->flags, tb->cflags, tb->trace_vcpu_dstate);
96
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
97
98
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/accel/tcg/translate-all.c
101
+++ b/accel/tcg/translate-all.c
102
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
103
104
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
105
if (i == 0) {
106
- prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb_pc(tb) : 0);
107
+ prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
108
} else {
109
prev = tcg_ctx->gen_insn_data[i - 1][j];
110
}
111
@@ -XXX,XX +XXX,XX @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
112
113
memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
114
if (!(tb_cflags(tb) & CF_PCREL)) {
115
- data[0] = tb_pc(tb);
116
+ data[0] = tb->pc;
117
}
40
}
118
41
119
/*
42
- if (fold_setcond_zmask(ctx, op, false)) {
43
+ i = fold_setcond_zmask(ctx, op, false);
44
+ if (i > 0) {
45
return true;
46
}
47
- fold_setcond_tst_pow2(ctx, op, false);
48
+ if (i == 0) {
49
+ fold_setcond_tst_pow2(ctx, op, false);
50
+ }
51
52
ctx->z_mask = 1;
53
return false;
54
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
55
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
56
}
57
58
- if (fold_setcond_zmask(ctx, op, true)) {
59
+ i = fold_setcond_zmask(ctx, op, true);
60
+ if (i > 0) {
61
return true;
62
}
63
- fold_setcond_tst_pow2(ctx, op, true);
64
+ if (i == 0) {
65
+ fold_setcond_tst_pow2(ctx, op, true);
66
+ }
67
68
/* Value is {0,-1} so all bits are repetitions of the sign. */
69
ctx->s_mask = -1;
120
--
70
--
121
2.34.1
71
2.43.0
122
123
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Avoid the use of the OptContext slots.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-19-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/openrisc/cpu.c | 4 +++-
6
tcg/optimize.c | 3 +--
9
1 file changed, 3 insertions(+), 1 deletion(-)
7
1 file changed, 1 insertion(+), 2 deletions(-)
10
8
11
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/openrisc/cpu.c
11
--- a/tcg/optimize.c
14
+++ b/target/openrisc/cpu.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
16
#include "qemu/qemu-print.h"
14
fold_setcond_tst_pow2(ctx, op, false);
17
#include "cpu.h"
15
}
18
#include "exec/exec-all.h"
16
19
+#include "tcg/tcg.h"
17
- ctx->z_mask = 1;
20
18
- return false;
21
static void openrisc_cpu_set_pc(CPUState *cs, vaddr value)
19
+ return fold_masks_z(ctx, op, 1);
22
{
23
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
24
{
25
OpenRISCCPU *cpu = OPENRISC_CPU(cs);
26
27
- cpu->env.pc = tb_pc(tb);
28
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
29
+ cpu->env.pc = tb->pc;
30
}
20
}
31
21
32
static void openrisc_restore_state_to_opc(CPUState *cs,
22
static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
33
--
23
--
34
2.34.1
24
2.43.0
35
36
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Avoid the use of the OptContext slots.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Palmer Dabbelt <palmer@rivosinc.com>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Message-Id: <20230227135202.9710-18-anjo@rev.ng>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
target/riscv/cpu.c | 7 +++++--
6
tcg/optimize.c | 3 +--
10
1 file changed, 5 insertions(+), 2 deletions(-)
7
1 file changed, 1 insertion(+), 2 deletions(-)
11
8
12
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/target/riscv/cpu.c
11
--- a/tcg/optimize.c
15
+++ b/target/riscv/cpu.c
12
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
17
#include "fpu/softfloat-helpers.h"
18
#include "sysemu/kvm.h"
19
#include "kvm_riscv.h"
20
+#include "tcg/tcg.h"
21
22
/* RISC-V CPU definitions */
23
24
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_synchronize_from_tb(CPUState *cs,
25
CPURISCVState *env = &cpu->env;
26
RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
27
28
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
29
+
30
if (xl == MXL_RV32) {
31
- env->pc = (int32_t)tb_pc(tb);
32
+ env->pc = (int32_t) tb->pc;
33
} else {
34
- env->pc = tb_pc(tb);
35
+ env->pc = tb->pc;
36
}
14
}
15
16
/* Value is {0,-1} so all bits are repetitions of the sign. */
17
- ctx->s_mask = -1;
18
- return false;
19
+ return fold_masks_s(ctx, op, -1);
37
}
20
}
38
21
22
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
39
--
23
--
40
2.34.1
24
2.43.0
41
42
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
1
Avoid the use of the OptContext slots.
2
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
tcg/tcg-op-gvec.c | 3 +--
6
tcg/optimize.c | 3 +--
6
1 file changed, 1 insertion(+), 2 deletions(-)
7
1 file changed, 1 insertion(+), 2 deletions(-)
7
8
8
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/tcg-op-gvec.c
11
--- a/tcg/optimize.c
11
+++ b/tcg/tcg-op-gvec.c
12
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
13
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
13
* stores through to memset.
14
return fold_setcond(ctx, op);
14
*/
15
if (oprsz == maxsz && vece == MO_8) {
16
- TCGv_ptr t_size = tcg_const_ptr(oprsz);
17
+ TCGv_ptr t_size = tcg_constant_ptr(oprsz);
18
TCGv_i32 t_val;
19
20
if (in_32) {
21
@@ -XXX,XX +XXX,XX @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
22
if (in_64) {
23
tcg_temp_free_i32(t_val);
24
}
25
- tcg_temp_free_ptr(t_size);
26
tcg_temp_free_ptr(t_ptr);
27
return;
28
}
15
}
16
17
- ctx->z_mask = 1;
18
- return false;
19
+ return fold_masks_z(ctx, op, 1);
20
21
do_setcond_const:
22
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
29
--
23
--
30
2.34.1
24
2.43.0
31
32
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-17-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
target/rx/cpu.c | 3 ++-
4
tcg/optimize.c | 2 +-
9
1 file changed, 2 insertions(+), 1 deletion(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
10
6
11
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
13
--- a/target/rx/cpu.c
9
--- a/tcg/optimize.c
14
+++ b/target/rx/cpu.c
10
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_synchronize_from_tb(CPUState *cs,
11
@@ -XXX,XX +XXX,XX @@ static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
16
{
12
if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
17
RXCPU *cpu = RX_CPU(cs);
13
op->args[3] = tcg_swap_cond(op->args[3]);
18
14
}
19
- cpu->env.pc = tb_pc(tb);
15
- return false;
20
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
16
+ return finish_folding(ctx, op);
21
+ cpu->env.pc = tb->pc;
22
}
17
}
23
18
24
static void rx_restore_state_to_opc(CPUState *cs,
19
static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
25
--
20
--
26
2.34.1
21
2.43.0
27
28
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-16-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
target/sh4/cpu.c | 6 ++++--
4
tcg/optimize.c | 2 +-
9
1 file changed, 4 insertions(+), 2 deletions(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
10
6
11
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
13
--- a/target/sh4/cpu.c
9
--- a/tcg/optimize.c
14
+++ b/target/sh4/cpu.c
10
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@
11
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
16
#include "migration/vmstate.h"
12
if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
17
#include "exec/exec-all.h"
13
op->args[5] = tcg_invert_cond(op->args[5]);
18
#include "fpu/softfloat-helpers.h"
14
}
19
+#include "tcg/tcg.h"
15
- return false;
20
16
+ return finish_folding(ctx, op);
21
static void superh_cpu_set_pc(CPUState *cs, vaddr value)
22
{
23
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
24
{
25
SuperHCPU *cpu = SUPERH_CPU(cs);
26
27
- cpu->env.pc = tb_pc(tb);
28
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
29
+ cpu->env.pc = tb->pc;
30
cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
31
}
17
}
32
18
33
@@ -XXX,XX +XXX,XX @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
19
static bool fold_sextract(OptContext *ctx, TCGOp *op)
34
CPUSH4State *env = &cpu->env;
35
36
if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND))
37
- && env->pc != tb_pc(tb)) {
38
+ && !(cs->tcg_cflags & CF_PCREL) && env->pc != tb->pc) {
39
env->pc -= 2;
40
env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND);
41
return true;
--
2.34.1

--
2.43.0

From: Anton Johansson via <qemu-devel@nongnu.org>
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-8-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/i386/helper.c | 2 +-
6
tcg/optimize.c | 24 +++++++++---------------
9
target/i386/tcg/tcg-cpu.c | 6 +++---
7
1 file changed, 9 insertions(+), 15 deletions(-)
10
target/i386/tcg/translate.c | 26 +++++++++++++-------------
11
3 files changed, 17 insertions(+), 17 deletions(-)
12
8
13
diff --git a/target/i386/helper.c b/target/i386/helper.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
15
--- a/target/i386/helper.c
11
--- a/tcg/optimize.c
16
+++ b/target/i386/helper.c
12
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@ static inline target_ulong get_memio_eip(CPUX86State *env)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
14
static bool fold_sextract(OptContext *ctx, TCGOp *op)
15
{
16
uint64_t z_mask, s_mask, s_mask_old;
17
+ TempOptInfo *t1 = arg_info(op->args[1]);
18
int pos = op->args[2];
19
int len = op->args[3];
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t;
23
-
24
- t = arg_info(op->args[1])->val;
25
- t = sextract64(t, pos, len);
26
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
27
+ if (ti_is_const(t1)) {
28
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
29
+ sextract64(ti_const_val(t1), pos, len));
18
}
30
}
19
31
20
/* Per x86_restore_state_to_opc. */
32
- z_mask = arg_info(op->args[1])->z_mask;
21
- if (TARGET_TB_PCREL) {
33
- z_mask = sextract64(z_mask, pos, len);
22
+ if (cs->tcg_cflags & CF_PCREL) {
34
- ctx->z_mask = z_mask;
23
return (env->eip & TARGET_PAGE_MASK) | data[0];
35
-
24
} else {
36
- s_mask_old = arg_info(op->args[1])->s_mask;
25
return data[0] - env->segs[R_CS].base;
37
- s_mask = sextract64(s_mask_old, pos, len);
26
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
38
- s_mask |= MAKE_64BIT_MASK(len, 64 - len);
27
index XXXXXXX..XXXXXXX 100644
39
- ctx->s_mask = s_mask;
28
--- a/target/i386/tcg/tcg-cpu.c
40
+ s_mask_old = t1->s_mask;
29
+++ b/target/i386/tcg/tcg-cpu.c
41
+ s_mask = s_mask_old >> pos;
30
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_exec_exit(CPUState *cs)
42
+ s_mask |= -1ull << (len - 1);
31
static void x86_cpu_synchronize_from_tb(CPUState *cs,
43
32
const TranslationBlock *tb)
44
if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
33
{
45
return true;
34
- /* The instruction pointer is always up to date with TARGET_TB_PCREL. */
35
- if (!TARGET_TB_PCREL) {
36
+ /* The instruction pointer is always up to date with CF_PCREL. */
37
+ if (!(tb_cflags(tb) & CF_PCREL)) {
38
CPUX86State *env = cs->env_ptr;
39
env->eip = tb_pc(tb) - tb->cs_base;
40
}
46
}
41
@@ -XXX,XX +XXX,XX @@ static void x86_restore_state_to_opc(CPUState *cs,
47
42
CPUX86State *env = &cpu->env;
48
- return fold_masks(ctx, op);
43
int cc_op = data[1];
49
+ z_mask = sextract64(t1->z_mask, pos, len);
44
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
45
- if (TARGET_TB_PCREL) {
46
+ if (tb_cflags(tb) & CF_PCREL) {
47
env->eip = (env->eip & TARGET_PAGE_MASK) | data[0];
48
} else {
49
env->eip = data[0] - tb->cs_base;
50
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/target/i386/tcg/translate.c
53
+++ b/target/i386/tcg/translate.c
54
@@ -XXX,XX +XXX,XX @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
55
static void gen_update_eip_cur(DisasContext *s)
56
{
57
assert(s->pc_save != -1);
58
- if (TARGET_TB_PCREL) {
59
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
60
tcg_gen_addi_tl(cpu_eip, cpu_eip, s->base.pc_next - s->pc_save);
61
} else {
62
tcg_gen_movi_tl(cpu_eip, s->base.pc_next - s->cs_base);
63
@@ -XXX,XX +XXX,XX @@ static void gen_update_eip_cur(DisasContext *s)
64
static void gen_update_eip_next(DisasContext *s)
65
{
66
assert(s->pc_save != -1);
67
- if (TARGET_TB_PCREL) {
68
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
69
tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save);
70
} else {
71
tcg_gen_movi_tl(cpu_eip, s->pc - s->cs_base);
72
@@ -XXX,XX +XXX,XX @@ static TCGv_i32 eip_next_i32(DisasContext *s)
73
if (CODE64(s)) {
74
return tcg_constant_i32(-1);
75
}
76
- if (TARGET_TB_PCREL) {
77
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
78
TCGv_i32 ret = tcg_temp_new_i32();
79
tcg_gen_trunc_tl_i32(ret, cpu_eip);
80
tcg_gen_addi_i32(ret, ret, s->pc - s->pc_save);
81
@@ -XXX,XX +XXX,XX @@ static TCGv_i32 eip_next_i32(DisasContext *s)
82
static TCGv eip_next_tl(DisasContext *s)
83
{
84
assert(s->pc_save != -1);
85
- if (TARGET_TB_PCREL) {
86
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
87
TCGv ret = tcg_temp_new();
88
tcg_gen_addi_tl(ret, cpu_eip, s->pc - s->pc_save);
89
return ret;
90
@@ -XXX,XX +XXX,XX @@ static TCGv eip_next_tl(DisasContext *s)
91
static TCGv eip_cur_tl(DisasContext *s)
92
{
93
assert(s->pc_save != -1);
94
- if (TARGET_TB_PCREL) {
95
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
96
TCGv ret = tcg_temp_new();
97
tcg_gen_addi_tl(ret, cpu_eip, s->base.pc_next - s->pc_save);
98
return ret;
99
@@ -XXX,XX +XXX,XX @@ static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
100
tcg_temp_free_i32(t0);
101
tcg_temp_free_i32(t1);
102
103
- /* The CC_OP value is no longer predictable. */
104
+ /* The CC_OP value is no longer predictable. */
105
set_cc_op(s, CC_OP_DYNAMIC);
106
}
51
}
107
52
108
@@ -XXX,XX +XXX,XX @@ static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
53
static bool fold_shift(OptContext *ctx, TCGOp *op)
109
gen_op_ld_v(s, ot, s->T0, s->A0);
110
else
111
gen_op_mov_v_reg(s, ot, s->T0, op1);
112
-
113
+
114
if (is_right) {
115
switch (ot) {
116
case MO_8:
117
@@ -XXX,XX +XXX,XX @@ static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a, bool is_vsib)
118
ea = cpu_regs[a.base];
119
}
120
if (!ea) {
121
- if (TARGET_TB_PCREL && a.base == -2) {
122
+ if (tb_cflags(s->base.tb) & CF_PCREL && a.base == -2) {
123
/* With cpu_eip ~= pc_save, the expression is pc-relative. */
124
tcg_gen_addi_tl(s->A0, cpu_eip, a.disp - s->pc_save);
125
} else {
126
@@ -XXX,XX +XXX,XX @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
127
if (!CODE64(s)) {
128
if (ot == MO_16) {
129
mask = 0xffff;
130
- if (TARGET_TB_PCREL && CODE32(s)) {
131
+ if (tb_cflags(s->base.tb) & CF_PCREL && CODE32(s)) {
132
use_goto_tb = false;
133
}
134
} else {
135
@@ -XXX,XX +XXX,XX @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
136
gen_update_cc_op(s);
137
set_cc_op(s, CC_OP_DYNAMIC);
138
139
- if (TARGET_TB_PCREL) {
140
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
141
tcg_gen_addi_tl(cpu_eip, cpu_eip, new_pc - s->pc_save);
142
/*
143
* If we can prove the branch does not leave the page and we have
144
@@ -XXX,XX +XXX,XX @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
145
translator_use_goto_tb(&s->base, new_eip + s->cs_base)) {
146
/* jump to same page: we can use a direct jump */
147
tcg_gen_goto_tb(tb_num);
148
- if (!TARGET_TB_PCREL) {
149
+ if (!(tb_cflags(s->base.tb) & CF_PCREL)) {
150
tcg_gen_movi_tl(cpu_eip, new_eip);
151
}
152
tcg_gen_exit_tb(s->base.tb, tb_num);
153
s->base.is_jmp = DISAS_NORETURN;
154
} else {
155
- if (!TARGET_TB_PCREL) {
156
+ if (!(tb_cflags(s->base.tb) & CF_PCREL)) {
157
tcg_gen_movi_tl(cpu_eip, new_eip);
158
}
159
if (s->jmp_opt) {
160
@@ -XXX,XX +XXX,XX @@ static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
161
target_ulong pc_arg = dc->base.pc_next;
162
163
dc->prev_insn_end = tcg_last_op();
164
- if (TARGET_TB_PCREL) {
165
+ if (tb_cflags(dcbase->tb) & CF_PCREL) {
166
pc_arg -= dc->cs_base;
167
pc_arg &= ~TARGET_PAGE_MASK;
168
}
--
2.34.1

--
2.43.0

TEMP_NORMAL is a subset of TEMP_EBB. Promote single basic
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
block temps to single extended basic block.
3
2
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
include/tcg/tcg.h | 2 --
6
tcg/optimize.c | 27 ++++++++++++++-------------
8
tcg/tcg.c | 19 +++----------------
7
1 file changed, 14 insertions(+), 13 deletions(-)
9
2 files changed, 3 insertions(+), 18 deletions(-)
10
8
11
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/include/tcg/tcg.h
11
--- a/tcg/optimize.c
14
+++ b/include/tcg/tcg.h
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ typedef enum TCGTempVal {
13
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
16
} TCGTempVal;
14
static bool fold_shift(OptContext *ctx, TCGOp *op)
17
18
typedef enum TCGTempKind {
19
- /* Temp is dead at the end of all basic blocks. */
20
- TEMP_NORMAL,
21
/*
22
* Temp is dead at the end of the extended basic block (EBB),
23
* the single-entry multiple-exit region that falls through
24
diff --git a/tcg/tcg.c b/tcg/tcg.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/tcg.c
27
+++ b/tcg/tcg.c
28
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
29
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
30
{
15
{
31
TCGContext *s = tcg_ctx;
16
uint64_t s_mask, z_mask, sign;
32
- TCGTempKind kind = temp_local ? TEMP_TB : TEMP_NORMAL;
17
+ TempOptInfo *t1, *t2;
33
+ TCGTempKind kind = temp_local ? TEMP_TB : TEMP_EBB;
18
34
TCGTemp *ts;
19
if (fold_const2(ctx, op) ||
35
int idx, k;
20
fold_ix_to_i(ctx, op, 0) ||
36
21
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
37
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
22
return true;
38
* silently ignore free.
23
}
24
25
- s_mask = arg_info(op->args[1])->s_mask;
26
- z_mask = arg_info(op->args[1])->z_mask;
27
+ t1 = arg_info(op->args[1]);
28
+ t2 = arg_info(op->args[2]);
29
+ s_mask = t1->s_mask;
30
+ z_mask = t1->z_mask;
31
32
- if (arg_is_const(op->args[2])) {
33
- int sh = arg_info(op->args[2])->val;
34
-
35
- ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
36
+ if (ti_is_const(t2)) {
37
+ int sh = ti_const_val(t2);
38
39
+ z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
40
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
41
42
- return fold_masks(ctx, op);
43
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
44
}
45
46
switch (op->opc) {
47
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
48
* Arithmetic right shift will not reduce the number of
49
* input sign repetitions.
39
*/
50
*/
40
return;
51
- ctx->s_mask = s_mask;
41
- case TEMP_NORMAL:
52
- break;
42
+ case TEMP_EBB:
53
+ return fold_masks_s(ctx, op, s_mask);
43
case TEMP_TB:
54
CASE_OP_32_64(shr):
55
/*
56
* If the sign bit is known zero, then logical right shift
57
- * will not reduced the number of input sign repetitions.
58
+ * will not reduce the number of input sign repetitions.
59
*/
60
- sign = (s_mask & -s_mask) >> 1;
61
+ sign = -s_mask;
62
if (sign && !(z_mask & sign)) {
63
- ctx->s_mask = s_mask;
64
+ return fold_masks_s(ctx, op, s_mask);
65
}
44
break;
66
break;
45
default:
67
default:
46
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
68
break;
47
#endif
69
}
48
70
49
idx = temp_idx(ts);
71
- return false;
50
- k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
72
+ return finish_folding(ctx, op);
51
+ k = ts->base_type + (ts->kind == TEMP_EBB ? 0 : TCG_TYPE_COUNT);
52
set_bit(idx, s->free_temps[k].l);
53
}
73
}
54
74
55
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_start(TCGContext *s)
75
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
56
break;
57
case TEMP_GLOBAL:
58
break;
59
- case TEMP_NORMAL:
60
case TEMP_EBB:
61
val = TEMP_VAL_DEAD;
62
/* fall through */
63
@@ -XXX,XX +XXX,XX @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
64
snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
65
break;
66
case TEMP_EBB:
67
- snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
68
- break;
69
- case TEMP_NORMAL:
70
snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
71
break;
72
case TEMP_CONST:
73
@@ -XXX,XX +XXX,XX @@ static void la_bb_end(TCGContext *s, int ng, int nt)
74
case TEMP_TB:
75
state = TS_DEAD | TS_MEM;
76
break;
77
- case TEMP_NORMAL:
78
case TEMP_EBB:
79
case TEMP_CONST:
80
state = TS_DEAD;
81
@@ -XXX,XX +XXX,XX @@ static void la_bb_sync(TCGContext *s, int ng, int nt)
82
continue;
83
}
84
break;
85
- case TEMP_NORMAL:
86
- s->temps[i].state = TS_DEAD;
87
- break;
88
case TEMP_EBB:
89
case TEMP_CONST:
90
continue;
91
@@ -XXX,XX +XXX,XX @@ static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
92
case TEMP_TB:
93
new_type = TEMP_VAL_MEM;
94
break;
95
- case TEMP_NORMAL:
96
case TEMP_EBB:
97
new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
98
break;
99
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
100
case TEMP_TB:
101
temp_save(s, ts, allocated_regs);
102
break;
103
- case TEMP_NORMAL:
104
case TEMP_EBB:
105
/* The liveness analysis already ensures that temps are dead.
106
Keep an tcg_debug_assert for safety. */
107
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
108
case TEMP_TB:
109
tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
110
break;
111
- case TEMP_NORMAL:
112
- tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
113
- break;
114
case TEMP_EBB:
115
case TEMP_CONST:
116
break;
--
2.34.1

--
2.43.0

Since tcg_temp_new is now identical, use that.
1
Merge the two conditions, sign != 0 && !(z_mask & sign), by
testing ~z_mask & sign.  If sign == 0, the bitwise AND produces
zero and the test is false.
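
Because the series keeps s_mask in a canonical high-bits form, sign = -s_mask
is either zero or a single bit, which is why the two forms agree.  A quick
standalone check (illustrative only, not QEMU code; mask semantics as in the
tcg/optimize.c comments):

    #include <stdint.h>
    #include <assert.h>

    static int old_test(uint64_t z_mask, uint64_t sign)
    {
        return sign && !(z_mask & sign);
    }

    static int new_test(uint64_t z_mask, uint64_t sign)
    {
        return (~z_mask & sign) != 0;
    }

    int main(void)
    {
        static const uint64_t s_masks[] = {
            0, -1ull << 7, -1ull << 15, -1ull << 31, -1ull << 63,
        };
        static const uint64_t z_masks[] = {
            0, 0x7f, 0xff, 0x7fff, 0xffffffff, -1ull,
        };

        for (unsigned i = 0; i < 5; i++) {
            for (unsigned j = 0; j < 6; j++) {
                uint64_t sign = -s_masks[i];  /* lowest replicated sign bit, or 0 */
                assert(old_test(z_masks[j], sign) == new_test(z_masks[j], sign));
            }
        }
        return 0;
    }
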
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
7
---
6
target/ppc/translate.c | 6 +++---
8
tcg/optimize.c | 5 ++---
7
target/ppc/translate/spe-impl.c.inc | 8 ++++----
9
1 file changed, 2 insertions(+), 3 deletions(-)
8
target/ppc/translate/vmx-impl.c.inc | 4 ++--
9
3 files changed, 9 insertions(+), 9 deletions(-)
10
10
11
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/ppc/translate.c
13
--- a/tcg/optimize.c
14
+++ b/target/ppc/translate.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void gen_bcond(DisasContext *ctx, int type)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
16
TCGv target;
16
17
17
static bool fold_shift(OptContext *ctx, TCGOp *op)
18
if (type == BCOND_LR || type == BCOND_CTR || type == BCOND_TAR) {
19
- target = tcg_temp_local_new();
20
+ target = tcg_temp_new();
21
if (type == BCOND_CTR) {
22
tcg_gen_mov_tl(target, cpu_ctr);
23
} else if (type == BCOND_TAR) {
24
@@ -XXX,XX +XXX,XX @@ static inline void gen_405_mulladd_insn(DisasContext *ctx, int opc2, int opc3,
25
{
18
{
26
TCGv t0, t1;
19
- uint64_t s_mask, z_mask, sign;
27
20
+ uint64_t s_mask, z_mask;
28
- t0 = tcg_temp_local_new();
21
TempOptInfo *t1, *t2;
29
- t1 = tcg_temp_local_new();
22
30
+ t0 = tcg_temp_new();
23
if (fold_const2(ctx, op) ||
31
+ t1 = tcg_temp_new();
24
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
32
25
* If the sign bit is known zero, then logical right shift
33
switch (opc3 & 0x0D) {
26
* will not reduce the number of input sign repetitions.
34
case 0x05:
27
*/
35
diff --git a/target/ppc/translate/spe-impl.c.inc b/target/ppc/translate/spe-impl.c.inc
28
- sign = -s_mask;
36
index XXXXXXX..XXXXXXX 100644
29
- if (sign && !(z_mask & sign)) {
37
--- a/target/ppc/translate/spe-impl.c.inc
30
+ if (~z_mask & -s_mask) {
38
+++ b/target/ppc/translate/spe-impl.c.inc
31
return fold_masks_s(ctx, op, s_mask);
39
@@ -XXX,XX +XXX,XX @@ static inline void gen_op_evsrwu(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
32
}
40
{
33
break;
41
TCGLabel *l1 = gen_new_label();
42
TCGLabel *l2 = gen_new_label();
43
- TCGv_i32 t0 = tcg_temp_local_new_i32();
44
+ TCGv_i32 t0 = tcg_temp_new_i32();
45
46
/* No error here: 6 bits are used */
47
tcg_gen_andi_i32(t0, arg2, 0x3F);
48
@@ -XXX,XX +XXX,XX @@ static inline void gen_op_evsrws(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
49
{
50
TCGLabel *l1 = gen_new_label();
51
TCGLabel *l2 = gen_new_label();
52
- TCGv_i32 t0 = tcg_temp_local_new_i32();
53
+ TCGv_i32 t0 = tcg_temp_new_i32();
54
55
/* No error here: 6 bits are used */
56
tcg_gen_andi_i32(t0, arg2, 0x3F);
57
@@ -XXX,XX +XXX,XX @@ static inline void gen_op_evslw(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
58
{
59
TCGLabel *l1 = gen_new_label();
60
TCGLabel *l2 = gen_new_label();
61
- TCGv_i32 t0 = tcg_temp_local_new_i32();
62
+ TCGv_i32 t0 = tcg_temp_new_i32();
63
64
/* No error here: 6 bits are used */
65
tcg_gen_andi_i32(t0, arg2, 0x3F);
66
@@ -XXX,XX +XXX,XX @@ static inline void gen_evsel(DisasContext *ctx)
67
TCGLabel *l2 = gen_new_label();
68
TCGLabel *l3 = gen_new_label();
69
TCGLabel *l4 = gen_new_label();
70
- TCGv_i32 t0 = tcg_temp_local_new_i32();
71
+ TCGv_i32 t0 = tcg_temp_new_i32();
72
73
tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3);
74
tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
75
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
76
index XXXXXXX..XXXXXXX 100644
77
--- a/target/ppc/translate/vmx-impl.c.inc
78
+++ b/target/ppc/translate/vmx-impl.c.inc
79
@@ -XXX,XX +XXX,XX @@ static bool do_vcmpq(DisasContext *ctx, arg_VX_bf *a, bool sign)
80
REQUIRE_INSNS_FLAGS2(ctx, ISA310);
81
REQUIRE_VECTOR(ctx);
82
83
- vra = tcg_temp_local_new_i64();
84
- vrb = tcg_temp_local_new_i64();
85
+ vra = tcg_temp_new_i64();
86
+ vrb = tcg_temp_new_i64();
87
gt = gen_new_label();
88
lt = gen_new_label();
89
done = gen_new_label();
--
2.34.1

--
2.43.0

This makes it easier to assign blame with perf.
1
Duplicate fold_sub_vec into fold_sub instead of calling it,
2
now that fold_sub_vec always returns true.
2
3
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/tcg.c | 9 ++++++---
7
tcg/optimize.c | 9 ++++++---
8
1 file changed, 6 insertions(+), 3 deletions(-)
8
1 file changed, 6 insertions(+), 3 deletions(-)
9
9
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
12
--- a/tcg/optimize.c
13
+++ b/tcg/tcg.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
14
@@ -XXX,XX +XXX,XX @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
15
fold_sub_to_neg(ctx, op)) {
16
return true;
17
}
18
- return false;
19
+ return finish_folding(ctx, op);
15
}
20
}
16
21
17
/* Reachable analysis : remove unreachable code. */
22
static bool fold_sub(OptContext *ctx, TCGOp *op)
18
-static void reachable_code_pass(TCGContext *s)
19
+static void __attribute__((noinline))
20
+reachable_code_pass(TCGContext *s)
21
{
23
{
22
TCGOp *op, *op_next, *op_prev;
24
- if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
23
bool dead = false;
25
+ if (fold_const2(ctx, op) ||
24
@@ -XXX,XX +XXX,XX @@ static void la_cross_call(TCGContext *s, int nt)
26
+ fold_xx_to_i(ctx, op, 0) ||
25
/* Liveness analysis : update the opc_arg_life array to tell if a
27
+ fold_xi_to_x(ctx, op, 0) ||
26
given input arguments is dead. Instructions updating dead
28
+ fold_sub_to_neg(ctx, op)) {
27
temporaries are removed. */
29
return true;
28
-static void liveness_pass_1(TCGContext *s)
30
}
29
+static void __attribute__((noinline))
31
30
+liveness_pass_1(TCGContext *s)
32
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
31
{
33
? INDEX_op_add_i32 : INDEX_op_add_i64);
32
int nb_globals = s->nb_globals;
34
op->args[2] = arg_new_constant(ctx, -val);
33
int nb_temps = s->nb_temps;
35
}
34
@@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s)
36
- return false;
37
+ return finish_folding(ctx, op);
35
}
38
}
36
39
37
/* Liveness analysis: Convert indirect regs to direct temporaries. */
40
static bool fold_sub2(OptContext *ctx, TCGOp *op)
38
-static bool liveness_pass_2(TCGContext *s)
39
+static bool __attribute__((noinline))
40
+liveness_pass_2(TCGContext *s)
41
{
42
int nb_globals = s->nb_globals;
43
int nb_temps, i;
--
2.34.1

--
2.43.0

Use TEMP_TB as that is more explicit about the default
1
Avoid the use of the OptContext slots.
2
lifetime of the data. While "global" and "local" used
3
to be contrasting, we have more lifetimes than that now.
4
2
5
Do not yet rename tcg_temp_local_new_*, just the enum.
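
As an illustration of the renamed lifetimes (a made-up sketch, not code from
the patch; all identifiers below are hypothetical): a front end still uses
tcg_temp_local_new_*() for a value that must survive a branch within the TB,
which now maps to TEMP_TB, while plain tcg_temp_new_*() temporaries keep the
shorter default lifetime:

    /* Sketch only: choosing a temp lifetime in a translator after this rename. */
    static void gen_example(TCGv_i32 cond_in)
    {
        TCGLabel *over = gen_new_label();
        TCGv_i32 scratch = tcg_temp_new_i32();       /* TEMP_NORMAL: basic-block lifetime */
        TCGv_i32 carried = tcg_temp_local_new_i32(); /* TEMP_TB: valid across the branch */

        tcg_gen_mov_i32(scratch, cond_in);
        tcg_gen_movi_i32(carried, 1);
        tcg_gen_brcondi_i32(TCG_COND_EQ, scratch, 0, over);
        tcg_gen_addi_i32(carried, carried, 1);
        gen_set_label(over);
        /* 'carried' is still valid here; 'scratch' is not guaranteed to be. */
        tcg_temp_free_i32(scratch);
        tcg_temp_free_i32(carried);
    }
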
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
5
---
10
include/tcg/tcg.h | 12 ++++++++----
6
tcg/optimize.c | 16 +++++++++-------
11
tcg/optimize.c | 2 +-
7
1 file changed, 9 insertions(+), 7 deletions(-)
12
tcg/tcg.c | 18 +++++++++---------
13
3 files changed, 18 insertions(+), 14 deletions(-)
14
8
15
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg.h
18
+++ b/include/tcg/tcg.h
19
@@ -XXX,XX +XXX,XX @@ typedef enum TCGTempVal {
20
typedef enum TCGTempKind {
21
/* Temp is dead at the end of all basic blocks. */
22
TEMP_NORMAL,
23
- /* Temp is live across conditional branch, but dead otherwise. */
24
+ /*
25
+ * Temp is dead at the end of the extended basic block (EBB),
26
+ * the single-entry multiple-exit region that falls through
27
+ * conditional branches.
28
+ */
29
TEMP_EBB,
30
- /* Temp is saved across basic blocks but dead at the end of TBs. */
31
- TEMP_LOCAL,
32
- /* Temp is saved across both basic blocks and translation blocks. */
33
+ /* Temp is live across the entire translation block, but dead at end. */
34
+ TEMP_TB,
35
+ /* Temp is live across the entire translation block, and between them. */
36
TEMP_GLOBAL,
37
/* Temp is in a fixed register. */
38
TEMP_FIXED,
39
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
40
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
41
--- a/tcg/optimize.c
11
--- a/tcg/optimize.c
42
+++ b/tcg/optimize.c
12
+++ b/tcg/optimize.c
43
@@ -XXX,XX +XXX,XX @@ static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2(OptContext *ctx, TCGOp *op)
44
} else if (i->kind > ts->kind) {
14
45
if (i->kind == TEMP_GLOBAL) {
15
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
46
g = i;
47
- } else if (i->kind == TEMP_LOCAL) {
48
+ } else if (i->kind == TEMP_TB) {
49
l = i;
50
}
51
}
52
diff --git a/tcg/tcg.c b/tcg/tcg.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tcg/tcg.c
55
+++ b/tcg/tcg.c
56
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
57
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
58
{
16
{
59
TCGContext *s = tcg_ctx;
17
+ uint64_t z_mask = -1, s_mask = 0;
60
- TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
18
+
61
+ TCGTempKind kind = temp_local ? TEMP_TB : TEMP_NORMAL;
19
/* We can't do any folding with a load, but we can record bits. */
62
TCGTemp *ts;
20
switch (op->opc) {
63
int idx, k;
21
CASE_OP_32_64(ld8s):
64
22
- ctx->s_mask = MAKE_64BIT_MASK(8, 56);
65
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
23
+ s_mask = INT8_MIN;
66
*/
24
break;
67
return;
25
CASE_OP_32_64(ld8u):
68
case TEMP_NORMAL:
26
- ctx->z_mask = MAKE_64BIT_MASK(0, 8);
69
- case TEMP_LOCAL:
27
+ z_mask = MAKE_64BIT_MASK(0, 8);
70
+ case TEMP_TB:
28
break;
29
CASE_OP_32_64(ld16s):
30
- ctx->s_mask = MAKE_64BIT_MASK(16, 48);
31
+ s_mask = INT16_MIN;
32
break;
33
CASE_OP_32_64(ld16u):
34
- ctx->z_mask = MAKE_64BIT_MASK(0, 16);
35
+ z_mask = MAKE_64BIT_MASK(0, 16);
36
break;
37
case INDEX_op_ld32s_i64:
38
- ctx->s_mask = MAKE_64BIT_MASK(32, 32);
39
+ s_mask = INT32_MIN;
40
break;
41
case INDEX_op_ld32u_i64:
42
- ctx->z_mask = MAKE_64BIT_MASK(0, 32);
43
+ z_mask = MAKE_64BIT_MASK(0, 32);
71
break;
44
break;
72
default:
45
default:
73
g_assert_not_reached();
46
g_assert_not_reached();
74
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_start(TCGContext *s)
47
}
75
case TEMP_EBB:
48
- return false;
76
val = TEMP_VAL_DEAD;
49
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
77
/* fall through */
50
}
78
- case TEMP_LOCAL:
51
79
+ case TEMP_TB:
52
static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
80
ts->mem_allocated = 0;
81
break;
82
default:
83
@@ -XXX,XX +XXX,XX @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
84
case TEMP_GLOBAL:
85
pstrcpy(buf, buf_size, ts->name);
86
break;
87
- case TEMP_LOCAL:
88
+ case TEMP_TB:
89
snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
90
break;
91
case TEMP_EBB:
92
@@ -XXX,XX +XXX,XX @@ static void la_bb_end(TCGContext *s, int ng, int nt)
93
switch (ts->kind) {
94
case TEMP_FIXED:
95
case TEMP_GLOBAL:
96
- case TEMP_LOCAL:
97
+ case TEMP_TB:
98
state = TS_DEAD | TS_MEM;
99
break;
100
case TEMP_NORMAL:
101
@@ -XXX,XX +XXX,XX @@ static void la_bb_sync(TCGContext *s, int ng, int nt)
102
int state;
103
104
switch (ts->kind) {
105
- case TEMP_LOCAL:
106
+ case TEMP_TB:
107
state = ts->state;
108
ts->state = state | TS_MEM;
109
if (state != TS_DEAD) {
110
@@ -XXX,XX +XXX,XX @@ static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
111
case TEMP_FIXED:
112
return;
113
case TEMP_GLOBAL:
114
- case TEMP_LOCAL:
115
+ case TEMP_TB:
116
new_type = TEMP_VAL_MEM;
117
break;
118
case TEMP_NORMAL:
119
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
120
TCGTemp *ts = &s->temps[i];
121
122
switch (ts->kind) {
123
- case TEMP_LOCAL:
124
+ case TEMP_TB:
125
temp_save(s, ts, allocated_regs);
126
break;
127
case TEMP_NORMAL:
128
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
129
* Keep tcg_debug_asserts for safety.
130
*/
131
switch (ts->kind) {
132
- case TEMP_LOCAL:
133
+ case TEMP_TB:
134
tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
135
break;
136
case TEMP_NORMAL:
--
2.34.1

--
2.43.0

From: Anton Johansson via <qemu-devel@nongnu.org>
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-7-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
target/arm/tcg/translate.h | 2 +-
4
tcg/optimize.c | 2 +-
9
target/arm/cpu.c | 8 ++++----
5
1 file changed, 1 insertion(+), 1 deletion(-)
10
target/arm/tcg/translate-a64.c | 8 ++++----
11
target/arm/tcg/translate.c | 6 +++---
12
4 files changed, 12 insertions(+), 12 deletions(-)
13
6
14
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
16
--- a/target/arm/tcg/translate.h
9
--- a/tcg/optimize.c
17
+++ b/target/arm/tcg/translate.h
10
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
11
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
19
/* The address of the current instruction being translated. */
12
TCGType type;
20
target_ulong pc_curr;
13
21
/*
14
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
22
- * For TARGET_TB_PCREL, the full value of cpu_pc is not known
15
- return false;
23
+ * For CF_PCREL, the full value of cpu_pc is not known
16
+ return finish_folding(ctx, op);
24
* (although the page offset is known). For convenience, the
25
* translation loop uses the full virtual address that triggered
26
* the translation, from base.pc_start through pc_curr.
27
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/cpu.c
30
+++ b/target/arm/cpu.c
31
@@ -XXX,XX +XXX,XX @@ static vaddr arm_cpu_get_pc(CPUState *cs)
32
void arm_cpu_synchronize_from_tb(CPUState *cs,
33
const TranslationBlock *tb)
34
{
35
- /* The program counter is always up to date with TARGET_TB_PCREL. */
36
- if (!TARGET_TB_PCREL) {
37
+ /* The program counter is always up to date with CF_PCREL. */
38
+ if (!(tb_cflags(tb) & CF_PCREL)) {
39
CPUARMState *env = cs->env_ptr;
40
/*
41
* It's OK to look at env for the current mode here, because it's
42
@@ -XXX,XX +XXX,XX @@ void arm_restore_state_to_opc(CPUState *cs,
43
CPUARMState *env = cs->env_ptr;
44
45
if (is_a64(env)) {
46
- if (TARGET_TB_PCREL) {
47
+ if (tb_cflags(tb) & CF_PCREL) {
48
env->pc = (env->pc & TARGET_PAGE_MASK) | data[0];
49
} else {
50
env->pc = data[0];
51
@@ -XXX,XX +XXX,XX @@ void arm_restore_state_to_opc(CPUState *cs,
52
env->condexec_bits = 0;
53
env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
54
} else {
55
- if (TARGET_TB_PCREL) {
56
+ if (tb_cflags(tb) & CF_PCREL) {
57
env->regs[15] = (env->regs[15] & TARGET_PAGE_MASK) | data[0];
58
} else {
59
env->regs[15] = data[0];
60
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/target/arm/tcg/translate-a64.c
63
+++ b/target/arm/tcg/translate-a64.c
64
@@ -XXX,XX +XXX,XX @@ static void reset_btype(DisasContext *s)
65
static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
66
{
67
assert(s->pc_save != -1);
68
- if (TARGET_TB_PCREL) {
69
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
70
tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
71
} else {
72
tcg_gen_movi_i64(dest, s->pc_curr + diff);
73
@@ -XXX,XX +XXX,XX @@ static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
74
* update to pc to the unlinked path. A long chain of links
75
* can thus avoid many updates to the PC.
76
*/
77
- if (TARGET_TB_PCREL) {
78
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
79
gen_a64_update_pc(s, diff);
80
tcg_gen_goto_tb(n);
81
} else {
82
@@ -XXX,XX +XXX,XX @@ static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
83
if (page) {
84
/* ADRP (page based) */
85
offset <<= 12;
86
- /* The page offset is ok for TARGET_TB_PCREL. */
87
+ /* The page offset is ok for CF_PCREL. */
88
offset -= s->pc_curr & 0xfff;
89
}
17
}
90
18
91
@@ -XXX,XX +XXX,XX @@ static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
19
type = ctx->type;
92
DisasContext *dc = container_of(dcbase, DisasContext, base);
93
target_ulong pc_arg = dc->base.pc_next;
94
95
- if (TARGET_TB_PCREL) {
96
+ if (tb_cflags(dcbase->tb) & CF_PCREL) {
97
pc_arg &= ~TARGET_PAGE_MASK;
98
}
99
tcg_gen_insn_start(pc_arg, 0, 0);
100
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/target/arm/tcg/translate.c
103
+++ b/target/arm/tcg/translate.c
104
@@ -XXX,XX +XXX,XX @@ static target_long jmp_diff(DisasContext *s, target_long diff)
105
static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
106
{
107
assert(s->pc_save != -1);
108
- if (TARGET_TB_PCREL) {
109
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
110
tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
111
} else {
112
tcg_gen_movi_i32(var, s->pc_curr + diff);
113
@@ -XXX,XX +XXX,XX @@ static void gen_goto_tb(DisasContext *s, int n, target_long diff)
114
* update to pc to the unlinked path. A long chain of links
115
* can thus avoid many updates to the PC.
116
*/
117
- if (TARGET_TB_PCREL) {
118
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
119
gen_update_pc(s, diff);
120
tcg_gen_goto_tb(n);
121
} else {
122
@@ -XXX,XX +XXX,XX @@ static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
123
uint32_t condexec_bits;
124
target_ulong pc_arg = dc->base.pc_next;
125
126
- if (TARGET_TB_PCREL) {
127
+ if (tb_cflags(dcbase->tb) & CF_PCREL) {
128
pc_arg &= ~TARGET_PAGE_MASK;
129
}
130
if (dc->eci) {
131
--
2.34.1

--
2.43.0

Since we now get TEMP_TB temporaries by default, we no longer
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
need to make copies across these loops. These were the only
2
Remove fold_masks as the function becomes unused.
3
uses of new_tmp_a64_local(), so remove that as well.
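
For the fold_xor hunk further down in this patch, the mask combination can be
checked exhaustively on 8-bit values with a small standalone program
(illustrative only, not QEMU code; z_mask/s_mask semantics as in the
tcg/optimize.c comments: a z bit set means the value bit may be nonzero, an
s bit set means the value bit matches the msb):

    #include <stdint.h>
    #include <assert.h>

    /* s-mask of a concrete 8-bit value. */
    static uint8_t s_of(uint8_t v)
    {
        uint8_t msb = v >> 7, s = 0;
        for (int i = 0; i < 8; i++) {
            if (((v >> i) & 1) == msb) {
                s |= 1u << i;
            }
        }
        return s;
    }

    int main(void)
    {
        for (unsigned x = 0; x < 256; x++) {
            for (unsigned y = 0; y < 256; y++) {
                uint8_t r = (uint8_t)(x ^ y);
                uint8_t z_claim = (uint8_t)(x | y);                    /* z1 | z2 */
                uint8_t s_claim = s_of((uint8_t)x) & s_of((uint8_t)y); /* s1 & s2 */
                assert((uint8_t)(r & ~z_claim) == 0);       /* no unexpected set bit  */
                assert((uint8_t)(s_claim & ~s_of(r)) == 0); /* claimed s bits hold    */
            }
        }
        return 0;
    }
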
4
3
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
target/arm/tcg/translate-a64.h | 1 -
7
tcg/optimize.c | 18 ++++++++----------
9
target/arm/tcg/translate-a64.c | 6 ------
8
1 file changed, 8 insertions(+), 10 deletions(-)
10
target/arm/tcg/translate-sve.c | 32 --------------------------------
11
3 files changed, 39 deletions(-)
12
9
13
diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
15
--- a/target/arm/tcg/translate-a64.h
12
--- a/tcg/optimize.c
16
+++ b/target/arm/tcg/translate-a64.h
13
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
18
#define TARGET_ARM_TRANSLATE_A64_H
15
return fold_masks_zs(ctx, op, -1, s_mask);
19
20
TCGv_i64 new_tmp_a64(DisasContext *s);
21
-TCGv_i64 new_tmp_a64_local(DisasContext *s);
22
TCGv_i64 new_tmp_a64_zero(DisasContext *s);
23
TCGv_i64 cpu_reg(DisasContext *s, int reg);
24
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg);
25
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/target/arm/tcg/translate-a64.c
28
+++ b/target/arm/tcg/translate-a64.c
29
@@ -XXX,XX +XXX,XX @@ TCGv_i64 new_tmp_a64(DisasContext *s)
30
return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
31
}
16
}
32
17
33
-TCGv_i64 new_tmp_a64_local(DisasContext *s)
18
-static bool fold_masks(OptContext *ctx, TCGOp *op)
34
-{
19
-{
35
- assert(s->tmp_a64_count < TMP_A64_MAX);
20
- return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
36
- return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_local_new_i64();
37
-}
21
-}
38
-
22
-
39
TCGv_i64 new_tmp_a64_zero(DisasContext *s)
23
/*
24
* An "affected" mask bit is 0 if and only if the result is identical
25
* to the first input. Thus if the entire mask is 0, the operation
26
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
27
28
static bool fold_xor(OptContext *ctx, TCGOp *op)
40
{
29
{
41
TCGv_i64 t = new_tmp_a64(s);
30
+ uint64_t z_mask, s_mask;
42
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
31
+ TempOptInfo *t1, *t2;
43
index XXXXXXX..XXXXXXX 100644
32
+
44
--- a/target/arm/tcg/translate-sve.c
33
if (fold_const2_commutative(ctx, op) ||
45
+++ b/target/arm/tcg/translate-sve.c
34
fold_xx_to_i(ctx, op, 0) ||
46
@@ -XXX,XX +XXX,XX @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
35
fold_xi_to_x(ctx, op, 0) ||
47
TCGLabel *loop = gen_new_label();
36
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
48
TCGv_ptr tp, i = tcg_const_local_ptr(0);
37
return true;
49
50
- /* Copy the clean address into a local temp, live across the loop. */
51
- t0 = clean_addr;
52
- clean_addr = new_tmp_a64_local(s);
53
- tcg_gen_mov_i64(clean_addr, t0);
54
-
55
- if (base != cpu_env) {
56
- TCGv_ptr b = tcg_temp_local_new_ptr();
57
- tcg_gen_mov_ptr(b, base);
58
- base = b;
59
- }
60
-
61
gen_set_label(loop);
62
63
t0 = tcg_temp_new_i64();
64
@@ -XXX,XX +XXX,XX @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
65
66
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
67
tcg_temp_free_ptr(i);
68
-
69
- if (base != cpu_env) {
70
- tcg_temp_free_ptr(base);
71
- assert(len_remain == 0);
72
- }
73
}
38
}
74
39
75
/*
40
- ctx->z_mask = arg_info(op->args[1])->z_mask
76
@@ -XXX,XX +XXX,XX @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
41
- | arg_info(op->args[2])->z_mask;
77
TCGLabel *loop = gen_new_label();
42
- ctx->s_mask = arg_info(op->args[1])->s_mask
78
TCGv_ptr tp, i = tcg_const_local_ptr(0);
43
- & arg_info(op->args[2])->s_mask;
79
44
- return fold_masks(ctx, op);
80
- /* Copy the clean address into a local temp, live across the loop. */
45
+ t1 = arg_info(op->args[1]);
81
- t0 = clean_addr;
46
+ t2 = arg_info(op->args[2]);
82
- clean_addr = new_tmp_a64_local(s);
47
+ z_mask = t1->z_mask | t2->z_mask;
83
- tcg_gen_mov_i64(clean_addr, t0);
48
+ s_mask = t1->s_mask & t2->s_mask;
84
-
49
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
85
- if (base != cpu_env) {
50
}
86
- TCGv_ptr b = tcg_temp_local_new_ptr();
51
87
- tcg_gen_mov_ptr(b, base);
52
static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
88
- base = b;
89
- }
90
-
91
gen_set_label(loop);
92
93
t0 = tcg_temp_new_i64();
94
@@ -XXX,XX +XXX,XX @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
95
96
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
97
tcg_temp_free_ptr(i);
98
-
99
- if (base != cpu_env) {
100
- tcg_temp_free_ptr(base);
101
- assert(len_remain == 0);
102
- }
103
}
104
105
/* Predicate register stores can be any multiple of 2. */
106
--
2.34.1

--
2.43.0

From: Anton Johansson via <qemu-devel@nongnu.org>
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-15-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
target/sparc/cpu.c | 4 +++-
4
tcg/optimize.c | 2 +-
9
1 file changed, 3 insertions(+), 1 deletion(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
10
6
11
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
13
--- a/target/sparc/cpu.c
9
--- a/tcg/optimize.c
14
+++ b/target/sparc/cpu.c
10
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@
11
@@ -XXX,XX +XXX,XX @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
16
#include "exec/exec-all.h"
12
return fold_orc(ctx, op);
17
#include "hw/qdev-properties.h"
13
}
18
#include "qapi/visitor.h"
14
}
19
+#include "tcg/tcg.h"
15
- return false;
20
16
+ return finish_folding(ctx, op);
21
//#define DEBUG_FEATURES
22
23
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_synchronize_from_tb(CPUState *cs,
24
{
25
SPARCCPU *cpu = SPARC_CPU(cs);
26
27
- cpu->env.pc = tb_pc(tb);
28
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
29
+ cpu->env.pc = tb->pc;
30
cpu->env.npc = tb->cs_base;
31
}
17
}
32
18
19
/* Propagate constants and copies, fold constant expressions. */
--
2.34.1

--
2.43.0

From: Anton Johansson via <qemu-devel@nongnu.org>
1
All non-default cases now finish folding within each function.
2
Do the same with the default case and assert it is done after.
2
3
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-27-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
target/arm/cpu.c | 4 ++--
7
tcg/optimize.c | 6 ++----
9
1 file changed, 2 insertions(+), 2 deletions(-)
8
1 file changed, 2 insertions(+), 4 deletions(-)
10
9
11
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/cpu.c
12
--- a/tcg/optimize.c
14
+++ b/target/arm/cpu.c
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ void arm_cpu_synchronize_from_tb(CPUState *cs,
14
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
16
* never possible for an AArch64 TB to chain to an AArch32 TB.
15
done = true;
17
*/
16
break;
18
if (is_a64(env)) {
17
default:
19
- env->pc = tb_pc(tb);
18
+ done = finish_folding(&ctx, op);
20
+ env->pc = tb->pc;
19
break;
21
} else {
22
- env->regs[15] = tb_pc(tb);
23
+ env->regs[15] = tb->pc;
24
}
20
}
21
-
22
- if (!done) {
23
- finish_folding(&ctx, op);
24
- }
25
+ tcg_debug_assert(done);
25
}
26
}
26
}
27
}
--
2.34.1

--
2.43.0

This wasn't actually used at all, just some unused
1
All mask setting is now done with parameters via fold_masks_*.
2
macro re-definitions.
3
2
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
target/hppa/translate.c | 3 ---
6
tcg/optimize.c | 13 -------------
8
1 file changed, 3 deletions(-)
7
1 file changed, 13 deletions(-)
9
8
10
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/target/hppa/translate.c
11
--- a/tcg/optimize.c
13
+++ b/target/hppa/translate.c
12
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
15
#undef TCGv
14
QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
16
#undef tcg_temp_new
15
17
#undef tcg_global_mem_new
16
/* In flight values from optimization. */
18
-#undef tcg_temp_local_new
17
- uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
19
#undef tcg_temp_free
18
- uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
20
19
TCGType type;
21
#if TARGET_LONG_BITS == 64
20
} OptContext;
22
@@ -XXX,XX +XXX,XX @@
21
23
22
@@ -XXX,XX +XXX,XX @@ static bool finish_folding(OptContext *ctx, TCGOp *op)
24
#define tcg_temp_new tcg_temp_new_i64
23
for (i = 0; i < nb_oargs; i++) {
25
#define tcg_global_mem_new tcg_global_mem_new_i64
24
TCGTemp *ts = arg_temp(op->args[i]);
26
-#define tcg_temp_local_new tcg_temp_local_new_i64
25
reset_ts(ctx, ts);
27
#define tcg_temp_free tcg_temp_free_i64
26
- /*
28
27
- * Save the corresponding known-zero/sign bits mask for the
29
#define tcg_gen_movi_reg tcg_gen_movi_i64
28
- * first output argument (only one supported so far).
30
@@ -XXX,XX +XXX,XX @@
29
- */
31
#define TCGv_reg TCGv_i32
30
- if (i == 0) {
32
#define tcg_temp_new tcg_temp_new_i32
31
- ts_info(ts)->z_mask = ctx->z_mask;
33
#define tcg_global_mem_new tcg_global_mem_new_i32
32
- }
34
-#define tcg_temp_local_new tcg_temp_local_new_i32
33
}
35
#define tcg_temp_free tcg_temp_free_i32
34
return true;
36
35
}
37
#define tcg_gen_movi_reg tcg_gen_movi_i32
36
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
37
ctx.type = TCG_TYPE_I32;
38
}
39
40
- /* Assume all bits affected, no bits known zero, no sign reps. */
41
- ctx.z_mask = -1;
42
- ctx.s_mask = 0;
43
-
44
/*
45
* Process each opcode.
46
* Sorted alphabetically by opcode as much as possible.
--
2.34.1

--
2.43.0

From: Anton Johansson via <qemu-devel@nongnu.org>
1
All instances of s_mask have been converted to the new
2
representation. We can now re-enable usage.
2
3
3
Adds a new field to TranslationBlock.cflags denoting whether or not the
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
instructions of a given translation block are pc-relative. This field
5
aims to replace the macro `TARGET_TB_PCREL`.
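
A minimal sketch (illustrative only, mirroring how later patches in this
series consume the flag) of testing the new bit from a TranslationBlock:

    /* Hypothetical helper, not part of the patch. */
    static bool tb_is_pcrel(const TranslationBlock *tb)
    {
        return tb_cflags(tb) & CF_PCREL;
    }
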
6
7
Signed-off-by: Anton Johansson <anjo@rev.ng>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Message-Id: <20230227135202.9710-2-anjo@rev.ng>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
6
---
13
include/exec/exec-all.h | 1 +
7
tcg/optimize.c | 4 ++--
14
1 file changed, 1 insertion(+)
8
1 file changed, 2 insertions(+), 2 deletions(-)
15
9
16
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
18
--- a/include/exec/exec-all.h
12
--- a/tcg/optimize.c
19
+++ b/include/exec/exec-all.h
13
+++ b/tcg/optimize.c
20
@@ -XXX,XX +XXX,XX @@ struct TranslationBlock {
14
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
21
#define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */
15
g_assert_not_reached();
22
#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */
16
}
23
#define CF_NOIRQ 0x00100000 /* Generate an uninterruptible TB */
17
24
+#define CF_PCREL 0x00200000 /* Opcodes in TB are PC-relative */
18
- if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
25
#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
19
+ if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
26
#define CF_CLUSTER_SHIFT 24
20
return true;
21
}
22
23
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
24
s_mask = s_mask_old >> pos;
25
s_mask |= -1ull << (len - 1);
26
27
- if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
28
+ if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
29
return true;
30
}
27
31
28
--
32
--
29
2.34.1
33
2.43.0
30
31
diff view generated by jsdifflib
1
Attempt to reduce the lifetime of TEMP_TB.
1
The big comment just above says functions should be sorted.
2
Add forward declarations as needed.
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
tcg/tcg.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
7
tcg/optimize.c | 114 +++++++++++++++++++++++++------------------------
7
1 file changed, 70 insertions(+)
8
1 file changed, 59 insertions(+), 55 deletions(-)
8
9
9
diff --git a/tcg/tcg.c b/tcg/tcg.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/tcg.c
12
--- a/tcg/optimize.c
12
+++ b/tcg/tcg.c
13
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static void la_cross_call(TCGContext *s, int nt)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
14
}
15
* 3) those that produce information about the result value.
16
*/
17
18
+static bool fold_or(OptContext *ctx, TCGOp *op);
19
+static bool fold_orc(OptContext *ctx, TCGOp *op);
20
+static bool fold_xor(OptContext *ctx, TCGOp *op);
21
+
22
static bool fold_add(OptContext *ctx, TCGOp *op)
23
{
24
if (fold_const2_commutative(ctx, op) ||
25
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
26
return fold_masks_zs(ctx, op, z_mask, s_mask);
15
}
27
}
16
28
17
+/*
29
+static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
18
+ * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
19
+ * to TEMP_EBB, if possible.
20
+ */
21
+static void __attribute__((noinline))
22
+liveness_pass_0(TCGContext *s)
23
+{
30
+{
24
+ void * const multiple_ebb = (void *)(uintptr_t)-1;
31
+ /* If true and false values are the same, eliminate the cmp. */
25
+ int nb_temps = s->nb_temps;
32
+ if (args_are_copies(op->args[2], op->args[3])) {
26
+ TCGOp *op, *ebb;
33
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
27
+
28
+ for (int i = s->nb_globals; i < nb_temps; ++i) {
29
+ s->temps[i].state_ptr = NULL;
30
+ }
34
+ }
31
+
35
+
32
+ /*
36
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
33
+ * Represent each EBB by the op at which it begins. In the case of
37
+ uint64_t tv = arg_info(op->args[2])->val;
34
+ * the first EBB, this is the first op, otherwise it is a label.
38
+ uint64_t fv = arg_info(op->args[3])->val;
35
+ * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
36
+ * within a single EBB, else MULTIPLE_EBB.
37
+ */
38
+ ebb = QTAILQ_FIRST(&s->ops);
39
+ QTAILQ_FOREACH(op, &s->ops, link) {
40
+ const TCGOpDef *def;
41
+ int nb_oargs, nb_iargs;
42
+
39
+
43
+ switch (op->opc) {
40
+ if (tv == -1 && fv == 0) {
44
+ case INDEX_op_set_label:
41
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
45
+ ebb = op;
46
+ continue;
47
+ case INDEX_op_discard:
48
+ continue;
49
+ case INDEX_op_call:
50
+ nb_oargs = TCGOP_CALLO(op);
51
+ nb_iargs = TCGOP_CALLI(op);
52
+ break;
53
+ default:
54
+ def = &tcg_op_defs[op->opc];
55
+ nb_oargs = def->nb_oargs;
56
+ nb_iargs = def->nb_iargs;
57
+ break;
58
+ }
42
+ }
59
+
43
+ if (tv == 0 && fv == -1) {
60
+ for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
44
+ if (TCG_TARGET_HAS_not_vec) {
61
+ TCGTemp *ts = arg_temp(op->args[i]);
45
+ op->opc = INDEX_op_not_vec;
62
+
46
+ return fold_not(ctx, op);
63
+ if (ts->kind != TEMP_TB) {
47
+ } else {
64
+ continue;
48
+ op->opc = INDEX_op_xor_vec;
65
+ }
49
+ op->args[2] = arg_new_constant(ctx, -1);
66
+ if (ts->state_ptr == NULL) {
50
+ return fold_xor(ctx, op);
67
+ ts->state_ptr = ebb;
68
+ } else if (ts->state_ptr != ebb) {
69
+ ts->state_ptr = multiple_ebb;
70
+ }
51
+ }
71
+ }
52
+ }
72
+ }
53
+ }
73
+
54
+ if (arg_is_const(op->args[2])) {
74
+ /*
55
+ uint64_t tv = arg_info(op->args[2])->val;
75
+ * For TEMP_TB that turned out not to be used beyond one EBB,
56
+ if (tv == -1) {
76
+ * reduce the liveness to TEMP_EBB.
57
+ op->opc = INDEX_op_or_vec;
77
+ */
58
+ op->args[2] = op->args[3];
78
+ for (int i = s->nb_globals; i < nb_temps; ++i) {
59
+ return fold_or(ctx, op);
79
+ TCGTemp *ts = &s->temps[i];
60
+ }
80
+ if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
61
+ if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
81
+ ts->kind = TEMP_EBB;
62
+ op->opc = INDEX_op_andc_vec;
63
+ op->args[2] = op->args[1];
64
+ op->args[1] = op->args[3];
65
+ return fold_andc(ctx, op);
82
+ }
66
+ }
83
+ }
67
+ }
68
+ if (arg_is_const(op->args[3])) {
69
+ uint64_t fv = arg_info(op->args[3])->val;
70
+ if (fv == 0) {
71
+ op->opc = INDEX_op_and_vec;
72
+ return fold_and(ctx, op);
73
+ }
74
+ if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
75
+ op->opc = INDEX_op_orc_vec;
76
+ op->args[2] = op->args[1];
77
+ op->args[1] = op->args[3];
78
+ return fold_orc(ctx, op);
79
+ }
80
+ }
81
+ return finish_folding(ctx, op);
84
+}
82
+}
85
+
83
+
86
/* Liveness analysis : update the opc_arg_life array to tell if a
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
87
given input arguments is dead. Instructions updating dead
85
{
88
temporaries are removed. */
86
int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
89
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
87
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
90
#endif
88
return fold_masks_zs(ctx, op, z_mask, s_mask);
91
89
}
92
reachable_code_pass(s);
90
93
+ liveness_pass_0(s);
91
-static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
94
liveness_pass_1(s);
92
-{
95
93
- /* If true and false values are the same, eliminate the cmp. */
96
if (s->nb_indirects > 0) {
94
- if (args_are_copies(op->args[2], op->args[3])) {
95
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
96
- }
97
-
98
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
99
- uint64_t tv = arg_info(op->args[2])->val;
100
- uint64_t fv = arg_info(op->args[3])->val;
101
-
102
- if (tv == -1 && fv == 0) {
103
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
104
- }
105
- if (tv == 0 && fv == -1) {
106
- if (TCG_TARGET_HAS_not_vec) {
107
- op->opc = INDEX_op_not_vec;
108
- return fold_not(ctx, op);
109
- } else {
110
- op->opc = INDEX_op_xor_vec;
111
- op->args[2] = arg_new_constant(ctx, -1);
112
- return fold_xor(ctx, op);
113
- }
114
- }
115
- }
116
- if (arg_is_const(op->args[2])) {
117
- uint64_t tv = arg_info(op->args[2])->val;
118
- if (tv == -1) {
119
- op->opc = INDEX_op_or_vec;
120
- op->args[2] = op->args[3];
121
- return fold_or(ctx, op);
122
- }
123
- if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
124
- op->opc = INDEX_op_andc_vec;
125
- op->args[2] = op->args[1];
126
- op->args[1] = op->args[3];
127
- return fold_andc(ctx, op);
128
- }
129
- }
130
- if (arg_is_const(op->args[3])) {
131
- uint64_t fv = arg_info(op->args[3])->val;
132
- if (fv == 0) {
133
- op->opc = INDEX_op_and_vec;
134
- return fold_and(ctx, op);
135
- }
136
- if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
137
- op->opc = INDEX_op_orc_vec;
138
- op->args[2] = op->args[1];
139
- op->args[1] = op->args[3];
140
- return fold_orc(ctx, op);
141
- }
142
- }
143
- return finish_folding(ctx, op);
144
-}
145
-
146
/* Propagate constants and copies, fold constant expressions. */
147
void tcg_optimize(TCGContext *s)
148
{
--
2.34.1

--
2.43.0

This is now equivalent to gen_tmp.
1
The big comment just above says functions should be sorted.
2
2
3
Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
target/hexagon/idef-parser/parser-helpers.c | 24 ++-------------------
6
tcg/optimize.c | 60 +++++++++++++++++++++++++-------------------------
8
1 file changed, 2 insertions(+), 22 deletions(-)
7
1 file changed, 30 insertions(+), 30 deletions(-)
9
8
10
diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/target/hexagon/idef-parser/parser-helpers.c
11
--- a/tcg/optimize.c
13
+++ b/target/hexagon/idef-parser/parser-helpers.c
12
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ HexValue gen_tmp(Context *c,
13
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
15
return rvalue;
14
return true;
16
}
15
}
17
16
18
-HexValue gen_tmp_local(Context *c,
17
+static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
19
- YYLTYPE *locp,
18
+{
20
- unsigned bit_width,
19
+ /* Canonicalize the comparison to put immediate second. */
21
- HexSignedness signedness)
20
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
21
+ op->args[3] = tcg_swap_cond(op->args[3]);
22
+ }
23
+ return finish_folding(ctx, op);
24
+}
25
+
26
+static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
27
+{
28
+ /* If true and false values are the same, eliminate the cmp. */
29
+ if (args_are_copies(op->args[3], op->args[4])) {
30
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
31
+ }
32
+
33
+ /* Canonicalize the comparison to put immediate second. */
34
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
35
+ op->args[5] = tcg_swap_cond(op->args[5]);
36
+ }
37
+ /*
38
+ * Canonicalize the "false" input reg to match the destination,
39
+ * so that the tcg backend can implement "move if true".
40
+ */
41
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
42
+ op->args[5] = tcg_invert_cond(op->args[5]);
43
+ }
44
+ return finish_folding(ctx, op);
45
+}
46
+
47
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
48
{
49
uint64_t z_mask, s_mask;
50
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
51
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
52
}
53
54
-static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
22
-{
55
-{
23
- HexValue rvalue;
56
- /* Canonicalize the comparison to put immediate second. */
24
- assert(bit_width == 32 || bit_width == 64);
57
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
25
- memset(&rvalue, 0, sizeof(HexValue));
58
- op->args[3] = tcg_swap_cond(op->args[3]);
26
- rvalue.type = TEMP;
59
- }
27
- rvalue.bit_width = bit_width;
60
- return finish_folding(ctx, op);
28
- rvalue.signedness = signedness;
29
- rvalue.is_dotnew = false;
30
- rvalue.is_manual = false;
31
- rvalue.tmp.index = c->inst.tmp_count;
32
- OUT(c, locp, "TCGv_i", &bit_width, " tmp_", &c->inst.tmp_count,
33
- " = tcg_temp_new_i", &bit_width, "();\n");
34
- c->inst.tmp_count++;
35
- return rvalue;
36
-}
61
-}
37
-
62
-
38
HexValue gen_tmp_value(Context *c,
63
-static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
39
YYLTYPE *locp,
64
-{
40
const char *value,
65
- /* If true and false values are the same, eliminate the cmp. */
41
@@ -XXX,XX +XXX,XX @@ HexValue gen_rvalue_sat(Context *c, YYLTYPE *locp, HexSat *sat,
66
- if (args_are_copies(op->args[3], op->args[4])) {
42
assert_signedness(c, locp, sat->signedness);
67
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
43
68
- }
44
unsigned_str = (sat->signedness == UNSIGNED) ? "u" : "";
69
-
45
- res = gen_tmp_local(c, locp, value->bit_width, sat->signedness);
70
- /* Canonicalize the comparison to put immediate second. */
46
- ovfl = gen_tmp_local(c, locp, 32, sat->signedness);
71
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
47
+ res = gen_tmp(c, locp, value->bit_width, sat->signedness);
72
- op->args[5] = tcg_swap_cond(op->args[5]);
48
+ ovfl = gen_tmp(c, locp, 32, sat->signedness);
73
- }
49
OUT(c, locp, "gen_sat", unsigned_str, "_", bit_suffix, "_ovfl(");
74
- /*
50
OUT(c, locp, &ovfl, ", ", &res, ", ", value, ", ", &width->imm.value,
75
- * Canonicalize the "false" input reg to match the destination,
51
");\n");
76
- * so that the tcg backend can implement "move if true".
77
- */
78
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
79
- op->args[5] = tcg_invert_cond(op->args[5]);
80
- }
81
- return finish_folding(ctx, op);
82
-}
83
-
84
static bool fold_sextract(OptContext *ctx, TCGOp *op)
85
{
86
uint64_t z_mask, s_mask, s_mask_old;
52
--
87
--
53
2.34.1
88
2.43.0
54
55
Since tcg_temp_new is now identical, use that.

We currently have a flag, float_muladd_halve_result, to scale
the result by 2**-1. Extend this to handle arbitrary scaling.
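
As an illustrative sketch of the new interface (the wrapper name below is
made up; only the float*_muladd_scalbn() prototypes added in this patch
are real), a scale of -1 reproduces the old halve-result behaviour and a
scale of 0 is a plain fused multiply-add:

    #include "qemu/osdep.h"
    #include "fpu/softfloat.h"

    /* Compute (a * b + c) * 2**-1 with a single rounding step. */
    static float32 madd_halved(float32 a, float32 b, float32 c,
                               float_status *s)
    {
        /* Replaces float32_muladd(a, b, c, float_muladd_halve_result, s). */
        return float32_muladd_scalbn(a, b, c, -1, 0, s);
    }
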
2
3
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
target/mips/tcg/translate.c | 57 ++++++------------------
7
include/fpu/softfloat.h | 6 ++++
7
target/mips/tcg/nanomips_translate.c.inc | 4 +-
8
fpu/softfloat.c | 58 ++++++++++++++++++++++-----------------
8
2 files changed, 16 insertions(+), 45 deletions(-)
9
fpu/softfloat-parts.c.inc | 7 +++--
9
10
3 files changed, 44 insertions(+), 27 deletions(-)
10
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
11
12
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/target/mips/tcg/translate.c
14
--- a/include/fpu/softfloat.h
13
+++ b/target/mips/tcg/translate.c
15
+++ b/include/fpu/softfloat.h
14
@@ -XXX,XX +XXX,XX @@ static void gen_arith_imm(DisasContext *ctx, uint32_t opc,
16
@@ -XXX,XX +XXX,XX @@ float16 float16_add(float16, float16, float_status *status);
15
switch (opc) {
17
float16 float16_sub(float16, float16, float_status *status);
16
case OPC_ADDI:
18
float16 float16_mul(float16, float16, float_status *status);
17
{
19
float16 float16_muladd(float16, float16, float16, int, float_status *status);
18
- TCGv t0 = tcg_temp_local_new();
20
+float16 float16_muladd_scalbn(float16, float16, float16,
19
+ TCGv t0 = tcg_temp_new();
21
+ int, int, float_status *status);
20
TCGv t1 = tcg_temp_new();
22
float16 float16_div(float16, float16, float_status *status);
21
TCGv t2 = tcg_temp_new();
23
float16 float16_scalbn(float16, int, float_status *status);
22
TCGLabel *l1 = gen_new_label();
24
float16 float16_min(float16, float16, float_status *status);
23
@@ -XXX,XX +XXX,XX @@ static void gen_arith_imm(DisasContext *ctx, uint32_t opc,
25
@@ -XXX,XX +XXX,XX @@ float32 float32_mul(float32, float32, float_status *status);
24
#if defined(TARGET_MIPS64)
26
float32 float32_div(float32, float32, float_status *status);
25
case OPC_DADDI:
27
float32 float32_rem(float32, float32, float_status *status);
26
{
28
float32 float32_muladd(float32, float32, float32, int, float_status *status);
27
- TCGv t0 = tcg_temp_local_new();
29
+float32 float32_muladd_scalbn(float32, float32, float32,
28
+ TCGv t0 = tcg_temp_new();
30
+ int, int, float_status *status);
29
TCGv t1 = tcg_temp_new();
31
float32 float32_sqrt(float32, float_status *status);
30
TCGv t2 = tcg_temp_new();
32
float32 float32_exp2(float32, float_status *status);
31
TCGLabel *l1 = gen_new_label();
33
float32 float32_log2(float32, float_status *status);
32
@@ -XXX,XX +XXX,XX @@ static void gen_arith(DisasContext *ctx, uint32_t opc,
34
@@ -XXX,XX +XXX,XX @@ float64 float64_mul(float64, float64, float_status *status);
33
switch (opc) {
35
float64 float64_div(float64, float64, float_status *status);
34
case OPC_ADD:
36
float64 float64_rem(float64, float64, float_status *status);
35
{
37
float64 float64_muladd(float64, float64, float64, int, float_status *status);
36
- TCGv t0 = tcg_temp_local_new();
38
+float64 float64_muladd_scalbn(float64, float64, float64,
37
+ TCGv t0 = tcg_temp_new();
39
+ int, int, float_status *status);
38
TCGv t1 = tcg_temp_new();
40
float64 float64_sqrt(float64, float_status *status);
39
TCGv t2 = tcg_temp_new();
41
float64 float64_log2(float64, float_status *status);
40
TCGLabel *l1 = gen_new_label();
42
FloatRelation float64_compare(float64, float64, float_status *status);
41
@@ -XXX,XX +XXX,XX @@ static void gen_arith(DisasContext *ctx, uint32_t opc,
43
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
42
break;
44
index XXXXXXX..XXXXXXX 100644
43
case OPC_SUB:
45
--- a/fpu/softfloat.c
44
{
46
+++ b/fpu/softfloat.c
45
- TCGv t0 = tcg_temp_local_new();
47
@@ -XXX,XX +XXX,XX @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
46
+ TCGv t0 = tcg_temp_new();
48
#define parts_mul(A, B, S) \
47
TCGv t1 = tcg_temp_new();
49
PARTS_GENERIC_64_128(mul, A)(A, B, S)
48
TCGv t2 = tcg_temp_new();
50
49
TCGLabel *l1 = gen_new_label();
51
-static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
50
@@ -XXX,XX +XXX,XX @@ static void gen_arith(DisasContext *ctx, uint32_t opc,
52
- FloatParts64 *c, int flags,
51
#if defined(TARGET_MIPS64)
53
- float_status *s);
52
case OPC_DADD:
54
-static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
53
{
55
- FloatParts128 *c, int flags,
54
- TCGv t0 = tcg_temp_local_new();
56
- float_status *s);
55
+ TCGv t0 = tcg_temp_new();
57
+static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
56
TCGv t1 = tcg_temp_new();
58
+ FloatParts64 *c, int scale,
57
TCGv t2 = tcg_temp_new();
59
+ int flags, float_status *s);
58
TCGLabel *l1 = gen_new_label();
60
+static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
59
@@ -XXX,XX +XXX,XX @@ static void gen_arith(DisasContext *ctx, uint32_t opc,
61
+ FloatParts128 *c, int scale,
60
break;
62
+ int flags, float_status *s);
61
case OPC_DSUB:
63
62
{
64
-#define parts_muladd(A, B, C, Z, S) \
63
- TCGv t0 = tcg_temp_local_new();
65
- PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
64
+ TCGv t0 = tcg_temp_new();
66
+#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
65
TCGv t1 = tcg_temp_new();
67
+ PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
66
TCGv t2 = tcg_temp_new();
68
67
TCGLabel *l1 = gen_new_label();
69
static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
68
@@ -XXX,XX +XXX,XX @@ static void gen_loongson_integer(DisasContext *ctx, uint32_t opc,
70
float_status *s);
69
return;
71
@@ -XXX,XX +XXX,XX @@ floatx80_mul(floatx80 a, floatx80 b, float_status *status)
72
* Fused multiply-add
73
*/
74
75
-float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
76
- int flags, float_status *status)
77
+float16 QEMU_FLATTEN
78
+float16_muladd_scalbn(float16 a, float16 b, float16 c,
79
+ int scale, int flags, float_status *status)
80
{
81
FloatParts64 pa, pb, pc, *pr;
82
83
float16_unpack_canonical(&pa, a, status);
84
float16_unpack_canonical(&pb, b, status);
85
float16_unpack_canonical(&pc, c, status);
86
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
87
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
88
89
return float16_round_pack_canonical(pr, status);
90
}
91
92
-static float32 QEMU_SOFTFLOAT_ATTR
93
-soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
94
- float_status *status)
95
+float16 float16_muladd(float16 a, float16 b, float16 c,
96
+ int flags, float_status *status)
97
+{
98
+ return float16_muladd_scalbn(a, b, c, 0, flags, status);
99
+}
100
+
101
+float32 QEMU_SOFTFLOAT_ATTR
102
+float32_muladd_scalbn(float32 a, float32 b, float32 c,
103
+ int scale, int flags, float_status *status)
104
{
105
FloatParts64 pa, pb, pc, *pr;
106
107
float32_unpack_canonical(&pa, a, status);
108
float32_unpack_canonical(&pb, b, status);
109
float32_unpack_canonical(&pc, c, status);
110
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
111
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
112
113
return float32_round_pack_canonical(pr, status);
114
}
115
116
-static float64 QEMU_SOFTFLOAT_ATTR
117
-soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
118
- float_status *status)
119
+float64 QEMU_SOFTFLOAT_ATTR
120
+float64_muladd_scalbn(float64 a, float64 b, float64 c,
121
+ int scale, int flags, float_status *status)
122
{
123
FloatParts64 pa, pb, pc, *pr;
124
125
float64_unpack_canonical(&pa, a, status);
126
float64_unpack_canonical(&pb, b, status);
127
float64_unpack_canonical(&pc, c, status);
128
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
129
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
130
131
return float64_round_pack_canonical(pr, status);
132
}
133
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
134
return ur.s;
135
136
soft:
137
- return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
138
+ return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
139
}
140
141
float64 QEMU_FLATTEN
142
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
143
return ur.s;
144
145
soft:
146
- return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
147
+ return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
148
}
149
150
float64 float64r32_muladd(float64 a, float64 b, float64 c,
151
@@ -XXX,XX +XXX,XX @@ float64 float64r32_muladd(float64 a, float64 b, float64 c,
152
float64_unpack_canonical(&pa, a, status);
153
float64_unpack_canonical(&pb, b, status);
154
float64_unpack_canonical(&pc, c, status);
155
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
156
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
157
158
return float64r32_round_pack_canonical(pr, status);
159
}
160
@@ -XXX,XX +XXX,XX @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
161
bfloat16_unpack_canonical(&pa, a, status);
162
bfloat16_unpack_canonical(&pb, b, status);
163
bfloat16_unpack_canonical(&pc, c, status);
164
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
165
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
166
167
return bfloat16_round_pack_canonical(pr, status);
168
}
169
@@ -XXX,XX +XXX,XX @@ float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
170
float128_unpack_canonical(&pa, a, status);
171
float128_unpack_canonical(&pb, b, status);
172
float128_unpack_canonical(&pc, c, status);
173
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
174
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
175
176
return float128_round_pack_canonical(pr, status);
177
}
178
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)
179
180
float64_unpack_canonical(&rp, float64_one, status);
181
for (i = 0 ; i < 15 ; i++) {
182
+
183
float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
184
- rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
185
+ rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
186
xnp = *parts_mul(&xnp, &xp, status);
70
}
187
}
71
188
72
- switch (opc) {
189
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
73
- case OPC_MULT_G_2E:
74
- case OPC_MULT_G_2F:
75
- case OPC_MULTU_G_2E:
76
- case OPC_MULTU_G_2F:
77
-#if defined(TARGET_MIPS64)
78
- case OPC_DMULT_G_2E:
79
- case OPC_DMULT_G_2F:
80
- case OPC_DMULTU_G_2E:
81
- case OPC_DMULTU_G_2F:
82
-#endif
83
- t0 = tcg_temp_new();
84
- t1 = tcg_temp_new();
85
- break;
86
- default:
87
- t0 = tcg_temp_local_new();
88
- t1 = tcg_temp_local_new();
89
- break;
90
- }
91
-
92
+ t0 = tcg_temp_new();
93
+ t1 = tcg_temp_new();
94
gen_load_gpr(t0, rs);
95
gen_load_gpr(t1, rt);
96
97
@@ -XXX,XX +XXX,XX @@ static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt)
98
TCGCond cond;
99
100
opc = MASK_LMMI(ctx->opcode);
101
- switch (opc) {
102
- case OPC_ADD_CP2:
103
- case OPC_SUB_CP2:
104
- case OPC_DADD_CP2:
105
- case OPC_DSUB_CP2:
106
- t0 = tcg_temp_local_new_i64();
107
- t1 = tcg_temp_local_new_i64();
108
- break;
109
- default:
110
- t0 = tcg_temp_new_i64();
111
- t1 = tcg_temp_new_i64();
112
- break;
113
- }
114
-
115
check_cp1_enabled(ctx);
116
+
117
+ t0 = tcg_temp_new_i64();
118
+ t1 = tcg_temp_new_i64();
119
gen_load_fpr64(ctx, t0, rs);
120
gen_load_fpr64(ctx, t1, rt);
121
122
@@ -XXX,XX +XXX,XX @@ static void gen_mftr(CPUMIPSState *env, DisasContext *ctx, int rt, int rd,
123
int u, int sel, int h)
124
{
125
int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
126
- TCGv t0 = tcg_temp_local_new();
127
+ TCGv t0 = tcg_temp_new();
128
129
if ((env->CP0_VPEConf0 & (1 << CP0VPEC0_MVP)) == 0 &&
130
((env->tcs[other_tc].CP0_TCBind & (0xf << CP0TCBd_CurVPE)) !=
131
@@ -XXX,XX +XXX,XX @@ static void gen_mttr(CPUMIPSState *env, DisasContext *ctx, int rd, int rt,
132
int u, int sel, int h)
133
{
134
int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
135
- TCGv t0 = tcg_temp_local_new();
136
+ TCGv t0 = tcg_temp_new();
137
138
gen_load_gpr(t0, rt);
139
if ((env->CP0_VPEConf0 & (1 << CP0VPEC0_MVP)) == 0 &&
140
@@ -XXX,XX +XXX,XX @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
141
case OPC_ALNV_PS:
142
check_ps(ctx);
143
{
144
- TCGv t0 = tcg_temp_local_new();
145
+ TCGv t0 = tcg_temp_new();
146
TCGv_i32 fp = tcg_temp_new_i32();
147
TCGv_i32 fph = tcg_temp_new_i32();
148
TCGLabel *l1 = gen_new_label();
149
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
150
index XXXXXXX..XXXXXXX 100644
190
index XXXXXXX..XXXXXXX 100644
151
--- a/target/mips/tcg/nanomips_translate.c.inc
191
--- a/fpu/softfloat-parts.c.inc
152
+++ b/target/mips/tcg/nanomips_translate.c.inc
192
+++ b/fpu/softfloat-parts.c.inc
153
@@ -XXX,XX +XXX,XX @@ static void gen_llwp(DisasContext *ctx, uint32_t base, int16_t offset,
193
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
154
static void gen_scwp(DisasContext *ctx, uint32_t base, int16_t offset,
194
* Requires A and C extracted into a double-sized structure to provide the
155
uint32_t reg1, uint32_t reg2, bool eva)
195
* extra space for the widening multiply.
156
{
196
*/
157
- TCGv taddr = tcg_temp_local_new();
197
-static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
158
- TCGv lladdr = tcg_temp_local_new();
198
- FloatPartsN *c, int flags, float_status *s)
159
+ TCGv taddr = tcg_temp_new();
199
+static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
160
+ TCGv lladdr = tcg_temp_new();
200
+ FloatPartsN *c, int scale,
161
TCGv_i64 tval = tcg_temp_new_i64();
201
+ int flags, float_status *s)
162
TCGv_i64 llval = tcg_temp_new_i64();
202
{
163
TCGv_i64 val = tcg_temp_new_i64();
203
int ab_mask, abc_mask;
204
FloatPartsW p_widen, c_widen;
205
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
206
a->exp = p_widen.exp;
207
208
return_normal:
209
+ /* TODO: Replace all use of float_muladd_halve_result with scale. */
210
if (flags & float_muladd_halve_result) {
211
a->exp -= 1;
212
}
213
+ a->exp += scale;
214
finish_sign:
215
if (flags & float_muladd_negate_result) {
216
a->sign ^= 1;
164
--
217
--
165
2.34.1
218
2.43.0
166
219
167
220
Use the scalbn interface instead of float_muladd_halve_result.

Guest front-ends now get temps that span the lifetime of
the translation block by default, which avoids accidentally
using the temp across branches and invalidating the data.
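
A minimal sketch of the pattern this makes safe (hypothetical translator
code; cpu_src and cpu_dst stand in for a target's TCGv_i32 globals): the
temp is written on both sides of a branch and read after the label, with
no need for tcg_temp_local_new():

    TCGv_i32 t = tcg_temp_new_i32();    /* TEMP_TB: lives for the whole TB */
    TCGLabel *over = gen_new_label();

    tcg_gen_movi_i32(t, 1);
    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_src, 0, over);
    tcg_gen_movi_i32(t, 2);
    gen_set_label(over);
    tcg_gen_mov_i32(cpu_dst, t);        /* t is still valid after the branch */
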
4
2
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
include/tcg/tcg.h | 8 ++++----
6
target/arm/tcg/helper-a64.c | 6 +++---
9
1 file changed, 4 insertions(+), 4 deletions(-)
7
1 file changed, 3 insertions(+), 3 deletions(-)
10
8
11
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
9
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/include/tcg/tcg.h
11
--- a/target/arm/tcg/helper-a64.c
14
+++ b/include/tcg/tcg.h
12
+++ b/target/arm/tcg/helper-a64.c
15
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i32 tcg_temp_ebb_new_i32(void)
13
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
16
14
(float16_is_infinity(b) && float16_is_zero(a))) {
17
static inline TCGv_i32 tcg_temp_new_i32(void)
15
return float16_one_point_five;
18
{
16
}
19
- TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB);
17
- return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
20
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB);
18
+ return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
21
return temp_tcgv_i32(t);
22
}
19
}
23
20
24
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 tcg_temp_ebb_new_i64(void)
21
float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
25
22
@@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
26
static inline TCGv_i64 tcg_temp_new_i64(void)
23
(float32_is_infinity(b) && float32_is_zero(a))) {
27
{
24
return float32_one_point_five;
28
- TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB);
25
}
29
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB);
26
- return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
30
return temp_tcgv_i64(t);
27
+ return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
31
}
28
}
32
29
33
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i128 tcg_temp_ebb_new_i128(void)
30
float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
34
31
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
35
static inline TCGv_i128 tcg_temp_new_i128(void)
32
(float64_is_infinity(b) && float64_is_zero(a))) {
36
{
33
return float64_one_point_five;
37
- TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB);
34
}
38
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB);
35
- return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
39
return temp_tcgv_i128(t);
36
+ return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
40
}
37
}
41
38
42
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr tcg_temp_ebb_new_ptr(void)
39
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
43
44
static inline TCGv_ptr tcg_temp_new_ptr(void)
45
{
46
- TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB);
47
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB);
48
return temp_tcgv_ptr(t);
49
}
50
51
--
40
--
52
2.34.1
41
2.43.0
53
42
54
43
1
Since tcg_temp_new is now identical, use that.
1
Use the scalbn interface instead of float_muladd_halve_result.
2
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
target/cris/translate.c | 6 +++---
6
target/sparc/helper.h | 4 +-
7
target/cris/translate_v10.c.inc | 10 +++++-----
7
target/sparc/fop_helper.c | 8 ++--
8
2 files changed, 8 insertions(+), 8 deletions(-)
8
target/sparc/translate.c | 80 +++++++++++++++++++++++----------------
9
9
3 files changed, 54 insertions(+), 38 deletions(-)
10
diff --git a/target/cris/translate.c b/target/cris/translate.c
10
11
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/target/cris/translate.c
13
--- a/target/sparc/helper.h
13
+++ b/target/cris/translate.c
14
+++ b/target/sparc/helper.h
14
@@ -XXX,XX +XXX,XX @@ static int dec_bound_r(CPUCRISState *env, DisasContext *dc)
15
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
15
LOG_DIS("bound.%c $r%u, $r%u\n",
16
DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
16
memsize_char(size), dc->op1, dc->op2);
17
DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
17
cris_cc_mask(dc, CC_MASK_NZ);
18
DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
18
- l0 = tcg_temp_local_new();
19
-DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32)
19
+ l0 = tcg_temp_new();
20
+DEF_HELPER_FLAGS_6(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, s32, i32)
20
dec_prep_move_r(dc, dc->op1, dc->op2, size, 0, l0);
21
DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64)
21
cris_alu(dc, CC_OP_BOUND, cpu_R[dc->op2], cpu_R[dc->op2], l0, 4);
22
DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
22
tcg_temp_free(l0);
23
23
@@ -XXX,XX +XXX,XX @@ static int dec_bound_m(CPUCRISState *env, DisasContext *dc)
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32)
24
dc->op1, dc->postinc ? "+]" : "]",
25
DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
25
dc->op2);
26
DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
26
27
DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
27
- l[0] = tcg_temp_local_new();
28
-DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32)
28
- l[1] = tcg_temp_local_new();
29
+DEF_HELPER_FLAGS_6(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, s32, i32)
29
+ l[0] = tcg_temp_new();
30
DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32)
30
+ l[1] = tcg_temp_new();
31
DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
31
insn_len = dec_prep_alu_m(env, dc, 0, memsize, l[0], l[1]);
32
32
cris_cc_mask(dc, CC_MASK_NZ);
33
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
33
cris_alu(dc, CC_OP_BOUND, cpu_R[dc->op2], l[0], l[1], 4);
34
diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc
35
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
36
--- a/target/cris/translate_v10.c.inc
35
--- a/target/sparc/fop_helper.c
37
+++ b/target/cris/translate_v10.c.inc
36
+++ b/target/sparc/fop_helper.c
38
@@ -XXX,XX +XXX,XX @@ static void gen_store_v10_conditional(DisasContext *dc, TCGv addr, TCGv val,
37
@@ -XXX,XX +XXX,XX @@ Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src)
39
unsigned int size, int mem_index)
38
}
40
{
39
41
TCGLabel *l1 = gen_new_label();
40
float32 helper_fmadds(CPUSPARCState *env, float32 s1,
42
- TCGv taddr = tcg_temp_local_new();
41
- float32 s2, float32 s3, uint32_t op)
43
- TCGv tval = tcg_temp_local_new();
42
+ float32 s2, float32 s3, int32_t sc, uint32_t op)
44
- TCGv t1 = tcg_temp_local_new();
43
{
45
+ TCGv taddr = tcg_temp_new();
44
- float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status);
46
+ TCGv tval = tcg_temp_new();
45
+ float32 ret = float32_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
47
+ TCGv t1 = tcg_temp_new();
46
check_ieee_exceptions(env, GETPC());
48
dc->postinc = 0;
47
return ret;
49
cris_evaluate_flags(dc);
48
}
50
49
51
@@ -XXX,XX +XXX,XX @@ static void dec10_reg_bound(DisasContext *dc, int size)
50
float64 helper_fmaddd(CPUSPARCState *env, float64 s1,
52
{
51
- float64 s2, float64 s3, uint32_t op)
53
TCGv t;
52
+ float64 s2, float64 s3, int32_t sc, uint32_t op)
54
53
{
55
- t = tcg_temp_local_new();
54
- float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status);
56
+ t = tcg_temp_new();
55
+ float64 ret = float64_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
57
t_gen_zext(t, cpu_R[dc->src], size);
56
check_ieee_exceptions(env, GETPC());
58
cris_alu(dc, CC_OP_BOUND, cpu_R[dc->dst], cpu_R[dc->dst], t, 4);
57
return ret;
59
tcg_temp_free(t);
58
}
60
@@ -XXX,XX +XXX,XX @@ static int dec10_ind_bound(CPUCRISState *env, DisasContext *dc,
59
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
61
int rd = dc->dst;
60
index XXXXXXX..XXXXXXX 100644
62
TCGv t;
61
--- a/target/sparc/translate.c
63
62
+++ b/target/sparc/translate.c
64
- t = tcg_temp_local_new();
63
@@ -XXX,XX +XXX,XX @@ static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
65
+ t = tcg_temp_new();
64
66
insn_len += dec10_prep_move_m(env, dc, 0, size, t);
65
static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
67
cris_alu(dc, CC_OP_BOUND, cpu_R[dc->dst], cpu_R[rd], t, 4);
66
{
68
if (dc->dst == 15) {
67
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
68
+ TCGv_i32 z = tcg_constant_i32(0);
69
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, z);
70
}
71
72
static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
73
{
74
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
75
+ TCGv_i32 z = tcg_constant_i32(0);
76
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, z);
77
}
78
79
static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
80
{
81
- int op = float_muladd_negate_c;
82
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
83
+ TCGv_i32 z = tcg_constant_i32(0);
84
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
85
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
86
}
87
88
static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
89
{
90
- int op = float_muladd_negate_c;
91
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
92
+ TCGv_i32 z = tcg_constant_i32(0);
93
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
94
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
95
}
96
97
static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
98
{
99
- int op = float_muladd_negate_c | float_muladd_negate_result;
100
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
101
+ TCGv_i32 z = tcg_constant_i32(0);
102
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
103
+ float_muladd_negate_result);
104
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
105
}
106
107
static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
108
{
109
- int op = float_muladd_negate_c | float_muladd_negate_result;
110
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
111
+ TCGv_i32 z = tcg_constant_i32(0);
112
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
113
+ float_muladd_negate_result);
114
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
115
}
116
117
static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
118
{
119
- int op = float_muladd_negate_result;
120
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
121
+ TCGv_i32 z = tcg_constant_i32(0);
122
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
123
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
124
}
125
126
static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
127
{
128
- int op = float_muladd_negate_result;
129
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
130
+ TCGv_i32 z = tcg_constant_i32(0);
131
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
132
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
133
}
134
135
/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. */
136
static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
137
{
138
- TCGv_i32 one = tcg_constant_i32(float32_one);
139
- int op = float_muladd_halve_result;
140
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
141
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
142
+ TCGv_i32 mone = tcg_constant_i32(-1);
143
+ TCGv_i32 op = tcg_constant_i32(0);
144
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
145
}
146
147
static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
148
{
149
- TCGv_i64 one = tcg_constant_i64(float64_one);
150
- int op = float_muladd_halve_result;
151
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
152
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
153
+ TCGv_i32 mone = tcg_constant_i32(-1);
154
+ TCGv_i32 op = tcg_constant_i32(0);
155
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
156
}
157
158
/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */
159
static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
160
{
161
- TCGv_i32 one = tcg_constant_i32(float32_one);
162
- int op = float_muladd_negate_c | float_muladd_halve_result;
163
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
164
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
165
+ TCGv_i32 mone = tcg_constant_i32(-1);
166
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
167
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
168
}
169
170
static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
171
{
172
- TCGv_i64 one = tcg_constant_i64(float64_one);
173
- int op = float_muladd_negate_c | float_muladd_halve_result;
174
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
175
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
176
+ TCGv_i32 mone = tcg_constant_i32(-1);
177
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
178
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
179
}
180
181
/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */
182
static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
183
{
184
- TCGv_i32 one = tcg_constant_i32(float32_one);
185
- int op = float_muladd_negate_result | float_muladd_halve_result;
186
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
187
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
188
+ TCGv_i32 mone = tcg_constant_i32(-1);
189
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
190
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
191
}
192
193
static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
194
{
195
- TCGv_i64 one = tcg_constant_i64(float64_one);
196
- int op = float_muladd_negate_result | float_muladd_halve_result;
197
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
198
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
199
+ TCGv_i32 mone = tcg_constant_i32(-1);
200
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
201
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
202
}
203
204
static void gen_op_fpexception_im(DisasContext *dc, int ftt)
69
--
205
--
70
2.34.1
206
2.43.0
71
207
72
208
All uses have been converted to float*_muladd_scalbn.

Just because the label reference count is more than 1 does
not mean we cannot remove a branch-to-next. By doing this
first, the label reference count may drop to 0, and then
the label itself gets removed as before.
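
For illustration, a hypothetical op sequence after earlier folding has
turned a conditional branch into an unconditional one (not taken from
the patch):

    br $L1             <- only branches to the next op, so it is removed
                          even while $L1 still has other references
    set_label $L1      <- dropped as well only if its reference count
                          then falls to 0; otherwise the label stays
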
5
2
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
tcg/tcg.c | 33 +++++++++++++++++----------------
6
include/fpu/softfloat.h | 3 ---
10
1 file changed, 17 insertions(+), 16 deletions(-)
7
fpu/softfloat.c | 6 ------
8
fpu/softfloat-parts.c.inc | 4 ----
9
3 files changed, 13 deletions(-)
11
10
12
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/tcg.c
13
--- a/include/fpu/softfloat.h
15
+++ b/tcg/tcg.c
14
+++ b/include/fpu/softfloat.h
16
@@ -XXX,XX +XXX,XX @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
15
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
17
/* Reachable analysis : remove unreachable code. */
16
| Using these differs from negating an input or output before calling
18
static void reachable_code_pass(TCGContext *s)
17
| the muladd function in that this means that a NaN doesn't have its
19
{
18
| sign bit inverted before it is propagated.
20
- TCGOp *op, *op_next;
19
-| We also support halving the result before rounding, as a special
21
+ TCGOp *op, *op_next, *op_prev;
20
-| case to support the ARM fused-sqrt-step instruction FRSQRTS.
22
bool dead = false;
21
*----------------------------------------------------------------------------*/
23
22
enum {
24
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
23
float_muladd_negate_c = 1,
25
@@ -XXX,XX +XXX,XX @@ static void reachable_code_pass(TCGContext *s)
24
float_muladd_negate_product = 2,
26
switch (op->opc) {
25
float_muladd_negate_result = 4,
27
case INDEX_op_set_label:
26
- float_muladd_halve_result = 8,
28
label = arg_label(op->args[0]);
27
};
29
+
28
30
+ /*
29
/*----------------------------------------------------------------------------
31
+ * Optimization can fold conditional branches to unconditional.
30
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
32
+ * If we find a label which is preceded by an unconditional
31
index XXXXXXX..XXXXXXX 100644
33
+ * branch to next, remove the branch. We couldn't do this when
32
--- a/fpu/softfloat.c
34
+ * processing the branch because any dead code between the branch
33
+++ b/fpu/softfloat.c
35
+ * and label had not yet been removed.
34
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
36
+ */
35
if (unlikely(!can_use_fpu(s))) {
37
+ op_prev = QTAILQ_PREV(op, link);
36
goto soft;
38
+ if (op_prev->opc == INDEX_op_br &&
37
}
39
+ label == arg_label(op_prev->args[0])) {
38
- if (unlikely(flags & float_muladd_halve_result)) {
40
+ tcg_op_remove(s, op_prev);
39
- goto soft;
41
+ /* Fall through means insns become live again. */
40
- }
42
+ dead = false;
41
43
+ }
42
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
44
+
43
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
45
if (label->refs == 0) {
44
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
46
/*
45
if (unlikely(!can_use_fpu(s))) {
47
* While there is an occasional backward branch, virtually
46
goto soft;
48
@@ -XXX,XX +XXX,XX @@ static void reachable_code_pass(TCGContext *s)
47
}
49
/* Once we see a label, insns become live again. */
48
- if (unlikely(flags & float_muladd_halve_result)) {
50
dead = false;
49
- goto soft;
51
remove = false;
50
- }
52
-
51
53
- /*
52
float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
54
- * Optimization can fold conditional branches to unconditional.
53
if (unlikely(!f64_is_zon3(ua, ub, uc))) {
55
- * If we find a label with one reference which is preceded by
54
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
56
- * an unconditional branch to it, remove both. This needed to
55
index XXXXXXX..XXXXXXX 100644
57
- * wait until the dead code in between them was removed.
56
--- a/fpu/softfloat-parts.c.inc
58
- */
57
+++ b/fpu/softfloat-parts.c.inc
59
- if (label->refs == 1) {
58
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
60
- TCGOp *op_prev = QTAILQ_PREV(op, link);
59
a->exp = p_widen.exp;
61
- if (op_prev->opc == INDEX_op_br &&
60
62
- label == arg_label(op_prev->args[0])) {
61
return_normal:
63
- tcg_op_remove(s, op_prev);
62
- /* TODO: Replace all use of float_muladd_halve_result with scale. */
64
- remove = true;
63
- if (flags & float_muladd_halve_result) {
65
- }
64
- a->exp -= 1;
66
- }
65
- }
67
}
66
a->exp += scale;
68
break;
67
finish_sign:
69
68
if (flags & float_muladd_negate_result) {
70
--
69
--
71
2.34.1
70
2.43.0
72
71
73
72
diff view generated by jsdifflib
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
This rounding mode is used by Hexagon.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-20-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
4
---
8
target/mips/tcg/exception.c | 3 ++-
5
include/fpu/softfloat-types.h | 2 ++
9
target/mips/tcg/sysemu/special_helper.c | 2 +-
6
fpu/softfloat-parts.c.inc | 3 +++
10
2 files changed, 3 insertions(+), 2 deletions(-)
7
2 files changed, 5 insertions(+)
11
8
12
diff --git a/target/mips/tcg/exception.c b/target/mips/tcg/exception.c
9
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/target/mips/tcg/exception.c
11
--- a/include/fpu/softfloat-types.h
15
+++ b/target/mips/tcg/exception.c
12
+++ b/include/fpu/softfloat-types.h
16
@@ -XXX,XX +XXX,XX @@ void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb)
13
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
17
MIPSCPU *cpu = MIPS_CPU(cs);
14
float_round_to_odd = 5,
18
CPUMIPSState *env = &cpu->env;
15
/* Not an IEEE rounding mode: round to closest odd, overflow to inf */
19
16
float_round_to_odd_inf = 6,
20
- env->active_tc.PC = tb_pc(tb);
17
+ /* Not an IEEE rounding mode: round to nearest even, overflow to max */
21
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
18
+ float_round_nearest_even_max = 7,
22
+ env->active_tc.PC = tb->pc;
19
} FloatRoundMode;
23
env->hflags &= ~MIPS_HFLAG_BMASK;
20
24
env->hflags |= tb->flags & MIPS_HFLAG_BMASK;
21
/*
25
}
22
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
26
diff --git a/target/mips/tcg/sysemu/special_helper.c b/target/mips/tcg/sysemu/special_helper.c
27
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
28
--- a/target/mips/tcg/sysemu/special_helper.c
24
--- a/fpu/softfloat-parts.c.inc
29
+++ b/target/mips/tcg/sysemu/special_helper.c
25
+++ b/fpu/softfloat-parts.c.inc
30
@@ -XXX,XX +XXX,XX @@ bool mips_io_recompile_replay_branch(CPUState *cs, const TranslationBlock *tb)
26
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
31
CPUMIPSState *env = &cpu->env;
27
int exp, flags = 0;
32
28
33
if ((env->hflags & MIPS_HFLAG_BMASK) != 0
29
switch (s->float_rounding_mode) {
34
- && env->active_tc.PC != tb_pc(tb)) {
30
+ case float_round_nearest_even_max:
35
+ && !(cs->tcg_cflags & CF_PCREL) && env->active_tc.PC != tb->pc) {
31
+ overflow_norm = true;
36
env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
32
+ /* fall through */
37
env->hflags &= ~MIPS_HFLAG_BMASK;
33
case float_round_nearest_even:
38
return true;
34
if (N > 64 && frac_lsb == 0) {
35
inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
39
--
36
--
40
2.34.1
37
2.43.0
41
42
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
Certain Hexagon instructions suppress changes to the result
2
when the product of fma() is a true zero.
2
3
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-11-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/i386/cpu-param.h | 4 ----
6
include/fpu/softfloat.h | 5 +++++
9
1 file changed, 4 deletions(-)
7
fpu/softfloat.c | 3 +++
8
fpu/softfloat-parts.c.inc | 4 +++-
9
3 files changed, 11 insertions(+), 1 deletion(-)
10
10
11
diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h
11
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/i386/cpu-param.h
13
--- a/include/fpu/softfloat.h
14
+++ b/target/i386/cpu-param.h
14
+++ b/include/fpu/softfloat.h
15
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
16
#define TARGET_PAGE_BITS 12
16
| Using these differs from negating an input or output before calling
17
#define NB_MMU_MODES 5
17
| the muladd function in that this means that a NaN doesn't have its
18
18
| sign bit inverted before it is propagated.
19
-#ifndef CONFIG_USER_ONLY
19
+|
20
-# define TARGET_TB_PCREL 1
20
+| With float_muladd_suppress_add_product_zero, if A or B is zero
21
-#endif
21
+| such that the product is a true zero, then return C without addition.
22
-
22
+| This preserves the sign of C when C is +/- 0. Used for Hexagon.
23
#endif
23
*----------------------------------------------------------------------------*/
24
enum {
25
float_muladd_negate_c = 1,
26
float_muladd_negate_product = 2,
27
float_muladd_negate_result = 4,
28
+ float_muladd_suppress_add_product_zero = 8,
29
};
30
31
/*----------------------------------------------------------------------------
32
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/fpu/softfloat.c
35
+++ b/fpu/softfloat.c
36
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
37
if (unlikely(!can_use_fpu(s))) {
38
goto soft;
39
}
40
+ if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
41
+ goto soft;
42
+ }
43
44
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
45
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
46
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
47
index XXXXXXX..XXXXXXX 100644
48
--- a/fpu/softfloat-parts.c.inc
49
+++ b/fpu/softfloat-parts.c.inc
50
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
51
goto return_normal;
52
}
53
if (c->cls == float_class_zero) {
54
- if (a->sign != c->sign) {
55
+ if (flags & float_muladd_suppress_add_product_zero) {
56
+ a->sign = c->sign;
57
+ } else if (a->sign != c->sign) {
58
goto return_sub_zero;
59
}
60
goto return_zero;
24
--
61
--
25
2.34.1
62
2.43.0
26
27
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
There are no special cases for this instruction.
2
Remove internal_mpyf as unused.
2
3
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-28-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
include/exec/exec-all.h | 7 -------
7
target/hexagon/fma_emu.h | 1 -
9
1 file changed, 7 deletions(-)
8
target/hexagon/fma_emu.c | 8 --------
9
target/hexagon/op_helper.c | 2 +-
10
3 files changed, 1 insertion(+), 10 deletions(-)
10
11
11
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
12
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/include/exec/exec-all.h
14
--- a/target/hexagon/fma_emu.h
14
+++ b/include/exec/exec-all.h
15
+++ b/target/hexagon/fma_emu.h
15
@@ -XXX,XX +XXX,XX @@ static inline uint32_t tb_cflags(const TranslationBlock *tb)
16
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32);
16
return qatomic_read(&tb->cflags);
17
float32 infinite_float32(uint8_t sign);
18
float32 internal_fmafx(float32 a, float32 b, float32 c,
19
int scale, float_status *fp_status);
20
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status);
21
float64 internal_mpyhh(float64 a, float64 b,
22
unsigned long long int accumulated,
23
float_status *fp_status);
24
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/hexagon/fma_emu.c
27
+++ b/target/hexagon/fma_emu.c
28
@@ -XXX,XX +XXX,XX @@ float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
29
return accum_round_float32(result, fp_status);
17
}
30
}
18
31
19
-/* Hide the read to avoid ifdefs for CF_PCREL. */
32
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
20
-static inline target_ulong tb_pc(const TranslationBlock *tb)
21
-{
33
-{
22
- assert(!(tb_cflags(tb) & CF_PCREL));
34
- if (float32_is_zero(a) || float32_is_zero(b)) {
23
- return tb->pc;
35
- return float32_mul(a, b, fp_status);
36
- }
37
- return internal_fmafx(a, b, float32_zero, 0, fp_status);
24
-}
38
-}
25
-
39
-
26
static inline tb_page_addr_t tb_page_addr0(const TranslationBlock *tb)
40
float64 internal_mpyhh(float64 a, float64 b,
41
unsigned long long int accumulated,
42
float_status *fp_status)
43
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/hexagon/op_helper.c
46
+++ b/target/hexagon/op_helper.c
47
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
27
{
48
{
28
#ifdef CONFIG_USER_ONLY
49
float32 RdV;
50
arch_fpop_start(env);
51
- RdV = internal_mpyf(RsV, RtV, &env->fp_status);
52
+ RdV = float32_mul(RsV, RtV, &env->fp_status);
53
arch_fpop_end(env);
54
return RdV;
55
}
29
--
56
--
30
2.34.1
57
2.43.0
31
32
1
Write back the number of insns that we attempt to translate,
1
There are no special cases for this instruction.
2
so that if we longjmp out we have a more accurate limit for
3
the next attempt. This results in fewer restarts when some
4
limit is consumed by few instructions.
5
2
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
accel/tcg/translator.c | 2 +-
6
target/hexagon/op_helper.c | 2 +-
10
1 file changed, 1 insertion(+), 1 deletion(-)
7
1 file changed, 1 insertion(+), 1 deletion(-)
11
8
12
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
9
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/translator.c
11
--- a/target/hexagon/op_helper.c
15
+++ b/accel/tcg/translator.c
12
+++ b/target/hexagon/op_helper.c
16
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
13
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
17
plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY);
14
float32 RsV, float32 RtV)
18
15
{
19
while (true) {
16
arch_fpop_start(env);
20
- db->num_insns++;
17
- RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
21
+ *max_insns = ++db->num_insns;
18
+ RxV = float32_muladd(RsV, RtV, RxV, 0, &env->fp_status);
22
ops->insn_start(db, cpu);
19
arch_fpop_end(env);
23
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
20
return RxV;
24
21
}
25
--
22
--
26
2.34.1
23
2.43.0
27
28
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
There are no special cases for this instruction. Since hexagon
2
always uses default-nan mode, explicitly negating the first
3
input is unnecessary. Use float_muladd_negate_product instead.
2
4
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
5
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-10-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
target/arm/cpu-param.h | 2 --
8
target/hexagon/op_helper.c | 5 ++---
9
1 file changed, 2 deletions(-)
9
1 file changed, 2 insertions(+), 3 deletions(-)
10
10
11
diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h
11
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/cpu-param.h
13
--- a/target/hexagon/op_helper.c
14
+++ b/target/arm/cpu-param.h
14
+++ b/target/hexagon/op_helper.c
15
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
16
# define TARGET_PAGE_BITS_VARY
16
float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
17
# define TARGET_PAGE_BITS_MIN 10
17
float32 RsV, float32 RtV)
18
18
{
19
-# define TARGET_TB_PCREL 1
19
- float32 neg_RsV;
20
-
20
arch_fpop_start(env);
21
/*
21
- neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
22
* Cache the attrs and shareability fields from the page table entry.
22
- RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
23
*
23
+ RxV = float32_muladd(RsV, RtV, RxV, float_muladd_negate_product,
24
+ &env->fp_status);
25
arch_fpop_end(env);
26
return RxV;
27
}
24
--
28
--
25
2.34.1
29
2.43.0
26
27
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
This instruction has a special case that 0 * x + c returns c
2
without the normal sign folding that comes with 0 + -0.
3
Use the new float_muladd_suppress_add_product_zero to
4
describe this.
2
5
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
6
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-4-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
---
8
target/arm/cpu.c | 5 +++++
9
target/hexagon/op_helper.c | 11 +++--------
9
1 file changed, 5 insertions(+)
10
1 file changed, 3 insertions(+), 8 deletions(-)
10
11
11
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
12
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/cpu.c
14
--- a/target/hexagon/op_helper.c
14
+++ b/target/arm/cpu.c
15
+++ b/target/hexagon/op_helper.c
15
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
16
@@ -XXX,XX +XXX,XX @@ static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
16
Error *local_err = NULL;
17
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
17
bool no_aa32 = false;
18
float32 RsV, float32 RtV, float32 PuV)
18
19
{
19
+ /* Use pc-relative instructions in system-mode */
20
- size4s_t tmp;
20
+#ifndef CONFIG_USER_ONLY
21
arch_fpop_start(env);
21
+ cs->tcg_cflags |= CF_PCREL;
22
- RxV = check_nan(RxV, RxV, &env->fp_status);
22
+#endif
23
- RxV = check_nan(RxV, RsV, &env->fp_status);
23
+
24
- RxV = check_nan(RxV, RtV, &env->fp_status);
24
/* If we needed to query the host kernel for the CPU features
25
- tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
25
* then it's possible that might have failed in the initfn, but
26
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
26
* this is the first point where we can report it.
27
- RxV = tmp;
28
- }
29
+ RxV = float32_muladd_scalbn(RsV, RtV, RxV, fSXTN(8, 64, PuV),
30
+ float_muladd_suppress_add_product_zero,
31
+ &env->fp_status);
32
arch_fpop_end(env);
33
return RxV;
34
}
27
--
35
--
28
2.34.1
36
2.43.0
29
30
1
These symbols are now unused.
1
There are multiple special cases for this instruction.
2
(1) The saturate to normal maximum instead of overflow to infinity is
3
handled by the new float_round_nearest_even_max rounding mode.
4
(2) The 0 * n + c special case is handled by the new
5
float_muladd_suppress_add_product_zero flag.
6
(3) The Inf - Inf -> 0 special case can be detected after the fact
7
by examining float_flag_invalid_isi.
2
8
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
11
---
6
include/tcg/tcg-op.h | 2 --
12
target/hexagon/op_helper.c | 105 +++++++++----------------------------
7
include/tcg/tcg.h | 28 ----------------------------
13
1 file changed, 26 insertions(+), 79 deletions(-)
8
tcg/tcg.c | 16 ----------------
9
3 files changed, 46 deletions(-)
10
14
11
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
15
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
12
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
13
--- a/include/tcg/tcg-op.h
17
--- a/target/hexagon/op_helper.c
14
+++ b/include/tcg/tcg-op.h
18
+++ b/target/hexagon/op_helper.c
15
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_plugin_cb_end(void)
19
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
16
#if TARGET_LONG_BITS == 32
20
return RxV;
17
#define tcg_temp_new() tcg_temp_new_i32()
18
#define tcg_global_mem_new tcg_global_mem_new_i32
19
-#define tcg_temp_local_new() tcg_temp_local_new_i32()
20
#define tcg_temp_free tcg_temp_free_i32
21
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
22
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
23
#else
24
#define tcg_temp_new() tcg_temp_new_i64()
25
#define tcg_global_mem_new tcg_global_mem_new_i64
26
-#define tcg_temp_local_new() tcg_temp_local_new_i64()
27
#define tcg_temp_free tcg_temp_free_i64
28
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
29
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
30
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
31
index XXXXXXX..XXXXXXX 100644
32
--- a/include/tcg/tcg.h
33
+++ b/include/tcg/tcg.h
34
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i32 tcg_temp_new_i32(void)
35
return temp_tcgv_i32(t);
36
}
21
}
37
22
38
-static inline TCGv_i32 tcg_temp_local_new_i32(void)
23
-static bool is_zero_prod(float32 a, float32 b)
39
-{
24
-{
40
- TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB);
25
- return ((float32_is_zero(a) && is_finite(b)) ||
41
- return temp_tcgv_i32(t);
26
- (float32_is_zero(b) && is_finite(a)));
42
-}
27
-}
43
-
28
-
44
static inline TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t offset,
29
-static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
45
const char *name)
46
{
47
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 tcg_temp_new_i64(void)
48
return temp_tcgv_i64(t);
49
}
50
51
-static inline TCGv_i64 tcg_temp_local_new_i64(void)
52
-{
30
-{
53
- TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB);
31
- float32 ret = dst;
54
- return temp_tcgv_i64(t);
32
- if (float32_is_any_nan(x)) {
33
- if (extract32(x, 22, 1) == 0) {
34
- float_raise(float_flag_invalid, fp_status);
35
- }
36
- ret = make_float32(0xffffffff); /* nan */
37
- }
38
- return ret;
55
-}
39
-}
56
-
40
-
57
/* Used only by tcg infrastructure: tcg-op.c or plugin-gen.c */
41
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
58
static inline TCGv_i128 tcg_temp_ebb_new_i128(void)
42
float32 RsV, float32 RtV, float32 PuV)
59
{
43
{
60
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i128 tcg_temp_new_i128(void)
44
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
61
return temp_tcgv_i128(t);
45
return RxV;
62
}
46
}
63
47
64
-static inline TCGv_i128 tcg_temp_local_new_i128(void)
48
-static bool is_inf_prod(int32_t a, int32_t b)
65
-{
49
+static float32 do_sffma_lib(CPUHexagonState *env, float32 RxV,
66
- TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB);
50
+ float32 RsV, float32 RtV, int negate)
67
- return temp_tcgv_i128(t);
51
{
68
-}
52
- return (float32_is_infinity(a) && float32_is_infinity(b)) ||
53
- (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
54
- (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
55
+ int flags;
56
+
57
+ arch_fpop_start(env);
58
+
59
+ set_float_rounding_mode(float_round_nearest_even_max, &env->fp_status);
60
+ RxV = float32_muladd(RsV, RtV, RxV,
61
+ negate | float_muladd_suppress_add_product_zero,
62
+ &env->fp_status);
63
+
64
+ flags = get_float_exception_flags(&env->fp_status);
65
+ if (flags) {
66
+ /* Flags are suppressed by this instruction. */
67
+ set_float_exception_flags(0, &env->fp_status);
68
+
69
+ /* Return 0 for Inf - Inf. */
70
+ if (flags & float_flag_invalid_isi) {
71
+ RxV = 0;
72
+ }
73
+ }
74
+
75
+ arch_fpop_end(env);
76
+ return RxV;
77
}
78
79
float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
80
float32 RsV, float32 RtV)
81
{
82
- bool infinp;
83
- bool infminusinf;
84
- float32 tmp;
69
-
85
-
70
static inline TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t offset,
86
- arch_fpop_start(env);
71
const char *name)
87
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
88
- infminusinf = float32_is_infinity(RxV) &&
89
- is_inf_prod(RsV, RtV) &&
90
- (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
91
- infinp = float32_is_infinity(RxV) ||
92
- float32_is_infinity(RtV) ||
93
- float32_is_infinity(RsV);
94
- RxV = check_nan(RxV, RxV, &env->fp_status);
95
- RxV = check_nan(RxV, RsV, &env->fp_status);
96
- RxV = check_nan(RxV, RtV, &env->fp_status);
97
- tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
98
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
99
- RxV = tmp;
100
- }
101
- set_float_exception_flags(0, &env->fp_status);
102
- if (float32_is_infinity(RxV) && !infinp) {
103
- RxV = RxV - 1;
104
- }
105
- if (infminusinf) {
106
- RxV = 0;
107
- }
108
- arch_fpop_end(env);
109
- return RxV;
110
+ return do_sffma_lib(env, RxV, RsV, RtV, 0);
111
}
112
113
float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
114
float32 RsV, float32 RtV)
72
{
115
{
73
@@ -XXX,XX +XXX,XX @@ static inline TCGv_ptr tcg_temp_new_ptr(void)
116
- bool infinp;
74
return temp_tcgv_ptr(t);
117
- bool infminusinf;
118
- float32 tmp;
119
-
120
- arch_fpop_start(env);
121
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
122
- infminusinf = float32_is_infinity(RxV) &&
123
- is_inf_prod(RsV, RtV) &&
124
- (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
125
- infinp = float32_is_infinity(RxV) ||
126
- float32_is_infinity(RtV) ||
127
- float32_is_infinity(RsV);
128
- RxV = check_nan(RxV, RxV, &env->fp_status);
129
- RxV = check_nan(RxV, RsV, &env->fp_status);
130
- RxV = check_nan(RxV, RtV, &env->fp_status);
131
- float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
132
- tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
133
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
134
- RxV = tmp;
135
- }
136
- set_float_exception_flags(0, &env->fp_status);
137
- if (float32_is_infinity(RxV) && !infinp) {
138
- RxV = RxV - 1;
139
- }
140
- if (infminusinf) {
141
- RxV = 0;
142
- }
143
- arch_fpop_end(env);
144
- return RxV;
145
+ return do_sffma_lib(env, RxV, RsV, RtV, float_muladd_negate_product);
75
}
146
}
76
147
77
-static inline TCGv_ptr tcg_temp_local_new_ptr(void)
148
float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
78
-{
79
- TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB);
80
- return temp_tcgv_ptr(t);
81
-}
82
-
83
#if defined(CONFIG_DEBUG_TCG)
84
/* If you call tcg_clear_temp_count() at the start of a section of
85
* code which is not supposed to leak any TCG temporaries, then
86
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s);
87
/* Allocate a new temporary and initialize it with a constant. */
88
TCGv_i32 tcg_const_i32(int32_t val);
89
TCGv_i64 tcg_const_i64(int64_t val);
90
-TCGv_i32 tcg_const_local_i32(int32_t val);
91
-TCGv_i64 tcg_const_local_i64(int64_t val);
92
TCGv_vec tcg_const_zeros_vec(TCGType);
93
TCGv_vec tcg_const_ones_vec(TCGType);
94
TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec);
95
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val);
96
97
#if UINTPTR_MAX == UINT32_MAX
98
# define tcg_const_ptr(x) ((TCGv_ptr)tcg_const_i32((intptr_t)(x)))
99
-# define tcg_const_local_ptr(x) ((TCGv_ptr)tcg_const_local_i32((intptr_t)(x)))
100
# define tcg_constant_ptr(x) ((TCGv_ptr)tcg_constant_i32((intptr_t)(x)))
101
#else
102
# define tcg_const_ptr(x) ((TCGv_ptr)tcg_const_i64((intptr_t)(x)))
103
-# define tcg_const_local_ptr(x) ((TCGv_ptr)tcg_const_local_i64((intptr_t)(x)))
104
# define tcg_constant_ptr(x) ((TCGv_ptr)tcg_constant_i64((intptr_t)(x)))
105
#endif
106
107
diff --git a/tcg/tcg.c b/tcg/tcg.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/tcg/tcg.c
110
+++ b/tcg/tcg.c
111
@@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_const_i64(int64_t val)
112
return t0;
113
}
114
115
-TCGv_i32 tcg_const_local_i32(int32_t val)
116
-{
117
- TCGv_i32 t0;
118
- t0 = tcg_temp_local_new_i32();
119
- tcg_gen_movi_i32(t0, val);
120
- return t0;
121
-}
122
-
123
-TCGv_i64 tcg_const_local_i64(int64_t val)
124
-{
125
- TCGv_i64 t0;
126
- t0 = tcg_temp_local_new_i64();
127
- tcg_gen_movi_i64(t0, val);
128
- return t0;
129
-}
130
-
131
#if defined(CONFIG_DEBUG_TCG)
132
void tcg_clear_temp_count(void)
133
{
134
--
149
--
135
2.34.1
150
2.43.0
136
137
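For reference, the sffms_lib conversion above leans on the softfloat muladd flags rather than a separate negated-product helper: passing float_muladd_negate_product negates a*b before the addition. A minimal sketch of the pattern, using only the public softfloat API (illustrative, not code from the patch):

    #include "fpu/softfloat.h"

    /* Fused multiply-subtract expressed through the generic muladd:
     * float_muladd_negate_product negates a*b before adding c. */
    static float32 fms_f32(float32 a, float32 b, float32 c, float_status *st)
    {
        return float32_muladd(a, b, c, float_muladd_negate_product, st);
    }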
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
The function is now unused.
2
2
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-3-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/i386/cpu.c | 5 +++++
6
target/hexagon/fma_emu.h | 2 -
9
1 file changed, 5 insertions(+)
7
target/hexagon/fma_emu.c | 171 ---------------------------------------
8
2 files changed, 173 deletions(-)
10
9
11
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
10
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/target/i386/cpu.c
12
--- a/target/hexagon/fma_emu.h
14
+++ b/target/i386/cpu.c
13
+++ b/target/hexagon/fma_emu.h
15
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
14
@@ -XXX,XX +XXX,XX @@ static inline uint32_t float32_getexp_raw(float32 f32)
16
static bool ht_warned;
15
}
17
unsigned requested_lbr_fmt;
16
int32_t float32_getexp(float32 f32);
18
17
float32 infinite_float32(uint8_t sign);
19
+ /* Use pc-relative instructions in system-mode */
18
-float32 internal_fmafx(float32 a, float32 b, float32 c,
20
+#ifndef CONFIG_USER_ONLY
19
- int scale, float_status *fp_status);
21
+ cs->tcg_cflags |= CF_PCREL;
20
float64 internal_mpyhh(float64 a, float64 b,
22
+#endif
21
unsigned long long int accumulated,
23
+
22
float_status *fp_status);
24
if (cpu->apic_id == UNASSIGNED_APIC_ID) {
23
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
25
error_setg(errp, "apic-id property was not initialized properly");
24
index XXXXXXX..XXXXXXX 100644
26
return;
25
--- a/target/hexagon/fma_emu.c
26
+++ b/target/hexagon/fma_emu.c
27
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
28
return -1;
29
}
30
31
-static uint64_t float32_getmant(float32 f32)
32
-{
33
- Float a = { .i = f32 };
34
- if (float32_is_normal(f32)) {
35
- return a.mant | 1ULL << 23;
36
- }
37
- if (float32_is_zero(f32)) {
38
- return 0;
39
- }
40
- if (float32_is_denormal(f32)) {
41
- return a.mant;
42
- }
43
- return ~0ULL;
44
-}
45
-
46
int32_t float32_getexp(float32 f32)
47
{
48
Float a = { .i = f32 };
49
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
50
}
51
52
/* Return a maximum finite value with the requested sign */
53
-static float32 maxfinite_float32(uint8_t sign)
54
-{
55
- if (sign) {
56
- return make_float32(SF_MINUS_MAXF);
57
- } else {
58
- return make_float32(SF_MAXF);
59
- }
60
-}
61
-
62
-/* Return a zero value with requested sign */
63
-static float32 zero_float32(uint8_t sign)
64
-{
65
- if (sign) {
66
- return make_float32(0x80000000);
67
- } else {
68
- return float32_zero;
69
- }
70
-}
71
-
72
#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
73
static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
74
{ \
75
@@ -XXX,XX +XXX,XX @@ static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
76
}
77
78
GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
79
-GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
80
-
81
-static bool is_inf_prod(float64 a, float64 b)
82
-{
83
- return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
84
- (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
85
- (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
86
-}
87
-
88
-static float64 special_fma(float64 a, float64 b, float64 c,
89
- float_status *fp_status)
90
-{
91
- float64 ret = make_float64(0);
92
-
93
- /*
94
- * If A multiplied by B is an exact infinity and C is also an infinity
95
- * but with the opposite sign, FMA returns NaN and raises invalid.
96
- */
97
- uint8_t a_sign = float64_is_neg(a);
98
- uint8_t b_sign = float64_is_neg(b);
99
- uint8_t c_sign = float64_is_neg(c);
100
- if (is_inf_prod(a, b) && float64_is_infinity(c)) {
101
- if ((a_sign ^ b_sign) != c_sign) {
102
- ret = make_float64(DF_NAN);
103
- float_raise(float_flag_invalid, fp_status);
104
- return ret;
105
- }
106
- }
107
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
108
- (float64_is_zero(a) && float64_is_infinity(b))) {
109
- ret = make_float64(DF_NAN);
110
- float_raise(float_flag_invalid, fp_status);
111
- return ret;
112
- }
113
- /*
114
- * If none of the above checks are true and C is a NaN,
115
- * a NaN shall be returned
116
- * If A or B are NaN, a NAN shall be returned.
117
- */
118
- if (float64_is_any_nan(a) ||
119
- float64_is_any_nan(b) ||
120
- float64_is_any_nan(c)) {
121
- if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
122
- float_raise(float_flag_invalid, fp_status);
123
- }
124
- if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
125
- float_raise(float_flag_invalid, fp_status);
126
- }
127
- if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
128
- float_raise(float_flag_invalid, fp_status);
129
- }
130
- ret = make_float64(DF_NAN);
131
- return ret;
132
- }
133
- /*
134
- * We have checked for adding opposite-signed infinities.
135
- * Other infinities return infinity with the correct sign
136
- */
137
- if (float64_is_infinity(c)) {
138
- ret = infinite_float64(c_sign);
139
- return ret;
140
- }
141
- if (float64_is_infinity(a) || float64_is_infinity(b)) {
142
- ret = infinite_float64(a_sign ^ b_sign);
143
- return ret;
144
- }
145
- g_assert_not_reached();
146
-}
147
-
148
-static float32 special_fmaf(float32 a, float32 b, float32 c,
149
- float_status *fp_status)
150
-{
151
- float64 aa, bb, cc;
152
- aa = float32_to_float64(a, fp_status);
153
- bb = float32_to_float64(b, fp_status);
154
- cc = float32_to_float64(c, fp_status);
155
- return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
156
-}
157
-
158
-float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
159
- float_status *fp_status)
160
-{
161
- Accum prod;
162
- Accum acc;
163
- Accum result;
164
- accum_init(&prod);
165
- accum_init(&acc);
166
- accum_init(&result);
167
-
168
- uint8_t a_sign = float32_is_neg(a);
169
- uint8_t b_sign = float32_is_neg(b);
170
- uint8_t c_sign = float32_is_neg(c);
171
- if (float32_is_infinity(a) ||
172
- float32_is_infinity(b) ||
173
- float32_is_infinity(c)) {
174
- return special_fmaf(a, b, c, fp_status);
175
- }
176
- if (float32_is_any_nan(a) ||
177
- float32_is_any_nan(b) ||
178
- float32_is_any_nan(c)) {
179
- return special_fmaf(a, b, c, fp_status);
180
- }
181
- if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
182
- float32 tmp = float32_mul(a, b, fp_status);
183
- tmp = float32_add(tmp, c, fp_status);
184
- return tmp;
185
- }
186
-
187
- /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
188
- prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
189
-
190
- /*
191
- * Note: extracting the mantissa into an int is multiplying by
192
- * 2**23, so adjust here
193
- */
194
- prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
195
- prod.sign = a_sign ^ b_sign;
196
- if (float32_is_zero(a) || float32_is_zero(b)) {
197
- prod.exp = -2 * WAY_BIG_EXP;
198
- }
199
- if ((scale > 0) && float32_is_denormal(c)) {
200
- acc.mant = int128_mul_6464(0, 0);
201
- acc.exp = -WAY_BIG_EXP;
202
- acc.sign = c_sign;
203
- acc.sticky = 1;
204
- result = accum_add(prod, acc);
205
- } else if (!float32_is_zero(c)) {
206
- acc.mant = int128_mul_6464(float32_getmant(c), 1);
207
- acc.exp = float32_getexp(c);
208
- acc.sign = c_sign;
209
- result = accum_add(prod, acc);
210
- } else {
211
- result = prod;
212
- }
213
- result.exp += scale;
214
- return accum_round_float32(result, fp_status);
215
-}
216
217
float64 internal_mpyhh(float64 a, float64 b,
218
unsigned long long int accumulated,
27
--
219
--
28
2.34.1
220
2.43.0
29
30
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
This massive macro is now only used once.
2
Expand it for use only by float64.
2
3
3
Signed-off-by: Anton Johansson <anjo@rev.ng>
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Message-Id: <20230227135202.9710-14-anjo@rev.ng>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
target/tricore/cpu.c | 3 ++-
7
target/hexagon/fma_emu.c | 255 +++++++++++++++++++--------------------
9
1 file changed, 2 insertions(+), 1 deletion(-)
8
1 file changed, 127 insertions(+), 128 deletions(-)
10
9
11
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/target/tricore/cpu.c
12
--- a/target/hexagon/fma_emu.c
14
+++ b/target/tricore/cpu.c
13
+++ b/target/hexagon/fma_emu.c
15
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_synchronize_from_tb(CPUState *cs,
14
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
16
TriCoreCPU *cpu = TRICORE_CPU(cs);
17
CPUTriCoreState *env = &cpu->env;
18
19
- env->PC = tb_pc(tb);
20
+ tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
21
+ env->PC = tb->pc;
22
}
15
}
23
16
24
static void tricore_restore_state_to_opc(CPUState *cs,
17
/* Return a maximum finite value with the requested sign */
18
-#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
19
-static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
20
-{ \
21
- if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
22
- && ((a.guard | a.round | a.sticky) == 0)) { \
23
- /* result zero */ \
24
- switch (fp_status->float_rounding_mode) { \
25
- case float_round_down: \
26
- return zero_##SUFFIX(1); \
27
- default: \
28
- return zero_##SUFFIX(0); \
29
- } \
30
- } \
31
- /* Normalize right */ \
32
- /* We want MANTBITS bits of mantissa plus the leading one. */ \
33
- /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
34
- /* So we need to normalize right while the high word is non-zero and \
35
- * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
36
- while ((int128_gethi(a.mant) != 0) || \
37
- ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
38
- a = accum_norm_right(a, 1); \
39
- } \
40
- /* \
41
- * OK, now normalize left \
42
- * We want to normalize left until we have a leading one in bit 24 \
43
- * Theoretically, we only need to shift a maximum of one to the left if we \
44
- * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
45
- * should be 0 \
46
- */ \
47
- while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
48
- a = accum_norm_left(a); \
49
- } \
50
- /* \
51
- * OK, now we might need to denormalize because of potential underflow. \
52
- * We need to do this before rounding, and rounding might make us normal \
53
- * again \
54
- */ \
55
- while (a.exp <= 0) { \
56
- a = accum_norm_right(a, 1 - a.exp); \
57
- /* \
58
- * Do we have underflow? \
59
- * That's when we get an inexact answer because we ran out of bits \
60
- * in a denormal. \
61
- */ \
62
- if (a.guard || a.round || a.sticky) { \
63
- float_raise(float_flag_underflow, fp_status); \
64
- } \
65
- } \
66
- /* OK, we're relatively canonical... now we need to round */ \
67
- if (a.guard || a.round || a.sticky) { \
68
- float_raise(float_flag_inexact, fp_status); \
69
- switch (fp_status->float_rounding_mode) { \
70
- case float_round_to_zero: \
71
- /* Chop and we're done */ \
72
- break; \
73
- case float_round_up: \
74
- if (a.sign == 0) { \
75
- a.mant = int128_add(a.mant, int128_one()); \
76
- } \
77
- break; \
78
- case float_round_down: \
79
- if (a.sign != 0) { \
80
- a.mant = int128_add(a.mant, int128_one()); \
81
- } \
82
- break; \
83
- default: \
84
- if (a.round || a.sticky) { \
85
- /* round up if guard is 1, down if guard is zero */ \
86
- a.mant = int128_add(a.mant, int128_make64(a.guard)); \
87
- } else if (a.guard) { \
88
- /* exactly .5, round up if odd */ \
89
- a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
90
- } \
91
- break; \
92
- } \
93
- } \
94
- /* \
95
- * OK, now we might have carried all the way up. \
96
- * So we might need to shr once \
97
- * at least we know that the lsb should be zero if we rounded and \
98
- * got a carry out... \
99
- */ \
100
- if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
101
- a = accum_norm_right(a, 1); \
102
- } \
103
- /* Overflow? */ \
104
- if (a.exp >= INF_EXP) { \
105
- /* Yep, inf result */ \
106
- float_raise(float_flag_overflow, fp_status); \
107
- float_raise(float_flag_inexact, fp_status); \
108
- switch (fp_status->float_rounding_mode) { \
109
- case float_round_to_zero: \
110
- return maxfinite_##SUFFIX(a.sign); \
111
- case float_round_up: \
112
- if (a.sign == 0) { \
113
- return infinite_##SUFFIX(a.sign); \
114
- } else { \
115
- return maxfinite_##SUFFIX(a.sign); \
116
- } \
117
- case float_round_down: \
118
- if (a.sign != 0) { \
119
- return infinite_##SUFFIX(a.sign); \
120
- } else { \
121
- return maxfinite_##SUFFIX(a.sign); \
122
- } \
123
- default: \
124
- return infinite_##SUFFIX(a.sign); \
125
- } \
126
- } \
127
- /* Underflow? */ \
128
- if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
129
- /* Leading one means: No, we're normal. So, we should be done... */ \
130
- INTERNAL_TYPE ret; \
131
- ret.i = 0; \
132
- ret.sign = a.sign; \
133
- ret.exp = a.exp; \
134
- ret.mant = int128_getlo(a.mant); \
135
- return ret.i; \
136
- } \
137
- assert(a.exp == 1); \
138
- INTERNAL_TYPE ret; \
139
- ret.i = 0; \
140
- ret.sign = a.sign; \
141
- ret.exp = 0; \
142
- ret.mant = int128_getlo(a.mant); \
143
- return ret.i; \
144
+static float64 accum_round_float64(Accum a, float_status *fp_status)
145
+{
146
+ if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
147
+ && ((a.guard | a.round | a.sticky) == 0)) {
148
+ /* result zero */
149
+ switch (fp_status->float_rounding_mode) {
150
+ case float_round_down:
151
+ return zero_float64(1);
152
+ default:
153
+ return zero_float64(0);
154
+ }
155
+ }
156
+ /*
157
+ * Normalize right
158
+ * We want DF_MANTBITS bits of mantissa plus the leading one.
159
+ * That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF
160
+ * So we need to normalize right while the high word is non-zero and
161
+ * while the low word is nonzero when masked with 0xffe0_0000_0000_0000
162
+ */
163
+ while ((int128_gethi(a.mant) != 0) ||
164
+ ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) {
165
+ a = accum_norm_right(a, 1);
166
+ }
167
+ /*
168
+ * OK, now normalize left
169
+ * We want to normalize left until we have a leading one in bit 24
170
+ * Theoretically, we only need to shift a maximum of one to the left if we
171
+ * shifted out lots of bits from B, or if we had no shift / 1 shift sticky
172
+ * should be 0
173
+ */
174
+ while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) {
175
+ a = accum_norm_left(a);
176
+ }
177
+ /*
178
+ * OK, now we might need to denormalize because of potential underflow.
179
+ * We need to do this before rounding, and rounding might make us normal
180
+ * again
181
+ */
182
+ while (a.exp <= 0) {
183
+ a = accum_norm_right(a, 1 - a.exp);
184
+ /*
185
+ * Do we have underflow?
186
+ * That's when we get an inexact answer because we ran out of bits
187
+ * in a denormal.
188
+ */
189
+ if (a.guard || a.round || a.sticky) {
190
+ float_raise(float_flag_underflow, fp_status);
191
+ }
192
+ }
193
+ /* OK, we're relatively canonical... now we need to round */
194
+ if (a.guard || a.round || a.sticky) {
195
+ float_raise(float_flag_inexact, fp_status);
196
+ switch (fp_status->float_rounding_mode) {
197
+ case float_round_to_zero:
198
+ /* Chop and we're done */
199
+ break;
200
+ case float_round_up:
201
+ if (a.sign == 0) {
202
+ a.mant = int128_add(a.mant, int128_one());
203
+ }
204
+ break;
205
+ case float_round_down:
206
+ if (a.sign != 0) {
207
+ a.mant = int128_add(a.mant, int128_one());
208
+ }
209
+ break;
210
+ default:
211
+ if (a.round || a.sticky) {
212
+ /* round up if guard is 1, down if guard is zero */
213
+ a.mant = int128_add(a.mant, int128_make64(a.guard));
214
+ } else if (a.guard) {
215
+ /* exactly .5, round up if odd */
216
+ a.mant = int128_add(a.mant, int128_and(a.mant, int128_one()));
217
+ }
218
+ break;
219
+ }
220
+ }
221
+ /*
222
+ * OK, now we might have carried all the way up.
223
+ * So we might need to shr once
224
+ * at least we know that the lsb should be zero if we rounded and
225
+ * got a carry out...
226
+ */
227
+ if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) {
228
+ a = accum_norm_right(a, 1);
229
+ }
230
+ /* Overflow? */
231
+ if (a.exp >= DF_INF_EXP) {
232
+ /* Yep, inf result */
233
+ float_raise(float_flag_overflow, fp_status);
234
+ float_raise(float_flag_inexact, fp_status);
235
+ switch (fp_status->float_rounding_mode) {
236
+ case float_round_to_zero:
237
+ return maxfinite_float64(a.sign);
238
+ case float_round_up:
239
+ if (a.sign == 0) {
240
+ return infinite_float64(a.sign);
241
+ } else {
242
+ return maxfinite_float64(a.sign);
243
+ }
244
+ case float_round_down:
245
+ if (a.sign != 0) {
246
+ return infinite_float64(a.sign);
247
+ } else {
248
+ return maxfinite_float64(a.sign);
249
+ }
250
+ default:
251
+ return infinite_float64(a.sign);
252
+ }
253
+ }
254
+ /* Underflow? */
255
+ if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
256
+ /* Leading one means: No, we're normal. So, we should be done... */
257
+ Double ret;
258
+ ret.i = 0;
259
+ ret.sign = a.sign;
260
+ ret.exp = a.exp;
261
+ ret.mant = int128_getlo(a.mant);
262
+ return ret.i;
263
+ }
264
+ assert(a.exp == 1);
265
+ Double ret;
266
+ ret.i = 0;
267
+ ret.sign = a.sign;
268
+ ret.exp = 0;
269
+ ret.mant = int128_getlo(a.mant);
270
+ return ret.i;
271
}
272
273
-GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
274
-
275
float64 internal_mpyhh(float64 a, float64 b,
276
unsigned long long int accumulated,
277
float_status *fp_status)
25
--
278
--
26
2.34.1
279
2.43.0
27
28
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
This structure, with bitfields, is incorrect for big-endian.
2
Use the existing float32_getexp_raw, which uses extract32.
2
3
3
'dh_ctype_f32' is defined as 'float32', itself declared
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
in "fpu/softfloat-types.h". Include this header to avoid
5
when refactoring other headers:
6
7
In file included from include/exec/helper-proto.h:7,
8
from include/tcg/tcg-op.h:29,
9
from ../../tcg/tcg-op-vec.c:22:
10
include/exec/helper-head.h:44:22: error: unknown type name ‘float32’; did you mean ‘_Float32’?
11
44 | #define dh_ctype_f32 float32
12
| ^~~~~~~
13
14
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
15
Message-Id: <20221216225202.25664-1-philmd@linaro.org>
16
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
17
---
6
---
18
include/exec/helper-head.h | 2 ++
7
target/hexagon/fma_emu.c | 16 +++-------------
19
1 file changed, 2 insertions(+)
8
1 file changed, 3 insertions(+), 13 deletions(-)
20
9
21
diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
22
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
23
--- a/include/exec/helper-head.h
12
--- a/target/hexagon/fma_emu.c
24
+++ b/include/exec/helper-head.h
13
+++ b/target/hexagon/fma_emu.c
25
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ typedef union {
26
#ifndef EXEC_HELPER_HEAD_H
15
};
27
#define EXEC_HELPER_HEAD_H
16
} Double;
28
17
29
+#include "fpu/softfloat-types.h"
18
-typedef union {
30
+
19
- float f;
31
#define HELPER(name) glue(helper_, name)
20
- uint32_t i;
32
21
- struct {
33
/* Some types that make sense in C, but not for TCG. */
22
- uint32_t mant:23;
23
- uint32_t exp:8;
24
- uint32_t sign:1;
25
- };
26
-} Float;
27
-
28
static uint64_t float64_getmant(float64 f64)
29
{
30
Double a = { .i = f64 };
31
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
32
33
int32_t float32_getexp(float32 f32)
34
{
35
- Float a = { .i = f32 };
36
+ int exp = float32_getexp_raw(f32);
37
if (float32_is_normal(f32)) {
38
- return a.exp;
39
+ return exp;
40
}
41
if (float32_is_denormal(f32)) {
42
- return a.exp + 1;
43
+ return exp + 1;
44
}
45
return -1;
46
}
34
--
47
--
35
2.34.1
48
2.43.0
36
37
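A short aside on the big-endian fix above: extract32 addresses fields by bit position within the raw value, so the result does not depend on how the host compiler lays out bitfields. A sketch of reading the single-precision exponent that way (helper name is illustrative, not from the patch):

    #include "qemu/bitops.h"

    /* IEEE single precision: sign[31], exponent[30:23], fraction[22:0]. */
    static inline uint32_t sf_exp_raw(uint32_t raw)
    {
        return extract32(raw, 23, 8);
    }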
1
Change to match the recent change to probe_access_flags.
1
This structure, with bitfields, is incorrect for big-endian.
2
All existing callers updated to supply 0, so no change in behaviour.
2
Use extract64 and deposit64 instead.
3
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
include/exec/exec-all.h | 2 +-
7
target/hexagon/fma_emu.c | 46 ++++++++++++++--------------------------
9
accel/tcg/cputlb.c | 4 ++--
8
1 file changed, 16 insertions(+), 30 deletions(-)
10
target/arm/ptw.c | 2 +-
11
target/arm/tcg/mte_helper.c | 4 ++--
12
target/arm/tcg/sve_helper.c | 2 +-
13
target/arm/tcg/translate-a64.c | 2 +-
14
target/i386/tcg/sysemu/excp_helper.c | 4 ++--
15
7 files changed, 10 insertions(+), 10 deletions(-)
16
9
17
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
18
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
19
--- a/include/exec/exec-all.h
12
--- a/target/hexagon/fma_emu.c
20
+++ b/include/exec/exec-all.h
13
+++ b/target/hexagon/fma_emu.c
21
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
14
@@ -XXX,XX +XXX,XX @@
22
* and must be consumed or copied immediately, before any further
15
23
* access or changes to TLB @mmu_idx.
16
#define WAY_BIG_EXP 4096
24
*/
17
25
-int probe_access_full(CPUArchState *env, target_ulong addr,
18
-typedef union {
26
+int probe_access_full(CPUArchState *env, target_ulong addr, int size,
19
- double f;
27
MMUAccessType access_type, int mmu_idx,
20
- uint64_t i;
28
bool nonfault, void **phost,
21
- struct {
29
CPUTLBEntryFull **pfull, uintptr_t retaddr);
22
- uint64_t mant:52;
30
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
23
- uint64_t exp:11;
31
index XXXXXXX..XXXXXXX 100644
24
- uint64_t sign:1;
32
--- a/accel/tcg/cputlb.c
25
- };
33
+++ b/accel/tcg/cputlb.c
26
-} Double;
34
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
27
-
35
return flags;
28
static uint64_t float64_getmant(float64 f64)
29
{
30
- Double a = { .i = f64 };
31
+ uint64_t mant = extract64(f64, 0, 52);
32
if (float64_is_normal(f64)) {
33
- return a.mant | 1ULL << 52;
34
+ return mant | 1ULL << 52;
35
}
36
if (float64_is_zero(f64)) {
37
return 0;
38
}
39
if (float64_is_denormal(f64)) {
40
- return a.mant;
41
+ return mant;
42
}
43
return ~0ULL;
36
}
44
}
37
45
38
-int probe_access_full(CPUArchState *env, target_ulong addr,
46
int32_t float64_getexp(float64 f64)
39
+int probe_access_full(CPUArchState *env, target_ulong addr, int size,
40
MMUAccessType access_type, int mmu_idx,
41
bool nonfault, void **phost, CPUTLBEntryFull **pfull,
42
uintptr_t retaddr)
43
{
47
{
44
- int flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
48
- Double a = { .i = f64 };
45
+ int flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
49
+ int exp = extract64(f64, 52, 11);
46
nonfault, phost, pfull, retaddr);
50
if (float64_is_normal(f64)) {
47
51
- return a.exp;
48
/* Handle clean RAM pages. */
52
+ return exp;
49
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/target/arm/ptw.c
52
+++ b/target/arm/ptw.c
53
@@ -XXX,XX +XXX,XX @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
54
int flags;
55
56
env->tlb_fi = fi;
57
- flags = probe_access_full(env, addr, MMU_DATA_LOAD,
58
+ flags = probe_access_full(env, addr, 0, MMU_DATA_LOAD,
59
arm_to_core_mmu_idx(s2_mmu_idx),
60
true, &ptw->out_host, &full, 0);
61
env->tlb_fi = NULL;
62
diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/arm/tcg/mte_helper.c
65
+++ b/target/arm/tcg/mte_helper.c
66
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
67
* valid. Indicate to probe_access_flags no-fault, then assert that
68
* we received a valid page.
69
*/
70
- flags = probe_access_full(env, ptr, ptr_access, ptr_mmu_idx,
71
+ flags = probe_access_full(env, ptr, 0, ptr_access, ptr_mmu_idx,
72
ra == 0, &host, &full, ra);
73
assert(!(flags & TLB_INVALID_MASK));
74
75
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
76
*/
77
in_page = -(ptr | TARGET_PAGE_MASK);
78
if (unlikely(ptr_size > in_page)) {
79
- flags |= probe_access_full(env, ptr + in_page, ptr_access,
80
+ flags |= probe_access_full(env, ptr + in_page, 0, ptr_access,
81
ptr_mmu_idx, ra == 0, &host, &full, ra);
82
assert(!(flags & TLB_INVALID_MASK));
83
}
53
}
84
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
54
if (float64_is_denormal(f64)) {
85
index XXXXXXX..XXXXXXX 100644
55
- return a.exp + 1;
86
--- a/target/arm/tcg/sve_helper.c
56
+ return exp + 1;
87
+++ b/target/arm/tcg/sve_helper.c
57
}
88
@@ -XXX,XX +XXX,XX @@ bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
58
return -1;
89
&info->host, retaddr);
59
}
90
#else
60
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
91
CPUTLBEntryFull *full;
61
/* Return a maximum finite value with the requested sign */
92
- flags = probe_access_full(env, addr, access_type, mmu_idx, nofault,
62
static float64 accum_round_float64(Accum a, float_status *fp_status)
93
+ flags = probe_access_full(env, addr, 0, access_type, mmu_idx, nofault,
63
{
94
&info->host, &full, retaddr);
64
+ uint64_t ret;
95
#endif
65
+
96
info->flags = flags;
66
if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
97
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
67
&& ((a.guard | a.round | a.sticky) == 0)) {
98
index XXXXXXX..XXXXXXX 100644
68
/* result zero */
99
--- a/target/arm/tcg/translate-a64.c
69
@@ -XXX,XX +XXX,XX @@ static float64 accum_round_float64(Accum a, float_status *fp_status)
100
+++ b/target/arm/tcg/translate-a64.c
70
}
101
@@ -XXX,XX +XXX,XX @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s)
71
}
102
* that the TLB entry must be present and valid, and thus this
72
/* Underflow? */
103
* access will never raise an exception.
73
- if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
104
*/
74
+ ret = int128_getlo(a.mant);
105
- flags = probe_access_full(env, addr, MMU_INST_FETCH, mmu_idx,
75
+ if (ret & (1ULL << DF_MANTBITS)) {
106
+ flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
76
/* Leading one means: No, we're normal. So, we should be done... */
107
false, &host, &full, 0);
77
- Double ret;
108
assert(!(flags & TLB_INVALID_MASK));
78
- ret.i = 0;
109
79
- ret.sign = a.sign;
110
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
80
- ret.exp = a.exp;
111
index XXXXXXX..XXXXXXX 100644
81
- ret.mant = int128_getlo(a.mant);
112
--- a/target/i386/tcg/sysemu/excp_helper.c
82
- return ret.i;
113
+++ b/target/i386/tcg/sysemu/excp_helper.c
83
+ ret = deposit64(ret, 52, 11, a.exp);
114
@@ -XXX,XX +XXX,XX @@ static bool ptw_translate(PTETranslate *inout, hwaddr addr)
84
+ } else {
115
int flags;
85
+ assert(a.exp == 1);
116
86
+ ret = deposit64(ret, 52, 11, 0);
117
inout->gaddr = addr;
87
}
118
- flags = probe_access_full(inout->env, addr, MMU_DATA_STORE,
88
- assert(a.exp == 1);
119
+ flags = probe_access_full(inout->env, addr, 0, MMU_DATA_STORE,
89
- Double ret;
120
inout->ptw_idx, true, &inout->haddr, &full, 0);
90
- ret.i = 0;
121
91
- ret.sign = a.sign;
122
if (unlikely(flags & TLB_INVALID_MASK)) {
92
- ret.exp = 0;
123
@@ -XXX,XX +XXX,XX @@ do_check_protect_pse36:
93
- ret.mant = int128_getlo(a.mant);
124
CPUTLBEntryFull *full;
94
- return ret.i;
125
int flags, nested_page_size;
95
+ ret = deposit64(ret, 63, 1, a.sign);
126
96
+ return ret;
127
- flags = probe_access_full(env, paddr, access_type,
97
}
128
+ flags = probe_access_full(env, paddr, 0, access_type,
98
129
MMU_NESTED_IDX, true,
99
float64 internal_mpyhh(float64 a, float64 b,
130
&pte_trans.haddr, &full, 0);
131
if (unlikely(flags & TLB_INVALID_MASK)) {
132
--
100
--
133
2.34.1
101
2.43.0
134
135
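The same reasoning applies to the double-precision patch above: deposit64 writes a field at an explicit bit offset, which is what the removed bitfield struct was approximating in a host-dependent way. A rough sketch of packing the raw bits (helper name made up for illustration):

    #include "qemu/bitops.h"

    /* IEEE double precision: sign[63], exponent[62:52], fraction[51:0]. */
    static uint64_t df_pack_raw(uint64_t frac, uint32_t exp, uint32_t sign)
    {
        uint64_t r = frac & MAKE_64BIT_MASK(0, 52);
        r = deposit64(r, 52, 11, exp);
        r = deposit64(r, 63, 1, sign);
        return r;
    }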
1
From: Anton Johansson via <qemu-devel@nongnu.org>
1
No need to open-code 64x64->128-bit multiplication.
2
2
3
tb-jmp-cache.h contains a few small functions that only exist to hide a
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
CF_PCREL check; however, the caller often already performs such a check.
5
6
This patch moves CF_PCREL checks from the callee to the caller, and also
7
removes these functions which now only hide an access of the jmp-cache.
8
9
Signed-off-by: Anton Johansson <anjo@rev.ng>
10
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
11
Message-Id: <20230227135202.9710-12-anjo@rev.ng>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
5
---
14
accel/tcg/tb-jmp-cache.h | 36 ---------------------------
6
target/hexagon/fma_emu.c | 32 +++-----------------------------
15
accel/tcg/cpu-exec.c | 54 +++++++++++++++++++++++++++++-----------
7
1 file changed, 3 insertions(+), 29 deletions(-)
16
2 files changed, 40 insertions(+), 50 deletions(-)
17
8
18
diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
9
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
19
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
20
--- a/accel/tcg/tb-jmp-cache.h
11
--- a/target/hexagon/fma_emu.c
21
+++ b/accel/tcg/tb-jmp-cache.h
12
+++ b/target/hexagon/fma_emu.c
22
@@ -XXX,XX +XXX,XX @@ struct CPUJumpCache {
13
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32)
23
} array[TB_JMP_CACHE_SIZE];
14
return -1;
24
};
15
}
25
16
26
-static inline TranslationBlock *
17
-static uint32_t int128_getw0(Int128 x)
27
-tb_jmp_cache_get_tb(CPUJumpCache *jc, uint32_t cflags, uint32_t hash)
28
-{
18
-{
29
- if (cflags & CF_PCREL) {
19
- return int128_getlo(x);
30
- /* Use acquire to ensure current load of pc from jc. */
31
- return qatomic_load_acquire(&jc->array[hash].tb);
32
- } else {
33
- /* Use rcu_read to ensure current load of pc from *tb. */
34
- return qatomic_rcu_read(&jc->array[hash].tb);
35
- }
36
-}
20
-}
37
-
21
-
38
-static inline target_ulong
22
-static uint32_t int128_getw1(Int128 x)
39
-tb_jmp_cache_get_pc(CPUJumpCache *jc, uint32_t hash, TranslationBlock *tb)
40
-{
23
-{
41
- if (tb_cflags(tb) & CF_PCREL) {
24
- return int128_getlo(x) >> 32;
42
- return jc->array[hash].pc;
43
- } else {
44
- return tb_pc(tb);
45
- }
46
-}
25
-}
47
-
26
-
48
-static inline void
27
static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
49
-tb_jmp_cache_set(CPUJumpCache *jc, uint32_t hash,
28
{
50
- TranslationBlock *tb, target_ulong pc)
29
- Int128 a, b;
51
-{
30
- uint64_t pp0, pp1a, pp1b, pp1s, pp2;
52
- if (tb_cflags(tb) & CF_PCREL) {
31
+ uint64_t l, h;
53
- jc->array[hash].pc = pc;
32
54
- /* Use store_release on tb to ensure pc is written first. */
33
- a = int128_make64(ai);
55
- qatomic_store_release(&jc->array[hash].tb, tb);
34
- b = int128_make64(bi);
56
- } else{
35
- pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
57
- /* Use the pc value already stored in tb->pc. */
36
- pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
58
- qatomic_set(&jc->array[hash].tb, tb);
37
- pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
38
- pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
39
-
40
- pp1s = pp1a + pp1b;
41
- if ((pp1s < pp1a) || (pp1s < pp1b)) {
42
- pp2 += (1ULL << 32);
59
- }
43
- }
60
-}
44
- uint64_t ret_low = pp0 + (pp1s << 32);
45
- if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
46
- pp2 += 1;
47
- }
61
-
48
-
62
#endif /* ACCEL_TCG_TB_JMP_CACHE_H */
49
- return int128_make128(ret_low, pp2 + (pp1s >> 32));
63
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
50
+ mulu64(&l, &h, ai, bi);
64
index XXXXXXX..XXXXXXX 100644
51
+ return int128_make128(l, h);
65
--- a/accel/tcg/cpu-exec.c
66
+++ b/accel/tcg/cpu-exec.c
67
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
68
69
hash = tb_jmp_cache_hash_func(pc);
70
jc = cpu->tb_jmp_cache;
71
- tb = tb_jmp_cache_get_tb(jc, cflags, hash);
72
73
- if (likely(tb &&
74
- tb_jmp_cache_get_pc(jc, hash, tb) == pc &&
75
- tb->cs_base == cs_base &&
76
- tb->flags == flags &&
77
- tb->trace_vcpu_dstate == *cpu->trace_dstate &&
78
- tb_cflags(tb) == cflags)) {
79
- return tb;
80
+ if (cflags & CF_PCREL) {
81
+ /* Use acquire to ensure current load of pc from jc. */
82
+ tb = qatomic_load_acquire(&jc->array[hash].tb);
83
+
84
+ if (likely(tb &&
85
+ jc->array[hash].pc == pc &&
86
+ tb->cs_base == cs_base &&
87
+ tb->flags == flags &&
88
+ tb->trace_vcpu_dstate == *cpu->trace_dstate &&
89
+ tb_cflags(tb) == cflags)) {
90
+ return tb;
91
+ }
92
+ tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
93
+ if (tb == NULL) {
94
+ return NULL;
95
+ }
96
+ jc->array[hash].pc = pc;
97
+ /* Use store_release on tb to ensure pc is written first. */
98
+ qatomic_store_release(&jc->array[hash].tb, tb);
99
+ } else {
100
+ /* Use rcu_read to ensure current load of pc from *tb. */
101
+ tb = qatomic_rcu_read(&jc->array[hash].tb);
102
+
103
+ if (likely(tb &&
104
+ tb_pc(tb) == pc &&
105
+ tb->cs_base == cs_base &&
106
+ tb->flags == flags &&
107
+ tb->trace_vcpu_dstate == *cpu->trace_dstate &&
108
+ tb_cflags(tb) == cflags)) {
109
+ return tb;
110
+ }
111
+ tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
112
+ if (tb == NULL) {
113
+ return NULL;
114
+ }
115
+ /* Use the pc value already stored in tb->pc. */
116
+ qatomic_set(&jc->array[hash].tb, tb);
117
}
118
- tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
119
- if (tb == NULL) {
120
- return NULL;
121
- }
122
- tb_jmp_cache_set(jc, hash, tb, pc);
123
+
124
return tb;
125
}
52
}
126
53
127
@@ -XXX,XX +XXX,XX @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
54
static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
128
* for the fast lookup
129
*/
130
h = tb_jmp_cache_hash_func(pc);
131
- tb_jmp_cache_set(cpu->tb_jmp_cache, h, tb, pc);
132
+ /* Use the pc value already stored in tb->pc. */
133
+ qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
134
}
135
136
#ifndef CONFIG_USER_ONLY
137
--
55
--
138
2.34.1
56
2.43.0
139
140
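For the mulu64 conversion just above, the open-coded 32-bit partial products collapse into a single host-utils call; roughly (a sketch using only existing helpers, not the patch itself):

    #include "qemu/host-utils.h"
    #include "qemu/int128.h"

    /* 64x64 -> 128-bit unsigned multiply via mulu64. */
    static Int128 mul_u64x64(uint64_t a, uint64_t b)
    {
        uint64_t lo, hi;

        mulu64(&lo, &hi, a, b);
        return int128_make128(lo, hi);
    }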
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
Initialize x with accumulated via direct assignment,
2
rather than multiplying by 1.
2
3
3
We found a case where the source passed to flatview_write_continue() may
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
overlap with the destination when fuzzing igb, a new proposed network
5
device with sanitizers.
6
7
igb uses pci_dma_map() to get Tx packet, and pci_dma_write() to write Rx
8
buffer. While pci_dma_write() is usually used to write data from
9
memory not mapped to the guest, if igb is configured to perform
10
loopback, the data will be sourced from the guest memory. The source and
11
destination can overlap and the usage of memcpy() will be invalid in
12
such a case.
13
14
While we do not really have to deal with such an invalid request for
15
igb, detecting the overlap in igb code beforehand requires complex code,
16
and only covers this specific case. Instead, just replace memcpy() with
17
memmove() to tolerate overlaps. Using memmove() will slightly hurt
18
performance as it will need to check overlaps before using SIMD
19
instructions for copying, but the cost should be negligible, considering
20
the inherent complexity of flatview_write_continue().
21
22
The test cases generated by the fuzzer is available at:
23
https://patchew.org/QEMU/20230129053316.1071513-1-alxndr@bu.edu/
24
25
The fixed test case is:
26
fuzz/crash_47dfe62d9f911bf523ff48cd441b61c0013ed805
27
28
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
29
Acked-by: Alexander Bulekov <alxndr@bu.edu>
30
Acked-by: David Hildenbrand <david@redhat.com>
31
Message-Id: <20230131030155.18932-1-akihiko.odaki@daynix.com>
32
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
33
---
6
---
34
softmmu/physmem.c | 2 +-
7
target/hexagon/fma_emu.c | 2 +-
35
1 file changed, 1 insertion(+), 1 deletion(-)
8
1 file changed, 1 insertion(+), 1 deletion(-)
36
9
37
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
38
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
39
--- a/softmmu/physmem.c
12
--- a/target/hexagon/fma_emu.c
40
+++ b/softmmu/physmem.c
13
+++ b/target/hexagon/fma_emu.c
41
@@ -XXX,XX +XXX,XX @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
14
@@ -XXX,XX +XXX,XX @@ float64 internal_mpyhh(float64 a, float64 b,
42
} else {
15
float64_is_infinity(b)) {
43
/* RAM case */
16
return float64_mul(a, b, fp_status);
44
ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
17
}
45
- memcpy(ram_ptr, buf, l);
18
- x.mant = int128_mul_6464(accumulated, 1);
46
+ memmove(ram_ptr, buf, l);
19
+ x.mant = int128_make64(accumulated);
47
invalidate_and_set_dirty(mr, addr1, l);
20
x.sticky = sticky;
48
}
21
prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
49
22
x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
50
--
23
--
51
2.34.1
24
2.43.0
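On the flatview_write_continue() change above: memcpy is undefined when source and destination overlap, whereas memmove is specified to copy as if through a temporary buffer, which is exactly what the loopback case needs. A tiny standalone illustration (plain C, not QEMU code):

    #include <string.h>

    /* Shift a buffer down by one byte in place. The regions overlap,
     * so memmove is required; memcpy here would be undefined behaviour. */
    static void shift_down(char *buf, size_t len)
    {
        memmove(buf, buf + 1, len - 1);
    }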
1
In preparation for returning the number of insns generated
1
Convert all targets simultaneously, as the gen_intermediate_code
2
via the same pointer. Adjust only the prototypes so far.
2
function disappears from the target. While there are possible
3
workarounds, they're larger than simply performing the conversion.
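As a rough sketch of where each target lands after this conversion, the translator is registered through the new TCGCPUOps hook instead of a global gen_intermediate_code(); the "foo" names below are placeholders, not taken from the patch:

    #include "hw/core/tcg-cpu-ops.h"

    void foo_translate_init(void);
    void foo_translate_code(CPUState *cs, TranslationBlock *tb,
                            int *max_insns, vaddr pc, void *host_pc);

    static const TCGCPUOps foo_tcg_ops = {
        .initialize     = foo_translate_init,
        .translate_code = foo_translate_code,
        /* ...remaining hooks unchanged... */
    };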
3
4
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
include/exec/translator.h | 4 ++--
8
include/exec/translator.h | 14 --------------
8
accel/tcg/translate-all.c | 2 +-
9
include/hw/core/tcg-cpu-ops.h | 13 +++++++++++++
9
accel/tcg/translator.c | 4 ++--
10
target/alpha/cpu.h | 2 ++
10
target/alpha/translate.c | 2 +-
11
target/arm/internals.h | 2 ++
11
target/arm/tcg/translate.c | 2 +-
12
target/avr/cpu.h | 2 ++
12
target/avr/translate.c | 2 +-
13
target/hexagon/cpu.h | 2 ++
13
target/cris/translate.c | 2 +-
14
target/hppa/cpu.h | 2 ++
14
target/hexagon/translate.c | 2 +-
15
target/i386/tcg/helper-tcg.h | 2 ++
15
target/hppa/translate.c | 2 +-
16
target/loongarch/internals.h | 2 ++
16
target/i386/tcg/translate.c | 2 +-
17
target/m68k/cpu.h | 2 ++
17
target/loongarch/translate.c | 2 +-
18
target/microblaze/cpu.h | 2 ++
18
target/m68k/translate.c | 2 +-
19
target/mips/tcg/tcg-internal.h | 2 ++
19
target/microblaze/translate.c | 2 +-
20
target/openrisc/cpu.h | 2 ++
20
target/mips/tcg/translate.c | 2 +-
21
target/ppc/cpu.h | 2 ++
21
target/nios2/translate.c | 2 +-
22
target/riscv/cpu.h | 3 +++
22
target/openrisc/translate.c | 2 +-
23
target/rx/cpu.h | 2 ++
23
target/ppc/translate.c | 2 +-
24
target/s390x/s390x-internal.h | 2 ++
24
target/riscv/translate.c | 2 +-
25
target/sh4/cpu.h | 2 ++
25
target/rx/translate.c | 2 +-
26
target/sparc/cpu.h | 2 ++
26
target/s390x/tcg/translate.c | 2 +-
27
target/tricore/cpu.h | 2 ++
27
target/sh4/translate.c | 2 +-
28
target/xtensa/cpu.h | 2 ++
28
target/sparc/translate.c | 2 +-
29
accel/tcg/cpu-exec.c | 8 +++++---
29
target/tricore/translate.c | 2 +-
30
accel/tcg/translate-all.c | 8 +++++---
30
target/xtensa/translate.c | 2 +-
31
target/alpha/cpu.c | 1 +
31
24 files changed, 26 insertions(+), 26 deletions(-)
32
target/alpha/translate.c | 4 ++--
33
target/arm/cpu.c | 1 +
34
target/arm/tcg/cpu-v7m.c | 1 +
35
target/arm/tcg/translate.c | 5 ++---
36
target/avr/cpu.c | 1 +
37
target/avr/translate.c | 6 +++---
38
target/hexagon/cpu.c | 1 +
39
target/hexagon/translate.c | 4 ++--
40
target/hppa/cpu.c | 1 +
41
target/hppa/translate.c | 4 ++--
42
target/i386/tcg/tcg-cpu.c | 1 +
43
target/i386/tcg/translate.c | 5 ++---
44
target/loongarch/cpu.c | 1 +
45
target/loongarch/tcg/translate.c | 4 ++--
46
target/m68k/cpu.c | 1 +
47
target/m68k/translate.c | 4 ++--
48
target/microblaze/cpu.c | 1 +
49
target/microblaze/translate.c | 4 ++--
50
target/mips/cpu.c | 1 +
51
target/mips/tcg/translate.c | 4 ++--
52
target/openrisc/cpu.c | 1 +
53
target/openrisc/translate.c | 4 ++--
54
target/ppc/cpu_init.c | 1 +
55
target/ppc/translate.c | 4 ++--
56
target/riscv/tcg/tcg-cpu.c | 1 +
57
target/riscv/translate.c | 4 ++--
58
target/rx/cpu.c | 1 +
59
target/rx/translate.c | 4 ++--
60
target/s390x/cpu.c | 1 +
61
target/s390x/tcg/translate.c | 4 ++--
62
target/sh4/cpu.c | 1 +
63
target/sh4/translate.c | 4 ++--
64
target/sparc/cpu.c | 1 +
65
target/sparc/translate.c | 4 ++--
66
target/tricore/cpu.c | 1 +
67
target/tricore/translate.c | 5 ++---
68
target/xtensa/cpu.c | 1 +
69
target/xtensa/translate.c | 4 ++--
70
62 files changed, 121 insertions(+), 62 deletions(-)
32
71
33
diff --git a/include/exec/translator.h b/include/exec/translator.h
72
diff --git a/include/exec/translator.h b/include/exec/translator.h
34
index XXXXXXX..XXXXXXX 100644
73
index XXXXXXX..XXXXXXX 100644
35
--- a/include/exec/translator.h
74
--- a/include/exec/translator.h
36
+++ b/include/exec/translator.h
75
+++ b/include/exec/translator.h
37
@@ -XXX,XX +XXX,XX @@
76
@@ -XXX,XX +XXX,XX @@
38
* This function must be provided by the target, which should create
77
#include "qemu/bswap.h"
39
* the target-specific DisasContext, and then invoke translator_loop.
78
#include "exec/vaddr.h"
40
*/
79
41
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
80
-/**
42
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
81
- * gen_intermediate_code
43
target_ulong pc, void *host_pc);
82
- * @cpu: cpu context
44
83
- * @tb: translation block
84
- * @max_insns: max number of instructions to translate
85
- * @pc: guest virtual program counter address
86
- * @host_pc: host physical program counter address
87
- *
88
- * This function must be provided by the target, which should create
89
- * the target-specific DisasContext, and then invoke translator_loop.
90
- */
91
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
92
- vaddr pc, void *host_pc);
93
-
45
/**
94
/**
46
@@ -XXX,XX +XXX,XX @@ typedef struct TranslatorOps {
95
* DisasJumpType:
47
* - When single-stepping is enabled (system-wide or on the current vCPU).
96
* @DISAS_NEXT: Next instruction in program order.
48
* - When too many instructions have been translated.
97
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
49
*/
98
index XXXXXXX..XXXXXXX 100644
50
-void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
99
--- a/include/hw/core/tcg-cpu-ops.h
51
+void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
100
+++ b/include/hw/core/tcg-cpu-ops.h
52
target_ulong pc, void *host_pc,
101
@@ -XXX,XX +XXX,XX @@ struct TCGCPUOps {
53
const TranslatorOps *ops, DisasContextBase *db);
102
* Called when the first CPU is realized.
103
*/
104
void (*initialize)(void);
105
+ /**
106
+ * @translate_code: Translate guest instructions to TCGOps
107
+ * @cpu: cpu context
108
+ * @tb: translation block
109
+ * @max_insns: max number of instructions to translate
110
+ * @pc: guest virtual program counter address
111
+ * @host_pc: host physical program counter address
112
+ *
113
+ * This function must be provided by the target, which should create
114
+ * the target-specific DisasContext, and then invoke translator_loop.
115
+ */
116
+ void (*translate_code)(CPUState *cpu, TranslationBlock *tb,
117
+ int *max_insns, vaddr pc, void *host_pc);
118
/**
119
* @synchronize_from_tb: Synchronize state from a TCG #TranslationBlock
120
*
121
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
122
index XXXXXXX..XXXXXXX 100644
123
--- a/target/alpha/cpu.h
124
+++ b/target/alpha/cpu.h
125
@@ -XXX,XX +XXX,XX @@ enum {
126
};
127
128
void alpha_translate_init(void);
129
+void alpha_translate_code(CPUState *cs, TranslationBlock *tb,
130
+ int *max_insns, vaddr pc, void *host_pc);
131
132
#define CPU_RESOLVING_TYPE TYPE_ALPHA_CPU
133
134
diff --git a/target/arm/internals.h b/target/arm/internals.h
135
index XXXXXXX..XXXXXXX 100644
136
--- a/target/arm/internals.h
137
+++ b/target/arm/internals.h
138
@@ -XXX,XX +XXX,XX @@ void init_cpreg_list(ARMCPU *cpu);
139
140
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
141
void arm_translate_init(void);
142
+void arm_translate_code(CPUState *cs, TranslationBlock *tb,
143
+ int *max_insns, vaddr pc, void *host_pc);
144
145
void arm_cpu_register_gdb_commands(ARMCPU *cpu);
146
void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *,
147
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
148
index XXXXXXX..XXXXXXX 100644
149
--- a/target/avr/cpu.h
150
+++ b/target/avr/cpu.h
151
@@ -XXX,XX +XXX,XX @@ static inline void set_avr_feature(CPUAVRState *env, int feature)
152
}
153
154
void avr_cpu_tcg_init(void);
155
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
156
+ int *max_insns, vaddr pc, void *host_pc);
157
158
int cpu_avr_exec(CPUState *cpu);
159
160
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
161
index XXXXXXX..XXXXXXX 100644
162
--- a/target/hexagon/cpu.h
163
+++ b/target/hexagon/cpu.h
164
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
165
typedef HexagonCPU ArchCPU;
166
167
void hexagon_translate_init(void);
168
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
169
+ int *max_insns, vaddr pc, void *host_pc);
170
171
#include "exec/cpu-all.h"
172
173
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
174
index XXXXXXX..XXXXXXX 100644
175
--- a/target/hppa/cpu.h
176
+++ b/target/hppa/cpu.h
177
@@ -XXX,XX +XXX,XX @@ static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
178
}
179
180
void hppa_translate_init(void);
181
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
182
+ int *max_insns, vaddr pc, void *host_pc);
183
184
#define CPU_RESOLVING_TYPE TYPE_HPPA_CPU
185
186
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/i386/tcg/helper-tcg.h
189
+++ b/target/i386/tcg/helper-tcg.h
190
@@ -XXX,XX +XXX,XX @@ static inline target_long lshift(target_long x, int n)
191
192
/* translate.c */
193
void tcg_x86_init(void);
194
+void x86_translate_code(CPUState *cs, TranslationBlock *tb,
195
+ int *max_insns, vaddr pc, void *host_pc);
196
197
/* excp_helper.c */
198
G_NORETURN void raise_exception(CPUX86State *env, int exception_index);
199
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
200
index XXXXXXX..XXXXXXX 100644
201
--- a/target/loongarch/internals.h
202
+++ b/target/loongarch/internals.h
203
@@ -XXX,XX +XXX,XX @@
204
#define TARGET_VIRT_MASK MAKE_64BIT_MASK(0, TARGET_VIRT_ADDR_SPACE_BITS)

void loongarch_translate_init(void);
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

void G_NORETURN do_raise_exception(CPULoongArchState *env,
uint32_t exception,
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/m68k/cpu.h
+++ b/target/m68k/cpu.h
@@ -XXX,XX +XXX,XX @@ int m68k_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int m68k_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);

void m68k_tcg_init(void);
+void m68k_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void m68k_cpu_init_gdb(M68kCPU *cpu);
uint32_t cpu_m68k_get_ccr(CPUM68KState *env);
void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t);
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -XXX,XX +XXX,XX @@ static inline void mb_cpu_write_msr(CPUMBState *env, uint32_t val)
}

void mb_tcg_init(void);
+void mb_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

#define CPU_RESOLVING_TYPE TYPE_MICROBLAZE_CPU

diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h
index XXXXXXX..XXXXXXX 100644
--- a/target/mips/tcg/tcg-internal.h
+++ b/target/mips/tcg/tcg-internal.h
@@ -XXX,XX +XXX,XX @@
#include "cpu.h"

void mips_tcg_init(void);
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
G_NORETURN void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -XXX,XX +XXX,XX @@ void openrisc_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
int openrisc_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int openrisc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void openrisc_translate_init(void);
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
int print_insn_or1k(bfd_vma addr, disassemble_info *info);

#ifndef CONFIG_USER_ONLY
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -XXX,XX +XXX,XX @@ extern const VMStateDescription vmstate_ppc_cpu;

/*****************************************************************************/
void ppc_translate_init(void);
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

#if !defined(CONFIG_USER_ONLY)
void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -XXX,XX +XXX,XX @@ RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit);
void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv, bool virt_en);

void riscv_translate_init(void);
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
+
G_NORETURN void riscv_raise_exception(CPURISCVState *env,
uint32_t exception, uintptr_t pc);

diff --git a/target/rx/cpu.h b/target/rx/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/cpu.h
+++ b/target/rx/cpu.h
@@ -XXX,XX +XXX,XX @@ int rx_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int rx_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);

void rx_translate_init(void);
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte);

#include "exec/cpu-all.h"
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/s390x-internal.h
+++ b/target/s390x/s390x-internal.h
@@ -XXX,XX +XXX,XX @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3,

/* translate.c */
void s390x_translate_init(void);
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void s390x_restore_state_to_opc(CPUState *cs,
const TranslationBlock *tb,
const uint64_t *data);
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -XXX,XX +XXX,XX @@ G_NORETURN void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
uintptr_t retaddr);

void sh4_translate_init(void);
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

#if !defined(CONFIG_USER_ONLY)
hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -XXX,XX +XXX,XX @@ int sparc_cpu_memory_rw_debug(CPUState *cpu, vaddr addr,

/* translate.c */
void sparc_tcg_init(void);
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

/* fop_helper.c */
target_ulong cpu_get_fsr(CPUSPARCState *);
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/cpu.h
+++ b/target/tricore/cpu.h
@@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, PRIV, 0, 2)

void cpu_state_reset(CPUTriCoreState *s);
void tricore_tcg_init(void);
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc,
uint64_t *cs_base, uint32_t *flags)
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -XXX,XX +XXX,XX @@ G_NORETURN void xtensa_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,

void xtensa_collect_sr_names(const XtensaConfig *config);
void xtensa_translate_init(void);
+void xtensa_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void **xtensa_get_regfile_by_name(const char *name, int entries, int bits);
void xtensa_breakpoint_handler(CPUState *cs);
void xtensa_register_core(XtensaConfigList *node);
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)

if (!tcg_target_initialized) {
/* Check mandatory TCGCPUOps handlers */
+ const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
#ifndef CONFIG_USER_ONLY
- assert(cpu->cc->tcg_ops->cpu_exec_halt);
- assert(cpu->cc->tcg_ops->cpu_exec_interrupt);
+ assert(tcg_ops->cpu_exec_halt);
+ assert(tcg_ops->cpu_exec_interrupt);
#endif /* !CONFIG_USER_ONLY */
- cpu->cc->tcg_ops->initialize();
+ assert(tcg_ops->translate_code);
+ tcg_ops->initialize();
tcg_target_initialized = true;
}

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,

tcg_func_start(tcg_ctx);

- tcg_ctx->cpu = env_cpu(env);
- gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
+ CPUState *cs = env_cpu(env);
+ tcg_ctx->cpu = cs;
+ cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
+
assert(tb->size != 0);
tcg_ctx->cpu = NULL;
*max_insns = tb->icount;
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
/*
* Overflow of code_gen_buffer, or the current slice of it.
*
- * TODO: We don't need to re-do gen_intermediate_code, nor
+ * TODO: We don't need to re-do tcg_ops->translate_code, nor
* should we re-do the tcg optimization currently hidden
* inside tcg_gen_code. All that should be required is to
* flush the TBs, allocate a new TB, re-initialize it per
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps alpha_sysemu_ops = {

static const TCGCPUOps alpha_tcg_ops = {
.initialize = alpha_translate_init,
+ .translate_code = alpha_translate_code,
.synchronize_from_tb = alpha_cpu_synchronize_from_tb,
.restore_state_to_opc = alpha_restore_state_to_opc,
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps alpha_tr_ops = {
.tb_stop = alpha_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void alpha_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps arm_sysemu_ops = {

#ifdef CONFIG_TCG
static const TCGCPUOps arm_tcg_ops = {
.initialize = arm_translate_init,
+ .translate_code = arm_translate_code,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
.debug_excp_handler = arm_debug_excp_handler,
.restore_state_to_opc = arm_restore_state_to_opc,
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/cpu-v7m.c
+++ b/target/arm/tcg/cpu-v7m.c
@@ -XXX,XX +XXX,XX @@ static void cortex_m55_initfn(Object *obj)

static const TCGCPUOps arm_v7m_tcg_ops = {
.initialize = arm_translate_init,
+ .translate_code = arm_translate_code,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
.debug_excp_handler = arm_debug_excp_handler,
.restore_state_to_opc = arm_restore_state_to_opc,
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps thumb_translator_ops = {
.tb_stop = arm_tr_tb_stop,
};

-/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void arm_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = { };
const TranslatorOps *ops = &arm_translator_ops;
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps avr_sysemu_ops = {

static const TCGCPUOps avr_tcg_ops = {
.initialize = avr_cpu_tcg_init,
+ .translate_code = avr_cpu_translate_code,
.synchronize_from_tb = avr_cpu_synchronize_from_tb,
.restore_state_to_opc = avr_restore_state_to_opc,
.cpu_exec_interrupt = avr_cpu_exec_interrupt,
diff --git a/target/avr/translate.c b/target/avr/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/translate.c
+++ b/target/avr/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_WDR(DisasContext *ctx, arg_WDR *a)
*
* - translate()
* - canonicalize_skip()
- * - gen_intermediate_code()
+ * - translate_code()
* - restore_state_to_opc()
*
*/
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps avr_tr_ops = {
.tb_stop = avr_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = { };
translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_init(Object *obj)

static const TCGCPUOps hexagon_tcg_ops = {
.initialize = hexagon_translate_init,
+ .translate_code = hexagon_translate_code,
.synchronize_from_tb = hexagon_cpu_synchronize_from_tb,
.restore_state_to_opc = hexagon_restore_state_to_opc,
};
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hexagon_tr_ops = {
.tb_stop = hexagon_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;

diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps hppa_sysemu_ops = {

static const TCGCPUOps hppa_tcg_ops = {
.initialize = hppa_translate_init,
+ .translate_code = hppa_translate_code,
.synchronize_from_tb = hppa_cpu_synchronize_from_tb,
.restore_state_to_opc = hppa_restore_state_to_opc,

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hppa_tr_ops = {
#endif
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx = { };
translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -XXX,XX +XXX,XX @@ static bool x86_debug_check_breakpoint(CPUState *cs)

static const TCGCPUOps x86_tcg_ops = {
.initialize = tcg_x86_init,
+ .translate_code = x86_translate_code,
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
.restore_state_to_opc = x86_restore_state_to_opc,
.cpu_exec_enter = x86_cpu_exec_enter,
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps i386_tr_ops = {
.tb_stop = i386_tr_tb_stop,
};

-/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void x86_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)

static const TCGCPUOps loongarch_tcg_ops = {
.initialize = loongarch_translate_init,
+ .translate_code = loongarch_translate_code,
.synchronize_from_tb = loongarch_cpu_synchronize_from_tb,
.restore_state_to_opc = loongarch_restore_state_to_opc,

diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps loongarch_tr_ops = {
.tb_stop = loongarch_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;

diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps m68k_sysemu_ops = {

static const TCGCPUOps m68k_tcg_ops = {
.initialize = m68k_tcg_init,
+ .translate_code = m68k_translate_code,
.restore_state_to_opc = m68k_restore_state_to_opc,

#ifndef CONFIG_USER_ONLY
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps m68k_tr_ops = {
.tb_stop = m68k_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void m68k_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base);
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps mb_sysemu_ops = {

static const TCGCPUOps mb_tcg_ops = {
.initialize = mb_tcg_init,
+ .translate_code = mb_translate_code,
.synchronize_from_tb = mb_cpu_synchronize_from_tb,
.restore_state_to_opc = mb_restore_state_to_opc,

667
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
208
index XXXXXXX..XXXXXXX 100644
668
index XXXXXXX..XXXXXXX 100644
209
--- a/target/microblaze/translate.c
669
--- a/target/microblaze/translate.c
210
+++ b/target/microblaze/translate.c
670
+++ b/target/microblaze/translate.c
211
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mb_tr_ops = {
671
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mb_tr_ops = {
212
.disas_log = mb_tr_disas_log,
672
.tb_stop = mb_tr_tb_stop,
213
};
673
};
214
674
215
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
675
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
216
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
676
- vaddr pc, void *host_pc)
217
target_ulong pc, void *host_pc)
677
+void mb_translate_code(CPUState *cpu, TranslationBlock *tb,
678
+ int *max_insns, vaddr pc, void *host_pc)
218
{
679
{
219
DisasContext dc;
680
DisasContext dc;
681
translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base);
682
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
683
index XXXXXXX..XXXXXXX 100644
684
--- a/target/mips/cpu.c
685
+++ b/target/mips/cpu.c
686
@@ -XXX,XX +XXX,XX @@ static const Property mips_cpu_properties[] = {
687
#include "hw/core/tcg-cpu-ops.h"
688
static const TCGCPUOps mips_tcg_ops = {
689
.initialize = mips_tcg_init,
690
+ .translate_code = mips_translate_code,
691
.synchronize_from_tb = mips_cpu_synchronize_from_tb,
692
.restore_state_to_opc = mips_restore_state_to_opc,
693
220
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
694
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
221
index XXXXXXX..XXXXXXX 100644
695
index XXXXXXX..XXXXXXX 100644
222
--- a/target/mips/tcg/translate.c
696
--- a/target/mips/tcg/translate.c
223
+++ b/target/mips/tcg/translate.c
697
+++ b/target/mips/tcg/translate.c
224
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mips_tr_ops = {
698
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mips_tr_ops = {
225
.disas_log = mips_tr_disas_log,
699
.tb_stop = mips_tr_tb_stop,
226
};
700
};
227
701
228
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
702
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
229
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
703
- vaddr pc, void *host_pc)
230
target_ulong pc, void *host_pc)
704
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
705
+ int *max_insns, vaddr pc, void *host_pc)
231
{
706
{
232
DisasContext ctx;
707
DisasContext ctx;
233
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
708
234
index XXXXXXX..XXXXXXX 100644
709
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
235
--- a/target/nios2/translate.c
710
index XXXXXXX..XXXXXXX 100644
236
+++ b/target/nios2/translate.c
711
--- a/target/openrisc/cpu.c
237
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps nios2_tr_ops = {
712
+++ b/target/openrisc/cpu.c
238
.disas_log = nios2_tr_disas_log,
713
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps openrisc_sysemu_ops = {
239
};
714
240
715
static const TCGCPUOps openrisc_tcg_ops = {
241
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
716
.initialize = openrisc_translate_init,
242
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
717
+ .translate_code = openrisc_translate_code,
243
target_ulong pc, void *host_pc)
718
.synchronize_from_tb = openrisc_cpu_synchronize_from_tb,
244
{
719
.restore_state_to_opc = openrisc_restore_state_to_opc,
245
DisasContext dc;
720
246
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
721
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
247
index XXXXXXX..XXXXXXX 100644
722
index XXXXXXX..XXXXXXX 100644
248
--- a/target/openrisc/translate.c
723
--- a/target/openrisc/translate.c
249
+++ b/target/openrisc/translate.c
724
+++ b/target/openrisc/translate.c
250
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps openrisc_tr_ops = {
725
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps openrisc_tr_ops = {
251
.disas_log = openrisc_tr_disas_log,
726
.tb_stop = openrisc_tr_tb_stop,
252
};
727
};
253
728
254
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
729
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
255
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
730
- vaddr pc, void *host_pc)
256
target_ulong pc, void *host_pc)
731
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
732
+ int *max_insns, vaddr pc, void *host_pc)
257
{
733
{
258
DisasContext ctx;
734
DisasContext ctx;
735
736
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
737
index XXXXXXX..XXXXXXX 100644
738
--- a/target/ppc/cpu_init.c
739
+++ b/target/ppc/cpu_init.c
740
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps ppc_sysemu_ops = {
741
742
static const TCGCPUOps ppc_tcg_ops = {
743
.initialize = ppc_translate_init,
744
+ .translate_code = ppc_translate_code,
745
.restore_state_to_opc = ppc_restore_state_to_opc,
746
747
#ifdef CONFIG_USER_ONLY
259
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
748
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
260
index XXXXXXX..XXXXXXX 100644
749
index XXXXXXX..XXXXXXX 100644
261
--- a/target/ppc/translate.c
750
--- a/target/ppc/translate.c
262
+++ b/target/ppc/translate.c
751
+++ b/target/ppc/translate.c
263
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps ppc_tr_ops = {
752
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps ppc_tr_ops = {
264
.disas_log = ppc_tr_disas_log,
753
.tb_stop = ppc_tr_tb_stop,
265
};
754
};
266
755
267
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
756
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
268
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
757
- vaddr pc, void *host_pc)
269
target_ulong pc, void *host_pc)
758
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
759
+ int *max_insns, vaddr pc, void *host_pc)
270
{
760
{
271
DisasContext ctx;
761
DisasContext ctx;
762
763
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
764
index XXXXXXX..XXXXXXX 100644
765
--- a/target/riscv/tcg/tcg-cpu.c
766
+++ b/target/riscv/tcg/tcg-cpu.c
767
@@ -XXX,XX +XXX,XX @@ static void riscv_restore_state_to_opc(CPUState *cs,
768
769
static const TCGCPUOps riscv_tcg_ops = {
770
.initialize = riscv_translate_init,
771
+ .translate_code = riscv_translate_code,
772
.synchronize_from_tb = riscv_cpu_synchronize_from_tb,
773
.restore_state_to_opc = riscv_restore_state_to_opc,
774
272
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
775
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
273
index XXXXXXX..XXXXXXX 100644
776
index XXXXXXX..XXXXXXX 100644
274
--- a/target/riscv/translate.c
777
--- a/target/riscv/translate.c
275
+++ b/target/riscv/translate.c
778
+++ b/target/riscv/translate.c
276
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps riscv_tr_ops = {
779
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps riscv_tr_ops = {
277
.disas_log = riscv_tr_disas_log,
780
.tb_stop = riscv_tr_tb_stop,
278
};
781
};
279
782
280
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
783
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
281
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
784
- vaddr pc, void *host_pc)
282
target_ulong pc, void *host_pc)
785
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
786
+ int *max_insns, vaddr pc, void *host_pc)
283
{
787
{
284
DisasContext ctx;
788
DisasContext ctx;
789
790
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
791
index XXXXXXX..XXXXXXX 100644
792
--- a/target/rx/cpu.c
793
+++ b/target/rx/cpu.c
794
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps rx_sysemu_ops = {
795
796
static const TCGCPUOps rx_tcg_ops = {
797
.initialize = rx_translate_init,
798
+ .translate_code = rx_translate_code,
799
.synchronize_from_tb = rx_cpu_synchronize_from_tb,
800
.restore_state_to_opc = rx_restore_state_to_opc,
801
.tlb_fill = rx_cpu_tlb_fill,
285
diff --git a/target/rx/translate.c b/target/rx/translate.c
802
diff --git a/target/rx/translate.c b/target/rx/translate.c
286
index XXXXXXX..XXXXXXX 100644
803
index XXXXXXX..XXXXXXX 100644
287
--- a/target/rx/translate.c
804
--- a/target/rx/translate.c
288
+++ b/target/rx/translate.c
805
+++ b/target/rx/translate.c
289
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps rx_tr_ops = {
806
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps rx_tr_ops = {
290
.disas_log = rx_tr_disas_log,
807
.tb_stop = rx_tr_tb_stop,
291
};
808
};
292
809
293
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
810
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
294
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
811
- vaddr pc, void *host_pc)
295
target_ulong pc, void *host_pc)
812
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
813
+ int *max_insns, vaddr pc, void *host_pc)
296
{
814
{
297
DisasContext dc;
815
DisasContext dc;
816
817
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
818
index XXXXXXX..XXXXXXX 100644
819
--- a/target/s390x/cpu.c
820
+++ b/target/s390x/cpu.c
821
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc,
822
823
static const TCGCPUOps s390_tcg_ops = {
824
.initialize = s390x_translate_init,
825
+ .translate_code = s390x_translate_code,
826
.restore_state_to_opc = s390x_restore_state_to_opc,
827
828
#ifdef CONFIG_USER_ONLY
298
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
829
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
299
index XXXXXXX..XXXXXXX 100644
830
index XXXXXXX..XXXXXXX 100644
300
--- a/target/s390x/tcg/translate.c
831
--- a/target/s390x/tcg/translate.c
301
+++ b/target/s390x/tcg/translate.c
832
+++ b/target/s390x/tcg/translate.c
302
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps s390x_tr_ops = {
833
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps s390x_tr_ops = {
303
.disas_log = s390x_tr_disas_log,
834
.disas_log = s390x_tr_disas_log,
304
};
835
};
305
836
306
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
837
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
307
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
838
- vaddr pc, void *host_pc)
308
target_ulong pc, void *host_pc)
839
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
840
+ int *max_insns, vaddr pc, void *host_pc)
309
{
841
{
310
DisasContext dc;
842
DisasContext dc;
843
844
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
845
index XXXXXXX..XXXXXXX 100644
846
--- a/target/sh4/cpu.c
847
+++ b/target/sh4/cpu.c
848
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sh4_sysemu_ops = {
849
850
static const TCGCPUOps superh_tcg_ops = {
851
.initialize = sh4_translate_init,
852
+ .translate_code = sh4_translate_code,
853
.synchronize_from_tb = superh_cpu_synchronize_from_tb,
854
.restore_state_to_opc = superh_restore_state_to_opc,
855
311
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
856
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
312
index XXXXXXX..XXXXXXX 100644
857
index XXXXXXX..XXXXXXX 100644
313
--- a/target/sh4/translate.c
858
--- a/target/sh4/translate.c
314
+++ b/target/sh4/translate.c
859
+++ b/target/sh4/translate.c
315
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sh4_tr_ops = {
860
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sh4_tr_ops = {
316
.disas_log = sh4_tr_disas_log,
861
.tb_stop = sh4_tr_tb_stop,
317
};
862
};
318
863
319
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
864
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
320
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
865
- vaddr pc, void *host_pc)
321
target_ulong pc, void *host_pc)
866
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
867
+ int *max_insns, vaddr pc, void *host_pc)
322
{
868
{
323
DisasContext ctx;
869
DisasContext ctx;
870
871
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
872
index XXXXXXX..XXXXXXX 100644
873
--- a/target/sparc/cpu.c
874
+++ b/target/sparc/cpu.c
875
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sparc_sysemu_ops = {
876
877
static const TCGCPUOps sparc_tcg_ops = {
878
.initialize = sparc_tcg_init,
879
+ .translate_code = sparc_translate_code,
880
.synchronize_from_tb = sparc_cpu_synchronize_from_tb,
881
.restore_state_to_opc = sparc_restore_state_to_opc,
882
324
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
883
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
325
index XXXXXXX..XXXXXXX 100644
884
index XXXXXXX..XXXXXXX 100644
326
--- a/target/sparc/translate.c
885
--- a/target/sparc/translate.c
327
+++ b/target/sparc/translate.c
886
+++ b/target/sparc/translate.c
328
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sparc_tr_ops = {
887
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sparc_tr_ops = {
329
.disas_log = sparc_tr_disas_log,
888
.tb_stop = sparc_tr_tb_stop,
330
};
889
};
331
890
332
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
891
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
333
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
892
- vaddr pc, void *host_pc)
334
target_ulong pc, void *host_pc)
893
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
894
+ int *max_insns, vaddr pc, void *host_pc)
335
{
895
{
336
DisasContext dc = {};
896
DisasContext dc = {};
897
898
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
899
index XXXXXXX..XXXXXXX 100644
900
--- a/target/tricore/cpu.c
901
+++ b/target/tricore/cpu.c
902
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps tricore_sysemu_ops = {
903
904
static const TCGCPUOps tricore_tcg_ops = {
905
.initialize = tricore_tcg_init,
906
+ .translate_code = tricore_translate_code,
907
.synchronize_from_tb = tricore_cpu_synchronize_from_tb,
908
.restore_state_to_opc = tricore_restore_state_to_opc,
909
.tlb_fill = tricore_cpu_tlb_fill,
337
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
910
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
338
index XXXXXXX..XXXXXXX 100644
911
index XXXXXXX..XXXXXXX 100644
339
--- a/target/tricore/translate.c
912
--- a/target/tricore/translate.c
340
+++ b/target/tricore/translate.c
913
+++ b/target/tricore/translate.c
341
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps tricore_tr_ops = {
914
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps tricore_tr_ops = {
342
};
915
.tb_stop = tricore_tr_tb_stop,
343
916
};
344
917
345
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
918
-
346
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
919
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
347
target_ulong pc, void *host_pc)
920
- vaddr pc, void *host_pc)
921
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
922
+ int *max_insns, vaddr pc, void *host_pc)
348
{
923
{
349
DisasContext ctx;
924
DisasContext ctx;
925
translator_loop(cs, tb, max_insns, pc, host_pc,
926
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
927
index XXXXXXX..XXXXXXX 100644
928
--- a/target/xtensa/cpu.c
929
+++ b/target/xtensa/cpu.c
930
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps xtensa_sysemu_ops = {
931
932
static const TCGCPUOps xtensa_tcg_ops = {
933
.initialize = xtensa_translate_init,
934
+ .translate_code = xtensa_translate_code,
935
.debug_excp_handler = xtensa_breakpoint_handler,
936
.restore_state_to_opc = xtensa_restore_state_to_opc,
937
350
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
938
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
351
index XXXXXXX..XXXXXXX 100644
939
index XXXXXXX..XXXXXXX 100644
352
--- a/target/xtensa/translate.c
940
--- a/target/xtensa/translate.c
353
+++ b/target/xtensa/translate.c
941
+++ b/target/xtensa/translate.c
354
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps xtensa_translator_ops = {
942
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps xtensa_translator_ops = {
355
.disas_log = xtensa_tr_disas_log,
943
.tb_stop = xtensa_tr_tb_stop,
356
};
944
};
357
945
358
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
946
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
359
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
947
- vaddr pc, void *host_pc)
360
target_ulong pc, void *host_pc)
948
+void xtensa_translate_code(CPUState *cpu, TranslationBlock *tb,
949
+ int *max_insns, vaddr pc, void *host_pc)
361
{
950
{
362
DisasContext dc = {};
951
DisasContext dc = {};
952
translator_loop(cpu, tb, max_insns, pc, host_pc,
--
2.43.0