The following changes since commit a9fe9e191b4305b88c356a1ed9ac3baf89eb18aa:

  Merge tag 'pull-riscv-to-apply-20230505-1' of https://github.com/alistair23/qemu into staging (2023-05-05 09:25:13 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230505

for you to fetch changes up to 35a0bd63b458f30389b6bc6b7471c1665fe7b9d8:

  tcg: Widen helper_*_st[bw]_mmu val arguments (2023-05-05 17:21:03 +0100)

----------------------------------------------------------------
softfloat: Fix the incorrect computation in float32_exp2
tcg: Remove compatibility helpers for qemu ld/st
target/alpha: Remove TARGET_ALIGNED_ONLY
target/hppa: Remove TARGET_ALIGNED_ONLY
target/sparc: Remove TARGET_ALIGNED_ONLY
tcg: Cleanups preparing to unify calls to qemu_ld/st helpers

----------------------------------------------------------------

The following changes since commit aa3a285b5bc56a4208b3b57d4a55291e9c260107:

  Merge tag 'mem-2024-12-21' of https://github.com/davidhildenbrand/qemu into staging (2024-12-22 14:33:27 -0500)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241224

for you to fetch changes up to e4a8e093dc74be049f4829831dce76e5edab0003:

  accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core (2024-12-24 08:32:15 -0800)

----------------------------------------------------------------
tcg/optimize: Remove in-flight mask data from OptContext
fpu: Add float*_muladd_scalbn
fpu: Remove float_muladd_halve_result
fpu: Add float_round_nearest_even_max
fpu: Add float_muladd_suppress_add_product_zero
target/hexagon: Use float32_muladd
accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

----------------------------------------------------------------
23
Ilya Leoshkevich (1):
22
Richard Henderson (41):
24
tests/tcg: Do not use inttypes.h in multiarch/system/memory.c
23
target/avr: Finish conversion to tcg_gen_qemu_{ld,st}_*
24
target/cris: Finish conversion to tcg_gen_qemu_{ld,st}_*
25
target/Hexagon: Finish conversion to tcg_gen_qemu_{ld, st}_*
26
target/m68k: Finish conversion to tcg_gen_qemu_{ld,st}_*
27
target/mips: Finish conversion to tcg_gen_qemu_{ld,st}_*
28
target/s390x: Finish conversion to tcg_gen_qemu_{ld, st}_*
29
target/sparc: Finish conversion to tcg_gen_qemu_{ld, st}_*
30
target/xtensa: Finish conversion to tcg_gen_qemu_{ld, st}_*
31
tcg: Remove compatibility helpers for qemu ld/st
32
target/alpha: Use MO_ALIGN for system UNALIGN()
33
target/alpha: Use MO_ALIGN where required
34
target/alpha: Remove TARGET_ALIGNED_ONLY
35
target/hppa: Use MO_ALIGN for system UNALIGN()
36
target/hppa: Remove TARGET_ALIGNED_ONLY
37
target/sparc: Use MO_ALIGN where required
38
target/sparc: Use cpu_ld*_code_mmu
39
target/sparc: Remove TARGET_ALIGNED_ONLY
40
tcg/i386: Rationalize args to tcg_out_qemu_{ld,st}
41
tcg/i386: Generalize multi-part load overlap test
42
tcg/i386: Introduce HostAddress
43
tcg/i386: Drop r0+r1 local variables from tcg_out_tlb_load
44
tcg/i386: Introduce tcg_out_testi
45
tcg/aarch64: Rationalize args to tcg_out_qemu_{ld,st}
46
tcg/aarch64: Introduce HostAddress
47
tcg/arm: Rationalize args to tcg_out_qemu_{ld,st}
48
tcg/arm: Introduce HostAddress
49
tcg/loongarch64: Rationalize args to tcg_out_qemu_{ld,st}
50
tcg/loongarch64: Introduce HostAddress
51
tcg/mips: Rationalize args to tcg_out_qemu_{ld,st}
52
tcg/ppc: Rationalize args to tcg_out_qemu_{ld,st}
53
tcg/ppc: Introduce HostAddress
54
tcg/riscv: Require TCG_TARGET_REG_BITS == 64
55
tcg/riscv: Rationalize args to tcg_out_qemu_{ld,st}
56
tcg/s390x: Pass TCGType to tcg_out_qemu_{ld,st}
57
tcg/s390x: Introduce HostAddress
58
tcg/sparc64: Drop is_64 test from tcg_out_qemu_ld data return
59
tcg/sparc64: Pass TCGType to tcg_out_qemu_{ld,st}
60
tcg: Move TCGLabelQemuLdst to tcg.c
61
tcg: Replace REG_P with arg_loc_reg_p
62
tcg: Introduce arg_slot_stk_ofs
63
tcg: Widen helper_*_st[bw]_mmu val arguments
25
64
26
Pierrick Bouvier (1):
65
Shivaprasad G Bhat (1):
27
plugins: optimize cpu_index code generation
66
softfloat: Fix the incorrect computation in float32_exp2
28
67
29
Richard Henderson (70):
68
configs/targets/alpha-linux-user.mak | 1 -
30
tcg/optimize: Split out finish_bb, finish_ebb
69
configs/targets/alpha-softmmu.mak | 1 -
31
tcg/optimize: Split out fold_affected_mask
70
configs/targets/hppa-linux-user.mak | 1 -
32
tcg/optimize: Copy mask writeback to fold_masks
71
configs/targets/hppa-softmmu.mak | 1 -
33
tcg/optimize: Split out fold_masks_zs
72
configs/targets/sparc-linux-user.mak | 1 -
34
tcg/optimize: Augment s_mask from z_mask in fold_masks_zs
73
configs/targets/sparc-softmmu.mak | 1 -
35
tcg/optimize: Change representation of s_mask
74
configs/targets/sparc32plus-linux-user.mak | 1 -
36
tcg/optimize: Use finish_folding in fold_add, fold_add_vec, fold_addsub2
75
configs/targets/sparc64-linux-user.mak | 1 -
37
tcg/optimize: Introduce const value accessors for TempOptInfo
76
configs/targets/sparc64-softmmu.mak | 1 -
38
tcg/optimize: Use fold_masks_zs in fold_and
77
include/tcg/tcg-ldst.h | 10 +-
39
tcg/optimize: Use fold_masks_zs in fold_andc
78
include/tcg/tcg-op.h | 55 -----
40
tcg/optimize: Use fold_masks_zs in fold_bswap
79
target/hexagon/macros.h | 14 +-
41
tcg/optimize: Use fold_masks_zs in fold_count_zeros
80
tcg/riscv/tcg-target-con-set.h | 8 -
42
tcg/optimize: Use fold_masks_z in fold_ctpop
81
tcg/riscv/tcg-target.h | 22 +-
43
tcg/optimize: Use fold_and and fold_masks_z in fold_deposit
82
tcg/tcg-internal.h | 4 -
44
tcg/optimize: Compute sign mask in fold_deposit
83
accel/tcg/cputlb.c | 6 +-
45
tcg/optimize: Use finish_folding in fold_divide
84
fpu/softfloat.c | 2 +-
46
tcg/optimize: Use finish_folding in fold_dup, fold_dup2
85
target/alpha/translate.c | 38 +--
47
tcg/optimize: Use fold_masks_s in fold_eqv
86
target/avr/translate.c | 16 +-
48
tcg/optimize: Use fold_masks_z in fold_extract
87
target/hexagon/genptr.c | 8 +-
49
tcg/optimize: Use finish_folding in fold_extract2
88
target/hexagon/idef-parser/parser-helpers.c | 28 +--
50
tcg/optimize: Use fold_masks_zs in fold_exts
89
target/hexagon/translate.c | 32 +--
51
tcg/optimize: Use fold_masks_z in fold_extu
90
target/hppa/translate.c | 2 +-
52
tcg/optimize: Use fold_masks_zs in fold_movcond
91
target/m68k/translate.c | 76 ++----
53
tcg/optimize: Use finish_folding in fold_mul*
92
target/mips/tcg/translate.c | 8 +-
54
tcg/optimize: Use fold_masks_s in fold_nand
93
target/s390x/tcg/translate.c | 152 ++++++------
55
tcg/optimize: Use fold_masks_z in fold_neg_no_const
94
target/sparc/ldst_helper.c | 10 +-
56
tcg/optimize: Use fold_masks_s in fold_nor
95
target/sparc/translate.c | 85 ++++---
57
tcg/optimize: Use fold_masks_s in fold_not
96
target/xtensa/translate.c | 4 +-
58
tcg/optimize: Use fold_masks_zs in fold_or
97
tcg/tcg.c | 58 +++--
59
tcg/optimize: Use fold_masks_zs in fold_orc
98
target/cris/translate_v10.c.inc | 18 +-
60
tcg/optimize: Use fold_masks_zs in fold_qemu_ld
99
target/mips/tcg/nanomips_translate.c.inc | 2 +-
61
tcg/optimize: Return true from fold_qemu_st, fold_tcg_st
100
tcg/aarch64/tcg-target.c.inc | 108 ++++++---
62
tcg/optimize: Use finish_folding in fold_remainder
101
tcg/arm/tcg-target.c.inc | 357 +++++++++++++---------------
63
tcg/optimize: Distinguish simplification in fold_setcond_zmask
102
tcg/i386/tcg-target.c.inc | 345 ++++++++++++++-------------
64
tcg/optimize: Use fold_masks_z in fold_setcond
103
tcg/loongarch64/tcg-target.c.inc | 135 +++++------
65
tcg/optimize: Use fold_masks_s in fold_negsetcond
104
tcg/mips/tcg-target.c.inc | 186 ++++++++-------
66
tcg/optimize: Use fold_masks_z in fold_setcond2
105
tcg/ppc/tcg-target.c.inc | 192 ++++++++-------
67
tcg/optimize: Use finish_folding in fold_cmp_vec
106
tcg/riscv/tcg-target.c.inc | 268 ++++++---------------
68
tcg/optimize: Use finish_folding in fold_cmpsel_vec
107
tcg/s390x/tcg-target.c.inc | 131 +++++-----
69
tcg/optimize: Use fold_masks_zs in fold_sextract
108
tcg/sparc64/tcg-target.c.inc | 8 +-
70
tcg/optimize: Use fold_masks_zs, fold_masks_s in fold_shift
109
tcg/tcg-ldst.c.inc | 14 --
71
tcg/optimize: Simplify sign bit test in fold_shift
110
42 files changed, 1120 insertions(+), 1291 deletions(-)
72
tcg/optimize: Use finish_folding in fold_sub, fold_sub_vec
73
tcg/optimize: Use fold_masks_zs in fold_tcg_ld
74
tcg/optimize: Use finish_folding in fold_tcg_ld_memcopy
75
tcg/optimize: Use fold_masks_zs in fold_xor
76
tcg/optimize: Use finish_folding in fold_bitsel_vec
77
tcg/optimize: Use finish_folding as default in tcg_optimize
78
tcg/optimize: Remove z_mask, s_mask from OptContext
79
tcg/optimize: Re-enable sign-mask optimizations
80
tcg/optimize: Move fold_bitsel_vec into alphabetic sort
81
tcg/optimize: Move fold_cmp_vec, fold_cmpsel_vec into alphabetic sort
82
softfloat: Add float{16,32,64}_muladd_scalbn
83
target/arm: Use float*_muladd_scalbn
84
target/sparc: Use float*_muladd_scalbn
85
softfloat: Remove float_muladd_halve_result
86
softfloat: Add float_round_nearest_even_max
87
softfloat: Add float_muladd_suppress_add_product_zero
88
target/hexagon: Use float32_mul in helper_sfmpy
89
target/hexagon: Use float32_muladd for helper_sffma
90
target/hexagon: Use float32_muladd for helper_sffms
91
target/hexagon: Use float32_muladd_scalbn for helper_sffma_sc
92
target/hexagon: Use float32_muladd for helper_sffm[as]_lib
93
target/hexagon: Remove internal_fmafx
94
target/hexagon: Expand GEN_XF_ROUND
95
target/hexagon: Remove Float
96
target/hexagon: Remove Double
97
target/hexagon: Use mulu64 for int128_mul_6464
98
target/hexagon: Simplify internal_mpyhh setup
99
accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core
100
101
include/exec/translator.h | 14 -
102
include/fpu/softfloat-types.h | 2 +
103
include/fpu/softfloat.h | 14 +-
104
include/hw/core/tcg-cpu-ops.h | 13 +
105
target/alpha/cpu.h | 2 +
106
target/arm/internals.h | 2 +
107
target/avr/cpu.h | 2 +
108
target/hexagon/cpu.h | 2 +
109
target/hexagon/fma_emu.h | 3 -
110
target/hppa/cpu.h | 2 +
111
target/i386/tcg/helper-tcg.h | 2 +
112
target/loongarch/internals.h | 2 +
113
target/m68k/cpu.h | 2 +
114
target/microblaze/cpu.h | 2 +
115
target/mips/tcg/tcg-internal.h | 2 +
116
target/openrisc/cpu.h | 2 +
117
target/ppc/cpu.h | 2 +
118
target/riscv/cpu.h | 3 +
119
target/rx/cpu.h | 2 +
120
target/s390x/s390x-internal.h | 2 +
121
target/sh4/cpu.h | 2 +
122
target/sparc/cpu.h | 2 +
123
target/sparc/helper.h | 4 +-
124
target/tricore/cpu.h | 2 +
125
target/xtensa/cpu.h | 2 +
126
accel/tcg/cpu-exec.c | 8 +-
127
accel/tcg/plugin-gen.c | 9 +
128
accel/tcg/translate-all.c | 8 +-
129
fpu/softfloat.c | 63 +--
130
target/alpha/cpu.c | 1 +
131
target/alpha/translate.c | 4 +-
132
target/arm/cpu.c | 1 +
133
target/arm/tcg/cpu-v7m.c | 1 +
134
target/arm/tcg/helper-a64.c | 6 +-
135
target/arm/tcg/translate.c | 5 +-
136
target/avr/cpu.c | 1 +
137
target/avr/translate.c | 6 +-
138
target/hexagon/cpu.c | 1 +
139
target/hexagon/fma_emu.c | 496 ++++++---------------
140
target/hexagon/op_helper.c | 125 ++----
141
target/hexagon/translate.c | 4 +-
142
target/hppa/cpu.c | 1 +
143
target/hppa/translate.c | 4 +-
144
target/i386/tcg/tcg-cpu.c | 1 +
145
target/i386/tcg/translate.c | 5 +-
146
target/loongarch/cpu.c | 1 +
147
target/loongarch/tcg/translate.c | 4 +-
148
target/m68k/cpu.c | 1 +
149
target/m68k/translate.c | 4 +-
150
target/microblaze/cpu.c | 1 +
151
target/microblaze/translate.c | 4 +-
152
target/mips/cpu.c | 1 +
153
target/mips/tcg/translate.c | 4 +-
154
target/openrisc/cpu.c | 1 +
155
target/openrisc/translate.c | 4 +-
156
target/ppc/cpu_init.c | 1 +
157
target/ppc/translate.c | 4 +-
158
target/riscv/tcg/tcg-cpu.c | 1 +
159
target/riscv/translate.c | 4 +-
160
target/rx/cpu.c | 1 +
161
target/rx/translate.c | 4 +-
162
target/s390x/cpu.c | 1 +
163
target/s390x/tcg/translate.c | 4 +-
164
target/sh4/cpu.c | 1 +
165
target/sh4/translate.c | 4 +-
166
target/sparc/cpu.c | 1 +
167
target/sparc/fop_helper.c | 8 +-
168
target/sparc/translate.c | 84 ++--
169
target/tricore/cpu.c | 1 +
170
target/tricore/translate.c | 5 +-
171
target/xtensa/cpu.c | 1 +
172
target/xtensa/translate.c | 4 +-
173
tcg/optimize.c | 857 +++++++++++++++++++-----------------
174
tests/tcg/multiarch/system/memory.c | 9 +-
175
fpu/softfloat-parts.c.inc | 16 +-
176
75 files changed, 866 insertions(+), 1009 deletions(-)
From: Ilya Leoshkevich <iii@linux.ibm.com>

make check-tcg fails on Fedora with the following error message:

  alpha-linux-gnu-gcc [...] qemu/tests/tcg/multiarch/system/memory.c -o memory [...]
  qemu/tests/tcg/multiarch/system/memory.c:17:10: fatal error: inttypes.h: No such file or directory
     17 | #include <inttypes.h>
        |          ^~~~~~~~~~~~
  compilation terminated.

The reason is that Fedora has cross-compilers, but no cross-glibc
headers. Fix by hardcoding the format specifiers and dropping the
include.

An alternative fix would be to introduce a configure check for
inttypes.h. But this would make it impossible to use Fedora
cross-compilers for softmmu tests, which used to work so far.

Fixes: ecbcc9ead2f8 ("tests/tcg: add a system test to check memory instrumentation")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010085906.226249-1-iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/tcg/multiarch/system/memory.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/tcg/multiarch/system/memory.c b/tests/tcg/multiarch/system/memory.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/tcg/multiarch/system/memory.c
+++ b/tests/tcg/multiarch/system/memory.c
@@ -XXX,XX +XXX,XX @@

 #include <stdint.h>
 #include <stdbool.h>
-#include <inttypes.h>
 #include <minilib.h>

 #ifndef CHECK_UNALIGNED
@@ -XXX,XX +XXX,XX @@ int main(void)
     int i;
     bool ok = true;

-    ml_printf("Test data start: 0x%"PRIxPTR"\n", &test_data[0]);
-    ml_printf("Test data end: 0x%"PRIxPTR"\n", &test_data[TEST_SIZE]);
+    ml_printf("Test data start: 0x%lx\n", (unsigned long)&test_data[0]);
+    ml_printf("Test data end: 0x%lx\n", (unsigned long)&test_data[TEST_SIZE]);

     /* Run through the unsigned tests first */
     for (i = 0; i < ARRAY_SIZE(init_ufns) && ok; i++) {
@@ -XXX,XX +XXX,XX @@ int main(void)
         ok = do_signed_reads(true);
     }

-    ml_printf("Test data read: %"PRId32"\n", test_read_count);
-    ml_printf("Test data write: %"PRId32"\n", test_write_count);
+    ml_printf("Test data read: %lu\n", (unsigned long)test_read_count);
+    ml_printf("Test data write: %lu\n", (unsigned long)test_write_count);
     ml_printf("Test complete: %s\n", ok ? "PASSED" : "FAILED");
     return ok ? 0 : -1;
 }
--
2.43.0
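
As an aside, the "hardcode the format specifiers" workaround amounts to casting to a
type whose conversion specifier every printf-like implementation understands. A minimal
standalone sketch of the same idea (plain C and stdio here, rather than the test's
minilib; not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        static uint8_t test_data[16];
        uint32_t test_read_count = 42;

        /* PRIxPTR / PRId32 would require <inttypes.h>; cast instead and use
         * the core specifiers that any printf-like implementation supports. */
        printf("Test data start: 0x%lx\n",
               (unsigned long)(uintptr_t)&test_data[0]);
        printf("Test data read: %lu\n", (unsigned long)test_read_count);
        return 0;
    }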
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>

When running with a single vcpu, we can return a constant instead of a
load when accessing cpu_index.
A side effect is that all tcg operations using it are optimized, most
notably scoreboard access.
When running a simple loop in user-mode, the speedup is around 20%.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20241128213843.1023080-1-pierrick.bouvier@linaro.org>
---
 accel/tcg/plugin-gen.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -XXX,XX +XXX,XX @@ static void gen_disable_mem_helper(void)

 static TCGv_i32 gen_cpu_index(void)
 {
+    /*
+     * Optimize when we run with a single vcpu. All values using cpu_index,
+     * including scoreboard index, will be optimized out.
+     * User-mode calls tb_flush when setting this flag. In system-mode, all
+     * vcpus are created before generating code.
+     */
+    if (!tcg_cflags_has(current_cpu, CF_PARALLEL)) {
+        return tcg_constant_i32(current_cpu->cpu_index);
+    }
     TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
     tcg_gen_ld_i32(cpu_index, tcg_env,
                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
--
2.43.0
1
Certain Hexagon instructions suppress changes to the result
when the product of fma() is a true zero.

From: Shivaprasad G Bhat <sbhat@linux.ibm.com>

The float32_exp2 function is computing the wrong exponent of 2.

For example, with the following set of values {0.1, 2.0, 2.0, -1.0},
the expected output would be {1.071773, 4.000000, 4.000000, 0.500000}.
Instead, the function is computing {1.119102, 3.382044, 3.382044, -0.191022}.

Looking at the code, float32_exp2() attempts to do this

                    2     3     4     5           n
  x          x     x     x     x     x           x
 e    = 1 + --- + --- + --- + --- + --- + ... + --- + ...
            1!    2!    3!    4!    5!          n!

But because of the typo it ends up doing

  x          x     x     x     x     x           x
 e    = 1 + --- + --- + --- + --- + --- + ... + --- + ...
            1!    2!    3!    4!    5!          n!

This is because instead of the xnp which holds the numerator, parts_muladd
is using the xp which is just 'x'. Commit '572c4d862ff2' refactored this
function, and mistakenly used xp instead of xnp.
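
For readers who want to see where those numbers come from, here is a small standalone
sketch (plain C on host doubles, not QEMU's FloatParts code) that evaluates the series
once with the correct x^n numerator (what the xnp accumulator tracks) and once with the
plain x numerator that the typo produced:

    #include <stdio.h>
    #include <math.h>

    int main(void)
    {
        const double a[] = { 0.1, 2.0, -1.0 };

        for (int i = 0; i < 3; i++) {
            double x = a[i] * log(2.0);  /* 2^a == e^(a*ln2) */
            double xn = 1.0;             /* running x^n, i.e. the xnp numerator */
            double fact = 1.0;           /* running n! */
            double good = 1.0, bad = 1.0;

            for (int n = 1; n < 15; n++) {
                xn *= x;
                fact *= n;
                good += xn / fact;       /* numerator x^n: correct */
                bad += x / fact;         /* numerator x: the typo */
            }
            printf("2^%4.1f: correct %.6f, buggy %.6f\n", a[i], good, bad);
        }
        return 0;
    }

Compiled with -lm, the "correct" column reproduces {1.071773, 4.000000, 0.500000} and
the "buggy" column reproduces {1.119102, 3.382044, -0.191022} quoted above.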
25
26
Cc: qemu-stable@nongnu.org
27
Fixes: 572c4d862ff2 "softfloat: Convert float32_exp2 to FloatParts"
28
Partially-Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1623
29
Reported-By: Luca Barbato (https://gitlab.com/lu-zero)
30
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
31
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
32
Message-Id: <168304110865.537992.13059030916325018670.stgit@localhost.localdomain>
33
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
34
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
35
---
6
include/fpu/softfloat.h | 5 +++++
36
fpu/softfloat.c | 2 +-
7
fpu/softfloat.c | 3 +++
37
1 file changed, 1 insertion(+), 1 deletion(-)
8
fpu/softfloat-parts.c.inc | 4 +++-
9
3 files changed, 11 insertions(+), 1 deletion(-)
10
38
11
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/include/fpu/softfloat.h
14
+++ b/include/fpu/softfloat.h
15
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
16
| Using these differs from negating an input or output before calling
17
| the muladd function in that this means that a NaN doesn't have its
18
| sign bit inverted before it is propagated.
19
+|
20
+| With float_muladd_suppress_add_product_zero, if A or B is zero
21
+| such that the product is a true zero, then return C without addition.
22
+| This preserves the sign of C when C is +/- 0. Used for Hexagon.
23
*----------------------------------------------------------------------------*/
24
enum {
25
float_muladd_negate_c = 1,
26
float_muladd_negate_product = 2,
27
float_muladd_negate_result = 4,
28
+ float_muladd_suppress_add_product_zero = 8,
29
};
30
31
/*----------------------------------------------------------------------------
32
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
39
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
33
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
34
--- a/fpu/softfloat.c
41
--- a/fpu/softfloat.c
35
+++ b/fpu/softfloat.c
42
+++ b/fpu/softfloat.c
36
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
43
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)
37
if (unlikely(!can_use_fpu(s))) {
44
float64_unpack_canonical(&rp, float64_one, status);
38
goto soft;
45
for (i = 0 ; i < 15 ; i++) {
46
float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
47
- rp = *parts_muladd(&tp, &xp, &rp, 0, status);
48
+ rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
49
xnp = *parts_mul(&xnp, &xp, status);
39
}
50
}
40
+ if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
51
41
+ goto soft;
42
+ }
43
44
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
45
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
46
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
47
index XXXXXXX..XXXXXXX 100644
48
--- a/fpu/softfloat-parts.c.inc
49
+++ b/fpu/softfloat-parts.c.inc
50
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
51
goto return_normal;
52
}
53
if (c->cls == float_class_zero) {
54
- if (a->sign != c->sign) {
55
+ if (flags & float_muladd_suppress_add_product_zero) {
56
+ a->sign = c->sign;
57
+ } else if (a->sign != c->sign) {
58
goto return_sub_zero;
59
}
60
goto return_zero;
61
--
52
--
62
2.43.0
53
2.34.1
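
As an aside, here is a minimal standalone sketch of the behaviour that the new
float_muladd_suppress_add_product_zero flag is documented to provide in the softfloat.h
hunk above: when the product is a true zero, C is returned unchanged, preserving its
sign. The helper below is illustrative only (host doubles, invented name), not QEMU's
softfloat implementation:

    #include <stdio.h>
    #include <math.h>

    /* "If A or B is zero such that the product is a true zero, return C
     * without the addition" -- keeps the sign of C when C is +/- 0. */
    static double muladd_suppress_zero_product(double a, double b, double c)
    {
        if ((a == 0.0 && !isnan(b) && !isinf(b)) ||
            (b == 0.0 && !isnan(a) && !isinf(a))) {
            return c;                 /* product is a true zero: keep C as-is */
        }
        return fma(a, b, c);
    }

    int main(void)
    {
        printf("IEEE fma(0, 5, -0)  = %+.1f\n", fma(0.0, 5.0, -0.0));
        printf("suppressed product  = %+.1f\n",
               muladd_suppress_zero_product(0.0, 5.0, -0.0));
        return 0;
    }

The IEEE result is +0.0, while the suppressed-product rule preserves the -0.0 addend,
which is the Hexagon behaviour the flag exists to model.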
1
There are multiple special cases for this instruction.
(1) The saturate to normal maximum instead of overflow to infinity is
    handled by the new float_round_nearest_even_max rounding mode.
(2) The 0 * n + c special case is handled by the new
    float_muladd_suppress_add_product_zero flag.
(3) The Inf - Inf -> 0 special case can be detected after the fact
    by examining float_flag_invalid_isi.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Convert away from the old interface with the implicit
MemOp argument.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Anton Johansson <anjo@rev.ng>
Message-Id: <20230502135741.1158035-2-richard.henderson@linaro.org>
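
On special case (3) above, a hedged standalone sketch of detecting Inf - Inf after the
fact via the host's exception flags; FE_INVALID stands in for softfloat's more specific
float_flag_invalid_isi, flag inspection on the host is best-effort, and this is not
QEMU code:

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        feclearexcept(FE_ALL_EXCEPT);
        /* product is +Inf, addend is -Inf: the fma raises "invalid" */
        double r = fma(INFINITY, 1.0, -INFINITY);

        if (fetestexcept(FE_INVALID)) {
            r = 0.0;        /* Hexagon semantics: Inf - Inf yields 0 */
        }
        printf("result: %f\n", r);
        return 0;
    }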
11
---
7
---
12
target/hexagon/op_helper.c | 105 +++++++++----------------------------
8
target/avr/translate.c | 16 ++++++++--------
13
1 file changed, 26 insertions(+), 79 deletions(-)
9
1 file changed, 8 insertions(+), 8 deletions(-)
14
10
15
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
11
diff --git a/target/avr/translate.c b/target/avr/translate.c
16
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
17
--- a/target/hexagon/op_helper.c
13
--- a/target/avr/translate.c
18
+++ b/target/hexagon/op_helper.c
14
+++ b/target/avr/translate.c
19
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
15
@@ -XXX,XX +XXX,XX @@ static void gen_data_store(DisasContext *ctx, TCGv data, TCGv addr)
20
return RxV;
16
if (ctx->base.tb->flags & TB_FLAGS_FULL_ACCESS) {
17
gen_helper_fullwr(cpu_env, data, addr);
18
} else {
19
- tcg_gen_qemu_st8(data, addr, MMU_DATA_IDX); /* mem[addr] = data */
20
+ tcg_gen_qemu_st_tl(data, addr, MMU_DATA_IDX, MO_UB);
21
}
21
}
22
}
22
23
23
-static bool is_zero_prod(float32 a, float32 b)
24
@@ -XXX,XX +XXX,XX @@ static void gen_data_load(DisasContext *ctx, TCGv data, TCGv addr)
24
-{
25
if (ctx->base.tb->flags & TB_FLAGS_FULL_ACCESS) {
25
- return ((float32_is_zero(a) && is_finite(b)) ||
26
gen_helper_fullrd(data, cpu_env, addr);
26
- (float32_is_zero(b) && is_finite(a)));
27
} else {
27
-}
28
- tcg_gen_qemu_ld8u(data, addr, MMU_DATA_IDX); /* data = mem[addr] */
28
-
29
+ tcg_gen_qemu_ld_tl(data, addr, MMU_DATA_IDX, MO_UB);
29
-static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
30
}
30
-{
31
- float32 ret = dst;
32
- if (float32_is_any_nan(x)) {
33
- if (extract32(x, 22, 1) == 0) {
34
- float_raise(float_flag_invalid, fp_status);
35
- }
36
- ret = make_float32(0xffffffff); /* nan */
37
- }
38
- return ret;
39
-}
40
-
41
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
42
float32 RsV, float32 RtV, float32 PuV)
43
{
44
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
45
return RxV;
46
}
31
}
47
32
48
-static bool is_inf_prod(int32_t a, int32_t b)
33
@@ -XXX,XX +XXX,XX @@ static bool trans_LPM1(DisasContext *ctx, arg_LPM1 *a)
49
+static float32 do_sffma_lib(CPUHexagonState *env, float32 RxV,
34
50
+ float32 RsV, float32 RtV, int negate)
35
tcg_gen_shli_tl(addr, H, 8); /* addr = H:L */
51
{
36
tcg_gen_or_tl(addr, addr, L);
52
- return (float32_is_infinity(a) && float32_is_infinity(b)) ||
37
- tcg_gen_qemu_ld8u(Rd, addr, MMU_CODE_IDX); /* Rd = mem[addr] */
53
- (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
38
+ tcg_gen_qemu_ld_tl(Rd, addr, MMU_CODE_IDX, MO_UB);
54
- (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
39
return true;
55
+ int flags;
56
+
57
+ arch_fpop_start(env);
58
+
59
+ set_float_rounding_mode(float_round_nearest_even_max, &env->fp_status);
60
+ RxV = float32_muladd(RsV, RtV, RxV,
61
+ negate | float_muladd_suppress_add_product_zero,
62
+ &env->fp_status);
63
+
64
+ flags = get_float_exception_flags(&env->fp_status);
65
+ if (flags) {
66
+ /* Flags are suppressed by this instruction. */
67
+ set_float_exception_flags(0, &env->fp_status);
68
+
69
+ /* Return 0 for Inf - Inf. */
70
+ if (flags & float_flag_invalid_isi) {
71
+ RxV = 0;
72
+ }
73
+ }
74
+
75
+ arch_fpop_end(env);
76
+ return RxV;
77
}
40
}
78
41
79
float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
42
@@ -XXX,XX +XXX,XX @@ static bool trans_LPM2(DisasContext *ctx, arg_LPM2 *a)
80
float32 RsV, float32 RtV)
43
81
{
44
tcg_gen_shli_tl(addr, H, 8); /* addr = H:L */
82
- bool infinp;
45
tcg_gen_or_tl(addr, addr, L);
83
- bool infminusinf;
46
- tcg_gen_qemu_ld8u(Rd, addr, MMU_CODE_IDX); /* Rd = mem[addr] */
84
- float32 tmp;
47
+ tcg_gen_qemu_ld_tl(Rd, addr, MMU_CODE_IDX, MO_UB);
85
-
48
return true;
86
- arch_fpop_start(env);
87
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
88
- infminusinf = float32_is_infinity(RxV) &&
89
- is_inf_prod(RsV, RtV) &&
90
- (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
91
- infinp = float32_is_infinity(RxV) ||
92
- float32_is_infinity(RtV) ||
93
- float32_is_infinity(RsV);
94
- RxV = check_nan(RxV, RxV, &env->fp_status);
95
- RxV = check_nan(RxV, RsV, &env->fp_status);
96
- RxV = check_nan(RxV, RtV, &env->fp_status);
97
- tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
98
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
99
- RxV = tmp;
100
- }
101
- set_float_exception_flags(0, &env->fp_status);
102
- if (float32_is_infinity(RxV) && !infinp) {
103
- RxV = RxV - 1;
104
- }
105
- if (infminusinf) {
106
- RxV = 0;
107
- }
108
- arch_fpop_end(env);
109
- return RxV;
110
+ return do_sffma_lib(env, RxV, RsV, RtV, 0);
111
}
49
}
112
50
113
float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
51
@@ -XXX,XX +XXX,XX @@ static bool trans_LPMX(DisasContext *ctx, arg_LPMX *a)
114
float32 RsV, float32 RtV)
52
115
{
53
tcg_gen_shli_tl(addr, H, 8); /* addr = H:L */
116
- bool infinp;
54
tcg_gen_or_tl(addr, addr, L);
117
- bool infminusinf;
55
- tcg_gen_qemu_ld8u(Rd, addr, MMU_CODE_IDX); /* Rd = mem[addr] */
118
- float32 tmp;
56
+ tcg_gen_qemu_ld_tl(Rd, addr, MMU_CODE_IDX, MO_UB);
119
-
57
tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */
120
- arch_fpop_start(env);
58
tcg_gen_andi_tl(L, addr, 0xff);
121
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
59
tcg_gen_shri_tl(addr, addr, 8);
122
- infminusinf = float32_is_infinity(RxV) &&
60
@@ -XXX,XX +XXX,XX @@ static bool trans_ELPM1(DisasContext *ctx, arg_ELPM1 *a)
123
- is_inf_prod(RsV, RtV) &&
61
TCGv Rd = cpu_r[0];
124
- (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
62
TCGv addr = gen_get_zaddr();
125
- infinp = float32_is_infinity(RxV) ||
63
126
- float32_is_infinity(RtV) ||
64
- tcg_gen_qemu_ld8u(Rd, addr, MMU_CODE_IDX); /* Rd = mem[addr] */
127
- float32_is_infinity(RsV);
65
+ tcg_gen_qemu_ld_tl(Rd, addr, MMU_CODE_IDX, MO_UB);
128
- RxV = check_nan(RxV, RxV, &env->fp_status);
66
return true;
129
- RxV = check_nan(RxV, RsV, &env->fp_status);
130
- RxV = check_nan(RxV, RtV, &env->fp_status);
131
- float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
132
- tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
133
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
134
- RxV = tmp;
135
- }
136
- set_float_exception_flags(0, &env->fp_status);
137
- if (float32_is_infinity(RxV) && !infinp) {
138
- RxV = RxV - 1;
139
- }
140
- if (infminusinf) {
141
- RxV = 0;
142
- }
143
- arch_fpop_end(env);
144
- return RxV;
145
+ return do_sffma_lib(env, RxV, RsV, RtV, float_muladd_negate_product);
146
}
67
}
147
68
148
float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
69
@@ -XXX,XX +XXX,XX @@ static bool trans_ELPM2(DisasContext *ctx, arg_ELPM2 *a)
70
TCGv Rd = cpu_r[a->rd];
71
TCGv addr = gen_get_zaddr();
72
73
- tcg_gen_qemu_ld8u(Rd, addr, MMU_CODE_IDX); /* Rd = mem[addr] */
74
+ tcg_gen_qemu_ld_tl(Rd, addr, MMU_CODE_IDX, MO_UB);
75
return true;
76
}
77
78
@@ -XXX,XX +XXX,XX @@ static bool trans_ELPMX(DisasContext *ctx, arg_ELPMX *a)
79
TCGv Rd = cpu_r[a->rd];
80
TCGv addr = gen_get_zaddr();
81
82
- tcg_gen_qemu_ld8u(Rd, addr, MMU_CODE_IDX); /* Rd = mem[addr] */
83
+ tcg_gen_qemu_ld_tl(Rd, addr, MMU_CODE_IDX, MO_UB);
84
tcg_gen_addi_tl(addr, addr, 1); /* addr = addr + 1 */
85
gen_set_zaddr(addr);
86
return true;
149
--
87
--
150
2.43.0
88
2.34.1
1
All instances of s_mask have been converted to the new
representation. We can now re-enable usage.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Convert away from the old interface with the implicit
MemOp argument. In this case we can fold the calls
using the size bits of MemOp.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Anton Johansson <anjo@rev.ng>
Message-Id: <20230502135741.1158035-3-richard.henderson@linaro.org>
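
A brief aside on "fold the calls using the size bits of MemOp": the byte count maps onto
the MemOp size field with a count-trailing-zeros, which is what the ctz32(size) | MO_TE
expressions in the diff below rely on. A small standalone sketch, assuming QEMU's
MO_8=0 .. MO_64=3 encoding and using the GCC/Clang builtin in place of ctz32():

    #include <stdio.h>

    int main(void)
    {
        /* MO_8 == 0, MO_16 == 1, MO_32 == 2, MO_64 == 3 */
        for (unsigned size = 1; size <= 8; size <<= 1) {
            printf("size %u byte(s) -> MemOp size bits %d\n",
                   size, __builtin_ctz(size));
        }
        return 0;
    }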
6
---
8
---
7
tcg/optimize.c | 4 ++--
9
target/cris/translate_v10.c.inc | 18 ++++--------------
8
1 file changed, 2 insertions(+), 2 deletions(-)
10
1 file changed, 4 insertions(+), 14 deletions(-)
9
11
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
14
--- a/target/cris/translate_v10.c.inc
13
+++ b/tcg/optimize.c
15
+++ b/target/cris/translate_v10.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static void gen_store_v10_conditional(DisasContext *dc, TCGv addr, TCGv val,
15
g_assert_not_reached();
17
/* Store only if F flag isn't set */
18
tcg_gen_andi_tl(t1, cpu_PR[PR_CCS], F_FLAG_V10);
19
tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
20
- if (size == 1) {
21
- tcg_gen_qemu_st8(tval, taddr, mem_index);
22
- } else if (size == 2) {
23
- tcg_gen_qemu_st16(tval, taddr, mem_index);
24
- } else {
25
- tcg_gen_qemu_st32(tval, taddr, mem_index);
26
- }
27
+
28
+ tcg_gen_qemu_st_tl(tval, taddr, mem_index, ctz32(size) | MO_TE);
29
+
30
gen_set_label(l1);
31
tcg_gen_shri_tl(t1, t1, 1); /* shift F to P position */
32
tcg_gen_or_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], t1); /*P=F*/
33
@@ -XXX,XX +XXX,XX @@ static void gen_store_v10(DisasContext *dc, TCGv addr, TCGv val,
34
return;
16
}
35
}
17
36
18
- if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
37
- if (size == 1) {
19
+ if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
38
- tcg_gen_qemu_st8(val, addr, mem_index);
20
return true;
39
- } else if (size == 2) {
21
}
40
- tcg_gen_qemu_st16(val, addr, mem_index);
22
41
- } else {
23
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
42
- tcg_gen_qemu_st32(val, addr, mem_index);
24
s_mask = s_mask_old >> pos;
43
- }
25
s_mask |= -1ull << (len - 1);
44
+ tcg_gen_qemu_st_tl(val, addr, mem_index, ctz32(size) | MO_TE);
26
45
}
27
- if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
46
28
+ if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
29
return true;
30
}
31
47
32
--
48
--
33
2.43.0
49
2.34.1
1
All non-default cases now finish folding within each function.
1
Convert away from the old interface with the implicit
2
Do the same with the default case and assert it is done after.
2
MemOp argument. Importantly, this removes some incorrect
3
casts generated by idef-parser's gen_load().
3
4
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Tested-by: Taylor Simpson <tsimpson@quicinc.com>
7
Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
8
Reviewed-by: Anton Johansson <anjo@rev.ng>
9
Message-Id: <20230502135741.1158035-4-richard.henderson@linaro.org>
6
---
10
---
7
tcg/optimize.c | 6 ++----
11
target/hexagon/macros.h | 14 ++++-----
8
1 file changed, 2 insertions(+), 4 deletions(-)
12
target/hexagon/genptr.c | 8 +++---
13
target/hexagon/idef-parser/parser-helpers.c | 28 +++++++++---------
14
target/hexagon/translate.c | 32 ++++++++++-----------
15
4 files changed, 40 insertions(+), 42 deletions(-)
9
16
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
11
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
19
--- a/target/hexagon/macros.h
13
+++ b/tcg/optimize.c
20
+++ b/target/hexagon/macros.h
14
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
21
@@ -XXX,XX +XXX,XX @@
15
done = true;
22
#define MEM_LOAD1s(DST, VA) \
23
do { \
24
CHECK_NOSHUF(VA, 1); \
25
- tcg_gen_qemu_ld8s(DST, VA, ctx->mem_idx); \
26
+ tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_SB); \
27
} while (0)
28
#define MEM_LOAD1u(DST, VA) \
29
do { \
30
CHECK_NOSHUF(VA, 1); \
31
- tcg_gen_qemu_ld8u(DST, VA, ctx->mem_idx); \
32
+ tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_UB); \
33
} while (0)
34
#define MEM_LOAD2s(DST, VA) \
35
do { \
36
CHECK_NOSHUF(VA, 2); \
37
- tcg_gen_qemu_ld16s(DST, VA, ctx->mem_idx); \
38
+ tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_TESW); \
39
} while (0)
40
#define MEM_LOAD2u(DST, VA) \
41
do { \
42
CHECK_NOSHUF(VA, 2); \
43
- tcg_gen_qemu_ld16u(DST, VA, ctx->mem_idx); \
44
+ tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_TEUW); \
45
} while (0)
46
#define MEM_LOAD4s(DST, VA) \
47
do { \
48
CHECK_NOSHUF(VA, 4); \
49
- tcg_gen_qemu_ld32s(DST, VA, ctx->mem_idx); \
50
+ tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_TESL); \
51
} while (0)
52
#define MEM_LOAD4u(DST, VA) \
53
do { \
54
CHECK_NOSHUF(VA, 4); \
55
- tcg_gen_qemu_ld32s(DST, VA, ctx->mem_idx); \
56
+ tcg_gen_qemu_ld_tl(DST, VA, ctx->mem_idx, MO_TEUL); \
57
} while (0)
58
#define MEM_LOAD8u(DST, VA) \
59
do { \
60
CHECK_NOSHUF(VA, 8); \
61
- tcg_gen_qemu_ld64(DST, VA, ctx->mem_idx); \
62
+ tcg_gen_qemu_ld_i64(DST, VA, ctx->mem_idx, MO_TEUQ); \
63
} while (0)
64
65
#define MEM_STORE1_FUNC(X) \
66
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/target/hexagon/genptr.c
69
+++ b/target/hexagon/genptr.c
70
@@ -XXX,XX +XXX,XX @@ void gen_set_byte_i64(int N, TCGv_i64 result, TCGv src)
71
72
static inline void gen_load_locked4u(TCGv dest, TCGv vaddr, int mem_index)
73
{
74
- tcg_gen_qemu_ld32u(dest, vaddr, mem_index);
75
+ tcg_gen_qemu_ld_tl(dest, vaddr, mem_index, MO_TEUL);
76
tcg_gen_mov_tl(hex_llsc_addr, vaddr);
77
tcg_gen_mov_tl(hex_llsc_val, dest);
78
}
79
80
static inline void gen_load_locked8u(TCGv_i64 dest, TCGv vaddr, int mem_index)
81
{
82
- tcg_gen_qemu_ld64(dest, vaddr, mem_index);
83
+ tcg_gen_qemu_ld_i64(dest, vaddr, mem_index, MO_TEUQ);
84
tcg_gen_mov_tl(hex_llsc_addr, vaddr);
85
tcg_gen_mov_i64(hex_llsc_val_i64, dest);
86
}
87
@@ -XXX,XX +XXX,XX @@ static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA)
88
{
89
Insn *insn = ctx->insn; /* Needed for CHECK_NOSHUF */
90
CHECK_NOSHUF(EA, 8);
91
- tcg_gen_qemu_ld64(frame, EA, ctx->mem_idx);
92
+ tcg_gen_qemu_ld_i64(frame, EA, ctx->mem_idx, MO_TEUQ);
93
}
94
95
static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src)
96
@@ -XXX,XX +XXX,XX @@ static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src,
97
tcg_gen_andi_tl(src, src, ~((int32_t)sizeof(MMVector) - 1));
98
}
99
for (int i = 0; i < sizeof(MMVector) / 8; i++) {
100
- tcg_gen_qemu_ld64(tmp, src, ctx->mem_idx);
101
+ tcg_gen_qemu_ld_i64(tmp, src, ctx->mem_idx, MO_TEUQ);
102
tcg_gen_addi_tl(src, src, 8);
103
tcg_gen_st_i64(tmp, cpu_env, dstoff + i * 8);
104
}
105
diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c
106
index XXXXXXX..XXXXXXX 100644
107
--- a/target/hexagon/idef-parser/parser-helpers.c
108
+++ b/target/hexagon/idef-parser/parser-helpers.c
109
@@ -XXX,XX +XXX,XX @@ void gen_load_cancel(Context *c, YYLTYPE *locp)
110
void gen_load(Context *c, YYLTYPE *locp, HexValue *width,
111
HexSignedness signedness, HexValue *ea, HexValue *dst)
112
{
113
- char size_suffix[4] = {0};
114
- const char *sign_suffix;
115
+ unsigned dst_bit_width;
116
+ unsigned src_bit_width;
117
+
118
/* Memop width is specified in the load macro */
119
assert_signedness(c, locp, signedness);
120
- sign_suffix = (width->imm.value > 4)
121
- ? ""
122
- : ((signedness == UNSIGNED) ? "u" : "s");
123
+
124
/* If dst is a variable, assert that is declared and load the type info */
125
if (dst->type == VARID) {
126
find_variable(c, locp, dst, dst);
127
}
128
129
- snprintf(size_suffix, 4, "%" PRIu64, width->imm.value * 8);
130
+ src_bit_width = width->imm.value * 8;
131
+ dst_bit_width = MAX(dst->bit_width, 32);
132
+
133
/* Lookup the effective address EA */
134
find_variable(c, locp, ea, ea);
135
OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n");
136
OUT(c, locp, "probe_noshuf_load(", ea, ", ", width, ", ctx->mem_idx);\n");
137
OUT(c, locp, "process_store(ctx, 1);\n");
138
OUT(c, locp, "}\n");
139
- OUT(c, locp, "tcg_gen_qemu_ld", size_suffix, sign_suffix);
140
+
141
+ OUT(c, locp, "tcg_gen_qemu_ld_i", &dst_bit_width);
142
OUT(c, locp, "(");
143
- if (dst->bit_width > width->imm.value * 8) {
144
- /*
145
- * Cast to the correct TCG type if necessary, to avoid implict cast
146
- * warnings. This is needed when the width of the destination var is
147
- * larger than the size of the requested load.
148
- */
149
- OUT(c, locp, "(TCGv) ");
150
+ OUT(c, locp, dst, ", ", ea, ", ctx->mem_idx, MO_", &src_bit_width);
151
+ if (signedness == SIGNED) {
152
+ OUT(c, locp, " | MO_SIGN");
153
}
154
- OUT(c, locp, dst, ", ", ea, ", ctx->mem_idx);\n");
155
+ OUT(c, locp, " | MO_TE);\n");
156
}
157
158
void gen_store(Context *c, YYLTYPE *locp, HexValue *width, HexValue *ea,
159
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
160
index XXXXXXX..XXXXXXX 100644
161
--- a/target/hexagon/translate.c
162
+++ b/target/hexagon/translate.c
163
@@ -XXX,XX +XXX,XX @@ void process_store(DisasContext *ctx, int slot_num)
164
switch (ctx->store_width[slot_num]) {
165
case 1:
166
gen_check_store_width(ctx, slot_num);
167
- tcg_gen_qemu_st8(hex_store_val32[slot_num],
168
- hex_store_addr[slot_num],
169
- ctx->mem_idx);
170
+ tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
171
+ hex_store_addr[slot_num],
172
+ ctx->mem_idx, MO_UB);
173
break;
174
case 2:
175
gen_check_store_width(ctx, slot_num);
176
- tcg_gen_qemu_st16(hex_store_val32[slot_num],
177
- hex_store_addr[slot_num],
178
- ctx->mem_idx);
179
+ tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
180
+ hex_store_addr[slot_num],
181
+ ctx->mem_idx, MO_TEUW);
182
break;
183
case 4:
184
gen_check_store_width(ctx, slot_num);
185
- tcg_gen_qemu_st32(hex_store_val32[slot_num],
186
- hex_store_addr[slot_num],
187
- ctx->mem_idx);
188
+ tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
189
+ hex_store_addr[slot_num],
190
+ ctx->mem_idx, MO_TEUL);
191
break;
192
case 8:
193
gen_check_store_width(ctx, slot_num);
194
- tcg_gen_qemu_st64(hex_store_val64[slot_num],
195
- hex_store_addr[slot_num],
196
- ctx->mem_idx);
197
+ tcg_gen_qemu_st_i64(hex_store_val64[slot_num],
198
+ hex_store_addr[slot_num],
199
+ ctx->mem_idx, MO_TEUQ);
16
break;
200
break;
17
default:
201
default:
18
+ done = finish_folding(&ctx, op);
202
{
19
break;
203
@@ -XXX,XX +XXX,XX @@ static void process_dczeroa(DisasContext *ctx)
20
}
204
TCGv_i64 zero = tcg_constant_i64(0);
21
-
205
22
- if (!done) {
206
tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f);
23
- finish_folding(&ctx, op);
207
- tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
24
- }
208
+ tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
25
+ tcg_debug_assert(done);
209
tcg_gen_addi_tl(addr, addr, 8);
26
}
210
- tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
27
}
211
+ tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
212
tcg_gen_addi_tl(addr, addr, 8);
213
- tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
214
+ tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
215
tcg_gen_addi_tl(addr, addr, 8);
216
- tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
217
+ tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
218
}
219
}
220
28
--
221
--
29
2.43.0
222
2.34.1
1
Introduce ti_is_const, ti_const_val, ti_is_const_val.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Convert away from the old interface with the implicit
MemOp argument.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Anton Johansson <anjo@rev.ng>
Message-Id: <20230502135741.1158035-5-richard.henderson@linaro.org>
Message-Id: <20230502135741.1158035-5-richard.henderson@linaro.org>
4
---
7
---
5
tcg/optimize.c | 20 +++++++++++++++++---
8
target/m68k/translate.c | 76 ++++++++++++++---------------------------
6
1 file changed, 17 insertions(+), 3 deletions(-)
9
1 file changed, 25 insertions(+), 51 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/target/m68k/translate.c
11
+++ b/tcg/optimize.c
14
+++ b/target/m68k/translate.c
12
@@ -XXX,XX +XXX,XX @@ static inline TempOptInfo *arg_info(TCGArg arg)
15
@@ -XXX,XX +XXX,XX @@ static inline void gen_addr_fault(DisasContext *s)
13
return ts_info(arg_temp(arg));
16
static inline TCGv gen_load(DisasContext *s, int opsize, TCGv addr,
17
int sign, int index)
18
{
19
- TCGv tmp;
20
- tmp = tcg_temp_new_i32();
21
- switch(opsize) {
22
+ TCGv tmp = tcg_temp_new_i32();
23
+
24
+ switch (opsize) {
25
case OS_BYTE:
26
- if (sign)
27
- tcg_gen_qemu_ld8s(tmp, addr, index);
28
- else
29
- tcg_gen_qemu_ld8u(tmp, addr, index);
30
- break;
31
case OS_WORD:
32
- if (sign)
33
- tcg_gen_qemu_ld16s(tmp, addr, index);
34
- else
35
- tcg_gen_qemu_ld16u(tmp, addr, index);
36
- break;
37
case OS_LONG:
38
- tcg_gen_qemu_ld32u(tmp, addr, index);
39
+ tcg_gen_qemu_ld_tl(tmp, addr, index,
40
+ opsize | (sign ? MO_SIGN : 0) | MO_TE);
41
break;
42
default:
43
g_assert_not_reached();
44
@@ -XXX,XX +XXX,XX @@ static inline TCGv gen_load(DisasContext *s, int opsize, TCGv addr,
45
static inline void gen_store(DisasContext *s, int opsize, TCGv addr, TCGv val,
46
int index)
47
{
48
- switch(opsize) {
49
+ switch (opsize) {
50
case OS_BYTE:
51
- tcg_gen_qemu_st8(val, addr, index);
52
- break;
53
case OS_WORD:
54
- tcg_gen_qemu_st16(val, addr, index);
55
- break;
56
case OS_LONG:
57
- tcg_gen_qemu_st32(val, addr, index);
58
+ tcg_gen_qemu_st_tl(val, addr, index, opsize | MO_TE);
59
break;
60
default:
61
g_assert_not_reached();
62
@@ -XXX,XX +XXX,XX @@ static void gen_load_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp,
63
tmp = tcg_temp_new();
64
switch (opsize) {
65
case OS_BYTE:
66
- tcg_gen_qemu_ld8s(tmp, addr, index);
67
- gen_helper_exts32(cpu_env, fp, tmp);
68
- break;
69
case OS_WORD:
70
- tcg_gen_qemu_ld16s(tmp, addr, index);
71
- gen_helper_exts32(cpu_env, fp, tmp);
72
- break;
73
- case OS_LONG:
74
- tcg_gen_qemu_ld32u(tmp, addr, index);
75
+ tcg_gen_qemu_ld_tl(tmp, addr, index, opsize | MO_SIGN | MO_TE);
76
gen_helper_exts32(cpu_env, fp, tmp);
77
break;
78
case OS_SINGLE:
79
- tcg_gen_qemu_ld32u(tmp, addr, index);
80
+ tcg_gen_qemu_ld_tl(tmp, addr, index, MO_TEUL);
81
gen_helper_extf32(cpu_env, fp, tmp);
82
break;
83
case OS_DOUBLE:
84
- tcg_gen_qemu_ld64(t64, addr, index);
85
+ tcg_gen_qemu_ld_i64(t64, addr, index, MO_TEUQ);
86
gen_helper_extf64(cpu_env, fp, t64);
87
break;
88
case OS_EXTENDED:
89
@@ -XXX,XX +XXX,XX @@ static void gen_load_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp,
90
gen_exception(s, s->base.pc_next, EXCP_FP_UNIMP);
91
break;
92
}
93
- tcg_gen_qemu_ld32u(tmp, addr, index);
94
+ tcg_gen_qemu_ld_i32(tmp, addr, index, MO_TEUL);
95
tcg_gen_shri_i32(tmp, tmp, 16);
96
tcg_gen_st16_i32(tmp, fp, offsetof(FPReg, l.upper));
97
tcg_gen_addi_i32(tmp, addr, 4);
98
- tcg_gen_qemu_ld64(t64, tmp, index);
99
+ tcg_gen_qemu_ld_i64(t64, tmp, index, MO_TEUQ);
100
tcg_gen_st_i64(t64, fp, offsetof(FPReg, l.lower));
101
break;
102
case OS_PACKED:
103
@@ -XXX,XX +XXX,XX @@ static void gen_store_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp,
104
tmp = tcg_temp_new();
105
switch (opsize) {
106
case OS_BYTE:
107
- gen_helper_reds32(tmp, cpu_env, fp);
108
- tcg_gen_qemu_st8(tmp, addr, index);
109
- break;
110
case OS_WORD:
111
- gen_helper_reds32(tmp, cpu_env, fp);
112
- tcg_gen_qemu_st16(tmp, addr, index);
113
- break;
114
case OS_LONG:
115
gen_helper_reds32(tmp, cpu_env, fp);
116
- tcg_gen_qemu_st32(tmp, addr, index);
117
+ tcg_gen_qemu_st_tl(tmp, addr, index, opsize | MO_TE);
118
break;
119
case OS_SINGLE:
120
gen_helper_redf32(tmp, cpu_env, fp);
121
- tcg_gen_qemu_st32(tmp, addr, index);
122
+ tcg_gen_qemu_st_tl(tmp, addr, index, MO_TEUL);
123
break;
124
case OS_DOUBLE:
125
gen_helper_redf64(t64, cpu_env, fp);
126
- tcg_gen_qemu_st64(t64, addr, index);
127
+ tcg_gen_qemu_st_i64(t64, addr, index, MO_TEUQ);
128
break;
129
case OS_EXTENDED:
130
if (m68k_feature(s->env, M68K_FEATURE_CF_FPU)) {
131
@@ -XXX,XX +XXX,XX @@ static void gen_store_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp,
132
}
133
tcg_gen_ld16u_i32(tmp, fp, offsetof(FPReg, l.upper));
134
tcg_gen_shli_i32(tmp, tmp, 16);
135
- tcg_gen_qemu_st32(tmp, addr, index);
136
+ tcg_gen_qemu_st_i32(tmp, addr, index, MO_TEUL);
137
tcg_gen_addi_i32(tmp, addr, 4);
138
tcg_gen_ld_i64(t64, fp, offsetof(FPReg, l.lower));
139
- tcg_gen_qemu_st64(t64, tmp, index);
140
+ tcg_gen_qemu_st_i64(t64, tmp, index, MO_TEUQ);
141
break;
142
case OS_PACKED:
143
/*
144
@@ -XXX,XX +XXX,XX @@ DISAS_INSN(movep)
145
if (insn & 0x80) {
146
for ( ; i > 0 ; i--) {
147
tcg_gen_shri_i32(dbuf, reg, (i - 1) * 8);
148
- tcg_gen_qemu_st8(dbuf, abuf, IS_USER(s));
149
+ tcg_gen_qemu_st_i32(dbuf, abuf, IS_USER(s), MO_UB);
150
if (i > 1) {
151
tcg_gen_addi_i32(abuf, abuf, 2);
152
}
153
}
154
} else {
155
for ( ; i > 0 ; i--) {
156
- tcg_gen_qemu_ld8u(dbuf, abuf, IS_USER(s));
157
+ tcg_gen_qemu_ld_tl(dbuf, abuf, IS_USER(s), MO_UB);
158
tcg_gen_deposit_i32(reg, reg, dbuf, (i - 1) * 8, 8);
159
if (i > 1) {
160
tcg_gen_addi_i32(abuf, abuf, 2);
161
@@ -XXX,XX +XXX,XX @@ static void m68k_copy_line(TCGv dst, TCGv src, int index)
162
t1 = tcg_temp_new_i64();
163
164
tcg_gen_andi_i32(addr, src, ~15);
165
- tcg_gen_qemu_ld64(t0, addr, index);
166
+ tcg_gen_qemu_ld_i64(t0, addr, index, MO_TEUQ);
167
tcg_gen_addi_i32(addr, addr, 8);
168
- tcg_gen_qemu_ld64(t1, addr, index);
169
+ tcg_gen_qemu_ld_i64(t1, addr, index, MO_TEUQ);
170
171
tcg_gen_andi_i32(addr, dst, ~15);
172
- tcg_gen_qemu_st64(t0, addr, index);
173
+ tcg_gen_qemu_st_i64(t0, addr, index, MO_TEUQ);
174
tcg_gen_addi_i32(addr, addr, 8);
175
- tcg_gen_qemu_st64(t1, addr, index);
176
+ tcg_gen_qemu_st_i64(t1, addr, index, MO_TEUQ);
14
}
177
}
15
178
16
+static inline bool ti_is_const(TempOptInfo *ti)
179
DISAS_INSN(move16_reg)
17
+{
180
@@ -XXX,XX +XXX,XX @@ static void gen_qemu_store_fcr(DisasContext *s, TCGv addr, int reg)
18
+ return ti->is_const;
181
19
+}
182
tmp = tcg_temp_new();
20
+
183
gen_load_fcr(s, tmp, reg);
21
+static inline uint64_t ti_const_val(TempOptInfo *ti)
184
- tcg_gen_qemu_st32(tmp, addr, index);
22
+{
185
+ tcg_gen_qemu_st_tl(tmp, addr, index, MO_TEUL);
23
+ return ti->val;
24
+}
25
+
26
+static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
27
+{
28
+ return ti_is_const(ti) && ti_const_val(ti) == val;
29
+}
30
+
31
static inline bool ts_is_const(TCGTemp *ts)
32
{
33
- return ts_info(ts)->is_const;
34
+ return ti_is_const(ts_info(ts));
35
}
186
}
36
187
37
static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
188
static void gen_qemu_load_fcr(DisasContext *s, TCGv addr, int reg)
38
{
189
@@ -XXX,XX +XXX,XX @@ static void gen_qemu_load_fcr(DisasContext *s, TCGv addr, int reg)
39
- TempOptInfo *ti = ts_info(ts);
190
TCGv tmp;
40
- return ti->is_const && ti->val == val;
191
41
+ return ti_is_const_val(ts_info(ts), val);
192
tmp = tcg_temp_new();
193
- tcg_gen_qemu_ld32u(tmp, addr, index);
194
+ tcg_gen_qemu_ld_tl(tmp, addr, index, MO_TEUL);
195
gen_store_fcr(s, tmp, reg);
42
}
196
}
43
197
44
static inline bool arg_is_const(TCGArg arg)
45
--
198
--
46
2.43.0
199
2.34.1
1
Initialize x with accumulated via direct assignment,
rather than multiplying by 1.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Convert away from the old interface with the implicit
MemOp argument.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Anton Johansson <anjo@rev.ng>
Message-Id: <20230502135741.1158035-6-richard.henderson@linaro.org>
6
---
7
---
7
target/hexagon/fma_emu.c | 2 +-
8
target/mips/tcg/translate.c | 8 ++++----
8
1 file changed, 1 insertion(+), 1 deletion(-)
9
target/mips/tcg/nanomips_translate.c.inc | 2 +-
10
2 files changed, 5 insertions(+), 5 deletions(-)
9
11
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
12
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/target/hexagon/fma_emu.c
14
--- a/target/mips/tcg/translate.c
13
+++ b/target/hexagon/fma_emu.c
15
+++ b/target/mips/tcg/translate.c
14
@@ -XXX,XX +XXX,XX @@ float64 internal_mpyhh(float64 a, float64 b,
16
@@ -XXX,XX +XXX,XX @@ FOP_CONDNS(s, FMT_S, 32, gen_store_fpr32(ctx, fp0, fd))
15
float64_is_infinity(b)) {
17
16
return float64_mul(a, b, fp_status);
18
/* load/store instructions. */
17
}
19
#ifdef CONFIG_USER_ONLY
18
- x.mant = int128_mul_6464(accumulated, 1);
20
-#define OP_LD_ATOMIC(insn, fname) \
19
+ x.mant = int128_make64(accumulated);
21
+#define OP_LD_ATOMIC(insn, memop) \
20
x.sticky = sticky;
22
static inline void op_ld_##insn(TCGv ret, TCGv arg1, int mem_idx, \
21
prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
23
DisasContext *ctx) \
22
x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
24
{ \
25
TCGv t0 = tcg_temp_new(); \
26
tcg_gen_mov_tl(t0, arg1); \
27
- tcg_gen_qemu_##fname(ret, arg1, ctx->mem_idx); \
28
+ tcg_gen_qemu_ld_tl(ret, arg1, ctx->mem_idx, memop); \
29
tcg_gen_st_tl(t0, cpu_env, offsetof(CPUMIPSState, lladdr)); \
30
tcg_gen_st_tl(ret, cpu_env, offsetof(CPUMIPSState, llval)); \
31
}
32
@@ -XXX,XX +XXX,XX @@ static inline void op_ld_##insn(TCGv ret, TCGv arg1, int mem_idx, \
33
gen_helper_##insn(ret, cpu_env, arg1, tcg_constant_i32(mem_idx)); \
34
}
35
#endif
36
-OP_LD_ATOMIC(ll, ld32s);
37
+OP_LD_ATOMIC(ll, MO_TESL);
38
#if defined(TARGET_MIPS64)
39
-OP_LD_ATOMIC(lld, ld64);
40
+OP_LD_ATOMIC(lld, MO_TEUQ);
41
#endif
42
#undef OP_LD_ATOMIC
43
44
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
45
index XXXXXXX..XXXXXXX 100644
46
--- a/target/mips/tcg/nanomips_translate.c.inc
47
+++ b/target/mips/tcg/nanomips_translate.c.inc
48
@@ -XXX,XX +XXX,XX @@ static void gen_llwp(DisasContext *ctx, uint32_t base, int16_t offset,
49
TCGv tmp2 = tcg_temp_new();
50
51
gen_base_offset_addr(ctx, taddr, base, offset);
52
- tcg_gen_qemu_ld64(tval, taddr, ctx->mem_idx);
53
+ tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ);
54
if (cpu_is_bigendian(ctx)) {
55
tcg_gen_extr_i64_tl(tmp2, tmp1, tval);
56
} else {
23
--
57
--
24
2.43.0
58
2.34.1
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
Explicitly sign-extend z_mask instead of doing that manually.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Convert away from the old interface with the implicit
MemOp argument.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230502135741.1158035-7-richard.henderson@linaro.org>
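
"Explicitly sign-extend z_mask" above boils down to casting through a signed 8-bit type
rather than OR-ing the sign bits in by hand, as the (uint8_t) -> (int8_t) change in the
diff below does. A tiny standalone illustration (not QEMU code):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t z_mask = 0xf0;               /* bit 7, the 8-bit sign bit, is set */
        uint64_t zero_ext = (uint8_t)z_mask;  /* 0x00000000000000f0 */
        uint64_t sign_ext = (uint64_t)(int8_t)(uint8_t)z_mask; /* 0xfffffffffffffff0 */

        printf("zero-extended: 0x%016llx\n", (unsigned long long)zero_ext);
        printf("sign-extended: 0x%016llx\n", (unsigned long long)sign_ext);
        return 0;
    }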
6
---
8
---
7
tcg/optimize.c | 29 ++++++++++++-----------------
9
target/s390x/tcg/translate.c | 152 ++++++++++++++++-------------------
8
1 file changed, 12 insertions(+), 17 deletions(-)
10
1 file changed, 71 insertions(+), 81 deletions(-)
9
11
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
14
--- a/target/s390x/tcg/translate.c
13
+++ b/tcg/optimize.c
15
+++ b/target/s390x/tcg/translate.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_clc(DisasContext *s, DisasOps *o)
15
17
{
16
static bool fold_exts(OptContext *ctx, TCGOp *op)
18
int l = get_field(s, l1);
17
{
19
TCGv_i32 vl;
18
- uint64_t s_mask_old, s_mask, z_mask, sign;
20
+ MemOp mop;
19
+ uint64_t s_mask_old, s_mask, z_mask;
21
20
bool type_change = false;
22
switch (l + 1) {
21
+ TempOptInfo *t1;
23
case 1:
22
24
- tcg_gen_qemu_ld8u(cc_src, o->addr1, get_mem_index(s));
23
if (fold_const1(ctx, op)) {
25
- tcg_gen_qemu_ld8u(cc_dst, o->in2, get_mem_index(s));
24
return true;
26
- break;
25
}
27
case 2:
26
28
- tcg_gen_qemu_ld16u(cc_src, o->addr1, get_mem_index(s));
27
- z_mask = arg_info(op->args[1])->z_mask;
29
- tcg_gen_qemu_ld16u(cc_dst, o->in2, get_mem_index(s));
28
- s_mask = arg_info(op->args[1])->s_mask;
30
- break;
29
+ t1 = arg_info(op->args[1]);
31
case 4:
30
+ z_mask = t1->z_mask;
32
- tcg_gen_qemu_ld32u(cc_src, o->addr1, get_mem_index(s));
31
+ s_mask = t1->s_mask;
33
- tcg_gen_qemu_ld32u(cc_dst, o->in2, get_mem_index(s));
32
s_mask_old = s_mask;
34
- break;
33
35
case 8:
34
switch (op->opc) {
36
- tcg_gen_qemu_ld64(cc_src, o->addr1, get_mem_index(s));
35
CASE_OP_32_64(ext8s):
37
- tcg_gen_qemu_ld64(cc_dst, o->in2, get_mem_index(s));
36
- sign = INT8_MIN;
38
- break;
37
- z_mask = (uint8_t)z_mask;
39
+ mop = ctz32(l + 1) | MO_TE;
38
+ s_mask |= INT8_MIN;
40
+ tcg_gen_qemu_ld_tl(cc_src, o->addr1, get_mem_index(s), mop);
39
+ z_mask = (int8_t)z_mask;
41
+ tcg_gen_qemu_ld_tl(cc_dst, o->in2, get_mem_index(s), mop);
42
+ gen_op_update2_cc_i64(s, CC_OP_LTUGTU_64, cc_src, cc_dst);
43
+ return DISAS_NEXT;
44
default:
45
vl = tcg_constant_i32(l);
46
gen_helper_clc(cc_op, cpu_env, vl, o->addr1, o->in2);
47
set_cc_static(s);
48
return DISAS_NEXT;
49
}
50
- gen_op_update2_cc_i64(s, CC_OP_LTUGTU_64, cc_src, cc_dst);
51
- return DISAS_NEXT;
52
}
53
54
static DisasJumpType op_clcl(DisasContext *s, DisasOps *o)
55
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cvd(DisasContext *s, DisasOps *o)
56
TCGv_i32 t2 = tcg_temp_new_i32();
57
tcg_gen_extrl_i64_i32(t2, o->in1);
58
gen_helper_cvd(t1, t2);
59
- tcg_gen_qemu_st64(t1, o->in2, get_mem_index(s));
60
+ tcg_gen_qemu_st_i64(t1, o->in2, get_mem_index(s), MO_TEUQ);
61
return DISAS_NEXT;
62
}
63
64
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_icm(DisasContext *s, DisasOps *o)
65
switch (m3) {
66
case 0xf:
67
/* Effectively a 32-bit load. */
68
- tcg_gen_qemu_ld32u(tmp, o->in2, get_mem_index(s));
69
+ tcg_gen_qemu_ld_i64(tmp, o->in2, get_mem_index(s), MO_TEUL);
70
len = 32;
71
goto one_insert;
72
73
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_icm(DisasContext *s, DisasOps *o)
74
case 0x6:
75
case 0x3:
76
/* Effectively a 16-bit load. */
77
- tcg_gen_qemu_ld16u(tmp, o->in2, get_mem_index(s));
78
+ tcg_gen_qemu_ld_i64(tmp, o->in2, get_mem_index(s), MO_TEUW);
79
len = 16;
80
goto one_insert;
81
82
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_icm(DisasContext *s, DisasOps *o)
83
case 0x2:
84
case 0x1:
85
/* Effectively an 8-bit load. */
86
- tcg_gen_qemu_ld8u(tmp, o->in2, get_mem_index(s));
87
+ tcg_gen_qemu_ld_i64(tmp, o->in2, get_mem_index(s), MO_UB);
88
len = 8;
89
goto one_insert;
90
91
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_icm(DisasContext *s, DisasOps *o)
92
ccm = 0;
93
while (m3) {
94
if (m3 & 0x8) {
95
- tcg_gen_qemu_ld8u(tmp, o->in2, get_mem_index(s));
96
+ tcg_gen_qemu_ld_i64(tmp, o->in2, get_mem_index(s), MO_UB);
97
tcg_gen_addi_i64(o->in2, o->in2, 1);
98
tcg_gen_deposit_i64(o->out, o->out, tmp, pos, 8);
99
ccm |= 0xffull << pos;
100
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_llgt(DisasContext *s, DisasOps *o)
101
102
static DisasJumpType op_ld8s(DisasContext *s, DisasOps *o)
103
{
104
- tcg_gen_qemu_ld8s(o->out, o->in2, get_mem_index(s));
105
+ tcg_gen_qemu_ld_i64(o->out, o->in2, get_mem_index(s), MO_SB);
106
return DISAS_NEXT;
107
}
108
109
static DisasJumpType op_ld8u(DisasContext *s, DisasOps *o)
110
{
111
- tcg_gen_qemu_ld8u(o->out, o->in2, get_mem_index(s));
112
+ tcg_gen_qemu_ld_i64(o->out, o->in2, get_mem_index(s), MO_UB);
113
return DISAS_NEXT;
114
}
115
116
static DisasJumpType op_ld16s(DisasContext *s, DisasOps *o)
117
{
118
- tcg_gen_qemu_ld16s(o->out, o->in2, get_mem_index(s));
119
+ tcg_gen_qemu_ld_i64(o->out, o->in2, get_mem_index(s), MO_TESW);
120
return DISAS_NEXT;
121
}
122
123
static DisasJumpType op_ld16u(DisasContext *s, DisasOps *o)
124
{
125
- tcg_gen_qemu_ld16u(o->out, o->in2, get_mem_index(s));
126
+ tcg_gen_qemu_ld_i64(o->out, o->in2, get_mem_index(s), MO_TEUW);
127
return DISAS_NEXT;
128
}
129
130
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lat(DisasContext *s, DisasOps *o)
131
static DisasJumpType op_lgat(DisasContext *s, DisasOps *o)
132
{
133
TCGLabel *lab = gen_new_label();
134
- tcg_gen_qemu_ld64(o->out, o->in2, get_mem_index(s));
135
+ tcg_gen_qemu_ld_i64(o->out, o->in2, get_mem_index(s), MO_TEUQ);
136
/* The value is stored even in case of trap. */
137
tcg_gen_brcondi_i64(TCG_COND_NE, o->out, 0, lab);
138
gen_trap(s);
139
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lfhat(DisasContext *s, DisasOps *o)
140
static DisasJumpType op_llgfat(DisasContext *s, DisasOps *o)
141
{
142
TCGLabel *lab = gen_new_label();
143
- tcg_gen_qemu_ld32u(o->out, o->in2, get_mem_index(s));
144
+
145
+ tcg_gen_qemu_ld_i64(o->out, o->in2, get_mem_index(s), MO_TEUL);
146
/* The value is stored even in case of trap. */
147
tcg_gen_brcondi_i64(TCG_COND_NE, o->out, 0, lab);
148
gen_trap(s);
149
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lpswe(DisasContext *s, DisasOps *o)
150
tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s),
151
MO_TEUQ | MO_ALIGN_8);
152
tcg_gen_addi_i64(o->in2, o->in2, 8);
153
- tcg_gen_qemu_ld64(t2, o->in2, get_mem_index(s));
154
+ tcg_gen_qemu_ld_i64(t2, o->in2, get_mem_index(s), MO_TEUQ);
155
gen_helper_load_psw(cpu_env, t1, t2);
156
return DISAS_NORETURN;
157
}
158
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lm32(DisasContext *s, DisasOps *o)
159
/* Only one register to read. */
160
t1 = tcg_temp_new_i64();
161
if (unlikely(r1 == r3)) {
162
- tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s));
163
+ tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), MO_TEUL);
164
store_reg32_i64(r1, t1);
165
return DISAS_NEXT;
166
}
167
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lm32(DisasContext *s, DisasOps *o)
168
/* First load the values of the first and last registers to trigger
169
possible page faults. */
170
t2 = tcg_temp_new_i64();
171
- tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s));
172
+ tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), MO_TEUL);
173
tcg_gen_addi_i64(t2, o->in2, 4 * ((r3 - r1) & 15));
174
- tcg_gen_qemu_ld32u(t2, t2, get_mem_index(s));
175
+ tcg_gen_qemu_ld_i64(t2, t2, get_mem_index(s), MO_TEUL);
176
store_reg32_i64(r1, t1);
177
store_reg32_i64(r3, t2);
178
179
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lm32(DisasContext *s, DisasOps *o)
180
while (r1 != r3) {
181
r1 = (r1 + 1) & 15;
182
tcg_gen_add_i64(o->in2, o->in2, t2);
183
- tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s));
184
+ tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), MO_TEUL);
185
store_reg32_i64(r1, t1);
186
}
187
return DISAS_NEXT;
188
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lmh(DisasContext *s, DisasOps *o)
189
/* Only one register to read. */
190
t1 = tcg_temp_new_i64();
191
if (unlikely(r1 == r3)) {
192
- tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s));
193
+ tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), MO_TEUL);
194
store_reg32h_i64(r1, t1);
195
return DISAS_NEXT;
196
}
197
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lmh(DisasContext *s, DisasOps *o)
198
/* First load the values of the first and last registers to trigger
199
possible page faults. */
200
t2 = tcg_temp_new_i64();
201
- tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s));
202
+ tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), MO_TEUL);
203
tcg_gen_addi_i64(t2, o->in2, 4 * ((r3 - r1) & 15));
204
- tcg_gen_qemu_ld32u(t2, t2, get_mem_index(s));
205
+ tcg_gen_qemu_ld_i64(t2, t2, get_mem_index(s), MO_TEUL);
206
store_reg32h_i64(r1, t1);
207
store_reg32h_i64(r3, t2);
208
209
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lmh(DisasContext *s, DisasOps *o)
210
while (r1 != r3) {
211
r1 = (r1 + 1) & 15;
212
tcg_gen_add_i64(o->in2, o->in2, t2);
213
- tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s));
214
+ tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), MO_TEUL);
215
store_reg32h_i64(r1, t1);
216
}
217
return DISAS_NEXT;
218
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lm64(DisasContext *s, DisasOps *o)
219
220
/* Only one register to read. */
221
if (unlikely(r1 == r3)) {
222
- tcg_gen_qemu_ld64(regs[r1], o->in2, get_mem_index(s));
223
+ tcg_gen_qemu_ld_i64(regs[r1], o->in2, get_mem_index(s), MO_TEUQ);
224
return DISAS_NEXT;
225
}
226
227
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lm64(DisasContext *s, DisasOps *o)
228
possible page faults. */
229
t1 = tcg_temp_new_i64();
230
t2 = tcg_temp_new_i64();
231
- tcg_gen_qemu_ld64(t1, o->in2, get_mem_index(s));
232
+ tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), MO_TEUQ);
233
tcg_gen_addi_i64(t2, o->in2, 8 * ((r3 - r1) & 15));
234
- tcg_gen_qemu_ld64(regs[r3], t2, get_mem_index(s));
235
+ tcg_gen_qemu_ld_i64(regs[r3], t2, get_mem_index(s), MO_TEUQ);
236
tcg_gen_mov_i64(regs[r1], t1);
237
238
/* Only two registers to read. */
239
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lm64(DisasContext *s, DisasOps *o)
240
while (r1 != r3) {
241
r1 = (r1 + 1) & 15;
242
tcg_gen_add_i64(o->in2, o->in2, t1);
243
- tcg_gen_qemu_ld64(regs[r1], o->in2, get_mem_index(s));
244
+ tcg_gen_qemu_ld_i64(regs[r1], o->in2, get_mem_index(s), MO_TEUQ);
245
}
246
return DISAS_NEXT;
247
}
248
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_soc(DisasContext *s, DisasOps *o)
249
a = get_address(s, 0, get_field(s, b2), get_field(s, d2));
250
switch (s->insn->data) {
251
case 1: /* STOCG */
252
- tcg_gen_qemu_st64(regs[r1], a, get_mem_index(s));
253
+ tcg_gen_qemu_st_i64(regs[r1], a, get_mem_index(s), MO_TEUQ);
40
break;
254
break;
41
CASE_OP_32_64(ext16s):
255
case 0: /* STOC */
42
- sign = INT16_MIN;
256
- tcg_gen_qemu_st32(regs[r1], a, get_mem_index(s));
43
- z_mask = (uint16_t)z_mask;
257
+ tcg_gen_qemu_st_i64(regs[r1], a, get_mem_index(s), MO_TEUL);
44
+ s_mask |= INT16_MIN;
45
+ z_mask = (int16_t)z_mask;
46
break;
258
break;
47
case INDEX_op_ext_i32_i64:
259
case 2: /* STOCFH */
48
type_change = true;
260
h = tcg_temp_new_i64();
49
QEMU_FALLTHROUGH;
261
tcg_gen_shri_i64(h, regs[r1], 32);
50
case INDEX_op_ext32s_i64:
262
- tcg_gen_qemu_st32(h, a, get_mem_index(s));
51
- sign = INT32_MIN;
263
+ tcg_gen_qemu_st_i64(h, a, get_mem_index(s), MO_TEUL);
52
- z_mask = (uint32_t)z_mask;
53
+ s_mask |= INT32_MIN;
54
+ z_mask = (int32_t)z_mask;
55
break;
264
break;
56
default:
265
default:
57
g_assert_not_reached();
266
g_assert_not_reached();
58
}
267
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ectg(DisasContext *s, DisasOps *o)
59
268
gen_addi_and_wrap_i64(s, o->addr1, regs[r3], 0);
60
- if (z_mask & sign) {
269
61
- z_mask |= sign;
270
/* load the third operand into r3 before modifying anything */
62
- }
271
- tcg_gen_qemu_ld64(regs[r3], o->addr1, get_mem_index(s));
63
- s_mask |= sign << 1;
272
+ tcg_gen_qemu_ld_i64(regs[r3], o->addr1, get_mem_index(s), MO_TEUQ);
64
-
273
65
- ctx->z_mask = z_mask;
274
/* subtract CPU timer from first operand and store in GR0 */
66
- ctx->s_mask = s_mask;
275
gen_helper_stpt(tmp, cpu_env);
67
if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
276
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stcke(DisasContext *s, DisasOps *o)
68
return true;
277
tcg_gen_shri_i64(c1, c1, 8);
69
}
278
tcg_gen_ori_i64(c2, c2, 0x10000);
70
279
tcg_gen_or_i64(c2, c2, todpr);
71
- return fold_masks(ctx, op);
280
- tcg_gen_qemu_st64(c1, o->in2, get_mem_index(s));
72
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
281
+ tcg_gen_qemu_st_i64(c1, o->in2, get_mem_index(s), MO_TEUQ);
73
}
282
tcg_gen_addi_i64(o->in2, o->in2, 8);
74
283
- tcg_gen_qemu_st64(c2, o->in2, get_mem_index(s));
75
static bool fold_extu(OptContext *ctx, TCGOp *op)
284
+ tcg_gen_qemu_st_i64(c2, o->in2, get_mem_index(s), MO_TEUQ);
285
/* ??? We don't implement clock states. */
286
gen_op_movi_cc(s, 0);
287
return DISAS_NEXT;
288
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stnosm(DisasContext *s, DisasOps *o)
289
restart, we'll have the wrong SYSTEM MASK in place. */
290
t = tcg_temp_new_i64();
291
tcg_gen_shri_i64(t, psw_mask, 56);
292
- tcg_gen_qemu_st8(t, o->addr1, get_mem_index(s));
293
+ tcg_gen_qemu_st_i64(t, o->addr1, get_mem_index(s), MO_UB);
294
295
if (s->fields.op == 0xac) {
296
tcg_gen_andi_i64(psw_mask, psw_mask,
297
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stfle(DisasContext *s, DisasOps *o)
298
299
static DisasJumpType op_st8(DisasContext *s, DisasOps *o)
300
{
301
- tcg_gen_qemu_st8(o->in1, o->in2, get_mem_index(s));
302
+ tcg_gen_qemu_st_i64(o->in1, o->in2, get_mem_index(s), MO_UB);
303
return DISAS_NEXT;
304
}
305
306
static DisasJumpType op_st16(DisasContext *s, DisasOps *o)
307
{
308
- tcg_gen_qemu_st16(o->in1, o->in2, get_mem_index(s));
309
+ tcg_gen_qemu_st_i64(o->in1, o->in2, get_mem_index(s), MO_TEUW);
310
return DISAS_NEXT;
311
}
312
313
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stcm(DisasContext *s, DisasOps *o)
314
case 0xf:
315
/* Effectively a 32-bit store. */
316
tcg_gen_shri_i64(tmp, o->in1, pos);
317
- tcg_gen_qemu_st32(tmp, o->in2, get_mem_index(s));
318
+ tcg_gen_qemu_st_i64(tmp, o->in2, get_mem_index(s), MO_TEUL);
319
break;
320
321
case 0xc:
322
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stcm(DisasContext *s, DisasOps *o)
323
case 0x3:
324
/* Effectively a 16-bit store. */
325
tcg_gen_shri_i64(tmp, o->in1, pos);
326
- tcg_gen_qemu_st16(tmp, o->in2, get_mem_index(s));
327
+ tcg_gen_qemu_st_i64(tmp, o->in2, get_mem_index(s), MO_TEUW);
328
break;
329
330
case 0x8:
331
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stcm(DisasContext *s, DisasOps *o)
332
case 0x1:
333
/* Effectively an 8-bit store. */
334
tcg_gen_shri_i64(tmp, o->in1, pos);
335
- tcg_gen_qemu_st8(tmp, o->in2, get_mem_index(s));
336
+ tcg_gen_qemu_st_i64(tmp, o->in2, get_mem_index(s), MO_UB);
337
break;
338
339
default:
340
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stcm(DisasContext *s, DisasOps *o)
341
while (m3) {
342
if (m3 & 0x8) {
343
tcg_gen_shri_i64(tmp, o->in1, pos);
344
- tcg_gen_qemu_st8(tmp, o->in2, get_mem_index(s));
345
+ tcg_gen_qemu_st_i64(tmp, o->in2, get_mem_index(s), MO_UB);
346
tcg_gen_addi_i64(o->in2, o->in2, 1);
347
}
348
m3 = (m3 << 1) & 0xf;
349
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stm(DisasContext *s, DisasOps *o)
350
TCGv_i64 tsize = tcg_constant_i64(size);
351
352
while (1) {
353
- if (size == 8) {
354
- tcg_gen_qemu_st64(regs[r1], o->in2, get_mem_index(s));
355
- } else {
356
- tcg_gen_qemu_st32(regs[r1], o->in2, get_mem_index(s));
357
- }
358
+ tcg_gen_qemu_st_i64(regs[r1], o->in2, get_mem_index(s),
359
+ size == 8 ? MO_TEUQ : MO_TEUL);
360
if (r1 == r3) {
361
break;
362
}
363
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o)
364
365
while (1) {
366
tcg_gen_shl_i64(t, regs[r1], t32);
367
- tcg_gen_qemu_st32(t, o->in2, get_mem_index(s));
368
+ tcg_gen_qemu_st_i64(t, o->in2, get_mem_index(s), MO_TEUL);
369
if (r1 == r3) {
370
break;
371
}
372
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_xc(DisasContext *s, DisasOps *o)
373
374
l++;
375
while (l >= 8) {
376
- tcg_gen_qemu_st64(o->in2, o->addr1, get_mem_index(s));
377
+ tcg_gen_qemu_st_i64(o->in2, o->addr1, get_mem_index(s), MO_UQ);
378
l -= 8;
379
if (l > 0) {
380
tcg_gen_addi_i64(o->addr1, o->addr1, 8);
381
}
382
}
383
if (l >= 4) {
384
- tcg_gen_qemu_st32(o->in2, o->addr1, get_mem_index(s));
385
+ tcg_gen_qemu_st_i64(o->in2, o->addr1, get_mem_index(s), MO_UL);
386
l -= 4;
387
if (l > 0) {
388
tcg_gen_addi_i64(o->addr1, o->addr1, 4);
389
}
390
}
391
if (l >= 2) {
392
- tcg_gen_qemu_st16(o->in2, o->addr1, get_mem_index(s));
393
+ tcg_gen_qemu_st_i64(o->in2, o->addr1, get_mem_index(s), MO_UW);
394
l -= 2;
395
if (l > 0) {
396
tcg_gen_addi_i64(o->addr1, o->addr1, 2);
397
}
398
}
399
if (l) {
400
- tcg_gen_qemu_st8(o->in2, o->addr1, get_mem_index(s));
401
+ tcg_gen_qemu_st_i64(o->in2, o->addr1, get_mem_index(s), MO_UB);
402
}
403
gen_op_movi_cc(s, 0);
404
return DISAS_NEXT;
405
@@ -XXX,XX +XXX,XX @@ static void wout_cond_e1e2(DisasContext *s, DisasOps *o)
406
407
static void wout_m1_8(DisasContext *s, DisasOps *o)
408
{
409
- tcg_gen_qemu_st8(o->out, o->addr1, get_mem_index(s));
410
+ tcg_gen_qemu_st_i64(o->out, o->addr1, get_mem_index(s), MO_UB);
411
}
412
#define SPEC_wout_m1_8 0
413
414
static void wout_m1_16(DisasContext *s, DisasOps *o)
415
{
416
- tcg_gen_qemu_st16(o->out, o->addr1, get_mem_index(s));
417
+ tcg_gen_qemu_st_i64(o->out, o->addr1, get_mem_index(s), MO_TEUW);
418
}
419
#define SPEC_wout_m1_16 0
420
421
@@ -XXX,XX +XXX,XX @@ static void wout_m1_16a(DisasContext *s, DisasOps *o)
422
423
static void wout_m1_32(DisasContext *s, DisasOps *o)
424
{
425
- tcg_gen_qemu_st32(o->out, o->addr1, get_mem_index(s));
426
+ tcg_gen_qemu_st_i64(o->out, o->addr1, get_mem_index(s), MO_TEUL);
427
}
428
#define SPEC_wout_m1_32 0
429
430
@@ -XXX,XX +XXX,XX @@ static void wout_m1_32a(DisasContext *s, DisasOps *o)
431
432
static void wout_m1_64(DisasContext *s, DisasOps *o)
433
{
434
- tcg_gen_qemu_st64(o->out, o->addr1, get_mem_index(s));
435
+ tcg_gen_qemu_st_i64(o->out, o->addr1, get_mem_index(s), MO_TEUQ);
436
}
437
#define SPEC_wout_m1_64 0
438
439
@@ -XXX,XX +XXX,XX @@ static void wout_m1_64a(DisasContext *s, DisasOps *o)
440
441
static void wout_m2_32(DisasContext *s, DisasOps *o)
442
{
443
- tcg_gen_qemu_st32(o->out, o->in2, get_mem_index(s));
444
+ tcg_gen_qemu_st_i64(o->out, o->in2, get_mem_index(s), MO_TEUL);
445
}
446
#define SPEC_wout_m2_32 0
447
448
@@ -XXX,XX +XXX,XX @@ static void in1_m1_8u(DisasContext *s, DisasOps *o)
449
{
450
in1_la1(s, o);
451
o->in1 = tcg_temp_new_i64();
452
- tcg_gen_qemu_ld8u(o->in1, o->addr1, get_mem_index(s));
453
+ tcg_gen_qemu_ld_i64(o->in1, o->addr1, get_mem_index(s), MO_UB);
454
}
455
#define SPEC_in1_m1_8u 0
456
457
@@ -XXX,XX +XXX,XX @@ static void in1_m1_16s(DisasContext *s, DisasOps *o)
458
{
459
in1_la1(s, o);
460
o->in1 = tcg_temp_new_i64();
461
- tcg_gen_qemu_ld16s(o->in1, o->addr1, get_mem_index(s));
462
+ tcg_gen_qemu_ld_i64(o->in1, o->addr1, get_mem_index(s), MO_TESW);
463
}
464
#define SPEC_in1_m1_16s 0
465
466
@@ -XXX,XX +XXX,XX @@ static void in1_m1_16u(DisasContext *s, DisasOps *o)
467
{
468
in1_la1(s, o);
469
o->in1 = tcg_temp_new_i64();
470
- tcg_gen_qemu_ld16u(o->in1, o->addr1, get_mem_index(s));
471
+ tcg_gen_qemu_ld_i64(o->in1, o->addr1, get_mem_index(s), MO_TEUW);
472
}
473
#define SPEC_in1_m1_16u 0
474
475
@@ -XXX,XX +XXX,XX @@ static void in1_m1_32s(DisasContext *s, DisasOps *o)
476
{
477
in1_la1(s, o);
478
o->in1 = tcg_temp_new_i64();
479
- tcg_gen_qemu_ld32s(o->in1, o->addr1, get_mem_index(s));
480
+ tcg_gen_qemu_ld_i64(o->in1, o->addr1, get_mem_index(s), MO_TESL);
481
}
482
#define SPEC_in1_m1_32s 0
483
484
@@ -XXX,XX +XXX,XX @@ static void in1_m1_32u(DisasContext *s, DisasOps *o)
485
{
486
in1_la1(s, o);
487
o->in1 = tcg_temp_new_i64();
488
- tcg_gen_qemu_ld32u(o->in1, o->addr1, get_mem_index(s));
489
+ tcg_gen_qemu_ld_i64(o->in1, o->addr1, get_mem_index(s), MO_TEUL);
490
}
491
#define SPEC_in1_m1_32u 0
492
493
@@ -XXX,XX +XXX,XX @@ static void in1_m1_64(DisasContext *s, DisasOps *o)
494
{
495
in1_la1(s, o);
496
o->in1 = tcg_temp_new_i64();
497
- tcg_gen_qemu_ld64(o->in1, o->addr1, get_mem_index(s));
498
+ tcg_gen_qemu_ld_i64(o->in1, o->addr1, get_mem_index(s), MO_TEUQ);
499
}
500
#define SPEC_in1_m1_64 0
501
502
@@ -XXX,XX +XXX,XX @@ static void in2_sh(DisasContext *s, DisasOps *o)
503
static void in2_m2_8u(DisasContext *s, DisasOps *o)
504
{
505
in2_a2(s, o);
506
- tcg_gen_qemu_ld8u(o->in2, o->in2, get_mem_index(s));
507
+ tcg_gen_qemu_ld_i64(o->in2, o->in2, get_mem_index(s), MO_UB);
508
}
509
#define SPEC_in2_m2_8u 0
510
511
static void in2_m2_16s(DisasContext *s, DisasOps *o)
512
{
513
in2_a2(s, o);
514
- tcg_gen_qemu_ld16s(o->in2, o->in2, get_mem_index(s));
515
+ tcg_gen_qemu_ld_i64(o->in2, o->in2, get_mem_index(s), MO_TESW);
516
}
517
#define SPEC_in2_m2_16s 0
518
519
static void in2_m2_16u(DisasContext *s, DisasOps *o)
520
{
521
in2_a2(s, o);
522
- tcg_gen_qemu_ld16u(o->in2, o->in2, get_mem_index(s));
523
+ tcg_gen_qemu_ld_i64(o->in2, o->in2, get_mem_index(s), MO_TEUW);
524
}
525
#define SPEC_in2_m2_16u 0
526
527
static void in2_m2_32s(DisasContext *s, DisasOps *o)
528
{
529
in2_a2(s, o);
530
- tcg_gen_qemu_ld32s(o->in2, o->in2, get_mem_index(s));
531
+ tcg_gen_qemu_ld_i64(o->in2, o->in2, get_mem_index(s), MO_TESL);
532
}
533
#define SPEC_in2_m2_32s 0
534
535
static void in2_m2_32u(DisasContext *s, DisasOps *o)
536
{
537
in2_a2(s, o);
538
- tcg_gen_qemu_ld32u(o->in2, o->in2, get_mem_index(s));
539
+ tcg_gen_qemu_ld_i64(o->in2, o->in2, get_mem_index(s), MO_TEUL);
540
}
541
#define SPEC_in2_m2_32u 0
542
543
@@ -XXX,XX +XXX,XX @@ static void in2_m2_32ua(DisasContext *s, DisasOps *o)
544
static void in2_m2_64(DisasContext *s, DisasOps *o)
545
{
546
in2_a2(s, o);
547
- tcg_gen_qemu_ld64(o->in2, o->in2, get_mem_index(s));
548
+ tcg_gen_qemu_ld_i64(o->in2, o->in2, get_mem_index(s), MO_TEUQ);
549
}
550
#define SPEC_in2_m2_64 0
551
552
static void in2_m2_64w(DisasContext *s, DisasOps *o)
553
{
554
in2_a2(s, o);
555
- tcg_gen_qemu_ld64(o->in2, o->in2, get_mem_index(s));
556
+ tcg_gen_qemu_ld_i64(o->in2, o->in2, get_mem_index(s), MO_TEUQ);
557
gen_addi_and_wrap_i64(s, o->in2, o->in2, 0);
558
}
559
#define SPEC_in2_m2_64w 0
560
@@ -XXX,XX +XXX,XX @@ static void in2_m2_64a(DisasContext *s, DisasOps *o)
561
static void in2_mri2_16s(DisasContext *s, DisasOps *o)
562
{
563
o->in2 = tcg_temp_new_i64();
564
- tcg_gen_qemu_ld16s(o->in2, gen_ri2(s), get_mem_index(s));
565
+ tcg_gen_qemu_ld_i64(o->in2, gen_ri2(s), get_mem_index(s), MO_TESW);
566
}
567
#define SPEC_in2_mri2_16s 0
568
569
static void in2_mri2_16u(DisasContext *s, DisasOps *o)
570
{
571
o->in2 = tcg_temp_new_i64();
572
- tcg_gen_qemu_ld16u(o->in2, gen_ri2(s), get_mem_index(s));
573
+ tcg_gen_qemu_ld_i64(o->in2, gen_ri2(s), get_mem_index(s), MO_TEUW);
574
}
575
#define SPEC_in2_mri2_16u 0
576
76
--
577
--
77
2.43.0
578
2.34.1
1
No need to open-code 64x64->128-bit multiplication.
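For reference, the whole computation reduces to QEMU's mulu64() helper from
include/qemu/host-utils.h plus the Int128 constructors; a minimal sketch
(the function name is illustrative, not from the patch):

    /* Full unsigned 64x64->128 multiply, no hand-built partial products. */
    static Int128 mul_6464_sketch(uint64_t a, uint64_t b)
    {
        uint64_t lo, hi;

        mulu64(&lo, &hi, a, b);          /* low/high halves of the product */
        return int128_make128(lo, hi);   /* pack them into an Int128 */
    }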
1
Convert away from the old interface with the implicit
2
MemOp argument.
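The conversion is mechanical; as a sketch, with val/v64/addr standing in for
TCGv temporaries and idx for the mem_index (placeholder names, not from this
patch), each legacy helper maps onto the generic call with an explicit MemOp:

    tcg_gen_qemu_ld8u(val, addr, idx);             /* old: size/sign implied */
    tcg_gen_qemu_ld_tl(val, addr, idx, MO_UB);     /* new: explicit MemOp    */

    tcg_gen_qemu_ld32s(val, addr, idx);            /* old */
    tcg_gen_qemu_ld_tl(val, addr, idx, MO_TESL);   /* new */

    tcg_gen_qemu_st64(v64, addr, idx);             /* old */
    tcg_gen_qemu_st_i64(v64, addr, idx, MO_TEUQ);  /* new */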
2
3
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Anton Johansson <anjo@rev.ng>
6
Message-Id: <20230502135741.1158035-8-richard.henderson@linaro.org>
5
---
7
---
6
target/hexagon/fma_emu.c | 32 +++-----------------------------
8
target/sparc/translate.c | 43 ++++++++++++++++++++++++++--------------
7
1 file changed, 3 insertions(+), 29 deletions(-)
9
1 file changed, 28 insertions(+), 15 deletions(-)
8
10
9
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
11
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
10
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
11
--- a/target/hexagon/fma_emu.c
13
--- a/target/sparc/translate.c
12
+++ b/target/hexagon/fma_emu.c
14
+++ b/target/sparc/translate.c
13
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32)
15
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
14
return -1;
16
switch (xop) {
15
}
17
case 0x0: /* ld, V9 lduw, load unsigned word */
16
18
gen_address_mask(dc, cpu_addr);
17
-static uint32_t int128_getw0(Int128 x)
19
- tcg_gen_qemu_ld32u(cpu_val, cpu_addr, dc->mem_idx);
18
-{
20
+ tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
19
- return int128_getlo(x);
21
+ dc->mem_idx, MO_TEUL);
20
-}
22
break;
21
-
23
case 0x1: /* ldub, load unsigned byte */
22
-static uint32_t int128_getw1(Int128 x)
24
gen_address_mask(dc, cpu_addr);
23
-{
25
- tcg_gen_qemu_ld8u(cpu_val, cpu_addr, dc->mem_idx);
24
- return int128_getlo(x) >> 32;
26
+ tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
25
-}
27
+ dc->mem_idx, MO_UB);
26
-
28
break;
27
static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
29
case 0x2: /* lduh, load unsigned halfword */
28
{
30
gen_address_mask(dc, cpu_addr);
29
- Int128 a, b;
31
- tcg_gen_qemu_ld16u(cpu_val, cpu_addr, dc->mem_idx);
30
- uint64_t pp0, pp1a, pp1b, pp1s, pp2;
32
+ tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
31
+ uint64_t l, h;
33
+ dc->mem_idx, MO_TEUW);
32
34
break;
33
- a = int128_make64(ai);
35
case 0x3: /* ldd, load double word */
34
- b = int128_make64(bi);
36
if (rd & 1)
35
- pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
37
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
36
- pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
38
37
- pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
39
gen_address_mask(dc, cpu_addr);
38
- pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
40
t64 = tcg_temp_new_i64();
39
-
41
- tcg_gen_qemu_ld64(t64, cpu_addr, dc->mem_idx);
40
- pp1s = pp1a + pp1b;
42
+ tcg_gen_qemu_ld_i64(t64, cpu_addr,
41
- if ((pp1s < pp1a) || (pp1s < pp1b)) {
43
+ dc->mem_idx, MO_TEUQ);
42
- pp2 += (1ULL << 32);
44
tcg_gen_trunc_i64_tl(cpu_val, t64);
43
- }
45
tcg_gen_ext32u_tl(cpu_val, cpu_val);
44
- uint64_t ret_low = pp0 + (pp1s << 32);
46
gen_store_gpr(dc, rd + 1, cpu_val);
45
- if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
47
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
46
- pp2 += 1;
48
break;
47
- }
49
case 0x9: /* ldsb, load signed byte */
48
-
50
gen_address_mask(dc, cpu_addr);
49
- return int128_make128(ret_low, pp2 + (pp1s >> 32));
51
- tcg_gen_qemu_ld8s(cpu_val, cpu_addr, dc->mem_idx);
50
+ mulu64(&l, &h, ai, bi);
52
+ tcg_gen_qemu_ld_tl(cpu_val, cpu_addr, dc->mem_idx, MO_SB);
51
+ return int128_make128(l, h);
53
break;
52
}
54
case 0xa: /* ldsh, load signed halfword */
53
55
gen_address_mask(dc, cpu_addr);
54
static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
56
- tcg_gen_qemu_ld16s(cpu_val, cpu_addr, dc->mem_idx);
57
+ tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
58
+ dc->mem_idx, MO_TESW);
59
break;
60
case 0xd: /* ldstub */
61
gen_ldstub(dc, cpu_val, cpu_addr, dc->mem_idx);
62
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
63
#ifdef TARGET_SPARC64
64
case 0x08: /* V9 ldsw */
65
gen_address_mask(dc, cpu_addr);
66
- tcg_gen_qemu_ld32s(cpu_val, cpu_addr, dc->mem_idx);
67
+ tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
68
+ dc->mem_idx, MO_TESL);
69
break;
70
case 0x0b: /* V9 ldx */
71
gen_address_mask(dc, cpu_addr);
72
- tcg_gen_qemu_ld64(cpu_val, cpu_addr, dc->mem_idx);
73
+ tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
74
+ dc->mem_idx, MO_TEUQ);
75
break;
76
case 0x18: /* V9 ldswa */
77
gen_ld_asi(dc, cpu_val, cpu_addr, insn, MO_TESL);
78
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
79
switch (xop) {
80
case 0x4: /* st, store word */
81
gen_address_mask(dc, cpu_addr);
82
- tcg_gen_qemu_st32(cpu_val, cpu_addr, dc->mem_idx);
83
+ tcg_gen_qemu_st_tl(cpu_val, cpu_addr,
84
+ dc->mem_idx, MO_TEUL);
85
break;
86
case 0x5: /* stb, store byte */
87
gen_address_mask(dc, cpu_addr);
88
- tcg_gen_qemu_st8(cpu_val, cpu_addr, dc->mem_idx);
89
+ tcg_gen_qemu_st_tl(cpu_val, cpu_addr, dc->mem_idx, MO_UB);
90
break;
91
case 0x6: /* sth, store halfword */
92
gen_address_mask(dc, cpu_addr);
93
- tcg_gen_qemu_st16(cpu_val, cpu_addr, dc->mem_idx);
94
+ tcg_gen_qemu_st_tl(cpu_val, cpu_addr,
95
+ dc->mem_idx, MO_TEUW);
96
break;
97
case 0x7: /* std, store double word */
98
if (rd & 1)
99
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
100
lo = gen_load_gpr(dc, rd + 1);
101
t64 = tcg_temp_new_i64();
102
tcg_gen_concat_tl_i64(t64, lo, cpu_val);
103
- tcg_gen_qemu_st64(t64, cpu_addr, dc->mem_idx);
104
+ tcg_gen_qemu_st_i64(t64, cpu_addr,
105
+ dc->mem_idx, MO_TEUQ);
106
}
107
break;
108
#if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
109
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
110
#ifdef TARGET_SPARC64
111
case 0x0e: /* V9 stx */
112
gen_address_mask(dc, cpu_addr);
113
- tcg_gen_qemu_st64(cpu_val, cpu_addr, dc->mem_idx);
114
+ tcg_gen_qemu_st_tl(cpu_val, cpu_addr,
115
+ dc->mem_idx, MO_TEUQ);
116
break;
117
case 0x1e: /* V9 stxa */
118
gen_st_asi(dc, cpu_val, cpu_addr, insn, MO_TEUQ);
119
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
120
#ifdef TARGET_SPARC64
121
gen_address_mask(dc, cpu_addr);
122
if (rd == 1) {
123
- tcg_gen_qemu_st64(cpu_fsr, cpu_addr, dc->mem_idx);
124
+ tcg_gen_qemu_st_tl(cpu_fsr, cpu_addr,
125
+ dc->mem_idx, MO_TEUQ);
126
break;
127
}
128
#endif
129
- tcg_gen_qemu_st32(cpu_fsr, cpu_addr, dc->mem_idx);
130
+ tcg_gen_qemu_st_tl(cpu_fsr, cpu_addr,
131
+ dc->mem_idx, MO_TEUL);
132
}
133
break;
134
case 0x26:
55
--
135
--
56
2.43.0
136
2.34.1
1
This massive macro is now only used once.
1
Convert away from the old interface with the implicit
2
Expand it for use only by float64.
2
MemOp argument.
3
3
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Reviewed-by: Max Filippov <jcmvbkbc@gmail.com>
6
Message-Id: <20230502135741.1158035-9-richard.henderson@linaro.org>
6
---
7
---
7
target/hexagon/fma_emu.c | 255 +++++++++++++++++++--------------------
8
target/xtensa/translate.c | 4 ++--
8
1 file changed, 127 insertions(+), 128 deletions(-)
9
1 file changed, 2 insertions(+), 2 deletions(-)
9
10
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
11
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/target/hexagon/fma_emu.c
13
--- a/target/xtensa/translate.c
13
+++ b/target/hexagon/fma_emu.c
14
+++ b/target/xtensa/translate.c
14
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
15
@@ -XXX,XX +XXX,XX @@ static void translate_dcache(DisasContext *dc, const OpcodeArg arg[],
16
TCGv_i32 res = tcg_temp_new_i32();
17
18
tcg_gen_addi_i32(addr, arg[0].in, arg[1].imm);
19
- tcg_gen_qemu_ld8u(res, addr, dc->cring);
20
+ tcg_gen_qemu_ld_i32(res, addr, dc->cring, MO_UB);
15
}
21
}
16
22
17
/* Return a maximum finite value with the requested sign */
23
static void translate_depbits(DisasContext *dc, const OpcodeArg arg[],
18
-#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
24
@@ -XXX,XX +XXX,XX @@ static void translate_l32r(DisasContext *dc, const OpcodeArg arg[],
19
-static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
25
} else {
20
-{ \
26
tmp = tcg_constant_i32(arg[1].imm);
21
- if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
27
}
22
- && ((a.guard | a.round | a.sticky) == 0)) { \
28
- tcg_gen_qemu_ld32u(arg[0].out, tmp, dc->cring);
23
- /* result zero */ \
29
+ tcg_gen_qemu_ld_i32(arg[0].out, tmp, dc->cring, MO_TEUL);
24
- switch (fp_status->float_rounding_mode) { \
25
- case float_round_down: \
26
- return zero_##SUFFIX(1); \
27
- default: \
28
- return zero_##SUFFIX(0); \
29
- } \
30
- } \
31
- /* Normalize right */ \
32
- /* We want MANTBITS bits of mantissa plus the leading one. */ \
33
- /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
34
- /* So we need to normalize right while the high word is non-zero and \
35
- * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
36
- while ((int128_gethi(a.mant) != 0) || \
37
- ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
38
- a = accum_norm_right(a, 1); \
39
- } \
40
- /* \
41
- * OK, now normalize left \
42
- * We want to normalize left until we have a leading one in bit 24 \
43
- * Theoretically, we only need to shift a maximum of one to the left if we \
44
- * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
45
- * should be 0 \
46
- */ \
47
- while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
48
- a = accum_norm_left(a); \
49
- } \
50
- /* \
51
- * OK, now we might need to denormalize because of potential underflow. \
52
- * We need to do this before rounding, and rounding might make us normal \
53
- * again \
54
- */ \
55
- while (a.exp <= 0) { \
56
- a = accum_norm_right(a, 1 - a.exp); \
57
- /* \
58
- * Do we have underflow? \
59
- * That's when we get an inexact answer because we ran out of bits \
60
- * in a denormal. \
61
- */ \
62
- if (a.guard || a.round || a.sticky) { \
63
- float_raise(float_flag_underflow, fp_status); \
64
- } \
65
- } \
66
- /* OK, we're relatively canonical... now we need to round */ \
67
- if (a.guard || a.round || a.sticky) { \
68
- float_raise(float_flag_inexact, fp_status); \
69
- switch (fp_status->float_rounding_mode) { \
70
- case float_round_to_zero: \
71
- /* Chop and we're done */ \
72
- break; \
73
- case float_round_up: \
74
- if (a.sign == 0) { \
75
- a.mant = int128_add(a.mant, int128_one()); \
76
- } \
77
- break; \
78
- case float_round_down: \
79
- if (a.sign != 0) { \
80
- a.mant = int128_add(a.mant, int128_one()); \
81
- } \
82
- break; \
83
- default: \
84
- if (a.round || a.sticky) { \
85
- /* round up if guard is 1, down if guard is zero */ \
86
- a.mant = int128_add(a.mant, int128_make64(a.guard)); \
87
- } else if (a.guard) { \
88
- /* exactly .5, round up if odd */ \
89
- a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
90
- } \
91
- break; \
92
- } \
93
- } \
94
- /* \
95
- * OK, now we might have carried all the way up. \
96
- * So we might need to shr once \
97
- * at least we know that the lsb should be zero if we rounded and \
98
- * got a carry out... \
99
- */ \
100
- if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
101
- a = accum_norm_right(a, 1); \
102
- } \
103
- /* Overflow? */ \
104
- if (a.exp >= INF_EXP) { \
105
- /* Yep, inf result */ \
106
- float_raise(float_flag_overflow, fp_status); \
107
- float_raise(float_flag_inexact, fp_status); \
108
- switch (fp_status->float_rounding_mode) { \
109
- case float_round_to_zero: \
110
- return maxfinite_##SUFFIX(a.sign); \
111
- case float_round_up: \
112
- if (a.sign == 0) { \
113
- return infinite_##SUFFIX(a.sign); \
114
- } else { \
115
- return maxfinite_##SUFFIX(a.sign); \
116
- } \
117
- case float_round_down: \
118
- if (a.sign != 0) { \
119
- return infinite_##SUFFIX(a.sign); \
120
- } else { \
121
- return maxfinite_##SUFFIX(a.sign); \
122
- } \
123
- default: \
124
- return infinite_##SUFFIX(a.sign); \
125
- } \
126
- } \
127
- /* Underflow? */ \
128
- if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
129
- /* Leading one means: No, we're normal. So, we should be done... */ \
130
- INTERNAL_TYPE ret; \
131
- ret.i = 0; \
132
- ret.sign = a.sign; \
133
- ret.exp = a.exp; \
134
- ret.mant = int128_getlo(a.mant); \
135
- return ret.i; \
136
- } \
137
- assert(a.exp == 1); \
138
- INTERNAL_TYPE ret; \
139
- ret.i = 0; \
140
- ret.sign = a.sign; \
141
- ret.exp = 0; \
142
- ret.mant = int128_getlo(a.mant); \
143
- return ret.i; \
144
+static float64 accum_round_float64(Accum a, float_status *fp_status)
145
+{
146
+ if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
147
+ && ((a.guard | a.round | a.sticky) == 0)) {
148
+ /* result zero */
149
+ switch (fp_status->float_rounding_mode) {
150
+ case float_round_down:
151
+ return zero_float64(1);
152
+ default:
153
+ return zero_float64(0);
154
+ }
155
+ }
156
+ /*
157
+ * Normalize right
158
+ * We want DF_MANTBITS bits of mantissa plus the leading one.
159
+ * That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF
160
+ * So we need to normalize right while the high word is non-zero and
161
+ * while the low word is nonzero when masked with 0xffe0_0000_0000_0000
162
+ */
163
+ while ((int128_gethi(a.mant) != 0) ||
164
+ ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) {
165
+ a = accum_norm_right(a, 1);
166
+ }
167
+ /*
168
+ * OK, now normalize left
169
+ * We want to normalize left until we have a leading one in bit 24
170
+ * Theoretically, we only need to shift a maximum of one to the left if we
171
+ * shifted out lots of bits from B, or if we had no shift / 1 shift sticky
172
+ * should be 0
173
+ */
174
+ while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) {
175
+ a = accum_norm_left(a);
176
+ }
177
+ /*
178
+ * OK, now we might need to denormalize because of potential underflow.
179
+ * We need to do this before rounding, and rounding might make us normal
180
+ * again
181
+ */
182
+ while (a.exp <= 0) {
183
+ a = accum_norm_right(a, 1 - a.exp);
184
+ /*
185
+ * Do we have underflow?
186
+ * That's when we get an inexact answer because we ran out of bits
187
+ * in a denormal.
188
+ */
189
+ if (a.guard || a.round || a.sticky) {
190
+ float_raise(float_flag_underflow, fp_status);
191
+ }
192
+ }
193
+ /* OK, we're relatively canonical... now we need to round */
194
+ if (a.guard || a.round || a.sticky) {
195
+ float_raise(float_flag_inexact, fp_status);
196
+ switch (fp_status->float_rounding_mode) {
197
+ case float_round_to_zero:
198
+ /* Chop and we're done */
199
+ break;
200
+ case float_round_up:
201
+ if (a.sign == 0) {
202
+ a.mant = int128_add(a.mant, int128_one());
203
+ }
204
+ break;
205
+ case float_round_down:
206
+ if (a.sign != 0) {
207
+ a.mant = int128_add(a.mant, int128_one());
208
+ }
209
+ break;
210
+ default:
211
+ if (a.round || a.sticky) {
212
+ /* round up if guard is 1, down if guard is zero */
213
+ a.mant = int128_add(a.mant, int128_make64(a.guard));
214
+ } else if (a.guard) {
215
+ /* exactly .5, round up if odd */
216
+ a.mant = int128_add(a.mant, int128_and(a.mant, int128_one()));
217
+ }
218
+ break;
219
+ }
220
+ }
221
+ /*
222
+ * OK, now we might have carried all the way up.
223
+ * So we might need to shr once
224
+ * at least we know that the lsb should be zero if we rounded and
225
+ * got a carry out...
226
+ */
227
+ if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) {
228
+ a = accum_norm_right(a, 1);
229
+ }
230
+ /* Overflow? */
231
+ if (a.exp >= DF_INF_EXP) {
232
+ /* Yep, inf result */
233
+ float_raise(float_flag_overflow, fp_status);
234
+ float_raise(float_flag_inexact, fp_status);
235
+ switch (fp_status->float_rounding_mode) {
236
+ case float_round_to_zero:
237
+ return maxfinite_float64(a.sign);
238
+ case float_round_up:
239
+ if (a.sign == 0) {
240
+ return infinite_float64(a.sign);
241
+ } else {
242
+ return maxfinite_float64(a.sign);
243
+ }
244
+ case float_round_down:
245
+ if (a.sign != 0) {
246
+ return infinite_float64(a.sign);
247
+ } else {
248
+ return maxfinite_float64(a.sign);
249
+ }
250
+ default:
251
+ return infinite_float64(a.sign);
252
+ }
253
+ }
254
+ /* Underflow? */
255
+ if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
256
+ /* Leading one means: No, we're normal. So, we should be done... */
257
+ Double ret;
258
+ ret.i = 0;
259
+ ret.sign = a.sign;
260
+ ret.exp = a.exp;
261
+ ret.mant = int128_getlo(a.mant);
262
+ return ret.i;
263
+ }
264
+ assert(a.exp == 1);
265
+ Double ret;
266
+ ret.i = 0;
267
+ ret.sign = a.sign;
268
+ ret.exp = 0;
269
+ ret.mant = int128_getlo(a.mant);
270
+ return ret.i;
271
}
30
}
272
31
273
-GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
32
static void translate_loop(DisasContext *dc, const OpcodeArg arg[],
274
-
275
float64 internal_mpyhh(float64 a, float64 b,
276
unsigned long long int accumulated,
277
float_status *fp_status)
278
--
33
--
279
2.43.0
34
2.34.1
1
The function is now unused.
1
Remove the old interfaces with the implicit MemOp argument.
2
2
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Acked-by: David Hildenbrand <david@redhat.com>
5
Message-Id: <20230502135741.1158035-10-richard.henderson@linaro.org>
5
---
6
---
6
target/hexagon/fma_emu.h | 2 -
7
include/tcg/tcg-op.h | 55 --------------------------------------------
7
target/hexagon/fma_emu.c | 171 ---------------------------------------
8
1 file changed, 55 deletions(-)
8
2 files changed, 173 deletions(-)
9
9
10
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
10
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/hexagon/fma_emu.h
12
--- a/include/tcg/tcg-op.h
13
+++ b/target/hexagon/fma_emu.h
13
+++ b/include/tcg/tcg-op.h
14
@@ -XXX,XX +XXX,XX @@ static inline uint32_t float32_getexp_raw(float32 f32)
14
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
15
}
15
void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
16
int32_t float32_getexp(float32 f32);
16
void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
17
float32 infinite_float32(uint8_t sign);
17
18
-float32 internal_fmafx(float32 a, float32 b, float32 c,
18
-static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
19
- int scale, float_status *fp_status);
20
float64 internal_mpyhh(float64 a, float64 b,
21
unsigned long long int accumulated,
22
float_status *fp_status);
23
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/hexagon/fma_emu.c
26
+++ b/target/hexagon/fma_emu.c
27
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
28
return -1;
29
}
30
31
-static uint64_t float32_getmant(float32 f32)
32
-{
19
-{
33
- Float a = { .i = f32 };
20
- tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_UB);
34
- if (float32_is_normal(f32)) {
35
- return a.mant | 1ULL << 23;
36
- }
37
- if (float32_is_zero(f32)) {
38
- return 0;
39
- }
40
- if (float32_is_denormal(f32)) {
41
- return a.mant;
42
- }
43
- return ~0ULL;
44
-}
21
-}
45
-
22
-
46
int32_t float32_getexp(float32 f32)
23
-static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index)
47
{
48
Float a = { .i = f32 };
49
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
50
}
51
52
/* Return a maximum finite value with the requested sign */
53
-static float32 maxfinite_float32(uint8_t sign)
54
-{
24
-{
55
- if (sign) {
25
- tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_SB);
56
- return make_float32(SF_MINUS_MAXF);
57
- } else {
58
- return make_float32(SF_MAXF);
59
- }
60
-}
26
-}
61
-
27
-
62
-/* Return a zero value with requested sign */
28
-static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index)
63
-static float32 zero_float32(uint8_t sign)
64
-{
29
-{
65
- if (sign) {
30
- tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUW);
66
- return make_float32(0x80000000);
67
- } else {
68
- return float32_zero;
69
- }
70
-}
31
-}
71
-
32
-
72
#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
33
-static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index)
73
static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
74
{ \
75
@@ -XXX,XX +XXX,XX @@ static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
76
}
77
78
GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
79
-GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
80
-
81
-static bool is_inf_prod(float64 a, float64 b)
82
-{
34
-{
83
- return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
35
- tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESW);
84
- (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
85
- (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
86
-}
36
-}
87
-
37
-
88
-static float64 special_fma(float64 a, float64 b, float64 c,
38
-static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index)
89
- float_status *fp_status)
90
-{
39
-{
91
- float64 ret = make_float64(0);
40
- tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUL);
92
-
93
- /*
94
- * If A multiplied by B is an exact infinity and C is also an infinity
95
- * but with the opposite sign, FMA returns NaN and raises invalid.
96
- */
97
- uint8_t a_sign = float64_is_neg(a);
98
- uint8_t b_sign = float64_is_neg(b);
99
- uint8_t c_sign = float64_is_neg(c);
100
- if (is_inf_prod(a, b) && float64_is_infinity(c)) {
101
- if ((a_sign ^ b_sign) != c_sign) {
102
- ret = make_float64(DF_NAN);
103
- float_raise(float_flag_invalid, fp_status);
104
- return ret;
105
- }
106
- }
107
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
108
- (float64_is_zero(a) && float64_is_infinity(b))) {
109
- ret = make_float64(DF_NAN);
110
- float_raise(float_flag_invalid, fp_status);
111
- return ret;
112
- }
113
- /*
114
- * If none of the above checks are true and C is a NaN,
115
- * a NaN shall be returned
116
- * If A or B are NaN, a NAN shall be returned.
117
- */
118
- if (float64_is_any_nan(a) ||
119
- float64_is_any_nan(b) ||
120
- float64_is_any_nan(c)) {
121
- if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
122
- float_raise(float_flag_invalid, fp_status);
123
- }
124
- if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
125
- float_raise(float_flag_invalid, fp_status);
126
- }
127
- if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
128
- float_raise(float_flag_invalid, fp_status);
129
- }
130
- ret = make_float64(DF_NAN);
131
- return ret;
132
- }
133
- /*
134
- * We have checked for adding opposite-signed infinities.
135
- * Other infinities return infinity with the correct sign
136
- */
137
- if (float64_is_infinity(c)) {
138
- ret = infinite_float64(c_sign);
139
- return ret;
140
- }
141
- if (float64_is_infinity(a) || float64_is_infinity(b)) {
142
- ret = infinite_float64(a_sign ^ b_sign);
143
- return ret;
144
- }
145
- g_assert_not_reached();
146
-}
41
-}
147
-
42
-
148
-static float32 special_fmaf(float32 a, float32 b, float32 c,
43
-static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index)
149
- float_status *fp_status)
150
-{
44
-{
151
- float64 aa, bb, cc;
45
- tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESL);
152
- aa = float32_to_float64(a, fp_status);
153
- bb = float32_to_float64(b, fp_status);
154
- cc = float32_to_float64(c, fp_status);
155
- return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
156
-}
46
-}
157
-
47
-
158
-float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
48
-static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index)
159
- float_status *fp_status)
160
-{
49
-{
161
- Accum prod;
50
- tcg_gen_qemu_ld_i64(ret, addr, mem_index, MO_TEUQ);
162
- Accum acc;
51
-}
163
- Accum result;
164
- accum_init(&prod);
165
- accum_init(&acc);
166
- accum_init(&result);
167
-
52
-
168
- uint8_t a_sign = float32_is_neg(a);
53
-static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index)
169
- uint8_t b_sign = float32_is_neg(b);
54
-{
170
- uint8_t c_sign = float32_is_neg(c);
55
- tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_UB);
171
- if (float32_is_infinity(a) ||
56
-}
172
- float32_is_infinity(b) ||
173
- float32_is_infinity(c)) {
174
- return special_fmaf(a, b, c, fp_status);
175
- }
176
- if (float32_is_any_nan(a) ||
177
- float32_is_any_nan(b) ||
178
- float32_is_any_nan(c)) {
179
- return special_fmaf(a, b, c, fp_status);
180
- }
181
- if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
182
- float32 tmp = float32_mul(a, b, fp_status);
183
- tmp = float32_add(tmp, c, fp_status);
184
- return tmp;
185
- }
186
-
57
-
187
- /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
58
-static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index)
188
- prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
59
-{
60
- tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUW);
61
-}
189
-
62
-
190
- /*
63
-static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index)
191
- * Note: extracting the mantissa into an int is multiplying by
64
-{
192
- * 2**23, so adjust here
65
- tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUL);
193
- */
194
- prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
195
- prod.sign = a_sign ^ b_sign;
196
- if (float32_is_zero(a) || float32_is_zero(b)) {
197
- prod.exp = -2 * WAY_BIG_EXP;
198
- }
199
- if ((scale > 0) && float32_is_denormal(c)) {
200
- acc.mant = int128_mul_6464(0, 0);
201
- acc.exp = -WAY_BIG_EXP;
202
- acc.sign = c_sign;
203
- acc.sticky = 1;
204
- result = accum_add(prod, acc);
205
- } else if (!float32_is_zero(c)) {
206
- acc.mant = int128_mul_6464(float32_getmant(c), 1);
207
- acc.exp = float32_getexp(c);
208
- acc.sign = c_sign;
209
- result = accum_add(prod, acc);
210
- } else {
211
- result = prod;
212
- }
213
- result.exp += scale;
214
- return accum_round_float32(result, fp_status);
215
-}
66
-}
216
67
-
217
float64 internal_mpyhh(float64 a, float64 b,
68
-static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
218
unsigned long long int accumulated,
69
-{
70
- tcg_gen_qemu_st_i64(arg, addr, mem_index, MO_TEUQ);
71
-}
72
-
73
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
74
TCGArg, MemOp);
75
void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
219
--
76
--
220
2.43.0
77
2.34.1
1
There are no special cases for this instruction.
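For context, float32_muladd() is the softfloat fused multiply-add, computing
(a * b) + c with a single rounding; the prototype in include/fpu/softfloat.h
is roughly:

    float32 float32_muladd(float32 a, float32 b, float32 c,
                           int flags, float_status *status);

A flags value of 0 requests plain fused semantics, which is why the helper
needs no special-case handling of its own.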
2
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
2
---
6
target/hexagon/op_helper.c | 2 +-
3
target/alpha/translate.c | 2 +-
7
1 file changed, 1 insertion(+), 1 deletion(-)
4
1 file changed, 1 insertion(+), 1 deletion(-)
8
5
9
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
6
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
10
index XXXXXXX..XXXXXXX 100644
7
index XXXXXXX..XXXXXXX 100644
11
--- a/target/hexagon/op_helper.c
8
--- a/target/alpha/translate.c
12
+++ b/target/hexagon/op_helper.c
9
+++ b/target/alpha/translate.c
13
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
10
@@ -XXX,XX +XXX,XX @@ struct DisasContext {
14
float32 RsV, float32 RtV)
11
#ifdef CONFIG_USER_ONLY
15
{
12
#define UNALIGN(C) (C)->unalign
16
arch_fpop_start(env);
13
#else
17
- RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
14
-#define UNALIGN(C) 0
18
+ RxV = float32_muladd(RsV, RtV, RxV, 0, &env->fp_status);
15
+#define UNALIGN(C) MO_ALIGN
19
arch_fpop_end(env);
16
#endif
20
return RxV;
17
21
}
18
/* Target-specific return values from translate_one, indicating the
22
--
19
--
23
2.43.0
20
2.34.1
1
This rounding mode is used by Hexagon.
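In other words, it behaves like round-to-nearest-even except on overflow,
where the result saturates to the largest finite value of the format (for
float32, 0x7f7fffff or 0xff7fffff depending on sign) rather than returning
an infinity.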
1
Mark all memory operations that are not already marked with UNALIGN.
2
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
4
---
5
include/fpu/softfloat-types.h | 2 ++
5
target/alpha/translate.c | 36 ++++++++++++++++++++----------------
6
fpu/softfloat-parts.c.inc | 3 +++
6
1 file changed, 20 insertions(+), 16 deletions(-)
7
2 files changed, 5 insertions(+)
8
7
9
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
8
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
10
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
11
--- a/include/fpu/softfloat-types.h
10
--- a/target/alpha/translate.c
12
+++ b/include/fpu/softfloat-types.h
11
+++ b/target/alpha/translate.c
13
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
12
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn)
14
float_round_to_odd = 5,
13
switch ((insn >> 12) & 0xF) {
15
/* Not an IEEE rounding mode: round to closest odd, overflow to inf */
14
case 0x0:
16
float_round_to_odd_inf = 6,
15
/* Longword physical access (hw_ldl/p) */
17
+ /* Not an IEEE rounding mode: round to nearest even, overflow to max */
16
- tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LESL);
18
+ float_round_nearest_even_max = 7,
17
+ tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LESL | MO_ALIGN);
19
} FloatRoundMode;
18
break;
20
19
case 0x1:
21
/*
20
/* Quadword physical access (hw_ldq/p) */
22
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
21
- tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LEUQ);
23
index XXXXXXX..XXXXXXX 100644
22
+ tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LEUQ | MO_ALIGN);
24
--- a/fpu/softfloat-parts.c.inc
23
break;
25
+++ b/fpu/softfloat-parts.c.inc
24
case 0x2:
26
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
25
/* Longword physical access with lock (hw_ldl_l/p) */
27
int exp, flags = 0;
26
- tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LESL);
28
27
+ tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LESL | MO_ALIGN);
29
switch (s->float_rounding_mode) {
28
tcg_gen_mov_i64(cpu_lock_addr, addr);
30
+ case float_round_nearest_even_max:
29
tcg_gen_mov_i64(cpu_lock_value, va);
31
+ overflow_norm = true;
30
break;
32
+ /* fall through */
31
case 0x3:
33
case float_round_nearest_even:
32
/* Quadword physical access with lock (hw_ldq_l/p) */
34
if (N > 64 && frac_lsb == 0) {
33
- tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LEUQ);
35
inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
34
+ tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LEUQ | MO_ALIGN);
35
tcg_gen_mov_i64(cpu_lock_addr, addr);
36
tcg_gen_mov_i64(cpu_lock_value, va);
37
break;
38
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn)
39
goto invalid_opc;
40
case 0xA:
41
/* Longword virtual access with protection check (hw_ldl/w) */
42
- tcg_gen_qemu_ld_i64(va, addr, MMU_KERNEL_IDX, MO_LESL);
43
+ tcg_gen_qemu_ld_i64(va, addr, MMU_KERNEL_IDX,
44
+ MO_LESL | MO_ALIGN);
45
break;
46
case 0xB:
47
/* Quadword virtual access with protection check (hw_ldq/w) */
48
- tcg_gen_qemu_ld_i64(va, addr, MMU_KERNEL_IDX, MO_LEUQ);
49
+ tcg_gen_qemu_ld_i64(va, addr, MMU_KERNEL_IDX,
50
+ MO_LEUQ | MO_ALIGN);
51
break;
52
case 0xC:
53
/* Longword virtual access with alt access mode (hw_ldl/a)*/
54
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn)
55
case 0xE:
56
/* Longword virtual access with alternate access mode and
57
protection checks (hw_ldl/wa) */
58
- tcg_gen_qemu_ld_i64(va, addr, MMU_USER_IDX, MO_LESL);
59
+ tcg_gen_qemu_ld_i64(va, addr, MMU_USER_IDX,
60
+ MO_LESL | MO_ALIGN);
61
break;
62
case 0xF:
63
/* Quadword virtual access with alternate access mode and
64
protection checks (hw_ldq/wa) */
65
- tcg_gen_qemu_ld_i64(va, addr, MMU_USER_IDX, MO_LEUQ);
66
+ tcg_gen_qemu_ld_i64(va, addr, MMU_USER_IDX,
67
+ MO_LEUQ | MO_ALIGN);
68
break;
69
}
70
break;
71
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn)
72
vb = load_gpr(ctx, rb);
73
tmp = tcg_temp_new();
74
tcg_gen_addi_i64(tmp, vb, disp12);
75
- tcg_gen_qemu_st_i64(va, tmp, MMU_PHYS_IDX, MO_LESL);
76
+ tcg_gen_qemu_st_i64(va, tmp, MMU_PHYS_IDX, MO_LESL | MO_ALIGN);
77
break;
78
case 0x1:
79
/* Quadword physical access */
80
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn)
81
vb = load_gpr(ctx, rb);
82
tmp = tcg_temp_new();
83
tcg_gen_addi_i64(tmp, vb, disp12);
84
- tcg_gen_qemu_st_i64(va, tmp, MMU_PHYS_IDX, MO_LEUQ);
85
+ tcg_gen_qemu_st_i64(va, tmp, MMU_PHYS_IDX, MO_LEUQ | MO_ALIGN);
86
break;
87
case 0x2:
88
/* Longword physical access with lock */
89
ret = gen_store_conditional(ctx, ra, rb, disp12,
90
- MMU_PHYS_IDX, MO_LESL);
91
+ MMU_PHYS_IDX, MO_LESL | MO_ALIGN);
92
break;
93
case 0x3:
94
/* Quadword physical access with lock */
95
ret = gen_store_conditional(ctx, ra, rb, disp12,
96
- MMU_PHYS_IDX, MO_LEUQ);
97
+ MMU_PHYS_IDX, MO_LEUQ | MO_ALIGN);
98
break;
99
case 0x4:
100
/* Longword virtual access */
101
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn)
102
break;
103
case 0x2A:
104
/* LDL_L */
105
- gen_load_int(ctx, ra, rb, disp16, MO_LESL, 0, 1);
106
+ gen_load_int(ctx, ra, rb, disp16, MO_LESL | MO_ALIGN, 0, 1);
107
break;
108
case 0x2B:
109
/* LDQ_L */
110
- gen_load_int(ctx, ra, rb, disp16, MO_LEUQ, 0, 1);
111
+ gen_load_int(ctx, ra, rb, disp16, MO_LEUQ | MO_ALIGN, 0, 1);
112
break;
113
case 0x2C:
114
/* STL */
115
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn)
116
case 0x2E:
117
/* STL_C */
118
ret = gen_store_conditional(ctx, ra, rb, disp16,
119
- ctx->mem_idx, MO_LESL);
120
+ ctx->mem_idx, MO_LESL | MO_ALIGN);
121
break;
122
case 0x2F:
123
/* STQ_C */
124
ret = gen_store_conditional(ctx, ra, rb, disp16,
125
- ctx->mem_idx, MO_LEUQ);
126
+ ctx->mem_idx, MO_LEUQ | MO_ALIGN);
127
break;
128
case 0x30:
129
/* BR */
36
--
130
--
37
2.43.0
131
2.34.1
diff view generated by jsdifflib
1
This structure, with bitfields, is incorrect for big-endian.
2
Use extract64 and deposit64 instead.
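Bit-field allocation order is implementation-defined and differs between
little- and big-endian ABIs, so the union only decoded the fields correctly
on little-endian hosts.  A minimal sketch of the endian-neutral replacement,
using extract64()/deposit64() from include/qemu/bitops.h with the IEEE
double layout (mantissa bits 0-51, exponent 52-62, sign 63); the function
name is illustrative:

    /* Split and rebuild an IEEE double held in a uint64_t. */
    static uint64_t repack_double_sketch(uint64_t f64)
    {
        uint64_t mant = extract64(f64, 0, 52);
        uint64_t exp  = extract64(f64, 52, 11);
        uint64_t sign = extract64(f64, 63, 1);
        uint64_t ret  = mant;

        ret = deposit64(ret, 52, 11, exp);
        ret = deposit64(ret, 63, 1, sign);
        return ret;
    }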
3
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
2
---
7
target/hexagon/fma_emu.c | 46 ++++++++++++++--------------------------
3
configs/targets/alpha-linux-user.mak | 1 -
8
1 file changed, 16 insertions(+), 30 deletions(-)
4
configs/targets/alpha-softmmu.mak | 1 -
5
2 files changed, 2 deletions(-)
9
6
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
7
diff --git a/configs/targets/alpha-linux-user.mak b/configs/targets/alpha-linux-user.mak
11
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
12
--- a/target/hexagon/fma_emu.c
9
--- a/configs/targets/alpha-linux-user.mak
13
+++ b/target/hexagon/fma_emu.c
10
+++ b/configs/targets/alpha-linux-user.mak
14
@@ -XXX,XX +XXX,XX @@
11
@@ -XXX,XX +XXX,XX @@
15
12
TARGET_ARCH=alpha
16
#define WAY_BIG_EXP 4096
13
TARGET_SYSTBL_ABI=common
17
14
TARGET_SYSTBL=syscall.tbl
18
-typedef union {
15
-TARGET_ALIGNED_ONLY=y
19
- double f;
16
diff --git a/configs/targets/alpha-softmmu.mak b/configs/targets/alpha-softmmu.mak
20
- uint64_t i;
17
index XXXXXXX..XXXXXXX 100644
21
- struct {
18
--- a/configs/targets/alpha-softmmu.mak
22
- uint64_t mant:52;
19
+++ b/configs/targets/alpha-softmmu.mak
23
- uint64_t exp:11;
20
@@ -XXX,XX +XXX,XX @@
24
- uint64_t sign:1;
21
TARGET_ARCH=alpha
25
- };
22
-TARGET_ALIGNED_ONLY=y
26
-} Double;
23
TARGET_SUPPORTS_MTTCG=y
27
-
28
static uint64_t float64_getmant(float64 f64)
29
{
30
- Double a = { .i = f64 };
31
+ uint64_t mant = extract64(f64, 0, 52);
32
if (float64_is_normal(f64)) {
33
- return a.mant | 1ULL << 52;
34
+ return mant | 1ULL << 52;
35
}
36
if (float64_is_zero(f64)) {
37
return 0;
38
}
39
if (float64_is_denormal(f64)) {
40
- return a.mant;
41
+ return mant;
42
}
43
return ~0ULL;
44
}
45
46
int32_t float64_getexp(float64 f64)
47
{
48
- Double a = { .i = f64 };
49
+ int exp = extract64(f64, 52, 11);
50
if (float64_is_normal(f64)) {
51
- return a.exp;
52
+ return exp;
53
}
54
if (float64_is_denormal(f64)) {
55
- return a.exp + 1;
56
+ return exp + 1;
57
}
58
return -1;
59
}
60
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
61
/* Return a maximum finite value with the requested sign */
62
static float64 accum_round_float64(Accum a, float_status *fp_status)
63
{
64
+ uint64_t ret;
65
+
66
if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
67
&& ((a.guard | a.round | a.sticky) == 0)) {
68
/* result zero */
69
@@ -XXX,XX +XXX,XX @@ static float64 accum_round_float64(Accum a, float_status *fp_status)
70
}
71
}
72
/* Underflow? */
73
- if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
74
+ ret = int128_getlo(a.mant);
75
+ if (ret & (1ULL << DF_MANTBITS)) {
76
/* Leading one means: No, we're normal. So, we should be done... */
77
- Double ret;
78
- ret.i = 0;
79
- ret.sign = a.sign;
80
- ret.exp = a.exp;
81
- ret.mant = int128_getlo(a.mant);
82
- return ret.i;
83
+ ret = deposit64(ret, 52, 11, a.exp);
84
+ } else {
85
+ assert(a.exp == 1);
86
+ ret = deposit64(ret, 52, 11, 0);
87
}
88
- assert(a.exp == 1);
89
- Double ret;
90
- ret.i = 0;
91
- ret.sign = a.sign;
92
- ret.exp = 0;
93
- ret.mant = int128_getlo(a.mant);
94
- return ret.i;
95
+ ret = deposit64(ret, 63, 1, a.sign);
96
+ return ret;
97
}
98
99
float64 internal_mpyhh(float64 a, float64 b,
100
--
24
--
101
2.43.0
25
2.34.1
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
2
---
4
tcg/optimize.c | 2 +-
3
target/hppa/translate.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
4
1 file changed, 1 insertion(+), 1 deletion(-)
6
5
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
6
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
8
index XXXXXXX..XXXXXXX 100644
7
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
8
--- a/target/hppa/translate.c
10
+++ b/tcg/optimize.c
9
+++ b/target/hppa/translate.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
10
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
12
return fold_orc(ctx, op);
11
#ifdef CONFIG_USER_ONLY
13
}
12
#define UNALIGN(C) (C)->unalign
14
}
13
#else
15
- return false;
14
-#define UNALIGN(C) 0
16
+ return finish_folding(ctx, op);
15
+#define UNALIGN(C) MO_ALIGN
17
}
16
#endif
18
17
19
/* Propagate constants and copies, fold constant expressions. */
18
/* Note that ssm/rsm instructions number PSW_W and PSW_E differently. */
20
--
19
--
21
2.43.0
20
2.34.1
1
This structure, with bitfields, is incorrect for big-endian.
2
Use the existing float32_getexp_raw which uses extract32.
3
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
2
---
7
target/hexagon/fma_emu.c | 16 +++-------------
3
configs/targets/hppa-linux-user.mak | 1 -
8
1 file changed, 3 insertions(+), 13 deletions(-)
4
configs/targets/hppa-softmmu.mak | 1 -
5
2 files changed, 2 deletions(-)
9
6
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
7
diff --git a/configs/targets/hppa-linux-user.mak b/configs/targets/hppa-linux-user.mak
11
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
12
--- a/target/hexagon/fma_emu.c
9
--- a/configs/targets/hppa-linux-user.mak
13
+++ b/target/hexagon/fma_emu.c
10
+++ b/configs/targets/hppa-linux-user.mak
14
@@ -XXX,XX +XXX,XX @@ typedef union {
11
@@ -XXX,XX +XXX,XX @@
15
};
12
TARGET_ARCH=hppa
16
} Double;
13
TARGET_SYSTBL_ABI=common,32
17
14
TARGET_SYSTBL=syscall.tbl
18
-typedef union {
15
-TARGET_ALIGNED_ONLY=y
19
- float f;
16
TARGET_BIG_ENDIAN=y
20
- uint32_t i;
17
diff --git a/configs/targets/hppa-softmmu.mak b/configs/targets/hppa-softmmu.mak
21
- struct {
18
index XXXXXXX..XXXXXXX 100644
22
- uint32_t mant:23;
19
--- a/configs/targets/hppa-softmmu.mak
23
- uint32_t exp:8;
20
+++ b/configs/targets/hppa-softmmu.mak
24
- uint32_t sign:1;
21
@@ -XXX,XX +XXX,XX @@
25
- };
22
TARGET_ARCH=hppa
26
-} Float;
23
-TARGET_ALIGNED_ONLY=y
27
-
24
TARGET_BIG_ENDIAN=y
28
static uint64_t float64_getmant(float64 f64)
25
TARGET_SUPPORTS_MTTCG=y
29
{
30
Double a = { .i = f64 };
31
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
32
33
int32_t float32_getexp(float32 f32)
34
{
35
- Float a = { .i = f32 };
36
+ int exp = float32_getexp_raw(f32);
37
if (float32_is_normal(f32)) {
38
- return a.exp;
39
+ return exp;
40
}
41
if (float32_is_denormal(f32)) {
42
- return a.exp + 1;
43
+ return exp + 1;
44
}
45
return -1;
46
}
47
--
26
--
48
2.43.0
27
2.34.1
1
There are no special cases for this instruction.
1
Acked-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
Remove internal_mpyf as unused.
3
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
3
---
7
target/hexagon/fma_emu.h | 1 -
4
target/sparc/translate.c | 66 +++++++++++++++++++++-------------------
8
target/hexagon/fma_emu.c | 8 --------
5
1 file changed, 34 insertions(+), 32 deletions(-)
9
target/hexagon/op_helper.c | 2 +-
10
3 files changed, 1 insertion(+), 10 deletions(-)
11
6
12
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
7
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
13
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
14
--- a/target/hexagon/fma_emu.h
9
--- a/target/sparc/translate.c
15
+++ b/target/hexagon/fma_emu.h
10
+++ b/target/sparc/translate.c
16
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32);
11
@@ -XXX,XX +XXX,XX @@ static void gen_swap(DisasContext *dc, TCGv dst, TCGv src,
17
float32 infinite_float32(uint8_t sign);
12
TCGv addr, int mmu_idx, MemOp memop)
18
float32 internal_fmafx(float32 a, float32 b, float32 c,
13
{
19
int scale, float_status *fp_status);
14
gen_address_mask(dc, addr);
20
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status);
15
- tcg_gen_atomic_xchg_tl(dst, addr, src, mmu_idx, memop);
21
float64 internal_mpyhh(float64 a, float64 b,
16
+ tcg_gen_atomic_xchg_tl(dst, addr, src, mmu_idx, memop | MO_ALIGN);
22
unsigned long long int accumulated,
23
float_status *fp_status);
24
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/hexagon/fma_emu.c
27
+++ b/target/hexagon/fma_emu.c
28
@@ -XXX,XX +XXX,XX @@ float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
29
return accum_round_float32(result, fp_status);
30
}
17
}
31
18
32
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
19
static void gen_ldstub(DisasContext *dc, TCGv dst, TCGv addr, int mmu_idx)
33
-{
20
@@ -XXX,XX +XXX,XX @@ static void gen_ld_asi(DisasContext *dc, TCGv dst, TCGv addr,
34
- if (float32_is_zero(a) || float32_is_zero(b)) {
21
break;
35
- return float32_mul(a, b, fp_status);
22
case GET_ASI_DIRECT:
36
- }
23
gen_address_mask(dc, addr);
37
- return internal_fmafx(a, b, float32_zero, 0, fp_status);
24
- tcg_gen_qemu_ld_tl(dst, addr, da.mem_idx, da.memop);
38
-}
25
+ tcg_gen_qemu_ld_tl(dst, addr, da.mem_idx, da.memop | MO_ALIGN);
39
-
26
break;
40
float64 internal_mpyhh(float64 a, float64 b,
27
default:
41
unsigned long long int accumulated,
28
{
42
float_status *fp_status)
29
TCGv_i32 r_asi = tcg_constant_i32(da.asi);
43
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
30
- TCGv_i32 r_mop = tcg_constant_i32(memop);
44
index XXXXXXX..XXXXXXX 100644
31
+ TCGv_i32 r_mop = tcg_constant_i32(memop | MO_ALIGN);
45
--- a/target/hexagon/op_helper.c
32
46
+++ b/target/hexagon/op_helper.c
33
save_state(dc);
47
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
34
#ifdef TARGET_SPARC64
48
{
35
@@ -XXX,XX +XXX,XX @@ static void gen_st_asi(DisasContext *dc, TCGv src, TCGv addr,
49
float32 RdV;
36
/* fall through */
50
arch_fpop_start(env);
37
case GET_ASI_DIRECT:
51
- RdV = internal_mpyf(RsV, RtV, &env->fp_status);
38
gen_address_mask(dc, addr);
52
+ RdV = float32_mul(RsV, RtV, &env->fp_status);
39
- tcg_gen_qemu_st_tl(src, addr, da.mem_idx, da.memop);
53
arch_fpop_end(env);
40
+ tcg_gen_qemu_st_tl(src, addr, da.mem_idx, da.memop | MO_ALIGN);
54
return RdV;
41
break;
55
}
42
#if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY)
43
case GET_ASI_BCOPY:
44
@@ -XXX,XX +XXX,XX @@ static void gen_st_asi(DisasContext *dc, TCGv src, TCGv addr,
45
default:
46
{
47
TCGv_i32 r_asi = tcg_constant_i32(da.asi);
48
- TCGv_i32 r_mop = tcg_constant_i32(memop & MO_SIZE);
49
+ TCGv_i32 r_mop = tcg_constant_i32(memop | MO_ALIGN);
50
51
save_state(dc);
52
#ifdef TARGET_SPARC64
53
@@ -XXX,XX +XXX,XX @@ static void gen_cas_asi(DisasContext *dc, TCGv addr, TCGv cmpv,
54
case GET_ASI_DIRECT:
55
oldv = tcg_temp_new();
56
tcg_gen_atomic_cmpxchg_tl(oldv, addr, cmpv, gen_load_gpr(dc, rd),
57
- da.mem_idx, da.memop);
58
+ da.mem_idx, da.memop | MO_ALIGN);
59
gen_store_gpr(dc, rd, oldv);
60
break;
61
default:
62
@@ -XXX,XX +XXX,XX @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
63
switch (size) {
64
case 4:
65
d32 = gen_dest_fpr_F(dc);
66
- tcg_gen_qemu_ld_i32(d32, addr, da.mem_idx, da.memop);
67
+ tcg_gen_qemu_ld_i32(d32, addr, da.mem_idx, da.memop | MO_ALIGN);
68
gen_store_fpr_F(dc, rd, d32);
69
break;
70
case 8:
71
@@ -XXX,XX +XXX,XX @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
72
/* Valid for lddfa only. */
73
if (size == 8) {
74
gen_address_mask(dc, addr);
75
- tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
76
+ tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx,
77
+ da.memop | MO_ALIGN);
78
} else {
79
gen_exception(dc, TT_ILL_INSN);
80
}
81
@@ -XXX,XX +XXX,XX @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
82
default:
83
{
84
TCGv_i32 r_asi = tcg_constant_i32(da.asi);
85
- TCGv_i32 r_mop = tcg_constant_i32(da.memop);
86
+ TCGv_i32 r_mop = tcg_constant_i32(da.memop | MO_ALIGN);
87
88
save_state(dc);
89
/* According to the table in the UA2011 manual, the only
90
@@ -XXX,XX +XXX,XX @@ static void gen_stf_asi(DisasContext *dc, TCGv addr,
91
switch (size) {
92
case 4:
93
d32 = gen_load_fpr_F(dc, rd);
94
- tcg_gen_qemu_st_i32(d32, addr, da.mem_idx, da.memop);
95
+ tcg_gen_qemu_st_i32(d32, addr, da.mem_idx, da.memop | MO_ALIGN);
96
break;
97
case 8:
98
tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx,
99
@@ -XXX,XX +XXX,XX @@ static void gen_stf_asi(DisasContext *dc, TCGv addr,
100
/* Valid for stdfa only. */
101
if (size == 8) {
102
gen_address_mask(dc, addr);
103
- tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
104
+ tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx,
105
+ da.memop | MO_ALIGN);
106
} else {
107
gen_exception(dc, TT_ILL_INSN);
108
}
109
@@ -XXX,XX +XXX,XX @@ static void gen_ldda_asi(DisasContext *dc, TCGv addr, int insn, int rd)
110
TCGv_i64 tmp = tcg_temp_new_i64();
111
112
gen_address_mask(dc, addr);
113
- tcg_gen_qemu_ld_i64(tmp, addr, da.mem_idx, da.memop);
114
+ tcg_gen_qemu_ld_i64(tmp, addr, da.mem_idx, da.memop | MO_ALIGN);
115
116
/* Note that LE ldda acts as if each 32-bit register
117
result is byte swapped. Having just performed one
118
@@ -XXX,XX +XXX,XX @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
119
tcg_gen_concat32_i64(t64, hi, lo);
120
}
121
gen_address_mask(dc, addr);
122
- tcg_gen_qemu_st_i64(t64, addr, da.mem_idx, da.memop);
123
+ tcg_gen_qemu_st_i64(t64, addr, da.mem_idx, da.memop | MO_ALIGN);
124
}
125
break;
126
127
@@ -XXX,XX +XXX,XX @@ static void gen_casx_asi(DisasContext *dc, TCGv addr, TCGv cmpv,
128
case GET_ASI_DIRECT:
129
oldv = tcg_temp_new();
130
tcg_gen_atomic_cmpxchg_tl(oldv, addr, cmpv, gen_load_gpr(dc, rd),
131
- da.mem_idx, da.memop);
132
+ da.mem_idx, da.memop | MO_ALIGN);
133
gen_store_gpr(dc, rd, oldv);
134
break;
135
default:
136
@@ -XXX,XX +XXX,XX @@ static void gen_ldda_asi(DisasContext *dc, TCGv addr, int insn, int rd)
137
return;
138
case GET_ASI_DIRECT:
139
gen_address_mask(dc, addr);
140
- tcg_gen_qemu_ld_i64(t64, addr, da.mem_idx, da.memop);
141
+ tcg_gen_qemu_ld_i64(t64, addr, da.mem_idx, da.memop | MO_ALIGN);
142
break;
143
default:
144
{
145
@@ -XXX,XX +XXX,XX @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
146
break;
147
case GET_ASI_DIRECT:
148
gen_address_mask(dc, addr);
149
- tcg_gen_qemu_st_i64(t64, addr, da.mem_idx, da.memop);
150
+ tcg_gen_qemu_st_i64(t64, addr, da.mem_idx, da.memop | MO_ALIGN);
151
break;
152
case GET_ASI_BFILL:
153
/* Store 32 bytes of T64 to ADDR. */
154
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
155
case 0x0: /* ld, V9 lduw, load unsigned word */
156
gen_address_mask(dc, cpu_addr);
157
tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
158
- dc->mem_idx, MO_TEUL);
159
+ dc->mem_idx, MO_TEUL | MO_ALIGN);
160
break;
161
case 0x1: /* ldub, load unsigned byte */
162
gen_address_mask(dc, cpu_addr);
163
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
164
case 0x2: /* lduh, load unsigned halfword */
165
gen_address_mask(dc, cpu_addr);
166
tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
167
- dc->mem_idx, MO_TEUW);
168
+ dc->mem_idx, MO_TEUW | MO_ALIGN);
169
break;
170
case 0x3: /* ldd, load double word */
171
if (rd & 1)
172
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
173
gen_address_mask(dc, cpu_addr);
174
t64 = tcg_temp_new_i64();
175
tcg_gen_qemu_ld_i64(t64, cpu_addr,
176
- dc->mem_idx, MO_TEUQ);
177
+ dc->mem_idx, MO_TEUQ | MO_ALIGN);
178
tcg_gen_trunc_i64_tl(cpu_val, t64);
179
tcg_gen_ext32u_tl(cpu_val, cpu_val);
180
gen_store_gpr(dc, rd + 1, cpu_val);
181
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
182
case 0xa: /* ldsh, load signed halfword */
183
gen_address_mask(dc, cpu_addr);
184
tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
185
- dc->mem_idx, MO_TESW);
186
+ dc->mem_idx, MO_TESW | MO_ALIGN);
187
break;
188
case 0xd: /* ldstub */
189
gen_ldstub(dc, cpu_val, cpu_addr, dc->mem_idx);
190
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
191
case 0x08: /* V9 ldsw */
192
gen_address_mask(dc, cpu_addr);
193
tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
194
- dc->mem_idx, MO_TESL);
195
+ dc->mem_idx, MO_TESL | MO_ALIGN);
196
break;
197
case 0x0b: /* V9 ldx */
198
gen_address_mask(dc, cpu_addr);
199
tcg_gen_qemu_ld_tl(cpu_val, cpu_addr,
200
- dc->mem_idx, MO_TEUQ);
201
+ dc->mem_idx, MO_TEUQ | MO_ALIGN);
202
break;
203
case 0x18: /* V9 ldswa */
204
gen_ld_asi(dc, cpu_val, cpu_addr, insn, MO_TESL);
205
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
206
gen_address_mask(dc, cpu_addr);
207
cpu_dst_32 = gen_dest_fpr_F(dc);
208
tcg_gen_qemu_ld_i32(cpu_dst_32, cpu_addr,
209
- dc->mem_idx, MO_TEUL);
210
+ dc->mem_idx, MO_TEUL | MO_ALIGN);
211
gen_store_fpr_F(dc, rd, cpu_dst_32);
212
break;
213
case 0x21: /* ldfsr, V9 ldxfsr */
214
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
215
if (rd == 1) {
216
TCGv_i64 t64 = tcg_temp_new_i64();
217
tcg_gen_qemu_ld_i64(t64, cpu_addr,
218
- dc->mem_idx, MO_TEUQ);
219
+ dc->mem_idx, MO_TEUQ | MO_ALIGN);
220
gen_helper_ldxfsr(cpu_fsr, cpu_env, cpu_fsr, t64);
221
break;
222
}
223
#endif
224
cpu_dst_32 = tcg_temp_new_i32();
225
tcg_gen_qemu_ld_i32(cpu_dst_32, cpu_addr,
226
- dc->mem_idx, MO_TEUL);
227
+ dc->mem_idx, MO_TEUL | MO_ALIGN);
228
gen_helper_ldfsr(cpu_fsr, cpu_env, cpu_fsr, cpu_dst_32);
229
break;
230
case 0x22: /* ldqf, load quad fpreg */
231
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
232
case 0x4: /* st, store word */
233
gen_address_mask(dc, cpu_addr);
234
tcg_gen_qemu_st_tl(cpu_val, cpu_addr,
235
- dc->mem_idx, MO_TEUL);
236
+ dc->mem_idx, MO_TEUL | MO_ALIGN);
237
break;
238
case 0x5: /* stb, store byte */
239
gen_address_mask(dc, cpu_addr);
240
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
241
case 0x6: /* sth, store halfword */
242
gen_address_mask(dc, cpu_addr);
243
tcg_gen_qemu_st_tl(cpu_val, cpu_addr,
244
- dc->mem_idx, MO_TEUW);
245
+ dc->mem_idx, MO_TEUW | MO_ALIGN);
246
break;
247
case 0x7: /* std, store double word */
248
if (rd & 1)
249
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
250
t64 = tcg_temp_new_i64();
251
tcg_gen_concat_tl_i64(t64, lo, cpu_val);
252
tcg_gen_qemu_st_i64(t64, cpu_addr,
253
- dc->mem_idx, MO_TEUQ);
254
+ dc->mem_idx, MO_TEUQ | MO_ALIGN);
255
}
256
break;
257
#if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
258
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
259
case 0x0e: /* V9 stx */
260
gen_address_mask(dc, cpu_addr);
261
tcg_gen_qemu_st_tl(cpu_val, cpu_addr,
262
- dc->mem_idx, MO_TEUQ);
263
+ dc->mem_idx, MO_TEUQ | MO_ALIGN);
264
break;
265
case 0x1e: /* V9 stxa */
266
gen_st_asi(dc, cpu_val, cpu_addr, insn, MO_TEUQ);
267
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
268
gen_address_mask(dc, cpu_addr);
269
cpu_src1_32 = gen_load_fpr_F(dc, rd);
270
tcg_gen_qemu_st_i32(cpu_src1_32, cpu_addr,
271
- dc->mem_idx, MO_TEUL);
272
+ dc->mem_idx, MO_TEUL | MO_ALIGN);
273
break;
274
case 0x25: /* stfsr, V9 stxfsr */
275
{
276
@@ -XXX,XX +XXX,XX @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
277
gen_address_mask(dc, cpu_addr);
278
if (rd == 1) {
279
tcg_gen_qemu_st_tl(cpu_fsr, cpu_addr,
280
- dc->mem_idx, MO_TEUQ);
281
+ dc->mem_idx, MO_TEUQ | MO_ALIGN);
282
break;
283
}
284
#endif
285
tcg_gen_qemu_st_tl(cpu_fsr, cpu_addr,
286
- dc->mem_idx, MO_TEUL);
287
+ dc->mem_idx, MO_TEUL | MO_ALIGN);
288
}
289
break;
290
case 0x26:
56
--
291
--
57
2.43.0
292
2.34.1
1
Merge the two conditions, sign != 0 && !(z_mask & sign),
1
This passes on the memop as given as an argument to
2
by testing ~z_mask & sign. If sign == 0, the logical and
2
helper_ld_asi to the ultimate load primitive.
3
will produce false.
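
A concrete check, illustrative only and using narrow values for brevity:

    old: sign != 0 && !(z_mask & sign)          new: ~z_mask & sign
    sign = 0x80, z_mask = 0x7f:  true  (0x7f & 0x80 == 0)    ~0x7f & 0x80 == 0x80  -> true
    sign = 0x80, z_mask = 0xff:  false (0x80 & 0x80 != 0)    ~0xff & 0x80 == 0     -> false
    sign = 0:                    false (short-circuits)      ~z_mask & 0  == 0     -> false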
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 5 ++---
7
target/sparc/ldst_helper.c | 10 ++++++----
9
1 file changed, 2 insertions(+), 3 deletions(-)
8
1 file changed, 6 insertions(+), 4 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/target/sparc/ldst_helper.c
14
+++ b/tcg/optimize.c
13
+++ b/target/sparc/ldst_helper.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
16
15
#if defined(DEBUG_MXCC) || defined(DEBUG_ASI)
17
static bool fold_shift(OptContext *ctx, TCGOp *op)
16
uint32_t last_addr = addr;
18
{
17
#endif
19
- uint64_t s_mask, z_mask, sign;
18
+ MemOpIdx oi;
20
+ uint64_t s_mask, z_mask;
19
21
TempOptInfo *t1, *t2;
20
do_check_align(env, addr, size - 1, GETPC());
22
21
switch (asi) {
23
if (fold_const2(ctx, op) ||
22
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
24
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
23
case ASI_M_IODIAG: /* Turbosparc IOTLB Diagnostic */
25
* If the sign bit is known zero, then logical right shift
24
break;
26
* will not reduce the number of input sign repetitions.
25
case ASI_KERNELTXT: /* Supervisor code access */
27
*/
26
+ oi = make_memop_idx(memop, cpu_mmu_index(env, true));
28
- sign = -s_mask;
27
switch (size) {
29
- if (sign && !(z_mask & sign)) {
28
case 1:
30
+ if (~z_mask & -s_mask) {
29
- ret = cpu_ldub_code(env, addr);
31
return fold_masks_s(ctx, op, s_mask);
30
+ ret = cpu_ldb_code_mmu(env, addr, oi, GETPC());
31
break;
32
case 2:
33
- ret = cpu_lduw_code(env, addr);
34
+ ret = cpu_ldw_code_mmu(env, addr, oi, GETPC());
35
break;
36
default:
37
case 4:
38
- ret = cpu_ldl_code(env, addr);
39
+ ret = cpu_ldl_code_mmu(env, addr, oi, GETPC());
40
break;
41
case 8:
42
- ret = cpu_ldq_code(env, addr);
43
+ ret = cpu_ldq_code_mmu(env, addr, oi, GETPC());
44
break;
32
}
45
}
33
break;
46
break;
34
--
47
--
35
2.43.0
48
2.34.1
1
This instruction has a special case that 0 * x + c returns c
1
Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
without the normal sign folding that comes with 0 + -0.
Use the new float_muladd_suppress_add_product_zero to
describe this.
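
Concretely, and purely as illustration: with a == +0.0, b a finite nonzero
value, and c == -0.0, the generic fused multiply-add computes (+0.0 * b) +
(-0.0); the product is +0.0, and the sum +0.0 + -0.0 rounds to +0.0 under
round-to-nearest-even, so the sign of c is lost. The Hexagon instruction
returns c itself (-0.0 here); the new float_muladd_suppress_add_product_zero
flag is what lets the generic muladd express that behaviour.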
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
3
---
9
target/hexagon/op_helper.c | 11 +++--------
4
configs/targets/sparc-linux-user.mak | 1 -
10
1 file changed, 3 insertions(+), 8 deletions(-)
5
configs/targets/sparc-softmmu.mak | 1 -
6
configs/targets/sparc32plus-linux-user.mak | 1 -
7
configs/targets/sparc64-linux-user.mak | 1 -
8
configs/targets/sparc64-softmmu.mak | 1 -
9
5 files changed, 5 deletions(-)
11
10
12
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
11
diff --git a/configs/targets/sparc-linux-user.mak b/configs/targets/sparc-linux-user.mak
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/target/hexagon/op_helper.c
13
--- a/configs/targets/sparc-linux-user.mak
15
+++ b/target/hexagon/op_helper.c
14
+++ b/configs/targets/sparc-linux-user.mak
16
@@ -XXX,XX +XXX,XX @@ static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
15
@@ -XXX,XX +XXX,XX @@
17
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
16
TARGET_ARCH=sparc
18
float32 RsV, float32 RtV, float32 PuV)
17
TARGET_SYSTBL_ABI=common,32
19
{
18
TARGET_SYSTBL=syscall.tbl
20
- size4s_t tmp;
19
-TARGET_ALIGNED_ONLY=y
21
arch_fpop_start(env);
20
TARGET_BIG_ENDIAN=y
22
- RxV = check_nan(RxV, RxV, &env->fp_status);
21
diff --git a/configs/targets/sparc-softmmu.mak b/configs/targets/sparc-softmmu.mak
23
- RxV = check_nan(RxV, RsV, &env->fp_status);
22
index XXXXXXX..XXXXXXX 100644
24
- RxV = check_nan(RxV, RtV, &env->fp_status);
23
--- a/configs/targets/sparc-softmmu.mak
25
- tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
24
+++ b/configs/targets/sparc-softmmu.mak
26
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
25
@@ -XXX,XX +XXX,XX @@
27
- RxV = tmp;
26
TARGET_ARCH=sparc
28
- }
27
-TARGET_ALIGNED_ONLY=y
29
+ RxV = float32_muladd_scalbn(RsV, RtV, RxV, fSXTN(8, 64, PuV),
28
TARGET_BIG_ENDIAN=y
30
+ float_muladd_suppress_add_product_zero,
29
diff --git a/configs/targets/sparc32plus-linux-user.mak b/configs/targets/sparc32plus-linux-user.mak
31
+ &env->fp_status);
30
index XXXXXXX..XXXXXXX 100644
32
arch_fpop_end(env);
31
--- a/configs/targets/sparc32plus-linux-user.mak
33
return RxV;
32
+++ b/configs/targets/sparc32plus-linux-user.mak
34
}
33
@@ -XXX,XX +XXX,XX @@ TARGET_BASE_ARCH=sparc
34
TARGET_ABI_DIR=sparc
35
TARGET_SYSTBL_ABI=common,32
36
TARGET_SYSTBL=syscall.tbl
37
-TARGET_ALIGNED_ONLY=y
38
TARGET_BIG_ENDIAN=y
39
diff --git a/configs/targets/sparc64-linux-user.mak b/configs/targets/sparc64-linux-user.mak
40
index XXXXXXX..XXXXXXX 100644
41
--- a/configs/targets/sparc64-linux-user.mak
42
+++ b/configs/targets/sparc64-linux-user.mak
43
@@ -XXX,XX +XXX,XX @@ TARGET_BASE_ARCH=sparc
44
TARGET_ABI_DIR=sparc
45
TARGET_SYSTBL_ABI=common,64
46
TARGET_SYSTBL=syscall.tbl
47
-TARGET_ALIGNED_ONLY=y
48
TARGET_BIG_ENDIAN=y
49
diff --git a/configs/targets/sparc64-softmmu.mak b/configs/targets/sparc64-softmmu.mak
50
index XXXXXXX..XXXXXXX 100644
51
--- a/configs/targets/sparc64-softmmu.mak
52
+++ b/configs/targets/sparc64-softmmu.mak
53
@@ -XXX,XX +XXX,XX @@
54
TARGET_ARCH=sparc64
55
TARGET_BASE_ARCH=sparc
56
-TARGET_ALIGNED_ONLY=y
57
TARGET_BIG_ENDIAN=y
35
--
58
--
36
2.43.0
59
2.34.1
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
1
Interpret the variable argument placement in the caller. Pass data_type
2
instead of is64 -- there are several places where we already convert back
3
from bool to type. Clean things up by using type throughout.
4
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
7
---
4
tcg/optimize.c | 6 +++---
8
tcg/i386/tcg-target.c.inc | 111 +++++++++++++++++---------------------
5
1 file changed, 3 insertions(+), 3 deletions(-)
9
1 file changed, 50 insertions(+), 61 deletions(-)
6
10
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
13
--- a/tcg/i386/tcg-target.c.inc
10
+++ b/tcg/optimize.c
14
+++ b/tcg/i386/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
12
fold_xi_to_x(ctx, op, 1)) {
16
* Record the context of a call to the out of line helper code for the slow path
13
return true;
17
* for a load or store, so that we can later generate the correct helper code
14
}
18
*/
15
- return false;
19
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64,
16
+ return finish_folding(ctx, op);
20
- MemOpIdx oi,
21
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
22
+ TCGType type, MemOpIdx oi,
23
TCGReg datalo, TCGReg datahi,
24
TCGReg addrlo, TCGReg addrhi,
25
tcg_insn_unit *raddr,
26
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64,
27
28
label->is_ld = is_ld;
29
label->oi = oi;
30
- label->type = is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
31
+ label->type = type;
32
label->datalo_reg = datalo;
33
label->datahi_reg = datahi;
34
label->addrlo_reg = addrlo;
35
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
36
37
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
38
TCGReg base, int index, intptr_t ofs,
39
- int seg, bool is64, MemOp memop)
40
+ int seg, TCGType type, MemOp memop)
41
{
42
- TCGType type = is64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
43
bool use_movbe = false;
44
- int rexw = is64 * P_REXW;
45
+ int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW);
46
int movop = OPC_MOVL_GvEv;
47
48
/* Do big-endian loads with movbe. */
49
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
50
}
17
}
51
}
18
52
19
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
53
-/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
20
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
54
- EAX. It will be useful once fixed registers globals are less
21
fold_xi_to_i(ctx, op, 0)) {
55
- common. */
22
return true;
56
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
23
}
57
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
24
- return false;
58
+ TCGReg addrlo, TCGReg addrhi,
25
+ return finish_folding(ctx, op);
59
+ MemOpIdx oi, TCGType data_type)
60
{
61
- TCGReg datalo, datahi, addrlo;
62
- TCGReg addrhi __attribute__((unused));
63
- MemOpIdx oi;
64
- MemOp opc;
65
+ MemOp opc = get_memop(oi);
66
+
67
#if defined(CONFIG_SOFTMMU)
68
- int mem_index;
69
tcg_insn_unit *label_ptr[2];
70
-#else
71
- unsigned a_bits;
72
-#endif
73
74
- datalo = *args++;
75
- datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
76
- addrlo = *args++;
77
- addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
78
- oi = *args++;
79
- opc = get_memop(oi);
80
-
81
-#if defined(CONFIG_SOFTMMU)
82
- mem_index = get_mmuidx(oi);
83
-
84
- tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
85
+ tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
86
label_ptr, offsetof(CPUTLBEntry, addr_read));
87
88
/* TLB Hit. */
89
- tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, is64, opc);
90
+ tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1,
91
+ -1, 0, 0, data_type, opc);
92
93
/* Record the current context of a load into ldst label */
94
- add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi,
95
- s->code_ptr, label_ptr);
96
+ add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
97
+ addrlo, addrhi, s->code_ptr, label_ptr);
98
#else
99
- a_bits = get_alignment_bits(opc);
100
+ unsigned a_bits = get_alignment_bits(opc);
101
if (a_bits) {
102
tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
103
}
104
105
tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
106
x86_guest_base_offset, x86_guest_base_seg,
107
- is64, opc);
108
+ data_type, opc);
109
#endif
26
}
110
}
27
111
28
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
112
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
29
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
113
}
30
tcg_opt_gen_movi(ctx, op2, rh, h);
31
return true;
32
}
33
- return false;
34
+ return finish_folding(ctx, op);
35
}
114
}
36
115
37
static bool fold_nand(OptContext *ctx, TCGOp *op)
116
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
117
+static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
118
+ TCGReg addrlo, TCGReg addrhi,
119
+ MemOpIdx oi, TCGType data_type)
120
{
121
- TCGReg datalo, datahi, addrlo;
122
- TCGReg addrhi __attribute__((unused));
123
- MemOpIdx oi;
124
- MemOp opc;
125
+ MemOp opc = get_memop(oi);
126
+
127
#if defined(CONFIG_SOFTMMU)
128
- int mem_index;
129
tcg_insn_unit *label_ptr[2];
130
-#else
131
- unsigned a_bits;
132
-#endif
133
134
- datalo = *args++;
135
- datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
136
- addrlo = *args++;
137
- addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
138
- oi = *args++;
139
- opc = get_memop(oi);
140
-
141
-#if defined(CONFIG_SOFTMMU)
142
- mem_index = get_mmuidx(oi);
143
-
144
- tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
145
+ tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
146
label_ptr, offsetof(CPUTLBEntry, addr_write));
147
148
/* TLB Hit. */
149
tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
150
151
/* Record the current context of a store into ldst label */
152
- add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi,
153
- s->code_ptr, label_ptr);
154
+ add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
155
+ addrlo, addrhi, s->code_ptr, label_ptr);
156
#else
157
- a_bits = get_alignment_bits(opc);
158
+ unsigned a_bits = get_alignment_bits(opc);
159
if (a_bits) {
160
tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
161
}
162
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
163
break;
164
165
case INDEX_op_qemu_ld_i32:
166
- tcg_out_qemu_ld(s, args, 0);
167
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
168
+ tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
169
+ } else {
170
+ tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
171
+ }
172
break;
173
case INDEX_op_qemu_ld_i64:
174
- tcg_out_qemu_ld(s, args, 1);
175
+ if (TCG_TARGET_REG_BITS == 64) {
176
+ tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
177
+ } else if (TARGET_LONG_BITS == 32) {
178
+ tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
179
+ } else {
180
+ tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
181
+ }
182
break;
183
case INDEX_op_qemu_st_i32:
184
case INDEX_op_qemu_st8_i32:
185
- tcg_out_qemu_st(s, args, 0);
186
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
187
+ tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
188
+ } else {
189
+ tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
190
+ }
191
break;
192
case INDEX_op_qemu_st_i64:
193
- tcg_out_qemu_st(s, args, 1);
194
+ if (TCG_TARGET_REG_BITS == 64) {
195
+ tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
196
+ } else if (TARGET_LONG_BITS == 32) {
197
+ tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
198
+ } else {
199
+ tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
200
+ }
201
break;
202
203
OP_32_64(mulu2):
38
--
204
--
39
2.43.0
205
2.34.1
206
207
1
Avoid the use of the OptContext slots.
1
Test for both base and index; use datahi as a temporary, overwritten
2
by the final load. Always perform the loads in ascending order, so
3
that any (user-only) fault sees the correct address.
2
4
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
tcg/optimize.c | 16 +++++++++-------
7
tcg/i386/tcg-target.c.inc | 31 +++++++++++++++----------------
7
1 file changed, 9 insertions(+), 7 deletions(-)
8
1 file changed, 15 insertions(+), 16 deletions(-)
8
9
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
--- a/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/optimize.c
13
+++ b/tcg/i386/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
14
15
if (TCG_TARGET_REG_BITS == 64) {
15
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
16
tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
16
{
17
base, index, 0, ofs);
17
+ uint64_t z_mask = -1, s_mask = 0;
18
+ break;
18
+
19
+ }
19
/* We can't do any folding with a load, but we can record bits. */
20
+ if (use_movbe) {
20
switch (op->opc) {
21
+ TCGReg t = datalo;
21
CASE_OP_32_64(ld8s):
22
+ datalo = datahi;
22
- ctx->s_mask = MAKE_64BIT_MASK(8, 56);
23
+ datahi = t;
23
+ s_mask = INT8_MIN;
24
+ }
24
break;
25
+ if (base == datalo || index == datalo) {
25
CASE_OP_32_64(ld8u):
26
+ tcg_out_modrm_sib_offset(s, OPC_LEA, datahi, base, index, 0, ofs);
26
- ctx->z_mask = MAKE_64BIT_MASK(0, 8);
27
+ tcg_out_modrm_offset(s, movop + seg, datalo, datahi, 0);
27
+ z_mask = MAKE_64BIT_MASK(0, 8);
28
+ tcg_out_modrm_offset(s, movop + seg, datahi, datahi, 4);
28
break;
29
} else {
29
CASE_OP_32_64(ld16s):
30
- if (use_movbe) {
30
- ctx->s_mask = MAKE_64BIT_MASK(16, 48);
31
- TCGReg t = datalo;
31
+ s_mask = INT16_MIN;
32
- datalo = datahi;
32
break;
33
- datahi = t;
33
CASE_OP_32_64(ld16u):
34
- }
34
- ctx->z_mask = MAKE_64BIT_MASK(0, 16);
35
- if (base != datalo) {
35
+ z_mask = MAKE_64BIT_MASK(0, 16);
36
- tcg_out_modrm_sib_offset(s, movop + seg, datalo,
36
break;
37
- base, index, 0, ofs);
37
case INDEX_op_ld32s_i64:
38
- tcg_out_modrm_sib_offset(s, movop + seg, datahi,
38
- ctx->s_mask = MAKE_64BIT_MASK(32, 32);
39
- base, index, 0, ofs + 4);
39
+ s_mask = INT32_MIN;
40
- } else {
40
break;
41
- tcg_out_modrm_sib_offset(s, movop + seg, datahi,
41
case INDEX_op_ld32u_i64:
42
- base, index, 0, ofs + 4);
42
- ctx->z_mask = MAKE_64BIT_MASK(0, 32);
43
- tcg_out_modrm_sib_offset(s, movop + seg, datalo,
43
+ z_mask = MAKE_64BIT_MASK(0, 32);
44
- base, index, 0, ofs);
45
- }
46
+ tcg_out_modrm_sib_offset(s, movop + seg, datalo,
47
+ base, index, 0, ofs);
48
+ tcg_out_modrm_sib_offset(s, movop + seg, datahi,
49
+ base, index, 0, ofs + 4);
50
}
44
break;
51
break;
45
default:
52
default:
46
g_assert_not_reached();
47
}
48
- return false;
49
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
50
}
51
52
static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
53
--
53
--
54
2.43.0
54
2.34.1
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
1
Collect the 4 potential parts of the host address into a struct.
2
Reorg tcg_out_qemu_{ld,st}_direct to use it.
3
Reorg guest_base handling to use it.
2
4
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
7
---
6
tcg/optimize.c | 27 ++++++++++++++-------------
8
tcg/i386/tcg-target.c.inc | 165 +++++++++++++++++++++-----------------
7
1 file changed, 14 insertions(+), 13 deletions(-)
9
1 file changed, 90 insertions(+), 75 deletions(-)
8
10
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
13
--- a/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/optimize.c
14
+++ b/tcg/i386/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_nopn(TCGContext *s, int n)
14
static bool fold_shift(OptContext *ctx, TCGOp *op)
16
tcg_out8(s, 0x90);
15
{
17
}
16
uint64_t s_mask, z_mask, sign;
18
17
+ TempOptInfo *t1, *t2;
19
+typedef struct {
18
20
+ TCGReg base;
19
if (fold_const2(ctx, op) ||
21
+ int index;
20
fold_ix_to_i(ctx, op, 0) ||
22
+ int ofs;
21
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
23
+ int seg;
22
return true;
24
+} HostAddress;
23
}
25
+
24
26
#if defined(CONFIG_SOFTMMU)
25
- s_mask = arg_info(op->args[1])->s_mask;
27
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
26
- z_mask = arg_info(op->args[1])->z_mask;
28
* int mmu_idx, uintptr_t ra)
27
+ t1 = arg_info(op->args[1]);
29
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
28
+ t2 = arg_info(op->args[2]);
30
return tcg_out_fail_alignment(s, l);
29
+ s_mask = t1->s_mask;
31
}
30
+ z_mask = t1->z_mask;
32
31
33
-#if TCG_TARGET_REG_BITS == 32
32
- if (arg_is_const(op->args[2])) {
34
-# define x86_guest_base_seg 0
33
- int sh = arg_info(op->args[2])->val;
35
-# define x86_guest_base_index -1
34
-
36
-# define x86_guest_base_offset guest_base
35
- ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
37
-#else
36
+ if (ti_is_const(t2)) {
38
-static int x86_guest_base_seg;
37
+ int sh = ti_const_val(t2);
39
-static int x86_guest_base_index = -1;
38
40
-static int32_t x86_guest_base_offset;
39
+ z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
41
-# if defined(__x86_64__) && defined(__linux__)
40
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
42
-# include <asm/prctl.h>
41
43
-# include <sys/prctl.h>
42
- return fold_masks(ctx, op);
44
+static HostAddress x86_guest_base = {
43
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
45
+ .index = -1
44
}
46
+};
45
47
+
46
switch (op->opc) {
48
+#if defined(__x86_64__) && defined(__linux__)
47
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
49
+# include <asm/prctl.h>
48
* Arithmetic right shift will not reduce the number of
50
+# include <sys/prctl.h>
49
* input sign repetitions.
51
int arch_prctl(int code, unsigned long addr);
50
*/
52
static inline int setup_guest_base_seg(void)
51
- ctx->s_mask = s_mask;
53
{
52
- break;
54
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
53
+ return fold_masks_s(ctx, op, s_mask);
55
}
54
CASE_OP_32_64(shr):
56
return 0;
55
/*
57
}
56
* If the sign bit is known zero, then logical right shift
58
-# elif defined (__FreeBSD__) || defined (__FreeBSD_kernel__)
57
- * will not reduced the number of input sign repetitions.
59
-# include <machine/sysarch.h>
58
+ * will not reduce the number of input sign repetitions.
60
+#elif defined(__x86_64__) && \
59
*/
61
+ (defined (__FreeBSD__) || defined (__FreeBSD_kernel__))
60
- sign = (s_mask & -s_mask) >> 1;
62
+# include <machine/sysarch.h>
61
+ sign = -s_mask;
63
static inline int setup_guest_base_seg(void)
62
if (sign && !(z_mask & sign)) {
64
{
63
- ctx->s_mask = s_mask;
65
if (sysarch(AMD64_SET_GSBASE, &guest_base) == 0) {
64
+ return fold_masks_s(ctx, op, s_mask);
66
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
67
}
68
return 0;
69
}
70
-# else
71
+#else
72
static inline int setup_guest_base_seg(void)
73
{
74
return 0;
75
}
76
-# endif
77
-#endif
78
+#endif /* setup_guest_base_seg */
79
#endif /* SOFTMMU */
80
81
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
82
- TCGReg base, int index, intptr_t ofs,
83
- int seg, TCGType type, MemOp memop)
84
+ HostAddress h, TCGType type, MemOp memop)
85
{
86
bool use_movbe = false;
87
int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW);
88
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
89
90
switch (memop & MO_SSIZE) {
91
case MO_UB:
92
- tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
93
- base, index, 0, ofs);
94
+ tcg_out_modrm_sib_offset(s, OPC_MOVZBL + h.seg, datalo,
95
+ h.base, h.index, 0, h.ofs);
96
break;
97
case MO_SB:
98
- tcg_out_modrm_sib_offset(s, OPC_MOVSBL + rexw + seg, datalo,
99
- base, index, 0, ofs);
100
+ tcg_out_modrm_sib_offset(s, OPC_MOVSBL + rexw + h.seg, datalo,
101
+ h.base, h.index, 0, h.ofs);
102
break;
103
case MO_UW:
104
if (use_movbe) {
105
/* There is no extending movbe; only low 16-bits are modified. */
106
- if (datalo != base && datalo != index) {
107
+ if (datalo != h.base && datalo != h.index) {
108
/* XOR breaks dependency chains. */
109
tgen_arithr(s, ARITH_XOR, datalo, datalo);
110
- tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
111
- datalo, base, index, 0, ofs);
112
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg,
113
+ datalo, h.base, h.index, 0, h.ofs);
114
} else {
115
- tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
116
- datalo, base, index, 0, ofs);
117
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg,
118
+ datalo, h.base, h.index, 0, h.ofs);
119
tcg_out_ext16u(s, datalo, datalo);
120
}
121
} else {
122
- tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
123
- base, index, 0, ofs);
124
+ tcg_out_modrm_sib_offset(s, OPC_MOVZWL + h.seg, datalo,
125
+ h.base, h.index, 0, h.ofs);
126
}
127
break;
128
case MO_SW:
129
if (use_movbe) {
130
- tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
131
- datalo, base, index, 0, ofs);
132
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg,
133
+ datalo, h.base, h.index, 0, h.ofs);
134
tcg_out_ext16s(s, type, datalo, datalo);
135
} else {
136
- tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg,
137
- datalo, base, index, 0, ofs);
138
+ tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + h.seg,
139
+ datalo, h.base, h.index, 0, h.ofs);
140
}
141
break;
142
case MO_UL:
143
- tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
144
+ tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
145
+ h.base, h.index, 0, h.ofs);
146
break;
147
#if TCG_TARGET_REG_BITS == 64
148
case MO_SL:
149
if (use_movbe) {
150
- tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + seg, datalo,
151
- base, index, 0, ofs);
152
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + h.seg, datalo,
153
+ h.base, h.index, 0, h.ofs);
154
tcg_out_ext32s(s, datalo, datalo);
155
} else {
156
- tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
157
- base, index, 0, ofs);
158
+ tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + h.seg, datalo,
159
+ h.base, h.index, 0, h.ofs);
160
}
161
break;
162
#endif
163
case MO_UQ:
164
if (TCG_TARGET_REG_BITS == 64) {
165
- tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
166
- base, index, 0, ofs);
167
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
168
+ h.base, h.index, 0, h.ofs);
169
break;
170
}
171
if (use_movbe) {
172
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
173
datalo = datahi;
174
datahi = t;
175
}
176
- if (base == datalo || index == datalo) {
177
- tcg_out_modrm_sib_offset(s, OPC_LEA, datahi, base, index, 0, ofs);
178
- tcg_out_modrm_offset(s, movop + seg, datalo, datahi, 0);
179
- tcg_out_modrm_offset(s, movop + seg, datahi, datahi, 4);
180
+ if (h.base == datalo || h.index == datalo) {
181
+ tcg_out_modrm_sib_offset(s, OPC_LEA, datahi,
182
+ h.base, h.index, 0, h.ofs);
183
+ tcg_out_modrm_offset(s, movop + h.seg, datalo, datahi, 0);
184
+ tcg_out_modrm_offset(s, movop + h.seg, datahi, datahi, 4);
185
} else {
186
- tcg_out_modrm_sib_offset(s, movop + seg, datalo,
187
- base, index, 0, ofs);
188
- tcg_out_modrm_sib_offset(s, movop + seg, datahi,
189
- base, index, 0, ofs + 4);
190
+ tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
191
+ h.base, h.index, 0, h.ofs);
192
+ tcg_out_modrm_sib_offset(s, movop + h.seg, datahi,
193
+ h.base, h.index, 0, h.ofs + 4);
65
}
194
}
66
break;
195
break;
67
default:
196
default:
68
break;
197
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
69
}
198
MemOpIdx oi, TCGType data_type)
70
199
{
71
- return false;
200
MemOp opc = get_memop(oi);
72
+ return finish_folding(ctx, op);
201
+ HostAddress h;
73
}
202
74
203
#if defined(CONFIG_SOFTMMU)
75
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
204
tcg_insn_unit *label_ptr[2];
205
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
206
label_ptr, offsetof(CPUTLBEntry, addr_read));
207
208
/* TLB Hit. */
209
- tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1,
210
- -1, 0, 0, data_type, opc);
211
+ h.base = TCG_REG_L1;
212
+ h.index = -1;
213
+ h.ofs = 0;
214
+ h.seg = 0;
215
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
216
217
/* Record the current context of a load into ldst label */
218
add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
219
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
220
tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
221
}
222
223
- tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
224
- x86_guest_base_offset, x86_guest_base_seg,
225
- data_type, opc);
226
+ h = x86_guest_base;
227
+ h.base = addrlo;
228
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
229
#endif
230
}
231
232
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
233
- TCGReg base, int index, intptr_t ofs,
234
- int seg, MemOp memop)
235
+ HostAddress h, MemOp memop)
236
{
237
bool use_movbe = false;
238
int movop = OPC_MOVL_EvGv;
239
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
240
case MO_8:
241
/* This is handled with constraints on INDEX_op_qemu_st8_i32. */
242
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4);
243
- tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
244
- datalo, base, index, 0, ofs);
245
+ tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + h.seg,
246
+ datalo, h.base, h.index, 0, h.ofs);
247
break;
248
case MO_16:
249
- tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo,
250
- base, index, 0, ofs);
251
+ tcg_out_modrm_sib_offset(s, movop + P_DATA16 + h.seg, datalo,
252
+ h.base, h.index, 0, h.ofs);
253
break;
254
case MO_32:
255
- tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
256
+ tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
257
+ h.base, h.index, 0, h.ofs);
258
break;
259
case MO_64:
260
if (TCG_TARGET_REG_BITS == 64) {
261
- tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
262
- base, index, 0, ofs);
263
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
264
+ h.base, h.index, 0, h.ofs);
265
} else {
266
if (use_movbe) {
267
TCGReg t = datalo;
268
datalo = datahi;
269
datahi = t;
270
}
271
- tcg_out_modrm_sib_offset(s, movop + seg, datalo,
272
- base, index, 0, ofs);
273
- tcg_out_modrm_sib_offset(s, movop + seg, datahi,
274
- base, index, 0, ofs + 4);
275
+ tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
276
+ h.base, h.index, 0, h.ofs);
277
+ tcg_out_modrm_sib_offset(s, movop + h.seg, datahi,
278
+ h.base, h.index, 0, h.ofs + 4);
279
}
280
break;
281
default:
282
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
283
MemOpIdx oi, TCGType data_type)
284
{
285
MemOp opc = get_memop(oi);
286
+ HostAddress h;
287
288
#if defined(CONFIG_SOFTMMU)
289
tcg_insn_unit *label_ptr[2];
290
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
291
label_ptr, offsetof(CPUTLBEntry, addr_write));
292
293
/* TLB Hit. */
294
- tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
295
+ h.base = TCG_REG_L1;
296
+ h.index = -1;
297
+ h.ofs = 0;
298
+ h.seg = 0;
299
+ tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
300
301
/* Record the current context of a store into ldst label */
302
add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
303
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
304
tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
305
}
306
307
- tcg_out_qemu_st_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
308
- x86_guest_base_offset, x86_guest_base_seg, opc);
309
+ h = x86_guest_base;
310
+ h.base = addrlo;
311
+
312
+ tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
313
#endif
314
}
315
316
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
317
(ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
318
+ stack_addend);
319
#else
320
-# if !defined(CONFIG_SOFTMMU) && TCG_TARGET_REG_BITS == 64
321
+# if !defined(CONFIG_SOFTMMU)
322
if (guest_base) {
323
int seg = setup_guest_base_seg();
324
if (seg != 0) {
325
- x86_guest_base_seg = seg;
326
+ x86_guest_base.seg = seg;
327
} else if (guest_base == (int32_t)guest_base) {
328
- x86_guest_base_offset = guest_base;
329
+ x86_guest_base.ofs = guest_base;
330
} else {
331
/* Choose R12 because, as a base, it requires a SIB byte. */
332
- x86_guest_base_index = TCG_REG_R12;
333
- tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base_index, guest_base);
334
- tcg_regset_set_reg(s->reserved_regs, x86_guest_base_index);
335
+ x86_guest_base.index = TCG_REG_R12;
336
+ tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base.index, guest_base);
337
+ tcg_regset_set_reg(s->reserved_regs, x86_guest_base.index);
338
}
339
}
340
# endif
76
--
341
--
77
2.43.0
342
2.34.1
343
344
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
1
Use TCG_REG_L[01] constants directly.
2
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
tcg/optimize.c | 24 +++++++++---------------
6
tcg/i386/tcg-target.c.inc | 32 ++++++++++++++++----------------
7
1 file changed, 9 insertions(+), 15 deletions(-)
7
1 file changed, 16 insertions(+), 16 deletions(-)
8
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
11
--- a/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/optimize.c
12
+++ b/tcg/i386/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
14
static bool fold_sextract(OptContext *ctx, TCGOp *op)
14
int mem_index, MemOp opc,
15
tcg_insn_unit **label_ptr, int which)
15
{
16
{
16
uint64_t z_mask, s_mask, s_mask_old;
17
- const TCGReg r0 = TCG_REG_L0;
17
+ TempOptInfo *t1 = arg_info(op->args[1]);
18
- const TCGReg r1 = TCG_REG_L1;
18
int pos = op->args[2];
19
TCGType ttype = TCG_TYPE_I32;
19
int len = op->args[3];
20
TCGType tlbtype = TCG_TYPE_I32;
20
21
int trexw = 0, hrexw = 0, tlbrexw = 0;
21
- if (arg_is_const(op->args[1])) {
22
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
22
- uint64_t t;
23
}
23
-
24
- t = arg_info(op->args[1])->val;
25
- t = sextract64(t, pos, len);
26
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
27
+ if (ti_is_const(t1)) {
28
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
29
+ sextract64(ti_const_val(t1), pos, len));
30
}
24
}
31
25
32
- z_mask = arg_info(op->args[1])->z_mask;
26
- tcg_out_mov(s, tlbtype, r0, addrlo);
33
- z_mask = sextract64(z_mask, pos, len);
27
- tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
34
- ctx->z_mask = z_mask;
28
+ tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
35
-
29
+ tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
36
- s_mask_old = arg_info(op->args[1])->s_mask;
30
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
37
- s_mask = sextract64(s_mask_old, pos, len);
31
38
- s_mask |= MAKE_64BIT_MASK(len, 64 - len);
32
- tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, r0, TCG_AREG0,
39
- ctx->s_mask = s_mask;
33
+ tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
40
+ s_mask_old = t1->s_mask;
34
TLB_MASK_TABLE_OFS(mem_index) +
41
+ s_mask = s_mask_old >> pos;
35
offsetof(CPUTLBDescFast, mask));
42
+ s_mask |= -1ull << (len - 1);
36
43
37
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r0, TCG_AREG0,
44
if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
38
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
45
return true;
39
TLB_MASK_TABLE_OFS(mem_index) +
40
offsetof(CPUTLBDescFast, table));
41
42
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
43
copy the address and mask. For lesser alignments, check that we don't
44
cross pages for the complete access. */
45
if (a_bits >= s_bits) {
46
- tcg_out_mov(s, ttype, r1, addrlo);
47
+ tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
48
} else {
49
- tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
50
+ tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
51
+ addrlo, s_mask - a_mask);
46
}
52
}
47
53
tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
48
- return fold_masks(ctx, op);
54
- tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
49
+ z_mask = sextract64(t1->z_mask, pos, len);
55
+ tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
56
57
- /* cmp 0(r0), r1 */
58
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, which);
59
+ /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
60
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
61
+ TCG_REG_L1, TCG_REG_L0, which);
62
63
/* Prepare for both the fast path add of the tlb addend, and the slow
64
path function argument setup. */
65
- tcg_out_mov(s, ttype, r1, addrlo);
66
+ tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
67
68
/* jne slow_path */
69
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
70
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
71
s->code_ptr += 4;
72
73
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
74
- /* cmp 4(r0), addrhi */
75
- tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, which + 4);
76
+ /* cmp 4(TCG_REG_L0), addrhi */
77
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, which + 4);
78
79
/* jne slow_path */
80
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
81
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
82
83
/* TLB Hit. */
84
85
- /* add addend(r0), r1 */
86
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
87
+ /* add addend(TCG_REG_L0), TCG_REG_L1 */
88
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L1, TCG_REG_L0,
89
offsetof(CPUTLBEntry, addend));
51
}
90
}
52
91
53
static bool fold_shift(OptContext *ctx, TCGOp *op)
54
--
92
--
55
2.43.0
93
2.34.1
94
95
1
The big comment just above says functions should be sorted.
1
Split out a helper for choosing testb vs testl.
2
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
tcg/optimize.c | 60 +++++++++++++++++++++++++-------------------------
6
tcg/i386/tcg-target.c.inc | 30 ++++++++++++++++++------------
7
1 file changed, 30 insertions(+), 30 deletions(-)
7
1 file changed, 18 insertions(+), 12 deletions(-)
8
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
11
--- a/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/optimize.c
12
+++ b/tcg/i386/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_nopn(TCGContext *s, int n)
14
return true;
14
tcg_out8(s, 0x90);
15
}
15
}
16
16
17
+static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
17
+/* Test register R vs immediate bits I, setting Z flag for EQ/NE. */
18
+static void __attribute__((unused))
19
+tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i)
18
+{
20
+{
19
+ /* Canonicalize the comparison to put immediate second. */
21
+ /*
20
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
22
+ * This is used for testing alignment, so we can usually use testb.
21
+ op->args[3] = tcg_swap_cond(op->args[3]);
23
+ * For i686, we have to use testl for %esi/%edi.
24
+ */
25
+ if (i <= 0xff && (TCG_TARGET_REG_BITS == 64 || r < 4)) {
26
+ tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, r);
27
+ tcg_out8(s, i);
28
+ } else {
29
+ tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_TESTi, r);
30
+ tcg_out32(s, i);
22
+ }
31
+ }
23
+ return finish_folding(ctx, op);
24
+}
32
+}
25
+
33
+
26
+static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
34
typedef struct {
27
+{
35
TCGReg base;
28
+ /* If true and false values are the same, eliminate the cmp. */
36
int index;
29
+ if (args_are_copies(op->args[3], op->args[4])) {
37
@@ -XXX,XX +XXX,XX @@ static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
30
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
38
unsigned a_mask = (1 << a_bits) - 1;
31
+ }
39
TCGLabelQemuLdst *label;
32
+
40
33
+ /* Canonicalize the comparison to put immediate second. */
41
- /*
34
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
42
- * We are expecting a_bits to max out at 7, so we can usually use testb.
35
+ op->args[5] = tcg_swap_cond(op->args[5]);
43
- * For i686, we have to use testl for %esi/%edi.
36
+ }
44
- */
37
+ /*
45
- if (a_mask <= 0xff && (TCG_TARGET_REG_BITS == 64 || addrlo < 4)) {
38
+ * Canonicalize the "false" input reg to match the destination,
46
- tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, addrlo);
39
+ * so that the tcg backend can implement "move if true".
47
- tcg_out8(s, a_mask);
40
+ */
48
- } else {
41
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
49
- tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_TESTi, addrlo);
42
+ op->args[5] = tcg_invert_cond(op->args[5]);
50
- tcg_out32(s, a_mask);
43
+ }
44
+ return finish_folding(ctx, op);
45
+}
46
+
47
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
48
{
49
uint64_t z_mask, s_mask;
50
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
51
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
52
}
53
54
-static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
55
-{
56
- /* Canonicalize the comparison to put immediate second. */
57
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
58
- op->args[3] = tcg_swap_cond(op->args[3]);
59
- }
60
- return finish_folding(ctx, op);
61
-}
62
-
63
-static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
64
-{
65
- /* If true and false values are the same, eliminate the cmp. */
66
- if (args_are_copies(op->args[3], op->args[4])) {
67
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
68
- }
51
- }
69
-
52
-
70
- /* Canonicalize the comparison to put immediate second. */
53
+ tcg_out_testi(s, addrlo, a_mask);
71
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
54
/* jne slow_path */
72
- op->args[5] = tcg_swap_cond(op->args[5]);
55
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
73
- }
56
74
- /*
75
- * Canonicalize the "false" input reg to match the destination,
76
- * so that the tcg backend can implement "move if true".
77
- */
78
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
79
- op->args[5] = tcg_invert_cond(op->args[5]);
80
- }
81
- return finish_folding(ctx, op);
82
-}
83
-
84
static bool fold_sextract(OptContext *ctx, TCGOp *op)
85
{
86
uint64_t z_mask, s_mask, s_mask_old;
--
2.43.0
Convert all targets simultaneously, as the gen_intermediate_code
function disappears from the target. While there are possible
workarounds, they're larger than simply performing the conversion.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Rename the 'ext' parameter 'data_type' to make the use clearer;
pass it to tcg_out_qemu_st as well to even out the interfaces.
Rename the 'otype' local 'addr_type' to make the use clearer.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
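For orientation, this is the shape of the conversion for a hypothetical target "foo", mirroring the per-target hunks that follow. It is an excerpt-style sketch rather than a compilable unit, and foo_tr_ops and foo_translate_init stand in for the target's existing TranslatorOps table and init hook.

/* target/foo/cpu.h: the new per-target prototype replaces the global one. */
void foo_translate_code(CPUState *cs, TranslationBlock *tb,
                        int *max_insns, vaddr pc, void *host_pc);

/* target/foo/translate.c: formerly gen_intermediate_code(). */
void foo_translate_code(CPUState *cs, TranslationBlock *tb,
                        int *max_insns, vaddr pc, void *host_pc)
{
    DisasContext dc = { };

    translator_loop(cs, tb, max_insns, pc, host_pc, &foo_tr_ops, &dc.base);
}

/* target/foo/cpu.c: hook it up; cpu_exec now asserts the hook is set. */
static const TCGCPUOps foo_tcg_ops = {
    .initialize     = foo_translate_init,
    .translate_code = foo_translate_code,
    /* ... remaining hooks unchanged ... */
};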
8
include/exec/translator.h | 14 --------------
8
tcg/aarch64/tcg-target.c.inc | 36 +++++++++++++++++-------------------
9
include/hw/core/tcg-cpu-ops.h | 13 +++++++++++++
9
1 file changed, 17 insertions(+), 19 deletions(-)
10
target/alpha/cpu.h | 2 ++
11
target/arm/internals.h | 2 ++
12
target/avr/cpu.h | 2 ++
13
target/hexagon/cpu.h | 2 ++
14
target/hppa/cpu.h | 2 ++
15
target/i386/tcg/helper-tcg.h | 2 ++
16
target/loongarch/internals.h | 2 ++
17
target/m68k/cpu.h | 2 ++
18
target/microblaze/cpu.h | 2 ++
19
target/mips/tcg/tcg-internal.h | 2 ++
20
target/openrisc/cpu.h | 2 ++
21
target/ppc/cpu.h | 2 ++
22
target/riscv/cpu.h | 3 +++
23
target/rx/cpu.h | 2 ++
24
target/s390x/s390x-internal.h | 2 ++
25
target/sh4/cpu.h | 2 ++
26
target/sparc/cpu.h | 2 ++
27
target/tricore/cpu.h | 2 ++
28
target/xtensa/cpu.h | 2 ++
29
accel/tcg/cpu-exec.c | 8 +++++---
30
accel/tcg/translate-all.c | 8 +++++---
31
target/alpha/cpu.c | 1 +
32
target/alpha/translate.c | 4 ++--
33
target/arm/cpu.c | 1 +
34
target/arm/tcg/cpu-v7m.c | 1 +
35
target/arm/tcg/translate.c | 5 ++---
36
target/avr/cpu.c | 1 +
37
target/avr/translate.c | 6 +++---
38
target/hexagon/cpu.c | 1 +
39
target/hexagon/translate.c | 4 ++--
40
target/hppa/cpu.c | 1 +
41
target/hppa/translate.c | 4 ++--
42
target/i386/tcg/tcg-cpu.c | 1 +
43
target/i386/tcg/translate.c | 5 ++---
44
target/loongarch/cpu.c | 1 +
45
target/loongarch/tcg/translate.c | 4 ++--
46
target/m68k/cpu.c | 1 +
47
target/m68k/translate.c | 4 ++--
48
target/microblaze/cpu.c | 1 +
49
target/microblaze/translate.c | 4 ++--
50
target/mips/cpu.c | 1 +
51
target/mips/tcg/translate.c | 4 ++--
52
target/openrisc/cpu.c | 1 +
53
target/openrisc/translate.c | 4 ++--
54
target/ppc/cpu_init.c | 1 +
55
target/ppc/translate.c | 4 ++--
56
target/riscv/tcg/tcg-cpu.c | 1 +
57
target/riscv/translate.c | 4 ++--
58
target/rx/cpu.c | 1 +
59
target/rx/translate.c | 4 ++--
60
target/s390x/cpu.c | 1 +
61
target/s390x/tcg/translate.c | 4 ++--
62
target/sh4/cpu.c | 1 +
63
target/sh4/translate.c | 4 ++--
64
target/sparc/cpu.c | 1 +
65
target/sparc/translate.c | 4 ++--
66
target/tricore/cpu.c | 1 +
67
target/tricore/translate.c | 5 ++---
68
target/xtensa/cpu.c | 1 +
69
target/xtensa/translate.c | 4 ++--
70
62 files changed, 121 insertions(+), 62 deletions(-)
71
10
72
diff --git a/include/exec/translator.h b/include/exec/translator.h
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
73
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
74
--- a/include/exec/translator.h
13
--- a/tcg/aarch64/tcg-target.c.inc
75
+++ b/include/exec/translator.h
14
+++ b/tcg/aarch64/tcg-target.c.inc
76
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
77
#include "qemu/bswap.h"
78
#include "exec/vaddr.h"
79
80
-/**
81
- * gen_intermediate_code
82
- * @cpu: cpu context
83
- * @tb: translation block
84
- * @max_insns: max number of instructions to translate
85
- * @pc: guest virtual program counter address
86
- * @host_pc: host physical program counter address
87
- *
88
- * This function must be provided by the target, which should create
89
- * the target-specific DisasContext, and then invoke translator_loop.
90
- */
91
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
92
- vaddr pc, void *host_pc);
93
-
94
/**
95
* DisasJumpType:
96
* @DISAS_NEXT: Next instruction in program order.
97
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
98
index XXXXXXX..XXXXXXX 100644
99
--- a/include/hw/core/tcg-cpu-ops.h
100
+++ b/include/hw/core/tcg-cpu-ops.h
101
@@ -XXX,XX +XXX,XX @@ struct TCGCPUOps {
102
* Called when the first CPU is realized.
103
*/
104
void (*initialize)(void);
105
+ /**
106
+ * @translate_code: Translate guest instructions to TCGOps
107
+ * @cpu: cpu context
108
+ * @tb: translation block
109
+ * @max_insns: max number of instructions to translate
110
+ * @pc: guest virtual program counter address
111
+ * @host_pc: host physical program counter address
112
+ *
113
+ * This function must be provided by the target, which should create
114
+ * the target-specific DisasContext, and then invoke translator_loop.
115
+ */
116
+ void (*translate_code)(CPUState *cpu, TranslationBlock *tb,
117
+ int *max_insns, vaddr pc, void *host_pc);
118
/**
119
* @synchronize_from_tb: Synchronize state from a TCG #TranslationBlock
120
*
121
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
122
index XXXXXXX..XXXXXXX 100644
123
--- a/target/alpha/cpu.h
124
+++ b/target/alpha/cpu.h
125
@@ -XXX,XX +XXX,XX @@ enum {
126
};
127
128
void alpha_translate_init(void);
129
+void alpha_translate_code(CPUState *cs, TranslationBlock *tb,
130
+ int *max_insns, vaddr pc, void *host_pc);
131
132
#define CPU_RESOLVING_TYPE TYPE_ALPHA_CPU
133
134
diff --git a/target/arm/internals.h b/target/arm/internals.h
135
index XXXXXXX..XXXXXXX 100644
136
--- a/target/arm/internals.h
137
+++ b/target/arm/internals.h
138
@@ -XXX,XX +XXX,XX @@ void init_cpreg_list(ARMCPU *cpu);
139
140
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
141
void arm_translate_init(void);
142
+void arm_translate_code(CPUState *cs, TranslationBlock *tb,
143
+ int *max_insns, vaddr pc, void *host_pc);
144
145
void arm_cpu_register_gdb_commands(ARMCPU *cpu);
146
void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *,
147
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
148
index XXXXXXX..XXXXXXX 100644
149
--- a/target/avr/cpu.h
150
+++ b/target/avr/cpu.h
151
@@ -XXX,XX +XXX,XX @@ static inline void set_avr_feature(CPUAVRState *env, int feature)
152
}
16
}
153
17
154
void avr_cpu_tcg_init(void);
18
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
155
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
19
- MemOpIdx oi, TCGType ext)
156
+ int *max_insns, vaddr pc, void *host_pc);
20
+ MemOpIdx oi, TCGType data_type)
157
21
{
158
int cpu_avr_exec(CPUState *cpu);
22
MemOp memop = get_memop(oi);
159
23
- const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
160
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
24
+ TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
161
index XXXXXXX..XXXXXXX 100644
25
162
--- a/target/hexagon/cpu.h
26
/* Byte swapping is left to middle-end expansion. */
163
+++ b/target/hexagon/cpu.h
27
tcg_debug_assert((memop & MO_BSWAP) == 0);
164
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
28
165
typedef HexagonCPU ArchCPU;
29
#ifdef CONFIG_SOFTMMU
166
30
- unsigned mem_index = get_mmuidx(oi);
167
void hexagon_translate_init(void);
31
tcg_insn_unit *label_ptr;
168
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
32
169
+ int *max_insns, vaddr pc, void *host_pc);
33
- tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
170
34
- tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
171
#include "exec/cpu-all.h"
35
- TCG_REG_X1, otype, addr_reg);
172
36
- add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
173
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
37
+ tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1);
174
index XXXXXXX..XXXXXXX 100644
38
+ tcg_out_qemu_ld_direct(s, memop, data_type, data_reg,
175
--- a/target/hppa/cpu.h
39
+ TCG_REG_X1, addr_type, addr_reg);
176
+++ b/target/hppa/cpu.h
40
+ add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
177
@@ -XXX,XX +XXX,XX @@ static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
41
s->code_ptr, label_ptr);
42
#else /* !CONFIG_SOFTMMU */
43
unsigned a_bits = get_alignment_bits(memop);
44
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
45
tcg_out_test_alignment(s, true, addr_reg, a_bits);
46
}
47
if (USE_GUEST_BASE) {
48
- tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
49
- TCG_REG_GUEST_BASE, otype, addr_reg);
50
+ tcg_out_qemu_ld_direct(s, memop, data_type, data_reg,
51
+ TCG_REG_GUEST_BASE, addr_type, addr_reg);
52
} else {
53
- tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
54
+ tcg_out_qemu_ld_direct(s, memop, data_type, data_reg,
55
addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
56
}
57
#endif /* CONFIG_SOFTMMU */
178
}
58
}
179
59
180
void hppa_translate_init(void);
60
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
181
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
61
- MemOpIdx oi)
182
+ int *max_insns, vaddr pc, void *host_pc);
62
+ MemOpIdx oi, TCGType data_type)
183
63
{
184
#define CPU_RESOLVING_TYPE TYPE_HPPA_CPU
64
MemOp memop = get_memop(oi);
185
65
- const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
186
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
66
+ TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
187
index XXXXXXX..XXXXXXX 100644
67
188
--- a/target/i386/tcg/helper-tcg.h
68
/* Byte swapping is left to middle-end expansion. */
189
+++ b/target/i386/tcg/helper-tcg.h
69
tcg_debug_assert((memop & MO_BSWAP) == 0);
190
@@ -XXX,XX +XXX,XX @@ static inline target_long lshift(target_long x, int n)
70
191
71
#ifdef CONFIG_SOFTMMU
192
/* translate.c */
72
- unsigned mem_index = get_mmuidx(oi);
193
void tcg_x86_init(void);
73
tcg_insn_unit *label_ptr;
194
+void x86_translate_code(CPUState *cs, TranslationBlock *tb,
74
195
+ int *max_insns, vaddr pc, void *host_pc);
75
- tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
196
76
+ tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0);
197
/* excp_helper.c */
77
tcg_out_qemu_st_direct(s, memop, data_reg,
198
G_NORETURN void raise_exception(CPUX86State *env, int exception_index);
78
- TCG_REG_X1, otype, addr_reg);
199
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
79
- add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
200
index XXXXXXX..XXXXXXX 100644
80
- data_reg, addr_reg, s->code_ptr, label_ptr);
201
--- a/target/loongarch/internals.h
81
+ TCG_REG_X1, addr_type, addr_reg);
202
+++ b/target/loongarch/internals.h
82
+ add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
203
@@ -XXX,XX +XXX,XX @@
83
+ s->code_ptr, label_ptr);
204
#define TARGET_VIRT_MASK MAKE_64BIT_MASK(0, TARGET_VIRT_ADDR_SPACE_BITS)
84
#else /* !CONFIG_SOFTMMU */
205
85
unsigned a_bits = get_alignment_bits(memop);
206
void loongarch_translate_init(void);
86
if (a_bits) {
207
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
87
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
208
+ int *max_insns, vaddr pc, void *host_pc);
209
210
void G_NORETURN do_raise_exception(CPULoongArchState *env,
211
uint32_t exception,
212
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
213
index XXXXXXX..XXXXXXX 100644
214
--- a/target/m68k/cpu.h
215
+++ b/target/m68k/cpu.h
216
@@ -XXX,XX +XXX,XX @@ int m68k_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
217
int m68k_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
218
219
void m68k_tcg_init(void);
220
+void m68k_translate_code(CPUState *cs, TranslationBlock *tb,
221
+ int *max_insns, vaddr pc, void *host_pc);
222
void m68k_cpu_init_gdb(M68kCPU *cpu);
223
uint32_t cpu_m68k_get_ccr(CPUM68KState *env);
224
void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t);
225
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
226
index XXXXXXX..XXXXXXX 100644
227
--- a/target/microblaze/cpu.h
228
+++ b/target/microblaze/cpu.h
229
@@ -XXX,XX +XXX,XX @@ static inline void mb_cpu_write_msr(CPUMBState *env, uint32_t val)
230
}
231
232
void mb_tcg_init(void);
233
+void mb_translate_code(CPUState *cs, TranslationBlock *tb,
234
+ int *max_insns, vaddr pc, void *host_pc);
235
236
#define CPU_RESOLVING_TYPE TYPE_MICROBLAZE_CPU
237
238
diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h
239
index XXXXXXX..XXXXXXX 100644
240
--- a/target/mips/tcg/tcg-internal.h
241
+++ b/target/mips/tcg/tcg-internal.h
242
@@ -XXX,XX +XXX,XX @@
243
#include "cpu.h"
244
245
void mips_tcg_init(void);
246
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
247
+ int *max_insns, vaddr pc, void *host_pc);
248
249
void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
250
G_NORETURN void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
251
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
252
index XXXXXXX..XXXXXXX 100644
253
--- a/target/openrisc/cpu.h
254
+++ b/target/openrisc/cpu.h
255
@@ -XXX,XX +XXX,XX @@ void openrisc_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
256
int openrisc_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
257
int openrisc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
258
void openrisc_translate_init(void);
259
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
260
+ int *max_insns, vaddr pc, void *host_pc);
261
int print_insn_or1k(bfd_vma addr, disassemble_info *info);
262
263
#ifndef CONFIG_USER_ONLY
264
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
265
index XXXXXXX..XXXXXXX 100644
266
--- a/target/ppc/cpu.h
267
+++ b/target/ppc/cpu.h
268
@@ -XXX,XX +XXX,XX @@ extern const VMStateDescription vmstate_ppc_cpu;
269
270
/*****************************************************************************/
271
void ppc_translate_init(void);
272
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
273
+ int *max_insns, vaddr pc, void *host_pc);
274
275
#if !defined(CONFIG_USER_ONLY)
276
void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
277
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
278
index XXXXXXX..XXXXXXX 100644
279
--- a/target/riscv/cpu.h
280
+++ b/target/riscv/cpu.h
281
@@ -XXX,XX +XXX,XX @@ RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit);
282
void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv, bool virt_en);
283
284
void riscv_translate_init(void);
285
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
286
+ int *max_insns, vaddr pc, void *host_pc);
287
+
288
G_NORETURN void riscv_raise_exception(CPURISCVState *env,
289
uint32_t exception, uintptr_t pc);
290
291
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
292
index XXXXXXX..XXXXXXX 100644
293
--- a/target/rx/cpu.h
294
+++ b/target/rx/cpu.h
295
@@ -XXX,XX +XXX,XX @@ int rx_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
296
int rx_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
297
298
void rx_translate_init(void);
299
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
300
+ int *max_insns, vaddr pc, void *host_pc);
301
void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte);
302
303
#include "exec/cpu-all.h"
304
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
305
index XXXXXXX..XXXXXXX 100644
306
--- a/target/s390x/s390x-internal.h
307
+++ b/target/s390x/s390x-internal.h
308
@@ -XXX,XX +XXX,XX @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3,
309
310
/* translate.c */
311
void s390x_translate_init(void);
312
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
313
+ int *max_insns, vaddr pc, void *host_pc);
314
void s390x_restore_state_to_opc(CPUState *cs,
315
const TranslationBlock *tb,
316
const uint64_t *data);
317
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
318
index XXXXXXX..XXXXXXX 100644
319
--- a/target/sh4/cpu.h
320
+++ b/target/sh4/cpu.h
321
@@ -XXX,XX +XXX,XX @@ G_NORETURN void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
322
uintptr_t retaddr);
323
324
void sh4_translate_init(void);
325
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
326
+ int *max_insns, vaddr pc, void *host_pc);
327
328
#if !defined(CONFIG_USER_ONLY)
329
hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
330
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
331
index XXXXXXX..XXXXXXX 100644
332
--- a/target/sparc/cpu.h
333
+++ b/target/sparc/cpu.h
334
@@ -XXX,XX +XXX,XX @@ int sparc_cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
335
336
/* translate.c */
337
void sparc_tcg_init(void);
338
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
339
+ int *max_insns, vaddr pc, void *host_pc);
340
341
/* fop_helper.c */
342
target_ulong cpu_get_fsr(CPUSPARCState *);
343
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
344
index XXXXXXX..XXXXXXX 100644
345
--- a/target/tricore/cpu.h
346
+++ b/target/tricore/cpu.h
347
@@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, PRIV, 0, 2)
348
349
void cpu_state_reset(CPUTriCoreState *s);
350
void tricore_tcg_init(void);
351
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
352
+ int *max_insns, vaddr pc, void *host_pc);
353
354
static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc,
355
uint64_t *cs_base, uint32_t *flags)
356
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
357
index XXXXXXX..XXXXXXX 100644
358
--- a/target/xtensa/cpu.h
359
+++ b/target/xtensa/cpu.h
360
@@ -XXX,XX +XXX,XX @@ G_NORETURN void xtensa_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
361
362
void xtensa_collect_sr_names(const XtensaConfig *config);
363
void xtensa_translate_init(void);
364
+void xtensa_translate_code(CPUState *cs, TranslationBlock *tb,
365
+ int *max_insns, vaddr pc, void *host_pc);
366
void **xtensa_get_regfile_by_name(const char *name, int entries, int bits);
367
void xtensa_breakpoint_handler(CPUState *cs);
368
void xtensa_register_core(XtensaConfigList *node);
369
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
370
index XXXXXXX..XXXXXXX 100644
371
--- a/accel/tcg/cpu-exec.c
372
+++ b/accel/tcg/cpu-exec.c
373
@@ -XXX,XX +XXX,XX @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
374
375
if (!tcg_target_initialized) {
376
/* Check mandatory TCGCPUOps handlers */
377
+ const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
378
#ifndef CONFIG_USER_ONLY
379
- assert(cpu->cc->tcg_ops->cpu_exec_halt);
380
- assert(cpu->cc->tcg_ops->cpu_exec_interrupt);
381
+ assert(tcg_ops->cpu_exec_halt);
382
+ assert(tcg_ops->cpu_exec_interrupt);
383
#endif /* !CONFIG_USER_ONLY */
384
- cpu->cc->tcg_ops->initialize();
385
+ assert(tcg_ops->translate_code);
386
+ tcg_ops->initialize();
387
tcg_target_initialized = true;
388
}
88
}
389
89
if (USE_GUEST_BASE) {
390
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
90
tcg_out_qemu_st_direct(s, memop, data_reg,
391
index XXXXXXX..XXXXXXX 100644
91
- TCG_REG_GUEST_BASE, otype, addr_reg);
392
--- a/accel/tcg/translate-all.c
92
+ TCG_REG_GUEST_BASE, addr_type, addr_reg);
393
+++ b/accel/tcg/translate-all.c
93
} else {
394
@@ -XXX,XX +XXX,XX @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
94
tcg_out_qemu_st_direct(s, memop, data_reg,
395
95
addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
396
tcg_func_start(tcg_ctx);
96
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
397
97
break;
398
- tcg_ctx->cpu = env_cpu(env);
98
case INDEX_op_qemu_st_i32:
399
- gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
99
case INDEX_op_qemu_st_i64:
400
+ CPUState *cs = env_cpu(env);
100
- tcg_out_qemu_st(s, REG0(0), a1, a2);
401
+ tcg_ctx->cpu = cs;
101
+ tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
402
+ cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
102
break;
403
+
103
404
assert(tb->size != 0);
104
case INDEX_op_bswap64_i64:
405
tcg_ctx->cpu = NULL;
406
*max_insns = tb->icount;
407
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
408
/*
409
* Overflow of code_gen_buffer, or the current slice of it.
410
*
411
- * TODO: We don't need to re-do gen_intermediate_code, nor
412
+ * TODO: We don't need to re-do tcg_ops->translate_code, nor
413
* should we re-do the tcg optimization currently hidden
414
* inside tcg_gen_code. All that should be required is to
415
* flush the TBs, allocate a new TB, re-initialize it per
416
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
417
index XXXXXXX..XXXXXXX 100644
418
--- a/target/alpha/cpu.c
419
+++ b/target/alpha/cpu.c
420
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps alpha_sysemu_ops = {
421
422
static const TCGCPUOps alpha_tcg_ops = {
423
.initialize = alpha_translate_init,
424
+ .translate_code = alpha_translate_code,
425
.synchronize_from_tb = alpha_cpu_synchronize_from_tb,
426
.restore_state_to_opc = alpha_restore_state_to_opc,
427
428
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
429
index XXXXXXX..XXXXXXX 100644
430
--- a/target/alpha/translate.c
431
+++ b/target/alpha/translate.c
432
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps alpha_tr_ops = {
433
.tb_stop = alpha_tr_tb_stop,
434
};
435
436
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
437
- vaddr pc, void *host_pc)
438
+void alpha_translate_code(CPUState *cpu, TranslationBlock *tb,
439
+ int *max_insns, vaddr pc, void *host_pc)
440
{
441
DisasContext dc;
442
translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
443
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
444
index XXXXXXX..XXXXXXX 100644
445
--- a/target/arm/cpu.c
446
+++ b/target/arm/cpu.c
447
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps arm_sysemu_ops = {
448
#ifdef CONFIG_TCG
449
static const TCGCPUOps arm_tcg_ops = {
450
.initialize = arm_translate_init,
451
+ .translate_code = arm_translate_code,
452
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
453
.debug_excp_handler = arm_debug_excp_handler,
454
.restore_state_to_opc = arm_restore_state_to_opc,
455
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
456
index XXXXXXX..XXXXXXX 100644
457
--- a/target/arm/tcg/cpu-v7m.c
458
+++ b/target/arm/tcg/cpu-v7m.c
459
@@ -XXX,XX +XXX,XX @@ static void cortex_m55_initfn(Object *obj)
460
461
static const TCGCPUOps arm_v7m_tcg_ops = {
462
.initialize = arm_translate_init,
463
+ .translate_code = arm_translate_code,
464
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
465
.debug_excp_handler = arm_debug_excp_handler,
466
.restore_state_to_opc = arm_restore_state_to_opc,
467
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
468
index XXXXXXX..XXXXXXX 100644
469
--- a/target/arm/tcg/translate.c
470
+++ b/target/arm/tcg/translate.c
471
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps thumb_translator_ops = {
472
.tb_stop = arm_tr_tb_stop,
473
};
474
475
-/* generate intermediate code for basic block 'tb'. */
476
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
477
- vaddr pc, void *host_pc)
478
+void arm_translate_code(CPUState *cpu, TranslationBlock *tb,
479
+ int *max_insns, vaddr pc, void *host_pc)
480
{
481
DisasContext dc = { };
482
const TranslatorOps *ops = &arm_translator_ops;
483
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
484
index XXXXXXX..XXXXXXX 100644
485
--- a/target/avr/cpu.c
486
+++ b/target/avr/cpu.c
487
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps avr_sysemu_ops = {
488
489
static const TCGCPUOps avr_tcg_ops = {
490
.initialize = avr_cpu_tcg_init,
491
+ .translate_code = avr_cpu_translate_code,
492
.synchronize_from_tb = avr_cpu_synchronize_from_tb,
493
.restore_state_to_opc = avr_restore_state_to_opc,
494
.cpu_exec_interrupt = avr_cpu_exec_interrupt,
495
diff --git a/target/avr/translate.c b/target/avr/translate.c
496
index XXXXXXX..XXXXXXX 100644
497
--- a/target/avr/translate.c
498
+++ b/target/avr/translate.c
499
@@ -XXX,XX +XXX,XX @@ static bool trans_WDR(DisasContext *ctx, arg_WDR *a)
500
*
501
* - translate()
502
* - canonicalize_skip()
503
- * - gen_intermediate_code()
504
+ * - translate_code()
505
* - restore_state_to_opc()
506
*
507
*/
508
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps avr_tr_ops = {
509
.tb_stop = avr_tr_tb_stop,
510
};
511
512
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
513
- vaddr pc, void *host_pc)
514
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
515
+ int *max_insns, vaddr pc, void *host_pc)
516
{
517
DisasContext dc = { };
518
translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
519
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
520
index XXXXXXX..XXXXXXX 100644
521
--- a/target/hexagon/cpu.c
522
+++ b/target/hexagon/cpu.c
523
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_init(Object *obj)
524
525
static const TCGCPUOps hexagon_tcg_ops = {
526
.initialize = hexagon_translate_init,
527
+ .translate_code = hexagon_translate_code,
528
.synchronize_from_tb = hexagon_cpu_synchronize_from_tb,
529
.restore_state_to_opc = hexagon_restore_state_to_opc,
530
};
531
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
532
index XXXXXXX..XXXXXXX 100644
533
--- a/target/hexagon/translate.c
534
+++ b/target/hexagon/translate.c
535
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hexagon_tr_ops = {
536
.tb_stop = hexagon_tr_tb_stop,
537
};
538
539
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
540
- vaddr pc, void *host_pc)
541
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
542
+ int *max_insns, vaddr pc, void *host_pc)
543
{
544
DisasContext ctx;
545
546
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
547
index XXXXXXX..XXXXXXX 100644
548
--- a/target/hppa/cpu.c
549
+++ b/target/hppa/cpu.c
550
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps hppa_sysemu_ops = {
551
552
static const TCGCPUOps hppa_tcg_ops = {
553
.initialize = hppa_translate_init,
554
+ .translate_code = hppa_translate_code,
555
.synchronize_from_tb = hppa_cpu_synchronize_from_tb,
556
.restore_state_to_opc = hppa_restore_state_to_opc,
557
558
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
559
index XXXXXXX..XXXXXXX 100644
560
--- a/target/hppa/translate.c
561
+++ b/target/hppa/translate.c
562
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hppa_tr_ops = {
563
#endif
564
};
565
566
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
567
- vaddr pc, void *host_pc)
568
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
569
+ int *max_insns, vaddr pc, void *host_pc)
570
{
571
DisasContext ctx = { };
572
translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
573
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
574
index XXXXXXX..XXXXXXX 100644
575
--- a/target/i386/tcg/tcg-cpu.c
576
+++ b/target/i386/tcg/tcg-cpu.c
577
@@ -XXX,XX +XXX,XX @@ static bool x86_debug_check_breakpoint(CPUState *cs)
578
579
static const TCGCPUOps x86_tcg_ops = {
580
.initialize = tcg_x86_init,
581
+ .translate_code = x86_translate_code,
582
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
583
.restore_state_to_opc = x86_restore_state_to_opc,
584
.cpu_exec_enter = x86_cpu_exec_enter,
585
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
586
index XXXXXXX..XXXXXXX 100644
587
--- a/target/i386/tcg/translate.c
588
+++ b/target/i386/tcg/translate.c
589
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps i386_tr_ops = {
590
.tb_stop = i386_tr_tb_stop,
591
};
592
593
-/* generate intermediate code for basic block 'tb'. */
594
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
595
- vaddr pc, void *host_pc)
596
+void x86_translate_code(CPUState *cpu, TranslationBlock *tb,
597
+ int *max_insns, vaddr pc, void *host_pc)
598
{
599
DisasContext dc;
600
601
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
602
index XXXXXXX..XXXXXXX 100644
603
--- a/target/loongarch/cpu.c
604
+++ b/target/loongarch/cpu.c
605
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)
606
607
static const TCGCPUOps loongarch_tcg_ops = {
608
.initialize = loongarch_translate_init,
609
+ .translate_code = loongarch_translate_code,
610
.synchronize_from_tb = loongarch_cpu_synchronize_from_tb,
611
.restore_state_to_opc = loongarch_restore_state_to_opc,
612
613
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
614
index XXXXXXX..XXXXXXX 100644
615
--- a/target/loongarch/tcg/translate.c
616
+++ b/target/loongarch/tcg/translate.c
617
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps loongarch_tr_ops = {
618
.tb_stop = loongarch_tr_tb_stop,
619
};
620
621
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
622
- vaddr pc, void *host_pc)
623
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
624
+ int *max_insns, vaddr pc, void *host_pc)
625
{
626
DisasContext ctx;
627
628
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
629
index XXXXXXX..XXXXXXX 100644
630
--- a/target/m68k/cpu.c
631
+++ b/target/m68k/cpu.c
632
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps m68k_sysemu_ops = {
633
634
static const TCGCPUOps m68k_tcg_ops = {
635
.initialize = m68k_tcg_init,
636
+ .translate_code = m68k_translate_code,
637
.restore_state_to_opc = m68k_restore_state_to_opc,
638
639
#ifndef CONFIG_USER_ONLY
640
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
641
index XXXXXXX..XXXXXXX 100644
642
--- a/target/m68k/translate.c
643
+++ b/target/m68k/translate.c
644
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps m68k_tr_ops = {
645
.tb_stop = m68k_tr_tb_stop,
646
};
647
648
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
649
- vaddr pc, void *host_pc)
650
+void m68k_translate_code(CPUState *cpu, TranslationBlock *tb,
651
+ int *max_insns, vaddr pc, void *host_pc)
652
{
653
DisasContext dc;
654
translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base);
655
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
656
index XXXXXXX..XXXXXXX 100644
657
--- a/target/microblaze/cpu.c
658
+++ b/target/microblaze/cpu.c
659
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps mb_sysemu_ops = {
660
661
static const TCGCPUOps mb_tcg_ops = {
662
.initialize = mb_tcg_init,
663
+ .translate_code = mb_translate_code,
664
.synchronize_from_tb = mb_cpu_synchronize_from_tb,
665
.restore_state_to_opc = mb_restore_state_to_opc,
666
667
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
668
index XXXXXXX..XXXXXXX 100644
669
--- a/target/microblaze/translate.c
670
+++ b/target/microblaze/translate.c
671
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mb_tr_ops = {
672
.tb_stop = mb_tr_tb_stop,
673
};
674
675
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
676
- vaddr pc, void *host_pc)
677
+void mb_translate_code(CPUState *cpu, TranslationBlock *tb,
678
+ int *max_insns, vaddr pc, void *host_pc)
679
{
680
DisasContext dc;
681
translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base);
682
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
683
index XXXXXXX..XXXXXXX 100644
684
--- a/target/mips/cpu.c
685
+++ b/target/mips/cpu.c
686
@@ -XXX,XX +XXX,XX @@ static const Property mips_cpu_properties[] = {
687
#include "hw/core/tcg-cpu-ops.h"
688
static const TCGCPUOps mips_tcg_ops = {
689
.initialize = mips_tcg_init,
690
+ .translate_code = mips_translate_code,
691
.synchronize_from_tb = mips_cpu_synchronize_from_tb,
692
.restore_state_to_opc = mips_restore_state_to_opc,
693
694
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
695
index XXXXXXX..XXXXXXX 100644
696
--- a/target/mips/tcg/translate.c
697
+++ b/target/mips/tcg/translate.c
698
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mips_tr_ops = {
699
.tb_stop = mips_tr_tb_stop,
700
};
701
702
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
703
- vaddr pc, void *host_pc)
704
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
705
+ int *max_insns, vaddr pc, void *host_pc)
706
{
707
DisasContext ctx;
708
709
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
710
index XXXXXXX..XXXXXXX 100644
711
--- a/target/openrisc/cpu.c
712
+++ b/target/openrisc/cpu.c
713
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps openrisc_sysemu_ops = {
714
715
static const TCGCPUOps openrisc_tcg_ops = {
716
.initialize = openrisc_translate_init,
717
+ .translate_code = openrisc_translate_code,
718
.synchronize_from_tb = openrisc_cpu_synchronize_from_tb,
719
.restore_state_to_opc = openrisc_restore_state_to_opc,
720
721
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
722
index XXXXXXX..XXXXXXX 100644
723
--- a/target/openrisc/translate.c
724
+++ b/target/openrisc/translate.c
725
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps openrisc_tr_ops = {
726
.tb_stop = openrisc_tr_tb_stop,
727
};
728
729
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
730
- vaddr pc, void *host_pc)
731
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
732
+ int *max_insns, vaddr pc, void *host_pc)
733
{
734
DisasContext ctx;
735
736
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
737
index XXXXXXX..XXXXXXX 100644
738
--- a/target/ppc/cpu_init.c
739
+++ b/target/ppc/cpu_init.c
740
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps ppc_sysemu_ops = {
741
742
static const TCGCPUOps ppc_tcg_ops = {
743
.initialize = ppc_translate_init,
744
+ .translate_code = ppc_translate_code,
745
.restore_state_to_opc = ppc_restore_state_to_opc,
746
747
#ifdef CONFIG_USER_ONLY
748
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
749
index XXXXXXX..XXXXXXX 100644
750
--- a/target/ppc/translate.c
751
+++ b/target/ppc/translate.c
752
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps ppc_tr_ops = {
753
.tb_stop = ppc_tr_tb_stop,
754
};
755
756
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
757
- vaddr pc, void *host_pc)
758
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
759
+ int *max_insns, vaddr pc, void *host_pc)
760
{
761
DisasContext ctx;
762
763
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
764
index XXXXXXX..XXXXXXX 100644
765
--- a/target/riscv/tcg/tcg-cpu.c
766
+++ b/target/riscv/tcg/tcg-cpu.c
767
@@ -XXX,XX +XXX,XX @@ static void riscv_restore_state_to_opc(CPUState *cs,
768
769
static const TCGCPUOps riscv_tcg_ops = {
770
.initialize = riscv_translate_init,
771
+ .translate_code = riscv_translate_code,
772
.synchronize_from_tb = riscv_cpu_synchronize_from_tb,
773
.restore_state_to_opc = riscv_restore_state_to_opc,
774
775
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
776
index XXXXXXX..XXXXXXX 100644
777
--- a/target/riscv/translate.c
778
+++ b/target/riscv/translate.c
779
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps riscv_tr_ops = {
780
.tb_stop = riscv_tr_tb_stop,
781
};
782
783
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
784
- vaddr pc, void *host_pc)
785
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
786
+ int *max_insns, vaddr pc, void *host_pc)
787
{
788
DisasContext ctx;
789
790
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
791
index XXXXXXX..XXXXXXX 100644
792
--- a/target/rx/cpu.c
793
+++ b/target/rx/cpu.c
794
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps rx_sysemu_ops = {
795
796
static const TCGCPUOps rx_tcg_ops = {
797
.initialize = rx_translate_init,
798
+ .translate_code = rx_translate_code,
799
.synchronize_from_tb = rx_cpu_synchronize_from_tb,
800
.restore_state_to_opc = rx_restore_state_to_opc,
801
.tlb_fill = rx_cpu_tlb_fill,
802
diff --git a/target/rx/translate.c b/target/rx/translate.c
803
index XXXXXXX..XXXXXXX 100644
804
--- a/target/rx/translate.c
805
+++ b/target/rx/translate.c
806
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps rx_tr_ops = {
807
.tb_stop = rx_tr_tb_stop,
808
};
809
810
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
811
- vaddr pc, void *host_pc)
812
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
813
+ int *max_insns, vaddr pc, void *host_pc)
814
{
815
DisasContext dc;
816
817
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
818
index XXXXXXX..XXXXXXX 100644
819
--- a/target/s390x/cpu.c
820
+++ b/target/s390x/cpu.c
821
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc,
822
823
static const TCGCPUOps s390_tcg_ops = {
824
.initialize = s390x_translate_init,
825
+ .translate_code = s390x_translate_code,
826
.restore_state_to_opc = s390x_restore_state_to_opc,
827
828
#ifdef CONFIG_USER_ONLY
829
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
830
index XXXXXXX..XXXXXXX 100644
831
--- a/target/s390x/tcg/translate.c
832
+++ b/target/s390x/tcg/translate.c
833
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps s390x_tr_ops = {
834
.disas_log = s390x_tr_disas_log,
835
};
836
837
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
838
- vaddr pc, void *host_pc)
839
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
840
+ int *max_insns, vaddr pc, void *host_pc)
841
{
842
DisasContext dc;
843
844
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
845
index XXXXXXX..XXXXXXX 100644
846
--- a/target/sh4/cpu.c
847
+++ b/target/sh4/cpu.c
848
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sh4_sysemu_ops = {
849
850
static const TCGCPUOps superh_tcg_ops = {
851
.initialize = sh4_translate_init,
852
+ .translate_code = sh4_translate_code,
853
.synchronize_from_tb = superh_cpu_synchronize_from_tb,
854
.restore_state_to_opc = superh_restore_state_to_opc,
855
856
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
857
index XXXXXXX..XXXXXXX 100644
858
--- a/target/sh4/translate.c
859
+++ b/target/sh4/translate.c
860
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sh4_tr_ops = {
861
.tb_stop = sh4_tr_tb_stop,
862
};
863
864
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
865
- vaddr pc, void *host_pc)
866
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
867
+ int *max_insns, vaddr pc, void *host_pc)
868
{
869
DisasContext ctx;
870
871
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
872
index XXXXXXX..XXXXXXX 100644
873
--- a/target/sparc/cpu.c
874
+++ b/target/sparc/cpu.c
875
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sparc_sysemu_ops = {
876
877
static const TCGCPUOps sparc_tcg_ops = {
878
.initialize = sparc_tcg_init,
879
+ .translate_code = sparc_translate_code,
880
.synchronize_from_tb = sparc_cpu_synchronize_from_tb,
881
.restore_state_to_opc = sparc_restore_state_to_opc,
882
883
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
884
index XXXXXXX..XXXXXXX 100644
885
--- a/target/sparc/translate.c
886
+++ b/target/sparc/translate.c
887
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sparc_tr_ops = {
888
.tb_stop = sparc_tr_tb_stop,
889
};
890
891
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
892
- vaddr pc, void *host_pc)
893
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
894
+ int *max_insns, vaddr pc, void *host_pc)
895
{
896
DisasContext dc = {};
897
898
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
899
index XXXXXXX..XXXXXXX 100644
900
--- a/target/tricore/cpu.c
901
+++ b/target/tricore/cpu.c
902
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps tricore_sysemu_ops = {
903
904
static const TCGCPUOps tricore_tcg_ops = {
905
.initialize = tricore_tcg_init,
906
+ .translate_code = tricore_translate_code,
907
.synchronize_from_tb = tricore_cpu_synchronize_from_tb,
908
.restore_state_to_opc = tricore_restore_state_to_opc,
909
.tlb_fill = tricore_cpu_tlb_fill,
910
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
911
index XXXXXXX..XXXXXXX 100644
912
--- a/target/tricore/translate.c
913
+++ b/target/tricore/translate.c
914
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps tricore_tr_ops = {
915
.tb_stop = tricore_tr_tb_stop,
916
};
917
918
-
919
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
920
- vaddr pc, void *host_pc)
921
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
922
+ int *max_insns, vaddr pc, void *host_pc)
923
{
924
DisasContext ctx;
925
translator_loop(cs, tb, max_insns, pc, host_pc,
926
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
927
index XXXXXXX..XXXXXXX 100644
928
--- a/target/xtensa/cpu.c
929
+++ b/target/xtensa/cpu.c
930
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps xtensa_sysemu_ops = {
931
932
static const TCGCPUOps xtensa_tcg_ops = {
933
.initialize = xtensa_translate_init,
934
+ .translate_code = xtensa_translate_code,
935
.debug_excp_handler = xtensa_breakpoint_handler,
936
.restore_state_to_opc = xtensa_restore_state_to_opc,
937
938
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
939
index XXXXXXX..XXXXXXX 100644
940
--- a/target/xtensa/translate.c
941
+++ b/target/xtensa/translate.c
942
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps xtensa_translator_ops = {
943
.tb_stop = xtensa_tr_tb_stop,
944
};
945
946
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
947
- vaddr pc, void *host_pc)
948
+void xtensa_translate_code(CPUState *cpu, TranslationBlock *tb,
949
+ int *max_insns, vaddr pc, void *host_pc)
950
{
951
DisasContext dc = {};
952
translator_loop(cpu, tb, max_insns, pc, host_pc,
--
2.43.0
Use the scalbn interface instead of float_muladd_halve_result.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/sparc/helper.h | 4 +-
 target/sparc/fop_helper.c | 8 ++--
 target/sparc/translate.c | 80 +++++++++++++++++++++++----------------
 3 files changed, 54 insertions(+), 38 deletions(-)

Collect the 3 potential parts of the host address into a struct.
Reorg tcg_out_qemu_{ld,st}_direct to use it.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.c.inc | 86 +++++++++++++++++++++++++-----------
 1 file changed, 59 insertions(+), 27 deletions(-)
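The halving cases work out because float64_muladd_scalbn(a, b, c, n, flags) computes (a * b + c) * 2^n with a single rounding, so "halve the result" becomes n = -1 rather than a dedicated flag. The plain-C sketch below (compile with -lm) models the arithmetic with libm's fma() and ldexp(); it is only a model, since scaling by 2^-1 via ldexp is exact away from the overflow and underflow boundaries, which is precisely where the fused softfloat helper can differ.

/*
 * Illustrative model only: the real helpers operate on softfloat types
 * and fold the 2^n scale into the single rounding step.
 */
#include <math.h>
#include <stdio.h>

static double muladd_scalbn_sketch(double a, double b, double c, int n)
{
    return ldexp(fma(a, b, c), n);      /* (a * b + c) * 2^n */
}

int main(void)
{
    double s1 = 3.0, s2 = 5.0;

    /* fhadds: (1 * s1 + s2) / 2; previously float_muladd_halve_result */
    printf("fhadd = %g\n", muladd_scalbn_sketch(1.0, s1, s2, -1));    /* 4 */

    /* fhsubs: (1 * s1 - s2) / 2; negation stays a flag, halving is n = -1 */
    printf("fhsub = %g\n", muladd_scalbn_sketch(1.0, s1, -s2, -1));   /* -1 */
    return 0;
}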
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/target/sparc/helper.h
12
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/target/sparc/helper.h
13
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
16
DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
15
tcg_out_insn(s, 3406, ADR, rd, offset);
17
DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
18
DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
19
-DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32)
20
+DEF_HELPER_FLAGS_6(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, s32, i32)
21
DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64)
22
DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
23
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32)
25
DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
26
DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
27
DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
28
-DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32)
29
+DEF_HELPER_FLAGS_6(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, s32, i32)
30
DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32)
31
DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
32
33
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/sparc/fop_helper.c
36
+++ b/target/sparc/fop_helper.c
37
@@ -XXX,XX +XXX,XX @@ Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src)
38
}
16
}
39
17
40
float32 helper_fmadds(CPUSPARCState *env, float32 s1,
18
+typedef struct {
41
- float32 s2, float32 s3, uint32_t op)
19
+ TCGReg base;
42
+ float32 s2, float32 s3, int32_t sc, uint32_t op)
20
+ TCGReg index;
21
+ TCGType index_ext;
22
+} HostAddress;
23
+
24
#ifdef CONFIG_SOFTMMU
25
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
26
* MemOpIdx oi, uintptr_t ra)
27
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
28
#endif /* CONFIG_SOFTMMU */
29
30
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
31
- TCGReg data_r, TCGReg addr_r,
32
- TCGType otype, TCGReg off_r)
33
+ TCGReg data_r, HostAddress h)
43
{
34
{
44
- float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status);
35
switch (memop & MO_SSIZE) {
45
+ float32 ret = float32_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
36
case MO_UB:
46
check_ieee_exceptions(env, GETPC());
37
- tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
47
return ret;
38
+ tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
39
break;
40
case MO_SB:
41
tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
42
- data_r, addr_r, otype, off_r);
43
+ data_r, h.base, h.index_ext, h.index);
44
break;
45
case MO_UW:
46
- tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
47
+ tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
48
break;
49
case MO_SW:
50
tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
51
- data_r, addr_r, otype, off_r);
52
+ data_r, h.base, h.index_ext, h.index);
53
break;
54
case MO_UL:
55
- tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
56
+ tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
57
break;
58
case MO_SL:
59
- tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
60
+ tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
61
break;
62
case MO_UQ:
63
- tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
64
+ tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
65
break;
66
default:
67
g_assert_not_reached();
68
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
48
}
69
}
49
70
50
float64 helper_fmaddd(CPUSPARCState *env, float64 s1,
71
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
51
- float64 s2, float64 s3, uint32_t op)
72
- TCGReg data_r, TCGReg addr_r,
52
+ float64 s2, float64 s3, int32_t sc, uint32_t op)
73
- TCGType otype, TCGReg off_r)
74
+ TCGReg data_r, HostAddress h)
53
{
75
{
54
- float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status);
76
switch (memop & MO_SIZE) {
55
+ float64 ret = float64_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
77
case MO_8:
56
check_ieee_exceptions(env, GETPC());
78
- tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
57
return ret;
79
+ tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
80
break;
81
case MO_16:
82
- tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
83
+ tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
84
break;
85
case MO_32:
86
- tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
87
+ tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
88
break;
89
case MO_64:
90
- tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
91
+ tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
92
break;
93
default:
94
g_assert_not_reached();
95
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
96
{
97
MemOp memop = get_memop(oi);
98
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
99
+ HostAddress h;
100
101
/* Byte swapping is left to middle-end expansion. */
102
tcg_debug_assert((memop & MO_BSWAP) == 0);
103
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
104
tcg_insn_unit *label_ptr;
105
106
tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1);
107
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg,
108
- TCG_REG_X1, addr_type, addr_reg);
109
+
110
+ h = (HostAddress){
111
+ .base = TCG_REG_X1,
112
+ .index = addr_reg,
113
+ .index_ext = addr_type
114
+ };
115
+ tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
116
+
117
add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
118
s->code_ptr, label_ptr);
119
#else /* !CONFIG_SOFTMMU */
120
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
121
tcg_out_test_alignment(s, true, addr_reg, a_bits);
122
}
123
if (USE_GUEST_BASE) {
124
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg,
125
- TCG_REG_GUEST_BASE, addr_type, addr_reg);
126
+ h = (HostAddress){
127
+ .base = TCG_REG_GUEST_BASE,
128
+ .index = addr_reg,
129
+ .index_ext = addr_type
130
+ };
131
} else {
132
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg,
133
- addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
134
+ h = (HostAddress){
135
+ .base = addr_reg,
136
+ .index = TCG_REG_XZR,
137
+ .index_ext = TCG_TYPE_I64
138
+ };
139
}
140
+ tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
141
#endif /* CONFIG_SOFTMMU */
58
}
142
}
59
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
143
60
index XXXXXXX..XXXXXXX 100644
144
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
61
--- a/target/sparc/translate.c
62
+++ b/target/sparc/translate.c
63
@@ -XXX,XX +XXX,XX @@ static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
64
65
static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
66
{
145
{
67
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
146
MemOp memop = get_memop(oi);
68
+ TCGv_i32 z = tcg_constant_i32(0);
147
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
69
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, z);
148
+ HostAddress h;
149
150
/* Byte swapping is left to middle-end expansion. */
151
tcg_debug_assert((memop & MO_BSWAP) == 0);
152
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
153
tcg_insn_unit *label_ptr;
154
155
tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0);
156
- tcg_out_qemu_st_direct(s, memop, data_reg,
157
- TCG_REG_X1, addr_type, addr_reg);
158
+
159
+ h = (HostAddress){
160
+ .base = TCG_REG_X1,
161
+ .index = addr_reg,
162
+ .index_ext = addr_type
163
+ };
164
+ tcg_out_qemu_st_direct(s, memop, data_reg, h);
165
+
166
add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
167
s->code_ptr, label_ptr);
168
#else /* !CONFIG_SOFTMMU */
169
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
170
tcg_out_test_alignment(s, false, addr_reg, a_bits);
171
}
172
if (USE_GUEST_BASE) {
173
- tcg_out_qemu_st_direct(s, memop, data_reg,
174
- TCG_REG_GUEST_BASE, addr_type, addr_reg);
175
+ h = (HostAddress){
176
+ .base = TCG_REG_GUEST_BASE,
177
+ .index = addr_reg,
178
+ .index_ext = addr_type
179
+ };
180
} else {
181
- tcg_out_qemu_st_direct(s, memop, data_reg,
182
- addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
183
+ h = (HostAddress){
184
+ .base = addr_reg,
185
+ .index = TCG_REG_XZR,
186
+ .index_ext = TCG_TYPE_I64
187
+ };
188
}
189
+ tcg_out_qemu_st_direct(s, memop, data_reg, h);
190
#endif /* CONFIG_SOFTMMU */
70
}
191
}
71
192
72
static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
73
{
74
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
75
+ TCGv_i32 z = tcg_constant_i32(0);
76
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, z);
77
}
78
79
static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
80
{
81
- int op = float_muladd_negate_c;
82
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
83
+ TCGv_i32 z = tcg_constant_i32(0);
84
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
85
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
86
}
87
88
static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
89
{
90
- int op = float_muladd_negate_c;
91
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
92
+ TCGv_i32 z = tcg_constant_i32(0);
93
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
94
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
95
}
96
97
static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
98
{
99
- int op = float_muladd_negate_c | float_muladd_negate_result;
100
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
101
+ TCGv_i32 z = tcg_constant_i32(0);
102
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
103
+ float_muladd_negate_result);
104
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
105
}
106
107
static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
108
{
109
- int op = float_muladd_negate_c | float_muladd_negate_result;
110
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
111
+ TCGv_i32 z = tcg_constant_i32(0);
112
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
113
+ float_muladd_negate_result);
114
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
115
}
116
117
static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
118
{
119
- int op = float_muladd_negate_result;
120
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
121
+ TCGv_i32 z = tcg_constant_i32(0);
122
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
123
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
124
}
125
126
static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
127
{
128
- int op = float_muladd_negate_result;
129
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
130
+ TCGv_i32 z = tcg_constant_i32(0);
131
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
132
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
133
}
134
135
/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. */
136
static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
137
{
138
- TCGv_i32 one = tcg_constant_i32(float32_one);
139
- int op = float_muladd_halve_result;
140
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
141
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
142
+ TCGv_i32 mone = tcg_constant_i32(-1);
143
+ TCGv_i32 op = tcg_constant_i32(0);
144
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
145
}
146
147
static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
148
{
149
- TCGv_i64 one = tcg_constant_i64(float64_one);
150
- int op = float_muladd_halve_result;
151
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
152
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
153
+ TCGv_i32 mone = tcg_constant_i32(-1);
154
+ TCGv_i32 op = tcg_constant_i32(0);
155
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
156
}
157
158
/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */
159
static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
160
{
161
- TCGv_i32 one = tcg_constant_i32(float32_one);
162
- int op = float_muladd_negate_c | float_muladd_halve_result;
163
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
164
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
165
+ TCGv_i32 mone = tcg_constant_i32(-1);
166
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
167
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
168
}
169
170
static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
171
{
172
- TCGv_i64 one = tcg_constant_i64(float64_one);
173
- int op = float_muladd_negate_c | float_muladd_halve_result;
174
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
175
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
176
+ TCGv_i32 mone = tcg_constant_i32(-1);
177
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
178
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
179
}
180
181
/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */
182
static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
183
{
184
- TCGv_i32 one = tcg_constant_i32(float32_one);
185
- int op = float_muladd_negate_result | float_muladd_halve_result;
186
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
187
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
188
+ TCGv_i32 mone = tcg_constant_i32(-1);
189
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
190
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
191
}
192
193
static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
194
{
195
- TCGv_i64 one = tcg_constant_i64(float64_one);
196
- int op = float_muladd_negate_result | float_muladd_halve_result;
197
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
198
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
199
+ TCGv_i32 mone = tcg_constant_i32(-1);
200
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
201
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
202
}
203
204
static void gen_op_fpexception_im(DisasContext *dc, int ftt)
205
--
2.43.0

--
2.34.1
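
In the gen_op_fhadd*/fhsub*/fnhadd* helpers above, the old
float_muladd_halve_result flag is gone and a constant -1 is now passed in
the new integer argument slot next to the flags.  A minimal sketch of that
reading, assuming the extra argument is a power-of-two scale applied with
the fused multiply-add; the helper name and the two-step composition below
are illustrative only, not the series' actual implementation, which would
fold the scaling into the single final rounding:

    #include "qemu/osdep.h"
    #include "fpu/softfloat.h"

    /*
     * Illustrative sketch: fused multiply-add followed by a power-of-two
     * scale.  scale == -1 halves the result, reproducing the old
     * float_muladd_halve_result behaviour; scale == 0 is a plain fma.
     * (The real helper is assumed to scale before rounding, which this
     * two-step version does not.)
     */
    static float32 fmadd_scaled_sketch(float32 a, float32 b, float32 c,
                                       int scale, int flags, float_status *s)
    {
        float32 r = float32_muladd(a, b, c, flags, s);
        return float32_scalbn(r, scale, s);
    }

With that reading, gen_op_fhadds computes (1 * src1 + src2) scaled by 2**-1,
i.e. the same halved sum as before, still as a single helper call.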
Interpret the variable argument placement in the caller.
Pass data_type instead of is_64. We need to set this in
TCGLabelQemuLdst, so plumb this all the way through from tcg_out_op.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.c.inc | 113 +++++++++++++++++++--------------------
 1 file changed, 56 insertions(+), 57 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc

There are no special cases for this instruction. Since hexagon
always uses default-nan mode, explicitly negating the first
input is unnecessary. Use float_muladd_negate_product instead.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/op_helper.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
15
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
16
float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
16
/* Record the context of a call to the out of line helper code for the slow
17
float32 RsV, float32 RtV)
17
path for a load or store, so that we can later generate the correct
18
helper code. */
19
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
20
- TCGReg datalo, TCGReg datahi, TCGReg addrlo,
21
- TCGReg addrhi, tcg_insn_unit *raddr,
22
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
23
+ MemOpIdx oi, TCGType type,
24
+ TCGReg datalo, TCGReg datahi,
25
+ TCGReg addrlo, TCGReg addrhi,
26
+ tcg_insn_unit *raddr,
27
tcg_insn_unit *label_ptr)
18
{
28
{
19
- float32 neg_RsV;
29
TCGLabelQemuLdst *label = new_ldst_label(s);
20
arch_fpop_start(env);
30
21
- neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
31
label->is_ld = is_ld;
22
- RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
32
label->oi = oi;
23
+ RxV = float32_muladd(RsV, RtV, RxV, float_muladd_negate_product,
33
+ label->type = type;
24
+ &env->fp_status);
34
label->datalo_reg = datalo;
25
arch_fpop_end(env);
35
label->datahi_reg = datahi;
26
return RxV;
36
label->addrlo_reg = addrlo;
37
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
27
}
38
}
39
#endif
40
41
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
42
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
43
+ TCGReg addrlo, TCGReg addrhi,
44
+ MemOpIdx oi, TCGType data_type)
45
{
46
- TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
47
- MemOpIdx oi;
48
- MemOp opc;
49
-#ifdef CONFIG_SOFTMMU
50
- int mem_index;
51
- TCGReg addend;
52
- tcg_insn_unit *label_ptr;
53
-#else
54
- unsigned a_bits;
55
-#endif
56
-
57
- datalo = *args++;
58
- datahi = (is64 ? *args++ : 0);
59
- addrlo = *args++;
60
- addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
61
- oi = *args++;
62
- opc = get_memop(oi);
63
+ MemOp opc = get_memop(oi);
64
65
#ifdef CONFIG_SOFTMMU
66
- mem_index = get_mmuidx(oi);
67
- addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
68
+ TCGReg addend= tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
69
70
- /* This a conditional BL only to load a pointer within this opcode into LR
71
- for the slow path. We will not be using the value for a tail call. */
72
- label_ptr = s->code_ptr;
73
+ /*
74
+ * This a conditional BL only to load a pointer within this opcode into
75
+ * LR for the slow path. We will not be using the value for a tail call.
76
+ */
77
+ tcg_insn_unit *label_ptr = s->code_ptr;
78
tcg_out_bl_imm(s, COND_NE, 0);
79
80
tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend, true);
81
82
- add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
83
- s->code_ptr, label_ptr);
84
+ add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
85
+ addrlo, addrhi, s->code_ptr, label_ptr);
86
#else /* !CONFIG_SOFTMMU */
87
- a_bits = get_alignment_bits(opc);
88
+ unsigned a_bits = get_alignment_bits(opc);
89
if (a_bits) {
90
tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
91
}
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
93
}
94
#endif
95
96
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
97
+static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
98
+ TCGReg addrlo, TCGReg addrhi,
99
+ MemOpIdx oi, TCGType data_type)
100
{
101
- TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
102
- MemOpIdx oi;
103
- MemOp opc;
104
-#ifdef CONFIG_SOFTMMU
105
- int mem_index;
106
- TCGReg addend;
107
- tcg_insn_unit *label_ptr;
108
-#else
109
- unsigned a_bits;
110
-#endif
111
-
112
- datalo = *args++;
113
- datahi = (is64 ? *args++ : 0);
114
- addrlo = *args++;
115
- addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
116
- oi = *args++;
117
- opc = get_memop(oi);
118
+ MemOp opc = get_memop(oi);
119
120
#ifdef CONFIG_SOFTMMU
121
- mem_index = get_mmuidx(oi);
122
- addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
123
+ TCGReg addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
124
125
tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi,
126
addrlo, addend, true);
127
128
/* The conditional call must come last, as we're going to return here. */
129
- label_ptr = s->code_ptr;
130
+ tcg_insn_unit *label_ptr = s->code_ptr;
131
tcg_out_bl_imm(s, COND_NE, 0);
132
133
- add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
134
- s->code_ptr, label_ptr);
135
+ add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
136
+ addrlo, addrhi, s->code_ptr, label_ptr);
137
#else /* !CONFIG_SOFTMMU */
138
- a_bits = get_alignment_bits(opc);
139
+ unsigned a_bits = get_alignment_bits(opc);
140
if (a_bits) {
141
tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
142
}
143
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
144
break;
145
146
case INDEX_op_qemu_ld_i32:
147
- tcg_out_qemu_ld(s, args, 0);
148
+ if (TARGET_LONG_BITS == 32) {
149
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
150
+ args[2], TCG_TYPE_I32);
151
+ } else {
152
+ tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
153
+ args[3], TCG_TYPE_I32);
154
+ }
155
break;
156
case INDEX_op_qemu_ld_i64:
157
- tcg_out_qemu_ld(s, args, 1);
158
+ if (TARGET_LONG_BITS == 32) {
159
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
160
+ args[3], TCG_TYPE_I64);
161
+ } else {
162
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
163
+ args[4], TCG_TYPE_I64);
164
+ }
165
break;
166
case INDEX_op_qemu_st_i32:
167
- tcg_out_qemu_st(s, args, 0);
168
+ if (TARGET_LONG_BITS == 32) {
169
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1,
170
+ args[2], TCG_TYPE_I32);
171
+ } else {
172
+ tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
173
+ args[3], TCG_TYPE_I32);
174
+ }
175
break;
176
case INDEX_op_qemu_st_i64:
177
- tcg_out_qemu_st(s, args, 1);
178
+ if (TARGET_LONG_BITS == 32) {
179
+ tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
180
+ args[3], TCG_TYPE_I64);
181
+ } else {
182
+ tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
183
+ args[4], TCG_TYPE_I64);
184
+ }
185
break;
186
187
case INDEX_op_bswap16_i32:
--
2.43.0

--
2.34.1
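
The hexagon sffms change above relies on the identity that negating one
multiplicand of a fused multiply-add is the same as negating the whole
product, (-a) * b + c == -(a * b) + c, which is what lets the explicit
float32_set_sign step be dropped.  A minimal sketch of the equivalence,
using only public softfloat calls; the function name is invented for
illustration:

    #include "qemu/osdep.h"
    #include "fpu/softfloat.h"

    static float32 fms_sketch(float32 a, float32 b, float32 c, float_status *s)
    {
        /*
         * Old form (sketch):  float32_muladd(float32_chs(a), b, c, 0, s);
         * New form: negate the product inside the fused operation instead.
         * Both compute c - a * b with a single rounding; under default-NaN
         * mode the NaN-sign difference between them is not observable.
         */
        return float32_muladd(a, b, c, float_muladd_negate_product, s);
    }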
Collect the parts of the host address, and condition, into a struct.
Merge tcg_out_qemu_*_{index,direct} and use it.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.c.inc | 248 ++++++++++++++++++---------------------
 1 file changed, 115 insertions(+), 133 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc

Consider the passed s_mask to be a minimum deduced from
either existing s_mask or from a sign-extension operation.
We may be able to deduce more from the set of known zeros.
Remove identical logic from several opcode folders.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
17
* Record "zero" and "sign" masks for the single output of @op.
14
tcg_out32(s, insn | (rn << 16) | encode_vd(rd) | 0xf);
18
* See TempOptInfo definition of z_mask and s_mask.
15
}
19
* If z_mask allows, fold the output to constant zero.
16
20
+ * The passed s_mask may be augmented by z_mask.
17
+typedef struct {
21
*/
18
+ ARMCond cond;
22
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
19
+ TCGReg base;
23
uint64_t z_mask, uint64_t s_mask)
20
+ int index;
24
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
21
+ bool index_scratch;
25
22
+} HostAddress;
26
ti = ts_info(ts);
23
+
27
ti->z_mask = z_mask;
24
#ifdef CONFIG_SOFTMMU
28
- ti->s_mask = s_mask;
25
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
29
+ ti->s_mask = s_mask | smask_from_zmask(z_mask);
26
* int mmu_idx, uintptr_t ra)
30
return true;
27
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
31
}
28
}
32
29
#endif /* SOFTMMU */
33
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
30
31
-static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
32
- TCGReg datalo, TCGReg datahi,
33
- TCGReg addrlo, TCGReg addend,
34
- bool scratch_addend)
35
+static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
36
+ TCGReg datahi, HostAddress h)
37
{
38
+ TCGReg base;
39
+
40
/* Byte swapping is left to middle-end expansion. */
41
tcg_debug_assert((opc & MO_BSWAP) == 0);
42
43
switch (opc & MO_SSIZE) {
44
case MO_UB:
45
- tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
46
+ if (h.index < 0) {
47
+ tcg_out_ld8_12(s, h.cond, datalo, h.base, 0);
48
+ } else {
49
+ tcg_out_ld8_r(s, h.cond, datalo, h.base, h.index);
50
+ }
51
break;
52
case MO_SB:
53
- tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
54
+ if (h.index < 0) {
55
+ tcg_out_ld8s_8(s, h.cond, datalo, h.base, 0);
56
+ } else {
57
+ tcg_out_ld8s_r(s, h.cond, datalo, h.base, h.index);
58
+ }
59
break;
60
case MO_UW:
61
- tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
62
+ if (h.index < 0) {
63
+ tcg_out_ld16u_8(s, h.cond, datalo, h.base, 0);
64
+ } else {
65
+ tcg_out_ld16u_r(s, h.cond, datalo, h.base, h.index);
66
+ }
67
break;
68
case MO_SW:
69
- tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
70
+ if (h.index < 0) {
71
+ tcg_out_ld16s_8(s, h.cond, datalo, h.base, 0);
72
+ } else {
73
+ tcg_out_ld16s_r(s, h.cond, datalo, h.base, h.index);
74
+ }
75
break;
76
case MO_UL:
77
- tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
78
+ if (h.index < 0) {
79
+ tcg_out_ld32_12(s, h.cond, datalo, h.base, 0);
80
+ } else {
81
+ tcg_out_ld32_r(s, h.cond, datalo, h.base, h.index);
82
+ }
83
break;
84
case MO_UQ:
85
/* We used pair allocation for datalo, so already should be aligned. */
86
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
87
tcg_debug_assert(datahi == datalo + 1);
88
/* LDRD requires alignment; double-check that. */
89
if (get_alignment_bits(opc) >= MO_64) {
90
+ if (h.index < 0) {
91
+ tcg_out_ldrd_8(s, h.cond, datalo, h.base, 0);
92
+ break;
93
+ }
94
/*
95
* Rm (the second address op) must not overlap Rt or Rt + 1.
96
* Since datalo is aligned, we can simplify the test via alignment.
97
* Flip the two address arguments if that works.
98
*/
99
- if ((addend & ~1) != datalo) {
100
- tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
101
+ if ((h.index & ~1) != datalo) {
102
+ tcg_out_ldrd_r(s, h.cond, datalo, h.base, h.index);
103
break;
104
}
105
- if ((addrlo & ~1) != datalo) {
106
- tcg_out_ldrd_r(s, COND_AL, datalo, addend, addrlo);
107
+ if ((h.base & ~1) != datalo) {
108
+ tcg_out_ldrd_r(s, h.cond, datalo, h.index, h.base);
109
break;
110
}
111
}
112
- if (scratch_addend) {
113
- tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);
114
- tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);
115
+ if (h.index < 0) {
116
+ base = h.base;
117
+ if (datalo == h.base) {
118
+ tcg_out_mov_reg(s, h.cond, TCG_REG_TMP, base);
119
+ base = TCG_REG_TMP;
120
+ }
121
+ } else if (h.index_scratch) {
122
+ tcg_out_ld32_rwb(s, h.cond, datalo, h.index, h.base);
123
+ tcg_out_ld32_12(s, h.cond, datahi, h.index, 4);
124
+ break;
125
} else {
126
- tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
127
- addend, addrlo, SHIFT_IMM_LSL(0));
128
- tcg_out_ld32_12(s, COND_AL, datalo, TCG_REG_TMP, 0);
129
- tcg_out_ld32_12(s, COND_AL, datahi, TCG_REG_TMP, 4);
130
+ tcg_out_dat_reg(s, h.cond, ARITH_ADD, TCG_REG_TMP,
131
+ h.base, h.index, SHIFT_IMM_LSL(0));
132
+ base = TCG_REG_TMP;
133
}
134
+ tcg_out_ld32_12(s, h.cond, datalo, base, 0);
135
+ tcg_out_ld32_12(s, h.cond, datahi, base, 4);
136
break;
34
default:
137
default:
35
g_assert_not_reached();
138
g_assert_not_reached();
36
}
139
}
37
- s_mask = smask_from_zmask(z_mask);
140
}
38
141
39
+ s_mask = 0;
142
-#ifndef CONFIG_SOFTMMU
40
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
143
-static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
41
case TCG_BSWAP_OZ:
144
- TCGReg datahi, TCGReg addrlo)
42
break;
145
-{
43
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
146
- /* Byte swapping is left to middle-end expansion. */
44
default:
147
- tcg_debug_assert((opc & MO_BSWAP) == 0);
45
/* The high bits are undefined: force all bits above the sign to 1. */
148
-
46
z_mask |= sign << 1;
149
- switch (opc & MO_SSIZE) {
47
- s_mask = 0;
150
- case MO_UB:
48
break;
151
- tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
152
- break;
153
- case MO_SB:
154
- tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
155
- break;
156
- case MO_UW:
157
- tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
158
- break;
159
- case MO_SW:
160
- tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
161
- break;
162
- case MO_UL:
163
- tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
164
- break;
165
- case MO_UQ:
166
- /* We used pair allocation for datalo, so already should be aligned. */
167
- tcg_debug_assert((datalo & 1) == 0);
168
- tcg_debug_assert(datahi == datalo + 1);
169
- /* LDRD requires alignment; double-check that. */
170
- if (get_alignment_bits(opc) >= MO_64) {
171
- tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
172
- } else if (datalo == addrlo) {
173
- tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
174
- tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
175
- } else {
176
- tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
177
- tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
178
- }
179
- break;
180
- default:
181
- g_assert_not_reached();
182
- }
183
-}
184
-#endif
185
-
186
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
187
TCGReg addrlo, TCGReg addrhi,
188
MemOpIdx oi, TCGType data_type)
189
{
190
MemOp opc = get_memop(oi);
191
+ HostAddress h;
192
193
#ifdef CONFIG_SOFTMMU
194
- TCGReg addend= tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
195
+ h.cond = COND_AL;
196
+ h.base = addrlo;
197
+ h.index_scratch = true;
198
+ h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
199
200
/*
201
* This a conditional BL only to load a pointer within this opcode into
202
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
203
tcg_insn_unit *label_ptr = s->code_ptr;
204
tcg_out_bl_imm(s, COND_NE, 0);
205
206
- tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend, true);
207
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
208
209
add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
210
addrlo, addrhi, s->code_ptr, label_ptr);
211
-#else /* !CONFIG_SOFTMMU */
212
+#else
213
unsigned a_bits = get_alignment_bits(opc);
214
if (a_bits) {
215
tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
49
}
216
}
50
ctx->z_mask = z_mask;
217
- if (guest_base) {
51
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
218
- tcg_out_qemu_ld_index(s, opc, datalo, datahi,
52
g_assert_not_reached();
219
- addrlo, TCG_REG_GUEST_BASE, false);
53
}
220
- } else {
54
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
221
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
55
- ctx->s_mask = smask_from_zmask(ctx->z_mask);
222
- }
56
return false;
223
+
57
}
224
+ h.cond = COND_AL;
58
225
+ h.base = addrlo;
59
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
226
+ h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
227
+ h.index_scratch = false;
228
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
229
#endif
230
}
231
232
-static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
233
- TCGReg datalo, TCGReg datahi,
234
- TCGReg addrlo, TCGReg addend,
235
- bool scratch_addend)
236
-{
237
- /* Byte swapping is left to middle-end expansion. */
238
- tcg_debug_assert((opc & MO_BSWAP) == 0);
239
-
240
- switch (opc & MO_SIZE) {
241
- case MO_8:
242
- tcg_out_st8_r(s, cond, datalo, addrlo, addend);
243
- break;
244
- case MO_16:
245
- tcg_out_st16_r(s, cond, datalo, addrlo, addend);
246
- break;
247
- case MO_32:
248
- tcg_out_st32_r(s, cond, datalo, addrlo, addend);
249
- break;
250
- case MO_64:
251
- /* We used pair allocation for datalo, so already should be aligned. */
252
- tcg_debug_assert((datalo & 1) == 0);
253
- tcg_debug_assert(datahi == datalo + 1);
254
- /* STRD requires alignment; double-check that. */
255
- if (get_alignment_bits(opc) >= MO_64) {
256
- tcg_out_strd_r(s, cond, datalo, addrlo, addend);
257
- } else if (scratch_addend) {
258
- tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
259
- tcg_out_st32_12(s, cond, datahi, addend, 4);
260
- } else {
261
- tcg_out_dat_reg(s, cond, ARITH_ADD, TCG_REG_TMP,
262
- addend, addrlo, SHIFT_IMM_LSL(0));
263
- tcg_out_st32_12(s, cond, datalo, TCG_REG_TMP, 0);
264
- tcg_out_st32_12(s, cond, datahi, TCG_REG_TMP, 4);
265
- }
266
- break;
267
- default:
268
- g_assert_not_reached();
269
- }
270
-}
271
-
272
-#ifndef CONFIG_SOFTMMU
273
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
274
- TCGReg datahi, TCGReg addrlo)
275
+ TCGReg datahi, HostAddress h)
276
{
277
/* Byte swapping is left to middle-end expansion. */
278
tcg_debug_assert((opc & MO_BSWAP) == 0);
279
280
switch (opc & MO_SIZE) {
281
case MO_8:
282
- tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
283
+ if (h.index < 0) {
284
+ tcg_out_st8_12(s, h.cond, datalo, h.base, 0);
285
+ } else {
286
+ tcg_out_st8_r(s, h.cond, datalo, h.base, h.index);
287
+ }
288
break;
289
case MO_16:
290
- tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
291
+ if (h.index < 0) {
292
+ tcg_out_st16_8(s, h.cond, datalo, h.base, 0);
293
+ } else {
294
+ tcg_out_st16_r(s, h.cond, datalo, h.base, h.index);
295
+ }
296
break;
297
case MO_32:
298
- tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
299
+ if (h.index < 0) {
300
+ tcg_out_st32_12(s, h.cond, datalo, h.base, 0);
301
+ } else {
302
+ tcg_out_st32_r(s, h.cond, datalo, h.base, h.index);
303
+ }
304
break;
305
case MO_64:
306
/* We used pair allocation for datalo, so already should be aligned. */
307
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
308
tcg_debug_assert(datahi == datalo + 1);
309
/* STRD requires alignment; double-check that. */
310
if (get_alignment_bits(opc) >= MO_64) {
311
- tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
312
+ if (h.index < 0) {
313
+ tcg_out_strd_8(s, h.cond, datalo, h.base, 0);
314
+ } else {
315
+ tcg_out_strd_r(s, h.cond, datalo, h.base, h.index);
316
+ }
317
+ } else if (h.index_scratch) {
318
+ tcg_out_st32_rwb(s, h.cond, datalo, h.index, h.base);
319
+ tcg_out_st32_12(s, h.cond, datahi, h.index, 4);
320
} else {
321
- tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
322
- tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
323
+ tcg_out_dat_reg(s, h.cond, ARITH_ADD, TCG_REG_TMP,
324
+ h.base, h.index, SHIFT_IMM_LSL(0));
325
+ tcg_out_st32_12(s, h.cond, datalo, TCG_REG_TMP, 0);
326
+ tcg_out_st32_12(s, h.cond, datahi, TCG_REG_TMP, 4);
327
}
328
break;
60
default:
329
default:
61
g_assert_not_reached();
330
g_assert_not_reached();
62
}
331
}
63
- ctx->s_mask = smask_from_zmask(ctx->z_mask);
332
}
64
return false;
333
-#endif
65
}
334
66
335
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
67
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
336
TCGReg addrlo, TCGReg addrhi,
68
return true;
337
MemOpIdx oi, TCGType data_type)
338
{
339
MemOp opc = get_memop(oi);
340
+ HostAddress h;
341
342
#ifdef CONFIG_SOFTMMU
343
- TCGReg addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
344
-
345
- tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi,
346
- addrlo, addend, true);
347
+ h.cond = COND_EQ;
348
+ h.base = addrlo;
349
+ h.index_scratch = true;
350
+ h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
351
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
352
353
/* The conditional call must come last, as we're going to return here. */
354
tcg_insn_unit *label_ptr = s->code_ptr;
355
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
356
357
add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
358
addrlo, addrhi, s->code_ptr, label_ptr);
359
-#else /* !CONFIG_SOFTMMU */
360
+#else
361
unsigned a_bits = get_alignment_bits(opc);
362
+
363
+ h.cond = COND_AL;
364
if (a_bits) {
365
tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
366
+ h.cond = COND_EQ;
69
}
367
}
70
ctx->z_mask = z_mask;
368
- if (guest_base) {
71
- ctx->s_mask = smask_from_zmask(z_mask);
369
- tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi,
72
370
- addrlo, TCG_REG_GUEST_BASE, false);
73
return fold_masks(ctx, op);
371
- } else {
74
}
372
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
75
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
373
- }
76
}
374
+
77
375
+ h.base = addrlo;
78
ctx->z_mask = z_mask;
376
+ h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
79
- ctx->s_mask = smask_from_zmask(z_mask);
377
+ h.index_scratch = false;
80
if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
378
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
81
return true;
379
#endif
82
}
380
}
83
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
381
84
int width = 8 * memop_size(mop);
85
86
if (width < 64) {
87
- ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
88
- if (!(mop & MO_SIGN)) {
89
+ if (mop & MO_SIGN) {
90
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
91
+ } else {
92
ctx->z_mask = MAKE_64BIT_MASK(0, width);
93
- ctx->s_mask <<= 1;
94
}
95
}
96
97
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
98
fold_setcond_tst_pow2(ctx, op, false);
99
100
ctx->z_mask = 1;
101
- ctx->s_mask = smask_from_zmask(1);
102
return false;
103
}
104
105
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
106
}
107
108
ctx->z_mask = 1;
109
- ctx->s_mask = smask_from_zmask(1);
110
return false;
111
112
do_setcond_const:
113
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
114
break;
115
CASE_OP_32_64(ld8u):
116
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
117
- ctx->s_mask = MAKE_64BIT_MASK(9, 55);
118
break;
119
CASE_OP_32_64(ld16s):
120
ctx->s_mask = MAKE_64BIT_MASK(16, 48);
121
break;
122
CASE_OP_32_64(ld16u):
123
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
124
- ctx->s_mask = MAKE_64BIT_MASK(17, 47);
125
break;
126
case INDEX_op_ld32s_i64:
127
ctx->s_mask = MAKE_64BIT_MASK(32, 32);
128
break;
129
case INDEX_op_ld32u_i64:
130
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
131
- ctx->s_mask = MAKE_64BIT_MASK(33, 31);
132
break;
133
default:
134
g_assert_not_reached();
135
--
2.43.0

--
2.34.1
Interpret the variable argument placement in the caller. Shift some
code around slightly to share more between softmmu and user-only.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 100 +++++++++++++------------------
 1 file changed, 42 insertions(+), 58 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static void finish_ebb(OptContext *ctx)
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, TCGReg rd, TCGReg rj,
12
remove_mem_copy_all(ctx);
15
}
13
}
16
}
14
17
15
-static void finish_folding(OptContext *ctx, TCGOp *op)
18
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
16
+static bool finish_folding(OptContext *ctx, TCGOp *op)
19
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
20
+ MemOpIdx oi, TCGType data_type)
17
{
21
{
18
const TCGOpDef *def = &tcg_op_defs[op->opc];
22
- TCGReg addr_regl;
19
int i, nb_oargs;
23
- TCGReg data_regl;
20
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
24
- MemOpIdx oi;
21
ts_info(ts)->z_mask = ctx->z_mask;
25
- MemOp opc;
22
}
26
-#if defined(CONFIG_SOFTMMU)
27
+ MemOp opc = get_memop(oi);
28
+ TCGReg base, index;
29
+
30
+#ifdef CONFIG_SOFTMMU
31
tcg_insn_unit *label_ptr[1];
32
-#else
33
- unsigned a_bits;
34
-#endif
35
- TCGReg base;
36
37
- data_regl = *args++;
38
- addr_regl = *args++;
39
- oi = *args++;
40
- opc = get_memop(oi);
41
-
42
-#if defined(CONFIG_SOFTMMU)
43
- tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 1);
44
- base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
45
- tcg_out_qemu_ld_indexed(s, data_regl, base, TCG_REG_TMP2, opc, type);
46
- add_qemu_ldst_label(s, 1, oi, type,
47
- data_regl, addr_regl,
48
- s->code_ptr, label_ptr);
49
+ tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
50
+ index = TCG_REG_TMP2;
51
#else
52
- a_bits = get_alignment_bits(opc);
53
+ unsigned a_bits = get_alignment_bits(opc);
54
if (a_bits) {
55
- tcg_out_test_alignment(s, true, addr_regl, a_bits);
56
+ tcg_out_test_alignment(s, true, addr_reg, a_bits);
23
}
57
}
24
+ return true;
58
- base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
59
- TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
60
- tcg_out_qemu_ld_indexed(s, data_regl, base, guest_base_reg, opc, type);
61
+ index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
62
+#endif
63
+
64
+ base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
65
+ tcg_out_qemu_ld_indexed(s, data_reg, base, index, opc, data_type);
66
+
67
+#ifdef CONFIG_SOFTMMU
68
+ add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
69
+ s->code_ptr, label_ptr);
70
#endif
25
}
71
}
26
72
27
/*
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_indexed(TCGContext *s, TCGReg data,
28
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
29
fold_xi_to_x(ctx, op, 0)) {
30
return true;
31
}
74
}
32
- return false;
33
+ return finish_folding(ctx, op);
34
}
75
}
35
76
36
/* We cannot as yet do_constant_folding with vectors. */
77
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGType type)
37
@@ -XXX,XX +XXX,XX @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op)
78
+static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
38
fold_xi_to_x(ctx, op, 0)) {
79
+ MemOpIdx oi, TCGType data_type)
39
return true;
80
{
81
- TCGReg addr_regl;
82
- TCGReg data_regl;
83
- MemOpIdx oi;
84
- MemOp opc;
85
-#if defined(CONFIG_SOFTMMU)
86
+ MemOp opc = get_memop(oi);
87
+ TCGReg base, index;
88
+
89
+#ifdef CONFIG_SOFTMMU
90
tcg_insn_unit *label_ptr[1];
91
-#else
92
- unsigned a_bits;
93
-#endif
94
- TCGReg base;
95
96
- data_regl = *args++;
97
- addr_regl = *args++;
98
- oi = *args++;
99
- opc = get_memop(oi);
100
-
101
-#if defined(CONFIG_SOFTMMU)
102
- tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 0);
103
- base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
104
- tcg_out_qemu_st_indexed(s, data_regl, base, TCG_REG_TMP2, opc);
105
- add_qemu_ldst_label(s, 0, oi, type,
106
- data_regl, addr_regl,
107
- s->code_ptr, label_ptr);
108
+ tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
109
+ index = TCG_REG_TMP2;
110
#else
111
- a_bits = get_alignment_bits(opc);
112
+ unsigned a_bits = get_alignment_bits(opc);
113
if (a_bits) {
114
- tcg_out_test_alignment(s, false, addr_regl, a_bits);
115
+ tcg_out_test_alignment(s, false, addr_reg, a_bits);
40
}
116
}
41
- return false;
117
- base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
42
+ return finish_folding(ctx, op);
118
- TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
119
- tcg_out_qemu_st_indexed(s, data_regl, base, guest_base_reg, opc);
120
+ index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
121
+#endif
122
+
123
+ base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
124
+ tcg_out_qemu_st_indexed(s, data_reg, base, index, opc);
125
+
126
+#ifdef CONFIG_SOFTMMU
127
+ add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
128
+ s->code_ptr, label_ptr);
129
#endif
43
}
130
}
44
131
45
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
132
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
46
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
133
break;
47
op->args[4] = arg_new_constant(ctx, bl);
134
48
op->args[5] = arg_new_constant(ctx, bh);
135
case INDEX_op_qemu_ld_i32:
49
}
136
- tcg_out_qemu_ld(s, args, TCG_TYPE_I32);
50
- return false;
137
+ tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
51
+ return finish_folding(ctx, op);
138
break;
52
}
139
case INDEX_op_qemu_ld_i64:
53
140
- tcg_out_qemu_ld(s, args, TCG_TYPE_I64);
54
static bool fold_add2(OptContext *ctx, TCGOp *op)
141
+ tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
142
break;
143
case INDEX_op_qemu_st_i32:
144
- tcg_out_qemu_st(s, args, TCG_TYPE_I32);
145
+ tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
146
break;
147
case INDEX_op_qemu_st_i64:
148
- tcg_out_qemu_st(s, args, TCG_TYPE_I64);
149
+ tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
150
break;
151
152
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
55
--
2.43.0

--
2.34.1
Collect the 2 parts of the host address into a struct.
Reorg tcg_out_qemu_{ld,st}_direct to use it.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 55 +++++++++++++++++---------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc

Add fold_masks_z as a trivial wrapper around fold_masks_zs.
Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
14
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s,
15
return true;
15
return addr;
16
}
16
}
17
17
18
+static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
18
-static void tcg_out_qemu_ld_indexed(TCGContext *s, TCGReg rd, TCGReg rj,
19
+{
19
- TCGReg rk, MemOp opc, TCGType type)
20
+ return fold_masks_zs(ctx, op, z_mask, 0);
20
+typedef struct {
21
+}
21
+ TCGReg base;
22
+ TCGReg index;
23
+} HostAddress;
22
+
24
+
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
25
+static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
26
+ TCGReg rd, HostAddress h)
24
{
27
{
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
28
/* Byte swapping is left to middle-end expansion. */
26
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
29
tcg_debug_assert((opc & MO_BSWAP) == 0);
27
30
28
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
31
switch (opc & MO_SSIZE) {
29
{
32
case MO_UB:
30
+ uint64_t z_mask;
33
- tcg_out_opc_ldx_bu(s, rd, rj, rk);
31
+
34
+ tcg_out_opc_ldx_bu(s, rd, h.base, h.index);
32
if (fold_const1(ctx, op)) {
33
return true;
34
}
35
36
switch (ctx->type) {
37
case TCG_TYPE_I32:
38
- ctx->z_mask = 32 | 31;
39
+ z_mask = 32 | 31;
40
break;
35
break;
41
case TCG_TYPE_I64:
36
case MO_SB:
42
- ctx->z_mask = 64 | 63;
37
- tcg_out_opc_ldx_b(s, rd, rj, rk);
43
+ z_mask = 64 | 63;
38
+ tcg_out_opc_ldx_b(s, rd, h.base, h.index);
39
break;
40
case MO_UW:
41
- tcg_out_opc_ldx_hu(s, rd, rj, rk);
42
+ tcg_out_opc_ldx_hu(s, rd, h.base, h.index);
43
break;
44
case MO_SW:
45
- tcg_out_opc_ldx_h(s, rd, rj, rk);
46
+ tcg_out_opc_ldx_h(s, rd, h.base, h.index);
47
break;
48
case MO_UL:
49
if (type == TCG_TYPE_I64) {
50
- tcg_out_opc_ldx_wu(s, rd, rj, rk);
51
+ tcg_out_opc_ldx_wu(s, rd, h.base, h.index);
52
break;
53
}
54
/* fallthrough */
55
case MO_SL:
56
- tcg_out_opc_ldx_w(s, rd, rj, rk);
57
+ tcg_out_opc_ldx_w(s, rd, h.base, h.index);
58
break;
59
case MO_UQ:
60
- tcg_out_opc_ldx_d(s, rd, rj, rk);
61
+ tcg_out_opc_ldx_d(s, rd, h.base, h.index);
44
break;
62
break;
45
default:
63
default:
46
g_assert_not_reached();
64
g_assert_not_reached();
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
66
MemOpIdx oi, TCGType data_type)
67
{
68
MemOp opc = get_memop(oi);
69
- TCGReg base, index;
70
+ HostAddress h;
71
72
#ifdef CONFIG_SOFTMMU
73
tcg_insn_unit *label_ptr[1];
74
75
tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
76
- index = TCG_REG_TMP2;
77
+ h.index = TCG_REG_TMP2;
78
#else
79
unsigned a_bits = get_alignment_bits(opc);
80
if (a_bits) {
81
tcg_out_test_alignment(s, true, addr_reg, a_bits);
47
}
82
}
48
- return false;
83
- index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
49
+ return fold_masks_z(ctx, op, z_mask);
84
+ h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
85
#endif
86
87
- base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
88
- tcg_out_qemu_ld_indexed(s, data_reg, base, index, opc, data_type);
89
+ h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
90
+ tcg_out_qemu_ld_indexed(s, opc, data_type, data_reg, h);
91
92
#ifdef CONFIG_SOFTMMU
93
add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
94
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
95
#endif
50
}
96
}
51
97
52
static bool fold_deposit(OptContext *ctx, TCGOp *op)
98
-static void tcg_out_qemu_st_indexed(TCGContext *s, TCGReg data,
99
- TCGReg rj, TCGReg rk, MemOp opc)
100
+static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
101
+ TCGReg rd, HostAddress h)
102
{
103
/* Byte swapping is left to middle-end expansion. */
104
tcg_debug_assert((opc & MO_BSWAP) == 0);
105
106
switch (opc & MO_SIZE) {
107
case MO_8:
108
- tcg_out_opc_stx_b(s, data, rj, rk);
109
+ tcg_out_opc_stx_b(s, rd, h.base, h.index);
110
break;
111
case MO_16:
112
- tcg_out_opc_stx_h(s, data, rj, rk);
113
+ tcg_out_opc_stx_h(s, rd, h.base, h.index);
114
break;
115
case MO_32:
116
- tcg_out_opc_stx_w(s, data, rj, rk);
117
+ tcg_out_opc_stx_w(s, rd, h.base, h.index);
118
break;
119
case MO_64:
120
- tcg_out_opc_stx_d(s, data, rj, rk);
121
+ tcg_out_opc_stx_d(s, rd, h.base, h.index);
122
break;
123
default:
124
g_assert_not_reached();
125
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
126
MemOpIdx oi, TCGType data_type)
127
{
128
MemOp opc = get_memop(oi);
129
- TCGReg base, index;
130
+ HostAddress h;
131
132
#ifdef CONFIG_SOFTMMU
133
tcg_insn_unit *label_ptr[1];
134
135
tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
136
- index = TCG_REG_TMP2;
137
+ h.index = TCG_REG_TMP2;
138
#else
139
unsigned a_bits = get_alignment_bits(opc);
140
if (a_bits) {
141
tcg_out_test_alignment(s, false, addr_reg, a_bits);
142
}
143
- index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
144
+ h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
145
#endif
146
147
- base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
148
- tcg_out_qemu_st_indexed(s, data_reg, base, index, opc);
149
+ h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
150
+ tcg_out_qemu_st_indexed(s, opc, data_reg, h);
151
152
#ifdef CONFIG_SOFTMMU
153
add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
53
--
2.43.0

--
2.34.1
Interpret the variable argument placement in the caller. There are
several places where we already convert back from bool to type.
Clean things up by using type throughout.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 186 +++++++++++++++++++-------------------
 1 file changed, 95 insertions(+), 91 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc

Duplicate fold_sub_vec into fold_sub instead of calling it,
now that fold_sub_vec always returns true.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
15
fold_sub_to_neg(ctx, op)) {
16
#endif /* SOFTMMU */
16
return true;
17
17
}
18
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
18
- return false;
19
- TCGReg base, MemOp opc, bool is_64)
19
+ return finish_folding(ctx, op);
20
+ TCGReg base, MemOp opc, TCGType type)
21
{
22
switch (opc & (MO_SSIZE | MO_BSWAP)) {
23
case MO_UB:
24
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
25
tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
26
break;
27
case MO_UL | MO_BSWAP:
28
- if (TCG_TARGET_REG_BITS == 64 && is_64) {
29
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
30
if (use_mips32r2_instructions) {
31
tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
32
tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
33
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
34
}
35
break;
36
case MO_UL:
37
- if (TCG_TARGET_REG_BITS == 64 && is_64) {
38
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
39
tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
40
break;
41
}
42
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
20
}
43
}
21
44
22
static bool fold_sub(OptContext *ctx, TCGOp *op)
45
static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
46
- TCGReg base, MemOp opc, bool is_64)
47
+ TCGReg base, MemOp opc, TCGType type)
23
{
48
{
24
- if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
49
const MIPSInsn lw1 = MIPS_BE ? OPC_LWL : OPC_LWR;
25
+ if (fold_const2(ctx, op) ||
50
const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL;
26
+ fold_xx_to_i(ctx, op, 0) ||
51
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
27
+ fold_xi_to_x(ctx, op, 0) ||
52
case MO_UL:
28
+ fold_sub_to_neg(ctx, op)) {
53
tcg_out_opc_imm(s, lw1, lo, base, 0);
29
return true;
54
tcg_out_opc_imm(s, lw2, lo, base, 3);
30
}
55
- if (TCG_TARGET_REG_BITS == 64 && is_64 && !sgn) {
31
56
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn) {
32
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
57
tcg_out_ext32u(s, lo, lo);
33
? INDEX_op_add_i32 : INDEX_op_add_i64);
58
}
34
op->args[2] = arg_new_constant(ctx, -val);
59
break;
35
}
60
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
36
- return false;
61
tcg_out_opc_imm(s, lw1, lo, base, 0);
37
+ return finish_folding(ctx, op);
62
tcg_out_opc_imm(s, lw2, lo, base, 3);
63
tcg_out_bswap32(s, lo, lo,
64
- TCG_TARGET_REG_BITS == 64 && is_64
65
+ TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64
66
? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0);
67
} else {
68
const tcg_insn_unit *subr =
69
- (TCG_TARGET_REG_BITS == 64 && is_64 && !sgn
70
+ (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn
71
? bswap32u_addr : bswap32_addr);
72
73
tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0);
74
tcg_out_bswap_subr(s, subr);
75
/* delay slot */
76
tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3);
77
- tcg_out_mov(s, is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, lo, TCG_TMP3);
78
+ tcg_out_mov(s, type, lo, TCG_TMP3);
79
}
80
break;
81
82
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
83
}
38
}
84
}
39
85
40
static bool fold_sub2(OptContext *ctx, TCGOp *op)
86
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
87
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
88
+ TCGReg addrlo, TCGReg addrhi,
89
+ MemOpIdx oi, TCGType data_type)
90
{
91
- TCGReg addr_regl, addr_regh __attribute__((unused));
92
- TCGReg data_regl, data_regh;
93
- MemOpIdx oi;
94
- MemOp opc;
95
-#if defined(CONFIG_SOFTMMU)
96
- tcg_insn_unit *label_ptr[2];
97
-#else
98
-#endif
99
- unsigned a_bits, s_bits;
100
- TCGReg base = TCG_REG_A0;
101
-
102
- data_regl = *args++;
103
- data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
104
- addr_regl = *args++;
105
- addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
106
- oi = *args++;
107
- opc = get_memop(oi);
108
- a_bits = get_alignment_bits(opc);
109
- s_bits = opc & MO_SIZE;
110
+ MemOp opc = get_memop(oi);
111
+ unsigned a_bits = get_alignment_bits(opc);
112
+ unsigned s_bits = opc & MO_SIZE;
113
+ TCGReg base;
114
115
/*
116
* R6 removes the left/right instructions but requires the
117
* system to support misaligned memory accesses.
118
*/
119
#if defined(CONFIG_SOFTMMU)
120
- tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1);
121
+ tcg_insn_unit *label_ptr[2];
122
+
123
+ base = TCG_REG_A0;
124
+ tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 1);
125
if (use_mips32r6_instructions || a_bits >= s_bits) {
126
- tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
127
+ tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
128
} else {
129
- tcg_out_qemu_ld_unalign(s, data_regl, data_regh, base, opc, is_64);
130
+ tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
131
}
132
- add_qemu_ldst_label(s, 1, oi,
133
- (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
134
- data_regl, data_regh, addr_regl, addr_regh,
135
- s->code_ptr, label_ptr);
136
+ add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
137
+ addrlo, addrhi, s->code_ptr, label_ptr);
138
#else
139
+ base = addrlo;
140
if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
141
- tcg_out_ext32u(s, base, addr_regl);
142
- addr_regl = base;
143
+ tcg_out_ext32u(s, TCG_REG_A0, base);
144
+ base = TCG_REG_A0;
145
}
146
- if (guest_base == 0 && data_regl != addr_regl) {
147
- base = addr_regl;
148
- } else if (guest_base == (int16_t)guest_base) {
149
- tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base);
150
- } else {
151
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl);
152
+ if (guest_base) {
153
+ if (guest_base == (int16_t)guest_base) {
154
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
155
+ } else {
156
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
157
+ TCG_GUEST_BASE_REG);
158
+ }
159
+ base = TCG_REG_A0;
160
}
161
if (use_mips32r6_instructions) {
162
if (a_bits) {
163
- tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits);
164
+ tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
165
}
166
- tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
167
+ tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
168
} else {
169
if (a_bits && a_bits != s_bits) {
170
- tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits);
171
+ tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
172
}
173
if (a_bits >= s_bits) {
174
- tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
175
+ tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
176
} else {
177
- tcg_out_qemu_ld_unalign(s, data_regl, data_regh, base, opc, is_64);
178
+ tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
179
}
180
}
181
#endif
182
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
183
g_assert_not_reached();
184
}
185
}
186
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
187
-{
188
- TCGReg addr_regl, addr_regh __attribute__((unused));
189
- TCGReg data_regl, data_regh;
190
- MemOpIdx oi;
191
- MemOp opc;
192
-#if defined(CONFIG_SOFTMMU)
193
- tcg_insn_unit *label_ptr[2];
194
-#endif
195
- unsigned a_bits, s_bits;
196
- TCGReg base = TCG_REG_A0;
197
198
- data_regl = *args++;
199
- data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
200
- addr_regl = *args++;
201
- addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
202
- oi = *args++;
203
- opc = get_memop(oi);
204
- a_bits = get_alignment_bits(opc);
205
- s_bits = opc & MO_SIZE;
206
+static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
207
+ TCGReg addrlo, TCGReg addrhi,
208
+ MemOpIdx oi, TCGType data_type)
209
+{
210
+ MemOp opc = get_memop(oi);
211
+ unsigned a_bits = get_alignment_bits(opc);
212
+ unsigned s_bits = opc & MO_SIZE;
213
+ TCGReg base;
214
215
/*
216
* R6 removes the left/right instructions but requires the
217
* system to support misaligned memory accesses.
218
*/
219
#if defined(CONFIG_SOFTMMU)
220
- tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0);
221
+ tcg_insn_unit *label_ptr[2];
222
+
223
+ base = TCG_REG_A0;
224
+ tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 0);
225
if (use_mips32r6_instructions || a_bits >= s_bits) {
226
- tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
227
+ tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
228
} else {
229
- tcg_out_qemu_st_unalign(s, data_regl, data_regh, base, opc);
230
+ tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
231
}
232
- add_qemu_ldst_label(s, 0, oi,
233
- (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
234
- data_regl, data_regh, addr_regl, addr_regh,
235
- s->code_ptr, label_ptr);
236
+ add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
237
+ addrlo, addrhi, s->code_ptr, label_ptr);
238
#else
239
+ base = addrlo;
240
if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
241
- tcg_out_ext32u(s, base, addr_regl);
242
- addr_regl = base;
243
+ tcg_out_ext32u(s, TCG_REG_A0, base);
244
+ base = TCG_REG_A0;
245
}
246
- if (guest_base == 0) {
247
- base = addr_regl;
248
- } else if (guest_base == (int16_t)guest_base) {
249
- tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base);
250
- } else {
251
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl);
252
+ if (guest_base) {
253
+ if (guest_base == (int16_t)guest_base) {
254
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
255
+ } else {
256
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
257
+ TCG_GUEST_BASE_REG);
258
+ }
259
+ base = TCG_REG_A0;
260
}
261
if (use_mips32r6_instructions) {
262
if (a_bits) {
263
- tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits);
264
+ tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
265
}
266
- tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
267
+ tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
268
} else {
269
if (a_bits && a_bits != s_bits) {
270
- tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits);
271
+ tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
272
}
273
if (a_bits >= s_bits) {
274
- tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
275
+ tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
276
} else {
277
- tcg_out_qemu_st_unalign(s, data_regl, data_regh, base, opc);
278
+ tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
279
}
280
}
281
#endif
282
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
283
break;
284
285
case INDEX_op_qemu_ld_i32:
286
- tcg_out_qemu_ld(s, args, false);
287
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
288
+ tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
289
+ } else {
290
+ tcg_out_qemu_ld(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
291
+ }
292
break;
293
case INDEX_op_qemu_ld_i64:
294
- tcg_out_qemu_ld(s, args, true);
295
+ if (TCG_TARGET_REG_BITS == 64) {
296
+ tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
297
+ } else if (TARGET_LONG_BITS == 32) {
298
+ tcg_out_qemu_ld(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
299
+ } else {
300
+ tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
301
+ }
302
break;
303
case INDEX_op_qemu_st_i32:
304
- tcg_out_qemu_st(s, args, false);
305
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
306
+ tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
307
+ } else {
308
+ tcg_out_qemu_st(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
309
+ }
310
break;
311
case INDEX_op_qemu_st_i64:
312
- tcg_out_qemu_st(s, args, true);
313
+ if (TCG_TARGET_REG_BITS == 64) {
314
+ tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
315
+ } else if (TARGET_LONG_BITS == 32) {
316
+ tcg_out_qemu_st(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
317
+ } else {
318
+ tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
319
+ }
320
break;
321
322
case INDEX_op_add2_i32:
41
--
2.43.0

--
2.34.1
Interpret the variable argument placement in the caller. Pass data_type
instead of is64 -- there are several places where we already convert back
from bool to type. Clean things up by using type throughout.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 110 +++++++++++++++++++++------------
 1 file changed, 59 insertions(+), 51 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc

Avoid the use of the OptContext slots. Find TempOptInfo once.
Remove fold_masks as the function becomes unused.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
16
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
15
return fold_masks_zs(ctx, op, -1, s_mask);
17
/* Record the context of a call to the out of line helper code for the slow
18
path for a load or store, so that we can later generate the correct
19
helper code. */
20
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
21
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
22
+ TCGType type, MemOpIdx oi,
23
TCGReg datalo_reg, TCGReg datahi_reg,
24
TCGReg addrlo_reg, TCGReg addrhi_reg,
25
tcg_insn_unit *raddr, tcg_insn_unit *lptr)
26
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
27
TCGLabelQemuLdst *label = new_ldst_label(s);
28
29
label->is_ld = is_ld;
30
+ label->type = type;
31
label->oi = oi;
32
label->datalo_reg = datalo_reg;
33
label->datahi_reg = datahi_reg;
34
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
35
36
#endif /* SOFTMMU */
37
38
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
39
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
40
+ TCGReg addrlo, TCGReg addrhi,
41
+ MemOpIdx oi, TCGType data_type)
42
{
43
- TCGReg datalo, datahi, addrlo, rbase;
44
- TCGReg addrhi __attribute__((unused));
45
- MemOpIdx oi;
46
- MemOp opc, s_bits;
47
+ MemOp opc = get_memop(oi);
48
+ MemOp s_bits = opc & MO_SIZE;
49
+ TCGReg rbase;
50
+
51
#ifdef CONFIG_SOFTMMU
52
- int mem_index;
53
tcg_insn_unit *label_ptr;
54
-#else
55
- unsigned a_bits;
56
-#endif
57
58
- datalo = *args++;
59
- datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
60
- addrlo = *args++;
61
- addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
62
- oi = *args++;
63
- opc = get_memop(oi);
64
- s_bits = opc & MO_SIZE;
65
-
66
-#ifdef CONFIG_SOFTMMU
67
- mem_index = get_mmuidx(oi);
68
- addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
69
+ addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true);
70
71
/* Load a pointer into the current opcode w/conditional branch-link. */
72
label_ptr = s->code_ptr;
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
74
75
rbase = TCG_REG_R3;
76
#else /* !CONFIG_SOFTMMU */
77
- a_bits = get_alignment_bits(opc);
78
+ unsigned a_bits = get_alignment_bits(opc);
79
if (a_bits) {
80
tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
81
}
82
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
83
}
84
85
#ifdef CONFIG_SOFTMMU
86
- add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
87
- s->code_ptr, label_ptr);
88
+ add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
89
+ addrlo, addrhi, s->code_ptr, label_ptr);
90
#endif
16
}
91
}
17
92
18
-static bool fold_masks(OptContext *ctx, TCGOp *op)
93
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
19
-{
94
+static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
20
- return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
95
+ TCGReg addrlo, TCGReg addrhi,
21
-}
96
+ MemOpIdx oi, TCGType data_type)
97
{
98
- TCGReg datalo, datahi, addrlo, rbase;
99
- TCGReg addrhi __attribute__((unused));
100
- MemOpIdx oi;
101
- MemOp opc, s_bits;
102
+ MemOp opc = get_memop(oi);
103
+ MemOp s_bits = opc & MO_SIZE;
104
+ TCGReg rbase;
105
+
106
#ifdef CONFIG_SOFTMMU
107
- int mem_index;
108
tcg_insn_unit *label_ptr;
109
-#else
110
- unsigned a_bits;
111
-#endif
112
113
- datalo = *args++;
114
- datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
115
- addrlo = *args++;
116
- addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
117
- oi = *args++;
118
- opc = get_memop(oi);
119
- s_bits = opc & MO_SIZE;
22
-
120
-
23
/*
121
-#ifdef CONFIG_SOFTMMU
24
* An "affected" mask bit is 0 if and only if the result is identical
122
- mem_index = get_mmuidx(oi);
25
* to the first input. Thus if the entire mask is 0, the operation
123
- addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
26
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
124
+ addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false);
27
125
28
static bool fold_xor(OptContext *ctx, TCGOp *op)
126
/* Load a pointer into the current opcode w/conditional branch-link. */
29
{
127
label_ptr = s->code_ptr;
30
+ uint64_t z_mask, s_mask;
128
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
31
+ TempOptInfo *t1, *t2;
129
32
+
130
rbase = TCG_REG_R3;
33
if (fold_const2_commutative(ctx, op) ||
131
#else /* !CONFIG_SOFTMMU */
34
fold_xx_to_i(ctx, op, 0) ||
132
- a_bits = get_alignment_bits(opc);
35
fold_xi_to_x(ctx, op, 0) ||
133
+ unsigned a_bits = get_alignment_bits(opc);
36
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
134
if (a_bits) {
37
return true;
135
tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
38
}
136
}
39
137
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
40
- ctx->z_mask = arg_info(op->args[1])->z_mask
138
}
41
- | arg_info(op->args[2])->z_mask;
139
42
- ctx->s_mask = arg_info(op->args[1])->s_mask
140
#ifdef CONFIG_SOFTMMU
43
- & arg_info(op->args[2])->s_mask;
141
- add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
44
- return fold_masks(ctx, op);
142
- s->code_ptr, label_ptr);
45
+ t1 = arg_info(op->args[1]);
143
+ add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
46
+ t2 = arg_info(op->args[2]);
144
+ addrlo, addrhi, s->code_ptr, label_ptr);
47
+ z_mask = t1->z_mask | t2->z_mask;
145
#endif
48
+ s_mask = t1->s_mask & t2->s_mask;
49
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
50
}
146
}
51
147
52
static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
148
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
149
break;
150
151
case INDEX_op_qemu_ld_i32:
152
- tcg_out_qemu_ld(s, args, false);
153
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
154
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
155
+ args[2], TCG_TYPE_I32);
156
+ } else {
157
+ tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
158
+ args[3], TCG_TYPE_I32);
159
+ }
160
break;
161
case INDEX_op_qemu_ld_i64:
162
- tcg_out_qemu_ld(s, args, true);
163
+ if (TCG_TARGET_REG_BITS == 64) {
164
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
165
+ args[2], TCG_TYPE_I64);
166
+ } else if (TARGET_LONG_BITS == 32) {
167
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
168
+ args[3], TCG_TYPE_I64);
169
+ } else {
170
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
171
+ args[4], TCG_TYPE_I64);
172
+ }
173
break;
174
case INDEX_op_qemu_st_i32:
175
- tcg_out_qemu_st(s, args, false);
176
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
177
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1,
178
+ args[2], TCG_TYPE_I32);
179
+ } else {
180
+ tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
181
+ args[3], TCG_TYPE_I32);
182
+ }
183
break;
184
case INDEX_op_qemu_st_i64:
185
- tcg_out_qemu_st(s, args, true);
186
+ if (TCG_TARGET_REG_BITS == 64) {
187
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1,
188
+ args[2], TCG_TYPE_I64);
189
+ } else if (TARGET_LONG_BITS == 32) {
190
+ tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
191
+ args[3], TCG_TYPE_I64);
192
+ } else {
193
+ tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
194
+ args[4], TCG_TYPE_I64);
195
+ }
196
break;
197
198
case INDEX_op_setcond_i32:
53
--
199
--
54
2.43.0
200
2.34.1
201
202
1
Avoid the use of the OptContext slots.

Be careful not to call fold_masks_zs when the memory operation
is wide enough to require multiple outputs, so split into two
functions: fold_qemu_ld_1reg and fold_qemu_ld_2reg.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Collect the parts of the host address into a struct.
Reorg tcg_out_qemu_{ld,st} to use it.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
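As a rough sketch of the masks a single-register load can produce, matching
the fold_qemu_ld_1reg() hunk below (not the QEMU code; the macro mirrors
QEMU's MAKE_64BIT_MASK and the values shown are for a 16-bit load):

    #include <stdint.h>
    #include <stdio.h>

    #define MAKE_64BIT_MASK(shift, length) \
        ((~0ULL >> (64 - (length))) << (shift))

    int main(void)
    {
        int width = 16;   /* e.g. a halfword load */
        /* Sign-extending load: bits width-1..63 repeat the sign. */
        uint64_t s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
        /* Zero-extending load: only the low width bits can be set. */
        uint64_t z_mask = MAKE_64BIT_MASK(0, width);
        printf("s_mask=%016llx z_mask=%016llx\n",
               (unsigned long long)s_mask, (unsigned long long)z_mask);
        return 0;   /* prints ffffffffffff8000 and 000000000000ffff */
    }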
9
---
6
---
10
tcg/optimize.c | 26 +++++++++++++++++++++-----
7
tcg/ppc/tcg-target.c.inc | 90 +++++++++++++++++++++-------------------
11
1 file changed, 21 insertions(+), 5 deletions(-)
8
1 file changed, 47 insertions(+), 43 deletions(-)
12
9
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
14
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/optimize.c
12
--- a/tcg/ppc/tcg-target.c.inc
16
+++ b/tcg/optimize.c
13
+++ b/tcg/ppc/tcg-target.c.inc
17
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
18
return fold_masks_s(ctx, op, s_mask);
15
{
16
return tcg_out_fail_alignment(s, l);
19
}
17
}
20
18
-
21
-static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
19
#endif /* SOFTMMU */
22
+static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
20
21
+typedef struct {
22
+ TCGReg base;
23
+ TCGReg index;
24
+} HostAddress;
25
+
26
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
27
TCGReg addrlo, TCGReg addrhi,
28
MemOpIdx oi, TCGType data_type)
23
{
29
{
24
const TCGOpDef *def = &tcg_op_defs[op->opc];
30
MemOp opc = get_memop(oi);
25
MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
31
MemOp s_bits = opc & MO_SIZE;
26
MemOp mop = get_memop(oi);
32
- TCGReg rbase;
27
int width = 8 * memop_size(mop);
33
+ HostAddress h;
28
+ uint64_t z_mask = -1, s_mask = 0;
34
29
35
#ifdef CONFIG_SOFTMMU
30
if (width < 64) {
36
tcg_insn_unit *label_ptr;
31
if (mop & MO_SIGN) {
37
32
- ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
38
- addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true);
33
+ s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
39
+ h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true);
40
+ h.base = TCG_REG_R3;
41
42
/* Load a pointer into the current opcode w/conditional branch-link. */
43
label_ptr = s->code_ptr;
44
tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
45
-
46
- rbase = TCG_REG_R3;
47
#else /* !CONFIG_SOFTMMU */
48
unsigned a_bits = get_alignment_bits(opc);
49
if (a_bits) {
50
tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
51
}
52
- rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
53
+ h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
54
+ h.index = addrlo;
55
if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
56
tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
57
- addrlo = TCG_REG_TMP1;
58
+ h.index = TCG_REG_TMP1;
59
}
60
#endif
61
62
if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
63
if (opc & MO_BSWAP) {
64
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
65
- tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
66
- tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
67
- } else if (rbase != 0) {
68
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
69
- tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
70
- tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
71
- } else if (addrlo == datahi) {
72
- tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
73
- tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
74
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
75
+ tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
76
+ tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
77
+ } else if (h.base != 0) {
78
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
79
+ tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
80
+ tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
81
+ } else if (h.index == datahi) {
82
+ tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
83
+ tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
34
} else {
84
} else {
35
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
85
- tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
36
+ z_mask = MAKE_64BIT_MASK(0, width);
86
- tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
87
+ tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
88
+ tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
89
}
90
} else {
91
uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
92
if (!have_isa_2_06 && insn == LDBRX) {
93
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
94
- tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
95
- tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
96
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
97
+ tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
98
+ tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
99
tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
100
} else if (insn) {
101
- tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
102
+ tcg_out32(s, insn | TAB(datalo, h.base, h.index));
103
} else {
104
insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
105
- tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
106
+ tcg_out32(s, insn | TAB(datalo, h.base, h.index));
107
tcg_out_movext(s, TCG_TYPE_REG, datalo,
108
TCG_TYPE_REG, opc & MO_SSIZE, datalo);
109
}
110
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
111
{
112
MemOp opc = get_memop(oi);
113
MemOp s_bits = opc & MO_SIZE;
114
- TCGReg rbase;
115
+ HostAddress h;
116
117
#ifdef CONFIG_SOFTMMU
118
tcg_insn_unit *label_ptr;
119
120
- addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false);
121
+ h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false);
122
+ h.base = TCG_REG_R3;
123
124
/* Load a pointer into the current opcode w/conditional branch-link. */
125
label_ptr = s->code_ptr;
126
tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
127
-
128
- rbase = TCG_REG_R3;
129
#else /* !CONFIG_SOFTMMU */
130
unsigned a_bits = get_alignment_bits(opc);
131
if (a_bits) {
132
tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
133
}
134
- rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
135
+ h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
136
+ h.index = addrlo;
137
if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
138
tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
139
- addrlo = TCG_REG_TMP1;
140
+ h.index = TCG_REG_TMP1;
141
}
142
#endif
143
144
if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
145
if (opc & MO_BSWAP) {
146
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
147
- tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
148
- tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
149
- } else if (rbase != 0) {
150
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
151
- tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
152
- tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
153
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
154
+ tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
155
+ tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
156
+ } else if (h.base != 0) {
157
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
158
+ tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
159
+ tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
160
} else {
161
- tcg_out32(s, STW | TAI(datahi, addrlo, 0));
162
- tcg_out32(s, STW | TAI(datalo, addrlo, 4));
163
+ tcg_out32(s, STW | TAI(datahi, h.index, 0));
164
+ tcg_out32(s, STW | TAI(datalo, h.index, 4));
165
}
166
} else {
167
uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
168
if (!have_isa_2_06 && insn == STDBRX) {
169
- tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
170
- tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
171
+ tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
172
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4));
173
tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
174
- tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
175
+ tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1));
176
} else {
177
- tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
178
+ tcg_out32(s, insn | SAB(datalo, h.base, h.index));
37
}
179
}
38
}
180
}
39
181
40
/* Opcodes that touch guest memory stop the mb optimization. */
41
ctx->prev_mb = NULL;
42
- return false;
43
+
44
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
45
+}
46
+
47
+static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
48
+{
49
+ /* Opcodes that touch guest memory stop the mb optimization. */
50
+ ctx->prev_mb = NULL;
51
+ return finish_folding(ctx, op);
52
}
53
54
static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
55
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
56
break;
57
case INDEX_op_qemu_ld_a32_i32:
58
case INDEX_op_qemu_ld_a64_i32:
59
+ done = fold_qemu_ld_1reg(&ctx, op);
60
+ break;
61
case INDEX_op_qemu_ld_a32_i64:
62
case INDEX_op_qemu_ld_a64_i64:
63
+ if (TCG_TARGET_REG_BITS == 64) {
64
+ done = fold_qemu_ld_1reg(&ctx, op);
65
+ break;
66
+ }
67
+ QEMU_FALLTHROUGH;
68
case INDEX_op_qemu_ld_a32_i128:
69
case INDEX_op_qemu_ld_a64_i128:
70
- done = fold_qemu_ld(&ctx, op);
71
+ done = fold_qemu_ld_2reg(&ctx, op);
72
break;
73
case INDEX_op_qemu_st8_a32_i32:
74
case INDEX_op_qemu_st8_a64_i32:
75
--
182
--
76
2.43.0
183
2.34.1
184
185
1
The input which overlaps the sign bit of the output can
have its input s_mask propagated to the output s_mask.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

The port currently does not support "oversize" guests, which
means riscv32 can only target 32-bit guests. We will soon be
building TCG once for all guests. This implies that we can
only support riscv64.

Since all Linux distributions target riscv64 not riscv32,
this is not much of a restriction and simplifies the code.

The brcond2 and setcond2 opcodes are exclusive to 32-bit hosts,
so we can and should remove the stubs.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
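A small sketch of the s_mask rule in the fold_deposit() hunk below (not the
QEMU code; s1/s2 stand for the sign masks of the two inputs):

    #include <stdint.h>

    static uint64_t deposit_s_mask(uint64_t s1, uint64_t s2,
                                   int ofs, int len, int width)
    {
        if (ofs + len == width) {
            /* The field reaches the sign bit: t2 supplies the sign bits. */
            return s2 << ofs;
        }
        /* Otherwise only t1's sign bits above the deposited field survive. */
        uint64_t low = (ofs + len >= 64) ? ~0ULL : ((1ULL << (ofs + len)) - 1);
        return s1 & ~low;
    }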
6
---
15
---
7
tcg/optimize.c | 14 ++++++++++++--
16
tcg/riscv/tcg-target-con-set.h | 8 --
8
1 file changed, 12 insertions(+), 2 deletions(-)
17
tcg/riscv/tcg-target.h | 22 ++--
18
tcg/riscv/tcg-target.c.inc | 232 +++++++++------------------------
19
3 files changed, 72 insertions(+), 190 deletions(-)
9
20
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
21
diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
11
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
23
--- a/tcg/riscv/tcg-target-con-set.h
13
+++ b/tcg/optimize.c
24
+++ b/tcg/riscv/tcg-target-con-set.h
14
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
25
@@ -XXX,XX +XXX,XX @@ C_O0_I1(r)
15
TempOptInfo *t2 = arg_info(op->args[2]);
26
C_O0_I2(LZ, L)
16
int ofs = op->args[3];
27
C_O0_I2(rZ, r)
17
int len = op->args[4];
28
C_O0_I2(rZ, rZ)
18
+ int width;
29
-C_O0_I3(LZ, L, L)
19
TCGOpcode and_opc;
30
-C_O0_I3(LZ, LZ, L)
20
- uint64_t z_mask;
31
-C_O0_I4(LZ, LZ, L, L)
21
+ uint64_t z_mask, s_mask;
32
-C_O0_I4(rZ, rZ, rZ, rZ)
22
33
C_O1_I1(r, L)
23
if (ti_is_const(t1) && ti_is_const(t2)) {
34
C_O1_I1(r, r)
24
return tcg_opt_gen_movi(ctx, op, op->args[0],
35
-C_O1_I2(r, L, L)
25
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
36
C_O1_I2(r, r, ri)
26
switch (ctx->type) {
37
C_O1_I2(r, r, rI)
27
case TCG_TYPE_I32:
38
C_O1_I2(r, rZ, rN)
28
and_opc = INDEX_op_and_i32;
39
C_O1_I2(r, rZ, rZ)
29
+ width = 32;
40
-C_O1_I4(r, rZ, rZ, rZ, rZ)
30
break;
41
-C_O2_I1(r, r, L)
31
case TCG_TYPE_I64:
42
-C_O2_I2(r, r, L, L)
32
and_opc = INDEX_op_and_i64;
43
C_O2_I4(r, r, rZ, rZ, rM, rM)
33
+ width = 64;
44
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
45
index XXXXXXX..XXXXXXX 100644
46
--- a/tcg/riscv/tcg-target.h
47
+++ b/tcg/riscv/tcg-target.h
48
@@ -XXX,XX +XXX,XX @@
49
#ifndef RISCV_TCG_TARGET_H
50
#define RISCV_TCG_TARGET_H
51
52
-#if __riscv_xlen == 32
53
-# define TCG_TARGET_REG_BITS 32
54
-#elif __riscv_xlen == 64
55
-# define TCG_TARGET_REG_BITS 64
56
+/*
57
+ * We don't support oversize guests.
58
+ * Since we will only build tcg once, this in turn requires a 64-bit host.
59
+ */
60
+#if __riscv_xlen != 64
61
+#error "unsupported code generation mode"
62
#endif
63
+#define TCG_TARGET_REG_BITS 64
64
65
#define TCG_TARGET_INSN_UNIT_SIZE 4
66
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20
67
@@ -XXX,XX +XXX,XX @@ typedef enum {
68
#define TCG_TARGET_STACK_ALIGN 16
69
#define TCG_TARGET_CALL_STACK_OFFSET 0
70
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
71
-#if TCG_TARGET_REG_BITS == 32
72
-#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
73
-#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
74
-#else
75
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
76
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
77
-#endif
78
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
79
80
/* optional instructions */
81
@@ -XXX,XX +XXX,XX @@ typedef enum {
82
#define TCG_TARGET_HAS_sub2_i32 1
83
#define TCG_TARGET_HAS_mulu2_i32 0
84
#define TCG_TARGET_HAS_muls2_i32 0
85
-#define TCG_TARGET_HAS_muluh_i32 (TCG_TARGET_REG_BITS == 32)
86
-#define TCG_TARGET_HAS_mulsh_i32 (TCG_TARGET_REG_BITS == 32)
87
+#define TCG_TARGET_HAS_muluh_i32 0
88
+#define TCG_TARGET_HAS_mulsh_i32 0
89
#define TCG_TARGET_HAS_ext8s_i32 1
90
#define TCG_TARGET_HAS_ext16s_i32 1
91
#define TCG_TARGET_HAS_ext8u_i32 1
92
@@ -XXX,XX +XXX,XX @@ typedef enum {
93
#define TCG_TARGET_HAS_setcond2 1
94
#define TCG_TARGET_HAS_qemu_st8_i32 0
95
96
-#if TCG_TARGET_REG_BITS == 64
97
#define TCG_TARGET_HAS_movcond_i64 0
98
#define TCG_TARGET_HAS_div_i64 1
99
#define TCG_TARGET_HAS_rem_i64 1
100
@@ -XXX,XX +XXX,XX @@ typedef enum {
101
#define TCG_TARGET_HAS_muls2_i64 0
102
#define TCG_TARGET_HAS_muluh_i64 1
103
#define TCG_TARGET_HAS_mulsh_i64 1
104
-#endif
105
106
#define TCG_TARGET_DEFAULT_MO (0)
107
108
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
109
index XXXXXXX..XXXXXXX 100644
110
--- a/tcg/riscv/tcg-target.c.inc
111
+++ b/tcg/riscv/tcg-target.c.inc
112
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
113
#define SOFTMMU_RESERVE_REGS 0
114
#endif
115
116
-
117
-static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
118
-{
119
- if (TCG_TARGET_REG_BITS == 32) {
120
- return sextract32(val, pos, len);
121
- } else {
122
- return sextract64(val, pos, len);
123
- }
124
-}
125
+#define sextreg sextract64
126
127
/* test if a constant matches the constraint */
128
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
129
@@ -XXX,XX +XXX,XX @@ typedef enum {
130
OPC_XOR = 0x4033,
131
OPC_XORI = 0x4013,
132
133
-#if TCG_TARGET_REG_BITS == 64
134
OPC_ADDIW = 0x1b,
135
OPC_ADDW = 0x3b,
136
OPC_DIVUW = 0x200503b,
137
@@ -XXX,XX +XXX,XX @@ typedef enum {
138
OPC_SRLIW = 0x501b,
139
OPC_SRLW = 0x503b,
140
OPC_SUBW = 0x4000003b,
141
-#else
142
- /* Simplify code throughout by defining aliases for RV32. */
143
- OPC_ADDIW = OPC_ADDI,
144
- OPC_ADDW = OPC_ADD,
145
- OPC_DIVUW = OPC_DIVU,
146
- OPC_DIVW = OPC_DIV,
147
- OPC_MULW = OPC_MUL,
148
- OPC_REMUW = OPC_REMU,
149
- OPC_REMW = OPC_REM,
150
- OPC_SLLIW = OPC_SLLI,
151
- OPC_SLLW = OPC_SLL,
152
- OPC_SRAIW = OPC_SRAI,
153
- OPC_SRAW = OPC_SRA,
154
- OPC_SRLIW = OPC_SRLI,
155
- OPC_SRLW = OPC_SRL,
156
- OPC_SUBW = OPC_SUB,
157
-#endif
158
159
OPC_FENCE = 0x0000000f,
160
OPC_NOP = OPC_ADDI, /* nop = addi r0,r0,0 */
161
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
162
tcg_target_long lo, hi, tmp;
163
int shift, ret;
164
165
- if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
166
+ if (type == TCG_TYPE_I32) {
167
val = (int32_t)val;
168
}
169
170
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
171
}
172
173
hi = val - lo;
174
- if (TCG_TARGET_REG_BITS == 32 || val == (int32_t)val) {
175
+ if (val == (int32_t)val) {
176
tcg_out_opc_upper(s, OPC_LUI, rd, hi);
177
if (lo != 0) {
178
tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo);
179
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
180
return;
181
}
182
183
- /* We can only be here if TCG_TARGET_REG_BITS != 32 */
184
tmp = tcg_pcrel_diff(s, (void *)val);
185
if (tmp == (int32_t)tmp) {
186
tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
187
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
188
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
189
TCGReg arg1, intptr_t arg2)
190
{
191
- bool is32bit = (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32);
192
- tcg_out_ldst(s, is32bit ? OPC_LW : OPC_LD, arg, arg1, arg2);
193
+ RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_LW : OPC_LD;
194
+ tcg_out_ldst(s, insn, arg, arg1, arg2);
195
}
196
197
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
198
TCGReg arg1, intptr_t arg2)
199
{
200
- bool is32bit = (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32);
201
- tcg_out_ldst(s, is32bit ? OPC_SW : OPC_SD, arg, arg1, arg2);
202
+ RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SW : OPC_SD;
203
+ tcg_out_ldst(s, insn, arg, arg1, arg2);
204
}
205
206
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
207
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
208
}
209
}
210
211
-static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
212
- TCGReg bl, TCGReg bh, TCGLabel *l)
213
-{
214
- /* todo */
215
- g_assert_not_reached();
216
-}
217
-
218
-static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
219
- TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
220
-{
221
- /* todo */
222
- g_assert_not_reached();
223
-}
224
-
225
static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
226
{
227
TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
228
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
229
if (offset == sextreg(offset, 0, 20)) {
230
/* short jump: -2097150 to 2097152 */
231
tcg_out_opc_jump(s, OPC_JAL, link, offset);
232
- } else if (TCG_TARGET_REG_BITS == 32 || offset == (int32_t)offset) {
233
+ } else if (offset == (int32_t)offset) {
234
/* long jump: -2147483646 to 2147483648 */
235
tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
236
tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
237
ret = reloc_call(s->code_ptr - 2, arg);
238
tcg_debug_assert(ret == true);
239
- } else if (TCG_TARGET_REG_BITS == 64) {
240
+ } else {
241
/* far jump: 64-bit */
242
tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12);
243
tcg_target_long base = (tcg_target_long)arg - imm;
244
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
245
tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
246
- } else {
247
- g_assert_not_reached();
248
}
249
}
250
251
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
252
#endif
253
};
254
255
-/* We don't support oversize guests */
256
-QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS < TARGET_LONG_BITS);
257
-
258
/* We expect to use a 12-bit negative offset from ENV. */
259
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
260
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
261
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
262
tcg_debug_assert(ok);
263
}
264
265
-static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
266
- TCGReg addrh, MemOpIdx oi,
267
+static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, MemOpIdx oi,
268
tcg_insn_unit **label_ptr, bool is_load)
269
{
270
MemOp opc = get_memop(oi);
271
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
272
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
273
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
274
275
- tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addrl,
276
+ tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr,
277
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
278
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
279
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
280
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
281
/* Clear the non-page, non-alignment bits from the address. */
282
compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
283
if (compare_mask == sextreg(compare_mask, 0, 12)) {
284
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addrl, compare_mask);
285
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr, compare_mask);
286
} else {
287
tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
288
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
289
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr);
290
}
291
292
/* Compare masked address with the TLB entry. */
293
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
294
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
295
296
/* TLB Hit - translate address using addend. */
297
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
298
- tcg_out_ext32u(s, TCG_REG_TMP0, addrl);
299
- addrl = TCG_REG_TMP0;
300
+ if (TARGET_LONG_BITS == 32) {
301
+ tcg_out_ext32u(s, TCG_REG_TMP0, addr);
302
+ addr = TCG_REG_TMP0;
303
}
304
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
305
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr);
306
return TCG_REG_TMP0;
307
}
308
309
static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
310
- TCGType ext,
311
- TCGReg datalo, TCGReg datahi,
312
- TCGReg addrlo, TCGReg addrhi,
313
- void *raddr, tcg_insn_unit **label_ptr)
314
+ TCGType data_type, TCGReg data_reg,
315
+ TCGReg addr_reg, void *raddr,
316
+ tcg_insn_unit **label_ptr)
317
{
318
TCGLabelQemuLdst *label = new_ldst_label(s);
319
320
label->is_ld = is_ld;
321
label->oi = oi;
322
- label->type = ext;
323
- label->datalo_reg = datalo;
324
- label->datahi_reg = datahi;
325
- label->addrlo_reg = addrlo;
326
- label->addrhi_reg = addrhi;
327
+ label->type = data_type;
328
+ label->datalo_reg = data_reg;
329
+ label->addrlo_reg = addr_reg;
330
label->raddr = tcg_splitwx_to_rx(raddr);
331
label->label_ptr[0] = label_ptr[0];
332
}
333
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
334
TCGReg a2 = tcg_target_call_iarg_regs[2];
335
TCGReg a3 = tcg_target_call_iarg_regs[3];
336
337
- /* We don't support oversize guests */
338
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
339
- g_assert_not_reached();
340
- }
341
-
342
/* resolve label address */
343
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
344
return false;
345
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
346
TCGReg a3 = tcg_target_call_iarg_regs[3];
347
TCGReg a4 = tcg_target_call_iarg_regs[4];
348
349
- /* We don't support oversize guests */
350
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
351
- g_assert_not_reached();
352
- }
353
-
354
/* resolve label address */
355
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
356
return false;
357
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
358
359
#endif /* CONFIG_SOFTMMU */
360
361
-static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
362
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
363
TCGReg base, MemOp opc, bool is_64)
364
{
365
/* Byte swapping is left to middle-end expansion. */
366
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
367
368
switch (opc & (MO_SSIZE)) {
369
case MO_UB:
370
- tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
371
+ tcg_out_opc_imm(s, OPC_LBU, val, base, 0);
372
break;
373
case MO_SB:
374
- tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
375
+ tcg_out_opc_imm(s, OPC_LB, val, base, 0);
376
break;
377
case MO_UW:
378
- tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
379
+ tcg_out_opc_imm(s, OPC_LHU, val, base, 0);
380
break;
381
case MO_SW:
382
- tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
383
+ tcg_out_opc_imm(s, OPC_LH, val, base, 0);
384
break;
385
case MO_UL:
386
- if (TCG_TARGET_REG_BITS == 64 && is_64) {
387
- tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
388
+ if (is_64) {
389
+ tcg_out_opc_imm(s, OPC_LWU, val, base, 0);
390
break;
391
}
392
/* FALLTHRU */
393
case MO_SL:
394
- tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
395
+ tcg_out_opc_imm(s, OPC_LW, val, base, 0);
396
break;
397
case MO_UQ:
398
- /* Prefer to load from offset 0 first, but allow for overlap. */
399
- if (TCG_TARGET_REG_BITS == 64) {
400
- tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
401
- } else if (lo != base) {
402
- tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
403
- tcg_out_opc_imm(s, OPC_LW, hi, base, 4);
404
- } else {
405
- tcg_out_opc_imm(s, OPC_LW, hi, base, 4);
406
- tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
407
- }
408
+ tcg_out_opc_imm(s, OPC_LD, val, base, 0);
34
break;
409
break;
35
default:
410
default:
36
g_assert_not_reached();
411
g_assert_not_reached();
37
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
412
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
38
return fold_and(ctx, op);
413
39
}
414
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
40
415
{
41
+ /* The s_mask from the top portion of the deposit is still valid. */
416
- TCGReg addr_regl, addr_regh __attribute__((unused));
42
+ if (ofs + len == width) {
417
- TCGReg data_regl, data_regh;
43
+ s_mask = t2->s_mask << ofs;
418
+ TCGReg addr_reg, data_reg;
44
+ } else {
419
MemOpIdx oi;
45
+ s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
420
MemOp opc;
46
+ }
421
#if defined(CONFIG_SOFTMMU)
47
+
422
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
48
z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
423
#endif
49
- return fold_masks_z(ctx, op, z_mask);
424
TCGReg base;
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
425
51
}
426
- data_regl = *args++;
52
427
- data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
53
static bool fold_divide(OptContext *ctx, TCGOp *op)
428
- addr_regl = *args++;
429
- addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
430
+ data_reg = *args++;
431
+ addr_reg = *args++;
432
oi = *args++;
433
opc = get_memop(oi);
434
435
#if defined(CONFIG_SOFTMMU)
436
- base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
437
- tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
438
- add_qemu_ldst_label(s, 1, oi,
439
- (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
440
- data_regl, data_regh, addr_regl, addr_regh,
441
- s->code_ptr, label_ptr);
442
+ base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
443
+ tcg_out_qemu_ld_direct(s, data_reg, base, opc, is_64);
444
+ add_qemu_ldst_label(s, 1, oi, (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
445
+ data_reg, addr_reg, s->code_ptr, label_ptr);
446
#else
447
a_bits = get_alignment_bits(opc);
448
if (a_bits) {
449
- tcg_out_test_alignment(s, true, addr_regl, a_bits);
450
+ tcg_out_test_alignment(s, true, addr_reg, a_bits);
451
}
452
- base = addr_regl;
453
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
454
+ base = addr_reg;
455
+ if (TARGET_LONG_BITS == 32) {
456
tcg_out_ext32u(s, TCG_REG_TMP0, base);
457
base = TCG_REG_TMP0;
458
}
459
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
460
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
461
base = TCG_REG_TMP0;
462
}
463
- tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
464
+ tcg_out_qemu_ld_direct(s, data_reg, base, opc, is_64);
465
#endif
466
}
467
468
-static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
469
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
470
TCGReg base, MemOp opc)
471
{
472
/* Byte swapping is left to middle-end expansion. */
473
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
474
475
switch (opc & (MO_SSIZE)) {
476
case MO_8:
477
- tcg_out_opc_store(s, OPC_SB, base, lo, 0);
478
+ tcg_out_opc_store(s, OPC_SB, base, val, 0);
479
break;
480
case MO_16:
481
- tcg_out_opc_store(s, OPC_SH, base, lo, 0);
482
+ tcg_out_opc_store(s, OPC_SH, base, val, 0);
483
break;
484
case MO_32:
485
- tcg_out_opc_store(s, OPC_SW, base, lo, 0);
486
+ tcg_out_opc_store(s, OPC_SW, base, val, 0);
487
break;
488
case MO_64:
489
- if (TCG_TARGET_REG_BITS == 64) {
490
- tcg_out_opc_store(s, OPC_SD, base, lo, 0);
491
- } else {
492
- tcg_out_opc_store(s, OPC_SW, base, lo, 0);
493
- tcg_out_opc_store(s, OPC_SW, base, hi, 4);
494
- }
495
+ tcg_out_opc_store(s, OPC_SD, base, val, 0);
496
break;
497
default:
498
g_assert_not_reached();
499
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
500
501
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
502
{
503
- TCGReg addr_regl, addr_regh __attribute__((unused));
504
- TCGReg data_regl, data_regh;
505
+ TCGReg addr_reg, data_reg;
506
MemOpIdx oi;
507
MemOp opc;
508
#if defined(CONFIG_SOFTMMU)
509
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
510
#endif
511
TCGReg base;
512
513
- data_regl = *args++;
514
- data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
515
- addr_regl = *args++;
516
- addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
517
+ data_reg = *args++;
518
+ addr_reg = *args++;
519
oi = *args++;
520
opc = get_memop(oi);
521
522
#if defined(CONFIG_SOFTMMU)
523
- base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
524
- tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
525
- add_qemu_ldst_label(s, 0, oi,
526
- (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
527
- data_regl, data_regh, addr_regl, addr_regh,
528
- s->code_ptr, label_ptr);
529
+ base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
530
+ tcg_out_qemu_st_direct(s, data_reg, base, opc);
531
+ add_qemu_ldst_label(s, 0, oi, (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
532
+ data_reg, addr_reg, s->code_ptr, label_ptr);
533
#else
534
a_bits = get_alignment_bits(opc);
535
if (a_bits) {
536
- tcg_out_test_alignment(s, false, addr_regl, a_bits);
537
+ tcg_out_test_alignment(s, false, addr_reg, a_bits);
538
}
539
- base = addr_regl;
540
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
541
+ base = addr_reg;
542
+ if (TARGET_LONG_BITS == 32) {
543
tcg_out_ext32u(s, TCG_REG_TMP0, base);
544
base = TCG_REG_TMP0;
545
}
546
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
547
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
548
base = TCG_REG_TMP0;
549
}
550
- tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
551
+ tcg_out_qemu_st_direct(s, data_reg, base, opc);
552
#endif
553
}
554
555
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
556
case INDEX_op_brcond_i64:
557
tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
558
break;
559
- case INDEX_op_brcond2_i32:
560
- tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5]));
561
- break;
562
563
case INDEX_op_setcond_i32:
564
case INDEX_op_setcond_i64:
565
tcg_out_setcond(s, args[3], a0, a1, a2);
566
break;
567
- case INDEX_op_setcond2_i32:
568
- tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
569
- break;
570
571
case INDEX_op_qemu_ld_i32:
572
tcg_out_qemu_ld(s, args, false);
573
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
574
case INDEX_op_sub2_i64:
575
return C_O2_I4(r, r, rZ, rZ, rM, rM);
576
577
- case INDEX_op_brcond2_i32:
578
- return C_O0_I4(rZ, rZ, rZ, rZ);
579
-
580
- case INDEX_op_setcond2_i32:
581
- return C_O1_I4(r, rZ, rZ, rZ, rZ);
582
-
583
case INDEX_op_qemu_ld_i32:
584
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
585
- ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
586
- case INDEX_op_qemu_st_i32:
587
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
588
- ? C_O0_I2(LZ, L) : C_O0_I3(LZ, L, L));
589
case INDEX_op_qemu_ld_i64:
590
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
591
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, L)
592
- : C_O2_I2(r, r, L, L));
593
+ return C_O1_I1(r, L);
594
+ case INDEX_op_qemu_st_i32:
595
case INDEX_op_qemu_st_i64:
596
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(LZ, L)
597
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(LZ, LZ, L)
598
- : C_O0_I4(LZ, LZ, L, L));
599
+ return C_O0_I2(LZ, L);
600
601
default:
602
g_assert_not_reached();
603
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
604
static void tcg_target_init(TCGContext *s)
605
{
606
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
607
- if (TCG_TARGET_REG_BITS == 64) {
608
- tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
609
- }
610
+ tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
611
612
tcg_target_call_clobber_regs = -1u;
613
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
54
--
614
--
55
2.43.0
615
2.34.1
616
617
1
Stores have no output operands, and so need no further work.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Interpret the variable argument placement in the caller. Pass data_type
instead of is64 -- there are several places where we already convert back
from bool to type. Clean things up by using type throughout.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
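A minimal sketch of the is64 -> TCGType change described above (illustrative
only; the real code passes TCG_TYPE_I32/TCG_TYPE_I64 from the opcode switch
in the hunk below):

    typedef enum { TCG_TYPE_I32, TCG_TYPE_I64 } TCGType;  /* stand-in */

    /* Old style: every helper converted the bool back to a type. */
    static TCGType type_from_is64(int is64)
    {
        return is64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
    }
    /* New style: helpers take a TCGType directly, so this conversion
     * disappears and the caller passes the type it already knows. */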
5
---
8
---
6
tcg/optimize.c | 11 +++++------
9
tcg/riscv/tcg-target.c.inc | 66 ++++++++++++++------------------------
7
1 file changed, 5 insertions(+), 6 deletions(-)
10
1 file changed, 24 insertions(+), 42 deletions(-)
8
11
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
14
--- a/tcg/riscv/tcg-target.c.inc
12
+++ b/tcg/optimize.c
15
+++ b/tcg/riscv/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
17
#endif /* CONFIG_SOFTMMU */
18
19
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
20
- TCGReg base, MemOp opc, bool is_64)
21
+ TCGReg base, MemOp opc, TCGType type)
14
{
22
{
15
/* Opcodes that touch guest memory stop the mb optimization. */
23
/* Byte swapping is left to middle-end expansion. */
16
ctx->prev_mb = NULL;
24
tcg_debug_assert((opc & MO_BSWAP) == 0);
17
- return false;
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
18
+ return true;
26
tcg_out_opc_imm(s, OPC_LH, val, base, 0);
27
break;
28
case MO_UL:
29
- if (is_64) {
30
+ if (type == TCG_TYPE_I64) {
31
tcg_out_opc_imm(s, OPC_LWU, val, base, 0);
32
break;
33
}
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
35
}
19
}
36
}
20
37
21
static bool fold_remainder(OptContext *ctx, TCGOp *op)
38
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
22
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
39
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
23
40
+ MemOpIdx oi, TCGType data_type)
24
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
41
{
25
remove_mem_copy_all(ctx);
42
- TCGReg addr_reg, data_reg;
26
- return false;
43
- MemOpIdx oi;
27
+ return true;
44
- MemOp opc;
45
-#if defined(CONFIG_SOFTMMU)
46
- tcg_insn_unit *label_ptr[1];
47
-#else
48
- unsigned a_bits;
49
-#endif
50
+ MemOp opc = get_memop(oi);
51
TCGReg base;
52
53
- data_reg = *args++;
54
- addr_reg = *args++;
55
- oi = *args++;
56
- opc = get_memop(oi);
57
-
58
#if defined(CONFIG_SOFTMMU)
59
+ tcg_insn_unit *label_ptr[1];
60
+
61
base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
62
- tcg_out_qemu_ld_direct(s, data_reg, base, opc, is_64);
63
- add_qemu_ldst_label(s, 1, oi, (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
64
- data_reg, addr_reg, s->code_ptr, label_ptr);
65
+ tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
66
+ add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
67
+ s->code_ptr, label_ptr);
68
#else
69
- a_bits = get_alignment_bits(opc);
70
+ unsigned a_bits = get_alignment_bits(opc);
71
if (a_bits) {
72
tcg_out_test_alignment(s, true, addr_reg, a_bits);
28
}
73
}
29
74
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
30
switch (op->opc) {
75
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
31
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
76
base = TCG_REG_TMP0;
32
g_assert_not_reached();
33
}
77
}
34
remove_mem_copy_in(ctx, ofs, ofs + lm1);
78
- tcg_out_qemu_ld_direct(s, data_reg, base, opc, is_64);
35
- return false;
79
+ tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
36
+ return true;
80
#endif
37
}
81
}
38
82
39
static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
83
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
40
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
41
TCGType type;
42
43
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
44
- fold_tcg_st(ctx, op);
45
- return false;
46
+ return fold_tcg_st(ctx, op);
47
}
84
}
48
49
src = arg_temp(op->args[0]);
50
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
51
last = ofs + tcg_type_size(type) - 1;
52
remove_mem_copy_in(ctx, ofs, last);
53
record_mem_copy(ctx, type, src, ofs, last);
54
- return false;
55
+ return true;
56
}
85
}
57
86
58
static bool fold_xor(OptContext *ctx, TCGOp *op)
87
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
88
+static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
89
+ MemOpIdx oi, TCGType data_type)
90
{
91
- TCGReg addr_reg, data_reg;
92
- MemOpIdx oi;
93
- MemOp opc;
94
-#if defined(CONFIG_SOFTMMU)
95
- tcg_insn_unit *label_ptr[1];
96
-#else
97
- unsigned a_bits;
98
-#endif
99
+ MemOp opc = get_memop(oi);
100
TCGReg base;
101
102
- data_reg = *args++;
103
- addr_reg = *args++;
104
- oi = *args++;
105
- opc = get_memop(oi);
106
-
107
#if defined(CONFIG_SOFTMMU)
108
+ tcg_insn_unit *label_ptr[1];
109
+
110
base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
111
tcg_out_qemu_st_direct(s, data_reg, base, opc);
112
- add_qemu_ldst_label(s, 0, oi, (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
113
- data_reg, addr_reg, s->code_ptr, label_ptr);
114
+ add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
115
+ s->code_ptr, label_ptr);
116
#else
117
- a_bits = get_alignment_bits(opc);
118
+ unsigned a_bits = get_alignment_bits(opc);
119
if (a_bits) {
120
tcg_out_test_alignment(s, false, addr_reg, a_bits);
121
}
122
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
123
break;
124
125
case INDEX_op_qemu_ld_i32:
126
- tcg_out_qemu_ld(s, args, false);
127
+ tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
128
break;
129
case INDEX_op_qemu_ld_i64:
130
- tcg_out_qemu_ld(s, args, true);
131
+ tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
132
break;
133
case INDEX_op_qemu_st_i32:
134
- tcg_out_qemu_st(s, args, false);
135
+ tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
136
break;
137
case INDEX_op_qemu_st_i64:
138
- tcg_out_qemu_st(s, args, true);
139
+ tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
140
break;
141
142
case INDEX_op_extrh_i64_i32:
59
--
143
--
60
2.43.0
144
2.34.1
145
146
1
Change return from bool to int; distinguish between
complete folding, simplification, and no change.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

We need to set this in TCGLabelQemuLdst, so plumb this
all the way through from tcg_out_op.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
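A sketch of the tri-state return convention described above (not the QEMU
code; 1 = fully folded, -1 = simplified in place, 0 = unchanged):

    static int try_fold(int folded, int simplified)
    {
        if (folded) {
            return 1;       /* caller is done with this op */
        }
        if (simplified) {
            return -1;      /* op rewritten; skip the remaining transforms */
        }
        return 0;           /* fall through to the generic handling */
    }

    /* Caller pattern, as in the fold_setcond() hunk below:
     *     i = try_fold(...);
     *     if (i > 0)  return true;
     *     if (i == 0) try_the_next_transform();
     */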
6
---
6
---
7
tcg/optimize.c | 22 ++++++++++++++--------
7
tcg/s390x/tcg-target.c.inc | 22 ++++++++++++++--------
8
1 file changed, 14 insertions(+), 8 deletions(-)
8
1 file changed, 14 insertions(+), 8 deletions(-)
9
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
12
--- a/tcg/s390x/tcg-target.c.inc
13
+++ b/tcg/optimize.c
13
+++ b/tcg/s390x/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
15
return finish_folding(ctx, op);
16
}
15
}
17
16
18
-static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
17
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
19
+/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
18
- TCGReg data, TCGReg addr,
20
+static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
19
+ TCGType type, TCGReg data, TCGReg addr,
20
tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
21
{
21
{
22
uint64_t a_zmask, b_val;
22
TCGLabelQemuLdst *label = new_ldst_label(s);
23
TCGCond cond;
23
24
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
24
label->is_ld = is_ld;
25
op->opc = xor_opc;
25
label->oi = oi;
26
op->args[2] = arg_new_constant(ctx, 1);
26
+ label->type = type;
27
}
27
label->datalo_reg = data;
28
- return false;
28
label->addrlo_reg = addr;
29
+ return -1;
29
label->raddr = tcg_splitwx_to_rx(raddr);
30
}
30
@@ -XXX,XX +XXX,XX @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
31
}
31
#endif /* CONFIG_SOFTMMU */
32
-
32
33
- return false;
33
static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
34
+ return 0;
34
- MemOpIdx oi)
35
+ MemOpIdx oi, TCGType data_type)
36
{
37
MemOp opc = get_memop(oi);
38
#ifdef CONFIG_SOFTMMU
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
40
41
tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
42
43
- add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
44
+ add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
45
+ s->code_ptr, label_ptr);
46
#else
47
TCGReg index_reg;
48
tcg_target_long disp;
49
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
35
}
50
}
36
51
37
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
52
static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
38
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
53
- MemOpIdx oi)
39
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
54
+ MemOpIdx oi, TCGType data_type)
40
}
55
{
41
56
MemOp opc = get_memop(oi);
42
- if (fold_setcond_zmask(ctx, op, false)) {
57
#ifdef CONFIG_SOFTMMU
43
+ i = fold_setcond_zmask(ctx, op, false);
58
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
44
+ if (i > 0) {
59
45
return true;
60
tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
46
}
61
47
- fold_setcond_tst_pow2(ctx, op, false);
62
- add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
48
+ if (i == 0) {
63
+ add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
49
+ fold_setcond_tst_pow2(ctx, op, false);
64
+ s->code_ptr, label_ptr);
50
+ }
65
#else
51
66
TCGReg index_reg;
52
ctx->z_mask = 1;
67
tcg_target_long disp;
53
return false;
68
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
54
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
69
break;
55
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
70
56
}
71
case INDEX_op_qemu_ld_i32:
57
72
- /* ??? Technically we can use a non-extending instruction. */
58
- if (fold_setcond_zmask(ctx, op, true)) {
73
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
59
+ i = fold_setcond_zmask(ctx, op, true);
74
+ break;
60
+ if (i > 0) {
75
case INDEX_op_qemu_ld_i64:
61
return true;
76
- tcg_out_qemu_ld(s, args[0], args[1], args[2]);
62
}
77
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
63
- fold_setcond_tst_pow2(ctx, op, true);
78
break;
64
+ if (i == 0) {
79
case INDEX_op_qemu_st_i32:
65
+ fold_setcond_tst_pow2(ctx, op, true);
80
+ tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
66
+ }
81
+ break;
67
82
case INDEX_op_qemu_st_i64:
68
/* Value is {0,-1} so all bits are repetitions of the sign. */
83
- tcg_out_qemu_st(s, args[0], args[1], args[2]);
69
ctx->s_mask = -1;
84
+ tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
85
break;
86
87
case INDEX_op_ld16s_i64:
70
--
88
--
71
2.43.0
89
2.34.1
90
91
1
There are only a few logical operations which can compute
an "affected" mask. Split out handling of this optimization
to a separate function, only to be called when applicable.

Remove the a_mask field from OptContext, as the mask is
no longer stored anywhere.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Collect the 3 potential parts of the host address into a struct.
Reorg tcg_out_qemu_{ld,st}_direct to use it.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
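A sketch of the "affected mask" test described above (not the QEMU code):
for AND with a constant, a result bit can differ from input1 only where
input1 may be nonzero and the constant bit is zero; if no such bit exists,
the AND is just a copy.

    #include <stdint.h>
    #include <stdbool.h>

    static bool and_const_is_copy(uint64_t z1, uint64_t constant)
    {
        uint64_t a_mask = z1 & ~constant;   /* bits that could change */
        return a_mask == 0;                 /* fold to a mov if none can */
    }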
10
---
6
---
11
tcg/optimize.c | 42 +++++++++++++++++++++++++++---------------
7
tcg/s390x/tcg-target.c.inc | 109 ++++++++++++++++++++-----------------
12
1 file changed, 27 insertions(+), 15 deletions(-)
8
1 file changed, 60 insertions(+), 49 deletions(-)
13
9
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
12
--- a/tcg/s390x/tcg-target.c.inc
17
+++ b/tcg/optimize.c
13
+++ b/tcg/s390x/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
19
QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
15
tcg_out_call_int(s, dest);
20
16
}
21
/* In flight values from optimization. */
17
22
- uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
18
+typedef struct {
23
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
19
+ TCGReg base;
24
uint64_t s_mask; /* mask of clrsb(value) bits */
20
+ TCGReg index;
25
TCGType type;
21
+ int disp;
26
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
22
+} HostAddress;
27
23
+
28
static bool fold_masks(OptContext *ctx, TCGOp *op)
24
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
29
{
25
- TCGReg base, TCGReg index, int disp)
30
- uint64_t a_mask = ctx->a_mask;
26
+ HostAddress h)
31
uint64_t z_mask = ctx->z_mask;
27
{
32
uint64_t s_mask = ctx->s_mask;
28
switch (opc & (MO_SSIZE | MO_BSWAP)) {
33
29
case MO_UB:
34
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
30
- tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
35
* type changing opcodes.
31
+ tcg_out_insn(s, RXY, LLGC, data, h.base, h.index, h.disp);
36
*/
32
break;
37
if (ctx->type == TCG_TYPE_I32) {
33
case MO_SB:
38
- a_mask = (int32_t)a_mask;
34
- tcg_out_insn(s, RXY, LGB, data, base, index, disp);
39
z_mask = (int32_t)z_mask;
35
+ tcg_out_insn(s, RXY, LGB, data, h.base, h.index, h.disp);
40
s_mask |= MAKE_64BIT_MASK(32, 32);
36
break;
41
ctx->z_mask = z_mask;
37
42
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
38
case MO_UW | MO_BSWAP:
43
if (z_mask == 0) {
39
/* swapped unsigned halfword load with upper bits zeroed */
44
return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
40
- tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
45
}
41
+ tcg_out_insn(s, RXY, LRVH, data, h.base, h.index, h.disp);
46
+ return false;
42
tcg_out_ext16u(s, data, data);
47
+}
43
break;
48
+
44
case MO_UW:
49
+/*
45
- tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
50
+ * An "affected" mask bit is 0 if and only if the result is identical
46
+ tcg_out_insn(s, RXY, LLGH, data, h.base, h.index, h.disp);
51
+ * to the first input. Thus if the entire mask is 0, the operation
47
break;
52
+ * is equivalent to a copy.
48
53
+ */
49
case MO_SW | MO_BSWAP:
54
+static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
50
/* swapped sign-extended halfword load */
55
+{
51
- tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
56
+ if (ctx->type == TCG_TYPE_I32) {
52
+ tcg_out_insn(s, RXY, LRVH, data, h.base, h.index, h.disp);
57
+ a_mask = (uint32_t)a_mask;
53
tcg_out_ext16s(s, TCG_TYPE_REG, data, data);
58
+ }
54
break;
59
if (a_mask == 0) {
55
case MO_SW:
60
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
56
- tcg_out_insn(s, RXY, LGH, data, base, index, disp);
61
}
57
+ tcg_out_insn(s, RXY, LGH, data, h.base, h.index, h.disp);
62
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
58
break;
63
* Known-zeros does not imply known-ones. Therefore unless
59
64
* arg2 is constant, we can't infer affected bits from it.
60
case MO_UL | MO_BSWAP:
65
*/
61
/* swapped unsigned int load with upper bits zeroed */
66
- if (arg_is_const(op->args[2])) {
62
- tcg_out_insn(s, RXY, LRV, data, base, index, disp);
67
- ctx->a_mask = z1 & ~z2;
63
+ tcg_out_insn(s, RXY, LRV, data, h.base, h.index, h.disp);
68
+ if (arg_is_const(op->args[2]) &&
64
tcg_out_ext32u(s, data, data);
69
+ fold_affected_mask(ctx, op, z1 & ~z2)) {
65
break;
70
+ return true;
66
case MO_UL:
71
}
67
- tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
72
68
+ tcg_out_insn(s, RXY, LLGF, data, h.base, h.index, h.disp);
73
return fold_masks(ctx, op);
69
break;
74
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
70
75
*/
71
case MO_SL | MO_BSWAP:
76
if (arg_is_const(op->args[2])) {
72
/* swapped sign-extended int load */
77
uint64_t z2 = ~arg_info(op->args[2])->z_mask;
73
- tcg_out_insn(s, RXY, LRV, data, base, index, disp);
78
- ctx->a_mask = z1 & ~z2;
74
+ tcg_out_insn(s, RXY, LRV, data, h.base, h.index, h.disp);
79
+ if (fold_affected_mask(ctx, op, z1 & ~z2)) {
75
tcg_out_ext32s(s, data, data);
80
+ return true;
76
break;
81
+ }
77
case MO_SL:
82
z1 &= z2;
78
- tcg_out_insn(s, RXY, LGF, data, base, index, disp);
83
}
79
+ tcg_out_insn(s, RXY, LGF, data, h.base, h.index, h.disp);
84
ctx->z_mask = z1;
80
break;
85
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
81
86
82
case MO_UQ | MO_BSWAP:
87
z_mask_old = arg_info(op->args[1])->z_mask;
83
- tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
88
z_mask = extract64(z_mask_old, pos, len);
84
+ tcg_out_insn(s, RXY, LRVG, data, h.base, h.index, h.disp);
89
- if (pos == 0) {
85
break;
90
- ctx->a_mask = z_mask_old ^ z_mask;
86
case MO_UQ:
91
+ if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
87
- tcg_out_insn(s, RXY, LG, data, base, index, disp);
92
+ return true;
88
+ tcg_out_insn(s, RXY, LG, data, h.base, h.index, h.disp);
93
}
89
break;
94
ctx->z_mask = z_mask;
90
95
ctx->s_mask = smask_from_zmask(z_mask);
91
default:
96
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
97
93
}
98
ctx->z_mask = z_mask;
94
99
ctx->s_mask = s_mask;
95
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
100
- if (!type_change) {
96
- TCGReg base, TCGReg index, int disp)
101
- ctx->a_mask = s_mask & ~s_mask_old;
97
+ HostAddress h)
102
+ if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
98
{
103
+ return true;
99
switch (opc & (MO_SIZE | MO_BSWAP)) {
104
}
100
case MO_UB:
105
101
- if (disp >= 0 && disp < 0x1000) {
106
return fold_masks(ctx, op);
102
- tcg_out_insn(s, RX, STC, data, base, index, disp);
107
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
103
+ if (h.disp >= 0 && h.disp < 0x1000) {
108
104
+ tcg_out_insn(s, RX, STC, data, h.base, h.index, h.disp);
109
ctx->z_mask = z_mask;
105
} else {
110
ctx->s_mask = smask_from_zmask(z_mask);
106
- tcg_out_insn(s, RXY, STCY, data, base, index, disp);
111
- if (!type_change) {
107
+ tcg_out_insn(s, RXY, STCY, data, h.base, h.index, h.disp);
112
- ctx->a_mask = z_mask_old ^ z_mask;
113
+ if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
114
+ return true;
115
}
116
return fold_masks(ctx, op);
117
}
118
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
119
s_mask |= MAKE_64BIT_MASK(len, 64 - len);
120
ctx->s_mask = s_mask;
121
122
- if (pos == 0) {
123
- ctx->a_mask = s_mask & ~s_mask_old;
124
+ if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
125
+ return true;
126
}
127
128
return fold_masks(ctx, op);
129
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
130
}
108
}
131
109
break;
132
/* Assume all bits affected, no bits known zero, no sign reps. */
110
133
- ctx.a_mask = -1;
111
case MO_UW | MO_BSWAP:
134
ctx.z_mask = -1;
112
- tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
135
ctx.s_mask = 0;
113
+ tcg_out_insn(s, RXY, STRVH, data, h.base, h.index, h.disp);
114
break;
115
case MO_UW:
116
- if (disp >= 0 && disp < 0x1000) {
117
- tcg_out_insn(s, RX, STH, data, base, index, disp);
118
+ if (h.disp >= 0 && h.disp < 0x1000) {
119
+ tcg_out_insn(s, RX, STH, data, h.base, h.index, h.disp);
120
} else {
121
- tcg_out_insn(s, RXY, STHY, data, base, index, disp);
122
+ tcg_out_insn(s, RXY, STHY, data, h.base, h.index, h.disp);
123
}
124
break;
125
126
case MO_UL | MO_BSWAP:
127
- tcg_out_insn(s, RXY, STRV, data, base, index, disp);
128
+ tcg_out_insn(s, RXY, STRV, data, h.base, h.index, h.disp);
129
break;
130
case MO_UL:
131
- if (disp >= 0 && disp < 0x1000) {
132
- tcg_out_insn(s, RX, ST, data, base, index, disp);
133
+ if (h.disp >= 0 && h.disp < 0x1000) {
134
+ tcg_out_insn(s, RX, ST, data, h.base, h.index, h.disp);
135
} else {
136
- tcg_out_insn(s, RXY, STY, data, base, index, disp);
137
+ tcg_out_insn(s, RXY, STY, data, h.base, h.index, h.disp);
138
}
139
break;
140
141
case MO_UQ | MO_BSWAP:
142
- tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
143
+ tcg_out_insn(s, RXY, STRVG, data, h.base, h.index, h.disp);
144
break;
145
case MO_UQ:
146
- tcg_out_insn(s, RXY, STG, data, base, index, disp);
147
+ tcg_out_insn(s, RXY, STG, data, h.base, h.index, h.disp);
148
break;
149
150
default:
151
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
152
return tcg_out_fail_alignment(s, l);
153
}
154
155
-static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
156
- TCGReg *index_reg, tcg_target_long *disp)
157
+static HostAddress tcg_prepare_user_ldst(TCGContext *s, TCGReg addr_reg)
158
{
159
+ TCGReg index;
160
+ int disp;
161
+
162
if (TARGET_LONG_BITS == 32) {
163
- tcg_out_ext32u(s, TCG_TMP0, *addr_reg);
164
- *addr_reg = TCG_TMP0;
165
+ tcg_out_ext32u(s, TCG_TMP0, addr_reg);
166
+ addr_reg = TCG_TMP0;
167
}
168
if (guest_base < 0x80000) {
169
- *index_reg = TCG_REG_NONE;
170
- *disp = guest_base;
171
+ index = TCG_REG_NONE;
172
+ disp = guest_base;
173
} else {
174
- *index_reg = TCG_GUEST_BASE_REG;
175
- *disp = 0;
176
+ index = TCG_GUEST_BASE_REG;
177
+ disp = 0;
178
}
179
+ return (HostAddress){ .base = addr_reg, .index = index, .disp = disp };
180
}
181
#endif /* CONFIG_SOFTMMU */
182
183
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
184
MemOpIdx oi, TCGType data_type)
185
{
186
MemOp opc = get_memop(oi);
187
+ HostAddress h;
188
+
189
#ifdef CONFIG_SOFTMMU
190
unsigned mem_index = get_mmuidx(oi);
191
tcg_insn_unit *label_ptr;
192
- TCGReg base_reg;
193
194
- base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
195
+ h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
196
+ h.index = TCG_REG_R2;
197
+ h.disp = 0;
198
199
tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
200
label_ptr = s->code_ptr;
201
s->code_ptr += 1;
202
203
- tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
204
+ tcg_out_qemu_ld_direct(s, opc, data_reg, h);
205
206
add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
207
s->code_ptr, label_ptr);
208
#else
209
- TCGReg index_reg;
210
- tcg_target_long disp;
211
unsigned a_bits = get_alignment_bits(opc);
212
213
if (a_bits) {
214
tcg_out_test_alignment(s, true, addr_reg, a_bits);
215
}
216
- tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
217
- tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
218
+ h = tcg_prepare_user_ldst(s, addr_reg);
219
+ tcg_out_qemu_ld_direct(s, opc, data_reg, h);
220
#endif
221
}
222
223
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
224
MemOpIdx oi, TCGType data_type)
225
{
226
MemOp opc = get_memop(oi);
227
+ HostAddress h;
228
+
229
#ifdef CONFIG_SOFTMMU
230
unsigned mem_index = get_mmuidx(oi);
231
tcg_insn_unit *label_ptr;
232
- TCGReg base_reg;
233
234
- base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
235
+ h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
236
+ h.index = TCG_REG_R2;
237
+ h.disp = 0;
238
239
tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
240
label_ptr = s->code_ptr;
241
s->code_ptr += 1;
242
243
- tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
244
+ tcg_out_qemu_st_direct(s, opc, data_reg, h);
245
246
add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
247
s->code_ptr, label_ptr);
248
#else
249
- TCGReg index_reg;
250
- tcg_target_long disp;
251
unsigned a_bits = get_alignment_bits(opc);
252
253
if (a_bits) {
254
tcg_out_test_alignment(s, false, addr_reg, a_bits);
255
}
256
- tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
257
- tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
258
+ h = tcg_prepare_user_ldst(s, addr_reg);
259
+ tcg_out_qemu_st_direct(s, opc, data_reg, h);
260
#endif
261
}
136
262
137
--
263
--
138
2.43.0
264
2.34.1
265
266
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
1
In tcg_canonicalize_memop, we remove MO_SIGN from MO_32 operations
2
with TCG_TYPE_I32. Thus this is never set. We already have an
3
identical test just above which does not include is_64.
4
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
7
---
4
tcg/optimize.c | 2 +-
8
tcg/sparc64/tcg-target.c.inc | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
9
1 file changed, 1 insertion(+), 1 deletion(-)
6
10
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
13
--- a/tcg/sparc64/tcg-target.c.inc
10
+++ b/tcg/optimize.c
14
+++ b/tcg/sparc64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
12
TCGType type;
16
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi);
13
17
14
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
18
/* We let the helper sign-extend SB and SW, but leave SL for here. */
15
- return false;
19
- if (is_64 && (memop & MO_SSIZE) == MO_SL) {
16
+ return finish_folding(ctx, op);
20
+ if ((memop & MO_SSIZE) == MO_SL) {
17
}
21
tcg_out_ext32s(s, data, TCG_REG_O0);
18
22
} else {
19
type = ctx->type;
23
tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
20
--
24
--
21
2.43.0
25
2.34.1
26
27
1
All uses have been converted to float*_muladd_scalbn.
1
We need to set this in TCGLabelQemuLdst, so plumb this
2
all the way through from tcg_out_op.
2
3
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
include/fpu/softfloat.h | 3 ---
7
tcg/sparc64/tcg-target.c.inc | 6 +++---
7
fpu/softfloat.c | 6 ------
8
1 file changed, 3 insertions(+), 3 deletions(-)
8
fpu/softfloat-parts.c.inc | 4 ----
9
3 files changed, 13 deletions(-)
10
9
11
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
10
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/include/fpu/softfloat.h
12
--- a/tcg/sparc64/tcg-target.c.inc
14
+++ b/include/fpu/softfloat.h
13
+++ b/tcg/sparc64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
14
@@ -XXX,XX +XXX,XX @@ static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
16
| Using these differs from negating an input or output before calling
17
| the muladd function in that this means that a NaN doesn't have its
18
| sign bit inverted before it is propagated.
19
-| We also support halving the result before rounding, as a special
20
-| case to support the ARM fused-sqrt-step instruction FRSQRTS.
21
*----------------------------------------------------------------------------*/
22
enum {
23
float_muladd_negate_c = 1,
24
float_muladd_negate_product = 2,
25
float_muladd_negate_result = 4,
26
- float_muladd_halve_result = 8,
27
};
15
};
28
16
29
/*----------------------------------------------------------------------------
17
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
30
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
18
- MemOpIdx oi, bool is_64)
31
index XXXXXXX..XXXXXXX 100644
19
+ MemOpIdx oi, TCGType data_type)
32
--- a/fpu/softfloat.c
20
{
33
+++ b/fpu/softfloat.c
21
MemOp memop = get_memop(oi);
34
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
22
tcg_insn_unit *label_ptr;
35
if (unlikely(!can_use_fpu(s))) {
23
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
36
goto soft;
24
break;
37
}
25
38
- if (unlikely(flags & float_muladd_halve_result)) {
26
case INDEX_op_qemu_ld_i32:
39
- goto soft;
27
- tcg_out_qemu_ld(s, a0, a1, a2, false);
40
- }
28
+ tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
41
29
break;
42
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
30
case INDEX_op_qemu_ld_i64:
43
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
31
- tcg_out_qemu_ld(s, a0, a1, a2, true);
44
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
32
+ tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
45
if (unlikely(!can_use_fpu(s))) {
33
break;
46
goto soft;
34
case INDEX_op_qemu_st_i32:
47
}
35
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
48
- if (unlikely(flags & float_muladd_halve_result)) {
49
- goto soft;
50
- }
51
52
float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
53
if (unlikely(!f64_is_zon3(ua, ub, uc))) {
54
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
55
index XXXXXXX..XXXXXXX 100644
56
--- a/fpu/softfloat-parts.c.inc
57
+++ b/fpu/softfloat-parts.c.inc
58
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
59
a->exp = p_widen.exp;
60
61
return_normal:
62
- /* TODO: Replace all use of float_muladd_halve_result with scale. */
63
- if (flags & float_muladd_halve_result) {
64
- a->exp -= 1;
65
- }
66
a->exp += scale;
67
finish_sign:
68
if (flags & float_muladd_negate_result) {
69
--
36
--
70
2.43.0
37
2.34.1
71
38
72
39
1
The big comment just above says functions should be sorted.
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Add forward declarations as needed.
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
3
---
7
tcg/optimize.c | 114 +++++++++++++++++++++++++------------------------
4
tcg/tcg.c | 13 +++++++++++++
8
1 file changed, 59 insertions(+), 55 deletions(-)
5
tcg/tcg-ldst.c.inc | 14 --------------
6
2 files changed, 13 insertions(+), 14 deletions(-)
9
7
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
10
--- a/tcg/tcg.c
13
+++ b/tcg/optimize.c
11
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
12
@@ -XXX,XX +XXX,XX @@ typedef struct QEMU_PACKED {
15
* 3) those that produce information about the result value.
13
DebugFrameFDEHeader fde;
14
} DebugFrameHeader;
15
16
+typedef struct TCGLabelQemuLdst {
17
+ bool is_ld; /* qemu_ld: true, qemu_st: false */
18
+ MemOpIdx oi;
19
+ TCGType type; /* result type of a load */
20
+ TCGReg addrlo_reg; /* reg index for low word of guest virtual addr */
21
+ TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */
22
+ TCGReg datalo_reg; /* reg index for low word to be loaded or stored */
23
+ TCGReg datahi_reg; /* reg index for high word to be loaded or stored */
24
+ const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */
25
+ tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
26
+ QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
27
+} TCGLabelQemuLdst;
28
+
29
static void tcg_register_jit_int(const void *buf, size_t size,
30
const void *debug_frame,
31
size_t debug_frame_size)
32
diff --git a/tcg/tcg-ldst.c.inc b/tcg/tcg-ldst.c.inc
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/tcg-ldst.c.inc
35
+++ b/tcg/tcg-ldst.c.inc
36
@@ -XXX,XX +XXX,XX @@
37
* THE SOFTWARE.
16
*/
38
*/
17
39
18
+static bool fold_or(OptContext *ctx, TCGOp *op);
40
-typedef struct TCGLabelQemuLdst {
19
+static bool fold_orc(OptContext *ctx, TCGOp *op);
41
- bool is_ld; /* qemu_ld: true, qemu_st: false */
20
+static bool fold_xor(OptContext *ctx, TCGOp *op);
42
- MemOpIdx oi;
21
+
43
- TCGType type; /* result type of a load */
22
static bool fold_add(OptContext *ctx, TCGOp *op)
44
- TCGReg addrlo_reg; /* reg index for low word of guest virtual addr */
23
{
45
- TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */
24
if (fold_const2_commutative(ctx, op) ||
46
- TCGReg datalo_reg; /* reg index for low word to be loaded or stored */
25
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
47
- TCGReg datahi_reg; /* reg index for high word to be loaded or stored */
26
return fold_masks_zs(ctx, op, z_mask, s_mask);
48
- const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */
27
}
49
- tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
28
50
- QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
29
+static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
51
-} TCGLabelQemuLdst;
30
+{
31
+ /* If true and false values are the same, eliminate the cmp. */
32
+ if (args_are_copies(op->args[2], op->args[3])) {
33
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
34
+ }
35
+
36
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
37
+ uint64_t tv = arg_info(op->args[2])->val;
38
+ uint64_t fv = arg_info(op->args[3])->val;
39
+
40
+ if (tv == -1 && fv == 0) {
41
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
42
+ }
43
+ if (tv == 0 && fv == -1) {
44
+ if (TCG_TARGET_HAS_not_vec) {
45
+ op->opc = INDEX_op_not_vec;
46
+ return fold_not(ctx, op);
47
+ } else {
48
+ op->opc = INDEX_op_xor_vec;
49
+ op->args[2] = arg_new_constant(ctx, -1);
50
+ return fold_xor(ctx, op);
51
+ }
52
+ }
53
+ }
54
+ if (arg_is_const(op->args[2])) {
55
+ uint64_t tv = arg_info(op->args[2])->val;
56
+ if (tv == -1) {
57
+ op->opc = INDEX_op_or_vec;
58
+ op->args[2] = op->args[3];
59
+ return fold_or(ctx, op);
60
+ }
61
+ if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
62
+ op->opc = INDEX_op_andc_vec;
63
+ op->args[2] = op->args[1];
64
+ op->args[1] = op->args[3];
65
+ return fold_andc(ctx, op);
66
+ }
67
+ }
68
+ if (arg_is_const(op->args[3])) {
69
+ uint64_t fv = arg_info(op->args[3])->val;
70
+ if (fv == 0) {
71
+ op->opc = INDEX_op_and_vec;
72
+ return fold_and(ctx, op);
73
+ }
74
+ if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
75
+ op->opc = INDEX_op_orc_vec;
76
+ op->args[2] = op->args[1];
77
+ op->args[1] = op->args[3];
78
+ return fold_orc(ctx, op);
79
+ }
80
+ }
81
+ return finish_folding(ctx, op);
82
+}
83
+
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
85
{
86
int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
87
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
88
return fold_masks_zs(ctx, op, z_mask, s_mask);
89
}
90
91
-static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
92
-{
93
- /* If true and false values are the same, eliminate the cmp. */
94
- if (args_are_copies(op->args[2], op->args[3])) {
95
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
96
- }
97
-
52
-
98
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
99
- uint64_t tv = arg_info(op->args[2])->val;
100
- uint64_t fv = arg_info(op->args[3])->val;
101
-
53
-
102
- if (tv == -1 && fv == 0) {
54
/*
103
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
55
* Generate TB finalization at the end of block
104
- }
56
*/
105
- if (tv == 0 && fv == -1) {
106
- if (TCG_TARGET_HAS_not_vec) {
107
- op->opc = INDEX_op_not_vec;
108
- return fold_not(ctx, op);
109
- } else {
110
- op->opc = INDEX_op_xor_vec;
111
- op->args[2] = arg_new_constant(ctx, -1);
112
- return fold_xor(ctx, op);
113
- }
114
- }
115
- }
116
- if (arg_is_const(op->args[2])) {
117
- uint64_t tv = arg_info(op->args[2])->val;
118
- if (tv == -1) {
119
- op->opc = INDEX_op_or_vec;
120
- op->args[2] = op->args[3];
121
- return fold_or(ctx, op);
122
- }
123
- if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
124
- op->opc = INDEX_op_andc_vec;
125
- op->args[2] = op->args[1];
126
- op->args[1] = op->args[3];
127
- return fold_andc(ctx, op);
128
- }
129
- }
130
- if (arg_is_const(op->args[3])) {
131
- uint64_t fv = arg_info(op->args[3])->val;
132
- if (fv == 0) {
133
- op->opc = INDEX_op_and_vec;
134
- return fold_and(ctx, op);
135
- }
136
- if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
137
- op->opc = INDEX_op_orc_vec;
138
- op->args[2] = op->args[1];
139
- op->args[1] = op->args[3];
140
- return fold_orc(ctx, op);
141
- }
142
- }
143
- return finish_folding(ctx, op);
144
-}
145
-
146
/* Propagate constants and copies, fold constant expressions. */
147
void tcg_optimize(TCGContext *s)
148
{
149
--
57
--
150
2.43.0
58
2.34.1
59
60
1
We currently have a flag, float_muladd_halve_result, to scale
1
An inline function is safer than a macro, and REG_P
2
the result by 2**-1. Extend this to handle arbitrary scaling.
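
Put differently, the *_muladd_scalbn variants compute (a * b + c) * 2**scale with a single rounding, so the existing halve-result behaviour is just scale == -1. A minimal usage sketch (a, b, c and fp_status are placeholders for whatever values and float_status the caller already has):

    /* fused multiply-add, result scaled by 2**-1, rounded once */
    float64 r = float64_muladd_scalbn(a, b, c, -1, 0, fp_status);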
2
was rather too generic.
3
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
include/fpu/softfloat.h | 6 ++++
7
tcg/tcg-internal.h | 4 ----
8
fpu/softfloat.c | 58 ++++++++++++++++++++++-----------------
8
tcg/tcg.c | 16 +++++++++++++---
9
fpu/softfloat-parts.c.inc | 7 +++--
9
2 files changed, 13 insertions(+), 7 deletions(-)
10
3 files changed, 44 insertions(+), 27 deletions(-)
11
10
12
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
11
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/include/fpu/softfloat.h
13
--- a/tcg/tcg-internal.h
15
+++ b/include/fpu/softfloat.h
14
+++ b/tcg/tcg-internal.h
16
@@ -XXX,XX +XXX,XX @@ float16 float16_add(float16, float16, float_status *status);
15
@@ -XXX,XX +XXX,XX @@ typedef struct TCGCallArgumentLoc {
17
float16 float16_sub(float16, float16, float_status *status);
16
unsigned tmp_subindex : 2;
18
float16 float16_mul(float16, float16, float_status *status);
17
} TCGCallArgumentLoc;
19
float16 float16_muladd(float16, float16, float16, int, float_status *status);
18
20
+float16 float16_muladd_scalbn(float16, float16, float16,
19
-/* Avoid "unsigned < 0 is always false" Werror, when iarg_regs is empty. */
21
+ int, int, float_status *status);
20
-#define REG_P(L) \
22
float16 float16_div(float16, float16, float_status *status);
21
- ((int)(L)->arg_slot < (int)ARRAY_SIZE(tcg_target_call_iarg_regs))
23
float16 float16_scalbn(float16, int, float_status *status);
22
-
24
float16 float16_min(float16, float16, float_status *status);
23
typedef struct TCGHelperInfo {
25
@@ -XXX,XX +XXX,XX @@ float32 float32_mul(float32, float32, float_status *status);
24
void *func;
26
float32 float32_div(float32, float32, float_status *status);
25
const char *name;
27
float32 float32_rem(float32, float32, float_status *status);
26
diff --git a/tcg/tcg.c b/tcg/tcg.c
28
float32 float32_muladd(float32, float32, float32, int, float_status *status);
29
+float32 float32_muladd_scalbn(float32, float32, float32,
30
+ int, int, float_status *status);
31
float32 float32_sqrt(float32, float_status *status);
32
float32 float32_exp2(float32, float_status *status);
33
float32 float32_log2(float32, float_status *status);
34
@@ -XXX,XX +XXX,XX @@ float64 float64_mul(float64, float64, float_status *status);
35
float64 float64_div(float64, float64, float_status *status);
36
float64 float64_rem(float64, float64, float_status *status);
37
float64 float64_muladd(float64, float64, float64, int, float_status *status);
38
+float64 float64_muladd_scalbn(float64, float64, float64,
39
+ int, int, float_status *status);
40
float64 float64_sqrt(float64, float_status *status);
41
float64 float64_log2(float64, float_status *status);
42
FloatRelation float64_compare(float64, float64, float_status *status);
43
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
44
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
45
--- a/fpu/softfloat.c
28
--- a/tcg/tcg.c
46
+++ b/fpu/softfloat.c
29
+++ b/tcg/tcg.c
47
@@ -XXX,XX +XXX,XX @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
30
@@ -XXX,XX +XXX,XX @@ static void init_ffi_layouts(void)
48
#define parts_mul(A, B, S) \
49
PARTS_GENERIC_64_128(mul, A)(A, B, S)
50
51
-static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
52
- FloatParts64 *c, int flags,
53
- float_status *s);
54
-static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
55
- FloatParts128 *c, int flags,
56
- float_status *s);
57
+static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
58
+ FloatParts64 *c, int scale,
59
+ int flags, float_status *s);
60
+static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
61
+ FloatParts128 *c, int scale,
62
+ int flags, float_status *s);
63
64
-#define parts_muladd(A, B, C, Z, S) \
65
- PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
66
+#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
67
+ PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
68
69
static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
70
float_status *s);
71
@@ -XXX,XX +XXX,XX @@ floatx80_mul(floatx80 a, floatx80 b, float_status *status)
72
* Fused multiply-add
73
*/
74
75
-float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
76
- int flags, float_status *status)
77
+float16 QEMU_FLATTEN
78
+float16_muladd_scalbn(float16 a, float16 b, float16 c,
79
+ int scale, int flags, float_status *status)
80
{
81
FloatParts64 pa, pb, pc, *pr;
82
83
float16_unpack_canonical(&pa, a, status);
84
float16_unpack_canonical(&pb, b, status);
85
float16_unpack_canonical(&pc, c, status);
86
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
87
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
88
89
return float16_round_pack_canonical(pr, status);
90
}
31
}
91
32
#endif /* CONFIG_TCG_INTERPRETER */
92
-static float32 QEMU_SOFTFLOAT_ATTR
33
93
-soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
34
+static inline bool arg_slot_reg_p(unsigned arg_slot)
94
- float_status *status)
95
+float16 float16_muladd(float16 a, float16 b, float16 c,
96
+ int flags, float_status *status)
97
+{
35
+{
98
+ return float16_muladd_scalbn(a, b, c, 0, flags, status);
36
+ /*
37
+ * Split the sizeof away from the comparison to avoid Werror from
38
+ * "unsigned < 0 is always false", when iarg_regs is empty.
39
+ */
40
+ unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
41
+ return arg_slot < nreg;
99
+}
42
+}
100
+
43
+
101
+float32 QEMU_SOFTFLOAT_ATTR
44
typedef struct TCGCumulativeArgs {
102
+float32_muladd_scalbn(float32 a, float32 b, float32 c,
45
int arg_idx; /* tcg_gen_callN args[] */
103
+ int scale, int flags, float_status *status)
46
int info_in_idx; /* TCGHelperInfo in[] */
47
@@ -XXX,XX +XXX,XX @@ liveness_pass_1(TCGContext *s)
48
case TCG_CALL_ARG_NORMAL:
49
case TCG_CALL_ARG_EXTEND_U:
50
case TCG_CALL_ARG_EXTEND_S:
51
- if (REG_P(loc)) {
52
+ if (arg_slot_reg_p(loc->arg_slot)) {
53
*la_temp_pref(ts) = 0;
54
break;
55
}
56
@@ -XXX,XX +XXX,XX @@ liveness_pass_1(TCGContext *s)
57
case TCG_CALL_ARG_NORMAL:
58
case TCG_CALL_ARG_EXTEND_U:
59
case TCG_CALL_ARG_EXTEND_S:
60
- if (REG_P(loc)) {
61
+ if (arg_slot_reg_p(loc->arg_slot)) {
62
tcg_regset_set_reg(*la_temp_pref(ts),
63
tcg_target_call_iarg_regs[loc->arg_slot]);
64
}
65
@@ -XXX,XX +XXX,XX @@ static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
66
static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
67
TCGTemp *ts, TCGRegSet *allocated_regs)
104
{
68
{
105
FloatParts64 pa, pb, pc, *pr;
69
- if (REG_P(l)) {
106
70
+ if (arg_slot_reg_p(l->arg_slot)) {
107
float32_unpack_canonical(&pa, a, status);
71
TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
108
float32_unpack_canonical(&pb, b, status);
72
load_arg_reg(s, reg, ts, *allocated_regs);
109
float32_unpack_canonical(&pc, c, status);
73
tcg_regset_set_reg(*allocated_regs, reg);
110
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
111
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
112
113
return float32_round_pack_canonical(pr, status);
114
}
115
116
-static float64 QEMU_SOFTFLOAT_ATTR
117
-soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
118
- float_status *status)
119
+float64 QEMU_SOFTFLOAT_ATTR
120
+float64_muladd_scalbn(float64 a, float64 b, float64 c,
121
+ int scale, int flags, float_status *status)
122
{
123
FloatParts64 pa, pb, pc, *pr;
124
125
float64_unpack_canonical(&pa, a, status);
126
float64_unpack_canonical(&pb, b, status);
127
float64_unpack_canonical(&pc, c, status);
128
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
129
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
130
131
return float64_round_pack_canonical(pr, status);
132
}
133
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
134
return ur.s;
135
136
soft:
137
- return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
138
+ return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
139
}
140
141
float64 QEMU_FLATTEN
142
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
143
return ur.s;
144
145
soft:
146
- return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
147
+ return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
148
}
149
150
float64 float64r32_muladd(float64 a, float64 b, float64 c,
151
@@ -XXX,XX +XXX,XX @@ float64 float64r32_muladd(float64 a, float64 b, float64 c,
152
float64_unpack_canonical(&pa, a, status);
153
float64_unpack_canonical(&pb, b, status);
154
float64_unpack_canonical(&pc, c, status);
155
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
156
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
157
158
return float64r32_round_pack_canonical(pr, status);
159
}
160
@@ -XXX,XX +XXX,XX @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
161
bfloat16_unpack_canonical(&pa, a, status);
162
bfloat16_unpack_canonical(&pb, b, status);
163
bfloat16_unpack_canonical(&pc, c, status);
164
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
165
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
166
167
return bfloat16_round_pack_canonical(pr, status);
168
}
169
@@ -XXX,XX +XXX,XX @@ float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
170
float128_unpack_canonical(&pa, a, status);
171
float128_unpack_canonical(&pb, b, status);
172
float128_unpack_canonical(&pc, c, status);
173
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
174
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
175
176
return float128_round_pack_canonical(pr, status);
177
}
178
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)
179
180
float64_unpack_canonical(&rp, float64_one, status);
181
for (i = 0 ; i < 15 ; i++) {
182
+
183
float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
184
- rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
185
+ rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
186
xnp = *parts_mul(&xnp, &xp, status);
187
}
188
189
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
190
index XXXXXXX..XXXXXXX 100644
191
--- a/fpu/softfloat-parts.c.inc
192
+++ b/fpu/softfloat-parts.c.inc
193
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
194
* Requires A and C extracted into a double-sized structure to provide the
195
* extra space for the widening multiply.
196
*/
197
-static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
198
- FloatPartsN *c, int flags, float_status *s)
199
+static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
200
+ FloatPartsN *c, int scale,
201
+ int flags, float_status *s)
202
{
203
int ab_mask, abc_mask;
204
FloatPartsW p_widen, c_widen;
205
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
206
a->exp = p_widen.exp;
207
208
return_normal:
209
+ /* TODO: Replace all use of float_muladd_halve_result with scale. */
210
if (flags & float_muladd_halve_result) {
211
a->exp -= 1;
212
}
213
+ a->exp += scale;
214
finish_sign:
215
if (flags & float_muladd_negate_result) {
216
a->sign ^= 1;
217
--
74
--
218
2.43.0
75
2.34.1
219
76
220
77
1
Call them directly from the opcode switch statement in tcg_optimize,
1
Unify all computation of argument stack offset in one function.
2
rather than in finish_folding based on opcode flags. Adjust folding
2
This requires that we adjust ref_slot to be in the same units,
3
of conditional branches to match.
3
by adding max_reg_slots during init_call_layout.
4
4
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 47 +++++++++++++++++++++++++++++++----------------
8
tcg/tcg.c | 29 +++++++++++++++++------------
9
1 file changed, 31 insertions(+), 16 deletions(-)
9
1 file changed, 17 insertions(+), 12 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/tcg.c b/tcg/tcg.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/tcg.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/tcg.c
15
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
15
@@ -XXX,XX +XXX,XX @@ static inline bool arg_slot_reg_p(unsigned arg_slot)
16
return arg_slot < nreg;
17
}
18
19
+static inline int arg_slot_stk_ofs(unsigned arg_slot)
20
+{
21
+ unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
22
+ unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
23
+
24
+ tcg_debug_assert(stk_slot < max);
25
+ return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
26
+}
27
+
28
typedef struct TCGCumulativeArgs {
29
int arg_idx; /* tcg_gen_callN args[] */
30
int info_in_idx; /* TCGHelperInfo in[] */
31
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
32
}
33
}
34
assert(ref_base + cum.ref_slot <= max_stk_slots);
35
+ ref_base += max_reg_slots;
36
37
if (ref_base != 0) {
38
for (int i = cum.info_in_idx - 1; i >= 0; --i) {
39
@@ -XXX,XX +XXX,XX @@ static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
16
}
40
}
17
}
41
}
18
42
19
+static void finish_bb(OptContext *ctx)
43
-static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
20
+{
44
+static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
21
+ /* We only optimize memory barriers across basic blocks. */
45
TCGRegSet allocated_regs)
22
+ ctx->prev_mb = NULL;
23
+}
24
+
25
+static void finish_ebb(OptContext *ctx)
26
+{
27
+ finish_bb(ctx);
28
+ /* We only optimize across extended basic blocks. */
29
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
30
+ remove_mem_copy_all(ctx);
31
+}
32
+
33
static void finish_folding(OptContext *ctx, TCGOp *op)
34
{
46
{
35
const TCGOpDef *def = &tcg_op_defs[op->opc];
47
/*
36
int i, nb_oargs;
48
@@ -XXX,XX +XXX,XX @@ static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
37
49
*/
38
- /*
50
temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
39
- * We only optimize extended basic blocks. If the opcode ends a BB
51
tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
40
- * and is not a conditional branch, reset all temp data.
52
- TCG_TARGET_CALL_STACK_OFFSET +
41
- */
53
- stk_slot * sizeof(tcg_target_long));
42
- if (def->flags & TCG_OPF_BB_END) {
54
+ arg_slot_stk_ofs(arg_slot));
43
- ctx->prev_mb = NULL;
55
}
44
- if (!(def->flags & TCG_OPF_COND_BRANCH)) {
56
45
- memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
57
static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
46
- remove_mem_copy_all(ctx);
58
@@ -XXX,XX +XXX,XX @@ static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
47
- }
59
load_arg_reg(s, reg, ts, *allocated_regs);
48
- return;
60
tcg_regset_set_reg(*allocated_regs, reg);
49
- }
61
} else {
50
-
62
- load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
51
nb_oargs = def->nb_oargs;
63
- ts, *allocated_regs);
52
for (i = 0; i < nb_oargs; i++) {
64
+ load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
53
TCGTemp *ts = arg_temp(op->args[i]);
54
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
55
if (i > 0) {
56
op->opc = INDEX_op_br;
57
op->args[0] = op->args[3];
58
+ finish_ebb(ctx);
59
+ } else {
60
+ finish_bb(ctx);
61
}
65
}
62
- return false;
63
+ return true;
64
}
66
}
65
67
66
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
68
-static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
67
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
69
+static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
68
}
70
intptr_t ref_off, TCGRegSet *allocated_regs)
69
op->opc = INDEX_op_br;
71
{
70
op->args[0] = label;
72
TCGReg reg;
71
- break;
73
- int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
72
+ finish_ebb(ctx);
74
73
+ return true;
75
- if (stk_slot < 0) {
76
+ if (arg_slot_reg_p(arg_slot)) {
77
reg = tcg_target_call_iarg_regs[arg_slot];
78
tcg_reg_free(s, reg, *allocated_regs);
79
tcg_out_addi_ptr(s, reg, ref_base, ref_off);
80
@@ -XXX,XX +XXX,XX @@ static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
81
*allocated_regs, 0, false);
82
tcg_out_addi_ptr(s, reg, ref_base, ref_off);
83
tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
84
- TCG_TARGET_CALL_STACK_OFFSET
85
- + stk_slot * sizeof(tcg_target_long));
86
+ arg_slot_stk_ofs(arg_slot));
74
}
87
}
75
- return false;
76
+
77
+ finish_bb(ctx);
78
+ return true;
79
}
88
}
80
89
81
static bool fold_bswap(OptContext *ctx, TCGOp *op)
90
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
82
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
case TCG_CALL_ARG_BY_REF:
83
CASE_OP_32_64_VEC(xor):
92
load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
84
done = fold_xor(&ctx, op);
93
load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
94
- TCG_TARGET_CALL_STACK_OFFSET
95
- + loc->ref_slot * sizeof(tcg_target_long),
96
+ arg_slot_stk_ofs(loc->ref_slot),
97
&allocated_regs);
85
break;
98
break;
86
+ case INDEX_op_set_label:
99
case TCG_CALL_ARG_BY_REF_N:
87
+ case INDEX_op_br:
88
+ case INDEX_op_exit_tb:
89
+ case INDEX_op_goto_tb:
90
+ case INDEX_op_goto_ptr:
91
+ finish_ebb(&ctx);
92
+ done = true;
93
+ break;
94
default:
95
break;
96
}
97
--
100
--
98
2.43.0
101
2.34.1
102
103
Deleted patch
1
Use of fold_masks should be restricted to those opcodes that
2
can reliably make use of it -- those with a single output,
3
and from higher-level folders that set up the masks.
4
Prepare for conversion of each folder in turn.
5
1
6
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/optimize.c | 17 ++++++++++++++---
10
1 file changed, 14 insertions(+), 3 deletions(-)
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
17
{
18
uint64_t z_mask = ctx->z_mask;
19
uint64_t s_mask = ctx->s_mask;
20
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
21
+ TCGTemp *ts;
22
+ TempOptInfo *ti;
23
+
24
+ /* Only single-output opcodes are supported here. */
25
+ tcg_debug_assert(def->nb_oargs == 1);
26
27
/*
28
* 32-bit ops generate 32-bit results, which for the purpose of
29
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
30
if (ctx->type == TCG_TYPE_I32) {
31
z_mask = (int32_t)z_mask;
32
s_mask |= MAKE_64BIT_MASK(32, 32);
33
- ctx->z_mask = z_mask;
34
- ctx->s_mask = s_mask;
35
}
36
37
if (z_mask == 0) {
38
return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
39
}
40
- return false;
41
+
42
+ ts = arg_temp(op->args[0]);
43
+ reset_ts(ctx, ts);
44
+
45
+ ti = ts_info(ts);
46
+ ti->z_mask = z_mask;
47
+ ti->s_mask = s_mask;
48
+ return true;
49
}
50
51
/*
52
--
53
2.43.0
Deleted patch
1
Add a routine to which masks can be passed directly, rather than
2
storing them into OptContext. To be used in upcoming patches.
3
1
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 15 ++++++++++++---
8
1 file changed, 12 insertions(+), 3 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
15
return fold_const2(ctx, op);
16
}
17
18
-static bool fold_masks(OptContext *ctx, TCGOp *op)
19
+/*
20
+ * Record "zero" and "sign" masks for the single output of @op.
21
+ * See TempOptInfo definition of z_mask and s_mask.
22
+ * If z_mask allows, fold the output to constant zero.
23
+ */
24
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
25
+ uint64_t z_mask, uint64_t s_mask)
26
{
27
- uint64_t z_mask = ctx->z_mask;
28
- uint64_t s_mask = ctx->s_mask;
29
const TCGOpDef *def = &tcg_op_defs[op->opc];
30
TCGTemp *ts;
31
TempOptInfo *ti;
32
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
33
return true;
34
}
35
36
+static bool fold_masks(OptContext *ctx, TCGOp *op)
37
+{
38
+ return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
39
+}
40
+
41
/*
42
* An "affected" mask bit is 0 if and only if the result is identical
43
* to the first input. Thus if the entire mask is 0, the operation
44
--
45
2.43.0
Deleted patch
1
Change the representation from sign bit repetitions to all bits equal
2
to the sign bit, including the sign bit itself.
3
1
4
The previous format has a problem in that it is difficult to recreate
5
a valid sign mask after a shift operation: the "repetitions" part of
6
the previous format meant that applying the same shift as for the value
7
led to an off-by-one value.
8
9
The new format, including the sign bit itself, means that the sign mask
10
can be manipulated in exactly the same way as the value, and canonicalization
11
is easier.
12
13
Canonicalize the s_mask in fold_masks_zs, rather than requiring callers
14
to do so. Treat 0 as a non-canonical but typeless input for no sign
15
information, which will be reset as appropriate for the data type.
16
We can easily fold in the data from z_mask while canonicalizing.
17
18
Temporarily disable optimizations using s_mask while each operation is
19
converted to use fold_masks_zs and to the new form.
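
A worked example of the two encodings (the value is only for illustration): for v = 0xff00000000000000, clrsb64(v) is 7, so

    old: ~(~0ull >> clrsb64(v))   /* 0xfe00000000000000, the 7 repetitions only */
    new: INT64_MIN >> clrsb64(v)  /* 0xff00000000000000, repetitions plus the sign bit */

and the new mask can be shifted right together with the value without going off by one.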
20
21
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
22
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
23
---
24
tcg/optimize.c | 64 ++++++++++++--------------------------------------
25
1 file changed, 15 insertions(+), 49 deletions(-)
26
27
diff --git a/tcg/optimize.c b/tcg/optimize.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tcg/optimize.c
30
+++ b/tcg/optimize.c
31
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
32
QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
33
uint64_t val;
34
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
35
- uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
36
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
37
} TempOptInfo;
38
39
typedef struct OptContext {
40
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
41
42
/* In flight values from optimization. */
43
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
44
- uint64_t s_mask; /* mask of clrsb(value) bits */
45
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
46
TCGType type;
47
} OptContext;
48
49
-/* Calculate the smask for a specific value. */
50
-static uint64_t smask_from_value(uint64_t value)
51
-{
52
- int rep = clrsb64(value);
53
- return ~(~0ull >> rep);
54
-}
55
-
56
-/*
57
- * Calculate the smask for a given set of known-zeros.
58
- * If there are lots of zeros on the left, we can consider the remainder
59
- * an unsigned field, and thus the corresponding signed field is one bit
60
- * larger.
61
- */
62
-static uint64_t smask_from_zmask(uint64_t zmask)
63
-{
64
- /*
65
- * Only the 0 bits are significant for zmask, thus the msb itself
66
- * must be zero, else we have no sign information.
67
- */
68
- int rep = clz64(zmask);
69
- if (rep == 0) {
70
- return 0;
71
- }
72
- rep -= 1;
73
- return ~(~0ull >> rep);
74
-}
75
-
76
-/*
77
- * Recreate a properly left-aligned smask after manipulation.
78
- * Some bit-shuffling, particularly shifts and rotates, may
79
- * retain sign bits on the left, but may scatter disconnected
80
- * sign bits on the right. Retain only what remains to the left.
81
- */
82
-static uint64_t smask_from_smask(int64_t smask)
83
-{
84
- /* Only the 1 bits are significant for smask */
85
- return smask_from_zmask(~smask);
86
-}
87
-
88
static inline TempOptInfo *ts_info(TCGTemp *ts)
89
{
90
return ts->state_ptr;
91
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
92
ti->is_const = true;
93
ti->val = ts->val;
94
ti->z_mask = ts->val;
95
- ti->s_mask = smask_from_value(ts->val);
96
+ ti->s_mask = INT64_MIN >> clrsb64(ts->val);
97
} else {
98
ti->is_const = false;
99
ti->z_mask = -1;
100
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
101
*/
102
if (i == 0) {
103
ts_info(ts)->z_mask = ctx->z_mask;
104
- ts_info(ts)->s_mask = ctx->s_mask;
105
}
106
}
107
}
108
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
109
* The passed s_mask may be augmented by z_mask.
110
*/
111
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
112
- uint64_t z_mask, uint64_t s_mask)
113
+ uint64_t z_mask, int64_t s_mask)
114
{
115
const TCGOpDef *def = &tcg_op_defs[op->opc];
116
TCGTemp *ts;
117
TempOptInfo *ti;
118
+ int rep;
119
120
/* Only single-output opcodes are supported here. */
121
tcg_debug_assert(def->nb_oargs == 1);
122
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
123
*/
124
if (ctx->type == TCG_TYPE_I32) {
125
z_mask = (int32_t)z_mask;
126
- s_mask |= MAKE_64BIT_MASK(32, 32);
127
+ s_mask |= INT32_MIN;
128
}
129
130
if (z_mask == 0) {
131
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
132
133
ti = ts_info(ts);
134
ti->z_mask = z_mask;
135
- ti->s_mask = s_mask | smask_from_zmask(z_mask);
136
+
137
+ /* Canonicalize s_mask and incorporate data from z_mask. */
138
+ rep = clz64(~s_mask);
139
+ rep = MAX(rep, clz64(z_mask));
140
+ rep = MAX(rep - 1, 0);
141
+ ti->s_mask = INT64_MIN >> rep;
142
+
143
return true;
144
}
145
146
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
147
148
ctx->z_mask = z_mask;
149
ctx->s_mask = s_mask;
150
- if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
151
+ if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
152
return true;
153
}
154
155
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
156
s_mask |= MAKE_64BIT_MASK(len, 64 - len);
157
ctx->s_mask = s_mask;
158
159
- if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
160
+ if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
161
return true;
162
}
163
164
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
165
ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
166
167
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
168
- ctx->s_mask = smask_from_smask(s_mask);
169
170
return fold_masks(ctx, op);
171
}
172
--
173
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Sink mask computation below fold_affected_mask early exit.
3
1
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 30 ++++++++++++++++--------------
8
1 file changed, 16 insertions(+), 14 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_add2(OptContext *ctx, TCGOp *op)
15
16
static bool fold_and(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t z1, z2;
19
+ uint64_t z1, z2, z_mask, s_mask;
20
+ TempOptInfo *t1, *t2;
21
22
if (fold_const2_commutative(ctx, op) ||
23
fold_xi_to_i(ctx, op, 0) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
25
return true;
26
}
27
28
- z1 = arg_info(op->args[1])->z_mask;
29
- z2 = arg_info(op->args[2])->z_mask;
30
- ctx->z_mask = z1 & z2;
31
-
32
- /*
33
- * Sign repetitions are perforce all identical, whether they are 1 or 0.
34
- * Bitwise operations preserve the relative quantity of the repetitions.
35
- */
36
- ctx->s_mask = arg_info(op->args[1])->s_mask
37
- & arg_info(op->args[2])->s_mask;
38
+ t1 = arg_info(op->args[1]);
39
+ t2 = arg_info(op->args[2]);
40
+ z1 = t1->z_mask;
41
+ z2 = t2->z_mask;
42
43
/*
44
* Known-zeros does not imply known-ones. Therefore unless
45
* arg2 is constant, we can't infer affected bits from it.
46
*/
47
- if (arg_is_const(op->args[2]) &&
48
- fold_affected_mask(ctx, op, z1 & ~z2)) {
49
+ if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
50
return true;
51
}
52
53
- return fold_masks(ctx, op);
54
+ z_mask = z1 & z2;
55
+
56
+ /*
57
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
58
+ * Bitwise operations preserve the relative quantity of the repetitions.
59
+ */
60
+ s_mask = t1->s_mask & t2->s_mask;
61
+
62
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
63
}
64
65
static bool fold_andc(OptContext *ctx, TCGOp *op)
66
--
67
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Avoid double inversion of the value of second const operand.
3
1
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 21 +++++++++++----------
8
1 file changed, 11 insertions(+), 10 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
15
16
static bool fold_andc(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t z1;
19
+ uint64_t z_mask, s_mask;
20
+ TempOptInfo *t1, *t2;
21
22
if (fold_const2(ctx, op) ||
23
fold_xx_to_i(ctx, op, 0) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
25
return true;
26
}
27
28
- z1 = arg_info(op->args[1])->z_mask;
29
+ t1 = arg_info(op->args[1]);
30
+ t2 = arg_info(op->args[2]);
31
+ z_mask = t1->z_mask;
32
33
/*
34
* Known-zeros does not imply known-ones. Therefore unless
35
* arg2 is constant, we can't infer anything from it.
36
*/
37
- if (arg_is_const(op->args[2])) {
38
- uint64_t z2 = ~arg_info(op->args[2])->z_mask;
39
- if (fold_affected_mask(ctx, op, z1 & ~z2)) {
40
+ if (ti_is_const(t2)) {
41
+ uint64_t v2 = ti_const_val(t2);
42
+ if (fold_affected_mask(ctx, op, z_mask & v2)) {
43
return true;
44
}
45
- z1 &= z2;
46
+ z_mask &= ~v2;
47
}
48
- ctx->z_mask = z1;
49
50
- ctx->s_mask = arg_info(op->args[1])->s_mask
51
- & arg_info(op->args[2])->s_mask;
52
- return fold_masks(ctx, op);
53
+ s_mask = t1->s_mask & t2->s_mask;
54
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
55
}
56
57
static bool fold_brcond(OptContext *ctx, TCGOp *op)
58
--
59
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Always set s_mask along the BSWAP_OS path, since the result is
3
being explicitly sign-extended.
4
1
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 21 ++++++++++-----------
9
1 file changed, 10 insertions(+), 11 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
16
static bool fold_bswap(OptContext *ctx, TCGOp *op)
17
{
18
uint64_t z_mask, s_mask, sign;
19
+ TempOptInfo *t1 = arg_info(op->args[1]);
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t = arg_info(op->args[1])->val;
23
-
24
- t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
25
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
26
+ if (ti_is_const(t1)) {
27
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
28
+ do_constant_folding(op->opc, ctx->type,
29
+ ti_const_val(t1),
30
+ op->args[2]));
31
}
32
33
- z_mask = arg_info(op->args[1])->z_mask;
34
-
35
+ z_mask = t1->z_mask;
36
switch (op->opc) {
37
case INDEX_op_bswap16_i32:
38
case INDEX_op_bswap16_i64:
39
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
40
/* If the sign bit may be 1, force all the bits above to 1. */
41
if (z_mask & sign) {
42
z_mask |= sign;
43
- s_mask = sign << 1;
44
}
45
+ /* The value and therefore s_mask is explicitly sign-extended. */
46
+ s_mask = sign;
47
break;
48
default:
49
/* The high bits are undefined: force all bits above the sign to 1. */
50
z_mask |= sign << 1;
51
break;
52
}
53
- ctx->z_mask = z_mask;
54
- ctx->s_mask = s_mask;
55
56
- return fold_masks(ctx, op);
57
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
58
}
59
60
static bool fold_call(OptContext *ctx, TCGOp *op)
61
--
62
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Compute s_mask from the union of the maximum count and the
3
op2 fallback for op1 being zero.
4
1
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 15 ++++++++++-----
9
1 file changed, 10 insertions(+), 5 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
16
17
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
18
{
19
- uint64_t z_mask;
20
+ uint64_t z_mask, s_mask;
21
+ TempOptInfo *t1 = arg_info(op->args[1]);
22
+ TempOptInfo *t2 = arg_info(op->args[2]);
23
24
- if (arg_is_const(op->args[1])) {
25
- uint64_t t = arg_info(op->args[1])->val;
26
+ if (ti_is_const(t1)) {
27
+ uint64_t t = ti_const_val(t1);
28
29
if (t != 0) {
30
t = do_constant_folding(op->opc, ctx->type, t, 0);
31
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
32
default:
33
g_assert_not_reached();
34
}
35
- ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
36
- return false;
37
+ s_mask = ~z_mask;
38
+ z_mask |= t2->z_mask;
39
+ s_mask &= t2->s_mask;
40
+
41
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
42
}
43
44
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
45
--
46
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
When we fold to and, use fold_and.
3
1
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 35 +++++++++++++++++------------------
8
1 file changed, 17 insertions(+), 18 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
15
16
static bool fold_deposit(OptContext *ctx, TCGOp *op)
17
{
18
+ TempOptInfo *t1 = arg_info(op->args[1]);
19
+ TempOptInfo *t2 = arg_info(op->args[2]);
20
+ int ofs = op->args[3];
21
+ int len = op->args[4];
22
TCGOpcode and_opc;
23
+ uint64_t z_mask;
24
25
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
26
- uint64_t t1 = arg_info(op->args[1])->val;
27
- uint64_t t2 = arg_info(op->args[2])->val;
28
-
29
- t1 = deposit64(t1, op->args[3], op->args[4], t2);
30
- return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
31
+ if (ti_is_const(t1) && ti_is_const(t2)) {
32
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
33
+ deposit64(ti_const_val(t1), ofs, len,
34
+ ti_const_val(t2)));
35
}
36
37
switch (ctx->type) {
38
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
39
}
40
41
/* Inserting a value into zero at offset 0. */
42
- if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
43
- uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
44
+ if (ti_is_const_val(t1, 0) && ofs == 0) {
45
+ uint64_t mask = MAKE_64BIT_MASK(0, len);
46
47
op->opc = and_opc;
48
op->args[1] = op->args[2];
49
op->args[2] = arg_new_constant(ctx, mask);
50
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
51
- return false;
52
+ return fold_and(ctx, op);
53
}
54
55
/* Inserting zero into a value. */
56
- if (arg_is_const_val(op->args[2], 0)) {
57
- uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
58
+ if (ti_is_const_val(t2, 0)) {
59
+ uint64_t mask = deposit64(-1, ofs, len, 0);
60
61
op->opc = and_opc;
62
op->args[2] = arg_new_constant(ctx, mask);
63
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
64
- return false;
65
+ return fold_and(ctx, op);
66
}
67
68
- ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
69
- op->args[3], op->args[4],
70
- arg_info(op->args[2])->z_mask);
71
- return false;
72
+ z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
73
+ return fold_masks_z(ctx, op, z_mask);
74
}
75
76
static bool fold_divide(OptContext *ctx, TCGOp *op)
77
--
78
2.43.0
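For intuition, a small self-contained illustration of the two deposit-to-AND rewrites above (hypothetical helper names, not QEMU code): depositing into zero at offset 0 keeps only the low 'len' bits of the inserted value, and depositing zero into a value clears the target field.

    #include <stdint.h>

    /* deposit64(0, 0, len, x) keeps only the low 'len' bits of x,
     * i.e. x & MAKE_64BIT_MASK(0, len). */
    static uint64_t deposit_into_zero(uint64_t x, int len)
    {
        return x & (len == 64 ? ~0ull : (1ull << len) - 1);
    }

    /* deposit64(y, ofs, len, 0) clears bits [ofs, ofs+len) of y,
     * i.e. y & deposit64(-1, ofs, len, 0). */
    static uint64_t deposit_zero_into(uint64_t y, int ofs, int len)
    {
        uint64_t field = (len == 64 ? ~0ull : (1ull << len) - 1) << ofs;
        return y & ~field;
    }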
Deleted patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
6
1
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
12
fold_xi_to_x(ctx, op, 1)) {
13
return true;
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_dup(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
Deleted patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 4 ++--
5
1 file changed, 2 insertions(+), 2 deletions(-)
6
1
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_dup(OptContext *ctx, TCGOp *op)
12
t = dup_const(TCGOP_VECE(op), t);
13
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_dup2(OptContext *ctx, TCGOp *op)
20
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
21
op->opc = INDEX_op_dup_vec;
22
TCGOP_VECE(op) = MO_32;
23
}
24
- return false;
25
+ return finish_folding(ctx, op);
26
}
27
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
29
--
30
2.43.0
Deleted patch
1
Add fold_masks_s as a trivial wrapper around fold_masks_zs.
2
Avoid the use of the OptContext slots.
3
1
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 13 ++++++++++---
8
1 file changed, 10 insertions(+), 3 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
15
return fold_masks_zs(ctx, op, z_mask, 0);
16
}
17
18
+static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
19
+{
20
+ return fold_masks_zs(ctx, op, -1, s_mask);
21
+}
22
+
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
24
{
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
26
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
27
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
29
{
30
+ uint64_t s_mask;
31
+
32
if (fold_const2_commutative(ctx, op) ||
33
fold_xi_to_x(ctx, op, -1) ||
34
fold_xi_to_not(ctx, op, 0)) {
35
return true;
36
}
37
38
- ctx->s_mask = arg_info(op->args[1])->s_mask
39
- & arg_info(op->args[2])->s_mask;
40
- return false;
41
+ s_mask = arg_info(op->args[1])->s_mask
42
+ & arg_info(op->args[2])->s_mask;
43
+ return fold_masks_s(ctx, op, s_mask);
44
}
45
46
static bool fold_extract(OptContext *ctx, TCGOp *op)
47
--
48
2.43.0
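Taken together with fold_masks_z shown in the context above, the three entry points differ only in which mask carries new information; roughly:

    fold_masks_z(ctx, op, z)      /* == fold_masks_zs(ctx, op, z, 0):  known zeros only     */
    fold_masks_s(ctx, op, s)      /* == fold_masks_zs(ctx, op, -1, s): known sign bits only */
    fold_masks_zs(ctx, op, z, s)  /* both */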
Deleted patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 15 ++++++---------
7
1 file changed, 6 insertions(+), 9 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
14
static bool fold_extract(OptContext *ctx, TCGOp *op)
15
{
16
uint64_t z_mask_old, z_mask;
17
+ TempOptInfo *t1 = arg_info(op->args[1]);
18
int pos = op->args[2];
19
int len = op->args[3];
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t;
23
-
24
- t = arg_info(op->args[1])->val;
25
- t = extract64(t, pos, len);
26
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
27
+ if (ti_is_const(t1)) {
28
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
29
+ extract64(ti_const_val(t1), pos, len));
30
}
31
32
- z_mask_old = arg_info(op->args[1])->z_mask;
33
+ z_mask_old = t1->z_mask;
34
z_mask = extract64(z_mask_old, pos, len);
35
if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
36
return true;
37
}
38
- ctx->z_mask = z_mask;
39
40
- return fold_masks(ctx, op);
41
+ return fold_masks_z(ctx, op, z_mask);
42
}
43
44
static bool fold_extract2(OptContext *ctx, TCGOp *op)
45
--
46
2.43.0
Deleted patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
6
1
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
12
}
13
return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_exts(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 4 ++--
7
1 file changed, 2 insertions(+), 2 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
14
g_assert_not_reached();
15
}
16
17
- ctx->z_mask = z_mask;
18
if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
19
return true;
20
}
21
- return fold_masks(ctx, op);
22
+
23
+ return fold_masks_z(ctx, op, z_mask);
24
}
25
26
static bool fold_mb(OptContext *ctx, TCGOp *op)
27
--
28
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 19 +++++++++++--------
7
1 file changed, 11 insertions(+), 8 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
14
15
static bool fold_movcond(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t z_mask, s_mask;
18
+ TempOptInfo *tt, *ft;
19
int i;
20
21
/* If true and false values are the same, eliminate the cmp. */
22
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
23
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
24
}
25
26
- ctx->z_mask = arg_info(op->args[3])->z_mask
27
- | arg_info(op->args[4])->z_mask;
28
- ctx->s_mask = arg_info(op->args[3])->s_mask
29
- & arg_info(op->args[4])->s_mask;
30
+ tt = arg_info(op->args[3]);
31
+ ft = arg_info(op->args[4]);
32
+ z_mask = tt->z_mask | ft->z_mask;
33
+ s_mask = tt->s_mask & ft->s_mask;
34
35
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
36
- uint64_t tv = arg_info(op->args[3])->val;
37
- uint64_t fv = arg_info(op->args[4])->val;
38
+ if (ti_is_const(tt) && ti_is_const(ft)) {
39
+ uint64_t tv = ti_const_val(tt);
40
+ uint64_t fv = ti_const_val(ft);
41
TCGOpcode opc, negopc = 0;
42
TCGCond cond = op->args[5];
43
44
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
45
}
46
}
47
}
48
- return false;
49
+
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
51
}
52
53
static bool fold_mul(OptContext *ctx, TCGOp *op)
54
--
55
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 8 +++++---
7
1 file changed, 5 insertions(+), 3 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
14
15
static bool fold_nand(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t s_mask;
18
+
19
if (fold_const2_commutative(ctx, op) ||
20
fold_xi_to_not(ctx, op, -1)) {
21
return true;
22
}
23
24
- ctx->s_mask = arg_info(op->args[1])->s_mask
25
- & arg_info(op->args[2])->s_mask;
26
- return false;
27
+ s_mask = arg_info(op->args[1])->s_mask
28
+ & arg_info(op->args[2])->s_mask;
29
+ return fold_masks_s(ctx, op, s_mask);
30
}
31
32
static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
33
--
34
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 9 ++-------
7
1 file changed, 2 insertions(+), 7 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
14
{
15
/* Set to 1 all bits to the left of the rightmost. */
16
uint64_t z_mask = arg_info(op->args[1])->z_mask;
17
- ctx->z_mask = -(z_mask & -z_mask);
18
+ z_mask = -(z_mask & -z_mask);
19
20
- /*
21
- * Because of fold_sub_to_neg, we want to always return true,
22
- * via finish_folding.
23
- */
24
- finish_folding(ctx, op);
25
- return true;
26
+ return fold_masks_z(ctx, op, z_mask);
27
}
28
29
static bool fold_neg(OptContext *ctx, TCGOp *op)
30
--
31
2.43.0
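The -(z & -z) expression above first isolates the lowest bit that may be nonzero in the operand and then sets every bit above it; a tiny self-contained check (illustrative only, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* Operand z_mask = 0x0c: only bits 2..3 may be nonzero, so the
         * operand is a multiple of 4 and so is its negation. */
        uint64_t z = 0x0c;

        assert((z & -z) == 0x4);                     /* lowest possibly-set bit      */
        assert(-(z & -z) == 0xfffffffffffffffcull);  /* result bits 2..63 may be set */
        return 0;
    }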
Deleted patch
1
Avoid the use of the OptContext slots.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 8 +++++---
7
1 file changed, 5 insertions(+), 3 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
14
15
static bool fold_nor(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t s_mask;
18
+
19
if (fold_const2_commutative(ctx, op) ||
20
fold_xi_to_not(ctx, op, 0)) {
21
return true;
22
}
23
24
- ctx->s_mask = arg_info(op->args[1])->s_mask
25
- & arg_info(op->args[2])->s_mask;
26
- return false;
27
+ s_mask = arg_info(op->args[1])->s_mask
28
+ & arg_info(op->args[2])->s_mask;
29
+ return fold_masks_s(ctx, op, s_mask);
30
}
31
32
static bool fold_not(OptContext *ctx, TCGOp *op)
33
--
34
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 7 +------
7
1 file changed, 1 insertion(+), 6 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
14
if (fold_const1(ctx, op)) {
15
return true;
16
}
17
-
18
- ctx->s_mask = arg_info(op->args[1])->s_mask;
19
-
20
- /* Because of fold_to_not, we want to always return true, via finish. */
21
- finish_folding(ctx, op);
22
- return true;
23
+ return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
24
}
25
26
static bool fold_or(OptContext *ctx, TCGOp *op)
27
--
28
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 13 ++++++++-----
7
1 file changed, 8 insertions(+), 5 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
14
15
static bool fold_or(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t z_mask, s_mask;
18
+ TempOptInfo *t1, *t2;
19
+
20
if (fold_const2_commutative(ctx, op) ||
21
fold_xi_to_x(ctx, op, 0) ||
22
fold_xx_to_x(ctx, op)) {
23
return true;
24
}
25
26
- ctx->z_mask = arg_info(op->args[1])->z_mask
27
- | arg_info(op->args[2])->z_mask;
28
- ctx->s_mask = arg_info(op->args[1])->s_mask
29
- & arg_info(op->args[2])->s_mask;
30
- return fold_masks(ctx, op);
31
+ t1 = arg_info(op->args[1]);
32
+ t2 = arg_info(op->args[2]);
33
+ z_mask = t1->z_mask | t2->z_mask;
34
+ s_mask = t1->s_mask & t2->s_mask;
35
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
36
}
37
38
static bool fold_orc(OptContext *ctx, TCGOp *op)
39
--
40
2.43.0
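The sign-mask intersection here is the same rule used in the eqv/nand/nor and movcond folds above; for OR the zero-masks additionally combine as a union, since a result bit may be nonzero whenever it may be nonzero in either input. A sketch (hypothetical helper, not QEMU code):

    #include <stdint.h>

    /* Combine known-bits masks across a bitwise OR of two operands. */
    static void or_masks(uint64_t za, uint64_t sa, uint64_t zb, uint64_t sb,
                         uint64_t *z_out, uint64_t *s_out)
    {
        *z_out = za | zb;   /* union of possibly-nonzero bits              */
        *s_out = sa & sb;   /* intersection of guaranteed sign-copy bits   */
    }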
Deleted patch
1
Avoid the use of the OptContext slots.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 8 +++++---
7
1 file changed, 5 insertions(+), 3 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
14
15
static bool fold_orc(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t s_mask;
18
+
19
if (fold_const2(ctx, op) ||
20
fold_xx_to_i(ctx, op, -1) ||
21
fold_xi_to_x(ctx, op, -1) ||
22
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
23
return true;
24
}
25
26
- ctx->s_mask = arg_info(op->args[1])->s_mask
27
- & arg_info(op->args[2])->s_mask;
28
- return false;
29
+ s_mask = arg_info(op->args[1])->s_mask
30
+ & arg_info(op->args[2])->s_mask;
31
+ return fold_masks_s(ctx, op, s_mask);
32
}
33
34
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
35
--
36
2.43.0
Deleted patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
6
1
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
12
fold_xx_to_i(ctx, op, 0)) {
13
return true;
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
20
--
21
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 3 +--
7
1 file changed, 1 insertion(+), 2 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
14
fold_setcond_tst_pow2(ctx, op, false);
15
}
16
17
- ctx->z_mask = 1;
18
- return false;
19
+ return fold_masks_z(ctx, op, 1);
20
}
21
22
static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
23
--
24
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 3 +--
7
1 file changed, 1 insertion(+), 2 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
14
}
15
16
/* Value is {0,-1} so all bits are repetitions of the sign. */
17
- ctx->s_mask = -1;
18
- return false;
19
+ return fold_masks_s(ctx, op, -1);
20
}
21
22
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
23
--
24
2.43.0
Deleted patch
1
Avoid the use of the OptContext slots.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 3 +--
7
1 file changed, 1 insertion(+), 2 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
14
return fold_setcond(ctx, op);
15
}
16
17
- ctx->z_mask = 1;
18
- return false;
19
+ return fold_masks_z(ctx, op, 1);
20
21
do_setcond_const:
22
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
--
24
2.43.0
Deleted patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
6
1
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
12
if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
13
op->args[3] = tcg_swap_cond(op->args[3]);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
Deleted patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
6
1
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
12
if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
13
op->args[5] = tcg_invert_cond(op->args[5]);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_sextract(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
Deleted patch
1
All mask setting is now done with parameters via fold_masks_*.
2
1
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 13 -------------
7
1 file changed, 13 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
14
QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
15
16
/* In flight values from optimization. */
17
- uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
18
- uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
19
TCGType type;
20
} OptContext;
21
22
@@ -XXX,XX +XXX,XX @@ static bool finish_folding(OptContext *ctx, TCGOp *op)
23
for (i = 0; i < nb_oargs; i++) {
24
TCGTemp *ts = arg_temp(op->args[i]);
25
reset_ts(ctx, ts);
26
- /*
27
- * Save the corresponding known-zero/sign bits mask for the
28
- * first output argument (only one supported so far).
29
- */
30
- if (i == 0) {
31
- ts_info(ts)->z_mask = ctx->z_mask;
32
- }
33
}
34
return true;
35
}
36
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
37
ctx.type = TCG_TYPE_I32;
38
}
39
40
- /* Assume all bits affected, no bits known zero, no sign reps. */
41
- ctx.z_mask = -1;
42
- ctx.s_mask = 0;
43
-
44
/*
45
* Process each opcode.
46
* Sorted alphabetically by opcode as much as possible.
47
--
48
2.43.0
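With the OptContext slots gone, every fold_* routine now reports its mask knowledge explicitly on return; an illustrative (not literal) shape of the resulting code, under the assumption that fold_example is a made-up name:

    /* Illustrative shape only -- not an actual routine in tcg/optimize.c. */
    static bool fold_example(OptContext *ctx, TCGOp *op)
    {
        TempOptInfo *t1 = arg_info(op->args[1]);
        TempOptInfo *t2 = arg_info(op->args[2]);
        uint64_t z_mask, s_mask;

        if (fold_const2(ctx, op)) {          /* constant/algebraic simplifications */
            return true;
        }

        z_mask = t1->z_mask | t2->z_mask;    /* op-specific combination */
        s_mask = t1->s_mask & t2->s_mask;
        return fold_masks_zs(ctx, op, z_mask, s_mask);
    }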
1
Use the scalbn interface instead of float_muladd_halve_result.
1
While the old type was correct in the ideal sense, some ABIs require
2
the argument to be zero-extended. Using uint32_t for all such values
3
is a decent compromise.
2
4
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
7
---
6
target/arm/tcg/helper-a64.c | 6 +++---
8
include/tcg/tcg-ldst.h | 10 +++++++---
7
1 file changed, 3 insertions(+), 3 deletions(-)
9
accel/tcg/cputlb.c | 6 +++---
10
2 files changed, 10 insertions(+), 6 deletions(-)
8
11
9
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
12
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
10
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
11
--- a/target/arm/tcg/helper-a64.c
14
--- a/include/tcg/tcg-ldst.h
12
+++ b/target/arm/tcg/helper-a64.c
15
+++ b/include/tcg/tcg-ldst.h
13
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
16
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
14
(float16_is_infinity(b) && float16_is_zero(a))) {
17
tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
15
return float16_one_point_five;
18
MemOpIdx oi, uintptr_t retaddr);
16
}
19
17
- return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
20
-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
18
+ return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
21
+/*
22
+ * Value extended to at least uint32_t, so that some ABIs do not require
23
+ * zero-extension from uint8_t or uint16_t.
24
+ */
25
+void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
26
MemOpIdx oi, uintptr_t retaddr);
27
-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
28
+void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
29
MemOpIdx oi, uintptr_t retaddr);
30
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
31
MemOpIdx oi, uintptr_t retaddr);
32
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
33
MemOpIdx oi, uintptr_t retaddr);
34
-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
35
+void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
36
MemOpIdx oi, uintptr_t retaddr);
37
void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
38
MemOpIdx oi, uintptr_t retaddr);
39
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/accel/tcg/cputlb.c
42
+++ b/accel/tcg/cputlb.c
43
@@ -XXX,XX +XXX,XX @@ full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
44
store_helper(env, addr, val, oi, retaddr, MO_UB);
19
}
45
}
20
46
21
float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
47
-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
22
@@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
48
+void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
23
(float32_is_infinity(b) && float32_is_zero(a))) {
49
MemOpIdx oi, uintptr_t retaddr)
24
return float32_one_point_five;
50
{
25
}
51
full_stb_mmu(env, addr, val, oi, retaddr);
26
- return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
52
@@ -XXX,XX +XXX,XX @@ static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
27
+ return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
53
store_helper(env, addr, val, oi, retaddr, MO_LEUW);
28
}
54
}
29
55
30
float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
56
-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
31
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
57
+void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
32
(float64_is_infinity(b) && float64_is_zero(a))) {
58
MemOpIdx oi, uintptr_t retaddr)
33
return float64_one_point_five;
59
{
34
}
60
full_le_stw_mmu(env, addr, val, oi, retaddr);
35
- return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
61
@@ -XXX,XX +XXX,XX @@ static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
36
+ return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
62
store_helper(env, addr, val, oi, retaddr, MO_BEUW);
37
}
63
}
38
64
39
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
65
-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
66
+void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
67
MemOpIdx oi, uintptr_t retaddr)
68
{
69
full_be_stw_mmu(env, addr, val, oi, retaddr);
40
--
70
--
41
2.43.0
71
2.34.1
42
72
43
73
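To make the ABI point concrete: with a uint8_t or uint16_t parameter, some calling conventions let the callee assume the caller already zero-extended the value, which TCG-generated calls do not guarantee; widening the prototype to uint32_t removes that assumption, and the access size alone decides how much of the value is used. A hypothetical sketch (not the QEMU helper):

    #include <stdint.h>

    /*
     * Hypothetical byte-store helper following the widened convention: the
     * parameter is uint32_t even though only a byte is stored, so the callee
     * never relies on the caller having zero-extended a narrower type.
     */
    static void store_byte(uint8_t *p, uint32_t val)
    {
        *p = (uint8_t)val;   /* truncate in the callee; upper bits are ignored */
    }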