The following changes since commit aa3a285b5bc56a4208b3b57d4a55291e9c260107:

  Merge tag 'mem-2024-12-21' of https://github.com/davidhildenbrand/qemu into staging (2024-12-22 14:33:27 -0500)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241224

for you to fetch changes up to e4a8e093dc74be049f4829831dce76e5edab0003:

  accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core (2024-12-24 08:32:15 -0800)

----------------------------------------------------------------
tcg/optimize: Remove in-flight mask data from OptContext
fpu: Add float*_muladd_scalbn
fpu: Remove float_muladd_halve_result
fpu: Add float_round_nearest_even_max
fpu: Add float_muladd_suppress_add_product_zero
target/hexagon: Use float32_muladd
accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

----------------------------------------------------------------
Ilya Leoshkevich (1):
      tests/tcg: Do not use inttypes.h in multiarch/system/memory.c

Pierrick Bouvier (1):
      plugins: optimize cpu_index code generation

Richard Henderson (70):
      tcg/optimize: Split out finish_bb, finish_ebb
      tcg/optimize: Split out fold_affected_mask
      tcg/optimize: Copy mask writeback to fold_masks
      tcg/optimize: Split out fold_masks_zs
      tcg/optimize: Augment s_mask from z_mask in fold_masks_zs
      tcg/optimize: Change representation of s_mask
      tcg/optimize: Use finish_folding in fold_add, fold_add_vec, fold_addsub2
      tcg/optimize: Introduce const value accessors for TempOptInfo
      tcg/optimize: Use fold_masks_zs in fold_and
      tcg/optimize: Use fold_masks_zs in fold_andc
      tcg/optimize: Use fold_masks_zs in fold_bswap
      tcg/optimize: Use fold_masks_zs in fold_count_zeros
      tcg/optimize: Use fold_masks_z in fold_ctpop
      tcg/optimize: Use fold_and and fold_masks_z in fold_deposit
      tcg/optimize: Compute sign mask in fold_deposit
      tcg/optimize: Use finish_folding in fold_divide
      tcg/optimize: Use finish_folding in fold_dup, fold_dup2
      tcg/optimize: Use fold_masks_s in fold_eqv
      tcg/optimize: Use fold_masks_z in fold_extract
      tcg/optimize: Use finish_folding in fold_extract2
      tcg/optimize: Use fold_masks_zs in fold_exts
      tcg/optimize: Use fold_masks_z in fold_extu
      tcg/optimize: Use fold_masks_zs in fold_movcond
      tcg/optimize: Use finish_folding in fold_mul*
      tcg/optimize: Use fold_masks_s in fold_nand
      tcg/optimize: Use fold_masks_z in fold_neg_no_const
      tcg/optimize: Use fold_masks_s in fold_nor
      tcg/optimize: Use fold_masks_s in fold_not
      tcg/optimize: Use fold_masks_zs in fold_or
      tcg/optimize: Use fold_masks_zs in fold_orc
      tcg/optimize: Use fold_masks_zs in fold_qemu_ld
      tcg/optimize: Return true from fold_qemu_st, fold_tcg_st
      tcg/optimize: Use finish_folding in fold_remainder
      tcg/optimize: Distinguish simplification in fold_setcond_zmask
      tcg/optimize: Use fold_masks_z in fold_setcond
      tcg/optimize: Use fold_masks_s in fold_negsetcond
      tcg/optimize: Use fold_masks_z in fold_setcond2
      tcg/optimize: Use finish_folding in fold_cmp_vec
      tcg/optimize: Use finish_folding in fold_cmpsel_vec
      tcg/optimize: Use fold_masks_zs in fold_sextract
      tcg/optimize: Use fold_masks_zs, fold_masks_s in fold_shift
      tcg/optimize: Simplify sign bit test in fold_shift
      tcg/optimize: Use finish_folding in fold_sub, fold_sub_vec
      tcg/optimize: Use fold_masks_zs in fold_tcg_ld
      tcg/optimize: Use finish_folding in fold_tcg_ld_memcopy
      tcg/optimize: Use fold_masks_zs in fold_xor
      tcg/optimize: Use finish_folding in fold_bitsel_vec
      tcg/optimize: Use finish_folding as default in tcg_optimize
      tcg/optimize: Remove z_mask, s_mask from OptContext
      tcg/optimize: Re-enable sign-mask optimizations
      tcg/optimize: Move fold_bitsel_vec into alphabetic sort
      tcg/optimize: Move fold_cmp_vec, fold_cmpsel_vec into alphabetic sort
      softfloat: Add float{16,32,64}_muladd_scalbn
      target/arm: Use float*_muladd_scalbn
      target/sparc: Use float*_muladd_scalbn
      softfloat: Remove float_muladd_halve_result
      softfloat: Add float_round_nearest_even_max
      softfloat: Add float_muladd_suppress_add_product_zero
      target/hexagon: Use float32_mul in helper_sfmpy
      target/hexagon: Use float32_muladd for helper_sffma
      target/hexagon: Use float32_muladd for helper_sffms
      target/hexagon: Use float32_muladd_scalbn for helper_sffma_sc
      target/hexagon: Use float32_muladd for helper_sffm[as]_lib
      target/hexagon: Remove internal_fmafx
      target/hexagon: Expand GEN_XF_ROUND
      target/hexagon: Remove Float
      target/hexagon: Remove Double
      target/hexagon: Use mulu64 for int128_mul_6464
      target/hexagon: Simplify internal_mpyhh setup
      accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

 include/exec/translator.h           |  14 -
 include/fpu/softfloat-types.h       |   2 +
 include/fpu/softfloat.h             |  14 +-
 include/hw/core/tcg-cpu-ops.h       |  13 +
 target/alpha/cpu.h                  |   2 +
 target/arm/internals.h              |   2 +
 target/avr/cpu.h                    |   2 +
 target/hexagon/cpu.h                |   2 +
 target/hexagon/fma_emu.h            |   3 -
 target/hppa/cpu.h                   |   2 +
 target/i386/tcg/helper-tcg.h        |   2 +
 target/loongarch/internals.h        |   2 +
 target/m68k/cpu.h                   |   2 +
 target/microblaze/cpu.h             |   2 +
 target/mips/tcg/tcg-internal.h      |   2 +
 target/openrisc/cpu.h               |   2 +
 target/ppc/cpu.h                    |   2 +
 target/riscv/cpu.h                  |   3 +
 target/rx/cpu.h                     |   2 +
 target/s390x/s390x-internal.h       |   2 +
 target/sh4/cpu.h                    |   2 +
 target/sparc/cpu.h                  |   2 +
 target/sparc/helper.h               |   4 +-
 target/tricore/cpu.h                |   2 +
 target/xtensa/cpu.h                 |   2 +
 accel/tcg/cpu-exec.c                |   8 +-
 accel/tcg/plugin-gen.c              |   9 +
 accel/tcg/translate-all.c           |   8 +-
 fpu/softfloat.c                     |  63 +--
 target/alpha/cpu.c                  |   1 +
 target/alpha/translate.c            |   4 +-
 target/arm/cpu.c                    |   1 +
 target/arm/tcg/cpu-v7m.c            |   1 +
 target/arm/tcg/helper-a64.c         |   6 +-
 target/arm/tcg/translate.c          |   5 +-
 target/avr/cpu.c                    |   1 +
 target/avr/translate.c              |   6 +-
 target/hexagon/cpu.c                |   1 +
 target/hexagon/fma_emu.c            | 496 ++++++---------------
 target/hexagon/op_helper.c          | 125 ++----
 target/hexagon/translate.c          |   4 +-
 target/hppa/cpu.c                   |   1 +
 target/hppa/translate.c             |   4 +-
 target/i386/tcg/tcg-cpu.c           |   1 +
 target/i386/tcg/translate.c         |   5 +-
 target/loongarch/cpu.c              |   1 +
 target/loongarch/tcg/translate.c    |   4 +-
 target/m68k/cpu.c                   |   1 +
 target/m68k/translate.c             |   4 +-
 target/microblaze/cpu.c             |   1 +
 target/microblaze/translate.c       |   4 +-
 target/mips/cpu.c                   |   1 +
 target/mips/tcg/translate.c         |   4 +-
 target/openrisc/cpu.c               |   1 +
 target/openrisc/translate.c         |   4 +-
 target/ppc/cpu_init.c               |   1 +
 target/ppc/translate.c              |   4 +-
 target/riscv/tcg/tcg-cpu.c          |   1 +
 target/riscv/translate.c            |   4 +-
 target/rx/cpu.c                     |   1 +
 target/rx/translate.c               |   4 +-
 target/s390x/cpu.c                  |   1 +
 target/s390x/tcg/translate.c        |   4 +-
 target/sh4/cpu.c                    |   1 +
 target/sh4/translate.c              |   4 +-
 target/sparc/cpu.c                  |   1 +
 target/sparc/fop_helper.c           |   8 +-
 target/sparc/translate.c            |  84 ++--
 target/tricore/cpu.c                |   1 +
 target/tricore/translate.c          |   5 +-
 target/xtensa/cpu.c                 |   1 +
 target/xtensa/translate.c           |   4 +-
 tcg/optimize.c                      | 857 +++++++++++++++++++-----------------
 tests/tcg/multiarch/system/memory.c |   9 +-
 fpu/softfloat-parts.c.inc           |  16 +-
 75 files changed, 866 insertions(+), 1009 deletions(-)
From: Ilya Leoshkevich <iii@linux.ibm.com>

make check-tcg fails on Fedora with the following error message:

  alpha-linux-gnu-gcc [...] qemu/tests/tcg/multiarch/system/memory.c -o memory [...]
  qemu/tests/tcg/multiarch/system/memory.c:17:10: fatal error: inttypes.h: No such file or directory
     17 | #include <inttypes.h>
        |          ^~~~~~~~~~~~
  compilation terminated.

The reason is that Fedora has cross-compilers, but no cross-glibc
headers. Fix by hardcoding the format specifiers and dropping the
include.

An alternative fix would be to introduce a configure check for
inttypes.h. But this would make it impossible to use Fedora
cross-compilers for softmmu tests, which used to work so far.

Fixes: ecbcc9ead2f8 ("tests/tcg: add a system test to check memory instrumentation")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010085906.226249-1-iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/tcg/multiarch/system/memory.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/tcg/multiarch/system/memory.c b/tests/tcg/multiarch/system/memory.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/tcg/multiarch/system/memory.c
+++ b/tests/tcg/multiarch/system/memory.c
@@ -XXX,XX +XXX,XX @@
 
 #include <stdint.h>
 #include <stdbool.h>
-#include <inttypes.h>
 #include <minilib.h>
 
 #ifndef CHECK_UNALIGNED
@@ -XXX,XX +XXX,XX @@ int main(void)
     int i;
     bool ok = true;
 
-    ml_printf("Test data start: 0x%"PRIxPTR"\n", &test_data[0]);
-    ml_printf("Test data end: 0x%"PRIxPTR"\n", &test_data[TEST_SIZE]);
+    ml_printf("Test data start: 0x%lx\n", (unsigned long)&test_data[0]);
+    ml_printf("Test data end: 0x%lx\n", (unsigned long)&test_data[TEST_SIZE]);
 
     /* Run through the unsigned tests first */
     for (i = 0; i < ARRAY_SIZE(init_ufns) && ok; i++) {
@@ -XXX,XX +XXX,XX @@ int main(void)
         ok = do_signed_reads(true);
     }
 
-    ml_printf("Test data read: %"PRId32"\n", test_read_count);
-    ml_printf("Test data write: %"PRId32"\n", test_write_count);
+    ml_printf("Test data read: %lu\n", (unsigned long)test_read_count);
+    ml_printf("Test data write: %lu\n", (unsigned long)test_write_count);
     ml_printf("Test complete: %s\n", ok ? "PASSED" : "FAILED");
     return ok ? 0 : -1;
 }
--
2.43.0
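The patch above simply spells out the cast-and-plain-specifier pattern. A minimal
stand-alone sketch of the same idea in hosted C (illustrative only, not part of the
patch; it uses printf rather than QEMU's minilib ml_printf):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t count = 42;
        uintptr_t addr = (uintptr_t)&count;

        /* Cast to a known type and use its plain printf specifier,
         * instead of relying on PRIxPTR/PRId32 from <inttypes.h>. */
        printf("addr:  0x%lx\n", (unsigned long)addr);
        printf("count: %lu\n", (unsigned long)count);
        return 0;
    }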
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>

When running with a single vcpu, we can return a constant instead of a
load when accessing cpu_index.
A side effect is that all tcg operations using it are optimized, most
notably scoreboard access.
When running a simple loop in user-mode, the speedup is around 20%.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20241128213843.1023080-1-pierrick.bouvier@linaro.org>
---
 accel/tcg/plugin-gen.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -XXX,XX +XXX,XX @@ static void gen_disable_mem_helper(void)
 
 static TCGv_i32 gen_cpu_index(void)
 {
+    /*
+     * Optimize when we run with a single vcpu. All values using cpu_index,
+     * including scoreboard index, will be optimized out.
+     * User-mode calls tb_flush when setting this flag. In system-mode, all
+     * vcpus are created before generating code.
+     */
+    if (!tcg_cflags_has(current_cpu, CF_PARALLEL)) {
+        return tcg_constant_i32(current_cpu->cpu_index);
+    }
     TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
     tcg_gen_ld_i32(cpu_index, tcg_env,
                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
--
2.43.0
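To see why the constant matters for scoreboards, consider a per-vCPU slot lookup.
The sketch below is illustrative only (hypothetical types, not QEMU plugin code):
once cpu_index is known at translation time, the whole slot address folds to a
constant and the load plus address arithmetic disappear from the generated code.

    #include <stdint.h>

    /* Hypothetical per-vCPU scoreboard layout, for illustration only. */
    typedef struct {
        uint64_t insn_count;
    } Slot;

    static Slot *slot_for(Slot *base, unsigned cpu_index)
    {
        /* With a single vCPU, cpu_index is 0 at translation time, so
         * "base + cpu_index" is itself a constant address. */
        return base + cpu_index;
    }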
Call them directly from the opcode switch statement in tcg_optimize,
rather than in finish_folding based on opcode flags. Adjust folding
of conditional branches to match.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 47 +++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
     }
 }
 
+static void finish_bb(OptContext *ctx)
+{
+    /* We only optimize memory barriers across basic blocks. */
+    ctx->prev_mb = NULL;
+}
+
+static void finish_ebb(OptContext *ctx)
+{
+    finish_bb(ctx);
+    /* We only optimize across extended basic blocks. */
+    memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+    remove_mem_copy_all(ctx);
+}
+
 static void finish_folding(OptContext *ctx, TCGOp *op)
 {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     int i, nb_oargs;
 
-    /*
-     * We only optimize extended basic blocks. If the opcode ends a BB
-     * and is not a conditional branch, reset all temp data.
-     */
-    if (def->flags & TCG_OPF_BB_END) {
-        ctx->prev_mb = NULL;
-        if (!(def->flags & TCG_OPF_COND_BRANCH)) {
-            memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
-            remove_mem_copy_all(ctx);
-        }
-        return;
-    }
-
     nb_oargs = def->nb_oargs;
     for (i = 0; i < nb_oargs; i++) {
         TCGTemp *ts = arg_temp(op->args[i]);
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
     if (i > 0) {
         op->opc = INDEX_op_br;
         op->args[0] = op->args[3];
+        finish_ebb(ctx);
+    } else {
+        finish_bb(ctx);
     }
-    return false;
+    return true;
 }
 
 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
     }
     op->opc = INDEX_op_br;
     op->args[0] = label;
-    break;
+    finish_ebb(ctx);
+    return true;
     }
-    return false;
+
+    finish_bb(ctx);
+    return true;
 }
 
 static bool fold_bswap(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64_VEC(xor):
             done = fold_xor(&ctx, op);
             break;
+        case INDEX_op_set_label:
+        case INDEX_op_br:
+        case INDEX_op_exit_tb:
+        case INDEX_op_goto_tb:
+        case INDEX_op_goto_ptr:
+            finish_ebb(&ctx);
+            done = true;
+            break;
         default:
             break;
         }
 }
--
2.43.0

There are only a few logical operations which can compute
an "affected" mask. Split out handling of this optimization
to a separate function, only to be called when applicable.

Remove the a_mask field from OptContext, as the mask is
no longer stored anywhere.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
     QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
 
     /* In flight values from optimization. */
-    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
     uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
     uint64_t s_mask;  /* mask of clrsb(value) bits */
     TCGType type;
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 
 static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
-    uint64_t a_mask = ctx->a_mask;
     uint64_t z_mask = ctx->z_mask;
     uint64_t s_mask = ctx->s_mask;
 
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
      * type changing opcodes.
      */
     if (ctx->type == TCG_TYPE_I32) {
-        a_mask = (int32_t)a_mask;
         z_mask = (int32_t)z_mask;
         s_mask |= MAKE_64BIT_MASK(32, 32);
         ctx->z_mask = z_mask;
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     if (z_mask == 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
     }
+    return false;
+}
+
+/*
+ * An "affected" mask bit is 0 if and only if the result is identical
+ * to the first input. Thus if the entire mask is 0, the operation
+ * is equivalent to a copy.
+ */
+static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
+{
+    if (ctx->type == TCG_TYPE_I32) {
+        a_mask = (uint32_t)a_mask;
+    }
     if (a_mask == 0) {
         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
      * Known-zeros does not imply known-ones. Therefore unless
      * arg2 is constant, we can't infer affected bits from it.
      */
-    if (arg_is_const(op->args[2])) {
-        ctx->a_mask = z1 & ~z2;
+    if (arg_is_const(op->args[2]) &&
+        fold_affected_mask(ctx, op, z1 & ~z2)) {
+        return true;
     }
 
     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
      */
     if (arg_is_const(op->args[2])) {
         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
-        ctx->a_mask = z1 & ~z2;
+        if (fold_affected_mask(ctx, op, z1 & ~z2)) {
+            return true;
+        }
         z1 &= z2;
     }
     ctx->z_mask = z1;
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
 
     z_mask_old = arg_info(op->args[1])->z_mask;
     z_mask = extract64(z_mask_old, pos, len);
-    if (pos == 0) {
-        ctx->a_mask = z_mask_old ^ z_mask;
+    if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
+        return true;
     }
     ctx->z_mask = z_mask;
     ctx->s_mask = smask_from_zmask(z_mask);
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
 
     ctx->z_mask = z_mask;
     ctx->s_mask = s_mask;
-    if (!type_change) {
-        ctx->a_mask = s_mask & ~s_mask_old;
+    if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+        return true;
     }
 
     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
 
     ctx->z_mask = z_mask;
     ctx->s_mask = smask_from_zmask(z_mask);
-    if (!type_change) {
-        ctx->a_mask = z_mask_old ^ z_mask;
+    if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
+        return true;
     }
     return fold_masks(ctx, op);
 }
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
     ctx->s_mask = s_mask;
 
-    if (pos == 0) {
-        ctx->a_mask = s_mask & ~s_mask_old;
+    if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+        return true;
     }
 
     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         }
 
         /* Assume all bits affected, no bits known zero, no sign reps. */
-        ctx.a_mask = -1;
        ctx.z_mask = -1;
        ctx.s_mask = 0;
 
--
2.43.0
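For readers following the mask bookkeeping in the optimizer patches that follow,
this is what the per-temp z_mask asserts about a value (a stand-alone sketch using
the definition quoted in the diffs above; the helper name is illustrative, not QEMU
code). s_mask is the companion mask tracking bits known to be copies of the sign bit.

    #include <stdint.h>
    #include <stdio.h>

    /* z_mask: a 0 bit means the corresponding value bit is known to be 0. */
    static int value_respects_zmask(uint64_t value, uint64_t z_mask)
    {
        return (value & ~z_mask) == 0;
    }

    int main(void)
    {
        uint64_t value  = 0x00000000000000ffull;  /* e.g. result of an 8-bit load */
        uint64_t z_mask = 0x00000000000000ffull;  /* upper 56 bits known zero */

        printf("ok=%d\n", value_respects_zmask(value, z_mask));
        return 0;
    }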
Use of fold_masks should be restricted to those opcodes that
can reliably make use of it -- those with a single output,
and from higher-level folders that set up the masks.
Prepare for conversion of each folder in turn.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
     uint64_t z_mask = ctx->z_mask;
     uint64_t s_mask = ctx->s_mask;
+    const TCGOpDef *def = &tcg_op_defs[op->opc];
+    TCGTemp *ts;
+    TempOptInfo *ti;
+
+    /* Only single-output opcodes are supported here. */
+    tcg_debug_assert(def->nb_oargs == 1);
 
     /*
      * 32-bit ops generate 32-bit results, which for the purpose of
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     if (ctx->type == TCG_TYPE_I32) {
         z_mask = (int32_t)z_mask;
         s_mask |= MAKE_64BIT_MASK(32, 32);
-        ctx->z_mask = z_mask;
-        ctx->s_mask = s_mask;
     }
 
     if (z_mask == 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
     }
-    return false;
+
+    ts = arg_temp(op->args[0]);
+    reset_ts(ctx, ts);
+
+    ti = ts_info(ts);
+    ti->z_mask = z_mask;
+    ti->s_mask = s_mask;
+    return true;
 }
 
 /*
--
2.43.0

Add a routine to which masks can be passed directly, rather than
storing them into OptContext. To be used in upcoming patches.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
     return fold_const2(ctx, op);
 }
 
-static bool fold_masks(OptContext *ctx, TCGOp *op)
+/*
+ * Record "zero" and "sign" masks for the single output of @op.
+ * See TempOptInfo definition of z_mask and s_mask.
+ * If z_mask allows, fold the output to constant zero.
+ */
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
+                          uint64_t z_mask, uint64_t s_mask)
 {
-    uint64_t z_mask = ctx->z_mask;
-    uint64_t s_mask = ctx->s_mask;
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     TCGTemp *ts;
     TempOptInfo *ti;
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     return true;
 }
 
+static bool fold_masks(OptContext *ctx, TCGOp *op)
+{
+    return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
+}
+
 /*
  * An "affected" mask bit is 0 if and only if the result is identical
  * to the first input. Thus if the entire mask is 0, the operation
--
2.43.0

Consider the passed s_mask to be a minimum deduced from
either existing s_mask or from a sign-extension operation.
We may be able to deduce more from the set of known zeros.
Remove identical logic from several opcode folders.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
  * Record "zero" and "sign" masks for the single output of @op.
  * See TempOptInfo definition of z_mask and s_mask.
  * If z_mask allows, fold the output to constant zero.
+ * The passed s_mask may be augmented by z_mask.
  */
 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                           uint64_t z_mask, uint64_t s_mask)
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
 
     ti = ts_info(ts);
     ti->z_mask = z_mask;
-    ti->s_mask = s_mask;
+    ti->s_mask = s_mask | smask_from_zmask(z_mask);
     return true;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
     default:
         g_assert_not_reached();
     }
-    s_mask = smask_from_zmask(z_mask);
+    s_mask = 0;
     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
     case TCG_BSWAP_OZ:
         break;
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
     default:
         /* The high bits are undefined: force all bits above the sign to 1. */
         z_mask |= sign << 1;
-        s_mask = 0;
         break;
     }
     ctx->z_mask = z_mask;
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
         g_assert_not_reached();
     }
     ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
-    ctx->s_mask = smask_from_zmask(ctx->z_mask);
     return false;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
     default:
         g_assert_not_reached();
     }
-    ctx->s_mask = smask_from_zmask(ctx->z_mask);
     return false;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
         return true;
     }
     ctx->z_mask = z_mask;
-    ctx->s_mask = smask_from_zmask(z_mask);
 
     return fold_masks(ctx, op);
 }
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
     }
 
     ctx->z_mask = z_mask;
-    ctx->s_mask = smask_from_zmask(z_mask);
     if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
         return true;
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
     int width = 8 * memop_size(mop);
 
     if (width < 64) {
-        ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
-        if (!(mop & MO_SIGN)) {
+        if (mop & MO_SIGN) {
+            ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
+        } else {
             ctx->z_mask = MAKE_64BIT_MASK(0, width);
-            ctx->s_mask <<= 1;
         }
     }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
     fold_setcond_tst_pow2(ctx, op, false);
 
     ctx->z_mask = 1;
-    ctx->s_mask = smask_from_zmask(1);
     return false;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
     }
 
     ctx->z_mask = 1;
-    ctx->s_mask = smask_from_zmask(1);
     return false;
 
 do_setcond_const:
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
         break;
     CASE_OP_32_64(ld8u):
         ctx->z_mask = MAKE_64BIT_MASK(0, 8);
-        ctx->s_mask = MAKE_64BIT_MASK(9, 55);
         break;
     CASE_OP_32_64(ld16s):
         ctx->s_mask = MAKE_64BIT_MASK(16, 48);
         break;
     CASE_OP_32_64(ld16u):
         ctx->z_mask = MAKE_64BIT_MASK(0, 16);
-        ctx->s_mask = MAKE_64BIT_MASK(17, 47);
         break;
     case INDEX_op_ld32s_i64:
         ctx->s_mask = MAKE_64BIT_MASK(32, 32);
         break;
     case INDEX_op_ld32u_i64:
         ctx->z_mask = MAKE_64BIT_MASK(0, 32);
-        ctx->s_mask = MAKE_64BIT_MASK(33, 31);
         break;
     default:
         g_assert_not_reached();
--
2.43.0
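The next patch describes the change of s_mask encoding in prose; the difference can
be seen by computing both forms for one value. This is an editor's sketch, not part
of the series, mirroring the expressions visible in the diff; clrsb64 is modelled
with GCC's __builtin_clrsbll and an arithmetic right shift of negatives is assumed,
as QEMU itself assumes.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t value = -128;                 /* 0xffffffffffffff80 */
        int rep = __builtin_clrsbll(value);   /* 56 redundant sign bits */

        /* Old encoding: a left-aligned mask of clrsb(value) bits. */
        uint64_t old_s_mask = ~(~0ull >> rep);               /* 0xffffffffffffff00 */

        /* New encoding: every bit equal to the sign bit, msb included. */
        uint64_t new_s_mask = (uint64_t)(INT64_MIN >> rep);   /* 0xffffffffffffff80 */

        printf("old=%016llx new=%016llx\n",
               (unsigned long long)old_s_mask, (unsigned long long)new_s_mask);
        return 0;
    }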
New patch
1
Change the representation from sign bit repetitions to all bits equal
2
to the sign bit, including the sign bit itself.
1
3
4
The previous format has a problem in that it is difficult to recreate
5
a valid sign mask after a shift operation: the "repetitions" part of
6
the previous format meant that applying the same shift as for the value
7
lead to an off-by-one value.
8
9
The new format, including the sign bit itself, means that the sign mask
10
can be manipulated in exactly the same way as the value, canonicalization
11
is easier.
12
13
Canonicalize the s_mask in fold_masks_zs, rather than requiring callers
14
to do so. Treat 0 as a non-canonical but typeless input for no sign
15
information, which will be reset as appropriate for the data type.
16
We can easily fold in the data from z_mask while canonicalizing.
17
18
Temporarily disable optimizations using s_mask while each operation is
19
converted to use fold_masks_zs and to the new form.
20
21
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
22
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
23
---
24
tcg/optimize.c | 64 ++++++++++++--------------------------------------
25
1 file changed, 15 insertions(+), 49 deletions(-)
26
27
diff --git a/tcg/optimize.c b/tcg/optimize.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tcg/optimize.c
30
+++ b/tcg/optimize.c
31
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
32
QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
33
uint64_t val;
34
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
35
- uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
36
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
37
} TempOptInfo;
38
39
typedef struct OptContext {
40
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
41
42
/* In flight values from optimization. */
43
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
44
- uint64_t s_mask; /* mask of clrsb(value) bits */
45
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
46
TCGType type;
47
} OptContext;
48
49
-/* Calculate the smask for a specific value. */
50
-static uint64_t smask_from_value(uint64_t value)
51
-{
52
- int rep = clrsb64(value);
53
- return ~(~0ull >> rep);
54
-}
55
-
56
-/*
57
- * Calculate the smask for a given set of known-zeros.
58
- * If there are lots of zeros on the left, we can consider the remainder
59
- * an unsigned field, and thus the corresponding signed field is one bit
60
- * larger.
61
- */
62
-static uint64_t smask_from_zmask(uint64_t zmask)
63
-{
64
- /*
65
- * Only the 0 bits are significant for zmask, thus the msb itself
66
- * must be zero, else we have no sign information.
67
- */
68
- int rep = clz64(zmask);
69
- if (rep == 0) {
70
- return 0;
71
- }
72
- rep -= 1;
73
- return ~(~0ull >> rep);
74
-}
75
-
76
-/*
77
- * Recreate a properly left-aligned smask after manipulation.
78
- * Some bit-shuffling, particularly shifts and rotates, may
79
- * retain sign bits on the left, but may scatter disconnected
80
- * sign bits on the right. Retain only what remains to the left.
81
- */
82
-static uint64_t smask_from_smask(int64_t smask)
83
-{
84
- /* Only the 1 bits are significant for smask */
85
- return smask_from_zmask(~smask);
86
-}
87
-
88
static inline TempOptInfo *ts_info(TCGTemp *ts)
89
{
90
return ts->state_ptr;
91
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
92
ti->is_const = true;
93
ti->val = ts->val;
94
ti->z_mask = ts->val;
95
- ti->s_mask = smask_from_value(ts->val);
96
+ ti->s_mask = INT64_MIN >> clrsb64(ts->val);
97
} else {
98
ti->is_const = false;
99
ti->z_mask = -1;
100
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
101
*/
102
if (i == 0) {
103
ts_info(ts)->z_mask = ctx->z_mask;
104
- ts_info(ts)->s_mask = ctx->s_mask;
105
}
106
}
107
}
108
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
109
* The passed s_mask may be augmented by z_mask.
110
*/
111
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
112
- uint64_t z_mask, uint64_t s_mask)
113
+ uint64_t z_mask, int64_t s_mask)
114
{
115
const TCGOpDef *def = &tcg_op_defs[op->opc];
116
TCGTemp *ts;
117
TempOptInfo *ti;
118
+ int rep;
119
120
/* Only single-output opcodes are supported here. */
121
tcg_debug_assert(def->nb_oargs == 1);
122
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
123
*/
124
if (ctx->type == TCG_TYPE_I32) {
125
z_mask = (int32_t)z_mask;
126
- s_mask |= MAKE_64BIT_MASK(32, 32);
127
+ s_mask |= INT32_MIN;
128
}
129
130
if (z_mask == 0) {
131
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
132
133
ti = ts_info(ts);
134
ti->z_mask = z_mask;
135
- ti->s_mask = s_mask | smask_from_zmask(z_mask);
136
+
137
+ /* Canonicalize s_mask and incorporate data from z_mask. */
138
+ rep = clz64(~s_mask);
139
+ rep = MAX(rep, clz64(z_mask));
140
+ rep = MAX(rep - 1, 0);
141
+ ti->s_mask = INT64_MIN >> rep;
142
+
143
return true;
144
}
145
146
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
147
148
ctx->z_mask = z_mask;
149
ctx->s_mask = s_mask;
150
- if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
151
+ if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
152
return true;
153
}
154
155
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
156
s_mask |= MAKE_64BIT_MASK(len, 64 - len);
157
ctx->s_mask = s_mask;
158
159
- if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
160
+ if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
161
return true;
162
}
163
164
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
165
ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
166
167
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
168
- ctx->s_mask = smask_from_smask(s_mask);
169
170
return fold_masks(ctx, op);
171
}
172
--
173
2.43.0
diff view generated by jsdifflib
1
We have pre-computed the next instruction address into
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
dc->base.pc_next, so we might as well use it.
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
target/nios2/translate.c | 12 ++++++------
4
tcg/optimize.c | 9 +++++----
9
1 file changed, 6 insertions(+), 6 deletions(-)
5
1 file changed, 5 insertions(+), 4 deletions(-)
10
6
11
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
13
--- a/target/nios2/translate.c
9
--- a/tcg/optimize.c
14
+++ b/target/nios2/translate.c
10
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void jmpi(DisasContext *dc, uint32_t code, uint32_t flags)
11
@@ -XXX,XX +XXX,XX @@ static void finish_ebb(OptContext *ctx)
16
12
remove_mem_copy_all(ctx);
17
static void call(DisasContext *dc, uint32_t code, uint32_t flags)
13
}
14
15
-static void finish_folding(OptContext *ctx, TCGOp *op)
16
+static bool finish_folding(OptContext *ctx, TCGOp *op)
18
{
17
{
19
- tcg_gen_movi_tl(cpu_R[R_RA], dc->pc + 4);
18
const TCGOpDef *def = &tcg_op_defs[op->opc];
20
+ tcg_gen_movi_tl(cpu_R[R_RA], dc->base.pc_next);
19
int i, nb_oargs;
21
jmpi(dc, code, flags);
20
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
21
ts_info(ts)->z_mask = ctx->z_mask;
22
}
23
}
24
+ return true;
22
}
25
}
23
26
24
@@ -XXX,XX +XXX,XX @@ static void br(DisasContext *dc, uint32_t code, uint32_t flags)
27
/*
25
{
28
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
26
I_TYPE(instr, code);
29
fold_xi_to_x(ctx, op, 0)) {
27
30
return true;
28
- gen_goto_tb(dc, 0, dc->pc + 4 + (instr.imm16.s & -4));
31
}
29
+ gen_goto_tb(dc, 0, dc->base.pc_next + (instr.imm16.s & -4));
32
- return false;
30
dc->base.is_jmp = DISAS_NORETURN;
33
+ return finish_folding(ctx, op);
31
}
34
}
32
35
33
@@ -XXX,XX +XXX,XX @@ static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags)
36
/* We cannot as yet do_constant_folding with vectors. */
34
37
@@ -XXX,XX +XXX,XX @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op)
35
TCGLabel *l1 = gen_new_label();
38
fold_xi_to_x(ctx, op, 0)) {
36
tcg_gen_brcond_tl(flags, cpu_R[instr.a], cpu_R[instr.b], l1);
39
return true;
37
- gen_goto_tb(dc, 0, dc->pc + 4);
40
}
38
+ gen_goto_tb(dc, 0, dc->base.pc_next);
41
- return false;
39
gen_set_label(l1);
42
+ return finish_folding(ctx, op);
40
- gen_goto_tb(dc, 1, dc->pc + 4 + (instr.imm16.s & -4));
41
+ gen_goto_tb(dc, 1, dc->base.pc_next + (instr.imm16.s & -4));
42
dc->base.is_jmp = DISAS_NORETURN;
43
}
43
}
44
44
45
@@ -XXX,XX +XXX,XX @@ static void nextpc(DisasContext *dc, uint32_t code, uint32_t flags)
45
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
46
R_TYPE(instr, code);
46
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
47
47
op->args[4] = arg_new_constant(ctx, bl);
48
if (likely(instr.c != R_ZERO)) {
48
op->args[5] = arg_new_constant(ctx, bh);
49
- tcg_gen_movi_tl(cpu_R[instr.c], dc->pc + 4);
50
+ tcg_gen_movi_tl(cpu_R[instr.c], dc->base.pc_next);
51
}
49
}
50
- return false;
51
+ return finish_folding(ctx, op);
52
}
52
}
53
53
54
@@ -XXX,XX +XXX,XX @@ static void callr(DisasContext *dc, uint32_t code, uint32_t flags)
54
static bool fold_add2(OptContext *ctx, TCGOp *op)
55
R_TYPE(instr, code);
56
57
tcg_gen_mov_tl(cpu_R[R_PC], load_gpr(dc, instr.a));
58
- tcg_gen_movi_tl(cpu_R[R_RA], dc->pc + 4);
59
+ tcg_gen_movi_tl(cpu_R[R_RA], dc->base.pc_next);
60
61
dc->base.is_jmp = DISAS_JUMP;
62
}
63
--
55
--
64
2.25.1
56
2.43.0
65
66
diff view generated by jsdifflib
1
We will shortly require these in other context;
1
Introduce ti_is_const, ti_const_val, ti_is_const_val.
2
make the expansion as clear as possible.
3
2
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
4
---
8
tcg/ppc/tcg-target.c.inc | 31 +++++++++++++++++++++----------
5
tcg/optimize.c | 20 +++++++++++++++++---
9
1 file changed, 21 insertions(+), 10 deletions(-)
6
1 file changed, 17 insertions(+), 3 deletions(-)
10
7
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
10
--- a/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target.c.inc
11
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
12
@@ -XXX,XX +XXX,XX @@ static inline TempOptInfo *arg_info(TCGArg arg)
16
tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
13
return ts_info(arg_temp(arg));
17
}
14
}
18
15
19
+static inline void tcg_out_ext8s(TCGContext *s, TCGReg dst, TCGReg src)
16
+static inline bool ti_is_const(TempOptInfo *ti)
20
+{
17
+{
21
+ tcg_out32(s, EXTSB | RA(dst) | RS(src));
18
+ return ti->is_const;
22
+}
19
+}
23
+
20
+
24
+static inline void tcg_out_ext16s(TCGContext *s, TCGReg dst, TCGReg src)
21
+static inline uint64_t ti_const_val(TempOptInfo *ti)
25
+{
22
+{
26
+ tcg_out32(s, EXTSH | RA(dst) | RS(src));
23
+ return ti->val;
27
+}
24
+}
28
+
25
+
29
+static inline void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
26
+static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
30
+{
27
+{
31
+ tcg_out32(s, EXTSW | RA(dst) | RS(src));
28
+ return ti_is_const(ti) && ti_const_val(ti) == val;
32
+}
29
+}
33
+
30
+
34
static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
31
static inline bool ts_is_const(TCGTemp *ts)
35
{
32
{
36
tcg_out_rld(s, RLDICL, dst, src, 0, 32);
33
- return ts_info(ts)->is_const;
37
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
34
+ return ti_is_const(ts_info(ts));
38
const int const_args[TCG_MAX_OP_ARGS])
35
}
36
37
static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
39
{
38
{
40
TCGArg a0, a1, a2;
39
- TempOptInfo *ti = ts_info(ts);
41
- int c;
40
- return ti->is_const && ti->val == val;
42
41
+ return ti_is_const_val(ts_info(ts), val);
43
switch (opc) {
42
}
44
case INDEX_op_exit_tb:
43
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
44
static inline bool arg_is_const(TCGArg arg)
46
case INDEX_op_ld8s_i32:
47
case INDEX_op_ld8s_i64:
48
tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
49
- tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
50
+ tcg_out_ext8s(s, args[0], args[0]);
51
break;
52
case INDEX_op_ld16u_i32:
53
case INDEX_op_ld16u_i64:
54
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
55
56
case INDEX_op_ext8s_i32:
57
case INDEX_op_ext8s_i64:
58
- c = EXTSB;
59
- goto gen_ext;
60
+ tcg_out_ext8s(s, args[0], args[1]);
61
+ break;
62
case INDEX_op_ext16s_i32:
63
case INDEX_op_ext16s_i64:
64
- c = EXTSH;
65
- goto gen_ext;
66
+ tcg_out_ext16s(s, args[0], args[1]);
67
+ break;
68
case INDEX_op_ext_i32_i64:
69
case INDEX_op_ext32s_i64:
70
- c = EXTSW;
71
- goto gen_ext;
72
- gen_ext:
73
- tcg_out32(s, c | RS(args[1]) | RA(args[0]));
74
+ tcg_out_ext32s(s, args[0], args[1]);
75
break;
76
case INDEX_op_extu_i32_i64:
77
tcg_out_ext32u(s, args[0], args[1]);
78
--
45
--
79
2.25.1
46
2.43.0
80
81
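As a rough sketch of how the new helpers are meant to be used by later patches in the series (illustrative only: fold_example and the chosen identity are made up; the ti_* helpers are the ones introduced above, and tcg_opt_gen_mov/finish_folding are existing optimize.c helpers):

static bool fold_example(OptContext *ctx, TCGOp *op)
{
    /* Look up the operand info once instead of repeated arg_info() calls. */
    TempOptInfo *t2 = arg_info(op->args[2]);

    if (ti_is_const_val(t2, 0)) {
        /* x OP 0 -> x, assuming 0 is an identity for this opcode. */
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    if (ti_is_const(t2)) {
        uint64_t v2 = ti_const_val(t2);
        /* ... fold against the known constant v2 here ... */
    }
    return finish_folding(ctx, op);
}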
1
Combine the three bswap16 routines, and differentiate via the flags.
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Use the correct flags combination from the load/store routines, and
2
Sink mask computation below fold_affected_mask early exit.
3
pass along the constant parameter from tcg_out_op.
4
3
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/arm/tcg-target.c.inc | 101 ++++++++++++++++++++++++---------------
7
tcg/optimize.c | 30 ++++++++++++++++--------------
9
1 file changed, 63 insertions(+), 38 deletions(-)
8
1 file changed, 16 insertions(+), 14 deletions(-)
10
9
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
12
--- a/tcg/optimize.c
14
+++ b/tcg/arm/tcg-target.c.inc
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_ext16u(TCGContext *s, int cond,
14
@@ -XXX,XX +XXX,XX @@ static bool fold_add2(OptContext *ctx, TCGOp *op)
15
16
static bool fold_and(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t z1, z2;
19
+ uint64_t z1, z2, z_mask, s_mask;
20
+ TempOptInfo *t1, *t2;
21
22
if (fold_const2_commutative(ctx, op) ||
23
fold_xi_to_i(ctx, op, 0) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
25
return true;
16
}
26
}
17
}
27
18
28
- z1 = arg_info(op->args[1])->z_mask;
19
-static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
29
- z2 = arg_info(op->args[2])->z_mask;
20
+static void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn, int flags)
30
- ctx->z_mask = z1 & z2;
21
{
31
-
22
if (use_armv6_instructions) {
32
- /*
23
- /* revsh */
33
- * Sign repetitions are perforce all identical, whether they are 1 or 0.
24
- tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
34
- * Bitwise operations preserve the relative quantity of the repetitions.
25
- } else {
35
- */
26
- tcg_out_dat_reg(s, cond, ARITH_MOV,
36
- ctx->s_mask = arg_info(op->args[1])->s_mask
27
- TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
37
- & arg_info(op->args[2])->s_mask;
28
- tcg_out_dat_reg(s, cond, ARITH_MOV,
38
+ t1 = arg_info(op->args[1]);
29
- TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
39
+ t2 = arg_info(op->args[2]);
30
- tcg_out_dat_reg(s, cond, ARITH_ORR,
40
+ z1 = t1->z_mask;
31
- rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
41
+ z2 = t2->z_mask;
32
- }
42
33
-}
43
/*
34
+ if (flags & TCG_BSWAP_OS) {
44
* Known-zeros does not imply known-ones. Therefore unless
35
+ /* revsh */
45
* arg2 is constant, we can't infer affected bits from it.
36
+ tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
46
*/
37
+ return;
47
- if (arg_is_const(op->args[2]) &&
38
+ }
48
- fold_affected_mask(ctx, op, z1 & ~z2)) {
39
49
+ if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
40
-static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
50
return true;
41
-{
42
- if (use_armv6_instructions) {
43
/* rev16 */
44
tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
45
- } else {
46
- tcg_out_dat_reg(s, cond, ARITH_MOV,
47
- TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
48
- tcg_out_dat_reg(s, cond, ARITH_MOV,
49
- TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
50
- tcg_out_dat_reg(s, cond, ARITH_ORR,
51
- rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
52
+ if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
53
+ /* uxth */
54
+ tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd);
55
+ }
56
+ return;
57
}
51
}
58
-}
52
59
53
- return fold_masks(ctx, op);
60
-/* swap the two low bytes assuming that the two high input bytes and the
54
+ z_mask = z1 & z2;
61
- two high output bit can hold any value. */
62
-static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
63
-{
64
- if (use_armv6_instructions) {
65
- /* rev16 */
66
- tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
67
- } else {
68
+ if (flags == 0) {
69
+ /*
70
+ * For stores, no input or output extension:
71
+ * rn = xxAB
72
+ * lsr tmp, rn, #8 tmp = 0xxA
73
+ * and tmp, tmp, #0xff tmp = 000A
74
+ * orr rd, tmp, rn, lsl #8 rd = xABA
75
+ */
76
tcg_out_dat_reg(s, cond, ARITH_MOV,
77
TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
78
tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
79
tcg_out_dat_reg(s, cond, ARITH_ORR,
80
rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
81
+ return;
82
}
83
+
55
+
84
+ /*
56
+ /*
85
+ * Byte swap, leaving the result at the top of the register.
57
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
86
+ * We will then shift down, zero or sign-extending.
58
+ * Bitwise operations preserve the relative quantity of the repetitions.
87
+ */
59
+ */
88
+ if (flags & TCG_BSWAP_IZ) {
60
+ s_mask = t1->s_mask & t2->s_mask;
89
+ /*
61
+
90
+ * rn = 00AB
62
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
91
+ * ror tmp, rn, #8 tmp = B00A
92
+ * orr tmp, tmp, tmp, lsl #16 tmp = BA00
93
+ */
94
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
95
+ TCG_REG_TMP, 0, rn, SHIFT_IMM_ROR(8));
96
+ tcg_out_dat_reg(s, cond, ARITH_ORR,
97
+ TCG_REG_TMP, TCG_REG_TMP, TCG_REG_TMP,
98
+ SHIFT_IMM_LSL(16));
99
+ } else {
100
+ /*
101
+ * rn = xxAB
102
+ * and tmp, rn, #0xff00 tmp = 00A0
103
+ * lsl tmp, tmp, #8 tmp = 0A00
104
+ * orr tmp, tmp, rn, lsl #24 tmp = BA00
105
+ */
106
+ tcg_out_dat_rI(s, cond, ARITH_AND, TCG_REG_TMP, rn, 0xff00, 1);
107
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
108
+ TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSL(8));
109
+ tcg_out_dat_reg(s, cond, ARITH_ORR,
110
+ TCG_REG_TMP, TCG_REG_TMP, rn, SHIFT_IMM_LSL(24));
111
+ }
112
+ tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, TCG_REG_TMP,
113
+ (flags & TCG_BSWAP_OS
114
+ ? SHIFT_IMM_ASR(8) : SHIFT_IMM_LSR(8)));
115
}
63
}
116
64
117
static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
65
static bool fold_andc(OptContext *ctx, TCGOp *op)
118
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
119
case MO_UW:
120
tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
121
if (bswap) {
122
- tcg_out_bswap16(s, COND_AL, datalo, datalo);
123
+ tcg_out_bswap16(s, COND_AL, datalo, datalo,
124
+ TCG_BSWAP_IZ | TCG_BSWAP_OZ);
125
}
126
break;
127
case MO_SW:
128
if (bswap) {
129
tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
130
- tcg_out_bswap16s(s, COND_AL, datalo, datalo);
131
+ tcg_out_bswap16(s, COND_AL, datalo, datalo,
132
+ TCG_BSWAP_IZ | TCG_BSWAP_OS);
133
} else {
134
tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
135
}
136
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc,
137
case MO_UW:
138
tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
139
if (bswap) {
140
- tcg_out_bswap16(s, COND_AL, datalo, datalo);
141
+ tcg_out_bswap16(s, COND_AL, datalo, datalo,
142
+ TCG_BSWAP_IZ | TCG_BSWAP_OZ);
143
}
144
break;
145
case MO_SW:
146
if (bswap) {
147
tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
148
- tcg_out_bswap16s(s, COND_AL, datalo, datalo);
149
+ tcg_out_bswap16(s, COND_AL, datalo, datalo,
150
+ TCG_BSWAP_IZ | TCG_BSWAP_OS);
151
} else {
152
tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
153
}
154
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, MemOp opc,
155
break;
156
case MO_16:
157
if (bswap) {
158
- tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
159
+ tcg_out_bswap16(s, cond, TCG_REG_R0, datalo, 0);
160
tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
161
} else {
162
tcg_out_st16_r(s, cond, datalo, addrlo, addend);
163
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc,
164
break;
165
case MO_16:
166
if (bswap) {
167
- tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
168
+ tcg_out_bswap16(s, COND_AL, TCG_REG_R0, datalo, 0);
169
tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
170
} else {
171
tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
172
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
173
break;
174
175
case INDEX_op_bswap16_i32:
176
- tcg_out_bswap16(s, COND_AL, args[0], args[1]);
177
+ tcg_out_bswap16(s, COND_AL, args[0], args[1], args[2]);
178
break;
179
case INDEX_op_bswap32_i32:
180
tcg_out_bswap32(s, COND_AL, args[0], args[1]);
181
--
66
--
182
2.25.1
67
2.43.0
183
184
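As a quick reference for the flag combinations used in this and the following backend patches (a worked example with made-up values; IZ = input already zero-extended, OZ = zero-extend the output, OS = sign-extend the output):

/* bswap16 of a 32-bit register whose low half is 0x23f1; the swapped
 * 16-bit value is 0xf123.
 *   TCG_BSWAP_IZ | TCG_BSWAP_OZ  -> 0x0000f123   (MO_UW load path)
 *   TCG_BSWAP_IZ | TCG_BSWAP_OS  -> 0xfffff123   (MO_SW load path)
 *   flags == 0                   -> xxxxf123     (store path: bits 16..31
 *                                                 may hold anything, cf. the
 *                                                 "rd = xABA" comment above)
 */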
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Avoid double inversion of the value of second const operand.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 21 +++++++++++----------
8
1 file changed, 11 insertions(+), 10 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
15
16
static bool fold_andc(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t z1;
19
+ uint64_t z_mask, s_mask;
20
+ TempOptInfo *t1, *t2;
21
22
if (fold_const2(ctx, op) ||
23
fold_xx_to_i(ctx, op, 0) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
25
return true;
26
}
27
28
- z1 = arg_info(op->args[1])->z_mask;
29
+ t1 = arg_info(op->args[1]);
30
+ t2 = arg_info(op->args[2]);
31
+ z_mask = t1->z_mask;
32
33
/*
34
* Known-zeros does not imply known-ones. Therefore unless
35
* arg2 is constant, we can't infer anything from it.
36
*/
37
- if (arg_is_const(op->args[2])) {
38
- uint64_t z2 = ~arg_info(op->args[2])->z_mask;
39
- if (fold_affected_mask(ctx, op, z1 & ~z2)) {
40
+ if (ti_is_const(t2)) {
41
+ uint64_t v2 = ti_const_val(t2);
42
+ if (fold_affected_mask(ctx, op, z_mask & v2)) {
43
return true;
44
}
45
- z1 &= z2;
46
+ z_mask &= ~v2;
47
}
48
- ctx->z_mask = z1;
49
50
- ctx->s_mask = arg_info(op->args[1])->s_mask
51
- & arg_info(op->args[2])->s_mask;
52
- return fold_masks(ctx, op);
53
+ s_mask = t1->s_mask & t2->s_mask;
54
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
55
}
56
57
static bool fold_brcond(OptContext *ctx, TCGOp *op)
58
--
59
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Always set s_mask along the BSWAP_OS path, since the result is
3
being explicitly sign-extended.
1
4
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 21 ++++++++++-----------
9
1 file changed, 10 insertions(+), 11 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
16
static bool fold_bswap(OptContext *ctx, TCGOp *op)
17
{
18
uint64_t z_mask, s_mask, sign;
19
+ TempOptInfo *t1 = arg_info(op->args[1]);
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t = arg_info(op->args[1])->val;
23
-
24
- t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
25
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
26
+ if (ti_is_const(t1)) {
27
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
28
+ do_constant_folding(op->opc, ctx->type,
29
+ ti_const_val(t1),
30
+ op->args[2]));
31
}
32
33
- z_mask = arg_info(op->args[1])->z_mask;
34
-
35
+ z_mask = t1->z_mask;
36
switch (op->opc) {
37
case INDEX_op_bswap16_i32:
38
case INDEX_op_bswap16_i64:
39
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
40
/* If the sign bit may be 1, force all the bits above to 1. */
41
if (z_mask & sign) {
42
z_mask |= sign;
43
- s_mask = sign << 1;
44
}
45
+ /* The value and therefore s_mask is explicitly sign-extended. */
46
+ s_mask = sign;
47
break;
48
default:
49
/* The high bits are undefined: force all bits above the sign to 1. */
50
z_mask |= sign << 1;
51
break;
52
}
53
- ctx->z_mask = z_mask;
54
- ctx->s_mask = s_mask;
55
56
- return fold_masks(ctx, op);
57
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
58
}
59
60
static bool fold_call(OptContext *ctx, TCGOp *op)
61
--
62
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Compute s_mask from the union of the maximum count and the
3
op2 fallback for op1 being zero.
1
4
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 15 ++++++++++-----
9
1 file changed, 10 insertions(+), 5 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
16
17
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
18
{
19
- uint64_t z_mask;
20
+ uint64_t z_mask, s_mask;
21
+ TempOptInfo *t1 = arg_info(op->args[1]);
22
+ TempOptInfo *t2 = arg_info(op->args[2]);
23
24
- if (arg_is_const(op->args[1])) {
25
- uint64_t t = arg_info(op->args[1])->val;
26
+ if (ti_is_const(t1)) {
27
+ uint64_t t = ti_const_val(t1);
28
29
if (t != 0) {
30
t = do_constant_folding(op->opc, ctx->type, t, 0);
31
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
32
default:
33
g_assert_not_reached();
34
}
35
- ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
36
- return false;
37
+ s_mask = ~z_mask;
38
+ z_mask |= t2->z_mask;
39
+ s_mask &= t2->s_mask;
40
+
41
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
42
}
43
44
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
45
--
46
2.43.0
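One informal way to read the new mask computation above (not from the patch text; the 64-bit case is used as the example, and count_z_mask stands for the z_mask picked in the preceding switch):

/* clz/ctz returns either a count or the op2 fallback:
 *   op1 != 0: the result is a count in [0, 64], so only bits 0..6 can be
 *             set; every higher bit is 0 and therefore a sign-bit repetition.
 *   op1 == 0: the result is op2 unchanged.
 * Merging both cases gives the code above:
 *   z_mask = count_z_mask | t2->z_mask;     bits that may be set
 *   s_mask = ~count_z_mask & t2->s_mask;    sign repetitions common to both
 */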
1
Tested-by: Michael Rolnik <mrolnik@gmail.com>
1
Add fold_masks_z as a trivial wrapper around fold_masks_zs.
2
Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
2
Avoid the use of the OptContext slots.
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
target/avr/translate.c | 234 ++++++++++++++++++++++-------------------
7
tcg/optimize.c | 13 ++++++++++---
6
1 file changed, 128 insertions(+), 106 deletions(-)
8
1 file changed, 10 insertions(+), 3 deletions(-)
7
9
8
diff --git a/target/avr/translate.c b/target/avr/translate.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
10
--- a/target/avr/translate.c
12
--- a/tcg/optimize.c
11
+++ b/target/avr/translate.c
13
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool canonicalize_skip(DisasContext *ctx)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
13
return true;
15
return true;
14
}
16
}
15
17
16
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
18
+static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
17
+static void gen_breakpoint(DisasContext *ctx)
19
+{
18
{
20
+ return fold_masks_zs(ctx, op, z_mask, 0);
19
+ canonicalize_skip(ctx);
20
+ tcg_gen_movi_tl(cpu_pc, ctx->npc);
21
+ gen_helper_debug(cpu_env);
22
+ ctx->base.is_jmp = DISAS_NORETURN;
23
+}
21
+}
24
+
22
+
25
+static void avr_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
26
+{
24
{
27
+ DisasContext *ctx = container_of(dcbase, DisasContext, base);
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
28
CPUAVRState *env = cs->env_ptr;
26
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
29
- DisasContext ctx1 = {
27
30
- .base.tb = tb,
28
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
31
- .base.is_jmp = DISAS_NEXT,
29
{
32
- .base.pc_first = tb->pc,
30
+ uint64_t z_mask;
33
- .base.pc_next = tb->pc,
31
+
34
- .base.singlestep_enabled = cs->singlestep_enabled,
32
if (fold_const1(ctx, op)) {
35
- .cs = cs,
33
return true;
36
- .env = env,
37
- .memidx = 0,
38
- .skip_cond = TCG_COND_NEVER,
39
- };
40
- DisasContext *ctx = &ctx1;
41
- target_ulong pc_start = tb->pc / 2;
42
- int num_insns = 0;
43
+ uint32_t tb_flags = ctx->base.tb->flags;
44
45
- if (tb->flags & TB_FLAGS_FULL_ACCESS) {
46
- /*
47
- * This flag is set by ST/LD instruction we will regenerate it ONLY
48
- * with mem/cpu memory access instead of mem access
49
- */
50
- max_insns = 1;
51
- }
52
- if (ctx->base.singlestep_enabled) {
53
- max_insns = 1;
54
- }
55
+ ctx->cs = cs;
56
+ ctx->env = env;
57
+ ctx->npc = ctx->base.pc_first / 2;
58
59
- gen_tb_start(tb);
60
-
61
- ctx->npc = pc_start;
62
- if (tb->flags & TB_FLAGS_SKIP) {
63
+ ctx->skip_cond = TCG_COND_NEVER;
64
+ if (tb_flags & TB_FLAGS_SKIP) {
65
ctx->skip_cond = TCG_COND_ALWAYS;
66
ctx->skip_var0 = cpu_skip;
67
}
34
}
68
35
69
- do {
36
switch (ctx->type) {
70
- TCGLabel *skip_label = NULL;
37
case TCG_TYPE_I32:
71
-
38
- ctx->z_mask = 32 | 31;
72
- /* translate current instruction */
39
+ z_mask = 32 | 31;
73
- tcg_gen_insn_start(ctx->npc);
40
break;
74
- num_insns++;
41
case TCG_TYPE_I64:
75
-
42
- ctx->z_mask = 64 | 63;
76
+ if (tb_flags & TB_FLAGS_FULL_ACCESS) {
43
+ z_mask = 64 | 63;
77
/*
44
break;
78
- * this is due to some strange GDB behavior
79
- * let's assume main has address 0x100
80
- * b main - sets breakpoint at address 0x00000100 (code)
81
- * b *0x100 - sets breakpoint at address 0x00800100 (data)
82
+ * This flag is set by ST/LD instruction we will regenerate it ONLY
83
+ * with mem/cpu memory access instead of mem access
84
*/
85
- if (unlikely(!ctx->base.singlestep_enabled &&
86
- (cpu_breakpoint_test(cs, OFFSET_CODE + ctx->npc * 2, BP_ANY) ||
87
- cpu_breakpoint_test(cs, OFFSET_DATA + ctx->npc * 2, BP_ANY)))) {
88
- canonicalize_skip(ctx);
89
- tcg_gen_movi_tl(cpu_pc, ctx->npc);
90
- gen_helper_debug(cpu_env);
91
- goto done_generating;
92
- }
93
+ ctx->base.max_insns = 1;
94
+ }
95
+}
96
97
- /* Conditionally skip the next instruction, if indicated. */
98
- if (ctx->skip_cond != TCG_COND_NEVER) {
99
- skip_label = gen_new_label();
100
- if (ctx->skip_var0 == cpu_skip) {
101
- /*
102
- * Copy cpu_skip so that we may zero it before the branch.
103
- * This ensures that cpu_skip is non-zero after the label
104
- * if and only if the skipped insn itself sets a skip.
105
- */
106
- ctx->free_skip_var0 = true;
107
- ctx->skip_var0 = tcg_temp_new();
108
- tcg_gen_mov_tl(ctx->skip_var0, cpu_skip);
109
- tcg_gen_movi_tl(cpu_skip, 0);
110
- }
111
- if (ctx->skip_var1 == NULL) {
112
- tcg_gen_brcondi_tl(ctx->skip_cond, ctx->skip_var0,
113
- 0, skip_label);
114
- } else {
115
- tcg_gen_brcond_tl(ctx->skip_cond, ctx->skip_var0,
116
- ctx->skip_var1, skip_label);
117
- ctx->skip_var1 = NULL;
118
- }
119
- if (ctx->free_skip_var0) {
120
- tcg_temp_free(ctx->skip_var0);
121
- ctx->free_skip_var0 = false;
122
- }
123
- ctx->skip_cond = TCG_COND_NEVER;
124
- ctx->skip_var0 = NULL;
125
- }
126
+static void avr_tr_tb_start(DisasContextBase *db, CPUState *cs)
127
+{
128
+}
129
130
- translate(ctx);
131
+static void avr_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
132
+{
133
+ DisasContext *ctx = container_of(dcbase, DisasContext, base);
134
135
- if (skip_label) {
136
- canonicalize_skip(ctx);
137
- gen_set_label(skip_label);
138
- if (ctx->base.is_jmp == DISAS_NORETURN) {
139
- ctx->base.is_jmp = DISAS_CHAIN;
140
- }
141
- }
142
- } while (ctx->base.is_jmp == DISAS_NEXT
143
- && num_insns < max_insns
144
- && (ctx->npc - pc_start) * 2 < TARGET_PAGE_SIZE - 4
145
- && !tcg_op_buf_full());
146
+ tcg_gen_insn_start(ctx->npc);
147
+}
148
149
- if (tb->cflags & CF_LAST_IO) {
150
- gen_io_end();
151
+static bool avr_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cs,
152
+ const CPUBreakpoint *bp)
153
+{
154
+ DisasContext *ctx = container_of(dcbase, DisasContext, base);
155
+
156
+ gen_breakpoint(ctx);
157
+ return true;
158
+}
159
+
160
+static void avr_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
161
+{
162
+ DisasContext *ctx = container_of(dcbase, DisasContext, base);
163
+ TCGLabel *skip_label = NULL;
164
+
165
+ /*
166
+ * This is due to some strange GDB behavior
167
+ * Let's assume main has address 0x100:
168
+ * b main - sets breakpoint at address 0x00000100 (code)
169
+ * b *0x100 - sets breakpoint at address 0x00800100 (data)
170
+ *
171
+ * The translator driver has already taken care of the code pointer.
172
+ */
173
+ if (!ctx->base.singlestep_enabled &&
174
+ cpu_breakpoint_test(cs, OFFSET_DATA + ctx->base.pc_next, BP_ANY)) {
175
+ gen_breakpoint(ctx);
176
+ return;
177
}
178
179
+ /* Conditionally skip the next instruction, if indicated. */
180
+ if (ctx->skip_cond != TCG_COND_NEVER) {
181
+ skip_label = gen_new_label();
182
+ if (ctx->skip_var0 == cpu_skip) {
183
+ /*
184
+ * Copy cpu_skip so that we may zero it before the branch.
185
+ * This ensures that cpu_skip is non-zero after the label
186
+ * if and only if the skipped insn itself sets a skip.
187
+ */
188
+ ctx->free_skip_var0 = true;
189
+ ctx->skip_var0 = tcg_temp_new();
190
+ tcg_gen_mov_tl(ctx->skip_var0, cpu_skip);
191
+ tcg_gen_movi_tl(cpu_skip, 0);
192
+ }
193
+ if (ctx->skip_var1 == NULL) {
194
+ tcg_gen_brcondi_tl(ctx->skip_cond, ctx->skip_var0, 0, skip_label);
195
+ } else {
196
+ tcg_gen_brcond_tl(ctx->skip_cond, ctx->skip_var0,
197
+ ctx->skip_var1, skip_label);
198
+ ctx->skip_var1 = NULL;
199
+ }
200
+ if (ctx->free_skip_var0) {
201
+ tcg_temp_free(ctx->skip_var0);
202
+ ctx->free_skip_var0 = false;
203
+ }
204
+ ctx->skip_cond = TCG_COND_NEVER;
205
+ ctx->skip_var0 = NULL;
206
+ }
207
+
208
+ translate(ctx);
209
+
210
+ ctx->base.pc_next = ctx->npc * 2;
211
+
212
+ if (skip_label) {
213
+ canonicalize_skip(ctx);
214
+ gen_set_label(skip_label);
215
+ if (ctx->base.is_jmp == DISAS_NORETURN) {
216
+ ctx->base.is_jmp = DISAS_CHAIN;
217
+ }
218
+ }
219
+
220
+ if (ctx->base.is_jmp == DISAS_NEXT) {
221
+ target_ulong page_first = ctx->base.pc_first & TARGET_PAGE_MASK;
222
+
223
+ if ((ctx->base.pc_next - page_first) >= TARGET_PAGE_SIZE - 4) {
224
+ ctx->base.is_jmp = DISAS_TOO_MANY;
225
+ }
226
+ }
227
+}
228
+
229
+static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
230
+{
231
+ DisasContext *ctx = container_of(dcbase, DisasContext, base);
232
bool nonconst_skip = canonicalize_skip(ctx);
233
234
switch (ctx->base.is_jmp) {
235
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
236
default:
45
default:
237
g_assert_not_reached();
46
g_assert_not_reached();
238
}
47
}
239
+}
48
- return false;
240
49
+ return fold_masks_z(ctx, op, z_mask);
241
-done_generating:
242
- gen_tb_end(tb, num_insns);
243
+static void avr_tr_disas_log(const DisasContextBase *dcbase, CPUState *cs)
244
+{
245
+ qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first));
246
+ log_target_disas(cs, dcbase->pc_first, dcbase->tb->size);
247
+}
248
249
- tb->size = (ctx->npc - pc_start) * 2;
250
- tb->icount = num_insns;
251
+static const TranslatorOps avr_tr_ops = {
252
+ .init_disas_context = avr_tr_init_disas_context,
253
+ .tb_start = avr_tr_tb_start,
254
+ .insn_start = avr_tr_insn_start,
255
+ .breakpoint_check = avr_tr_breakpoint_check,
256
+ .translate_insn = avr_tr_translate_insn,
257
+ .tb_stop = avr_tr_tb_stop,
258
+ .disas_log = avr_tr_disas_log,
259
+};
260
261
-#ifdef DEBUG_DISAS
262
- if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
263
- && qemu_log_in_addr_range(tb->pc)) {
264
- FILE *fd;
265
- fd = qemu_log_lock();
266
- qemu_log("IN: %s\n", lookup_symbol(tb->pc));
267
- log_target_disas(cs, tb->pc, tb->size);
268
- qemu_log("\n");
269
- qemu_log_unlock(fd);
270
- }
271
-#endif
272
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
273
+{
274
+ DisasContext dc = { };
275
+ translator_loop(&avr_tr_ops, &dc.base, cs, tb, max_insns);
276
}
50
}
277
51
278
void restore_state_to_opc(CPUAVRState *env, TranslationBlock *tb,
52
static bool fold_deposit(OptContext *ctx, TCGOp *op)
279
--
53
--
280
2.25.1
54
2.43.0
281
282
1
Prepare for receiving it as a pointer input.
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
When we fold to and, use fold_and.
2
3
3
Tested-by: Michael Rolnik <mrolnik@gmail.com>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
target/avr/translate.c | 84 +++++++++++++++++++++---------------------
7
tcg/optimize.c | 35 +++++++++++++++++------------------
9
1 file changed, 43 insertions(+), 41 deletions(-)
8
1 file changed, 17 insertions(+), 18 deletions(-)
10
9
11
diff --git a/target/avr/translate.c b/target/avr/translate.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/target/avr/translate.c
12
--- a/tcg/optimize.c
14
+++ b/target/avr/translate.c
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ struct DisasContext {
14
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
16
* used in the following manner (sketch)
15
17
*
16
static bool fold_deposit(OptContext *ctx, TCGOp *op)
18
* TCGLabel *skip_label = NULL;
19
- * if (ctx.skip_cond != TCG_COND_NEVER) {
20
+ * if (ctx->skip_cond != TCG_COND_NEVER) {
21
* skip_label = gen_new_label();
22
* tcg_gen_brcond_tl(skip_cond, skip_var0, skip_var1, skip_label);
23
* }
24
@@ -XXX,XX +XXX,XX @@ struct DisasContext {
25
* free_skip_var0 = false;
26
* }
27
*
28
- * translate(&ctx);
29
+ * translate(ctx);
30
*
31
* if (skip_label) {
32
* gen_set_label(skip_label);
33
@@ -XXX,XX +XXX,XX @@ static bool canonicalize_skip(DisasContext *ctx)
34
void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
35
{
17
{
36
CPUAVRState *env = cs->env_ptr;
18
+ TempOptInfo *t1 = arg_info(op->args[1]);
37
- DisasContext ctx = {
19
+ TempOptInfo *t2 = arg_info(op->args[2]);
38
+ DisasContext ctx1 = {
20
+ int ofs = op->args[3];
39
.base.tb = tb,
21
+ int len = op->args[4];
40
.base.is_jmp = DISAS_NEXT,
22
TCGOpcode and_opc;
41
.base.pc_first = tb->pc,
23
+ uint64_t z_mask;
42
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
24
43
.memidx = 0,
25
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
44
.skip_cond = TCG_COND_NEVER,
26
- uint64_t t1 = arg_info(op->args[1])->val;
45
};
27
- uint64_t t2 = arg_info(op->args[2])->val;
46
+ DisasContext *ctx = &ctx1;
28
-
47
target_ulong pc_start = tb->pc / 2;
29
- t1 = deposit64(t1, op->args[3], op->args[4], t2);
48
int num_insns = 0;
30
- return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
49
31
+ if (ti_is_const(t1) && ti_is_const(t2)) {
50
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
32
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
51
*/
33
+ deposit64(ti_const_val(t1), ofs, len,
52
max_insns = 1;
34
+ ti_const_val(t2)));
53
}
35
}
54
- if (ctx.base.singlestep_enabled) {
36
55
+ if (ctx->base.singlestep_enabled) {
37
switch (ctx->type) {
56
max_insns = 1;
38
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
57
}
39
}
58
40
59
gen_tb_start(tb);
41
/* Inserting a value into zero at offset 0. */
60
42
- if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
61
- ctx.npc = pc_start;
43
- uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
62
+ ctx->npc = pc_start;
44
+ if (ti_is_const_val(t1, 0) && ofs == 0) {
63
if (tb->flags & TB_FLAGS_SKIP) {
45
+ uint64_t mask = MAKE_64BIT_MASK(0, len);
64
- ctx.skip_cond = TCG_COND_ALWAYS;
46
65
- ctx.skip_var0 = cpu_skip;
47
op->opc = and_opc;
66
+ ctx->skip_cond = TCG_COND_ALWAYS;
48
op->args[1] = op->args[2];
67
+ ctx->skip_var0 = cpu_skip;
49
op->args[2] = arg_new_constant(ctx, mask);
50
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
51
- return false;
52
+ return fold_and(ctx, op);
68
}
53
}
69
54
70
do {
55
/* Inserting zero into a value. */
71
TCGLabel *skip_label = NULL;
56
- if (arg_is_const_val(op->args[2], 0)) {
72
57
- uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
73
/* translate current instruction */
58
+ if (ti_is_const_val(t2, 0)) {
74
- tcg_gen_insn_start(ctx.npc);
59
+ uint64_t mask = deposit64(-1, ofs, len, 0);
75
+ tcg_gen_insn_start(ctx->npc);
60
76
num_insns++;
61
op->opc = and_opc;
77
62
op->args[2] = arg_new_constant(ctx, mask);
78
/*
63
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
79
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
64
- return false;
80
* b main - sets breakpoint at address 0x00000100 (code)
65
+ return fold_and(ctx, op);
81
* b *0x100 - sets breakpoint at address 0x00800100 (data)
82
*/
83
- if (unlikely(!ctx.base.singlestep_enabled &&
84
- (cpu_breakpoint_test(cs, OFFSET_CODE + ctx.npc * 2, BP_ANY) ||
85
- cpu_breakpoint_test(cs, OFFSET_DATA + ctx.npc * 2, BP_ANY)))) {
86
- canonicalize_skip(&ctx);
87
- tcg_gen_movi_tl(cpu_pc, ctx.npc);
88
+ if (unlikely(!ctx->base.singlestep_enabled &&
89
+ (cpu_breakpoint_test(cs, OFFSET_CODE + ctx->npc * 2, BP_ANY) ||
90
+ cpu_breakpoint_test(cs, OFFSET_DATA + ctx->npc * 2, BP_ANY)))) {
91
+ canonicalize_skip(ctx);
92
+ tcg_gen_movi_tl(cpu_pc, ctx->npc);
93
gen_helper_debug(cpu_env);
94
goto done_generating;
95
}
96
97
/* Conditionally skip the next instruction, if indicated. */
98
- if (ctx.skip_cond != TCG_COND_NEVER) {
99
+ if (ctx->skip_cond != TCG_COND_NEVER) {
100
skip_label = gen_new_label();
101
- if (ctx.skip_var0 == cpu_skip) {
102
+ if (ctx->skip_var0 == cpu_skip) {
103
/*
104
* Copy cpu_skip so that we may zero it before the branch.
105
* This ensures that cpu_skip is non-zero after the label
106
* if and only if the skipped insn itself sets a skip.
107
*/
108
- ctx.free_skip_var0 = true;
109
- ctx.skip_var0 = tcg_temp_new();
110
- tcg_gen_mov_tl(ctx.skip_var0, cpu_skip);
111
+ ctx->free_skip_var0 = true;
112
+ ctx->skip_var0 = tcg_temp_new();
113
+ tcg_gen_mov_tl(ctx->skip_var0, cpu_skip);
114
tcg_gen_movi_tl(cpu_skip, 0);
115
}
116
- if (ctx.skip_var1 == NULL) {
117
- tcg_gen_brcondi_tl(ctx.skip_cond, ctx.skip_var0, 0, skip_label);
118
+ if (ctx->skip_var1 == NULL) {
119
+ tcg_gen_brcondi_tl(ctx->skip_cond, ctx->skip_var0,
120
+ 0, skip_label);
121
} else {
122
- tcg_gen_brcond_tl(ctx.skip_cond, ctx.skip_var0,
123
- ctx.skip_var1, skip_label);
124
- ctx.skip_var1 = NULL;
125
+ tcg_gen_brcond_tl(ctx->skip_cond, ctx->skip_var0,
126
+ ctx->skip_var1, skip_label);
127
+ ctx->skip_var1 = NULL;
128
}
129
- if (ctx.free_skip_var0) {
130
- tcg_temp_free(ctx.skip_var0);
131
- ctx.free_skip_var0 = false;
132
+ if (ctx->free_skip_var0) {
133
+ tcg_temp_free(ctx->skip_var0);
134
+ ctx->free_skip_var0 = false;
135
}
136
- ctx.skip_cond = TCG_COND_NEVER;
137
- ctx.skip_var0 = NULL;
138
+ ctx->skip_cond = TCG_COND_NEVER;
139
+ ctx->skip_var0 = NULL;
140
}
141
142
- translate(&ctx);
143
+ translate(ctx);
144
145
if (skip_label) {
146
- canonicalize_skip(&ctx);
147
+ canonicalize_skip(ctx);
148
gen_set_label(skip_label);
149
- if (ctx.base.is_jmp == DISAS_NORETURN) {
150
- ctx.base.is_jmp = DISAS_CHAIN;
151
+ if (ctx->base.is_jmp == DISAS_NORETURN) {
152
+ ctx->base.is_jmp = DISAS_CHAIN;
153
}
154
}
155
- } while (ctx.base.is_jmp == DISAS_NEXT
156
+ } while (ctx->base.is_jmp == DISAS_NEXT
157
&& num_insns < max_insns
158
- && (ctx.npc - pc_start) * 2 < TARGET_PAGE_SIZE - 4
159
+ && (ctx->npc - pc_start) * 2 < TARGET_PAGE_SIZE - 4
160
&& !tcg_op_buf_full());
161
162
if (tb->cflags & CF_LAST_IO) {
163
gen_io_end();
164
}
66
}
165
67
166
- bool nonconst_skip = canonicalize_skip(&ctx);
68
- ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
167
+ bool nonconst_skip = canonicalize_skip(ctx);
69
- op->args[3], op->args[4],
168
70
- arg_info(op->args[2])->z_mask);
169
- switch (ctx.base.is_jmp) {
71
- return false;
170
+ switch (ctx->base.is_jmp) {
72
+ z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
171
case DISAS_NORETURN:
73
+ return fold_masks_z(ctx, op, z_mask);
172
assert(!nonconst_skip);
74
}
173
break;
75
174
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
76
static bool fold_divide(OptContext *ctx, TCGOp *op)
175
case DISAS_CHAIN:
176
if (!nonconst_skip) {
177
/* Note gen_goto_tb checks singlestep. */
178
- gen_goto_tb(&ctx, 1, ctx.npc);
179
+ gen_goto_tb(ctx, 1, ctx->npc);
180
break;
181
}
182
- tcg_gen_movi_tl(cpu_pc, ctx.npc);
183
+ tcg_gen_movi_tl(cpu_pc, ctx->npc);
184
/* fall through */
185
case DISAS_LOOKUP:
186
- if (!ctx.base.singlestep_enabled) {
187
+ if (!ctx->base.singlestep_enabled) {
188
tcg_gen_lookup_and_goto_ptr();
189
break;
190
}
191
/* fall through */
192
case DISAS_EXIT:
193
- if (ctx.base.singlestep_enabled) {
194
+ if (ctx->base.singlestep_enabled) {
195
gen_helper_debug(cpu_env);
196
} else {
197
tcg_gen_exit_tb(NULL, 0);
198
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
199
done_generating:
200
gen_tb_end(tb, num_insns);
201
202
- tb->size = (ctx.npc - pc_start) * 2;
203
+ tb->size = (ctx->npc - pc_start) * 2;
204
tb->icount = num_insns;
205
206
#ifdef DEBUG_DISAS
207
--
77
--
208
2.25.1
78
2.43.0
209
210
1
Move delayed branch handling to tb_stop, where we can re-use other
1
The input which overlaps the sign bit of the output can
2
end-of-tb code, e.g. the evaluation of flags. Honor single stepping.
2
have its input s_mask propagated to the output s_mask.
3
Validate that we aren't losing state by overwriting is_jmp.
4
3
5
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
target/cris/translate.c | 96 ++++++++++++++++++++++++-----------------
7
tcg/optimize.c | 14 ++++++++++++--
10
1 file changed, 56 insertions(+), 40 deletions(-)
8
1 file changed, 12 insertions(+), 2 deletions(-)
11
9
12
diff --git a/target/cris/translate.c b/target/cris/translate.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/target/cris/translate.c
12
--- a/tcg/optimize.c
15
+++ b/target/cris/translate.c
13
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
17
#define DISAS_UPDATE DISAS_TARGET_1
15
TempOptInfo *t2 = arg_info(op->args[2]);
18
/* Cpu state was modified dynamically, excluding pc -- use npc */
16
int ofs = op->args[3];
19
#define DISAS_UPDATE_NEXT DISAS_TARGET_2
17
int len = op->args[4];
20
+/* PC update for delayed branch, see cpustate_changed otherwise */
18
+ int width;
21
+#define DISAS_DBRANCH DISAS_TARGET_3
19
TCGOpcode and_opc;
22
20
- uint64_t z_mask;
23
/* Used by the decoder. */
21
+ uint64_t z_mask, s_mask;
24
#define EXTRACT_FIELD(src, start, end) \
22
25
@@ -XXX,XX +XXX,XX @@ static void cris_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
23
if (ti_is_const(t1) && ti_is_const(t2)) {
26
dc->cpustate_changed |= dc->flags_x != (dc->base.tb->flags & X_FLAG);
24
return tcg_opt_gen_movi(ctx, op, op->args[0],
27
25
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
28
/*
26
switch (ctx->type) {
29
- * Check for delayed branches here. If we do it before
27
case TCG_TYPE_I32:
30
- * actually generating any host code, the simulator will just
28
and_opc = INDEX_op_and_i32;
31
- * loop doing nothing for on this program location.
29
+ width = 32;
32
+ * All branches are delayed branches, handled immediately below.
30
break;
33
+ * We don't expect to see odd combinations of exit conditions.
31
case TCG_TYPE_I64:
34
*/
32
and_opc = INDEX_op_and_i64;
35
+ assert(dc->base.is_jmp == DISAS_NEXT || dc->cpustate_changed);
33
+ width = 64;
36
+
34
break;
37
if (dc->delayed_branch && --dc->delayed_branch == 0) {
35
default:
38
- if (dc->base.tb->flags & 7) {
36
g_assert_not_reached();
39
- t_gen_movi_env_TN(dslot, 0);
37
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
40
- }
38
return fold_and(ctx, op);
41
+ dc->base.is_jmp = DISAS_DBRANCH;
42
+ return;
43
+ }
44
45
- if (dc->cpustate_changed) {
46
- cris_store_direct_jmp(dc);
47
- }
48
-
49
- if (dc->clear_locked_irq) {
50
- dc->clear_locked_irq = 0;
51
- t_gen_movi_env_TN(locked_irq, 0);
52
- }
53
-
54
- if (dc->jmp == JMP_DIRECT_CC) {
55
- TCGLabel *l1 = gen_new_label();
56
- cris_evaluate_flags(dc);
57
-
58
- /* Conditional jmp. */
59
- tcg_gen_brcondi_tl(TCG_COND_EQ, env_btaken, 0, l1);
60
- gen_goto_tb(dc, 1, dc->jmp_pc);
61
- gen_set_label(l1);
62
- gen_goto_tb(dc, 0, dc->pc);
63
- dc->base.is_jmp = DISAS_NORETURN;
64
- dc->jmp = JMP_NOJMP;
65
- } else if (dc->jmp == JMP_DIRECT) {
66
- cris_evaluate_flags(dc);
67
- gen_goto_tb(dc, 0, dc->jmp_pc);
68
- dc->base.is_jmp = DISAS_NORETURN;
69
- dc->jmp = JMP_NOJMP;
70
- } else {
71
- TCGv c = tcg_const_tl(dc->pc);
72
- t_gen_cc_jmp(env_btarget, c);
73
- tcg_temp_free(c);
74
- dc->base.is_jmp = DISAS_JUMP;
75
- }
76
+ if (dc->base.is_jmp != DISAS_NEXT) {
77
+ return;
78
}
39
}
79
40
80
/* Force an update if the per-tb cpu state has changed. */
41
+ /* The s_mask from the top portion of the deposit is still valid. */
81
- if (dc->base.is_jmp == DISAS_NEXT && dc->cpustate_changed) {
42
+ if (ofs + len == width) {
82
+ if (dc->cpustate_changed) {
43
+ s_mask = t2->s_mask << ofs;
83
dc->base.is_jmp = DISAS_UPDATE_NEXT;
44
+ } else {
84
return;
45
+ s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
85
}
86
@@ -XXX,XX +XXX,XX @@ static void cris_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
87
* If we can detect the length of the next insn easily, we should.
88
* In the meantime, simply stop when we do cross.
89
*/
90
- if (dc->base.is_jmp == DISAS_NEXT
91
- && ((dc->pc ^ dc->base.pc_first) & TARGET_PAGE_MASK) != 0) {
92
+ if ((dc->pc ^ dc->base.pc_first) & TARGET_PAGE_MASK) {
93
dc->base.is_jmp = DISAS_TOO_MANY;
94
}
95
}
96
@@ -XXX,XX +XXX,XX @@ static void cris_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
97
98
cris_evaluate_flags(dc);
99
100
+ /* Evaluate delayed branch destination and fold to another is_jmp case. */
101
+ if (is_jmp == DISAS_DBRANCH) {
102
+ if (dc->base.tb->flags & 7) {
103
+ t_gen_movi_env_TN(dslot, 0);
104
+ }
105
+
106
+ switch (dc->jmp) {
107
+ case JMP_DIRECT:
108
+ npc = dc->jmp_pc;
109
+ is_jmp = dc->cpustate_changed ? DISAS_UPDATE_NEXT : DISAS_TOO_MANY;
110
+ break;
111
+
112
+ case JMP_DIRECT_CC:
113
+ /*
114
+ * Use a conditional branch if either taken or not-taken path
115
+ * can use goto_tb. If neither can, then treat it as indirect.
116
+ */
117
+ if (likely(!dc->base.singlestep_enabled)
118
+ && likely(!dc->cpustate_changed)
119
+ && (use_goto_tb(dc, dc->jmp_pc) || use_goto_tb(dc, npc))) {
120
+ TCGLabel *not_taken = gen_new_label();
121
+
122
+ tcg_gen_brcondi_tl(TCG_COND_EQ, env_btaken, 0, not_taken);
123
+ gen_goto_tb(dc, 1, dc->jmp_pc);
124
+ gen_set_label(not_taken);
125
+
126
+ /* not-taken case handled below. */
127
+ is_jmp = DISAS_TOO_MANY;
128
+ break;
129
+ }
130
+ tcg_gen_movi_tl(env_btarget, dc->jmp_pc);
131
+ /* fall through */
132
+
133
+ case JMP_INDIRECT:
134
+ t_gen_cc_jmp(env_btarget, tcg_constant_tl(npc));
135
+ is_jmp = dc->cpustate_changed ? DISAS_UPDATE : DISAS_JUMP;
136
+ break;
137
+
138
+ default:
139
+ g_assert_not_reached();
140
+ }
141
+ }
46
+ }
142
+
47
+
143
if (unlikely(dc->base.singlestep_enabled)) {
48
z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
144
switch (is_jmp) {
49
- return fold_masks_z(ctx, op, z_mask);
145
case DISAS_TOO_MANY:
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
51
}
52
53
static bool fold_divide(OptContext *ctx, TCGOp *op)
146
--
54
--
147
2.25.1
55
2.43.0
148
149
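To make the fold_deposit change above concrete (an informal reading with made-up operand positions):

/* 64-bit examples:
 *   deposit64(t1, 48, 16, t2): ofs + len == 64, so bit 63 of the result is
 *       bit 15 of t2 and the result inherits t2's sign repetitions shifted
 *       into place:          s_mask = t2->s_mask << 48;
 *   deposit64(t1, 8, 16, t2): bits 24..63 still come from t1, so only t1's
 *       sign repetitions above the inserted field survive:
 *                            s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, 24);
 */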
New patch

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
         fold_xi_to_x(ctx, op, 1)) {
         return true;
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_dup(OptContext *ctx, TCGOp *op)
--
2.43.0
1
Merge tcg_out_bswap16 and tcg_out_bswap16s. Use the flags
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
in the internal uses for loads and stores.
3
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
3
---
7
tcg/mips/tcg-target.c.inc | 63 +++++++++++++++++++--------------------
4
tcg/optimize.c | 4 ++--
8
1 file changed, 30 insertions(+), 33 deletions(-)
5
1 file changed, 2 insertions(+), 2 deletions(-)
9
6
10
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/mips/tcg-target.c.inc
9
--- a/tcg/optimize.c
13
+++ b/tcg/mips/tcg-target.c.inc
10
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
11
@@ -XXX,XX +XXX,XX @@ static bool fold_dup(OptContext *ctx, TCGOp *op)
12
t = dup_const(TCGOP_VECE(op), t);
13
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
15
}
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
16
}
17
}
17
18
18
-static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
19
static bool fold_dup2(OptContext *ctx, TCGOp *op)
19
+static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags)
20
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
20
{
21
op->opc = INDEX_op_dup_vec;
21
+ /* ret and arg can't be register tmp0 */
22
TCGOP_VECE(op) = MO_32;
22
+ tcg_debug_assert(ret != TCG_TMP0);
23
+ tcg_debug_assert(arg != TCG_TMP0);
24
+
25
+ /* With arg = abcd: */
26
if (use_mips32r2_instructions) {
27
- tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
28
- } else {
29
- /* ret and arg can't be register at */
30
- if (ret == TCG_TMP0 || arg == TCG_TMP0) {
31
- tcg_abort();
32
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); /* badc */
33
+ if (flags & TCG_BSWAP_OS) {
34
+ tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); /* ssdc */
35
+ } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
36
+ tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xffff); /* 00dc */
37
}
38
-
39
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8);
40
- tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8);
41
- tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00);
42
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
43
+ return;
44
}
23
}
45
-}
24
- return false;
46
25
+ return finish_folding(ctx, op);
47
-static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
48
-{
49
- if (use_mips32r2_instructions) {
50
- tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
51
- tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret);
52
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); /* 0abc */
53
+ if (!(flags & TCG_BSWAP_IZ)) {
54
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff); /* 000c */
55
+ }
56
+ if (flags & TCG_BSWAP_OS) {
57
+ tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); /* d000 */
58
+ tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); /* ssd0 */
59
} else {
60
- /* ret and arg can't be register at */
61
- if (ret == TCG_TMP0 || arg == TCG_TMP0) {
62
- tcg_abort();
63
+ tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); /* bcd0 */
64
+ if (flags & TCG_BSWAP_OZ) {
65
+ tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); /* 00d0 */
66
}
67
-
68
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8);
69
- tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
70
- tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);
71
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
72
}
73
+ tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); /* ssdc */
74
}
26
}
75
27
76
static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub)
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
77
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
78
break;
79
case MO_UW | MO_BSWAP:
80
tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
81
- tcg_out_bswap16(s, lo, TCG_TMP1);
82
+ tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
83
break;
84
case MO_UW:
85
tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
86
break;
87
case MO_SW | MO_BSWAP:
88
tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
89
- tcg_out_bswap16s(s, lo, TCG_TMP1);
90
+ tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS);
91
break;
92
case MO_SW:
93
tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
94
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
95
break;
96
97
case MO_16 | MO_BSWAP:
98
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, lo, 0xffff);
99
- tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1);
100
+ tcg_out_bswap16(s, TCG_TMP1, lo, 0);
101
lo = TCG_TMP1;
102
/* FALLTHRU */
103
case MO_16:
104
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
105
case INDEX_op_not_i64:
106
i1 = OPC_NOR;
107
goto do_unary;
108
- case INDEX_op_bswap16_i32:
109
- case INDEX_op_bswap16_i64:
110
- i1 = OPC_WSBH;
111
- goto do_unary;
112
case INDEX_op_ext8s_i32:
113
case INDEX_op_ext8s_i64:
114
i1 = OPC_SEB;
115
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
116
tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1);
117
break;
118
119
+ case INDEX_op_bswap16_i32:
120
+ case INDEX_op_bswap16_i64:
121
+ tcg_out_bswap16(s, a0, a1, a2);
122
+ break;
123
case INDEX_op_bswap32_i32:
124
tcg_out_bswap32(s, a0, a1);
125
break;
126
--
29
--
127
2.25.1
30
2.43.0
128
129
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
Add fold_masks_s as a trivial wrapper around fold_masks_zs.
2
Avoid the use of the OptContext slots.
2
3
3
Implement tcg_gen_vec_add{sub}16_tl by adding the corresponding i32 ops.
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
5
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
6
Message-Id: <20210624105023.3852-2-zhiwei_liu@c-sky.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
include/tcg/tcg-op-gvec.h | 13 +++++++++++++
7
tcg/optimize.c | 13 ++++++++++---
10
tcg/tcg-op-gvec.c | 28 ++++++++++++++++++++++++++++
8
1 file changed, 10 insertions(+), 3 deletions(-)
11
2 files changed, 41 insertions(+)
12
9
13
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-op-gvec.h
12
--- a/tcg/optimize.c
16
+++ b/include/tcg/tcg-op-gvec.h
13
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
18
void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
15
return fold_masks_zs(ctx, op, z_mask, 0);
19
void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
20
21
+/* 32-bit vector operations. */
22
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
23
+
24
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
25
+
26
+#if TARGET_LONG_BITS == 64
27
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
28
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
29
+#else
30
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
31
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
32
+#endif
33
+
34
#endif
35
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/tcg/tcg-op-gvec.c
38
+++ b/tcg/tcg-op-gvec.c
39
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
40
gen_addv_mask(d, a, b, m);
41
}
16
}
42
17
43
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
18
+static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
44
+{
19
+{
45
+ TCGv_i32 t1 = tcg_temp_new_i32();
20
+ return fold_masks_zs(ctx, op, -1, s_mask);
46
+ TCGv_i32 t2 = tcg_temp_new_i32();
47
+
48
+ tcg_gen_andi_i32(t1, a, ~0xffff);
49
+ tcg_gen_add_i32(t2, a, b);
50
+ tcg_gen_add_i32(t1, t1, b);
51
+ tcg_gen_deposit_i32(d, t1, t2, 0, 16);
52
+
53
+ tcg_temp_free_i32(t1);
54
+ tcg_temp_free_i32(t2);
55
+}
21
+}
56
+
22
+
57
void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
58
{
24
{
59
TCGv_i64 t1 = tcg_temp_new_i64();
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
60
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
26
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
61
gen_subv_mask(d, a, b, m);
27
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
29
{
30
+ uint64_t s_mask;
31
+
32
if (fold_const2_commutative(ctx, op) ||
33
fold_xi_to_x(ctx, op, -1) ||
34
fold_xi_to_not(ctx, op, 0)) {
35
return true;
36
}
37
38
- ctx->s_mask = arg_info(op->args[1])->s_mask
39
- & arg_info(op->args[2])->s_mask;
40
- return false;
41
+ s_mask = arg_info(op->args[1])->s_mask
42
+ & arg_info(op->args[2])->s_mask;
43
+ return fold_masks_s(ctx, op, s_mask);
62
}
44
}
63
45
64
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
46
static bool fold_extract(OptContext *ctx, TCGOp *op)
65
+{
66
+ TCGv_i32 t1 = tcg_temp_new_i32();
67
+ TCGv_i32 t2 = tcg_temp_new_i32();
68
+
69
+ tcg_gen_andi_i32(t1, b, ~0xffff);
70
+ tcg_gen_sub_i32(t2, a, b);
71
+ tcg_gen_sub_i32(t1, a, t1);
72
+ tcg_gen_deposit_i32(d, t1, t2, 0, 16);
73
+
74
+ tcg_temp_free_i32(t1);
75
+ tcg_temp_free_i32(t2);
76
+}
77
+
78
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
79
{
80
TCGv_i64 t1 = tcg_temp_new_i64();
81
--
47
--
82
2.25.1
48
2.43.0
83
84
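A small worked example of the new i32 variant's semantics (the wrapper below is hypothetical; the values are chosen to show that carries do not cross the 16-bit lane boundary):

/* Packed 16-bit add of two 32-bit temps in a hypothetical translator. */
static void gen_padd16(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    /* e.g. a = 0x0001ffff, b = 0x00010001:
     *   tcg_gen_add_i32        -> 0x00030000 (carry leaks into the high lane)
     *   tcg_gen_vec_add16_i32  -> 0x00020000 (each lane wraps independently)
     */
    tcg_gen_vec_add16_i32(d, a, b);
}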
New patch

Avoid the use of the OptContext slots. Find TempOptInfo once.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
 static bool fold_extract(OptContext *ctx, TCGOp *op)
 {
     uint64_t z_mask_old, z_mask;
+    TempOptInfo *t1 = arg_info(op->args[1]);
     int pos = op->args[2];
     int len = op->args[3];
 
-    if (arg_is_const(op->args[1])) {
-        uint64_t t;
-
-        t = arg_info(op->args[1])->val;
-        t = extract64(t, pos, len);
-        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+    if (ti_is_const(t1)) {
+        return tcg_opt_gen_movi(ctx, op, op->args[0],
+                                extract64(ti_const_val(t1), pos, len));
     }
 
-    z_mask_old = arg_info(op->args[1])->z_mask;
+    z_mask_old = t1->z_mask;
     z_mask = extract64(z_mask_old, pos, len);
     if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
         return true;
     }
-    ctx->z_mask = z_mask;
 
-    return fold_masks(ctx, op);
+    return fold_masks_z(ctx, op, z_mask);
 }
 
 static bool fold_extract2(OptContext *ctx, TCGOp *op)
--
2.43.0
1
There were two bugs here: (1) the required endianness was
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
not present in the MemOp, and (2) we were not providing a
3
zero-extended input to the bswap as semantics required.
4
5
The best fix is to fold the bswap into the memory operation,
6
producing the desired result directly.
7
8
Acked-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
3
---
11
target/mips/tcg/mxu_translate.c | 6 +-----
4
tcg/optimize.c | 2 +-
12
1 file changed, 1 insertion(+), 5 deletions(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
13
6
14
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
16
--- a/target/mips/tcg/mxu_translate.c
9
--- a/tcg/optimize.c
17
+++ b/target/mips/tcg/mxu_translate.c
10
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
11
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
19
tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
12
}
13
return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
20
}
14
}
21
tcg_gen_add_tl(t1, t0, t1);
15
- return false;
22
- tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_SL);
16
+ return finish_folding(ctx, op);
23
+ tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_TESL ^ (sel * MO_BSWAP));
17
}
24
18
25
- if (sel == 1) {
19
static bool fold_exts(OptContext *ctx, TCGOp *op)
26
- /* S32LDDR */
27
- tcg_gen_bswap32_tl(t1, t1, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
28
- }
29
gen_store_mxu_gpr(t1, XRa);
30
31
tcg_temp_free(t0);
32
--
20
--
33
2.25.1
21
2.43.0
34
35
1
After we've raised the exception, we have left the TB.
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Explicitly sign-extend z_mask instead of doing that manually.
2
3
3
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
target/cris/translate.c | 5 +++--
7
tcg/optimize.c | 29 ++++++++++++-----------------
8
target/cris/translate_v10.c.inc | 3 ++-
8
1 file changed, 12 insertions(+), 17 deletions(-)
9
2 files changed, 5 insertions(+), 3 deletions(-)
10
9
11
diff --git a/target/cris/translate.c b/target/cris/translate.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/target/cris/translate.c
12
--- a/tcg/optimize.c
14
+++ b/target/cris/translate.c
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
16
-offsetof(CRISCPU, env) + offsetof(CPUState, halted));
15
17
tcg_gen_movi_tl(env_pc, dc->pc + 2);
16
static bool fold_exts(OptContext *ctx, TCGOp *op)
18
t_gen_raise_exception(EXCP_HLT);
17
{
19
+ dc->base.is_jmp = DISAS_NORETURN;
18
- uint64_t s_mask_old, s_mask, z_mask, sign;
20
return 2;
19
+ uint64_t s_mask_old, s_mask, z_mask;
20
bool type_change = false;
21
+ TempOptInfo *t1;
22
23
if (fold_const1(ctx, op)) {
24
return true;
21
}
25
}
22
26
23
@@ -XXX,XX +XXX,XX @@ static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc)
27
- z_mask = arg_info(op->args[1])->z_mask;
24
/* Breaks start at 16 in the exception vector. */
28
- s_mask = arg_info(op->args[1])->s_mask;
25
t_gen_movi_env_TN(trap_vector, dc->op1 + 16);
29
+ t1 = arg_info(op->args[1]);
26
t_gen_raise_exception(EXCP_BREAK);
30
+ z_mask = t1->z_mask;
27
- dc->base.is_jmp = DISAS_UPDATE;
31
+ s_mask = t1->s_mask;
28
+ dc->base.is_jmp = DISAS_NORETURN;
32
s_mask_old = s_mask;
33
34
switch (op->opc) {
35
CASE_OP_32_64(ext8s):
36
- sign = INT8_MIN;
37
- z_mask = (uint8_t)z_mask;
38
+ s_mask |= INT8_MIN;
39
+ z_mask = (int8_t)z_mask;
40
break;
41
CASE_OP_32_64(ext16s):
42
- sign = INT16_MIN;
43
- z_mask = (uint16_t)z_mask;
44
+ s_mask |= INT16_MIN;
45
+ z_mask = (int16_t)z_mask;
46
break;
47
case INDEX_op_ext_i32_i64:
48
type_change = true;
49
QEMU_FALLTHROUGH;
50
case INDEX_op_ext32s_i64:
51
- sign = INT32_MIN;
52
- z_mask = (uint32_t)z_mask;
53
+ s_mask |= INT32_MIN;
54
+ z_mask = (int32_t)z_mask;
29
break;
55
break;
30
default:
56
default:
31
printf("op2=%x\n", dc->op2);
57
g_assert_not_reached();
32
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
58
}
33
cris_evaluate_flags(dc);
59
34
tcg_gen_movi_tl(env_pc, dc->pc);
60
- if (z_mask & sign) {
35
t_gen_raise_exception(EXCP_DEBUG);
61
- z_mask |= sign;
36
- dc->base.is_jmp = DISAS_UPDATE;
62
- }
37
+ dc->base.is_jmp = DISAS_NORETURN;
63
- s_mask |= sign << 1;
38
/* The address covered by the breakpoint must be included in
64
-
39
[tb->pc, tb->pc + tb->size) in order to for it to be
65
- ctx->z_mask = z_mask;
40
properly cleared -- thus we increment the PC here so that
66
- ctx->s_mask = s_mask;
41
diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc
67
if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
42
index XXXXXXX..XXXXXXX 100644
68
return true;
43
--- a/target/cris/translate_v10.c.inc
69
}
44
+++ b/target/cris/translate_v10.c.inc
70
45
@@ -XXX,XX +XXX,XX @@ static inline void cris_illegal_insn(DisasContext *dc)
71
- return fold_masks(ctx, op);
46
{
72
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
47
qemu_log_mask(LOG_GUEST_ERROR, "illegal insn at pc=%x\n", dc->pc);
48
t_gen_raise_exception(EXCP_BREAK);
49
+ dc->base.is_jmp = DISAS_NORETURN;
50
}
73
}
51
74
52
static void gen_store_v10_conditional(DisasContext *dc, TCGv addr, TCGv val,
75
static bool fold_extu(OptContext *ctx, TCGOp *op)
53
@@ -XXX,XX +XXX,XX @@ static unsigned int dec10_ind(CPUCRISState *env, DisasContext *dc)
54
t_gen_mov_env_TN(trap_vector, c);
55
tcg_temp_free(c);
56
t_gen_raise_exception(EXCP_BREAK);
57
- dc->base.is_jmp = DISAS_UPDATE;
58
+ dc->base.is_jmp = DISAS_NORETURN;
59
return insn_len;
60
}
61
LOG_DIS("%d: jump.%d %d r%d r%d\n", __LINE__, size,
62
--
76
--
63
2.25.1
77
2.43.0
64
65
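To illustrate the fold_exts change above: the new cast does in one step what the old code spelled out as an unsigned truncation plus a conditional OR of the sign bits. A standalone check for the ext8s case (illustrative only; like the TCG code itself it assumes the usual two's-complement narrowing conversion):

#include <stdint.h>
#include <assert.h>

/* Old shape of the ext8s case: truncate, then OR in the sign bits
 * only when the sign bit of the narrow value may be set. */
static uint64_t ext8s_zmask_old(uint64_t z_mask)
{
    uint64_t sign = (uint64_t)INT8_MIN;

    z_mask = (uint8_t)z_mask;
    if (z_mask & sign) {
        z_mask |= sign;
    }
    return z_mask;
}

/* New shape: a single explicit sign-extension. */
static uint64_t ext8s_zmask_new(uint64_t z_mask)
{
    return (uint64_t)(int64_t)(int8_t)z_mask;
}

int main(void)
{
    unsigned v;

    for (v = 0; v < 256; v++) {
        assert(ext8s_zmask_old(v) == ext8s_zmask_new(v));
    }
    return 0;
}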
1
Use a break instead of an ifdefed else.
1
Avoid the use of the OptContext slots.
2
There's no need to move the values through s->T0.
3
Remove TCG_BSWAP_IZ and the preceding zero-extension.
4
2
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/i386/tcg/translate.c | 14 ++++----------
6
tcg/optimize.c | 4 ++--
9
1 file changed, 4 insertions(+), 10 deletions(-)
7
1 file changed, 2 insertions(+), 2 deletions(-)
10
8
11
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/i386/tcg/translate.c
11
--- a/tcg/optimize.c
14
+++ b/target/i386/tcg/translate.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
16
reg = (b & 7) | REX_B(s);
14
g_assert_not_reached();
17
#ifdef TARGET_X86_64
15
}
18
if (dflag == MO_64) {
16
19
- gen_op_mov_v_reg(s, MO_64, s->T0, reg);
17
- ctx->z_mask = z_mask;
20
- tcg_gen_bswap64_i64(s->T0, s->T0);
18
if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
21
- gen_op_mov_reg_v(s, MO_64, reg, s->T0);
19
return true;
22
- } else
20
}
23
-#endif
21
- return fold_masks(ctx, op);
24
- {
22
+
25
- gen_op_mov_v_reg(s, MO_32, s->T0, reg);
23
+ return fold_masks_z(ctx, op, z_mask);
26
- tcg_gen_ext32u_tl(s->T0, s->T0);
24
}
27
- tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
25
28
- gen_op_mov_reg_v(s, MO_32, reg, s->T0);
26
static bool fold_mb(OptContext *ctx, TCGOp *op)
29
+ tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
30
+ break;
31
}
32
+#endif
33
+ tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
34
break;
35
case 0xd6: /* salc */
36
if (CODE64(s))
37
--
27
--
38
2.25.1
28
2.43.0
39
40
1
Migrate the is_jmp, tb and singlestep_enabled fields
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
from DisasContext into the base.
3
2
4
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/cris/translate.c | 49 +++++++++++++++++----------------
6
tcg/optimize.c | 19 +++++++++++--------
9
target/cris/translate_v10.c.inc | 4 +--
7
1 file changed, 11 insertions(+), 8 deletions(-)
10
2 files changed, 27 insertions(+), 26 deletions(-)
11
8
12
diff --git a/target/cris/translate.c b/target/cris/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/target/cris/translate.c
11
--- a/tcg/optimize.c
15
+++ b/target/cris/translate.c
12
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static TCGv env_pc;
13
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
17
14
18
/* This is the state at translation time. */
15
static bool fold_movcond(OptContext *ctx, TCGOp *op)
19
typedef struct DisasContext {
20
+ DisasContextBase base;
21
+
22
CRISCPU *cpu;
23
target_ulong pc, ppc;
24
25
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
26
int clear_locked_irq; /* Clear the irq lockout. */
27
int cpustate_changed;
28
unsigned int tb_flags; /* tb dependent flags. */
29
- int is_jmp;
30
31
#define JMP_NOJMP 0
32
#define JMP_DIRECT 1
33
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
34
uint32_t jmp_pc;
35
36
int delayed_branch;
37
-
38
- TranslationBlock *tb;
39
- int singlestep_enabled;
40
} DisasContext;
41
42
static void gen_BUG(DisasContext *dc, const char *file, int line)
43
@@ -XXX,XX +XXX,XX @@ static void t_gen_cc_jmp(TCGv pc_true, TCGv pc_false)
44
static inline bool use_goto_tb(DisasContext *dc, target_ulong dest)
45
{
16
{
46
#ifndef CONFIG_USER_ONLY
17
+ uint64_t z_mask, s_mask;
47
- return (dc->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
18
+ TempOptInfo *tt, *ft;
48
+ return (dc->base.pc_first & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
19
int i;
49
(dc->ppc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
20
50
#else
21
/* If true and false values are the same, eliminate the cmp. */
51
return true;
22
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
52
@@ -XXX,XX +XXX,XX @@ static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
23
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
53
if (use_goto_tb(dc, dest)) {
54
tcg_gen_goto_tb(n);
55
tcg_gen_movi_tl(env_pc, dest);
56
- tcg_gen_exit_tb(dc->tb, n);
57
+ tcg_gen_exit_tb(dc->base.tb, n);
58
} else {
59
tcg_gen_movi_tl(env_pc, dest);
60
tcg_gen_exit_tb(NULL, 0);
61
@@ -XXX,XX +XXX,XX @@ static int dec_setclrf(CPUCRISState *env, DisasContext *dc)
62
/* Break the TB if any of the SPI flag changes. */
63
if (flags & (P_FLAG | S_FLAG)) {
64
tcg_gen_movi_tl(env_pc, dc->pc + 2);
65
- dc->is_jmp = DISAS_UPDATE;
66
+ dc->base.is_jmp = DISAS_UPDATE;
67
dc->cpustate_changed = 1;
68
}
24
}
69
25
70
/* For the I flag, only act on posedge. */
26
- ctx->z_mask = arg_info(op->args[3])->z_mask
71
if ((flags & I_FLAG)) {
27
- | arg_info(op->args[4])->z_mask;
72
tcg_gen_movi_tl(env_pc, dc->pc + 2);
28
- ctx->s_mask = arg_info(op->args[3])->s_mask
73
- dc->is_jmp = DISAS_UPDATE;
29
- & arg_info(op->args[4])->s_mask;
74
+ dc->base.is_jmp = DISAS_UPDATE;
30
+ tt = arg_info(op->args[3]);
75
dc->cpustate_changed = 1;
31
+ ft = arg_info(op->args[4]);
76
}
32
+ z_mask = tt->z_mask | ft->z_mask;
77
33
+ s_mask = tt->s_mask & ft->s_mask;
78
@@ -XXX,XX +XXX,XX @@ static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc)
34
79
LOG_DIS("rfe\n");
35
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
80
cris_evaluate_flags(dc);
36
- uint64_t tv = arg_info(op->args[3])->val;
81
gen_helper_rfe(cpu_env);
37
- uint64_t fv = arg_info(op->args[4])->val;
82
- dc->is_jmp = DISAS_UPDATE;
38
+ if (ti_is_const(tt) && ti_is_const(ft)) {
83
+ dc->base.is_jmp = DISAS_UPDATE;
39
+ uint64_t tv = ti_const_val(tt);
84
break;
40
+ uint64_t fv = ti_const_val(ft);
85
case 5:
41
TCGOpcode opc, negopc = 0;
86
/* rfn. */
42
TCGCond cond = op->args[5];
87
LOG_DIS("rfn\n");
43
88
cris_evaluate_flags(dc);
44
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
89
gen_helper_rfn(cpu_env);
90
- dc->is_jmp = DISAS_UPDATE;
91
+ dc->base.is_jmp = DISAS_UPDATE;
92
break;
93
case 6:
94
LOG_DIS("break %d\n", dc->op1);
95
@@ -XXX,XX +XXX,XX @@ static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc)
96
/* Breaks start at 16 in the exception vector. */
97
t_gen_movi_env_TN(trap_vector, dc->op1 + 16);
98
t_gen_raise_exception(EXCP_BREAK);
99
- dc->is_jmp = DISAS_UPDATE;
100
+ dc->base.is_jmp = DISAS_UPDATE;
101
break;
102
default:
103
printf("op2=%x\n", dc->op2);
104
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
105
* delayslot, like in real hw.
106
*/
107
pc_start = tb->pc & ~1;
108
- dc->cpu = env_archcpu(env);
109
- dc->tb = tb;
110
111
- dc->is_jmp = DISAS_NEXT;
112
+ dc->base.tb = tb;
113
+ dc->base.pc_first = pc_start;
114
+ dc->base.pc_next = pc_start;
115
+ dc->base.is_jmp = DISAS_NEXT;
116
+ dc->base.singlestep_enabled = cs->singlestep_enabled;
117
+
118
+ dc->cpu = env_archcpu(env);
119
dc->ppc = pc_start;
120
dc->pc = pc_start;
121
- dc->singlestep_enabled = cs->singlestep_enabled;
122
dc->flags_uptodate = 1;
123
dc->flagx_known = 1;
124
dc->flags_x = tb->flags & X_FLAG;
125
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
126
cris_evaluate_flags(dc);
127
tcg_gen_movi_tl(env_pc, dc->pc);
128
t_gen_raise_exception(EXCP_DEBUG);
129
- dc->is_jmp = DISAS_UPDATE;
130
+ dc->base.is_jmp = DISAS_UPDATE;
131
/* The address covered by the breakpoint must be included in
132
[tb->pc, tb->pc + tb->size) in order to for it to be
133
properly cleared -- thus we increment the PC here so that
134
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
135
gen_goto_tb(dc, 1, dc->jmp_pc);
136
gen_set_label(l1);
137
gen_goto_tb(dc, 0, dc->pc);
138
- dc->is_jmp = DISAS_TB_JUMP;
139
+ dc->base.is_jmp = DISAS_TB_JUMP;
140
dc->jmp = JMP_NOJMP;
141
} else if (dc->jmp == JMP_DIRECT) {
142
cris_evaluate_flags(dc);
143
gen_goto_tb(dc, 0, dc->jmp_pc);
144
- dc->is_jmp = DISAS_TB_JUMP;
145
+ dc->base.is_jmp = DISAS_TB_JUMP;
146
dc->jmp = JMP_NOJMP;
147
} else {
148
TCGv c = tcg_const_tl(dc->pc);
149
t_gen_cc_jmp(env_btarget, c);
150
tcg_temp_free(c);
151
- dc->is_jmp = DISAS_JUMP;
152
+ dc->base.is_jmp = DISAS_JUMP;
153
}
154
break;
155
}
45
}
156
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
157
if (!(tb->pc & 1) && cs->singlestep_enabled) {
158
break;
159
}
160
- } while (!dc->is_jmp && !dc->cpustate_changed
161
+ } while (!dc->base.is_jmp && !dc->cpustate_changed
162
&& !tcg_op_buf_full()
163
&& !singlestep
164
&& (dc->pc - page_start < TARGET_PAGE_SIZE)
165
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
166
npc = dc->pc;
167
168
/* Force an update if the per-tb cpu state has changed. */
169
- if (dc->is_jmp == DISAS_NEXT
170
+ if (dc->base.is_jmp == DISAS_NEXT
171
&& (dc->cpustate_changed || !dc->flagx_known
172
|| (dc->flags_x != (tb->flags & X_FLAG)))) {
173
- dc->is_jmp = DISAS_UPDATE;
174
+ dc->base.is_jmp = DISAS_UPDATE;
175
tcg_gen_movi_tl(env_pc, npc);
176
}
177
/* Broken branch+delayslot sequence. */
178
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
179
cris_evaluate_flags(dc);
180
181
if (unlikely(cs->singlestep_enabled)) {
182
- if (dc->is_jmp == DISAS_NEXT) {
183
+ if (dc->base.is_jmp == DISAS_NEXT) {
184
tcg_gen_movi_tl(env_pc, npc);
185
}
186
t_gen_raise_exception(EXCP_DEBUG);
187
} else {
188
- switch (dc->is_jmp) {
189
+ switch (dc->base.is_jmp) {
190
case DISAS_NEXT:
191
gen_goto_tb(dc, 1, npc);
192
break;
193
diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc
194
index XXXXXXX..XXXXXXX 100644
195
--- a/target/cris/translate_v10.c.inc
196
+++ b/target/cris/translate_v10.c.inc
197
@@ -XXX,XX +XXX,XX @@ static unsigned int dec10_ind(CPUCRISState *env, DisasContext *dc)
198
t_gen_mov_env_TN(trap_vector, c);
199
tcg_temp_free(c);
200
t_gen_raise_exception(EXCP_BREAK);
201
- dc->is_jmp = DISAS_UPDATE;
202
+ dc->base.is_jmp = DISAS_UPDATE;
203
return insn_len;
204
}
205
LOG_DIS("%d: jump.%d %d r%d r%d\n", __LINE__, size,
206
@@ -XXX,XX +XXX,XX @@ static unsigned int crisv10_decoder(CPUCRISState *env, DisasContext *dc)
207
if (dc->clear_prefix && dc->tb_flags & PFIX_FLAG) {
208
dc->tb_flags &= ~PFIX_FLAG;
209
tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~PFIX_FLAG);
210
- if (dc->tb_flags != dc->tb->flags) {
211
+ if (dc->tb_flags != dc->base.tb->flags) {
212
dc->cpustate_changed = 1;
213
}
46
}
214
}
47
}
48
- return false;
49
+
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
51
}
52
53
static bool fold_mul(OptContext *ctx, TCGOp *op)
215
--
54
--
216
2.25.1
55
2.43.0
217
218
1
For INDEX_op_bswap32_i32, pass 0 for flags: input not zero-extended,
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
output does not need extension within the host 64-bit register.
3
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
3
---
7
tcg/ppc/tcg-target.c.inc | 22 ++++++++++++++++------
4
tcg/optimize.c | 6 +++---
8
1 file changed, 16 insertions(+), 6 deletions(-)
5
1 file changed, 3 insertions(+), 3 deletions(-)
9
6
10
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.c.inc
9
--- a/tcg/optimize.c
13
+++ b/tcg/ppc/tcg-target.c.inc
10
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
11
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
15
tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
12
fold_xi_to_x(ctx, op, 1)) {
13
return true;
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
16
}
17
}
17
18
18
-static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src)
19
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
19
+static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
20
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
20
{
21
fold_xi_to_i(ctx, op, 0)) {
21
TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
22
return true;
22
23
}
23
@@ -XXX,XX +XXX,XX @@ static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src)
24
- return false;
24
/* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */
25
+ return finish_folding(ctx, op);
25
tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
26
27
- tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
28
+ if (flags & TCG_BSWAP_OS) {
29
+ tcg_out_ext16s(s, dst, tmp);
30
+ } else {
31
+ tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
32
+ }
33
}
26
}
34
27
35
-static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src)
28
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
36
+static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
29
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
37
{
30
tcg_opt_gen_movi(ctx, op2, rh, h);
38
TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
31
return true;
39
32
}
40
@@ -XXX,XX +XXX,XX @@ static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src)
33
- return false;
41
/* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */
34
+ return finish_folding(ctx, op);
42
tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);
43
44
- tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
45
+ if (flags & TCG_BSWAP_OS) {
46
+ tcg_out_ext32s(s, dst, tmp);
47
+ } else {
48
+ tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
49
+ }
50
}
35
}
51
36
52
static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
37
static bool fold_nand(OptContext *ctx, TCGOp *op)
53
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
54
55
case INDEX_op_bswap16_i32:
56
case INDEX_op_bswap16_i64:
57
- tcg_out_bswap16(s, args[0], args[1]);
58
+ tcg_out_bswap16(s, args[0], args[1], args[2]);
59
break;
60
case INDEX_op_bswap32_i32:
61
+ tcg_out_bswap32(s, args[0], args[1], 0);
62
+ break;
63
case INDEX_op_bswap32_i64:
64
- tcg_out_bswap32(s, args[0], args[1]);
65
+ tcg_out_bswap32(s, args[0], args[1], args[2]);
66
break;
67
case INDEX_op_bswap64_i64:
68
tcg_out_bswap64(s, args[0], args[1]);
69
--
38
--
70
2.25.1
39
2.43.0
71
72
1
The new bswap flags can implement the semantics exactly.
1
Avoid the use of the OptContext slots.
2
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
target/arm/translate.c | 4 +---
6
tcg/optimize.c | 8 +++++---
8
1 file changed, 1 insertion(+), 3 deletions(-)
7
1 file changed, 5 insertions(+), 3 deletions(-)
9
8
10
diff --git a/target/arm/translate.c b/target/arm/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/target/arm/translate.c
11
--- a/tcg/optimize.c
13
+++ b/target/arm/translate.c
12
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
15
/* Byteswap low halfword and sign extend. */
14
16
static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
15
static bool fold_nand(OptContext *ctx, TCGOp *op)
17
{
16
{
18
- tcg_gen_ext16u_i32(var, var);
17
+ uint64_t s_mask;
19
- tcg_gen_bswap16_i32(var, var, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
18
+
20
- tcg_gen_ext16s_i32(dest, var);
19
if (fold_const2_commutative(ctx, op) ||
21
+ tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
20
fold_xi_to_not(ctx, op, -1)) {
21
return true;
22
}
23
24
- ctx->s_mask = arg_info(op->args[1])->s_mask
25
- & arg_info(op->args[2])->s_mask;
26
- return false;
27
+ s_mask = arg_info(op->args[1])->s_mask
28
+ & arg_info(op->args[2])->s_mask;
29
+ return fold_masks_s(ctx, op, s_mask);
22
}
30
}
23
31
24
/* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
32
static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
25
--
33
--
26
2.25.1
34
2.43.0
27
28
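As a side note on the gen_revsh change above: the byte swap only reads the low 16 bits of its input, so the old explicit ext16u added nothing, and TCG_BSWAP_OS supplies the final sign-extension. A plain-C model of the operation (illustrative only, not QEMU API):

#include <stdint.h>
#include <assert.h>

/* Swap the two low bytes, ignoring anything above bit 15, then
 * sign-extend from bit 15: the TCG_BSWAP_OS result. */
static int32_t revsh(uint32_t x)
{
    uint16_t swapped = (uint16_t)((x << 8) | ((x >> 8) & 0xff));

    return (int32_t)(int16_t)swapped;
}

int main(void)
{
    /* High input bits are irrelevant, so the old ext16u was redundant. */
    assert(revsh(0xdead8001u) == revsh(0x00008001u));
    assert(revsh(0x00008001u) == 0x0180);
    assert(revsh(0x00000080u) == -32768);
    return 0;
}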
1
Do not skip the page check for user-only -- mmap/mprotect can
1
Avoid the use of the OptContext slots.
2
still change page mappings. Only check dc->base.pc_first, not
3
dc->ppc -- the start page is the only one that's relevant.
4
2
5
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
target/cris/translate.c | 9 ++-------
6
tcg/optimize.c | 9 ++-------
10
1 file changed, 2 insertions(+), 7 deletions(-)
7
1 file changed, 2 insertions(+), 7 deletions(-)
11
8
12
diff --git a/target/cris/translate.c b/target/cris/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/target/cris/translate.c
11
--- a/tcg/optimize.c
15
+++ b/target/cris/translate.c
12
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static void t_gen_cc_jmp(TCGv pc_true, TCGv pc_false)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
17
gen_set_label(l1);
14
{
15
/* Set to 1 all bits to the left of the rightmost. */
16
uint64_t z_mask = arg_info(op->args[1])->z_mask;
17
- ctx->z_mask = -(z_mask & -z_mask);
18
+ z_mask = -(z_mask & -z_mask);
19
20
- /*
21
- * Because of fold_sub_to_neg, we want to always return true,
22
- * via finish_folding.
23
- */
24
- finish_folding(ctx, op);
25
- return true;
26
+ return fold_masks_z(ctx, op, z_mask);
18
}
27
}
19
28
20
-static inline bool use_goto_tb(DisasContext *dc, target_ulong dest)
29
static bool fold_neg(OptContext *ctx, TCGOp *op)
21
+static bool use_goto_tb(DisasContext *dc, target_ulong dest)
22
{
23
-#ifndef CONFIG_USER_ONLY
24
- return (dc->base.pc_first & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
25
- (dc->ppc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
26
-#else
27
- return true;
28
-#endif
29
+ return ((dest ^ dc->base.pc_first) & TARGET_PAGE_MASK) == 0;
30
}
31
32
static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
33
--
30
--
34
2.25.1
31
2.43.0
35
36
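One way to see the mask kept by fold_neg_no_const above: if the operand's known-zero information says nothing below bit k can be set, negation cannot produce anything below bit k either, so every bit from the lowest possibly-set bit upward stays in the mask. Illustrative plain C:

#include <stdint.h>
#include <stdio.h>

static uint64_t neg_z_mask(uint64_t z_mask)
{
    /* z_mask & -z_mask isolates the lowest bit that may be set;
     * negating that sets it and every bit above it. */
    return -(z_mask & -z_mask);
}

int main(void)
{
    printf("%016llx\n", (unsigned long long)neg_z_mask(0x0000000000000ff0ull));
    /* fffffffffffffff0: bits 0-3 stay known-zero across the negation. */
    return 0;
}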
1
We can eliminate the requirement for a zero-extended output,
1
Avoid the use of the OptContext slots.
2
because the following store will ignore any garbage high bits.
3
2
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/arm/translate-a64.c | 6 ++----
6
tcg/optimize.c | 8 +++++---
9
1 file changed, 2 insertions(+), 4 deletions(-)
7
1 file changed, 5 insertions(+), 3 deletions(-)
10
8
11
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-a64.c
11
--- a/tcg/optimize.c
14
+++ b/target/arm/translate-a64.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void handle_rev(DisasContext *s, int opcode, bool u,
13
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
16
read_vec_element(s, tcg_tmp, rn, i, grp_size);
14
17
switch (grp_size) {
15
static bool fold_nor(OptContext *ctx, TCGOp *op)
18
case MO_16:
16
{
19
- tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp,
17
+ uint64_t s_mask;
20
- TCG_BSWAP_IZ | TCG_BSWAP_OZ);
18
+
21
+ tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
19
if (fold_const2_commutative(ctx, op) ||
22
break;
20
fold_xi_to_not(ctx, op, 0)) {
23
case MO_32:
21
return true;
24
- tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp,
22
}
25
- TCG_BSWAP_IZ | TCG_BSWAP_OZ);
23
26
+ tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
24
- ctx->s_mask = arg_info(op->args[1])->s_mask
27
break;
25
- & arg_info(op->args[2])->s_mask;
28
case MO_64:
26
- return false;
29
tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
27
+ s_mask = arg_info(op->args[1])->s_mask
28
+ & arg_info(op->args[2])->s_mask;
29
+ return fold_masks_s(ctx, op, s_mask);
30
}
31
32
static bool fold_not(OptContext *ctx, TCGOp *op)
30
--
33
--
31
2.25.1
34
2.43.0
32
33
1
For the sf version, we are performing two 32-bit bswaps,
1
Avoid the use of the OptContext slots.
2
one in each half of the register. This is equivalent to
3
performing one 64-bit bswap followed by a rotate.
4
2
5
For the non-sf version, we can remove TCG_BSWAP_IZ
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
and the preceding zero-extension.
7
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
5
---
12
target/arm/translate-a64.c | 17 ++++-------------
6
tcg/optimize.c | 7 +------
13
1 file changed, 4 insertions(+), 13 deletions(-)
7
1 file changed, 1 insertion(+), 6 deletions(-)
14
8
15
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
17
--- a/target/arm/translate-a64.c
11
--- a/tcg/optimize.c
18
+++ b/target/arm/translate-a64.c
12
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@ static void handle_rev32(DisasContext *s, unsigned int sf,
13
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
20
unsigned int rn, unsigned int rd)
14
if (fold_const1(ctx, op)) {
21
{
15
return true;
22
TCGv_i64 tcg_rd = cpu_reg(s, rd);
16
}
23
+ TCGv_i64 tcg_rn = cpu_reg(s, rn);
24
25
if (sf) {
26
- TCGv_i64 tcg_tmp = tcg_temp_new_i64();
27
- TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
28
-
17
-
29
- /* bswap32_i64 requires zero high word */
18
- ctx->s_mask = arg_info(op->args[1])->s_mask;
30
- tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
31
- tcg_gen_bswap32_i64(tcg_rd, tcg_tmp, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
32
- tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
33
- tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
34
- tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
35
-
19
-
36
- tcg_temp_free_i64(tcg_tmp);
20
- /* Because of fold_to_not, we want to always return true, via finish. */
37
+ tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
21
- finish_folding(ctx, op);
38
+ tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
22
- return true;
39
} else {
23
+ return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
40
- tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
41
- tcg_gen_bswap32_i64(tcg_rd, tcg_rd, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
42
+ tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
43
}
44
}
24
}
45
25
26
static bool fold_or(OptContext *ctx, TCGOp *op)
46
--
27
--
47
2.25.1
28
2.43.0
48
49
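The equivalence the handle_rev32 change above relies on can be checked in isolation: swapping the bytes within each 32-bit half of a 64-bit value is the same as one 64-bit byte swap followed by a 32-bit rotate. A standalone sketch in host C (illustrative only, using the GCC/Clang byte-swap builtins):

#include <stdint.h>
#include <assert.h>

/* What the old sequence computed: byte-swap each 32-bit half in place. */
static uint64_t rev32_by_halves(uint64_t x)
{
    uint64_t lo = __builtin_bswap32((uint32_t)x);
    uint64_t hi = __builtin_bswap32((uint32_t)(x >> 32));

    return (hi << 32) | lo;
}

/* What the new sequence computes: one 64-bit swap, then a 32-bit rotate. */
static uint64_t rev32_by_bswap64(uint64_t x)
{
    uint64_t t = __builtin_bswap64(x);

    return (t >> 32) | (t << 32);
}

int main(void)
{
    uint64_t x = 0x0102030405060708ull;

    assert(rev32_by_halves(x) == 0x0403020108070605ull);
    assert(rev32_by_halves(x) == rev32_by_bswap64(x));
    return 0;
}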
1
By removing TCG_BSWAP_IZ we indicate that the input is
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
not zero-extended, and thus can remove an explicit extend.
3
By removing TCG_BSWAP_OZ, we allow the implementation to
4
leave high bits set, which will be ignored by the store.
5
2
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
tcg/tcg-op.c | 9 +++------
6
tcg/optimize.c | 13 ++++++++-----
10
1 file changed, 3 insertions(+), 6 deletions(-)
7
1 file changed, 8 insertions(+), 5 deletions(-)
11
8
12
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/tcg-op.c
11
--- a/tcg/optimize.c
15
+++ b/tcg/tcg-op.c
12
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
17
swap = tcg_temp_new_i32();
14
18
switch (memop & MO_SIZE) {
15
static bool fold_or(OptContext *ctx, TCGOp *op)
19
case MO_16:
16
{
20
- tcg_gen_ext16u_i32(swap, val);
17
+ uint64_t z_mask, s_mask;
21
- tcg_gen_bswap16_i32(swap, swap, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
18
+ TempOptInfo *t1, *t2;
22
+ tcg_gen_bswap16_i32(swap, val, 0);
19
+
23
break;
20
if (fold_const2_commutative(ctx, op) ||
24
case MO_32:
21
fold_xi_to_x(ctx, op, 0) ||
25
tcg_gen_bswap32_i32(swap, val);
22
fold_xx_to_x(ctx, op)) {
26
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
23
return true;
27
swap = tcg_temp_new_i64();
24
}
28
switch (memop & MO_SIZE) {
25
29
case MO_16:
26
- ctx->z_mask = arg_info(op->args[1])->z_mask
30
- tcg_gen_ext16u_i64(swap, val);
27
- | arg_info(op->args[2])->z_mask;
31
- tcg_gen_bswap16_i64(swap, swap, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
28
- ctx->s_mask = arg_info(op->args[1])->s_mask
32
+ tcg_gen_bswap16_i64(swap, val, 0);
29
- & arg_info(op->args[2])->s_mask;
33
break;
30
- return fold_masks(ctx, op);
34
case MO_32:
31
+ t1 = arg_info(op->args[1]);
35
- tcg_gen_ext32u_i64(swap, val);
32
+ t2 = arg_info(op->args[2]);
36
- tcg_gen_bswap32_i64(swap, swap, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
33
+ z_mask = t1->z_mask | t2->z_mask;
37
+ tcg_gen_bswap32_i64(swap, val, 0);
34
+ s_mask = t1->s_mask & t2->s_mask;
38
break;
35
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
39
case MO_64:
36
}
40
tcg_gen_bswap64_i64(swap, val);
37
38
static bool fold_orc(OptContext *ctx, TCGOp *op)
41
--
39
--
42
2.25.1
40
2.43.0
43
44
1
We always know the exact value of X; that's all that matters.
1
Avoid the use of the OptContext slots.
2
This avoids splitting the TB e.g. between "ax" and "addq".
3
2
4
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/cris/translate.c | 3 ---
6
tcg/optimize.c | 8 +++++---
9
1 file changed, 3 deletions(-)
7
1 file changed, 5 insertions(+), 3 deletions(-)
10
8
11
diff --git a/target/cris/translate.c b/target/cris/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/cris/translate.c
11
--- a/tcg/optimize.c
14
+++ b/target/cris/translate.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void cris_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
16
cris_clear_x_flag(dc);
14
15
static bool fold_orc(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t s_mask;
18
+
19
if (fold_const2(ctx, op) ||
20
fold_xx_to_i(ctx, op, -1) ||
21
fold_xi_to_x(ctx, op, -1) ||
22
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
23
return true;
17
}
24
}
18
25
19
- /* Fold unhandled changes to X_FLAG into cpustate_changed. */
26
- ctx->s_mask = arg_info(op->args[1])->s_mask
20
- dc->cpustate_changed |= dc->flags_x != (dc->base.tb->flags & X_FLAG);
27
- & arg_info(op->args[2])->s_mask;
21
-
28
- return false;
22
/*
29
+ s_mask = arg_info(op->args[1])->s_mask
23
* All branches are delayed branches, handled immediately below.
30
+ & arg_info(op->args[2])->s_mask;
24
* We don't expect to see odd combinations of exit conditions.
31
+ return fold_masks_s(ctx, op, s_mask);
32
}
33
34
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
25
--
35
--
26
2.25.1
36
2.43.0
27
28
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
Avoid the use of the OptContext slots.
2
2
3
Implement tcg_gen_vec_add{sub}8_tl by adding corresponding i32 OP.
3
Be careful not to call fold_masks_zs when the memory operation
4
is wide enough to require multiple outputs, so split into two
5
functions: fold_qemu_ld_1reg and fold_qemu_ld_2reg.
4
6
5
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Message-Id: <20210624105023.3852-3-zhiwei_liu@c-sky.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
---
9
include/tcg/tcg-op-gvec.h | 6 ++++++
10
tcg/optimize.c | 26 +++++++++++++++++++++-----
10
tcg/tcg-op-gvec.c | 38 ++++++++++++++++++++++++++++++++++++++
11
1 file changed, 21 insertions(+), 5 deletions(-)
11
2 files changed, 44 insertions(+)
12
12
13
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-op-gvec.h
15
--- a/tcg/optimize.c
16
+++ b/include/tcg/tcg-op-gvec.h
16
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
17
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
18
void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
18
return fold_masks_s(ctx, op, s_mask);
19
20
/* 32-bit vector operations. */
21
+void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
22
void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
23
24
+void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
25
void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
26
27
#if TARGET_LONG_BITS == 64
28
+#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i64
29
+#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i64
30
#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
31
#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
32
#else
33
+#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i32
34
+#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i32
35
#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
36
#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
37
#endif
38
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/tcg/tcg-op-gvec.c
41
+++ b/tcg/tcg-op-gvec.c
42
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
43
gen_addv_mask(d, a, b, m);
44
}
19
}
45
20
46
+void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
21
-static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
47
+{
22
+static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
48
+ TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
23
{
49
+ TCGv_i32 t1 = tcg_temp_new_i32();
24
const TCGOpDef *def = &tcg_op_defs[op->opc];
50
+ TCGv_i32 t2 = tcg_temp_new_i32();
25
MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
51
+ TCGv_i32 t3 = tcg_temp_new_i32();
26
MemOp mop = get_memop(oi);
27
int width = 8 * memop_size(mop);
28
+ uint64_t z_mask = -1, s_mask = 0;
29
30
if (width < 64) {
31
if (mop & MO_SIGN) {
32
- ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
33
+ s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
34
} else {
35
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
36
+ z_mask = MAKE_64BIT_MASK(0, width);
37
}
38
}
39
40
/* Opcodes that touch guest memory stop the mb optimization. */
41
ctx->prev_mb = NULL;
42
- return false;
52
+
43
+
53
+ tcg_gen_andc_i32(t1, a, m);
44
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
54
+ tcg_gen_andc_i32(t2, b, m);
55
+ tcg_gen_xor_i32(t3, a, b);
56
+ tcg_gen_add_i32(d, t1, t2);
57
+ tcg_gen_and_i32(t3, t3, m);
58
+ tcg_gen_xor_i32(d, d, t3);
59
+
60
+ tcg_temp_free_i32(t1);
61
+ tcg_temp_free_i32(t2);
62
+ tcg_temp_free_i32(t3);
63
+}
45
+}
64
+
46
+
65
void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
47
+static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
66
{
48
+{
67
TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
49
+ /* Opcodes that touch guest memory stop the mb optimization. */
68
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
50
+ ctx->prev_mb = NULL;
69
gen_subv_mask(d, a, b, m);
51
+ return finish_folding(ctx, op);
70
}
52
}
71
53
72
+void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
54
static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
73
+{
55
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
74
+ TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
56
break;
75
+ TCGv_i32 t1 = tcg_temp_new_i32();
57
case INDEX_op_qemu_ld_a32_i32:
76
+ TCGv_i32 t2 = tcg_temp_new_i32();
58
case INDEX_op_qemu_ld_a64_i32:
77
+ TCGv_i32 t3 = tcg_temp_new_i32();
59
+ done = fold_qemu_ld_1reg(&ctx, op);
78
+
60
+ break;
79
+ tcg_gen_or_i32(t1, a, m);
61
case INDEX_op_qemu_ld_a32_i64:
80
+ tcg_gen_andc_i32(t2, b, m);
62
case INDEX_op_qemu_ld_a64_i64:
81
+ tcg_gen_eqv_i32(t3, a, b);
63
+ if (TCG_TARGET_REG_BITS == 64) {
82
+ tcg_gen_sub_i32(d, t1, t2);
64
+ done = fold_qemu_ld_1reg(&ctx, op);
83
+ tcg_gen_and_i32(t3, t3, m);
65
+ break;
84
+ tcg_gen_xor_i32(d, d, t3);
66
+ }
85
+
67
+ QEMU_FALLTHROUGH;
86
+ tcg_temp_free_i32(t1);
68
case INDEX_op_qemu_ld_a32_i128:
87
+ tcg_temp_free_i32(t2);
69
case INDEX_op_qemu_ld_a64_i128:
88
+ tcg_temp_free_i32(t3);
70
- done = fold_qemu_ld(&ctx, op);
89
+}
71
+ done = fold_qemu_ld_2reg(&ctx, op);
90
+
72
break;
91
void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
73
case INDEX_op_qemu_st8_a32_i32:
92
{
74
case INDEX_op_qemu_st8_a64_i32:
93
TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
94
--
75
--
95
2.25.1
76
2.43.0
96
97
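A short aside on tcg_gen_vec_add8_i32 above: the mask juggling is the usual SWAR trick, clear the top bit of every byte so carries cannot cross lane boundaries, add, then repair the top bits with xor. A standalone C model of the same computation (illustrative only):

#include <stdint.h>
#include <assert.h>

/* SWAR add of four packed bytes in one 32-bit word, mirroring the
 * andc/andc/xor/add/and/xor sequence of tcg_gen_vec_add8_i32. */
static uint32_t vec_add8_i32(uint32_t a, uint32_t b)
{
    uint32_t m = 0x80808080u;
    uint32_t t1 = a & ~m;          /* per-byte values without their top bit */
    uint32_t t2 = b & ~m;
    uint32_t t3 = (a ^ b) & m;     /* xor of the operands' top bits, per byte */

    /* The add already left the carry into bit 7 of each lane in place;
     * xor folds the operands' top bits back in without a carry-out. */
    return (t1 + t2) ^ t3;
}

/* Reference: add each byte lane independently. */
static uint32_t vec_add8_ref(uint32_t a, uint32_t b)
{
    uint32_t r = 0;
    int i;

    for (i = 0; i < 4; i++) {
        r |= (((a >> (8 * i)) + (b >> (8 * i))) & 0xff) << (8 * i);
    }
    return r;
}

int main(void)
{
    assert(vec_add8_i32(0x01ff7f80u, 0x02017f80u) == 0x0300fe00u);
    assert(vec_add8_i32(0x01ff7f80u, 0x02017f80u) ==
           vec_add8_ref(0x01ff7f80u, 0x02017f80u));
    return 0;
}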
1
Migrate the is_jmp, tb and singlestep_enabled fields from
1
Stores have no output operands, and so need no further work.
2
DisasContext into the base. Use pc_first instead of tb->pc.
3
Increment pc_next prior to decode, leaving the address of
4
the current insn in dc->pc.
5
2
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
target/nios2/translate.c | 70 +++++++++++++++++++++-------------------
6
tcg/optimize.c | 11 +++++------
10
1 file changed, 36 insertions(+), 34 deletions(-)
7
1 file changed, 5 insertions(+), 6 deletions(-)
11
8
12
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/target/nios2/translate.c
11
--- a/tcg/optimize.c
15
+++ b/target/nios2/translate.c
12
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
14
{
15
/* Opcodes that touch guest memory stop the mb optimization. */
16
ctx->prev_mb = NULL;
17
- return false;
18
+ return true;
19
}
20
21
static bool fold_remainder(OptContext *ctx, TCGOp *op)
22
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
23
24
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
25
remove_mem_copy_all(ctx);
26
- return false;
27
+ return true;
17
}
28
}
18
29
19
typedef struct DisasContext {
30
switch (op->opc) {
20
+ DisasContextBase base;
31
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
21
TCGv_i32 zero;
32
g_assert_not_reached();
22
- int is_jmp;
33
}
23
target_ulong pc;
34
remove_mem_copy_in(ctx, ofs, ofs + lm1);
24
- TranslationBlock *tb;
35
- return false;
25
int mem_idx;
36
+ return true;
26
- bool singlestep_enabled;
27
} DisasContext;
28
29
static TCGv cpu_R[NUM_CORE_REGS];
30
@@ -XXX,XX +XXX,XX @@ static void t_gen_helper_raise_exception(DisasContext *dc,
31
tcg_gen_movi_tl(cpu_R[R_PC], dc->pc);
32
gen_helper_raise_exception(cpu_env, tmp);
33
tcg_temp_free_i32(tmp);
34
- dc->is_jmp = DISAS_NORETURN;
35
+ dc->base.is_jmp = DISAS_NORETURN;
36
}
37
}
37
38
38
static bool use_goto_tb(DisasContext *dc, uint32_t dest)
39
static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
39
{
40
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
40
- if (unlikely(dc->singlestep_enabled)) {
41
TCGType type;
41
+ if (unlikely(dc->base.singlestep_enabled)) {
42
42
return false;
43
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
44
- fold_tcg_st(ctx, op);
45
- return false;
46
+ return fold_tcg_st(ctx, op);
43
}
47
}
44
48
45
#ifndef CONFIG_USER_ONLY
49
src = arg_temp(op->args[0]);
46
- return (dc->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
50
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
47
+ return (dc->base.pc_first & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
51
last = ofs + tcg_type_size(type) - 1;
48
#else
52
remove_mem_copy_in(ctx, ofs, last);
49
return true;
53
record_mem_copy(ctx, type, src, ofs, last);
50
#endif
54
- return false;
51
@@ -XXX,XX +XXX,XX @@ static bool use_goto_tb(DisasContext *dc, uint32_t dest)
55
+ return true;
52
53
static void gen_goto_tb(DisasContext *dc, int n, uint32_t dest)
54
{
55
- TranslationBlock *tb = dc->tb;
56
+ const TranslationBlock *tb = dc->base.tb;
57
58
if (use_goto_tb(dc, dest)) {
59
tcg_gen_goto_tb(n);
60
@@ -XXX,XX +XXX,XX @@ static void gen_excp(DisasContext *dc, uint32_t code, uint32_t flags)
61
62
static void gen_check_supervisor(DisasContext *dc)
63
{
64
- if (dc->tb->flags & CR_STATUS_U) {
65
+ if (dc->base.tb->flags & CR_STATUS_U) {
66
/* CPU in user mode, privileged instruction called, stop. */
67
t_gen_helper_raise_exception(dc, EXCP_SUPERI);
68
}
69
@@ -XXX,XX +XXX,XX @@ static void jmpi(DisasContext *dc, uint32_t code, uint32_t flags)
70
{
71
J_TYPE(instr, code);
72
gen_goto_tb(dc, 0, (dc->pc & 0xF0000000) | (instr.imm26 << 2));
73
- dc->is_jmp = DISAS_NORETURN;
74
+ dc->base.is_jmp = DISAS_NORETURN;
75
}
56
}
76
57
77
static void call(DisasContext *dc, uint32_t code, uint32_t flags)
58
static bool fold_xor(OptContext *ctx, TCGOp *op)
78
@@ -XXX,XX +XXX,XX @@ static void br(DisasContext *dc, uint32_t code, uint32_t flags)
79
I_TYPE(instr, code);
80
81
gen_goto_tb(dc, 0, dc->pc + 4 + (instr.imm16.s & -4));
82
- dc->is_jmp = DISAS_NORETURN;
83
+ dc->base.is_jmp = DISAS_NORETURN;
84
}
85
86
static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags)
87
@@ -XXX,XX +XXX,XX @@ static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags)
88
gen_goto_tb(dc, 0, dc->pc + 4);
89
gen_set_label(l1);
90
gen_goto_tb(dc, 1, dc->pc + 4 + (instr.imm16.s & -4));
91
- dc->is_jmp = DISAS_NORETURN;
92
+ dc->base.is_jmp = DISAS_NORETURN;
93
}
94
95
/* Comparison instructions */
96
@@ -XXX,XX +XXX,XX @@ static void eret(DisasContext *dc, uint32_t code, uint32_t flags)
97
tcg_gen_mov_tl(cpu_R[CR_STATUS], cpu_R[CR_ESTATUS]);
98
tcg_gen_mov_tl(cpu_R[R_PC], cpu_R[R_EA]);
99
100
- dc->is_jmp = DISAS_JUMP;
101
+ dc->base.is_jmp = DISAS_JUMP;
102
}
103
104
/* PC <- ra */
105
@@ -XXX,XX +XXX,XX @@ static void ret(DisasContext *dc, uint32_t code, uint32_t flags)
106
{
107
tcg_gen_mov_tl(cpu_R[R_PC], cpu_R[R_RA]);
108
109
- dc->is_jmp = DISAS_JUMP;
110
+ dc->base.is_jmp = DISAS_JUMP;
111
}
112
113
/* PC <- ba */
114
@@ -XXX,XX +XXX,XX @@ static void bret(DisasContext *dc, uint32_t code, uint32_t flags)
115
{
116
tcg_gen_mov_tl(cpu_R[R_PC], cpu_R[R_BA]);
117
118
- dc->is_jmp = DISAS_JUMP;
119
+ dc->base.is_jmp = DISAS_JUMP;
120
}
121
122
/* PC <- rA */
123
@@ -XXX,XX +XXX,XX @@ static void jmp(DisasContext *dc, uint32_t code, uint32_t flags)
124
125
tcg_gen_mov_tl(cpu_R[R_PC], load_gpr(dc, instr.a));
126
127
- dc->is_jmp = DISAS_JUMP;
128
+ dc->base.is_jmp = DISAS_JUMP;
129
}
130
131
/* rC <- PC + 4 */
132
@@ -XXX,XX +XXX,XX @@ static void callr(DisasContext *dc, uint32_t code, uint32_t flags)
133
tcg_gen_mov_tl(cpu_R[R_PC], load_gpr(dc, instr.a));
134
tcg_gen_movi_tl(cpu_R[R_RA], dc->pc + 4);
135
136
- dc->is_jmp = DISAS_JUMP;
137
+ dc->base.is_jmp = DISAS_JUMP;
138
}
139
140
/* rC <- ctlN */
141
@@ -XXX,XX +XXX,XX @@ static void wrctl(DisasContext *dc, uint32_t code, uint32_t flags)
142
/* If interrupts were enabled using WRCTL, trigger them. */
143
#if !defined(CONFIG_USER_ONLY)
144
if ((instr.imm5 + CR_BASE) == CR_STATUS) {
145
- if (tb_cflags(dc->tb) & CF_USE_ICOUNT) {
146
+ if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) {
147
gen_io_start();
148
}
149
gen_helper_check_interrupts(cpu_env);
150
- dc->is_jmp = DISAS_UPDATE;
151
+ dc->base.is_jmp = DISAS_UPDATE;
152
}
153
#endif
154
}
155
@@ -XXX,XX +XXX,XX @@ static void gen_exception(DisasContext *dc, uint32_t excp)
156
tcg_gen_movi_tl(cpu_R[R_PC], dc->pc);
157
gen_helper_raise_exception(cpu_env, tmp);
158
tcg_temp_free_i32(tmp);
159
- dc->is_jmp = DISAS_NORETURN;
160
+ dc->base.is_jmp = DISAS_NORETURN;
161
}
162
163
/* generate intermediate code for basic block 'tb'. */
164
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
165
int num_insns;
166
167
/* Initialize DC */
168
- dc->is_jmp = DISAS_NEXT;
169
- dc->pc = tb->pc;
170
- dc->tb = tb;
171
+
172
+ dc->base.tb = tb;
173
+ dc->base.singlestep_enabled = cs->singlestep_enabled;
174
+ dc->base.is_jmp = DISAS_NEXT;
175
+ dc->base.pc_first = tb->pc;
176
+ dc->base.pc_next = tb->pc;
177
+
178
dc->mem_idx = cpu_mmu_index(env, false);
179
- dc->singlestep_enabled = cs->singlestep_enabled;
180
181
/* Set up instruction counts */
182
num_insns = 0;
183
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
184
185
gen_tb_start(tb);
186
do {
187
- tcg_gen_insn_start(dc->pc);
188
+ tcg_gen_insn_start(dc->base.pc_next);
189
num_insns++;
190
191
- if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
192
+ if (unlikely(cpu_breakpoint_test(cs, dc->base.pc_next, BP_ANY))) {
193
gen_exception(dc, EXCP_DEBUG);
194
/* The address covered by the breakpoint must be included in
195
[tb->pc, tb->pc + tb->size) in order to for it to be
196
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
197
gen_io_start();
198
}
199
200
+ dc->pc = dc->base.pc_next;
201
+ dc->base.pc_next += 4;
202
+
203
/* Decode an instruction */
204
handle_instruction(dc, env);
205
206
- dc->pc += 4;
207
-
208
/* Translation stops when a conditional branch is encountered.
209
* Otherwise the subsequent code could get translated several times.
210
* Also stop translation when a page boundary is reached. This
211
* ensures prefetch aborts occur at the right place. */
212
- } while (!dc->is_jmp &&
213
+ } while (!dc->base.is_jmp &&
214
!tcg_op_buf_full() &&
215
num_insns < max_insns);
216
217
/* Indicate where the next block should start */
218
- switch (dc->is_jmp) {
219
+ switch (dc->base.is_jmp) {
220
case DISAS_NEXT:
221
case DISAS_UPDATE:
222
/* Save the current PC back into the CPU register */
223
- tcg_gen_movi_tl(cpu_R[R_PC], dc->pc);
224
+ tcg_gen_movi_tl(cpu_R[R_PC], dc->base.pc_next);
225
tcg_gen_exit_tb(NULL, 0);
226
break;
227
228
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
229
gen_tb_end(tb, num_insns);
230
231
/* Mark instruction starts for the final generated instruction */
232
- tb->size = dc->pc - tb->pc;
233
+ tb->size = dc->base.pc_next - dc->base.pc_first;
234
tb->icount = num_insns;
235
236
#ifdef DEBUG_DISAS
237
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
238
- && qemu_log_in_addr_range(tb->pc)) {
239
+ && qemu_log_in_addr_range(dc->base.pc_first)) {
240
FILE *logfile = qemu_log_lock();
241
- qemu_log("IN: %s\n", lookup_symbol(tb->pc));
242
- log_target_disas(cs, tb->pc, dc->pc - tb->pc);
243
+ qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
244
+ log_target_disas(cs, tb->pc, tb->size);
245
qemu_log("\n");
246
qemu_log_unlock(logfile);
247
}
248
--
59
--
249
2.25.1
60
2.43.0
250
251
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
3
---
4
tcg/ppc/tcg-target.c.inc | 64 +++++++++++++++++++++-------------------
4
tcg/optimize.c | 2 +-
5
1 file changed, 34 insertions(+), 30 deletions(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
6
6
7
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/ppc/tcg-target.c.inc
9
--- a/tcg/optimize.c
10
+++ b/tcg/ppc/tcg-target.c.inc
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src)
11
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
12
tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
12
fold_xx_to_i(ctx, op, 0)) {
13
return true;
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
13
}
17
}
14
18
15
+static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
19
static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
16
+{
17
+ TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
18
+ TCGReg t1 = dst == src ? dst : TCG_REG_R0;
19
+
20
+ /*
21
+ * In the following,
22
+ * dep(a, b, m) -> (a & ~m) | (b & m)
23
+ *
24
+ * Begin with: src = abcdefgh
25
+ */
26
+ /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */
27
+ tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
28
+ /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */
29
+ tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
30
+ /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */
31
+ tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);
32
+
33
+ /* t0 = rol64(t0, 32) = hgfe0000 */
34
+ tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
35
+ /* t1 = rol64(src, 32) = efghabcd */
36
+ tcg_out_rld(s, RLDICL, t1, src, 32, 0);
37
+
38
+ /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */
39
+ tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
40
+ /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */
41
+ tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
42
+ /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */
43
+ tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);
44
+
45
+ tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
46
+}
47
+
48
/* Emit a move into ret of arg, if it can be done in one insn. */
49
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
50
{
51
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
52
case INDEX_op_bswap32_i64:
53
tcg_out_bswap32(s, args[0], args[1]);
54
break;
55
-
56
case INDEX_op_bswap64_i64:
57
- a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
58
- if (a0 == a1) {
59
- a0 = TCG_REG_R0;
60
- a2 = a1;
61
- }
62
-
63
- /* a1 = # abcd efgh */
64
- /* a0 = rl32(a1, 8) # 0000 fghe */
65
- tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
66
- /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */
67
- tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
68
- /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */
69
- tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
70
-
71
- /* a0 = rl64(a0, 32) # hgfe 0000 */
72
- /* a2 = rl64(a1, 32) # efgh abcd */
73
- tcg_out_rld(s, RLDICL, a0, a0, 32, 0);
74
- tcg_out_rld(s, RLDICL, a2, a1, 32, 0);
75
-
76
- /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */
77
- tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31);
78
- /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */
79
- tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7);
80
- /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */
81
- tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23);
82
-
83
- if (a0 == 0) {
84
- tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
85
- }
86
+ tcg_out_bswap64(s, args[0], args[1]);
87
break;
88
89
case INDEX_op_deposit_i32:
90
--
20
--
91
2.25.1
21
2.43.0
92
93
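For the new tcg_out_bswap64 above, the byte shuffling can be checked on its own. The plain-C model below follows the patch's comments step by step, with dep(a, b, m) = (a & ~m) | (b & m), and compares the result against a full 64-bit byte swap. It models only the data movement, not the PPC instruction encoding:

#include <stdint.h>
#include <assert.h>

static uint32_t rol32(uint32_t x, unsigned n)
{
    return (x << n) | (x >> (32 - n));
}

static uint64_t rol64(uint64_t x, unsigned n)
{
    return (x << n) | (x >> (64 - n));
}

/* dep(a, b, m) = (a & ~m) | (b & m), as in the patch's comments. */
static uint64_t dep(uint64_t a, uint64_t b, uint64_t m)
{
    return (a & ~m) | (b & m);
}

/* Data-movement model of the new tcg_out_bswap64, with src = abcdefgh. */
static uint64_t bswap64_model(uint64_t src)
{
    uint64_t t0, t1;

    t0 = rol32((uint32_t)src, 8);                           /* 0000fghe */
    t0 = dep(t0, rol32((uint32_t)src, 24), 0xff000000u);    /* 0000hghe */
    t0 = dep(t0, rol32((uint32_t)src, 24), 0x0000ff00u);    /* 0000hgfe */

    t0 = rol64(t0, 32);                                     /* hgfe0000 */
    t1 = rol64(src, 32);                                    /* efghabcd */

    t0 = dep(t0, rol32((uint32_t)t1, 8), 0xffffffffu);      /* hgfebcda */
    t0 = dep(t0, rol32((uint32_t)t1, 24), 0xff000000u);     /* hgfedcda */
    t0 = dep(t0, rol32((uint32_t)t1, 24), 0x0000ff00u);     /* hgfedcba */
    return t0;
}

int main(void)
{
    uint64_t x = 0x0102030405060708ull;

    assert(bswap64_model(x) == __builtin_bswap64(x));
    return 0;
}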
1
Ever since 2a44f7f17364, flagx_known is always true.
1
Change return from bool to int; distinguish between
2
Fold away all of the tests against the flag.
2
complete folding, simplification, and no change.
3
3
4
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
target/cris/translate.c | 99 ++++++++-------------------------
7
tcg/optimize.c | 22 ++++++++++++++--------
9
target/cris/translate_v10.c.inc | 6 +-
8
1 file changed, 14 insertions(+), 8 deletions(-)
10
2 files changed, 24 insertions(+), 81 deletions(-)
11
9
12
diff --git a/target/cris/translate.c b/target/cris/translate.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/target/cris/translate.c
12
--- a/tcg/optimize.c
15
+++ b/target/cris/translate.c
13
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
17
15
return finish_folding(ctx, op);
18
int cc_x_uptodate; /* 1 - ccs, 2 - known | X_FLAG. 0 not up-to-date. */
16
}
19
int flags_uptodate; /* Whether or not $ccs is up-to-date. */
17
20
- int flagx_known; /* Whether or not flags_x has the x flag known at
18
-static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
21
- translation time. */
19
+/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
22
int flags_x;
20
+static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
23
24
int clear_x; /* Clear x after this insn? */
25
@@ -XXX,XX +XXX,XX @@ static inline void t_gen_add_flag(TCGv d, int flag)
26
27
static inline void t_gen_addx_carry(DisasContext *dc, TCGv d)
28
{
21
{
29
- if (dc->flagx_known) {
22
uint64_t a_zmask, b_val;
30
- if (dc->flags_x) {
23
TCGCond cond;
31
- TCGv c;
24
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
32
-
25
op->opc = xor_opc;
33
- c = tcg_temp_new();
26
op->args[2] = arg_new_constant(ctx, 1);
34
- t_gen_mov_TN_preg(c, PR_CCS);
27
}
35
- /* C flag is already at bit 0. */
28
- return false;
36
- tcg_gen_andi_tl(c, c, C_FLAG);
29
+ return -1;
37
- tcg_gen_add_tl(d, d, c);
30
}
38
- tcg_temp_free(c);
39
- }
40
- } else {
41
- TCGv x, c;
42
+ if (dc->flags_x) {
43
+ TCGv c = tcg_temp_new();
44
45
- x = tcg_temp_new();
46
- c = tcg_temp_new();
47
- t_gen_mov_TN_preg(x, PR_CCS);
48
- tcg_gen_mov_tl(c, x);
49
-
50
- /* Propagate carry into d if X is set. Branch free. */
51
+ t_gen_mov_TN_preg(c, PR_CCS);
52
+ /* C flag is already at bit 0. */
53
tcg_gen_andi_tl(c, c, C_FLAG);
54
- tcg_gen_andi_tl(x, x, X_FLAG);
55
- tcg_gen_shri_tl(x, x, 4);
56
-
57
- tcg_gen_and_tl(x, x, c);
58
- tcg_gen_add_tl(d, d, x);
59
- tcg_temp_free(x);
60
+ tcg_gen_add_tl(d, d, c);
61
tcg_temp_free(c);
62
}
63
}
64
65
static inline void t_gen_subx_carry(DisasContext *dc, TCGv d)
66
{
67
- if (dc->flagx_known) {
68
- if (dc->flags_x) {
69
- TCGv c;
70
-
71
- c = tcg_temp_new();
72
- t_gen_mov_TN_preg(c, PR_CCS);
73
- /* C flag is already at bit 0. */
74
- tcg_gen_andi_tl(c, c, C_FLAG);
75
- tcg_gen_sub_tl(d, d, c);
76
- tcg_temp_free(c);
77
- }
78
- } else {
79
- TCGv x, c;
80
+ if (dc->flags_x) {
81
+ TCGv c = tcg_temp_new();
82
83
- x = tcg_temp_new();
84
- c = tcg_temp_new();
85
- t_gen_mov_TN_preg(x, PR_CCS);
86
- tcg_gen_mov_tl(c, x);
87
-
88
- /* Propagate carry into d if X is set. Branch free. */
89
+ t_gen_mov_TN_preg(c, PR_CCS);
90
+ /* C flag is already at bit 0. */
91
tcg_gen_andi_tl(c, c, C_FLAG);
92
- tcg_gen_andi_tl(x, x, X_FLAG);
93
- tcg_gen_shri_tl(x, x, 4);
94
-
95
- tcg_gen_and_tl(x, x, c);
96
- tcg_gen_sub_tl(d, d, x);
97
- tcg_temp_free(x);
98
+ tcg_gen_sub_tl(d, d, c);
99
tcg_temp_free(c);
100
}
101
}
102
@@ -XXX,XX +XXX,XX @@ static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
103
104
static inline void cris_clear_x_flag(DisasContext *dc)
105
{
106
- if (dc->flagx_known && dc->flags_x) {
107
+ if (dc->flags_x) {
108
dc->flags_uptodate = 0;
109
}
31
}
110
-
32
-
111
- dc->flagx_known = 1;
33
- return false;
112
dc->flags_x = 0;
34
+ return 0;
113
}
35
}
114
36
115
@@ -XXX,XX +XXX,XX @@ static void cris_evaluate_flags(DisasContext *dc)
37
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
116
break;
38
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
39
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
117
}
40
}
118
41
119
- if (dc->flagx_known) {
42
- if (fold_setcond_zmask(ctx, op, false)) {
120
- if (dc->flags_x) {
43
+ i = fold_setcond_zmask(ctx, op, false);
121
- tcg_gen_ori_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], X_FLAG);
44
+ if (i > 0) {
122
- } else if (dc->cc_op == CC_OP_FLAGS) {
45
return true;
123
- tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~X_FLAG);
124
- }
125
+ if (dc->flags_x) {
126
+ tcg_gen_ori_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], X_FLAG);
127
+ } else if (dc->cc_op == CC_OP_FLAGS) {
128
+ tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~X_FLAG);
129
}
46
}
130
dc->flags_uptodate = 1;
47
- fold_setcond_tst_pow2(ctx, op, false);
131
}
48
+ if (i == 0) {
132
@@ -XXX,XX +XXX,XX @@ static void cris_update_cc_op(DisasContext *dc, int op, int size)
49
+ fold_setcond_tst_pow2(ctx, op, false);
133
static inline void cris_update_cc_x(DisasContext *dc)
50
+ }
134
{
51
135
/* Save the x flag state at the time of the cc snapshot. */
52
ctx->z_mask = 1;
136
- if (dc->flagx_known) {
53
return false;
137
- if (dc->cc_x_uptodate == (2 | dc->flags_x)) {
54
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
138
- return;
55
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
139
- }
140
- tcg_gen_movi_tl(cc_x, dc->flags_x);
141
- dc->cc_x_uptodate = 2 | dc->flags_x;
142
- } else {
143
- tcg_gen_andi_tl(cc_x, cpu_PR[PR_CCS], X_FLAG);
144
- dc->cc_x_uptodate = 1;
145
+ if (dc->cc_x_uptodate == (2 | dc->flags_x)) {
146
+ return;
147
}
56
}
148
+ tcg_gen_movi_tl(cc_x, dc->flags_x);
57
149
+ dc->cc_x_uptodate = 2 | dc->flags_x;
58
- if (fold_setcond_zmask(ctx, op, true)) {
150
}
59
+ i = fold_setcond_zmask(ctx, op, true);
151
60
+ if (i > 0) {
152
/* Update cc prior to executing ALU op. Needs source operands untouched. */
61
return true;
153
@@ -XXX,XX +XXX,XX @@ static void gen_store (DisasContext *dc, TCGv addr, TCGv val,
154
155
/* Conditional writes. We only support the kind were X and P are known
156
at translation time. */
157
- if (dc->flagx_known && dc->flags_x && (dc->tb_flags & P_FLAG)) {
158
+ if (dc->flags_x && (dc->tb_flags & P_FLAG)) {
159
dc->postinc = 0;
160
cris_evaluate_flags(dc);
161
tcg_gen_ori_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], C_FLAG);
162
@@ -XXX,XX +XXX,XX @@ static void gen_store (DisasContext *dc, TCGv addr, TCGv val,
163
164
tcg_gen_qemu_st_tl(val, addr, mem_index, MO_TE + ctz32(size));
165
166
- if (dc->flagx_known && dc->flags_x) {
167
+ if (dc->flags_x) {
168
cris_evaluate_flags(dc);
169
tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~C_FLAG);
170
}
62
}
171
@@ -XXX,XX +XXX,XX @@ static int dec_addc_r(CPUCRISState *env, DisasContext *dc)
63
- fold_setcond_tst_pow2(ctx, op, true);
172
LOG_DIS("addc $r%u, $r%u\n",
64
+ if (i == 0) {
173
dc->op1, dc->op2);
65
+ fold_setcond_tst_pow2(ctx, op, true);
174
cris_evaluate_flags(dc);
66
+ }
175
+
67
176
/* Set for this insn. */
68
/* Value is {0,-1} so all bits are repetitions of the sign. */
177
- dc->flagx_known = 1;
69
ctx->s_mask = -1;
178
dc->flags_x = X_FLAG;
179
180
cris_cc_mask(dc, CC_MASK_NZVC);
181
@@ -XXX,XX +XXX,XX @@ static int dec_setclrf(CPUCRISState *env, DisasContext *dc)
182
}
183
184
if (flags & X_FLAG) {
185
- dc->flagx_known = 1;
186
if (set) {
187
dc->flags_x = X_FLAG;
188
} else {
189
@@ -XXX,XX +XXX,XX @@ static int dec_addc_mr(CPUCRISState *env, DisasContext *dc)
190
cris_evaluate_flags(dc);
191
192
/* Set for this insn. */
193
- dc->flagx_known = 1;
194
dc->flags_x = X_FLAG;
195
196
cris_alu_m_alloc_temps(t);
197
@@ -XXX,XX +XXX,XX @@ static void cris_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
198
dc->ppc = pc_start;
199
dc->pc = pc_start;
200
dc->flags_uptodate = 1;
201
- dc->flagx_known = 1;
202
dc->flags_x = tb_flags & X_FLAG;
203
dc->cc_x_uptodate = 0;
204
dc->cc_mask = 0;
205
@@ -XXX,XX +XXX,XX @@ static void cris_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
206
}
207
208
/* Fold unhandled changes to X_FLAG into cpustate_changed. */
209
- dc->cpustate_changed |= !dc->flagx_known;
210
dc->cpustate_changed |= dc->flags_x != (dc->base.tb->flags & X_FLAG);
211
212
/*
213
diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc
214
index XXXXXXX..XXXXXXX 100644
215
--- a/target/cris/translate_v10.c.inc
216
+++ b/target/cris/translate_v10.c.inc
217
@@ -XXX,XX +XXX,XX @@ static void gen_store_v10(DisasContext *dc, TCGv addr, TCGv val,
218
cris_store_direct_jmp(dc);
219
}
220
221
- /* Conditional writes. We only support the kind were X is known
222
- at translation time. */
223
- if (dc->flagx_known && dc->flags_x) {
224
+ /* Conditional writes. */
225
+ if (dc->flags_x) {
226
gen_store_v10_conditional(dc, addr, val, size, mem_index);
227
return;
228
}
229
@@ -XXX,XX +XXX,XX @@ static unsigned int dec10_setclrf(DisasContext *dc)
230
231
232
if (flags & X_FLAG) {
233
- dc->flagx_known = 1;
234
if (set)
235
dc->flags_x = X_FLAG;
236
else
237
--
70
--
238
2.25.1
71
2.43.0
239
240
Move this pc update into tb_stop.
We will be able to re-use this code shortly.

Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/cris/translate.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/target/cris/translate.c b/target/cris/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/cris/translate.c
11
--- a/tcg/optimize.c
14
+++ b/target/cris/translate.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
16
#define BUG() (gen_BUG(dc, __FILE__, __LINE__))
14
fold_setcond_tst_pow2(ctx, op, false);
17
#define BUG_ON(x) ({if (x) BUG();})
18
19
-/* is_jmp field values */
20
-#define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */
21
-#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */
22
+/*
23
+ * Target-specific is_jmp field values
24
+ */
25
+/* Only pc was modified dynamically */
26
+#define DISAS_JUMP DISAS_TARGET_0
27
+/* Cpu state was modified dynamically, including pc */
28
+#define DISAS_UPDATE DISAS_TARGET_1
29
+/* Cpu state was modified dynamically, excluding pc -- use npc */
30
+#define DISAS_UPDATE_NEXT DISAS_TARGET_2
31
32
/* Used by the decoder. */
33
#define EXTRACT_FIELD(src, start, end) \
34
@@ -XXX,XX +XXX,XX @@ static void cris_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
35
36
/* Force an update if the per-tb cpu state has changed. */
37
if (dc->base.is_jmp == DISAS_NEXT && dc->cpustate_changed) {
38
- dc->base.is_jmp = DISAS_UPDATE;
39
- tcg_gen_movi_tl(env_pc, dc->pc);
40
+ dc->base.is_jmp = DISAS_UPDATE_NEXT;
41
+ return;
42
}
15
}
43
16
44
/*
17
- ctx->z_mask = 1;
45
@@ -XXX,XX +XXX,XX @@ static void cris_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
18
- return false;
46
if (unlikely(dc->base.singlestep_enabled)) {
19
+ return fold_masks_z(ctx, op, 1);
47
switch (is_jmp) {
20
}
48
case DISAS_TOO_MANY:
21
49
+ case DISAS_UPDATE_NEXT:
22
static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
50
tcg_gen_movi_tl(env_pc, npc);
51
/* fall through */
52
case DISAS_JUMP:
53
@@ -XXX,XX +XXX,XX @@ static void cris_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
54
case DISAS_TOO_MANY:
55
gen_goto_tb(dc, 0, npc);
56
break;
57
+ case DISAS_UPDATE_NEXT:
58
+ tcg_gen_movi_tl(env_pc, npc);
59
+ /* fall through */
60
case DISAS_JUMP:
61
case DISAS_UPDATE:
62
/* Indicate that interupts must be re-evaluated before the next TB. */
63
--
23
--
64
2.25.1
24
2.43.0
65
66
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
1
Avoid the use of the OptContext slots.
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
5
---
4
target/nios2/translate.c | 8 ++------
6
tcg/optimize.c | 3 +--
5
1 file changed, 2 insertions(+), 6 deletions(-)
7
1 file changed, 1 insertion(+), 2 deletions(-)
6
8
7
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
9
--- a/target/nios2/translate.c
11
--- a/tcg/optimize.c
10
+++ b/target/nios2/translate.c
12
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static void handle_instruction(DisasContext *dc, CPUNios2State *env)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
12
op = get_opcode(code);
13
14
if (unlikely(op >= ARRAY_SIZE(i_type_instructions))) {
15
- goto illegal_op;
16
+ t_gen_helper_raise_exception(dc, EXCP_ILLEGAL);
17
+ return;
18
}
14
}
19
15
20
dc->zero = NULL;
16
/* Value is {0,-1} so all bits are repetitions of the sign. */
21
@@ -XXX,XX +XXX,XX @@ static void handle_instruction(DisasContext *dc, CPUNios2State *env)
17
- ctx->s_mask = -1;
22
if (dc->zero) {
18
- return false;
23
tcg_temp_free(dc->zero);
19
+ return fold_masks_s(ctx, op, -1);
24
}
25
-
26
- return;
27
-
28
-illegal_op:
29
- t_gen_helper_raise_exception(dc, EXCP_ILLEGAL);
30
}
20
}
31
21
32
static const char * const regnames[] = {
22
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
33
--
23
--
34
2.25.1
24
2.43.0
35
36
1
Remove TCG_BSWAP_IZ and the preceding zero-extension.
1
Avoid the use of the OptContext slots.
2
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
target/sh4/translate.c | 3 +--
6
tcg/optimize.c | 3 +--
7
1 file changed, 1 insertion(+), 2 deletions(-)
7
1 file changed, 1 insertion(+), 2 deletions(-)
8
8
9
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/sh4/translate.c
11
--- a/tcg/optimize.c
12
+++ b/target/sh4/translate.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
14
case 0x6008:        /* swap.b Rm,Rn */
14
return fold_setcond(ctx, op);
15
    {
15
}
16
TCGv low = tcg_temp_new();
16
17
-     tcg_gen_ext16u_i32(low, REG(B7_4));
17
- ctx->z_mask = 1;
18
-     tcg_gen_bswap16_i32(low, low, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
18
- return false;
19
+ tcg_gen_bswap16_i32(low, REG(B7_4), 0);
19
+ return fold_masks_z(ctx, op, 1);
20
tcg_gen_deposit_i32(REG(B11_8), REG(B7_4), low, 0, 16);
20
21
     tcg_temp_free(low);
21
do_setcond_const:
22
    }
22
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
--
23
--
24
2.25.1
24
2.43.0
25
26
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
3
---
4
tcg/ppc/tcg-target.c.inc | 38 ++++++++++++++++++++++----------------
4
tcg/optimize.c | 2 +-
5
1 file changed, 22 insertions(+), 16 deletions(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
6
6
7
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/ppc/tcg-target.c.inc
9
--- a/tcg/optimize.c
10
+++ b/tcg/ppc/tcg-target.c.inc
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src)
11
@@ -XXX,XX +XXX,XX @@ static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
12
tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
12
if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
13
op->args[3] = tcg_swap_cond(op->args[3]);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
13
}
17
}
14
18
15
+static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src)
19
static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
16
+{
17
+ TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
18
+
19
+ /*
20
+ * Stolen from gcc's builtin_bswap32.
21
+ * In the following,
22
+ * dep(a, b, m) -> (a & ~m) | (b & m)
23
+ *
24
+ * Begin with: src = xxxxabcd
25
+ */
26
+ /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */
27
+ tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
28
+ /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */
29
+ tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
30
+ /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */
31
+ tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);
32
+
33
+ tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
34
+}
35
+
36
/* Emit a move into ret of arg, if it can be done in one insn. */
37
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
38
{
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
40
case INDEX_op_bswap16_i64:
41
tcg_out_bswap16(s, args[0], args[1]);
42
break;
43
-
44
case INDEX_op_bswap32_i32:
45
case INDEX_op_bswap32_i64:
46
- /* Stolen from gcc's builtin_bswap32 */
47
- a1 = args[1];
48
- a0 = args[0] == a1 ? TCG_REG_R0 : args[0];
49
-
50
- /* a1 = args[1] # abcd */
51
- /* a0 = rotate_left (a1, 8) # bcda */
52
- tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
53
- /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
54
- tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
55
- /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
56
- tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
57
-
58
- if (a0 == TCG_REG_R0) {
59
- tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
60
- }
61
+ tcg_out_bswap32(s, args[0], args[1]);
62
break;
63
64
case INDEX_op_bswap64_i64:
65
--
20
--
66
2.25.1
21
2.43.0
67
68
1
With the use of a suitable temporary, we can use the same
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
algorithm when src overlaps dst. The result is the same
3
number of instructions either way.
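As a side note (not part of the patch itself), the rotate-and-insert scheme used
by the bswap16 helper below can be modeled in plain C. rol32() and dep() are
illustrative helpers mirroring the comments in the diff, not QEMU APIs:

    /* Standalone sketch of the rlwinm/rlwimi byte-swap construction. */
    #include <stdint.h>

    static uint32_t rol32(uint32_t x, unsigned n)   /* rotate left, 0 < n < 32 */
    {
        return (x << n) | (x >> (32 - n));
    }

    static uint32_t dep(uint32_t a, uint32_t b, uint32_t m)
    {
        return (a & ~m) | (b & m);                  /* deposit b into a under mask m */
    }

    static uint32_t model_bswap16(uint32_t src)     /* src = xxxxabcd */
    {
        uint32_t tmp = rol32(src, 24) & 0x000000ff; /* 0000000c */
        tmp = dep(tmp, rol32(src, 8), 0x0000ff00);  /* 000000dc */
        return tmp;
    }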
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
tcg/ppc/tcg-target.c.inc | 34 +++++++++++++++++++---------------
4
tcg/optimize.c | 2 +-
9
1 file changed, 19 insertions(+), 15 deletions(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
10
6
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
9
--- a/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target.c.inc
10
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
11
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
16
tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
12
if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
13
op->args[5] = tcg_invert_cond(op->args[5]);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
17
}
18
18
19
+static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src)
19
static bool fold_sextract(OptContext *ctx, TCGOp *op)
20
+{
21
+ TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
22
+
23
+ /*
24
+ * In the following,
25
+ * dep(a, b, m) -> (a & ~m) | (b & m)
26
+ *
27
+ * Begin with: src = xxxxabcd
28
+ */
29
+ /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */
30
+ tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
31
+ /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */
32
+ tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
33
+
34
+ tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
35
+}
36
+
37
/* Emit a move into ret of arg, if it can be done in one insn. */
38
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
39
{
40
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
41
42
case INDEX_op_bswap16_i32:
43
case INDEX_op_bswap16_i64:
44
- a0 = args[0], a1 = args[1];
45
- /* a1 = abcd */
46
- if (a0 != a1) {
47
- /* a0 = (a1 r<< 24) & 0xff # 000c */
48
- tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
49
- /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
50
- tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
51
- } else {
52
- /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
53
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
54
- /* a0 = (a1 r<< 24) & 0xff # 000c */
55
- tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
56
- /* a0 = a0 | r0 # 00dc */
57
- tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
58
- }
59
+ tcg_out_bswap16(s, args[0], args[1]);
60
break;
61
62
case INDEX_op_bswap32_i32:
63
--
20
--
64
2.25.1
21
2.43.0
65
66
1
We really do this already, by including them into the same test.
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
This just hoists the expression up a bit.
3
2
4
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/cris/translate.c | 13 ++++++-------
6
tcg/optimize.c | 24 +++++++++---------------
9
1 file changed, 6 insertions(+), 7 deletions(-)
7
1 file changed, 9 insertions(+), 15 deletions(-)
10
8
11
diff --git a/target/cris/translate.c b/target/cris/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/cris/translate.c
11
--- a/tcg/optimize.c
14
+++ b/target/cris/translate.c
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void cris_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
16
cris_clear_x_flag(dc);
14
static bool fold_sextract(OptContext *ctx, TCGOp *op)
15
{
16
uint64_t z_mask, s_mask, s_mask_old;
17
+ TempOptInfo *t1 = arg_info(op->args[1]);
18
int pos = op->args[2];
19
int len = op->args[3];
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t;
23
-
24
- t = arg_info(op->args[1])->val;
25
- t = sextract64(t, pos, len);
26
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
27
+ if (ti_is_const(t1)) {
28
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
29
+ sextract64(ti_const_val(t1), pos, len));
17
}
30
}
18
31
19
+ /* Fold unhandled changes to X_FLAG into cpustate_changed. */
32
- z_mask = arg_info(op->args[1])->z_mask;
20
+ dc->cpustate_changed |= !dc->flagx_known;
33
- z_mask = sextract64(z_mask, pos, len);
21
+ dc->cpustate_changed |= dc->flags_x != (dc->base.tb->flags & X_FLAG);
34
- ctx->z_mask = z_mask;
22
+
35
-
23
/*
36
- s_mask_old = arg_info(op->args[1])->s_mask;
24
* Check for delayed branches here. If we do it before
37
- s_mask = sextract64(s_mask_old, pos, len);
25
* actually generating any host code, the simulator will just
38
- s_mask |= MAKE_64BIT_MASK(len, 64 - len);
26
@@ -XXX,XX +XXX,XX @@ static void cris_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
39
- ctx->s_mask = s_mask;
27
t_gen_movi_env_TN(dslot, 0);
40
+ s_mask_old = t1->s_mask;
28
}
41
+ s_mask = s_mask_old >> pos;
29
42
+ s_mask |= -1ull << (len - 1);
30
- if (dc->cpustate_changed
43
31
- || !dc->flagx_known
44
if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
32
- || (dc->flags_x != (dc->base.tb->flags & X_FLAG))) {
45
return true;
33
+ if (dc->cpustate_changed) {
34
cris_store_direct_jmp(dc);
35
}
36
37
@@ -XXX,XX +XXX,XX @@ static void cris_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
38
}
46
}
39
47
40
/* Force an update if the per-tb cpu state has changed. */
48
- return fold_masks(ctx, op);
41
- if (dc->base.is_jmp == DISAS_NEXT
49
+ z_mask = sextract64(t1->z_mask, pos, len);
42
- && (dc->cpustate_changed
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
43
- || !dc->flagx_known
51
}
44
- || (dc->flags_x != (dc->base.tb->flags & X_FLAG)))) {
52
45
+ if (dc->base.is_jmp == DISAS_NEXT && dc->cpustate_changed) {
53
static bool fold_shift(OptContext *ctx, TCGOp *op)
46
dc->base.is_jmp = DISAS_UPDATE;
47
tcg_gen_movi_tl(env_pc, dc->pc);
48
}
49
--
54
--
50
2.25.1
55
2.43.0
51
52
1
We do not need to copy this into DisasContext.
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
target/nios2/translate.c | 73 +++++++++++++++++++---------------------
6
tcg/optimize.c | 27 ++++++++++++++-------------
7
1 file changed, 34 insertions(+), 39 deletions(-)
7
1 file changed, 14 insertions(+), 13 deletions(-)
8
8
9
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/nios2/translate.c
11
--- a/tcg/optimize.c
12
+++ b/target/nios2/translate.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
14
static bool fold_shift(OptContext *ctx, TCGOp *op)
15
{
16
uint64_t s_mask, z_mask, sign;
17
+ TempOptInfo *t1, *t2;
18
19
if (fold_const2(ctx, op) ||
20
fold_ix_to_i(ctx, op, 0) ||
21
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
22
return true;
14
}
23
}
15
24
16
typedef struct DisasContext {
25
- s_mask = arg_info(op->args[1])->s_mask;
17
- TCGv *cpu_R;
26
- z_mask = arg_info(op->args[1])->z_mask;
18
TCGv_i32 zero;
27
+ t1 = arg_info(op->args[1]);
19
int is_jmp;
28
+ t2 = arg_info(op->args[2]);
20
target_ulong pc;
29
+ s_mask = t1->s_mask;
21
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
30
+ z_mask = t1->z_mask;
22
bool singlestep_enabled;
31
23
} DisasContext;
32
- if (arg_is_const(op->args[2])) {
24
33
- int sh = arg_info(op->args[2])->val;
25
+static TCGv cpu_R[NUM_CORE_REGS];
34
-
26
+
35
- ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
27
typedef struct Nios2Instruction {
36
+ if (ti_is_const(t2)) {
28
void (*handler)(DisasContext *dc, uint32_t code, uint32_t flags);
37
+ int sh = ti_const_val(t2);
29
uint32_t flags;
38
30
@@ -XXX,XX +XXX,XX @@ static TCGv load_zero(DisasContext *dc)
39
+ z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
31
static TCGv load_gpr(DisasContext *dc, uint8_t reg)
40
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
32
{
41
33
if (likely(reg != R_ZERO)) {
42
- return fold_masks(ctx, op);
34
- return dc->cpu_R[reg];
43
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
35
+ return cpu_R[reg];
36
} else {
37
return load_zero(dc);
38
}
44
}
39
@@ -XXX,XX +XXX,XX @@ static void t_gen_helper_raise_exception(DisasContext *dc,
45
40
{
46
switch (op->opc) {
41
TCGv_i32 tmp = tcg_const_i32(index);
47
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
42
48
* Arithmetic right shift will not reduce the number of
43
- tcg_gen_movi_tl(dc->cpu_R[R_PC], dc->pc);
49
* input sign repetitions.
44
+ tcg_gen_movi_tl(cpu_R[R_PC], dc->pc);
50
*/
45
gen_helper_raise_exception(cpu_env, tmp);
51
- ctx->s_mask = s_mask;
46
tcg_temp_free_i32(tmp);
52
- break;
47
dc->is_jmp = DISAS_NORETURN;
53
+ return fold_masks_s(ctx, op, s_mask);
48
@@ -XXX,XX +XXX,XX @@ static void gen_goto_tb(DisasContext *dc, int n, uint32_t dest)
54
CASE_OP_32_64(shr):
49
55
/*
50
if (use_goto_tb(dc, dest)) {
56
* If the sign bit is known zero, then logical right shift
51
tcg_gen_goto_tb(n);
57
- * will not reduced the number of input sign repetitions.
52
- tcg_gen_movi_tl(dc->cpu_R[R_PC], dest);
58
+ * will not reduce the number of input sign repetitions.
53
+ tcg_gen_movi_tl(cpu_R[R_PC], dest);
59
*/
54
tcg_gen_exit_tb(tb, n);
60
- sign = (s_mask & -s_mask) >> 1;
55
} else {
61
+ sign = -s_mask;
56
- tcg_gen_movi_tl(dc->cpu_R[R_PC], dest);
62
if (sign && !(z_mask & sign)) {
57
+ tcg_gen_movi_tl(cpu_R[R_PC], dest);
63
- ctx->s_mask = s_mask;
58
tcg_gen_exit_tb(NULL, 0);
64
+ return fold_masks_s(ctx, op, s_mask);
59
}
60
}
61
@@ -XXX,XX +XXX,XX @@ static void jmpi(DisasContext *dc, uint32_t code, uint32_t flags)
62
63
static void call(DisasContext *dc, uint32_t code, uint32_t flags)
64
{
65
- tcg_gen_movi_tl(dc->cpu_R[R_RA], dc->pc + 4);
66
+ tcg_gen_movi_tl(cpu_R[R_RA], dc->pc + 4);
67
jmpi(dc, code, flags);
68
}
69
70
@@ -XXX,XX +XXX,XX @@ static void gen_ldx(DisasContext *dc, uint32_t code, uint32_t flags)
71
* the Nios2 CPU.
72
*/
73
if (likely(instr.b != R_ZERO)) {
74
- data = dc->cpu_R[instr.b];
75
+ data = cpu_R[instr.b];
76
} else {
77
data = tcg_temp_new();
78
}
79
@@ -XXX,XX +XXX,XX @@ static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags)
80
I_TYPE(instr, code);
81
82
TCGLabel *l1 = gen_new_label();
83
- tcg_gen_brcond_tl(flags, dc->cpu_R[instr.a], dc->cpu_R[instr.b], l1);
84
+ tcg_gen_brcond_tl(flags, cpu_R[instr.a], cpu_R[instr.b], l1);
85
gen_goto_tb(dc, 0, dc->pc + 4);
86
gen_set_label(l1);
87
gen_goto_tb(dc, 1, dc->pc + 4 + (instr.imm16.s & -4));
88
@@ -XXX,XX +XXX,XX @@ static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags)
89
static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \
90
{ \
91
I_TYPE(instr, (code)); \
92
- tcg_gen_setcondi_tl(flags, (dc)->cpu_R[instr.b], (dc)->cpu_R[instr.a], \
93
- (op3)); \
94
+ tcg_gen_setcondi_tl(flags, cpu_R[instr.b], cpu_R[instr.a], (op3)); \
95
}
96
97
gen_i_cmpxx(gen_cmpxxsi, instr.imm16.s)
98
@@ -XXX,XX +XXX,XX @@ static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \
99
if (unlikely(instr.b == R_ZERO)) { /* Store to R_ZERO is ignored */ \
100
return; \
101
} else if (instr.a == R_ZERO) { /* MOVxI optimizations */ \
102
- tcg_gen_movi_tl(dc->cpu_R[instr.b], (resimm) ? (op3) : 0); \
103
+ tcg_gen_movi_tl(cpu_R[instr.b], (resimm) ? (op3) : 0); \
104
} else { \
105
- tcg_gen_##insn##_tl((dc)->cpu_R[instr.b], (dc)->cpu_R[instr.a], \
106
- (op3)); \
107
+ tcg_gen_##insn##_tl(cpu_R[instr.b], cpu_R[instr.a], (op3)); \
108
} \
109
}
110
111
@@ -XXX,XX +XXX,XX @@ static const Nios2Instruction i_type_instructions[] = {
112
*/
113
static void eret(DisasContext *dc, uint32_t code, uint32_t flags)
114
{
115
- tcg_gen_mov_tl(dc->cpu_R[CR_STATUS], dc->cpu_R[CR_ESTATUS]);
116
- tcg_gen_mov_tl(dc->cpu_R[R_PC], dc->cpu_R[R_EA]);
117
+ tcg_gen_mov_tl(cpu_R[CR_STATUS], cpu_R[CR_ESTATUS]);
118
+ tcg_gen_mov_tl(cpu_R[R_PC], cpu_R[R_EA]);
119
120
dc->is_jmp = DISAS_JUMP;
121
}
122
@@ -XXX,XX +XXX,XX @@ static void eret(DisasContext *dc, uint32_t code, uint32_t flags)
123
/* PC <- ra */
124
static void ret(DisasContext *dc, uint32_t code, uint32_t flags)
125
{
126
- tcg_gen_mov_tl(dc->cpu_R[R_PC], dc->cpu_R[R_RA]);
127
+ tcg_gen_mov_tl(cpu_R[R_PC], cpu_R[R_RA]);
128
129
dc->is_jmp = DISAS_JUMP;
130
}
131
@@ -XXX,XX +XXX,XX @@ static void ret(DisasContext *dc, uint32_t code, uint32_t flags)
132
/* PC <- ba */
133
static void bret(DisasContext *dc, uint32_t code, uint32_t flags)
134
{
135
- tcg_gen_mov_tl(dc->cpu_R[R_PC], dc->cpu_R[R_BA]);
136
+ tcg_gen_mov_tl(cpu_R[R_PC], cpu_R[R_BA]);
137
138
dc->is_jmp = DISAS_JUMP;
139
}
140
@@ -XXX,XX +XXX,XX @@ static void jmp(DisasContext *dc, uint32_t code, uint32_t flags)
141
{
142
R_TYPE(instr, code);
143
144
- tcg_gen_mov_tl(dc->cpu_R[R_PC], load_gpr(dc, instr.a));
145
+ tcg_gen_mov_tl(cpu_R[R_PC], load_gpr(dc, instr.a));
146
147
dc->is_jmp = DISAS_JUMP;
148
}
149
@@ -XXX,XX +XXX,XX @@ static void nextpc(DisasContext *dc, uint32_t code, uint32_t flags)
150
R_TYPE(instr, code);
151
152
if (likely(instr.c != R_ZERO)) {
153
- tcg_gen_movi_tl(dc->cpu_R[instr.c], dc->pc + 4);
154
+ tcg_gen_movi_tl(cpu_R[instr.c], dc->pc + 4);
155
}
156
}
157
158
@@ -XXX,XX +XXX,XX @@ static void callr(DisasContext *dc, uint32_t code, uint32_t flags)
159
{
160
R_TYPE(instr, code);
161
162
- tcg_gen_mov_tl(dc->cpu_R[R_PC], load_gpr(dc, instr.a));
163
- tcg_gen_movi_tl(dc->cpu_R[R_RA], dc->pc + 4);
164
+ tcg_gen_mov_tl(cpu_R[R_PC], load_gpr(dc, instr.a));
165
+ tcg_gen_movi_tl(cpu_R[R_RA], dc->pc + 4);
166
167
dc->is_jmp = DISAS_JUMP;
168
}
169
@@ -XXX,XX +XXX,XX @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags)
170
{
171
#if !defined(CONFIG_USER_ONLY)
172
if (likely(instr.c != R_ZERO)) {
173
- tcg_gen_mov_tl(dc->cpu_R[instr.c], dc->cpu_R[instr.imm5 + CR_BASE]);
174
+ tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]);
175
#ifdef DEBUG_MMU
176
TCGv_i32 tmp = tcg_const_i32(instr.imm5 + CR_BASE);
177
- gen_helper_mmu_read_debug(dc->cpu_R[instr.c], cpu_env, tmp);
178
+ gen_helper_mmu_read_debug(cpu_R[instr.c], cpu_env, tmp);
179
tcg_temp_free_i32(tmp);
180
#endif
181
}
182
@@ -XXX,XX +XXX,XX @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags)
183
184
default:
185
if (likely(instr.c != R_ZERO)) {
186
- tcg_gen_mov_tl(dc->cpu_R[instr.c], dc->cpu_R[instr.imm5 + CR_BASE]);
187
+ tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]);
188
}
65
}
189
break;
66
break;
190
}
191
@@ -XXX,XX +XXX,XX @@ static void wrctl(DisasContext *dc, uint32_t code, uint32_t flags)
192
}
193
194
default:
67
default:
195
- tcg_gen_mov_tl(dc->cpu_R[instr.imm5 + CR_BASE], load_gpr(dc, instr.a));
196
+ tcg_gen_mov_tl(cpu_R[instr.imm5 + CR_BASE], load_gpr(dc, instr.a));
197
break;
68
break;
198
}
69
}
199
70
200
@@ -XXX,XX +XXX,XX @@ static void gen_cmpxx(DisasContext *dc, uint32_t code, uint32_t flags)
71
- return false;
201
{
72
+ return finish_folding(ctx, op);
202
R_TYPE(instr, code);
203
if (likely(instr.c != R_ZERO)) {
204
- tcg_gen_setcond_tl(flags, dc->cpu_R[instr.c], dc->cpu_R[instr.a],
205
- dc->cpu_R[instr.b]);
206
+ tcg_gen_setcond_tl(flags, cpu_R[instr.c], cpu_R[instr.a],
207
+ cpu_R[instr.b]);
208
}
209
}
73
}
210
74
211
@@ -XXX,XX +XXX,XX @@ static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \
75
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
212
{ \
213
R_TYPE(instr, (code)); \
214
if (likely(instr.c != R_ZERO)) { \
215
- tcg_gen_##insn((dc)->cpu_R[instr.c], load_gpr((dc), instr.a), \
216
- (op3)); \
217
+ tcg_gen_##insn(cpu_R[instr.c], load_gpr((dc), instr.a), (op3)); \
218
} \
219
}
220
221
@@ -XXX,XX +XXX,XX @@ static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \
222
R_TYPE(instr, (code)); \
223
if (likely(instr.c != R_ZERO)) { \
224
TCGv t0 = tcg_temp_new(); \
225
- tcg_gen_##insn(t0, dc->cpu_R[instr.c], \
226
- load_gpr(dc, instr.a), load_gpr(dc, instr.b)); \
227
+ tcg_gen_##insn(t0, cpu_R[instr.c], \
228
+ load_gpr(dc, instr.a), load_gpr(dc, instr.b)); \
229
tcg_temp_free(t0); \
230
} \
231
}
232
@@ -XXX,XX +XXX,XX @@ static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \
233
if (likely(instr.c != R_ZERO)) { \
234
TCGv t0 = tcg_temp_new(); \
235
tcg_gen_andi_tl(t0, load_gpr((dc), instr.b), 31); \
236
- tcg_gen_##insn((dc)->cpu_R[instr.c], load_gpr((dc), instr.a), t0); \
237
+ tcg_gen_##insn(cpu_R[instr.c], load_gpr((dc), instr.a), t0); \
238
tcg_temp_free(t0); \
239
} \
240
}
241
@@ -XXX,XX +XXX,XX @@ static void divs(DisasContext *dc, uint32_t code, uint32_t flags)
242
tcg_gen_or_tl(t2, t2, t3);
243
tcg_gen_movi_tl(t3, 0);
244
tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
245
- tcg_gen_div_tl(dc->cpu_R[instr.c], t0, t1);
246
- tcg_gen_ext32s_tl(dc->cpu_R[instr.c], dc->cpu_R[instr.c]);
247
+ tcg_gen_div_tl(cpu_R[instr.c], t0, t1);
248
+ tcg_gen_ext32s_tl(cpu_R[instr.c], cpu_R[instr.c]);
249
250
tcg_temp_free(t3);
251
tcg_temp_free(t2);
252
@@ -XXX,XX +XXX,XX @@ static void divu(DisasContext *dc, uint32_t code, uint32_t flags)
253
tcg_gen_ext32u_tl(t0, load_gpr(dc, instr.a));
254
tcg_gen_ext32u_tl(t1, load_gpr(dc, instr.b));
255
tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
256
- tcg_gen_divu_tl(dc->cpu_R[instr.c], t0, t1);
257
- tcg_gen_ext32s_tl(dc->cpu_R[instr.c], dc->cpu_R[instr.c]);
258
+ tcg_gen_divu_tl(cpu_R[instr.c], t0, t1);
259
+ tcg_gen_ext32s_tl(cpu_R[instr.c], cpu_R[instr.c]);
260
261
tcg_temp_free(t3);
262
tcg_temp_free(t2);
263
@@ -XXX,XX +XXX,XX @@ static const char * const regnames[] = {
264
"rpc"
265
};
266
267
-static TCGv cpu_R[NUM_CORE_REGS];
268
-
269
#include "exec/gen-icount.h"
270
271
static void gen_exception(DisasContext *dc, uint32_t excp)
272
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
273
int num_insns;
274
275
/* Initialize DC */
276
- dc->cpu_R = cpu_R;
277
dc->is_jmp = DISAS_NEXT;
278
dc->pc = tb->pc;
279
dc->tb = tb;
280
--
76
--
281
2.25.1
77
2.43.0
282
283
1
We can perform any required sign-extension via TCG_BSWAP_OS.
1
Merge the two conditions, sign != 0 && !(z_mask & sign),
2
by testing ~z_mask & sign. If sign == 0, the logical and
3
will produce false.
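As a quick illustration (a sketch, not part of the patch): assuming 'sign' is
either zero or a single repetition bit, which is how it is computed in
fold_shift, the merged test agrees with the original pair of conditions:

    /* Standalone sketch, not QEMU code. */
    #include <stdbool.h>
    #include <stdint.h>

    static bool old_test(uint64_t z_mask, uint64_t sign)
    {
        return sign != 0 && !(z_mask & sign);   /* original form */
    }

    static bool new_test(uint64_t z_mask, uint64_t sign)
    {
        return (~z_mask & sign) != 0;           /* merged form */
    }

    /* For sign == 0 both return false; for sign == (1ull << n) both
     * reduce to "bit n is not set in z_mask". */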
2
4
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
tcg/tcg-op.c | 24 ++++++++++--------------
8
tcg/optimize.c | 5 ++---
8
1 file changed, 10 insertions(+), 14 deletions(-)
9
1 file changed, 2 insertions(+), 3 deletions(-)
9
10
10
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg-op.c
13
--- a/tcg/optimize.c
13
+++ b/tcg/tcg-op.c
14
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
15
orig_memop = memop;
16
16
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
17
static bool fold_shift(OptContext *ctx, TCGOp *op)
17
memop &= ~MO_BSWAP;
18
{
18
- /* The bswap primitive requires zero-extended input. */
19
- uint64_t s_mask, z_mask, sign;
19
+ /* The bswap primitive benefits from zero-extended input. */
20
+ uint64_t s_mask, z_mask;
20
if ((memop & MO_SSIZE) == MO_SW) {
21
TempOptInfo *t1, *t2;
21
memop &= ~MO_SIGN;
22
23
if (fold_const2(ctx, op) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
25
* If the sign bit is known zero, then logical right shift
26
* will not reduce the number of input sign repetitions.
27
*/
28
- sign = -s_mask;
29
- if (sign && !(z_mask & sign)) {
30
+ if (~z_mask & -s_mask) {
31
return fold_masks_s(ctx, op, s_mask);
22
}
32
}
23
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
33
break;
24
if ((orig_memop ^ memop) & MO_BSWAP) {
25
switch (orig_memop & MO_SIZE) {
26
case MO_16:
27
- tcg_gen_bswap16_i32(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
28
- if (orig_memop & MO_SIGN) {
29
- tcg_gen_ext16s_i32(val, val);
30
- }
31
+ tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
32
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
33
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
34
break;
35
case MO_32:
36
tcg_gen_bswap32_i32(val, val);
37
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
38
orig_memop = memop;
39
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
40
memop &= ~MO_BSWAP;
41
- /* The bswap primitive requires zero-extended input. */
42
+ /* The bswap primitive benefits from zero-extended input. */
43
if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
44
memop &= ~MO_SIGN;
45
}
46
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
47
plugin_gen_mem_callbacks(addr, info);
48
49
if ((orig_memop ^ memop) & MO_BSWAP) {
50
+ int flags = (orig_memop & MO_SIGN
51
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
52
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
53
switch (orig_memop & MO_SIZE) {
54
case MO_16:
55
- tcg_gen_bswap16_i64(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
56
- if (orig_memop & MO_SIGN) {
57
- tcg_gen_ext16s_i64(val, val);
58
- }
59
+ tcg_gen_bswap16_i64(val, val, flags);
60
break;
61
case MO_32:
62
- tcg_gen_bswap32_i64(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
63
- if (orig_memop & MO_SIGN) {
64
- tcg_gen_ext32s_i64(val, val);
65
- }
66
+ tcg_gen_bswap32_i64(val, val, flags);
67
break;
68
case MO_64:
69
tcg_gen_bswap64_i64(val, val);
70
--
34
--
71
2.25.1
35
2.43.0
72
73
1
From: Warner Losh <imp@bsdimp.com>
1
Duplicate fold_sub_vec into fold_sub instead of calling it,
2
now that fold_sub_vec always returns true.
2
3
3
The trap number for a page fault on BSD systems is T_PAGEFLT
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
not 0xe -- 0xe is used by Linux and represents the intel hardware
5
trap vector. The BSD kernels, however, translate this to T_PAGEFLT
6
in their Xpage, Xtrap0e, Xtrap14, etc fault handlers. This is true
7
for i386 and x86_64, though the name of the trap handler can vary
8
on the flavor of BSD. As far as I can tell, Linux doesn't provide
9
a define for this value. Invent a new one (PAGE_FAULT_TRAP) and
10
use it instead to avoid uglier ifdefs.
11
12
Signed-off-by: Mark Johnston <markj@FreeBSD.org>
13
Signed-off-by: Juergen Lock <nox@FreeBSD.org>
14
[ Rework to avoid ifdefs and expand it to i386 ]
15
Signed-off-by: Warner Losh <imp@bsdimp.com>
16
Message-Id: <20210625045707.84534-3-imp@bsdimp.com>
17
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
18
---
6
---
19
accel/tcg/user-exec.c | 20 ++++++++++++++++++--
7
tcg/optimize.c | 9 ++++++---
20
1 file changed, 18 insertions(+), 2 deletions(-)
8
1 file changed, 6 insertions(+), 3 deletions(-)
21
9
22
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
23
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
24
--- a/accel/tcg/user-exec.c
12
--- a/tcg/optimize.c
25
+++ b/accel/tcg/user-exec.c
13
+++ b/tcg/optimize.c
26
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
14
@@ -XXX,XX +XXX,XX @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
27
15
fold_sub_to_neg(ctx, op)) {
28
#if defined(__NetBSD__)
16
return true;
29
#include <ucontext.h>
17
}
30
+#include <machine/trap.h>
18
- return false;
31
19
+ return finish_folding(ctx, op);
32
#define EIP_sig(context) ((context)->uc_mcontext.__gregs[_REG_EIP])
33
#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO])
34
#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR])
35
#define MASK_sig(context) ((context)->uc_sigmask)
36
+#define PAGE_FAULT_TRAP T_PAGEFLT
37
#elif defined(__FreeBSD__) || defined(__DragonFly__)
38
#include <ucontext.h>
39
+#include <machine/trap.h>
40
41
#define EIP_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_eip))
42
#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno)
43
#define ERROR_sig(context) ((context)->uc_mcontext.mc_err)
44
#define MASK_sig(context) ((context)->uc_sigmask)
45
+#define PAGE_FAULT_TRAP T_PAGEFLT
46
#elif defined(__OpenBSD__)
47
+#include <machine/trap.h>
48
#define EIP_sig(context) ((context)->sc_eip)
49
#define TRAP_sig(context) ((context)->sc_trapno)
50
#define ERROR_sig(context) ((context)->sc_err)
51
#define MASK_sig(context) ((context)->sc_mask)
52
+#define PAGE_FAULT_TRAP T_PAGEFLT
53
#else
54
#define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP])
55
#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO])
56
#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR])
57
#define MASK_sig(context) ((context)->uc_sigmask)
58
+#define PAGE_FAULT_TRAP 0xe
59
#endif
60
61
int cpu_signal_handler(int host_signum, void *pinfo,
62
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
63
pc = EIP_sig(uc);
64
trapno = TRAP_sig(uc);
65
return handle_cpu_signal(pc, info,
66
- trapno == 0xe ? (ERROR_sig(uc) >> 1) & 1 : 0,
67
+ trapno == PAGE_FAULT_TRAP ?
68
+ (ERROR_sig(uc) >> 1) & 1 : 0,
69
&MASK_sig(uc));
70
}
20
}
71
21
72
#elif defined(__x86_64__)
22
static bool fold_sub(OptContext *ctx, TCGOp *op)
73
23
{
74
#ifdef __NetBSD__
24
- if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
75
+#include <machine/trap.h>
25
+ if (fold_const2(ctx, op) ||
76
#define PC_sig(context) _UC_MACHINE_PC(context)
26
+ fold_xx_to_i(ctx, op, 0) ||
77
#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO])
27
+ fold_xi_to_x(ctx, op, 0) ||
78
#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR])
28
+ fold_sub_to_neg(ctx, op)) {
79
#define MASK_sig(context) ((context)->uc_sigmask)
29
return true;
80
+#define PAGE_FAULT_TRAP T_PAGEFLT
30
}
81
#elif defined(__OpenBSD__)
31
82
+#include <machine/trap.h>
32
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
83
#define PC_sig(context) ((context)->sc_rip)
33
? INDEX_op_add_i32 : INDEX_op_add_i64);
84
#define TRAP_sig(context) ((context)->sc_trapno)
34
op->args[2] = arg_new_constant(ctx, -val);
85
#define ERROR_sig(context) ((context)->sc_err)
35
}
86
#define MASK_sig(context) ((context)->sc_mask)
36
- return false;
87
+#define PAGE_FAULT_TRAP T_PAGEFLT
37
+ return finish_folding(ctx, op);
88
#elif defined(__FreeBSD__) || defined(__DragonFly__)
89
#include <ucontext.h>
90
+#include <machine/trap.h>
91
92
#define PC_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_rip))
93
#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno)
94
#define ERROR_sig(context) ((context)->uc_mcontext.mc_err)
95
#define MASK_sig(context) ((context)->uc_sigmask)
96
+#define PAGE_FAULT_TRAP T_PAGEFLT
97
#else
98
#define PC_sig(context) ((context)->uc_mcontext.gregs[REG_RIP])
99
#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO])
100
#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR])
101
#define MASK_sig(context) ((context)->uc_sigmask)
102
+#define PAGE_FAULT_TRAP 0xe
103
#endif
104
105
int cpu_signal_handler(int host_signum, void *pinfo,
106
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
107
108
pc = PC_sig(uc);
109
return handle_cpu_signal(pc, info,
110
- TRAP_sig(uc) == 0xe ? (ERROR_sig(uc) >> 1) & 1 : 0,
111
+ TRAP_sig(uc) == PAGE_FAULT_TRAP ?
112
+ (ERROR_sig(uc) >> 1) & 1 : 0,
113
&MASK_sig(uc));
114
}
38
}
115
39
40
static bool fold_sub2(OptContext *ctx, TCGOp *op)
116
--
41
--
117
2.25.1
42
2.43.0
118
119
1
The memory bswap support in the aarch64 backend merely dates from
1
Avoid the use of the OptContext slots.
2
a time when it was required. There is nothing special about the
3
backend support that could not have been provided by the middle-end
4
even prior to the introduction of the bswap flags.
5
2
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
tcg/aarch64/tcg-target.h | 2 +-
6
tcg/optimize.c | 16 +++++++++-------
10
tcg/aarch64/tcg-target.c.inc | 87 +++++++++++++-----------------------
7
1 file changed, 9 insertions(+), 7 deletions(-)
11
2 files changed, 32 insertions(+), 57 deletions(-)
12
8
13
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/aarch64/tcg-target.h
11
--- a/tcg/optimize.c
16
+++ b/tcg/aarch64/tcg-target.h
12
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@ typedef enum {
13
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2(OptContext *ctx, TCGOp *op)
18
#define TCG_TARGET_HAS_cmpsel_vec 0
14
19
15
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
20
#define TCG_TARGET_DEFAULT_MO (0)
21
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
22
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
23
24
void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
25
26
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
27
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/aarch64/tcg-target.c.inc
29
+++ b/tcg/aarch64/tcg-target.c.inc
30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
31
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
32
* TCGMemOpIdx oi, uintptr_t ra)
33
*/
34
-static void * const qemu_ld_helpers[16] = {
35
- [MO_UB] = helper_ret_ldub_mmu,
36
- [MO_LEUW] = helper_le_lduw_mmu,
37
- [MO_LEUL] = helper_le_ldul_mmu,
38
- [MO_LEQ] = helper_le_ldq_mmu,
39
- [MO_BEUW] = helper_be_lduw_mmu,
40
- [MO_BEUL] = helper_be_ldul_mmu,
41
- [MO_BEQ] = helper_be_ldq_mmu,
42
+static void * const qemu_ld_helpers[4] = {
43
+ [MO_8] = helper_ret_ldub_mmu,
44
+#ifdef HOST_WORDS_BIGENDIAN
45
+ [MO_16] = helper_be_lduw_mmu,
46
+ [MO_32] = helper_be_ldul_mmu,
47
+ [MO_64] = helper_be_ldq_mmu,
48
+#else
49
+ [MO_16] = helper_le_lduw_mmu,
50
+ [MO_32] = helper_le_ldul_mmu,
51
+ [MO_64] = helper_le_ldq_mmu,
52
+#endif
53
};
54
55
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
56
* uintxx_t val, TCGMemOpIdx oi,
57
* uintptr_t ra)
58
*/
59
-static void * const qemu_st_helpers[16] = {
60
- [MO_UB] = helper_ret_stb_mmu,
61
- [MO_LEUW] = helper_le_stw_mmu,
62
- [MO_LEUL] = helper_le_stl_mmu,
63
- [MO_LEQ] = helper_le_stq_mmu,
64
- [MO_BEUW] = helper_be_stw_mmu,
65
- [MO_BEUL] = helper_be_stl_mmu,
66
- [MO_BEQ] = helper_be_stq_mmu,
67
+static void * const qemu_st_helpers[4] = {
68
+ [MO_8] = helper_ret_stb_mmu,
69
+#ifdef HOST_WORDS_BIGENDIAN
70
+ [MO_16] = helper_be_stw_mmu,
71
+ [MO_32] = helper_be_stl_mmu,
72
+ [MO_64] = helper_be_stq_mmu,
73
+#else
74
+ [MO_16] = helper_le_stw_mmu,
75
+ [MO_32] = helper_le_stl_mmu,
76
+ [MO_64] = helper_le_stq_mmu,
77
+#endif
78
};
79
80
static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
81
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
82
tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
83
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
84
tcg_out_adr(s, TCG_REG_X3, lb->raddr);
85
- tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
86
+ tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]);
87
if (opc & MO_SIGN) {
88
tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
89
} else {
90
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
91
tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
92
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
93
tcg_out_adr(s, TCG_REG_X4, lb->raddr);
94
- tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
95
+ tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
96
tcg_out_goto(s, lb->raddr);
97
return true;
98
}
99
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
100
TCGReg data_r, TCGReg addr_r,
101
TCGType otype, TCGReg off_r)
102
{
16
{
103
- const MemOp bswap = memop & MO_BSWAP;
17
+ uint64_t z_mask = -1, s_mask = 0;
104
+ /* Byte swapping is left to middle-end expansion. */
18
+
105
+ tcg_debug_assert((memop & MO_BSWAP) == 0);
19
/* We can't do any folding with a load, but we can record bits. */
106
20
switch (op->opc) {
107
switch (memop & MO_SSIZE) {
21
CASE_OP_32_64(ld8s):
108
case MO_UB:
22
- ctx->s_mask = MAKE_64BIT_MASK(8, 56);
109
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
23
+ s_mask = INT8_MIN;
110
break;
24
break;
111
case MO_UW:
25
CASE_OP_32_64(ld8u):
112
tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
26
- ctx->z_mask = MAKE_64BIT_MASK(0, 8);
113
- if (bswap) {
27
+ z_mask = MAKE_64BIT_MASK(0, 8);
114
- tcg_out_rev(s, TCG_TYPE_I32, MO_16, data_r, data_r);
115
- }
116
break;
28
break;
117
case MO_SW:
29
CASE_OP_32_64(ld16s):
118
- if (bswap) {
30
- ctx->s_mask = MAKE_64BIT_MASK(16, 48);
119
- tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
31
+ s_mask = INT16_MIN;
120
- tcg_out_rev(s, TCG_TYPE_I32, MO_16, data_r, data_r);
121
- tcg_out_sxt(s, ext, MO_16, data_r, data_r);
122
- } else {
123
- tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
124
- data_r, addr_r, otype, off_r);
125
- }
126
+ tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
127
+ data_r, addr_r, otype, off_r);
128
break;
32
break;
129
case MO_UL:
33
CASE_OP_32_64(ld16u):
130
tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
34
- ctx->z_mask = MAKE_64BIT_MASK(0, 16);
131
- if (bswap) {
35
+ z_mask = MAKE_64BIT_MASK(0, 16);
132
- tcg_out_rev(s, TCG_TYPE_I32, MO_32, data_r, data_r);
133
- }
134
break;
36
break;
135
case MO_SL:
37
case INDEX_op_ld32s_i64:
136
- if (bswap) {
38
- ctx->s_mask = MAKE_64BIT_MASK(32, 32);
137
- tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
39
+ s_mask = INT32_MIN;
138
- tcg_out_rev(s, TCG_TYPE_I32, MO_32, data_r, data_r);
139
- tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
140
- } else {
141
- tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
142
- }
143
+ tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
144
break;
40
break;
145
case MO_Q:
41
case INDEX_op_ld32u_i64:
146
tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
42
- ctx->z_mask = MAKE_64BIT_MASK(0, 32);
147
- if (bswap) {
43
+ z_mask = MAKE_64BIT_MASK(0, 32);
148
- tcg_out_rev(s, TCG_TYPE_I64, MO_64, data_r, data_r);
149
- }
150
break;
44
break;
151
default:
45
default:
152
tcg_abort();
46
g_assert_not_reached();
153
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
47
}
154
TCGReg data_r, TCGReg addr_r,
48
- return false;
155
TCGType otype, TCGReg off_r)
49
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
156
{
50
}
157
- const MemOp bswap = memop & MO_BSWAP;
51
158
+ /* Byte swapping is left to middle-end expansion. */
52
static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
159
+ tcg_debug_assert((memop & MO_BSWAP) == 0);
160
161
switch (memop & MO_SIZE) {
162
case MO_8:
163
tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
164
break;
165
case MO_16:
166
- if (bswap && data_r != TCG_REG_XZR) {
167
- tcg_out_rev(s, TCG_TYPE_I32, MO_16, TCG_REG_TMP, data_r);
168
- data_r = TCG_REG_TMP;
169
- }
170
tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
171
break;
172
case MO_32:
173
- if (bswap && data_r != TCG_REG_XZR) {
174
- tcg_out_rev(s, TCG_TYPE_I32, MO_32, TCG_REG_TMP, data_r);
175
- data_r = TCG_REG_TMP;
176
- }
177
tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
178
break;
179
case MO_64:
180
- if (bswap && data_r != TCG_REG_XZR) {
181
- tcg_out_rev(s, TCG_TYPE_I64, MO_64, TCG_REG_TMP, data_r);
182
- data_r = TCG_REG_TMP;
183
- }
184
tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
185
break;
186
default:
187
--
53
--
188
2.25.1
54
2.43.0
189
190
1
We do not need to copy this into DisasContext.
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
3
---
6
target/nios2/translate.c | 10 ++++------
4
tcg/optimize.c | 2 +-
7
1 file changed, 4 insertions(+), 6 deletions(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
8
6
9
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
11
--- a/target/nios2/translate.c
9
--- a/tcg/optimize.c
12
+++ b/target/nios2/translate.c
10
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@
11
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
12
TCGType type;
13
14
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
15
- return false;
16
+ return finish_folding(ctx, op);
14
}
17
}
15
18
16
typedef struct DisasContext {
19
type = ctx->type;
17
- TCGv_ptr cpu_env;
18
TCGv *cpu_R;
19
TCGv_i32 zero;
20
int is_jmp;
21
@@ -XXX,XX +XXX,XX @@ static void t_gen_helper_raise_exception(DisasContext *dc,
22
TCGv_i32 tmp = tcg_const_i32(index);
23
24
tcg_gen_movi_tl(dc->cpu_R[R_PC], dc->pc);
25
- gen_helper_raise_exception(dc->cpu_env, tmp);
26
+ gen_helper_raise_exception(cpu_env, tmp);
27
tcg_temp_free_i32(tmp);
28
dc->is_jmp = DISAS_NORETURN;
29
}
30
@@ -XXX,XX +XXX,XX @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags)
31
tcg_gen_mov_tl(dc->cpu_R[instr.c], dc->cpu_R[instr.imm5 + CR_BASE]);
32
#ifdef DEBUG_MMU
33
TCGv_i32 tmp = tcg_const_i32(instr.imm5 + CR_BASE);
34
- gen_helper_mmu_read_debug(dc->cpu_R[instr.c], dc->cpu_env, tmp);
35
+ gen_helper_mmu_read_debug(dc->cpu_R[instr.c], cpu_env, tmp);
36
tcg_temp_free_i32(tmp);
37
#endif
38
}
39
@@ -XXX,XX +XXX,XX @@ static void wrctl(DisasContext *dc, uint32_t code, uint32_t flags)
40
{
41
#if !defined(CONFIG_USER_ONLY)
42
TCGv_i32 tmp = tcg_const_i32(instr.imm5 + CR_BASE);
43
- gen_helper_mmu_write(dc->cpu_env, tmp, load_gpr(dc, instr.a));
44
+ gen_helper_mmu_write(cpu_env, tmp, load_gpr(dc, instr.a));
45
tcg_temp_free_i32(tmp);
46
#endif
47
break;
48
@@ -XXX,XX +XXX,XX @@ static void wrctl(DisasContext *dc, uint32_t code, uint32_t flags)
49
if (tb_cflags(dc->tb) & CF_USE_ICOUNT) {
50
gen_io_start();
51
}
52
- gen_helper_check_interrupts(dc->cpu_env);
53
+ gen_helper_check_interrupts(cpu_env);
54
dc->is_jmp = DISAS_UPDATE;
55
}
56
#endif
57
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
58
int num_insns;
59
60
/* Initialize DC */
61
- dc->cpu_env = cpu_env;
62
dc->cpu_R = cpu_R;
63
dc->is_jmp = DISAS_NEXT;
64
dc->pc = tb->pc;
65
--
20
--
66
2.25.1
21
2.43.0
67
68
1
Merge tcg_out_bswap32 and tcg_out_bswap32s.
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Use the flags in the internal uses for loads and stores.
2
Remove fold_masks as the function becomes unused.
3
3
4
For mips32r2 bswap32 with zero-extension, standardize on
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
WSBH+ROTR+DEXT. This is the same number of insns as the
6
previous DSBH+DSHD+DSRL but fits in better with the flags check.
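For reference, here is a rough plain-C model of why WSBH plus a 16-bit rotate
produces a full 32-bit byte swap; the helper names are illustrative only, not
QEMU or MIPS APIs:

    /* WSBH swaps the bytes within each 16-bit half; rotating by 16 then
     * swaps the two halves, giving the full byte reversal. */
    #include <stdint.h>

    static uint32_t wsbh(uint32_t x)                /* abcd -> badc */
    {
        return ((x & 0x00ff00ffu) << 8) | ((x >> 8) & 0x00ff00ffu);
    }

    static uint32_t rotr32(uint32_t x, unsigned n)  /* 0 < n < 32 */
    {
        return (x >> n) | (x << (32 - n));
    }

    static uint32_t model_bswap32(uint32_t x)       /* abcd -> dcba */
    {
        return rotr32(wsbh(x), 16);
    }

When TCG_BSWAP_OZ is requested, the DEXT added in the patch then clears
bits 63:32 of the 64-bit result.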
7
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
6
---
11
tcg/mips/tcg-target.c.inc | 39 ++++++++++++++++-----------------------
7
tcg/optimize.c | 18 ++++++++----------
12
1 file changed, 16 insertions(+), 23 deletions(-)
8
1 file changed, 8 insertions(+), 10 deletions(-)
13
9
14
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/mips/tcg-target.c.inc
12
--- a/tcg/optimize.c
17
+++ b/tcg/mips/tcg-target.c.inc
13
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
19
tcg_debug_assert(ok);
15
return fold_masks_zs(ctx, op, -1, s_mask);
20
}
16
}
21
17
22
-static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
18
-static bool fold_masks(OptContext *ctx, TCGOp *op)
23
+static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg, int flags)
19
-{
24
{
20
- return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
25
if (use_mips32r2_instructions) {
26
tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
27
tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
28
+ if (flags & TCG_BSWAP_OZ) {
29
+ tcg_out_opc_bf(s, OPC_DEXT, ret, ret, 31, 0);
30
+ }
31
} else {
32
- tcg_out_bswap_subr(s, bswap32_addr);
33
- /* delay slot -- never omit the insn, like tcg_out_mov might. */
34
- tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO);
35
- tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3);
36
- }
37
-}
21
-}
38
-
22
-
39
-static void tcg_out_bswap32u(TCGContext *s, TCGReg ret, TCGReg arg)
23
/*
40
-{
24
* An "affected" mask bit is 0 if and only if the result is identical
41
- if (use_mips32r2_instructions) {
25
* to the first input. Thus if the entire mask is 0, the operation
42
- tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg);
26
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
43
- tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret);
27
44
- tcg_out_dsrl(s, ret, ret, 32);
28
static bool fold_xor(OptContext *ctx, TCGOp *op)
45
- } else {
29
{
46
- tcg_out_bswap_subr(s, bswap32u_addr);
30
+ uint64_t z_mask, s_mask;
47
+ if (flags & TCG_BSWAP_OZ) {
31
+ TempOptInfo *t1, *t2;
48
+ tcg_out_bswap_subr(s, bswap32u_addr);
32
+
49
+ } else {
33
if (fold_const2_commutative(ctx, op) ||
50
+ tcg_out_bswap_subr(s, bswap32_addr);
34
fold_xx_to_i(ctx, op, 0) ||
51
+ }
35
fold_xi_to_x(ctx, op, 0) ||
52
/* delay slot -- never omit the insn, like tcg_out_mov might. */
36
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
53
tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO);
37
return true;
54
tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3);
38
}
55
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
39
56
if (TCG_TARGET_REG_BITS == 64 && is_64) {
40
- ctx->z_mask = arg_info(op->args[1])->z_mask
57
if (use_mips32r2_instructions) {
41
- | arg_info(op->args[2])->z_mask;
58
tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
42
- ctx->s_mask = arg_info(op->args[1])->s_mask
59
- tcg_out_bswap32u(s, lo, lo);
43
- & arg_info(op->args[2])->s_mask;
60
+ tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
44
- return fold_masks(ctx, op);
61
} else {
45
+ t1 = arg_info(op->args[1]);
62
tcg_out_bswap_subr(s, bswap32u_addr);
46
+ t2 = arg_info(op->args[2]);
63
/* delay slot */
47
+ z_mask = t1->z_mask | t2->z_mask;
64
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
48
+ s_mask = t1->s_mask & t2->s_mask;
65
case MO_SL | MO_BSWAP:
49
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
66
if (use_mips32r2_instructions) {
50
}
67
tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
51
68
- tcg_out_bswap32(s, lo, lo);
52
static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
69
+ tcg_out_bswap32(s, lo, lo, 0);
70
} else {
71
tcg_out_bswap_subr(s, bswap32_addr);
72
/* delay slot */
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
74
break;
75
76
case MO_32 | MO_BSWAP:
77
- tcg_out_bswap32(s, TCG_TMP3, lo);
78
+ tcg_out_bswap32(s, TCG_TMP3, lo, 0);
79
lo = TCG_TMP3;
80
/* FALLTHRU */
81
case MO_32:
82
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
83
tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0);
84
tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4);
85
} else {
86
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi);
87
+ tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
88
tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0);
89
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo);
90
+ tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
91
tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4);
92
}
93
break;
94
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
95
tcg_out_bswap16(s, a0, a1, a2);
96
break;
97
case INDEX_op_bswap32_i32:
98
- tcg_out_bswap32(s, a0, a1);
99
+ tcg_out_bswap32(s, a0, a1, 0);
100
break;
101
case INDEX_op_bswap32_i64:
102
- tcg_out_bswap32u(s, a0, a1);
103
+ tcg_out_bswap32(s, a0, a1, a2);
104
break;
105
case INDEX_op_bswap64_i64:
106
tcg_out_bswap64(s, a0, a1);
107
--
53
--
108
2.25.1
54
2.43.0
109
110
1
We can use this in gen_goto_tb and for DISAS_JUMP
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
to indirectly chain to the next TB.
3
4
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
5
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
target/cris/translate.c | 4 +++-
4
tcg/optimize.c | 2 +-
9
1 file changed, 3 insertions(+), 1 deletion(-)
5
1 file changed, 1 insertion(+), 1 deletion(-)
10
6
11
diff --git a/target/cris/translate.c b/target/cris/translate.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
13
--- a/target/cris/translate.c
9
--- a/tcg/optimize.c
14
+++ b/target/cris/translate.c
10
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
11
@@ -XXX,XX +XXX,XX @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
16
tcg_gen_exit_tb(dc->base.tb, n);
12
return fold_orc(ctx, op);
17
} else {
13
}
18
tcg_gen_movi_tl(env_pc, dest);
19
- tcg_gen_exit_tb(NULL, 0);
20
+ tcg_gen_lookup_and_goto_ptr();
21
}
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
22
}
17
}
23
18
24
@@ -XXX,XX +XXX,XX @@ static void cris_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
19
/* Propagate constants and copies, fold constant expressions. */
25
tcg_gen_movi_tl(env_pc, npc);
26
/* fall through */
27
case DISAS_JUMP:
28
+ tcg_gen_lookup_and_goto_ptr();
29
+ break;
30
case DISAS_UPDATE:
31
/* Indicate that interupts must be re-evaluated before the next TB. */
32
tcg_gen_exit_tb(NULL, 0);
33
--
20
--
34
2.25.1
21
2.43.0
35
36
1
Notice when the input is known to be zero-extended and force
1
All non-default cases now finish folding within each function.
2
the TCG_BSWAP_IZ flag on. Honor the TCG_BSWAP_OS bit during
2
Do the same with the default case and assert it is done after.
3
constant folding. Propagate the input to the output mask.
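A worked example of the new constant folding (values chosen for illustration
only), for bswap16 on a 64-bit constant:

    /* input 0x12f0, which fits in 16 bits, so TCG_BSWAP_IZ is forced on     */
    /* TCG_BSWAP_OZ: result 0x000000000000f012 (zero-extended)               */
    /* TCG_BSWAP_OS: result 0xfffffffffffff012 (sign-extended from bit 15)   */
    /* neither flag: bits above bit 15 of the output mask become undefined   */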
4
3
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 56 +++++++++++++++++++++++++++++++++++++++++++++-----
7
tcg/optimize.c | 6 ++----
9
1 file changed, 51 insertions(+), 5 deletions(-)
8
1 file changed, 2 insertions(+), 4 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
16
return (uint16_t)x;
17
18
CASE_OP_32_64(bswap16):
19
- return bswap16(x);
20
+ x = bswap16(x);
21
+ return y & TCG_BSWAP_OS ? (int16_t)x : x;
22
23
CASE_OP_32_64(bswap32):
24
- return bswap32(x);
25
+ x = bswap32(x);
26
+ return y & TCG_BSWAP_OS ? (int32_t)x : x;
27
28
case INDEX_op_bswap64_i64:
29
return bswap64(x);
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
14
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
31
}
15
done = true;
32
break;
16
break;
33
34
+ CASE_OP_32_64(bswap16):
35
+ mask = arg_info(op->args[1])->mask;
36
+ if (mask <= 0xffff) {
37
+ op->args[2] |= TCG_BSWAP_IZ;
38
+ }
39
+ mask = bswap16(mask);
40
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
41
+ case TCG_BSWAP_OZ:
42
+ break;
43
+ case TCG_BSWAP_OS:
44
+ mask = (int16_t)mask;
45
+ break;
46
+ default: /* undefined high bits */
47
+ mask |= MAKE_64BIT_MASK(16, 48);
48
+ break;
49
+ }
50
+ break;
51
+
52
+ case INDEX_op_bswap32_i64:
53
+ mask = arg_info(op->args[1])->mask;
54
+ if (mask <= 0xffffffffu) {
55
+ op->args[2] |= TCG_BSWAP_IZ;
56
+ }
57
+ mask = bswap32(mask);
58
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
59
+ case TCG_BSWAP_OZ:
60
+ break;
61
+ case TCG_BSWAP_OS:
62
+ mask = (int32_t)mask;
63
+ break;
64
+ default: /* undefined high bits */
65
+ mask |= MAKE_64BIT_MASK(32, 32);
66
+ break;
67
+ }
68
+ break;
69
+
70
default:
17
default:
18
+ done = finish_folding(&ctx, op);
71
break;
19
break;
72
}
20
}
73
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
21
-
74
CASE_OP_32_64(ext16s):
22
- if (!done) {
75
CASE_OP_32_64(ext16u):
23
- finish_folding(&ctx, op);
76
CASE_OP_32_64(ctpop):
24
- }
77
- CASE_OP_32_64(bswap16):
25
+ tcg_debug_assert(done);
78
- CASE_OP_32_64(bswap32):
26
}
79
- case INDEX_op_bswap64_i64:
27
}
80
case INDEX_op_ext32s_i64:
81
case INDEX_op_ext32u_i64:
82
case INDEX_op_ext_i32_i64:
83
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
84
}
85
goto do_default;
86
87
+ CASE_OP_32_64(bswap16):
88
+ CASE_OP_32_64(bswap32):
89
+ case INDEX_op_bswap64_i64:
90
+ if (arg_is_const(op->args[1])) {
91
+ tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
92
+ op->args[2]);
93
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
94
+ break;
95
+ }
96
+ goto do_default;
97
+
98
CASE_OP_32_64(add):
99
CASE_OP_32_64(sub):
100
CASE_OP_32_64(mul):
101
--
28
--
102
2.25.1
29
2.43.0
103
104
1
Now that the middle-end can replicate the same tricks as tcg/arm
1
All mask setting is now done with parameters via fold_masks_*.
2
used for optimizing bswap for signed loads and for stores, do not
3
pretend to have these memory ops in the backend.
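A minimal sketch of what this relies on (an assumption about the generic
expansion, not code from this patch): with TCG_TARGET_HAS_MEMORY_BSWAP set
to 0, a byte-swapped guest access is split by the middle-end into a plain
memory op plus an explicit bswap, so the backend never sees MO_BSWAP:

    /* front end requests a big-endian 32-bit load */
    tcg_gen_qemu_ld_i32(val, addr, mem_index, MO_BEUL);
    /* expansion seen by the arm backend (roughly): */
    tcg_gen_qemu_ld_i32(val, addr, mem_index, MO_UL);
    tcg_gen_bswap32_i32(val, val);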
4
2
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/arm/tcg-target.h | 2 +-
6
tcg/optimize.c | 13 -------------
9
tcg/arm/tcg-target.c.inc | 214 ++++++++++++++-------------------------
7
1 file changed, 13 deletions(-)
10
2 files changed, 77 insertions(+), 139 deletions(-)
11
8
12
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/arm/tcg-target.h
11
--- a/tcg/optimize.c
15
+++ b/tcg/arm/tcg-target.h
12
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
13
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
17
#define TCG_TARGET_HAS_cmpsel_vec 0
14
QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
18
15
19
#define TCG_TARGET_DEFAULT_MO (0)
16
/* In flight values from optimization. */
20
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
17
- uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
21
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
18
- uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
22
19
TCGType type;
23
/* not defined -- call should be eliminated at compile time */
20
} OptContext;
24
void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
21
25
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
22
@@ -XXX,XX +XXX,XX @@ static bool finish_folding(OptContext *ctx, TCGOp *op)
26
index XXXXXXX..XXXXXXX 100644
23
for (i = 0; i < nb_oargs; i++) {
27
--- a/tcg/arm/tcg-target.c.inc
24
TCGTemp *ts = arg_temp(op->args[i]);
28
+++ b/tcg/arm/tcg-target.c.inc
25
reset_ts(ctx, ts);
29
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
26
- /*
30
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
27
- * Save the corresponding known-zero/sign bits mask for the
31
* int mmu_idx, uintptr_t ra)
28
- * first output argument (only one supported so far).
32
*/
29
- */
33
-static void * const qemu_ld_helpers[16] = {
30
- if (i == 0) {
34
+static void * const qemu_ld_helpers[8] = {
31
- ts_info(ts)->z_mask = ctx->z_mask;
35
[MO_UB] = helper_ret_ldub_mmu,
32
- }
36
[MO_SB] = helper_ret_ldsb_mmu,
33
}
37
-
38
- [MO_LEUW] = helper_le_lduw_mmu,
39
- [MO_LEUL] = helper_le_ldul_mmu,
40
- [MO_LEQ] = helper_le_ldq_mmu,
41
- [MO_LESW] = helper_le_ldsw_mmu,
42
- [MO_LESL] = helper_le_ldul_mmu,
43
-
44
- [MO_BEUW] = helper_be_lduw_mmu,
45
- [MO_BEUL] = helper_be_ldul_mmu,
46
- [MO_BEQ] = helper_be_ldq_mmu,
47
- [MO_BESW] = helper_be_ldsw_mmu,
48
- [MO_BESL] = helper_be_ldul_mmu,
49
+#ifdef HOST_WORDS_BIGENDIAN
50
+ [MO_UW] = helper_be_lduw_mmu,
51
+ [MO_UL] = helper_be_ldul_mmu,
52
+ [MO_Q] = helper_be_ldq_mmu,
53
+ [MO_SW] = helper_be_ldsw_mmu,
54
+ [MO_SL] = helper_be_ldul_mmu,
55
+#else
56
+ [MO_UW] = helper_le_lduw_mmu,
57
+ [MO_UL] = helper_le_ldul_mmu,
58
+ [MO_Q] = helper_le_ldq_mmu,
59
+ [MO_SW] = helper_le_ldsw_mmu,
60
+ [MO_SL] = helper_le_ldul_mmu,
61
+#endif
62
};
63
64
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
65
* uintxx_t val, int mmu_idx, uintptr_t ra)
66
*/
67
-static void * const qemu_st_helpers[16] = {
68
- [MO_UB] = helper_ret_stb_mmu,
69
- [MO_LEUW] = helper_le_stw_mmu,
70
- [MO_LEUL] = helper_le_stl_mmu,
71
- [MO_LEQ] = helper_le_stq_mmu,
72
- [MO_BEUW] = helper_be_stw_mmu,
73
- [MO_BEUL] = helper_be_stl_mmu,
74
- [MO_BEQ] = helper_be_stq_mmu,
75
+static void * const qemu_st_helpers[4] = {
76
+ [MO_8] = helper_ret_stb_mmu,
77
+#ifdef HOST_WORDS_BIGENDIAN
78
+ [MO_16] = helper_be_stw_mmu,
79
+ [MO_32] = helper_be_stl_mmu,
80
+ [MO_64] = helper_be_stq_mmu,
81
+#else
82
+ [MO_16] = helper_le_stw_mmu,
83
+ [MO_32] = helper_le_stl_mmu,
84
+ [MO_64] = helper_le_stq_mmu,
85
+#endif
86
};
87
88
/* Helper routines for marshalling helper function arguments into
89
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
90
icache usage. For pre-armv6, use the signed helpers since we do
91
not have a single insn sign-extend. */
92
if (use_armv6_instructions) {
93
- func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
94
+ func = qemu_ld_helpers[opc & MO_SIZE];
95
} else {
96
- func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
97
+ func = qemu_ld_helpers[opc & MO_SSIZE];
98
if (opc & MO_SIGN) {
99
opc = MO_UL;
100
}
101
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
102
argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
103
104
/* Tail-call to the helper, which will return to the fast path. */
105
- tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
106
+ tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
107
return true;
34
return true;
108
}
35
}
109
#endif /* SOFTMMU */
36
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
110
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
37
ctx.type = TCG_TYPE_I32;
111
TCGReg datalo, TCGReg datahi,
38
}
112
TCGReg addrlo, TCGReg addend)
39
113
{
40
- /* Assume all bits affected, no bits known zero, no sign reps. */
114
- MemOp bswap = opc & MO_BSWAP;
41
- ctx.z_mask = -1;
115
+ /* Byte swapping is left to middle-end expansion. */
42
- ctx.s_mask = 0;
116
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
117
118
switch (opc & MO_SSIZE) {
119
case MO_UB:
120
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
121
break;
122
case MO_UW:
123
tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
124
- if (bswap) {
125
- tcg_out_bswap16(s, COND_AL, datalo, datalo,
126
- TCG_BSWAP_IZ | TCG_BSWAP_OZ);
127
- }
128
break;
129
case MO_SW:
130
- if (bswap) {
131
- tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
132
- tcg_out_bswap16(s, COND_AL, datalo, datalo,
133
- TCG_BSWAP_IZ | TCG_BSWAP_OS);
134
- } else {
135
- tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
136
- }
137
+ tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
138
break;
139
case MO_UL:
140
- default:
141
tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
142
- if (bswap) {
143
- tcg_out_bswap32(s, COND_AL, datalo, datalo);
144
- }
145
break;
146
case MO_Q:
147
- {
148
- TCGReg dl = (bswap ? datahi : datalo);
149
- TCGReg dh = (bswap ? datalo : datahi);
150
-
43
-
151
- /* Avoid ldrd for user-only emulation, to handle unaligned. */
44
/*
152
- if (USING_SOFTMMU && use_armv6_instructions
45
* Process each opcode.
153
- && (dl & 1) == 0 && dh == dl + 1) {
46
* Sorted alphabetically by opcode as much as possible.
154
- tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
155
- } else if (dl != addend) {
156
- tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
157
- tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
158
- } else {
159
- tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
160
- addend, addrlo, SHIFT_IMM_LSL(0));
161
- tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
162
- tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
163
- }
164
- if (bswap) {
165
- tcg_out_bswap32(s, COND_AL, dl, dl);
166
- tcg_out_bswap32(s, COND_AL, dh, dh);
167
- }
168
+ /* Avoid ldrd for user-only emulation, to handle unaligned. */
169
+ if (USING_SOFTMMU && use_armv6_instructions
170
+ && (datalo & 1) == 0 && datahi == datalo + 1) {
171
+ tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
172
+ } else if (datalo != addend) {
173
+ tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);
174
+ tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);
175
+ } else {
176
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
177
+ addend, addrlo, SHIFT_IMM_LSL(0));
178
+ tcg_out_ld32_12(s, COND_AL, datalo, TCG_REG_TMP, 0);
179
+ tcg_out_ld32_12(s, COND_AL, datahi, TCG_REG_TMP, 4);
180
}
181
break;
182
+ default:
183
+ g_assert_not_reached();
184
}
185
}
186
187
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc,
188
TCGReg datalo, TCGReg datahi,
189
TCGReg addrlo)
190
{
191
- MemOp bswap = opc & MO_BSWAP;
192
+ /* Byte swapping is left to middle-end expansion. */
193
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
194
195
switch (opc & MO_SSIZE) {
196
case MO_UB:
197
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc,
198
break;
199
case MO_UW:
200
tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
201
- if (bswap) {
202
- tcg_out_bswap16(s, COND_AL, datalo, datalo,
203
- TCG_BSWAP_IZ | TCG_BSWAP_OZ);
204
- }
205
break;
206
case MO_SW:
207
- if (bswap) {
208
- tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
209
- tcg_out_bswap16(s, COND_AL, datalo, datalo,
210
- TCG_BSWAP_IZ | TCG_BSWAP_OS);
211
- } else {
212
- tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
213
- }
214
+ tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
215
break;
216
case MO_UL:
217
- default:
218
tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
219
- if (bswap) {
220
- tcg_out_bswap32(s, COND_AL, datalo, datalo);
221
- }
222
break;
223
case MO_Q:
224
- {
225
- TCGReg dl = (bswap ? datahi : datalo);
226
- TCGReg dh = (bswap ? datalo : datahi);
227
-
228
- /* Avoid ldrd for user-only emulation, to handle unaligned. */
229
- if (USING_SOFTMMU && use_armv6_instructions
230
- && (dl & 1) == 0 && dh == dl + 1) {
231
- tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
232
- } else if (dl == addrlo) {
233
- tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
234
- tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
235
- } else {
236
- tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
237
- tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
238
- }
239
- if (bswap) {
240
- tcg_out_bswap32(s, COND_AL, dl, dl);
241
- tcg_out_bswap32(s, COND_AL, dh, dh);
242
- }
243
+ /* Avoid ldrd for user-only emulation, to handle unaligned. */
244
+ if (USING_SOFTMMU && use_armv6_instructions
245
+ && (datalo & 1) == 0 && datahi == datalo + 1) {
246
+ tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
247
+ } else if (datalo == addrlo) {
248
+ tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
249
+ tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
250
+ } else {
251
+ tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
252
+ tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
253
}
254
break;
255
+ default:
256
+ g_assert_not_reached();
257
}
258
}
259
260
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, MemOp opc,
261
TCGReg datalo, TCGReg datahi,
262
TCGReg addrlo, TCGReg addend)
263
{
264
- MemOp bswap = opc & MO_BSWAP;
265
+ /* Byte swapping is left to middle-end expansion. */
266
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
267
268
switch (opc & MO_SIZE) {
269
case MO_8:
270
tcg_out_st8_r(s, cond, datalo, addrlo, addend);
271
break;
272
case MO_16:
273
- if (bswap) {
274
- tcg_out_bswap16(s, cond, TCG_REG_R0, datalo, 0);
275
- tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
276
- } else {
277
- tcg_out_st16_r(s, cond, datalo, addrlo, addend);
278
- }
279
+ tcg_out_st16_r(s, cond, datalo, addrlo, addend);
280
break;
281
case MO_32:
282
- default:
283
- if (bswap) {
284
- tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
285
- tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
286
- } else {
287
- tcg_out_st32_r(s, cond, datalo, addrlo, addend);
288
- }
289
+ tcg_out_st32_r(s, cond, datalo, addrlo, addend);
290
break;
291
case MO_64:
292
/* Avoid strd for user-only emulation, to handle unaligned. */
293
- if (bswap) {
294
- tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
295
- tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
296
- tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
297
- tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
298
- } else if (USING_SOFTMMU && use_armv6_instructions
299
- && (datalo & 1) == 0 && datahi == datalo + 1) {
300
+ if (USING_SOFTMMU && use_armv6_instructions
301
+ && (datalo & 1) == 0 && datahi == datalo + 1) {
302
tcg_out_strd_r(s, cond, datalo, addrlo, addend);
303
} else {
304
tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
305
tcg_out_st32_12(s, cond, datahi, addend, 4);
306
}
307
break;
308
+ default:
309
+ g_assert_not_reached();
310
}
311
}
312
313
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc,
314
TCGReg datalo, TCGReg datahi,
315
TCGReg addrlo)
316
{
317
- MemOp bswap = opc & MO_BSWAP;
318
+ /* Byte swapping is left to middle-end expansion. */
319
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
320
321
switch (opc & MO_SIZE) {
322
case MO_8:
323
tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
324
break;
325
case MO_16:
326
- if (bswap) {
327
- tcg_out_bswap16(s, COND_AL, TCG_REG_R0, datalo, 0);
328
- tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
329
- } else {
330
- tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
331
- }
332
+ tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
333
break;
334
case MO_32:
335
- default:
336
- if (bswap) {
337
- tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
338
- tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
339
- } else {
340
- tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
341
- }
342
+ tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
343
break;
344
case MO_64:
345
/* Avoid strd for user-only emulation, to handle unaligned. */
346
- if (bswap) {
347
- tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
348
- tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
349
- tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
350
- tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
351
- } else if (USING_SOFTMMU && use_armv6_instructions
352
- && (datalo & 1) == 0 && datahi == datalo + 1) {
353
+ if (USING_SOFTMMU && use_armv6_instructions
354
+ && (datalo & 1) == 0 && datahi == datalo + 1) {
355
tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
356
} else {
357
tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
358
tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
359
}
360
break;
361
+ default:
362
+ g_assert_not_reached();
363
}
364
}
365
366
--
47
--
367
2.25.1
48
2.43.0
368
369
1
This will eventually simplify front-end usage, and will allow
1
All instances of s_mask have been converted to the new
2
backends to unset TCG_TARGET_HAS_MEMORY_BSWAP without loss of
2
representation. We can now re-enable usage.
3
optimization.
4
3
5
The argument is added during expansion, not currently exposed to the
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
front end translators. The backends currently only support a flags
7
value of either TCG_BSWAP_IZ, or (TCG_BSWAP_IZ | TCG_BSWAP_OZ),
8
since they all require zero top bytes and leave them that way.
9
At the existing call sites we pass in (TCG_BSWAP_IZ | TCG_BSWAP_OZ),
10
except for the flags-ignored cases of a 32-bit swap of a 32-bit
11
value and a 64-bit swap of a 64-bit value, where we pass 0.
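As a self-contained illustration of the output flags (plain host C using
bswap16() from qemu/bswap.h, not TCG opcodes):

    /* bswap16_i32 t0, t1, flags -- effect on the 32-bit result */
    uint32_t oz = (uint16_t)bswap16(in);   /* TCG_BSWAP_OZ: zero-extended */
    uint32_t os = (int16_t)bswap16(in);    /* TCG_BSWAP_OS: sign-extended */
    /* with neither flag set, bits 31..16 of t0 are undefined */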
12
13
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
---
6
---
16
include/tcg/tcg-opc.h | 10 +++++-----
7
tcg/optimize.c | 4 ++--
17
include/tcg/tcg.h | 12 ++++++++++++
8
1 file changed, 2 insertions(+), 2 deletions(-)
18
tcg/tcg-op.c | 13 ++++++++-----
19
tcg/tcg.c | 28 ++++++++++++++++++++++++++++
20
tcg/README | 22 ++++++++++++++--------
21
5 files changed, 67 insertions(+), 18 deletions(-)
22
9
23
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
24
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
25
--- a/include/tcg/tcg-opc.h
12
--- a/tcg/optimize.c
26
+++ b/include/tcg/tcg-opc.h
13
+++ b/tcg/optimize.c
27
@@ -XXX,XX +XXX,XX @@ DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
14
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
28
DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32))
15
g_assert_not_reached();
29
DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32))
16
}
30
DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32))
17
31
-DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32))
18
- if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
32
-DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32))
19
+ if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
33
+DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32))
20
return true;
34
+DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32))
21
}
35
DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32))
22
36
DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32))
23
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
37
DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32))
24
s_mask = s_mask_old >> pos;
38
@@ -XXX,XX +XXX,XX @@ DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
25
s_mask |= -1ull << (len - 1);
39
DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64))
26
40
DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64))
27
- if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
41
DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64))
28
+ if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
42
-DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))
29
return true;
43
-DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))
30
}
44
-DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))
45
+DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))
46
+DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))
47
+DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))
48
DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64))
49
DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64))
50
DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64))
51
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
52
index XXXXXXX..XXXXXXX 100644
53
--- a/include/tcg/tcg.h
54
+++ b/include/tcg/tcg.h
55
@@ -XXX,XX +XXX,XX @@ typedef TCGv_ptr TCGv_env;
56
/* Used to align parameters. See the comment before tcgv_i32_temp. */
57
#define TCG_CALL_DUMMY_ARG ((TCGArg)0)
58
59
+/*
60
+ * Flags for the bswap opcodes.
61
+ * If IZ, the input is zero-extended, otherwise unknown.
62
+ * If OZ or OS, the output is zero- or sign-extended respectively,
63
+ * otherwise the high bits are undefined.
64
+ */
65
+enum {
66
+ TCG_BSWAP_IZ = 1,
67
+ TCG_BSWAP_OZ = 2,
68
+ TCG_BSWAP_OS = 4,
69
+};
70
+
71
typedef enum TCGTempVal {
72
TEMP_VAL_DEAD,
73
TEMP_VAL_REG,
74
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/tcg/tcg-op.c
77
+++ b/tcg/tcg-op.c
78
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
79
void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
80
{
81
if (TCG_TARGET_HAS_bswap16_i32) {
82
- tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);
83
+ tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg,
84
+ TCG_BSWAP_IZ | TCG_BSWAP_OZ);
85
} else {
86
TCGv_i32 t0 = tcg_temp_new_i32();
87
88
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
89
void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
90
{
91
if (TCG_TARGET_HAS_bswap32_i32) {
92
- tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);
93
+ tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0);
94
} else {
95
TCGv_i32 t0 = tcg_temp_new_i32();
96
TCGv_i32 t1 = tcg_temp_new_i32();
97
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
98
tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));
99
tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
100
} else if (TCG_TARGET_HAS_bswap16_i64) {
101
- tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);
102
+ tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg,
103
+ TCG_BSWAP_IZ | TCG_BSWAP_OZ);
104
} else {
105
TCGv_i64 t0 = tcg_temp_new_i64();
106
107
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
108
tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
109
tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
110
} else if (TCG_TARGET_HAS_bswap32_i64) {
111
- tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);
112
+ tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg,
113
+ TCG_BSWAP_IZ | TCG_BSWAP_OZ);
114
} else {
115
TCGv_i64 t0 = tcg_temp_new_i64();
116
TCGv_i64 t1 = tcg_temp_new_i64();
117
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
118
tcg_temp_free_i32(t0);
119
tcg_temp_free_i32(t1);
120
} else if (TCG_TARGET_HAS_bswap64_i64) {
121
- tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg);
122
+ tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0);
123
} else {
124
TCGv_i64 t0 = tcg_temp_new_i64();
125
TCGv_i64 t1 = tcg_temp_new_i64();
126
diff --git a/tcg/tcg.c b/tcg/tcg.c
127
index XXXXXXX..XXXXXXX 100644
128
--- a/tcg/tcg.c
129
+++ b/tcg/tcg.c
130
@@ -XXX,XX +XXX,XX @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
131
[MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
132
};
133
134
+static const char bswap_flag_name[][6] = {
135
+ [TCG_BSWAP_IZ] = "iz",
136
+ [TCG_BSWAP_OZ] = "oz",
137
+ [TCG_BSWAP_OS] = "os",
138
+ [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
139
+ [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
140
+};
141
+
142
static inline bool tcg_regset_single(TCGRegSet d)
143
{
144
return (d & (d - 1)) == 0;
145
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
146
i = 1;
147
}
148
break;
149
+ case INDEX_op_bswap16_i32:
150
+ case INDEX_op_bswap16_i64:
151
+ case INDEX_op_bswap32_i32:
152
+ case INDEX_op_bswap32_i64:
153
+ case INDEX_op_bswap64_i64:
154
+ {
155
+ TCGArg flags = op->args[k];
156
+ const char *name = NULL;
157
+
158
+ if (flags < ARRAY_SIZE(bswap_flag_name)) {
159
+ name = bswap_flag_name[flags];
160
+ }
161
+ if (name) {
162
+ col += qemu_log(",%s", name);
163
+ } else {
164
+ col += qemu_log(",$0x%" TCG_PRIlx, flags);
165
+ }
166
+ i = k = 1;
167
+ }
168
+ break;
169
default:
170
i = 0;
171
break;
172
diff --git a/tcg/README b/tcg/README
173
index XXXXXXX..XXXXXXX 100644
174
--- a/tcg/README
175
+++ b/tcg/README
176
@@ -XXX,XX +XXX,XX @@ ext32u_i64 t0, t1
177
178
8, 16 or 32 bit sign/zero extension (both operands must have the same type)
179
180
-* bswap16_i32/i64 t0, t1
181
+* bswap16_i32/i64 t0, t1, flags
182
183
-16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order
184
-bytes are set to zero.
185
+16 bit byte swap on the low bits of a 32/64 bit input.
186
+If flags & TCG_BSWAP_IZ, then t1 is known to be zero-extended from bit 15.
187
+If flags & TCG_BSWAP_OZ, then t0 will be zero-extended from bit 15.
188
+If flags & TCG_BSWAP_OS, then t0 will be sign-extended from bit 15.
189
+If neither TCG_BSWAP_OZ nor TCG_BSWAP_OS are set, then the bits of
190
+t0 above bit 15 may contain any value.
191
192
-* bswap32_i32/i64 t0, t1
193
+* bswap32_i64 t0, t1, flags
194
195
-32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that
196
-the four high order bytes are set to zero.
197
+32 bit byte swap on a 64-bit value. The flags are the same as for bswap16,
198
+except they apply from bit 31 instead of bit 15.
199
200
-* bswap64_i64 t0, t1
201
+* bswap32_i32 t0, t1, flags
202
+* bswap64_i64 t0, t1, flags
203
204
-64 bit byte swap
205
+32/64 bit byte swap. The flags are ignored, but still present
206
+for consistency with the other bswap opcodes.
207
208
* discard_i32/i64 t0
209
31
210
--
32
--
211
2.25.1
33
2.43.0
212
213
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
The big comment just above says functions should be sorted.
2
Add forward declarations as needed.
2
3
3
Implement tcg_gen_vec_shl{shr}{sar}16i_tl by adding corresponging i32 OP.
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
5
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
6
Message-Id: <20210624105023.3852-4-zhiwei_liu@c-sky.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
include/tcg/tcg-op-gvec.h | 10 ++++++++++
7
tcg/optimize.c | 114 +++++++++++++++++++++++++------------------------
10
tcg/tcg-op-gvec.c | 28 ++++++++++++++++++++++++++++
8
1 file changed, 59 insertions(+), 55 deletions(-)
11
2 files changed, 38 insertions(+)
12
9
13
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-op-gvec.h
12
--- a/tcg/optimize.c
16
+++ b/include/tcg/tcg-op-gvec.h
13
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
18
void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
15
* 3) those that produce information about the result value.
19
void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
16
*/
20
17
21
+void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
18
+static bool fold_or(OptContext *ctx, TCGOp *op);
22
+void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
19
+static bool fold_orc(OptContext *ctx, TCGOp *op);
23
+void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
20
+static bool fold_xor(OptContext *ctx, TCGOp *op);
24
+
21
+
25
#if TARGET_LONG_BITS == 64
22
static bool fold_add(OptContext *ctx, TCGOp *op)
26
#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i64
23
{
27
#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i64
24
if (fold_const2_commutative(ctx, op) ||
28
#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
25
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
29
#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
26
return fold_masks_zs(ctx, op, z_mask, s_mask);
30
+#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64
31
+#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64
32
+#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64
33
#else
34
#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i32
35
#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i32
36
#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
37
#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
38
+#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32
39
+#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32
40
+#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32
41
#endif
42
43
#endif
44
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/tcg/tcg-op-gvec.c
47
+++ b/tcg/tcg-op-gvec.c
48
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
49
tcg_gen_andi_i64(d, d, mask);
50
}
27
}
51
28
52
+void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
29
+static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
53
+{
30
+{
54
+ uint32_t mask = dup_const(MO_16, 0xffff << c);
31
+ /* If true and false values are the same, eliminate the cmp. */
55
+ tcg_gen_shli_i32(d, a, c);
32
+ if (args_are_copies(op->args[2], op->args[3])) {
56
+ tcg_gen_andi_i32(d, d, mask);
33
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
34
+ }
35
+
36
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
37
+ uint64_t tv = arg_info(op->args[2])->val;
38
+ uint64_t fv = arg_info(op->args[3])->val;
39
+
40
+ if (tv == -1 && fv == 0) {
41
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
42
+ }
43
+ if (tv == 0 && fv == -1) {
44
+ if (TCG_TARGET_HAS_not_vec) {
45
+ op->opc = INDEX_op_not_vec;
46
+ return fold_not(ctx, op);
47
+ } else {
48
+ op->opc = INDEX_op_xor_vec;
49
+ op->args[2] = arg_new_constant(ctx, -1);
50
+ return fold_xor(ctx, op);
51
+ }
52
+ }
53
+ }
54
+ if (arg_is_const(op->args[2])) {
55
+ uint64_t tv = arg_info(op->args[2])->val;
56
+ if (tv == -1) {
57
+ op->opc = INDEX_op_or_vec;
58
+ op->args[2] = op->args[3];
59
+ return fold_or(ctx, op);
60
+ }
61
+ if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
62
+ op->opc = INDEX_op_andc_vec;
63
+ op->args[2] = op->args[1];
64
+ op->args[1] = op->args[3];
65
+ return fold_andc(ctx, op);
66
+ }
67
+ }
68
+ if (arg_is_const(op->args[3])) {
69
+ uint64_t fv = arg_info(op->args[3])->val;
70
+ if (fv == 0) {
71
+ op->opc = INDEX_op_and_vec;
72
+ return fold_and(ctx, op);
73
+ }
74
+ if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
75
+ op->opc = INDEX_op_orc_vec;
76
+ op->args[2] = op->args[1];
77
+ op->args[1] = op->args[3];
78
+ return fold_orc(ctx, op);
79
+ }
80
+ }
81
+ return finish_folding(ctx, op);
57
+}
82
+}
58
+
83
+
59
void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
60
int64_t shift, uint32_t oprsz, uint32_t maxsz)
61
{
85
{
62
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
86
int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
63
tcg_gen_andi_i64(d, d, mask);
87
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
88
return fold_masks_zs(ctx, op, z_mask, s_mask);
64
}
89
}
65
90
66
+void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
91
-static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
67
+{
92
-{
68
+ uint32_t mask = dup_const(MO_16, 0xffff >> c);
93
- /* If true and false values are the same, eliminate the cmp. */
69
+ tcg_gen_shri_i32(d, a, c);
94
- if (args_are_copies(op->args[2], op->args[3])) {
70
+ tcg_gen_andi_i32(d, d, mask);
95
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
71
+}
96
- }
72
+
97
-
73
void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
98
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
74
int64_t shift, uint32_t oprsz, uint32_t maxsz)
99
- uint64_t tv = arg_info(op->args[2])->val;
75
{
100
- uint64_t fv = arg_info(op->args[3])->val;
76
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
101
-
77
tcg_temp_free_i64(s);
102
- if (tv == -1 && fv == 0) {
78
}
103
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
79
104
- }
80
+void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
105
- if (tv == 0 && fv == -1) {
81
+{
106
- if (TCG_TARGET_HAS_not_vec) {
82
+ uint32_t s_mask = dup_const(MO_16, 0x8000 >> c);
107
- op->opc = INDEX_op_not_vec;
83
+ uint32_t c_mask = dup_const(MO_16, 0xffff >> c);
108
- return fold_not(ctx, op);
84
+ TCGv_i32 s = tcg_temp_new_i32();
109
- } else {
85
+
110
- op->opc = INDEX_op_xor_vec;
86
+ tcg_gen_shri_i32(d, a, c);
111
- op->args[2] = arg_new_constant(ctx, -1);
87
+ tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */
112
- return fold_xor(ctx, op);
88
+ tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */
113
- }
89
+ tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
114
- }
90
+ tcg_gen_or_i32(d, d, s); /* include sign extension */
115
- }
91
+ tcg_temp_free_i32(s);
116
- if (arg_is_const(op->args[2])) {
92
+}
117
- uint64_t tv = arg_info(op->args[2])->val;
93
+
118
- if (tv == -1) {
94
void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
119
- op->opc = INDEX_op_or_vec;
95
int64_t shift, uint32_t oprsz, uint32_t maxsz)
120
- op->args[2] = op->args[3];
121
- return fold_or(ctx, op);
122
- }
123
- if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
124
- op->opc = INDEX_op_andc_vec;
125
- op->args[2] = op->args[1];
126
- op->args[1] = op->args[3];
127
- return fold_andc(ctx, op);
128
- }
129
- }
130
- if (arg_is_const(op->args[3])) {
131
- uint64_t fv = arg_info(op->args[3])->val;
132
- if (fv == 0) {
133
- op->opc = INDEX_op_and_vec;
134
- return fold_and(ctx, op);
135
- }
136
- if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
137
- op->opc = INDEX_op_orc_vec;
138
- op->args[2] = op->args[1];
139
- op->args[1] = op->args[3];
140
- return fold_orc(ctx, op);
141
- }
142
- }
143
- return finish_folding(ctx, op);
144
-}
145
-
146
/* Propagate constants and copies, fold constant expressions. */
147
void tcg_optimize(TCGContext *s)
96
{
148
{
97
--
149
--
98
2.25.1
150
2.43.0
99
100
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
The big comment just above says functions should be sorted.
2
2
3
Implement tcg_gen_vec_shl{shr}{sar}8i_tl by adding corresponging i32 OP.
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
5
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
6
Message-Id: <20210624105023.3852-5-zhiwei_liu@c-sky.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
include/tcg/tcg-op-gvec.h | 10 ++++++++++
6
tcg/optimize.c | 60 +++++++++++++++++++++++++-------------------------
10
tcg/tcg-op-gvec.c | 28 ++++++++++++++++++++++++++++
7
1 file changed, 30 insertions(+), 30 deletions(-)
11
2 files changed, 38 insertions(+)
12
8
13
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-op-gvec.h
11
--- a/tcg/optimize.c
16
+++ b/include/tcg/tcg-op-gvec.h
12
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
13
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
18
void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
14
return true;
19
void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
20
21
+void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
22
void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
23
+void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
24
void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
25
+void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
26
void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
27
28
#if TARGET_LONG_BITS == 64
29
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
30
#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i64
31
#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
32
#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
33
+#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64
34
+#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64
35
+#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64
36
#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64
37
#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64
38
#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64
39
+
40
#else
41
#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i32
42
#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i32
43
#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
44
#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
45
+#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32
46
+#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32
47
+#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32
48
#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32
49
#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32
50
#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32
51
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/tcg/tcg-op-gvec.c
54
+++ b/tcg/tcg-op-gvec.c
55
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
56
tcg_gen_andi_i64(d, d, mask);
57
}
15
}
58
16
59
+void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
17
+static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
60
+{
18
+{
61
+ uint32_t mask = dup_const(MO_8, 0xff << c);
19
+ /* Canonicalize the comparison to put immediate second. */
62
+ tcg_gen_shli_i32(d, a, c);
20
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
63
+ tcg_gen_andi_i32(d, d, mask);
21
+ op->args[3] = tcg_swap_cond(op->args[3]);
22
+ }
23
+ return finish_folding(ctx, op);
64
+}
24
+}
65
+
25
+
66
void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
26
+static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
67
{
68
uint32_t mask = dup_const(MO_16, 0xffff << c);
69
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
70
tcg_gen_andi_i64(d, d, mask);
71
}
72
73
+void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
74
+{
27
+{
75
+ uint32_t mask = dup_const(MO_8, 0xff >> c);
28
+ /* If true and false values are the same, eliminate the cmp. */
76
+ tcg_gen_shri_i32(d, a, c);
29
+ if (args_are_copies(op->args[3], op->args[4])) {
77
+ tcg_gen_andi_i32(d, d, mask);
30
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
31
+ }
32
+
33
+ /* Canonicalize the comparison to put immediate second. */
34
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
35
+ op->args[5] = tcg_swap_cond(op->args[5]);
36
+ }
37
+ /*
38
+ * Canonicalize the "false" input reg to match the destination,
39
+ * so that the tcg backend can implement "move if true".
40
+ */
41
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
42
+ op->args[5] = tcg_invert_cond(op->args[5]);
43
+ }
44
+ return finish_folding(ctx, op);
78
+}
45
+}
79
+
46
+
80
void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
47
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
81
{
48
{
82
uint32_t mask = dup_const(MO_16, 0xffff >> c);
49
uint64_t z_mask, s_mask;
83
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
50
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
84
tcg_temp_free_i64(s);
51
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
85
}
52
}
86
53
87
+void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
54
-static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
88
+{
55
-{
89
+ uint32_t s_mask = dup_const(MO_8, 0x80 >> c);
56
- /* Canonicalize the comparison to put immediate second. */
90
+ uint32_t c_mask = dup_const(MO_8, 0xff >> c);
57
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
91
+ TCGv_i32 s = tcg_temp_new_i32();
58
- op->args[3] = tcg_swap_cond(op->args[3]);
92
+
59
- }
93
+ tcg_gen_shri_i32(d, a, c);
60
- return finish_folding(ctx, op);
94
+ tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */
61
-}
95
+ tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
62
-
96
+ tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */
63
-static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
97
+ tcg_gen_or_i32(d, d, s); /* include sign extension */
64
-{
98
+ tcg_temp_free_i32(s);
65
- /* If true and false values are the same, eliminate the cmp. */
99
+}
66
- if (args_are_copies(op->args[3], op->args[4])) {
100
+
67
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
101
void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
68
- }
69
-
70
- /* Canonicalize the comparison to put immediate second. */
71
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
72
- op->args[5] = tcg_swap_cond(op->args[5]);
73
- }
74
- /*
75
- * Canonicalize the "false" input reg to match the destination,
76
- * so that the tcg backend can implement "move if true".
77
- */
78
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
79
- op->args[5] = tcg_invert_cond(op->args[5]);
80
- }
81
- return finish_folding(ctx, op);
82
-}
83
-
84
static bool fold_sextract(OptContext *ctx, TCGOp *op)
102
{
85
{
103
uint32_t s_mask = dup_const(MO_16, 0x8000 >> c);
86
uint64_t z_mask, s_mask, s_mask_old;
104
--
87
--
105
2.25.1
88
2.43.0
106
107
1
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
1
We currently have a flag, float_muladd_halve_result, to scale
2
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2
the result by 2**-1. Extend this to handle arbitrary scaling.
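For callers, the existing halve-result flag maps onto an explicit scale of
-1 (a sketch only; a, b, c and s stand for caller-provided values and status):

    /* before: result scaled by 2**-1 via a dedicated flag */
    float32 r_old = float32_muladd(a, b, c, float_muladd_halve_result, &s);
    /* after:  the same scaling expressed as a scalbn exponent */
    float32 r_new = float32_muladd_scalbn(a, b, c, -1, 0, &s);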
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
target/cris/translate.c | 317 ++++++++++++++++++++++------------------
7
include/fpu/softfloat.h | 6 ++++
6
1 file changed, 174 insertions(+), 143 deletions(-)
8
fpu/softfloat.c | 58 ++++++++++++++++++++++-----------------
7
9
fpu/softfloat-parts.c.inc | 7 +++--
8
diff --git a/target/cris/translate.c b/target/cris/translate.c
10
3 files changed, 44 insertions(+), 27 deletions(-)
11
12
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
9
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
10
--- a/target/cris/translate.c
14
--- a/include/fpu/softfloat.h
11
+++ b/target/cris/translate.c
15
+++ b/include/fpu/softfloat.h
12
@@ -XXX,XX +XXX,XX @@ static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc)
16
@@ -XXX,XX +XXX,XX @@ float16 float16_add(float16, float16, float_status *status);
13
*
17
float16 float16_sub(float16, float16, float_status *status);
18
float16 float16_mul(float16, float16, float_status *status);
19
float16 float16_muladd(float16, float16, float16, int, float_status *status);
20
+float16 float16_muladd_scalbn(float16, float16, float16,
21
+ int, int, float_status *status);
22
float16 float16_div(float16, float16, float_status *status);
23
float16 float16_scalbn(float16, int, float_status *status);
24
float16 float16_min(float16, float16, float_status *status);
25
@@ -XXX,XX +XXX,XX @@ float32 float32_mul(float32, float32, float_status *status);
26
float32 float32_div(float32, float32, float_status *status);
27
float32 float32_rem(float32, float32, float_status *status);
28
float32 float32_muladd(float32, float32, float32, int, float_status *status);
29
+float32 float32_muladd_scalbn(float32, float32, float32,
30
+ int, int, float_status *status);
31
float32 float32_sqrt(float32, float_status *status);
32
float32 float32_exp2(float32, float_status *status);
33
float32 float32_log2(float32, float_status *status);
34
@@ -XXX,XX +XXX,XX @@ float64 float64_mul(float64, float64, float_status *status);
35
float64 float64_div(float64, float64, float_status *status);
36
float64 float64_rem(float64, float64, float_status *status);
37
float64 float64_muladd(float64, float64, float64, int, float_status *status);
38
+float64 float64_muladd_scalbn(float64, float64, float64,
39
+ int, int, float_status *status);
40
float64 float64_sqrt(float64, float_status *status);
41
float64 float64_log2(float64, float_status *status);
42
FloatRelation float64_compare(float64, float64, float_status *status);
43
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/fpu/softfloat.c
46
+++ b/fpu/softfloat.c
47
@@ -XXX,XX +XXX,XX @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
48
#define parts_mul(A, B, S) \
49
PARTS_GENERIC_64_128(mul, A)(A, B, S)
50
51
-static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
52
- FloatParts64 *c, int flags,
53
- float_status *s);
54
-static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
55
- FloatParts128 *c, int flags,
56
- float_status *s);
57
+static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
58
+ FloatParts64 *c, int scale,
59
+ int flags, float_status *s);
60
+static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
61
+ FloatParts128 *c, int scale,
62
+ int flags, float_status *s);
63
64
-#define parts_muladd(A, B, C, Z, S) \
65
- PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
66
+#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
67
+ PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
68
69
static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
70
float_status *s);
71
@@ -XXX,XX +XXX,XX @@ floatx80_mul(floatx80 a, floatx80 b, float_status *status)
72
* Fused multiply-add
14
*/
73
*/
15
74
16
-/* generate intermediate code for basic block 'tb'. */
75
-float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
17
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
76
- int flags, float_status *status)
18
+static void cris_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
77
+float16 QEMU_FLATTEN
19
{
78
+float16_muladd_scalbn(float16 a, float16 b, float16 c,
20
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
79
+ int scale, int flags, float_status *status)
21
CPUCRISState *env = cs->env_ptr;
80
{
22
+ uint32_t tb_flags = dc->base.tb->flags;
81
FloatParts64 pa, pb, pc, *pr;
23
uint32_t pc_start;
82
24
- unsigned int insn_len;
83
float16_unpack_canonical(&pa, a, status);
25
- struct DisasContext ctx;
84
float16_unpack_canonical(&pb, b, status);
26
- struct DisasContext *dc = &ctx;
85
float16_unpack_canonical(&pc, c, status);
27
- uint32_t page_start;
86
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
28
- target_ulong npc;
87
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
29
- int num_insns;
88
30
89
return float16_round_pack_canonical(pr, status);
31
if (env->pregs[PR_VR] == 32) {
90
}
32
dc->decoder = crisv32_decoder;
91
33
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
92
-static float32 QEMU_SOFTFLOAT_ATTR
34
dc->clear_locked_irq = 1;
93
-soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
35
}
94
- float_status *status)
36
95
+float16 float16_muladd(float16 a, float16 b, float16 c,
37
- /* Odd PC indicates that branch is rexecuting due to exception in the
96
+ int flags, float_status *status)
38
+ /*
39
+ * Odd PC indicates that branch is rexecuting due to exception in the
40
* delayslot, like in real hw.
41
*/
42
- pc_start = tb->pc & ~1;
43
-
44
- dc->base.tb = tb;
45
+ pc_start = dc->base.pc_first & ~1;
46
dc->base.pc_first = pc_start;
47
dc->base.pc_next = pc_start;
48
- dc->base.is_jmp = DISAS_NEXT;
49
- dc->base.singlestep_enabled = cs->singlestep_enabled;
50
51
dc->cpu = env_archcpu(env);
52
dc->ppc = pc_start;
53
dc->pc = pc_start;
54
dc->flags_uptodate = 1;
55
dc->flagx_known = 1;
56
- dc->flags_x = tb->flags & X_FLAG;
57
+ dc->flags_x = tb_flags & X_FLAG;
58
dc->cc_x_uptodate = 0;
59
dc->cc_mask = 0;
60
dc->update_cc = 0;
61
dc->clear_prefix = 0;
62
+ dc->cpustate_changed = 0;
63
64
cris_update_cc_op(dc, CC_OP_FLAGS, 4);
65
dc->cc_size_uptodate = -1;
66
67
/* Decode TB flags. */
68
- dc->tb_flags = tb->flags & (S_FLAG | P_FLAG | U_FLAG \
69
- | X_FLAG | PFIX_FLAG);
70
- dc->delayed_branch = !!(tb->flags & 7);
71
+ dc->tb_flags = tb_flags & (S_FLAG | P_FLAG | U_FLAG | X_FLAG | PFIX_FLAG);
72
+ dc->delayed_branch = !!(tb_flags & 7);
73
if (dc->delayed_branch) {
74
dc->jmp = JMP_INDIRECT;
75
} else {
76
dc->jmp = JMP_NOJMP;
77
}
78
+}
79
80
- dc->cpustate_changed = 0;
81
+static void cris_tr_tb_start(DisasContextBase *db, CPUState *cpu)
82
+{
97
+{
83
+}
98
+ return float16_muladd_scalbn(a, b, c, 0, flags, status);
84
85
- page_start = pc_start & TARGET_PAGE_MASK;
86
- num_insns = 0;
87
+static void cris_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
88
+{
89
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
90
91
- gen_tb_start(tb);
92
- do {
93
- tcg_gen_insn_start(dc->delayed_branch == 1
94
- ? dc->ppc | 1 : dc->pc);
95
- num_insns++;
96
+ tcg_gen_insn_start(dc->delayed_branch == 1 ? dc->ppc | 1 : dc->pc);
97
+}
98
99
- if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
100
+static bool cris_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
101
+ const CPUBreakpoint *bp)
102
+{
103
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
104
+
105
+ cris_evaluate_flags(dc);
106
+ tcg_gen_movi_tl(env_pc, dc->pc);
107
+ t_gen_raise_exception(EXCP_DEBUG);
108
+ dc->base.is_jmp = DISAS_NORETURN;
109
+ /*
110
+ * The address covered by the breakpoint must be included in
111
+ * [tb->pc, tb->pc + tb->size) in order to for it to be
112
+ * properly cleared -- thus we increment the PC here so that
113
+ * the logic setting tb->size below does the right thing.
114
+ */
115
+ dc->pc += 2;
116
+ return true;
117
+}
99
+}
118
+
100
+
119
+static void cris_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
101
+float32 QEMU_SOFTFLOAT_ATTR
120
+{
102
+float32_muladd_scalbn(float32 a, float32 b, float32 c,
121
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
103
+ int scale, int flags, float_status *status)
122
+ CPUCRISState *env = cs->env_ptr;
104
{
123
+ unsigned int insn_len;
105
FloatParts64 pa, pb, pc, *pr;
106
107
float32_unpack_canonical(&pa, a, status);
108
float32_unpack_canonical(&pb, b, status);
109
float32_unpack_canonical(&pc, c, status);
110
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
111
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
112
113
return float32_round_pack_canonical(pr, status);
114
}
115
116
-static float64 QEMU_SOFTFLOAT_ATTR
117
-soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
118
- float_status *status)
119
+float64 QEMU_SOFTFLOAT_ATTR
120
+float64_muladd_scalbn(float64 a, float64 b, float64 c,
121
+ int scale, int flags, float_status *status)
122
{
123
FloatParts64 pa, pb, pc, *pr;
124
125
float64_unpack_canonical(&pa, a, status);
126
float64_unpack_canonical(&pb, b, status);
127
float64_unpack_canonical(&pc, c, status);
128
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
129
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
130
131
return float64_round_pack_canonical(pr, status);
132
}
133
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
134
return ur.s;
135
136
soft:
137
- return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
138
+ return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
139
}
140
141
float64 QEMU_FLATTEN
142
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
143
return ur.s;
144
145
soft:
146
- return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
147
+ return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
148
}
149
150
float64 float64r32_muladd(float64 a, float64 b, float64 c,
151
@@ -XXX,XX +XXX,XX @@ float64 float64r32_muladd(float64 a, float64 b, float64 c,
152
float64_unpack_canonical(&pa, a, status);
153
float64_unpack_canonical(&pb, b, status);
154
float64_unpack_canonical(&pc, c, status);
155
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
156
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
157
158
return float64r32_round_pack_canonical(pr, status);
159
}
160
@@ -XXX,XX +XXX,XX @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
161
bfloat16_unpack_canonical(&pa, a, status);
162
bfloat16_unpack_canonical(&pb, b, status);
163
bfloat16_unpack_canonical(&pc, c, status);
164
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
165
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
166
167
return bfloat16_round_pack_canonical(pr, status);
168
}
169
@@ -XXX,XX +XXX,XX @@ float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
170
float128_unpack_canonical(&pa, a, status);
171
float128_unpack_canonical(&pb, b, status);
172
float128_unpack_canonical(&pc, c, status);
173
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
174
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
175
176
return float128_round_pack_canonical(pr, status);
177
}
178
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)
179
180
float64_unpack_canonical(&rp, float64_one, status);
181
for (i = 0 ; i < 15 ; i++) {
124
+
182
+
125
+ /* Pretty disas. */
183
float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
126
+ LOG_DIS("%8.8x:\t", dc->pc);
184
- rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
127
+
185
+ rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
128
+ dc->clear_x = 1;
186
xnp = *parts_mul(&xnp, &xp, status);
129
+
130
+ insn_len = dc->decoder(env, dc);
131
+ dc->ppc = dc->pc;
132
+ dc->pc += insn_len;
133
+ dc->base.pc_next += insn_len;
134
+
135
+ if (dc->base.is_jmp == DISAS_NORETURN) {
136
+ return;
137
+ }
138
+
139
+ if (dc->clear_x) {
140
+ cris_clear_x_flag(dc);
141
+ }
142
+
143
+ /*
144
+ * Check for delayed branches here. If we do it before
145
+ * actually generating any host code, the simulator will just
146
+ * loop doing nothing on this program location.
147
+ */
148
+ if (dc->delayed_branch && --dc->delayed_branch == 0) {
149
+ if (dc->base.tb->flags & 7) {
150
+ t_gen_movi_env_TN(dslot, 0);
151
+ }
152
+
153
+ if (dc->cpustate_changed
154
+ || !dc->flagx_known
155
+ || (dc->flags_x != (dc->base.tb->flags & X_FLAG))) {
156
+ cris_store_direct_jmp(dc);
157
+ }
158
+
159
+ if (dc->clear_locked_irq) {
160
+ dc->clear_locked_irq = 0;
161
+ t_gen_movi_env_TN(locked_irq, 0);
162
+ }
163
+
164
+ if (dc->jmp == JMP_DIRECT_CC) {
165
+ TCGLabel *l1 = gen_new_label();
166
cris_evaluate_flags(dc);
167
- tcg_gen_movi_tl(env_pc, dc->pc);
168
- t_gen_raise_exception(EXCP_DEBUG);
169
+
170
+ /* Conditional jmp. */
171
+ tcg_gen_brcondi_tl(TCG_COND_EQ, env_btaken, 0, l1);
172
+ gen_goto_tb(dc, 1, dc->jmp_pc);
173
+ gen_set_label(l1);
174
+ gen_goto_tb(dc, 0, dc->pc);
175
dc->base.is_jmp = DISAS_NORETURN;
176
- /* The address covered by the breakpoint must be included in
177
- [tb->pc, tb->pc + tb->size) in order to for it to be
178
- properly cleared -- thus we increment the PC here so that
179
- the logic setting tb->size below does the right thing. */
180
- dc->pc += 2;
181
- break;
182
+ dc->jmp = JMP_NOJMP;
183
+ } else if (dc->jmp == JMP_DIRECT) {
184
+ cris_evaluate_flags(dc);
185
+ gen_goto_tb(dc, 0, dc->jmp_pc);
186
+ dc->base.is_jmp = DISAS_NORETURN;
187
+ dc->jmp = JMP_NOJMP;
188
+ } else {
189
+ TCGv c = tcg_const_tl(dc->pc);
190
+ t_gen_cc_jmp(env_btarget, c);
191
+ tcg_temp_free(c);
192
+ dc->base.is_jmp = DISAS_JUMP;
193
}
194
+ }
195
196
- /* Pretty disas. */
197
- LOG_DIS("%8.8x:\t", dc->pc);
198
+ /* Force an update if the per-tb cpu state has changed. */
199
+ if (dc->base.is_jmp == DISAS_NEXT
200
+ && (dc->cpustate_changed
201
+ || !dc->flagx_known
202
+ || (dc->flags_x != (dc->base.tb->flags & X_FLAG)))) {
203
+ dc->base.is_jmp = DISAS_UPDATE;
204
+ tcg_gen_movi_tl(env_pc, dc->pc);
205
+ }
206
207
- if (num_insns == max_insns && (tb_cflags(tb) & CF_LAST_IO)) {
208
- gen_io_start();
209
- }
210
- dc->clear_x = 1;
211
+ /*
212
+ * FIXME: Only the first insn in the TB should cross a page boundary.
213
+ * If we can detect the length of the next insn easily, we should.
214
+ * In the meantime, simply stop when we do cross.
215
+ */
216
+ if (dc->base.is_jmp == DISAS_NEXT
217
+ && ((dc->pc ^ dc->base.pc_first) & TARGET_PAGE_MASK) != 0) {
218
+ dc->base.is_jmp = DISAS_TOO_MANY;
219
+ }
220
+}
221
222
- insn_len = dc->decoder(env, dc);
223
- dc->ppc = dc->pc;
224
- dc->pc += insn_len;
225
- if (dc->clear_x) {
226
- cris_clear_x_flag(dc);
227
- }
228
+static void cris_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
229
+{
230
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
231
+ DisasJumpType is_jmp = dc->base.is_jmp;
232
+ target_ulong npc = dc->pc;
233
234
- /* Check for delayed branches here. If we do it before
235
- actually generating any host code, the simulator will just
236
- loop doing nothing for on this program location. */
237
- if (dc->delayed_branch) {
238
- dc->delayed_branch--;
239
- if (dc->delayed_branch == 0) {
240
- if (tb->flags & 7) {
241
- t_gen_movi_env_TN(dslot, 0);
242
- }
243
- if (dc->cpustate_changed || !dc->flagx_known
244
- || (dc->flags_x != (tb->flags & X_FLAG))) {
245
- cris_store_direct_jmp(dc);
246
- }
247
-
248
- if (dc->clear_locked_irq) {
249
- dc->clear_locked_irq = 0;
250
- t_gen_movi_env_TN(locked_irq, 0);
251
- }
252
-
253
- if (dc->jmp == JMP_DIRECT_CC) {
254
- TCGLabel *l1 = gen_new_label();
255
- cris_evaluate_flags(dc);
256
-
257
- /* Conditional jmp. */
258
- tcg_gen_brcondi_tl(TCG_COND_EQ,
259
- env_btaken, 0, l1);
260
- gen_goto_tb(dc, 1, dc->jmp_pc);
261
- gen_set_label(l1);
262
- gen_goto_tb(dc, 0, dc->pc);
263
- dc->base.is_jmp = DISAS_NORETURN;
264
- dc->jmp = JMP_NOJMP;
265
- } else if (dc->jmp == JMP_DIRECT) {
266
- cris_evaluate_flags(dc);
267
- gen_goto_tb(dc, 0, dc->jmp_pc);
268
- dc->base.is_jmp = DISAS_NORETURN;
269
- dc->jmp = JMP_NOJMP;
270
- } else {
271
- TCGv c = tcg_const_tl(dc->pc);
272
- t_gen_cc_jmp(env_btarget, c);
273
- tcg_temp_free(c);
274
- dc->base.is_jmp = DISAS_JUMP;
275
- }
276
- break;
277
- }
278
- }
279
-
280
- /* If we are rexecuting a branch due to exceptions on
281
- delay slots don't break. */
282
- if (!(tb->pc & 1) && cs->singlestep_enabled) {
283
- break;
284
- }
285
- } while (!dc->base.is_jmp && !dc->cpustate_changed
286
- && !tcg_op_buf_full()
287
- && !singlestep
288
- && (dc->pc - page_start < TARGET_PAGE_SIZE)
289
- && num_insns < max_insns);
290
+ if (is_jmp == DISAS_NORETURN) {
291
+ /* If we have a broken branch+delayslot sequence, it's too late. */
292
+ assert(dc->delayed_branch != 1);
293
+ return;
294
+ }
295
296
if (dc->clear_locked_irq) {
297
t_gen_movi_env_TN(locked_irq, 0);
298
}
187
}
299
188
300
- npc = dc->pc;
189
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
301
-
190
index XXXXXXX..XXXXXXX 100644
302
- /* Force an update if the per-tb cpu state has changed. */
191
--- a/fpu/softfloat-parts.c.inc
303
- if (dc->base.is_jmp == DISAS_NEXT
192
+++ b/fpu/softfloat-parts.c.inc
304
- && (dc->cpustate_changed || !dc->flagx_known
193
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
305
- || (dc->flags_x != (tb->flags & X_FLAG)))) {
194
* Requires A and C extracted into a double-sized structure to provide the
306
- dc->base.is_jmp = DISAS_UPDATE;
195
* extra space for the widening multiply.
307
- tcg_gen_movi_tl(env_pc, npc);
196
*/
308
- }
197
-static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
309
/* Broken branch+delayslot sequence. */
198
- FloatPartsN *c, int flags, float_status *s)
310
if (dc->delayed_branch == 1) {
199
+static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
311
/* Set env->dslot to the size of the branch insn. */
200
+ FloatPartsN *c, int scale,
312
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
201
+ int flags, float_status *s)
313
202
{
314
cris_evaluate_flags(dc);
203
int ab_mask, abc_mask;
315
204
FloatPartsW p_widen, c_widen;
316
- if (unlikely(cs->singlestep_enabled)) {
205
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
317
- if (dc->base.is_jmp == DISAS_NEXT) {
206
a->exp = p_widen.exp;
318
+ if (unlikely(dc->base.singlestep_enabled)) {
207
319
+ switch (is_jmp) {
208
return_normal:
320
+ case DISAS_TOO_MANY:
209
+ /* TODO: Replace all use of float_muladd_halve_result with scale. */
321
tcg_gen_movi_tl(env_pc, npc);
210
if (flags & float_muladd_halve_result) {
322
- }
211
a->exp -= 1;
323
- t_gen_raise_exception(EXCP_DEBUG);
324
- } else {
325
- switch (dc->base.is_jmp) {
326
- case DISAS_NEXT:
327
- gen_goto_tb(dc, 1, npc);
328
- break;
329
- default:
330
+ /* fall through */
331
case DISAS_JUMP:
332
case DISAS_UPDATE:
333
- /* indicate that the hash table must be used
334
- to find the next TB */
335
- tcg_gen_exit_tb(NULL, 0);
336
- break;
337
- case DISAS_NORETURN:
338
- /* nothing more to generate */
339
+ t_gen_raise_exception(EXCP_DEBUG);
340
+ return;
341
+ default:
342
break;
343
}
344
+ g_assert_not_reached();
345
}
212
}
346
- gen_tb_end(tb, num_insns);
213
+ a->exp += scale;
347
214
finish_sign:
348
- tb->size = dc->pc - pc_start;
215
if (flags & float_muladd_negate_result) {
349
- tb->icount = num_insns;
216
a->sign ^= 1;
350
-
351
-#ifdef DEBUG_DISAS
352
-#if !DISAS_CRIS
353
- if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
354
- && qemu_log_in_addr_range(pc_start)) {
355
- FILE *logfile = qemu_log_lock();
356
- qemu_log("--------------\n");
357
- qemu_log("IN: %s\n", lookup_symbol(pc_start));
358
- log_target_disas(cs, pc_start, dc->pc - pc_start);
359
- qemu_log_unlock(logfile);
360
+ switch (is_jmp) {
361
+ case DISAS_TOO_MANY:
362
+ gen_goto_tb(dc, 0, npc);
363
+ break;
364
+ case DISAS_JUMP:
365
+ case DISAS_UPDATE:
366
+ /* Indicate that interrupts must be re-evaluated before the next TB. */
367
+ tcg_gen_exit_tb(NULL, 0);
368
+ break;
369
+ default:
370
+ g_assert_not_reached();
371
}
372
-#endif
373
-#endif
374
+}
375
+
376
+static void cris_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
377
+{
378
+ if (!DISAS_CRIS) {
379
+ qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first));
380
+ log_target_disas(cpu, dcbase->pc_first, dcbase->tb->size);
381
+ }
382
+}
383
+
384
+static const TranslatorOps cris_tr_ops = {
385
+ .init_disas_context = cris_tr_init_disas_context,
386
+ .tb_start = cris_tr_tb_start,
387
+ .insn_start = cris_tr_insn_start,
388
+ .breakpoint_check = cris_tr_breakpoint_check,
389
+ .translate_insn = cris_tr_translate_insn,
390
+ .tb_stop = cris_tr_tb_stop,
391
+ .disas_log = cris_tr_disas_log,
392
+};
393
+
394
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
395
+{
396
+ DisasContext dc;
397
+ translator_loop(&cris_tr_ops, &dc.base, cs, tb, max_insns);
398
}
399
400
void cris_cpu_dump_state(CPUState *cs, FILE *f, int flags)
401
--
217
--
402
2.25.1
218
2.43.0
403
219
404
220
1
The only semantic of DISAS_TB_JUMP is that we've done goto_tb,
1
Use the scalbn interface instead of float_muladd_halve_result.
2
which is the same as DISAS_NORETURN -- we've exited the tb.
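
A minimal sketch of the conversion, for reference (not part of the patch): float*_muladd_scalbn() scales the fused result by 2^scale within the same single rounding step, so the old float_muladd_halve_result flag maps onto a scale of -1. For the float32 step below:

    /* old spelling: halve the fused result via a flag */
    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);

    /* new spelling: same value, expressed as a scale of 2^-1 */
    return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);

Both evaluate (a * b) + 3 and halve it before the one and only rounding, so the results should be bit-identical.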
3
2
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
target/nios2/translate.c | 8 +++-----
6
target/arm/tcg/helper-a64.c | 6 +++---
8
1 file changed, 3 insertions(+), 5 deletions(-)
7
1 file changed, 3 insertions(+), 3 deletions(-)
9
8
10
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
9
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/target/nios2/translate.c
11
--- a/target/arm/tcg/helper-a64.c
13
+++ b/target/nios2/translate.c
12
+++ b/target/arm/tcg/helper-a64.c
14
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
15
/* is_jmp field values */
14
(float16_is_infinity(b) && float16_is_zero(a))) {
16
#define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */
15
return float16_one_point_five;
17
#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */
16
}
18
-#define DISAS_TB_JUMP DISAS_TARGET_2 /* only pc was modified statically */
17
- return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
19
18
+ return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
20
#define INSTRUCTION_FLG(func, flags) { (func), (flags) }
21
#define INSTRUCTION(func) \
22
@@ -XXX,XX +XXX,XX @@ static void jmpi(DisasContext *dc, uint32_t code, uint32_t flags)
23
{
24
J_TYPE(instr, code);
25
gen_goto_tb(dc, 0, (dc->pc & 0xF0000000) | (instr.imm26 << 2));
26
- dc->is_jmp = DISAS_TB_JUMP;
27
+ dc->is_jmp = DISAS_NORETURN;
28
}
19
}
29
20
30
static void call(DisasContext *dc, uint32_t code, uint32_t flags)
21
float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
31
@@ -XXX,XX +XXX,XX @@ static void br(DisasContext *dc, uint32_t code, uint32_t flags)
22
@@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
32
I_TYPE(instr, code);
23
(float32_is_infinity(b) && float32_is_zero(a))) {
33
24
return float32_one_point_five;
34
gen_goto_tb(dc, 0, dc->pc + 4 + (instr.imm16.s & -4));
25
}
35
- dc->is_jmp = DISAS_TB_JUMP;
26
- return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
36
+ dc->is_jmp = DISAS_NORETURN;
27
+ return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
37
}
28
}
38
29
39
static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags)
30
float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
40
@@ -XXX,XX +XXX,XX @@ static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags)
31
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
41
gen_goto_tb(dc, 0, dc->pc + 4);
32
(float64_is_infinity(b) && float64_is_zero(a))) {
42
gen_set_label(l1);
33
return float64_one_point_five;
43
gen_goto_tb(dc, 1, dc->pc + 4 + (instr.imm16.s & -4));
34
}
44
- dc->is_jmp = DISAS_TB_JUMP;
35
- return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
45
+ dc->is_jmp = DISAS_NORETURN;
36
+ return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
46
}
37
}
47
38
48
/* Comparison instructions */
39
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
49
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
50
break;
51
52
case DISAS_NORETURN:
53
- case DISAS_TB_JUMP:
54
/* nothing more to generate */
55
break;
56
}
57
--
40
--
58
2.25.1
41
2.43.0
59
42
60
43
1
The existing interpreter zero-extends, ignoring high bits.
1
Use the scalbn interface instead of float_muladd_halve_result.
2
Simply add a separate sign-extension opcode if required.
2
3
Ensure that the interpreter supports ext16s when bswap16 is enabled.
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/tci.c | 3 ++-
6
target/sparc/helper.h | 4 +-
9
tcg/tci/tcg-target.c.inc | 23 ++++++++++++++++++++---
7
target/sparc/fop_helper.c | 8 ++--
10
2 files changed, 22 insertions(+), 4 deletions(-)
8
target/sparc/translate.c | 80 +++++++++++++++++++++++----------------
11
9
3 files changed, 54 insertions(+), 38 deletions(-)
12
diff --git a/tcg/tci.c b/tcg/tci.c
10
11
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/tci.c
13
--- a/target/sparc/helper.h
15
+++ b/tcg/tci.c
14
+++ b/target/sparc/helper.h
16
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
15
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
17
regs[r0] = (int8_t)regs[r1];
16
DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
18
break;
17
DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
19
#endif
18
DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
20
-#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
19
-DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32)
21
+#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 || \
20
+DEF_HELPER_FLAGS_6(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, s32, i32)
22
+ TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64
21
DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64)
23
CASE_32_64(ext16s)
22
DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
24
tci_args_rr(insn, &r0, &r1);
23
25
regs[r0] = (int16_t)regs[r1];
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32)
26
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
25
DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
26
DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
27
DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
28
-DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32)
29
+DEF_HELPER_FLAGS_6(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, s32, i32)
30
DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32)
31
DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
32
33
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
27
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/tci/tcg-target.c.inc
35
--- a/target/sparc/fop_helper.c
29
+++ b/tcg/tci/tcg-target.c.inc
36
+++ b/target/sparc/fop_helper.c
30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
37
@@ -XXX,XX +XXX,XX @@ Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src)
31
const TCGArg args[TCG_MAX_OP_ARGS],
38
}
32
const int const_args[TCG_MAX_OP_ARGS])
39
33
{
40
float32 helper_fmadds(CPUSPARCState *env, float32 s1,
34
+ TCGOpcode exts;
41
- float32 s2, float32 s3, uint32_t op)
35
+
42
+ float32 s2, float32 s3, int32_t sc, uint32_t op)
36
switch (opc) {
43
{
37
case INDEX_op_exit_tb:
44
- float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status);
38
tcg_out_op_p(s, opc, (void *)args[0]);
45
+ float32 ret = float32_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
46
check_ieee_exceptions(env, GETPC());
40
CASE_64(ext32u) /* Optional (TCG_TARGET_HAS_ext32u_i64). */
47
return ret;
41
CASE_64(ext_i32)
48
}
42
CASE_64(extu_i32)
49
43
- CASE_32_64(bswap16) /* Optional (TCG_TARGET_HAS_bswap16_*). */
50
float64 helper_fmaddd(CPUSPARCState *env, float64 s1,
44
- CASE_32_64(bswap32) /* Optional (TCG_TARGET_HAS_bswap32_*). */
51
- float64 s2, float64 s3, uint32_t op)
45
- CASE_64(bswap64) /* Optional (TCG_TARGET_HAS_bswap64_i64). */
52
+ float64 s2, float64 s3, int32_t sc, uint32_t op)
46
CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */
53
{
47
+ case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */
54
- float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status);
48
+ case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */
55
+ float64 ret = float64_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
49
tcg_out_op_rr(s, opc, args[0], args[1]);
56
check_ieee_exceptions(env, GETPC());
50
break;
57
return ret;
51
58
}
52
+ case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */
59
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
53
+ exts = INDEX_op_ext16s_i32;
60
index XXXXXXX..XXXXXXX 100644
54
+ goto do_bswap;
61
--- a/target/sparc/translate.c
55
+ case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */
62
+++ b/target/sparc/translate.c
56
+ exts = INDEX_op_ext16s_i64;
63
@@ -XXX,XX +XXX,XX @@ static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
57
+ goto do_bswap;
64
58
+ case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */
65
static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
59
+ exts = INDEX_op_ext32s_i64;
66
{
60
+ do_bswap:
67
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
61
+ /* The base tci bswaps zero-extend, and ignore high bits. */
68
+ TCGv_i32 z = tcg_constant_i32(0);
62
+ tcg_out_op_rr(s, opc, args[0], args[1]);
69
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, z);
63
+ if (args[2] & TCG_BSWAP_OS) {
70
}
64
+ tcg_out_op_rr(s, exts, args[0], args[0]);
71
65
+ }
72
static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
66
+ break;
73
{
67
+
74
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
68
CASE_32_64(add2)
75
+ TCGv_i32 z = tcg_constant_i32(0);
69
CASE_32_64(sub2)
76
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, z);
70
tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2],
77
}
78
79
static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
80
{
81
- int op = float_muladd_negate_c;
82
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
83
+ TCGv_i32 z = tcg_constant_i32(0);
84
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
85
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
86
}
87
88
static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
89
{
90
- int op = float_muladd_negate_c;
91
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
92
+ TCGv_i32 z = tcg_constant_i32(0);
93
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
94
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
95
}
96
97
static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
98
{
99
- int op = float_muladd_negate_c | float_muladd_negate_result;
100
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
101
+ TCGv_i32 z = tcg_constant_i32(0);
102
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
103
+ float_muladd_negate_result);
104
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
105
}
106
107
static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
108
{
109
- int op = float_muladd_negate_c | float_muladd_negate_result;
110
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
111
+ TCGv_i32 z = tcg_constant_i32(0);
112
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
113
+ float_muladd_negate_result);
114
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
115
}
116
117
static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
118
{
119
- int op = float_muladd_negate_result;
120
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
121
+ TCGv_i32 z = tcg_constant_i32(0);
122
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
123
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
124
}
125
126
static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
127
{
128
- int op = float_muladd_negate_result;
129
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
130
+ TCGv_i32 z = tcg_constant_i32(0);
131
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
132
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
133
}
134
135
/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. */
136
static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
137
{
138
- TCGv_i32 one = tcg_constant_i32(float32_one);
139
- int op = float_muladd_halve_result;
140
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
141
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
142
+ TCGv_i32 mone = tcg_constant_i32(-1);
143
+ TCGv_i32 op = tcg_constant_i32(0);
144
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
145
}
146
147
static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
148
{
149
- TCGv_i64 one = tcg_constant_i64(float64_one);
150
- int op = float_muladd_halve_result;
151
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
152
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
153
+ TCGv_i32 mone = tcg_constant_i32(-1);
154
+ TCGv_i32 op = tcg_constant_i32(0);
155
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
156
}
157
158
/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */
159
static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
160
{
161
- TCGv_i32 one = tcg_constant_i32(float32_one);
162
- int op = float_muladd_negate_c | float_muladd_halve_result;
163
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
164
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
165
+ TCGv_i32 mone = tcg_constant_i32(-1);
166
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
167
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
168
}
169
170
static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
171
{
172
- TCGv_i64 one = tcg_constant_i64(float64_one);
173
- int op = float_muladd_negate_c | float_muladd_halve_result;
174
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
175
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
176
+ TCGv_i32 mone = tcg_constant_i32(-1);
177
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
178
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
179
}
180
181
/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */
182
static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
183
{
184
- TCGv_i32 one = tcg_constant_i32(float32_one);
185
- int op = float_muladd_negate_result | float_muladd_halve_result;
186
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
187
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
188
+ TCGv_i32 mone = tcg_constant_i32(-1);
189
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
190
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
191
}
192
193
static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
194
{
195
- TCGv_i64 one = tcg_constant_i64(float64_one);
196
- int op = float_muladd_negate_result | float_muladd_halve_result;
197
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
198
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
199
+ TCGv_i32 mone = tcg_constant_i32(-1);
200
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
201
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
202
}
203
204
static void gen_op_fpexception_im(DisasContext *dc, int ftt)
71
--
205
--
72
2.25.1
206
2.43.0
73
207
74
208
1
TCG_TARGET_HAS_MEMORY_BSWAP is already unset for this backend,
1
All uses have been converted to float*_muladd_scalbn.
2
which means that MO_BSWAP will be handled by the middle-end and
3
will never be seen by the backend. Thus the indexes used with
4
qemu_{ld,st}_helpers will always be zero.
5
2
6
Tidy the comments and asserts in tcg_out_qemu_{ld,st}_direct.
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
It is not that we do not handle bswap "yet", but never will.
8
9
Acked-by: Alistair Francis <alistair.francis@wdc.com>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
5
---
12
tcg/riscv/tcg-target.c.inc | 64 ++++++++++++++++++++------------------
6
include/fpu/softfloat.h | 3 ---
13
1 file changed, 33 insertions(+), 31 deletions(-)
7
fpu/softfloat.c | 6 ------
8
fpu/softfloat-parts.c.inc | 4 ----
9
3 files changed, 13 deletions(-)
14
10
15
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
11
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
16
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/riscv/tcg-target.c.inc
13
--- a/include/fpu/softfloat.h
18
+++ b/tcg/riscv/tcg-target.c.inc
14
+++ b/include/fpu/softfloat.h
19
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
15
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
20
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
16
| Using these differs from negating an input or output before calling
21
* TCGMemOpIdx oi, uintptr_t ra)
17
| the muladd function in that this means that a NaN doesn't have its
22
*/
18
| sign bit inverted before it is propagated.
23
-static void * const qemu_ld_helpers[16] = {
19
-| We also support halving the result before rounding, as a special
24
- [MO_UB] = helper_ret_ldub_mmu,
20
-| case to support the ARM fused-sqrt-step instruction FRSQRTS.
25
- [MO_SB] = helper_ret_ldsb_mmu,
21
*----------------------------------------------------------------------------*/
26
- [MO_LEUW] = helper_le_lduw_mmu,
22
enum {
27
- [MO_LESW] = helper_le_ldsw_mmu,
23
float_muladd_negate_c = 1,
28
- [MO_LEUL] = helper_le_ldul_mmu,
24
float_muladd_negate_product = 2,
29
+static void * const qemu_ld_helpers[8] = {
25
float_muladd_negate_result = 4,
30
+ [MO_UB] = helper_ret_ldub_mmu,
26
- float_muladd_halve_result = 8,
31
+ [MO_SB] = helper_ret_ldsb_mmu,
32
+#ifdef HOST_WORDS_BIGENDIAN
33
+ [MO_UW] = helper_be_lduw_mmu,
34
+ [MO_SW] = helper_be_ldsw_mmu,
35
+ [MO_UL] = helper_be_ldul_mmu,
36
#if TCG_TARGET_REG_BITS == 64
37
- [MO_LESL] = helper_le_ldsl_mmu,
38
+ [MO_SL] = helper_be_ldsl_mmu,
39
#endif
40
- [MO_LEQ] = helper_le_ldq_mmu,
41
- [MO_BEUW] = helper_be_lduw_mmu,
42
- [MO_BESW] = helper_be_ldsw_mmu,
43
- [MO_BEUL] = helper_be_ldul_mmu,
44
+ [MO_Q] = helper_be_ldq_mmu,
45
+#else
46
+ [MO_UW] = helper_le_lduw_mmu,
47
+ [MO_SW] = helper_le_ldsw_mmu,
48
+ [MO_UL] = helper_le_ldul_mmu,
49
#if TCG_TARGET_REG_BITS == 64
50
- [MO_BESL] = helper_be_ldsl_mmu,
51
+ [MO_SL] = helper_le_ldsl_mmu,
52
+#endif
53
+ [MO_Q] = helper_le_ldq_mmu,
54
#endif
55
- [MO_BEQ] = helper_be_ldq_mmu,
56
};
27
};
57
28
58
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
29
/*----------------------------------------------------------------------------
59
* uintxx_t val, TCGMemOpIdx oi,
30
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
60
* uintptr_t ra)
31
index XXXXXXX..XXXXXXX 100644
61
*/
32
--- a/fpu/softfloat.c
62
-static void * const qemu_st_helpers[16] = {
33
+++ b/fpu/softfloat.c
63
- [MO_UB] = helper_ret_stb_mmu,
34
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
64
- [MO_LEUW] = helper_le_stw_mmu,
35
if (unlikely(!can_use_fpu(s))) {
65
- [MO_LEUL] = helper_le_stl_mmu,
36
goto soft;
66
- [MO_LEQ] = helper_le_stq_mmu,
37
}
67
- [MO_BEUW] = helper_be_stw_mmu,
38
- if (unlikely(flags & float_muladd_halve_result)) {
68
- [MO_BEUL] = helper_be_stl_mmu,
39
- goto soft;
69
- [MO_BEQ] = helper_be_stq_mmu,
40
- }
70
+static void * const qemu_st_helpers[4] = {
41
71
+ [MO_8] = helper_ret_stb_mmu,
42
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
72
+#ifdef HOST_WORDS_BIGENDIAN
43
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
73
+ [MO_16] = helper_be_stw_mmu,
44
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
74
+ [MO_32] = helper_be_stl_mmu,
45
if (unlikely(!can_use_fpu(s))) {
75
+ [MO_64] = helper_be_stq_mmu,
46
goto soft;
76
+#else
47
}
77
+ [MO_16] = helper_le_stw_mmu,
48
- if (unlikely(flags & float_muladd_halve_result)) {
78
+ [MO_32] = helper_le_stl_mmu,
49
- goto soft;
79
+ [MO_64] = helper_le_stq_mmu,
50
- }
80
+#endif
51
81
};
52
float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
82
53
if (unlikely(!f64_is_zon3(ua, ub, uc))) {
83
/* We don't support oversize guests */
54
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
84
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
55
index XXXXXXX..XXXXXXX 100644
85
tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
56
--- a/fpu/softfloat-parts.c.inc
86
tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
57
+++ b/fpu/softfloat-parts.c.inc
87
58
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
88
- tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
59
a->exp = p_widen.exp;
89
+ tcg_out_call(s, qemu_ld_helpers[opc & MO_SSIZE]);
60
90
tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0);
61
return_normal:
91
62
- /* TODO: Replace all use of float_muladd_halve_result with scale. */
92
tcg_out_goto(s, l->raddr);
63
- if (flags & float_muladd_halve_result) {
93
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
64
- a->exp -= 1;
94
tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
65
- }
95
tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
66
a->exp += scale;
96
67
finish_sign:
97
- tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
68
if (flags & float_muladd_negate_result) {
98
+ tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
99
100
tcg_out_goto(s, l->raddr);
101
return true;
102
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
103
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
104
TCGReg base, MemOp opc, bool is_64)
105
{
106
- const MemOp bswap = opc & MO_BSWAP;
107
-
108
- /* We don't yet handle byteswapping, assert */
109
- g_assert(!bswap);
110
+ /* Byte swapping is left to middle-end expansion. */
111
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
112
113
switch (opc & (MO_SSIZE)) {
114
case MO_UB:
115
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
116
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
117
TCGReg base, MemOp opc)
118
{
119
- const MemOp bswap = opc & MO_BSWAP;
120
-
121
- /* We don't yet handle byteswapping, assert */
122
- g_assert(!bswap);
123
+ /* Byte swapping is left to middle-end expansion. */
124
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
125
126
switch (opc & (MO_SSIZE)) {
127
case MO_8:
128
--
69
--
129
2.25.1
70
2.43.0
130
71
131
72
1
For INDEX_op_bswap16_i64, use 64-bit instructions so that we can
1
This rounding mode is used by Hexagon.
2
easily provide the extension to 64-bits. Drop the special case,
3
previously used, where the input is already zero-extended -- the
4
minor code size savings is not worth the complication.
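
For readers new to the mode, a standalone sketch (not part of the patch; assumes the usual softfloat entry points) of what float_round_nearest_even_max changes: rounding is still to nearest-even, but a result that would overflow saturates to the largest finite value instead of returning infinity.

    #include "qemu/osdep.h"
    #include "fpu/softfloat.h"
    #include "fpu/softfloat-helpers.h"    /* set_float_rounding_mode() */

    static float32 overflow_demo(float_status *st)
    {
        float32 big = make_float32(0x7f000000);         /* 2^127 */
        float32 r;

        set_float_rounding_mode(float_round_nearest_even, st);
        r = float32_mul(big, big, st);                  /* overflows to +inf */

        set_float_rounding_mode(float_round_nearest_even_max, st);
        r = float32_mul(big, big, st);                  /* 0x7f7fffff, largest finite */
        return r;
    }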
5
2
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
4
---
9
tcg/s390/tcg-target.c.inc | 34 ++++++++++++++++++++++++++++------
5
include/fpu/softfloat-types.h | 2 ++
10
1 file changed, 28 insertions(+), 6 deletions(-)
6
fpu/softfloat-parts.c.inc | 3 +++
7
2 files changed, 5 insertions(+)
11
8
12
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
9
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/s390/tcg-target.c.inc
11
--- a/include/fpu/softfloat-types.h
15
+++ b/tcg/s390/tcg-target.c.inc
12
+++ b/include/fpu/softfloat-types.h
16
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
13
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
17
tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
14
float_round_to_odd = 5,
18
break;
15
/* Not an IEEE rounding mode: round to closest odd, overflow to inf */
19
16
float_round_to_odd_inf = 6,
20
- OP_32_64(bswap16):
17
+ /* Not an IEEE rounding mode: round to nearest even, overflow to max */
21
- /* The TCG bswap definition requires bits 0-47 already be zero.
18
+ float_round_nearest_even_max = 7,
22
- Thus we don't need the G-type insns to implement bswap16_i64. */
19
} FloatRoundMode;
23
- tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
20
24
- tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16);
21
/*
25
+ case INDEX_op_bswap16_i32:
22
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
26
+ a0 = args[0], a1 = args[1], a2 = args[2];
23
index XXXXXXX..XXXXXXX 100644
27
+ tcg_out_insn(s, RRE, LRVR, a0, a1);
24
--- a/fpu/softfloat-parts.c.inc
28
+ if (a2 & TCG_BSWAP_OS) {
25
+++ b/fpu/softfloat-parts.c.inc
29
+ tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
26
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
30
+ } else {
27
int exp, flags = 0;
31
+ tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
28
32
+ }
29
switch (s->float_rounding_mode) {
33
break;
30
+ case float_round_nearest_even_max:
34
- OP_32_64(bswap32):
31
+ overflow_norm = true;
35
+ case INDEX_op_bswap16_i64:
32
+ /* fall through */
36
+ a0 = args[0], a1 = args[1], a2 = args[2];
33
case float_round_nearest_even:
37
+ tcg_out_insn(s, RRE, LRVGR, a0, a1);
34
if (N > 64 && frac_lsb == 0) {
38
+ if (a2 & TCG_BSWAP_OS) {
35
inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
39
+ tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
40
+ } else {
41
+ tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
42
+ }
43
+ break;
44
+
45
+ case INDEX_op_bswap32_i32:
46
tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
47
break;
48
+ case INDEX_op_bswap32_i64:
49
+ a0 = args[0], a1 = args[1], a2 = args[2];
50
+ tcg_out_insn(s, RRE, LRVR, a0, a1);
51
+ if (a2 & TCG_BSWAP_OS) {
52
+ tgen_ext32s(s, a0, a0);
53
+ } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
54
+ tgen_ext32u(s, a0, a0);
55
+ }
56
+ break;
57
58
case INDEX_op_add2_i32:
59
if (const_args[4]) {
60
--
36
--
61
2.25.1
37
2.43.0
62
63
1
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Certain Hexagon instructions suppress changes to the result
2
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
when the product of fma() is a true zero.
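
Concretely, the new flag only ever shows up in the sign of a zero result. A hand-written illustration (not from the patch; env->fp_status stands in for whatever float_status the caller uses):

    float32 p0 = float32_zero;               /* +0.0 */
    float32 n0 = make_float32(0x80000000);   /* -0.0 */
    float32 r;

    /* IEEE default: (+0 * 1) + (-0) rounds to +0 under nearest-even. */
    r = float32_muladd(p0, float32_one, n0, 0, &env->fp_status);

    /* With the flag, the true-zero product leaves the addend untouched,
     * so the result keeps C's sign and is -0. */
    r = float32_muladd(p0, float32_one, n0,
                       float_muladd_suppress_add_product_zero, &env->fp_status);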
3
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
tcg/aarch64/tcg-target.c.inc | 12 ++++++++++++
6
include/fpu/softfloat.h | 5 +++++
6
1 file changed, 12 insertions(+)
7
fpu/softfloat.c | 3 +++
8
fpu/softfloat-parts.c.inc | 4 +++-
9
3 files changed, 11 insertions(+), 1 deletion(-)
7
10
8
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
11
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/aarch64/tcg-target.c.inc
13
--- a/include/fpu/softfloat.h
11
+++ b/tcg/aarch64/tcg-target.c.inc
14
+++ b/include/fpu/softfloat.h
12
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
15
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
13
tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
16
| Using these differs from negating an input or output before calling
14
break;
17
| the muladd function in that this means that a NaN doesn't have its
15
case INDEX_op_bswap32_i64:
18
| sign bit inverted before it is propagated.
16
+ tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
19
+|
17
+ if (a2 & TCG_BSWAP_OS) {
20
+| With float_muladd_suppress_add_product_zero, if A or B is zero
18
+ tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
21
+| such that the product is a true zero, then return C without addition.
19
+ }
22
+| This preserves the sign of C when C is +/- 0. Used for Hexagon.
20
+ break;
23
*----------------------------------------------------------------------------*/
21
case INDEX_op_bswap32_i32:
24
enum {
22
tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
25
float_muladd_negate_c = 1,
23
break;
26
float_muladd_negate_product = 2,
24
case INDEX_op_bswap16_i64:
27
float_muladd_negate_result = 4,
25
case INDEX_op_bswap16_i32:
28
+ float_muladd_suppress_add_product_zero = 8,
26
tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
29
};
27
+ if (a2 & TCG_BSWAP_OS) {
30
28
+ /* Output must be sign-extended. */
31
/*----------------------------------------------------------------------------
29
+ tcg_out_sxt(s, ext, MO_16, a0, a0);
32
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
30
+ } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
33
index XXXXXXX..XXXXXXX 100644
31
+ /* Output must be zero-extended, but input isn't. */
34
--- a/fpu/softfloat.c
32
+ tcg_out_uxt(s, MO_16, a0, a0);
35
+++ b/fpu/softfloat.c
33
+ }
36
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
34
break;
37
if (unlikely(!can_use_fpu(s))) {
35
38
goto soft;
36
case INDEX_op_ext8s_i64:
39
}
40
+ if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
41
+ goto soft;
42
+ }
43
44
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
45
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
46
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
47
index XXXXXXX..XXXXXXX 100644
48
--- a/fpu/softfloat-parts.c.inc
49
+++ b/fpu/softfloat-parts.c.inc
50
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
51
goto return_normal;
52
}
53
if (c->cls == float_class_zero) {
54
- if (a->sign != c->sign) {
55
+ if (flags & float_muladd_suppress_add_product_zero) {
56
+ a->sign = c->sign;
57
+ } else if (a->sign != c->sign) {
58
goto return_sub_zero;
59
}
60
goto return_zero;
37
--
61
--
38
2.25.1
62
2.43.0
39
40
1
Use movcond instead of brcond to set env_pc.
1
There are no special cases for this instruction.
2
Discard the btarget and btaken variables to improve
2
Remove internal_mpyf as unused.
3
register allocation and avoid unnecessary writeback.
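
In C terms the change replaces a branch around a store with a single select (sketch only; the tcg_gen_movcond_tl() call in the hunk below is the authoritative form):

    /* brcond form (before): conditional branch over the taken-path store */
    env_pc = npc;
    if (env_btaken != 0) {
        env_pc = env_btarget;
    }

    /* movcond form (after): one straight-line select, no labels */
    env_pc = (env_btaken != 0) ? env_btarget : npc;

The tcg_gen_discard_tl() calls that follow then tell the optimizer that btaken and btarget are dead, so their values need not be written back to env before leaving the TB.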
4
3
5
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
6
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
target/cris/translate.c | 22 ++++++++++------------
7
target/hexagon/fma_emu.h | 1 -
10
1 file changed, 10 insertions(+), 12 deletions(-)
8
target/hexagon/fma_emu.c | 8 --------
9
target/hexagon/op_helper.c | 2 +-
10
3 files changed, 1 insertion(+), 10 deletions(-)
11
11
12
diff --git a/target/cris/translate.c b/target/cris/translate.c
12
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/cris/translate.c
14
--- a/target/hexagon/fma_emu.h
15
+++ b/target/cris/translate.c
15
+++ b/target/hexagon/fma_emu.h
16
@@ -XXX,XX +XXX,XX @@ static void t_gen_swapr(TCGv d, TCGv s)
16
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32);
17
tcg_temp_free(org_s);
17
float32 infinite_float32(uint8_t sign);
18
float32 internal_fmafx(float32 a, float32 b, float32 c,
19
int scale, float_status *fp_status);
20
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status);
21
float64 internal_mpyhh(float64 a, float64 b,
22
unsigned long long int accumulated,
23
float_status *fp_status);
24
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/hexagon/fma_emu.c
27
+++ b/target/hexagon/fma_emu.c
28
@@ -XXX,XX +XXX,XX @@ float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
29
return accum_round_float32(result, fp_status);
18
}
30
}
19
31
20
-static void t_gen_cc_jmp(TCGv pc_true, TCGv pc_false)
32
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
21
-{
33
-{
22
- TCGLabel *l1 = gen_new_label();
34
- if (float32_is_zero(a) || float32_is_zero(b)) {
23
-
35
- return float32_mul(a, b, fp_status);
24
- /* Conditional jmp. */
36
- }
25
- tcg_gen_mov_tl(env_pc, pc_false);
37
- return internal_fmafx(a, b, float32_zero, 0, fp_status);
26
- tcg_gen_brcondi_tl(TCG_COND_EQ, env_btaken, 0, l1);
27
- tcg_gen_mov_tl(env_pc, pc_true);
28
- gen_set_label(l1);
29
-}
38
-}
30
-
39
-
31
static bool use_goto_tb(DisasContext *dc, target_ulong dest)
40
float64 internal_mpyhh(float64 a, float64 b,
41
unsigned long long int accumulated,
42
float_status *fp_status)
43
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/target/hexagon/op_helper.c
46
+++ b/target/hexagon/op_helper.c
47
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
32
{
48
{
33
return ((dest ^ dc->base.pc_first) & TARGET_PAGE_MASK) == 0;
49
float32 RdV;
34
@@ -XXX,XX +XXX,XX @@ static void cris_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
50
arch_fpop_start(env);
35
/* fall through */
51
- RdV = internal_mpyf(RsV, RtV, &env->fp_status);
36
52
+ RdV = float32_mul(RsV, RtV, &env->fp_status);
37
case JMP_INDIRECT:
53
arch_fpop_end(env);
38
- t_gen_cc_jmp(env_btarget, tcg_constant_tl(npc));
54
return RdV;
39
+ tcg_gen_movcond_tl(TCG_COND_NE, env_pc,
55
}
40
+ env_btaken, tcg_constant_tl(0),
41
+ env_btarget, tcg_constant_tl(npc));
42
is_jmp = dc->cpustate_changed ? DISAS_UPDATE : DISAS_JUMP;
43
+
44
+ /*
45
+ * We have now consumed btaken and btarget. Hint to the
46
+ * tcg compiler that the writeback to env may be dropped.
47
+ */
48
+ tcg_gen_discard_tl(env_btaken);
49
+ tcg_gen_discard_tl(env_btarget);
50
break;
51
52
default:
53
--
56
--
54
2.25.1
57
2.43.0
55
56
1
Retain the current rorw bswap16 expansion for the zero-in/zero-out case.
1
There are no special cases for this instruction.
2
Otherwise, perform a wider bswap plus a right-shift or extend.
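
As a reference model of what the expansion must produce (standalone C, assuming 64-bit registers; TCG_BSWAP_OZ and TCG_BSWAP_OS are the output-extension flags used throughout this series):

    #include <stdint.h>

    /* bswap16 + TCG_BSWAP_OZ: swap the low 16 bits, zero-extend the result. */
    static uint64_t bswap16_oz(uint64_t x)
    {
        return (uint16_t)(((x & 0xff) << 8) | ((x >> 8) & 0xff));
    }

    /* bswap16 + TCG_BSWAP_OS: same swap, sign-extended from bit 15. */
    static uint64_t bswap16_os(uint64_t x)
    {
        return (uint64_t)(int64_t)(int16_t)bswap16_oz(x);
    }

The zero-in/zero-out case can keep the cheap rorw because the upper bits are already known to be zero; for the OS case the hunk below byte-swaps the whole register and arithmetic-shifts it right instead (bswap64 followed by sar 48 yields the same value as bswap16_os() above).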
3
2
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
tcg/i386/tcg-target.c.inc | 20 +++++++++++++++++++-
6
target/hexagon/op_helper.c | 2 +-
8
1 file changed, 19 insertions(+), 1 deletion(-)
7
1 file changed, 1 insertion(+), 1 deletion(-)
9
8
10
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
9
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/i386/tcg-target.c.inc
11
--- a/target/hexagon/op_helper.c
13
+++ b/tcg/i386/tcg-target.c.inc
12
+++ b/target/hexagon/op_helper.c
14
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
13
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
15
break;
14
float32 RsV, float32 RtV)
16
15
{
17
OP_32_64(bswap16):
16
arch_fpop_start(env);
18
- tcg_out_rolw_8(s, a0);
17
- RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
19
+ if (a2 & TCG_BSWAP_OS) {
18
+ RxV = float32_muladd(RsV, RtV, RxV, 0, &env->fp_status);
20
+ /* Output must be sign-extended. */
19
arch_fpop_end(env);
21
+ if (rexw) {
20
return RxV;
22
+ tcg_out_bswap64(s, a0);
21
}
23
+ tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48);
24
+ } else {
25
+ tcg_out_bswap32(s, a0);
26
+ tcg_out_shifti(s, SHIFT_SAR, a0, 16);
27
+ }
28
+ } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
29
+ /* Output must be zero-extended, but input isn't. */
30
+ tcg_out_bswap32(s, a0);
31
+ tcg_out_shifti(s, SHIFT_SHR, a0, 16);
32
+ } else {
33
+ tcg_out_rolw_8(s, a0);
34
+ }
35
break;
36
OP_32_64(bswap32):
37
tcg_out_bswap32(s, a0);
38
+ if (rexw && (a2 & TCG_BSWAP_OS)) {
39
+ tcg_out_ext32s(s, a0, a0);
40
+ }
41
break;
42
43
OP_32_64(neg):
44
--
22
--
45
2.25.1
23
2.43.0
46
47
diff view generated by jsdifflib
1
From: LIU Zhiwei <zhiwei_liu@c-sky.com>
1
There are no special cases for this instruction. Since hexagon
2
always uses default-nan mode, explicitly negating the first
3
input is unnecessary. Use float_muladd_negate_product instead.
2
4
3
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
5
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Message-Id: <20210624105023.3852-6-zhiwei_liu@c-sky.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
include/tcg/tcg-op-gvec.h | 4 ++++
8
target/hexagon/op_helper.c | 5 ++---
8
1 file changed, 4 insertions(+)
9
1 file changed, 2 insertions(+), 3 deletions(-)
9
10
10
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
11
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg-op-gvec.h
13
--- a/target/hexagon/op_helper.c
13
+++ b/include/tcg/tcg-op-gvec.h
14
+++ b/target/hexagon/op_helper.c
14
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
15
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
15
#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i64
16
float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
16
#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
17
float32 RsV, float32 RtV)
17
#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
18
{
18
+#define tcg_gen_vec_add32_tl tcg_gen_vec_add32_i64
19
- float32 neg_RsV;
19
+#define tcg_gen_vec_sub32_tl tcg_gen_vec_sub32_i64
20
arch_fpop_start(env);
20
#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64
21
- neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
21
#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64
22
- RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
22
#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64
23
+ RxV = float32_muladd(RsV, RtV, RxV, float_muladd_negate_product,
23
@@ -XXX,XX +XXX,XX @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
24
+ &env->fp_status);
24
#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i32
25
arch_fpop_end(env);
25
#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
26
return RxV;
26
#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
27
}
27
+#define tcg_gen_vec_add32_tl tcg_gen_add_i32
28
+#define tcg_gen_vec_sub32_tl tcg_gen_sub_i32
29
#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32
30
#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32
31
#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32
32
--
28
--
33
2.25.1
29
2.43.0
34
35
1
These insns set DISAS_UPDATE without cpustate_changed,
1
This instruction has a special case that 0 * x + c returns c
2
which isn't quite right.
2
without the normal sign folding that comes with 0 + -0.
3
Use the new float_muladd_suppress_add_product_zero to
4
describe this.
3
5
4
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
---
8
target/cris/translate.c | 2 ++
9
target/hexagon/op_helper.c | 11 +++--------
9
1 file changed, 2 insertions(+)
10
1 file changed, 3 insertions(+), 8 deletions(-)
10
11
11
diff --git a/target/cris/translate.c b/target/cris/translate.c
12
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/target/cris/translate.c
14
--- a/target/hexagon/op_helper.c
14
+++ b/target/cris/translate.c
15
+++ b/target/hexagon/op_helper.c
15
@@ -XXX,XX +XXX,XX @@ static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc)
16
@@ -XXX,XX +XXX,XX @@ static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
16
cris_evaluate_flags(dc);
17
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
17
gen_helper_rfe(cpu_env);
18
float32 RsV, float32 RtV, float32 PuV)
18
dc->base.is_jmp = DISAS_UPDATE;
19
{
19
+ dc->cpustate_changed = true;
20
- size4s_t tmp;
20
break;
21
arch_fpop_start(env);
21
case 5:
22
- RxV = check_nan(RxV, RxV, &env->fp_status);
22
/* rfn. */
23
- RxV = check_nan(RxV, RsV, &env->fp_status);
23
@@ -XXX,XX +XXX,XX @@ static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc)
24
- RxV = check_nan(RxV, RtV, &env->fp_status);
24
cris_evaluate_flags(dc);
25
- tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
25
gen_helper_rfn(cpu_env);
26
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
26
dc->base.is_jmp = DISAS_UPDATE;
27
- RxV = tmp;
27
+ dc->cpustate_changed = true;
28
- }
28
break;
29
+ RxV = float32_muladd_scalbn(RsV, RtV, RxV, fSXTN(8, 64, PuV),
29
case 6:
30
+ float_muladd_suppress_add_product_zero,
30
LOG_DIS("break %d\n", dc->op1);
31
+ &env->fp_status);
32
arch_fpop_end(env);
33
return RxV;
34
}
31
--
35
--
32
2.25.1
36
2.43.0
33
34
1
Move handle_instruction into nios2_tr_translate_insn
1
There are multiple special cases for this instruction.
2
as the only caller.
2
(1) The saturate to normal maximum instead of overflow to infinity is
3
handled by the new float_round_nearest_even_max rounding mode.
4
(2) The 0 * n + c special case is handled by the new
5
float_muladd_suppress_add_product_zero flag.
6
(3) The Inf - Inf -> 0 special case can be detected after the fact
7
by examining float_flag_invalid_isi.
3
8
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
11
---
7
target/nios2/translate.c | 66 +++++++++++++++++++---------------------
12
target/hexagon/op_helper.c | 105 +++++++++----------------------------
8
1 file changed, 31 insertions(+), 35 deletions(-)
13
1 file changed, 26 insertions(+), 79 deletions(-)
9
14
10
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
15
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
11
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
12
--- a/target/nios2/translate.c
17
--- a/target/hexagon/op_helper.c
13
+++ b/target/nios2/translate.c
18
+++ b/target/hexagon/op_helper.c
14
@@ -XXX,XX +XXX,XX @@ illegal_op:
19
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
15
t_gen_helper_raise_exception(dc, EXCP_ILLEGAL);
20
return RxV;
16
}
21
}
17
22
18
-static void handle_instruction(DisasContext *dc, CPUNios2State *env)
23
-static bool is_zero_prod(float32 a, float32 b)
19
-{
24
-{
20
- uint32_t code;
25
- return ((float32_is_zero(a) && is_finite(b)) ||
21
- uint8_t op;
26
- (float32_is_zero(b) && is_finite(a)));
22
- const Nios2Instruction *instr;
23
-
24
-#if defined(CONFIG_USER_ONLY)
25
- /* FIXME: Is this needed ? */
26
- if (dc->pc >= 0x1000 && dc->pc < 0x2000) {
27
- t_gen_helper_raise_exception(dc, 0xaa);
28
- return;
29
- }
30
-#endif
31
-
32
- code = cpu_ldl_code(env, dc->pc);
33
- op = get_opcode(code);
34
-
35
- if (unlikely(op >= ARRAY_SIZE(i_type_instructions))) {
36
- t_gen_helper_raise_exception(dc, EXCP_ILLEGAL);
37
- return;
38
- }
39
-
40
- dc->zero = NULL;
41
-
42
- instr = &i_type_instructions[op];
43
- instr->handler(dc, code, instr->flags);
44
-
45
- if (dc->zero) {
46
- tcg_temp_free(dc->zero);
47
- }
48
-}
27
-}
49
-
28
-
50
static const char * const regnames[] = {
29
-static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
51
"zero", "at", "r2", "r3",
30
-{
52
"r4", "r5", "r6", "r7",
31
- float32 ret = dst;
53
@@ -XXX,XX +XXX,XX @@ static void nios2_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
32
- if (float32_is_any_nan(x)) {
33
- if (extract32(x, 22, 1) == 0) {
34
- float_raise(float_flag_invalid, fp_status);
35
- }
36
- ret = make_float32(0xffffffff); /* nan */
37
- }
38
- return ret;
39
-}
40
-
41
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
42
float32 RsV, float32 RtV, float32 PuV)
54
{
43
{
55
DisasContext *dc = container_of(dcbase, DisasContext, base);
44
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
56
CPUNios2State *env = cs->env_ptr;
45
return RxV;
57
+ const Nios2Instruction *instr;
46
}
58
+ uint32_t code, pc;
47
59
+ uint8_t op;
48
-static bool is_inf_prod(int32_t a, int32_t b)
60
49
+static float32 do_sffma_lib(CPUHexagonState *env, float32 RxV,
61
- dc->pc = dc->base.pc_next;
50
+ float32 RsV, float32 RtV, int negate)
62
- dc->base.pc_next += 4;
51
{
63
+ pc = dc->base.pc_next;
52
- return (float32_is_infinity(a) && float32_is_infinity(b)) ||
64
+ dc->pc = pc;
53
- (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
65
+ dc->base.pc_next = pc + 4;
54
- (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
66
55
+ int flags;
67
/* Decode an instruction */
68
- handle_instruction(dc, env);
69
+
56
+
70
+#if defined(CONFIG_USER_ONLY)
57
+ arch_fpop_start(env);
71
+ /* FIXME: Is this needed ? */
72
+ if (pc >= 0x1000 && pc < 0x2000) {
73
+ t_gen_helper_raise_exception(dc, 0xaa);
74
+ return;
75
+ }
76
+#endif
77
+
58
+
78
+ code = cpu_ldl_code(env, pc);
59
+ set_float_rounding_mode(float_round_nearest_even_max, &env->fp_status);
79
+ op = get_opcode(code);
60
+ RxV = float32_muladd(RsV, RtV, RxV,
61
+ negate | float_muladd_suppress_add_product_zero,
62
+ &env->fp_status);
80
+
63
+
81
+ if (unlikely(op >= ARRAY_SIZE(i_type_instructions))) {
64
+ flags = get_float_exception_flags(&env->fp_status);
82
+ t_gen_helper_raise_exception(dc, EXCP_ILLEGAL);
65
+ if (flags) {
83
+ return;
66
+ /* Flags are suppressed by this instruction. */
67
+ set_float_exception_flags(0, &env->fp_status);
68
+
69
+ /* Return 0 for Inf - Inf. */
70
+ if (flags & float_flag_invalid_isi) {
71
+ RxV = 0;
72
+ }
84
+ }
73
+ }
85
+
74
+
86
+ dc->zero = NULL;
75
+ arch_fpop_end(env);
87
+
76
+ return RxV;
88
+ instr = &i_type_instructions[op];
89
+ instr->handler(dc, code, instr->flags);
90
+
91
+ if (dc->zero) {
92
+ tcg_temp_free(dc->zero);
93
+ }
94
}
77
}
95
78
96
static void nios2_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
79
float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
80
float32 RsV, float32 RtV)
81
{
82
- bool infinp;
83
- bool infminusinf;
84
- float32 tmp;
85
-
86
- arch_fpop_start(env);
87
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
88
- infminusinf = float32_is_infinity(RxV) &&
89
- is_inf_prod(RsV, RtV) &&
90
- (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
91
- infinp = float32_is_infinity(RxV) ||
92
- float32_is_infinity(RtV) ||
93
- float32_is_infinity(RsV);
94
- RxV = check_nan(RxV, RxV, &env->fp_status);
95
- RxV = check_nan(RxV, RsV, &env->fp_status);
96
- RxV = check_nan(RxV, RtV, &env->fp_status);
97
- tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
98
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
99
- RxV = tmp;
100
- }
101
- set_float_exception_flags(0, &env->fp_status);
102
- if (float32_is_infinity(RxV) && !infinp) {
103
- RxV = RxV - 1;
104
- }
105
- if (infminusinf) {
106
- RxV = 0;
107
- }
108
- arch_fpop_end(env);
109
- return RxV;
110
+ return do_sffma_lib(env, RxV, RsV, RtV, 0);
111
}
112
113
float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
114
float32 RsV, float32 RtV)
115
{
116
- bool infinp;
117
- bool infminusinf;
118
- float32 tmp;
119
-
120
- arch_fpop_start(env);
121
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
122
- infminusinf = float32_is_infinity(RxV) &&
123
- is_inf_prod(RsV, RtV) &&
124
- (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
125
- infinp = float32_is_infinity(RxV) ||
126
- float32_is_infinity(RtV) ||
127
- float32_is_infinity(RsV);
128
- RxV = check_nan(RxV, RxV, &env->fp_status);
129
- RxV = check_nan(RxV, RsV, &env->fp_status);
130
- RxV = check_nan(RxV, RtV, &env->fp_status);
131
- float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
132
- tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
133
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
134
- RxV = tmp;
135
- }
136
- set_float_exception_flags(0, &env->fp_status);
137
- if (float32_is_infinity(RxV) && !infinp) {
138
- RxV = RxV - 1;
139
- }
140
- if (infminusinf) {
141
- RxV = 0;
142
- }
143
- arch_fpop_end(env);
144
- return RxV;
145
+ return do_sffma_lib(env, RxV, RsV, RtV, float_muladd_negate_product);
146
}
147
148
float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
97
--
149
--
98
2.25.1
150
2.43.0
1
The only semantic of DISAS_TB_JUMP is that we've done goto_tb,
1
The function is now unused.
2
which is the same as DISAS_NORETURN -- we've exited the tb.
3
2
4
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/cris/translate.c | 7 +++----
6
target/hexagon/fma_emu.h | 2 -
9
1 file changed, 3 insertions(+), 4 deletions(-)
7
target/hexagon/fma_emu.c | 171 ---------------------------------------
8
2 files changed, 173 deletions(-)
10
9
11
diff --git a/target/cris/translate.c b/target/cris/translate.c
10
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/target/cris/translate.c
12
--- a/target/hexagon/fma_emu.h
14
+++ b/target/cris/translate.c
13
+++ b/target/hexagon/fma_emu.h
15
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static inline uint32_t float32_getexp_raw(float32 f32)
16
/* is_jmp field values */
15
}
17
#define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */
16
int32_t float32_getexp(float32 f32);
18
#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */
17
float32 infinite_float32(uint8_t sign);
19
-#define DISAS_TB_JUMP DISAS_TARGET_2 /* only pc was modified statically */
18
-float32 internal_fmafx(float32 a, float32 b, float32 c,
20
19
- int scale, float_status *fp_status);
21
/* Used by the decoder. */
20
float64 internal_mpyhh(float64 a, float64 b,
22
#define EXTRACT_FIELD(src, start, end) \
21
unsigned long long int accumulated,
23
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
22
float_status *fp_status);
24
gen_goto_tb(dc, 1, dc->jmp_pc);
23
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
25
gen_set_label(l1);
24
index XXXXXXX..XXXXXXX 100644
26
gen_goto_tb(dc, 0, dc->pc);
25
--- a/target/hexagon/fma_emu.c
27
- dc->base.is_jmp = DISAS_TB_JUMP;
26
+++ b/target/hexagon/fma_emu.c
28
+ dc->base.is_jmp = DISAS_NORETURN;
27
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
29
dc->jmp = JMP_NOJMP;
28
return -1;
30
} else if (dc->jmp == JMP_DIRECT) {
29
}
31
cris_evaluate_flags(dc);
30
32
gen_goto_tb(dc, 0, dc->jmp_pc);
31
-static uint64_t float32_getmant(float32 f32)
33
- dc->base.is_jmp = DISAS_TB_JUMP;
32
-{
34
+ dc->base.is_jmp = DISAS_NORETURN;
33
- Float a = { .i = f32 };
35
dc->jmp = JMP_NOJMP;
34
- if (float32_is_normal(f32)) {
36
} else {
35
- return a.mant | 1ULL << 23;
37
TCGv c = tcg_const_tl(dc->pc);
36
- }
38
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
37
- if (float32_is_zero(f32)) {
39
to find the next TB */
38
- return 0;
40
tcg_gen_exit_tb(NULL, 0);
39
- }
41
break;
40
- if (float32_is_denormal(f32)) {
42
- case DISAS_TB_JUMP:
41
- return a.mant;
43
+ case DISAS_NORETURN:
42
- }
44
/* nothing more to generate */
43
- return ~0ULL;
45
break;
44
-}
46
}
45
-
46
int32_t float32_getexp(float32 f32)
47
{
48
Float a = { .i = f32 };
49
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
50
}
51
52
/* Return a maximum finite value with the requested sign */
53
-static float32 maxfinite_float32(uint8_t sign)
54
-{
55
- if (sign) {
56
- return make_float32(SF_MINUS_MAXF);
57
- } else {
58
- return make_float32(SF_MAXF);
59
- }
60
-}
61
-
62
-/* Return a zero value with requested sign */
63
-static float32 zero_float32(uint8_t sign)
64
-{
65
- if (sign) {
66
- return make_float32(0x80000000);
67
- } else {
68
- return float32_zero;
69
- }
70
-}
71
-
72
#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
73
static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
74
{ \
75
@@ -XXX,XX +XXX,XX @@ static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
76
}
77
78
GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
79
-GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
80
-
81
-static bool is_inf_prod(float64 a, float64 b)
82
-{
83
- return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
84
- (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
85
- (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
86
-}
87
-
88
-static float64 special_fma(float64 a, float64 b, float64 c,
89
- float_status *fp_status)
90
-{
91
- float64 ret = make_float64(0);
92
-
93
- /*
94
- * If A multiplied by B is an exact infinity and C is also an infinity
95
- * but with the opposite sign, FMA returns NaN and raises invalid.
96
- */
97
- uint8_t a_sign = float64_is_neg(a);
98
- uint8_t b_sign = float64_is_neg(b);
99
- uint8_t c_sign = float64_is_neg(c);
100
- if (is_inf_prod(a, b) && float64_is_infinity(c)) {
101
- if ((a_sign ^ b_sign) != c_sign) {
102
- ret = make_float64(DF_NAN);
103
- float_raise(float_flag_invalid, fp_status);
104
- return ret;
105
- }
106
- }
107
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
108
- (float64_is_zero(a) && float64_is_infinity(b))) {
109
- ret = make_float64(DF_NAN);
110
- float_raise(float_flag_invalid, fp_status);
111
- return ret;
112
- }
113
- /*
114
- * If none of the above checks are true and C is a NaN,
115
- * a NaN shall be returned
116
- * If A or B are NaN, a NAN shall be returned.
117
- */
118
- if (float64_is_any_nan(a) ||
119
- float64_is_any_nan(b) ||
120
- float64_is_any_nan(c)) {
121
- if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
122
- float_raise(float_flag_invalid, fp_status);
123
- }
124
- if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
125
- float_raise(float_flag_invalid, fp_status);
126
- }
127
- if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
128
- float_raise(float_flag_invalid, fp_status);
129
- }
130
- ret = make_float64(DF_NAN);
131
- return ret;
132
- }
133
- /*
134
- * We have checked for adding opposite-signed infinities.
135
- * Other infinities return infinity with the correct sign
136
- */
137
- if (float64_is_infinity(c)) {
138
- ret = infinite_float64(c_sign);
139
- return ret;
140
- }
141
- if (float64_is_infinity(a) || float64_is_infinity(b)) {
142
- ret = infinite_float64(a_sign ^ b_sign);
143
- return ret;
144
- }
145
- g_assert_not_reached();
146
-}
147
-
148
-static float32 special_fmaf(float32 a, float32 b, float32 c,
149
- float_status *fp_status)
150
-{
151
- float64 aa, bb, cc;
152
- aa = float32_to_float64(a, fp_status);
153
- bb = float32_to_float64(b, fp_status);
154
- cc = float32_to_float64(c, fp_status);
155
- return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
156
-}
157
-
158
-float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
159
- float_status *fp_status)
160
-{
161
- Accum prod;
162
- Accum acc;
163
- Accum result;
164
- accum_init(&prod);
165
- accum_init(&acc);
166
- accum_init(&result);
167
-
168
- uint8_t a_sign = float32_is_neg(a);
169
- uint8_t b_sign = float32_is_neg(b);
170
- uint8_t c_sign = float32_is_neg(c);
171
- if (float32_is_infinity(a) ||
172
- float32_is_infinity(b) ||
173
- float32_is_infinity(c)) {
174
- return special_fmaf(a, b, c, fp_status);
175
- }
176
- if (float32_is_any_nan(a) ||
177
- float32_is_any_nan(b) ||
178
- float32_is_any_nan(c)) {
179
- return special_fmaf(a, b, c, fp_status);
180
- }
181
- if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
182
- float32 tmp = float32_mul(a, b, fp_status);
183
- tmp = float32_add(tmp, c, fp_status);
184
- return tmp;
185
- }
186
-
187
- /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
188
- prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
189
-
190
- /*
191
- * Note: extracting the mantissa into an int is multiplying by
192
- * 2**23, so adjust here
193
- */
194
- prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
195
- prod.sign = a_sign ^ b_sign;
196
- if (float32_is_zero(a) || float32_is_zero(b)) {
197
- prod.exp = -2 * WAY_BIG_EXP;
198
- }
199
- if ((scale > 0) && float32_is_denormal(c)) {
200
- acc.mant = int128_mul_6464(0, 0);
201
- acc.exp = -WAY_BIG_EXP;
202
- acc.sign = c_sign;
203
- acc.sticky = 1;
204
- result = accum_add(prod, acc);
205
- } else if (!float32_is_zero(c)) {
206
- acc.mant = int128_mul_6464(float32_getmant(c), 1);
207
- acc.exp = float32_getexp(c);
208
- acc.sign = c_sign;
209
- result = accum_add(prod, acc);
210
- } else {
211
- result = prod;
212
- }
213
- result.exp += scale;
214
- return accum_round_float32(result, fp_status);
215
-}
216
217
float64 internal_mpyhh(float64 a, float64 b,
218
unsigned long long int accumulated,
47
--
219
--
48
2.25.1
220
2.43.0
49
50
diff view generated by jsdifflib
1
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
1
This massive macro is now only used once.
2
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2
Expand it for use only by float64.
3
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
target/cris/translate.c | 19 ++++++++++---------
7
target/hexagon/fma_emu.c | 255 +++++++++++++++++++--------------------
6
target/cris/translate_v10.c.inc | 6 +++---
8
1 file changed, 127 insertions(+), 128 deletions(-)
7
2 files changed, 13 insertions(+), 12 deletions(-)
8
9
9
diff --git a/target/cris/translate.c b/target/cris/translate.c
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
10
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
11
--- a/target/cris/translate.c
12
--- a/target/hexagon/fma_emu.c
12
+++ b/target/cris/translate.c
13
+++ b/target/hexagon/fma_emu.c
13
@@ -XXX,XX +XXX,XX @@ static void gen_BUG(DisasContext *dc, const char *file, int line)
14
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
14
cpu_abort(CPU(dc->cpu), "%s:%d pc=%x\n", file, line, dc->pc);
15
}
15
}
16
16
17
-static const char *regnames_v32[] =
17
/* Return a maximum finite value with the requested sign */
18
+static const char * const regnames_v32[] =
18
-#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
19
{
19
-static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
20
"$r0", "$r1", "$r2", "$r3",
20
-{ \
21
"$r4", "$r5", "$r6", "$r7",
21
- if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
22
"$r8", "$r9", "$r10", "$r11",
22
- && ((a.guard | a.round | a.sticky) == 0)) { \
23
"$r12", "$r13", "$sp", "$acr",
23
- /* result zero */ \
24
};
24
- switch (fp_status->float_rounding_mode) { \
25
-static const char *pregnames_v32[] =
25
- case float_round_down: \
26
+
26
- return zero_##SUFFIX(1); \
27
+static const char * const pregnames_v32[] =
27
- default: \
28
{
28
- return zero_##SUFFIX(0); \
29
"$bz", "$vr", "$pid", "$srs",
29
- } \
30
"$wz", "$exs", "$eda", "$mof",
30
- } \
31
@@ -XXX,XX +XXX,XX @@ static const char *pregnames_v32[] =
31
- /* Normalize right */ \
32
};
32
- /* We want MANTBITS bits of mantissa plus the leading one. */ \
33
33
- /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
34
/* We need this table to handle preg-moves with implicit width. */
34
- /* So we need to normalize right while the high word is non-zero and \
35
-static int preg_sizes[] = {
35
- * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
36
+static const int preg_sizes[] = {
36
- while ((int128_gethi(a.mant) != 0) || \
37
1, /* bz. */
37
- ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
38
1, /* vr. */
38
- a = accum_norm_right(a, 1); \
39
4, /* pid. */
39
- } \
40
@@ -XXX,XX +XXX,XX @@ static inline void t_gen_swapw(TCGv d, TCGv s)
40
- /* \
41
((T0 >> 5) & 0x02020202) |
41
- * OK, now normalize left \
42
((T0 >> 7) & 0x01010101));
42
- * We want to normalize left until we have a leading one in bit 24 \
43
*/
43
- * Theoretically, we only need to shift a maximum of one to the left if we \
44
-static inline void t_gen_swapr(TCGv d, TCGv s)
44
- * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
45
+static void t_gen_swapr(TCGv d, TCGv s)
45
- * should be 0 \
46
{
46
- */ \
47
- struct {
47
- while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
48
+ static const struct {
48
- a = accum_norm_left(a); \
49
int shift; /* LSL when positive, LSR when negative. */
49
- } \
50
uint32_t mask;
50
- /* \
51
} bitrev[] = {
51
- * OK, now we might need to denormalize because of potential underflow. \
52
@@ -XXX,XX +XXX,XX @@ static int dec_prep_alu_m(CPUCRISState *env, DisasContext *dc,
52
- * We need to do this before rounding, and rounding might make us normal \
53
#if DISAS_CRIS
53
- * again \
54
static const char *cc_name(int cc)
54
- */ \
55
{
55
- while (a.exp <= 0) { \
56
- static const char *cc_names[16] = {
56
- a = accum_norm_right(a, 1 - a.exp); \
57
+ static const char * const cc_names[16] = {
57
- /* \
58
"cc", "cs", "ne", "eq", "vc", "vs", "pl", "mi",
58
- * Do we have underflow? \
59
"ls", "hi", "ge", "lt", "gt", "le", "a", "p"
59
- * That's when we get an inexact answer because we ran out of bits \
60
};
60
- * in a denormal. \
61
@@ -XXX,XX +XXX,XX @@ static int dec_null(CPUCRISState *env, DisasContext *dc)
61
- */ \
62
return 2;
62
- if (a.guard || a.round || a.sticky) { \
63
- float_raise(float_flag_underflow, fp_status); \
64
- } \
65
- } \
66
- /* OK, we're relatively canonical... now we need to round */ \
67
- if (a.guard || a.round || a.sticky) { \
68
- float_raise(float_flag_inexact, fp_status); \
69
- switch (fp_status->float_rounding_mode) { \
70
- case float_round_to_zero: \
71
- /* Chop and we're done */ \
72
- break; \
73
- case float_round_up: \
74
- if (a.sign == 0) { \
75
- a.mant = int128_add(a.mant, int128_one()); \
76
- } \
77
- break; \
78
- case float_round_down: \
79
- if (a.sign != 0) { \
80
- a.mant = int128_add(a.mant, int128_one()); \
81
- } \
82
- break; \
83
- default: \
84
- if (a.round || a.sticky) { \
85
- /* round up if guard is 1, down if guard is zero */ \
86
- a.mant = int128_add(a.mant, int128_make64(a.guard)); \
87
- } else if (a.guard) { \
88
- /* exactly .5, round up if odd */ \
89
- a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
90
- } \
91
- break; \
92
- } \
93
- } \
94
- /* \
95
- * OK, now we might have carried all the way up. \
96
- * So we might need to shr once \
97
- * at least we know that the lsb should be zero if we rounded and \
98
- * got a carry out... \
99
- */ \
100
- if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
101
- a = accum_norm_right(a, 1); \
102
- } \
103
- /* Overflow? */ \
104
- if (a.exp >= INF_EXP) { \
105
- /* Yep, inf result */ \
106
- float_raise(float_flag_overflow, fp_status); \
107
- float_raise(float_flag_inexact, fp_status); \
108
- switch (fp_status->float_rounding_mode) { \
109
- case float_round_to_zero: \
110
- return maxfinite_##SUFFIX(a.sign); \
111
- case float_round_up: \
112
- if (a.sign == 0) { \
113
- return infinite_##SUFFIX(a.sign); \
114
- } else { \
115
- return maxfinite_##SUFFIX(a.sign); \
116
- } \
117
- case float_round_down: \
118
- if (a.sign != 0) { \
119
- return infinite_##SUFFIX(a.sign); \
120
- } else { \
121
- return maxfinite_##SUFFIX(a.sign); \
122
- } \
123
- default: \
124
- return infinite_##SUFFIX(a.sign); \
125
- } \
126
- } \
127
- /* Underflow? */ \
128
- if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
129
- /* Leading one means: No, we're normal. So, we should be done... */ \
130
- INTERNAL_TYPE ret; \
131
- ret.i = 0; \
132
- ret.sign = a.sign; \
133
- ret.exp = a.exp; \
134
- ret.mant = int128_getlo(a.mant); \
135
- return ret.i; \
136
- } \
137
- assert(a.exp == 1); \
138
- INTERNAL_TYPE ret; \
139
- ret.i = 0; \
140
- ret.sign = a.sign; \
141
- ret.exp = 0; \
142
- ret.mant = int128_getlo(a.mant); \
143
- return ret.i; \
144
+static float64 accum_round_float64(Accum a, float_status *fp_status)
145
+{
146
+ if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
147
+ && ((a.guard | a.round | a.sticky) == 0)) {
148
+ /* result zero */
149
+ switch (fp_status->float_rounding_mode) {
150
+ case float_round_down:
151
+ return zero_float64(1);
152
+ default:
153
+ return zero_float64(0);
154
+ }
155
+ }
156
+ /*
157
+ * Normalize right
158
+ * We want DF_MANTBITS bits of mantissa plus the leading one.
159
+ * That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF
160
+ * So we need to normalize right while the high word is non-zero and
161
+ * while the low word is nonzero when masked with 0xffe0_0000_0000_0000
162
+ */
163
+ while ((int128_gethi(a.mant) != 0) ||
164
+ ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) {
165
+ a = accum_norm_right(a, 1);
166
+ }
167
+ /*
168
+ * OK, now normalize left
169
+ * We want to normalize left until we have a leading one in bit 24
170
+ * Theoretically, we only need to shift a maximum of one to the left if we
171
+ * shifted out lots of bits from B, or if we had no shift / 1 shift sticky
172
+ * should be 0
173
+ */
174
+ while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) {
175
+ a = accum_norm_left(a);
176
+ }
177
+ /*
178
+ * OK, now we might need to denormalize because of potential underflow.
179
+ * We need to do this before rounding, and rounding might make us normal
180
+ * again
181
+ */
182
+ while (a.exp <= 0) {
183
+ a = accum_norm_right(a, 1 - a.exp);
184
+ /*
185
+ * Do we have underflow?
186
+ * That's when we get an inexact answer because we ran out of bits
187
+ * in a denormal.
188
+ */
189
+ if (a.guard || a.round || a.sticky) {
190
+ float_raise(float_flag_underflow, fp_status);
191
+ }
192
+ }
193
+ /* OK, we're relatively canonical... now we need to round */
194
+ if (a.guard || a.round || a.sticky) {
195
+ float_raise(float_flag_inexact, fp_status);
196
+ switch (fp_status->float_rounding_mode) {
197
+ case float_round_to_zero:
198
+ /* Chop and we're done */
199
+ break;
200
+ case float_round_up:
201
+ if (a.sign == 0) {
202
+ a.mant = int128_add(a.mant, int128_one());
203
+ }
204
+ break;
205
+ case float_round_down:
206
+ if (a.sign != 0) {
207
+ a.mant = int128_add(a.mant, int128_one());
208
+ }
209
+ break;
210
+ default:
211
+ if (a.round || a.sticky) {
212
+ /* round up if guard is 1, down if guard is zero */
213
+ a.mant = int128_add(a.mant, int128_make64(a.guard));
214
+ } else if (a.guard) {
215
+ /* exactly .5, round up if odd */
216
+ a.mant = int128_add(a.mant, int128_and(a.mant, int128_one()));
217
+ }
218
+ break;
219
+ }
220
+ }
221
+ /*
222
+ * OK, now we might have carried all the way up.
223
+ * So we might need to shr once
224
+ * at least we know that the lsb should be zero if we rounded and
225
+ * got a carry out...
226
+ */
227
+ if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) {
228
+ a = accum_norm_right(a, 1);
229
+ }
230
+ /* Overflow? */
231
+ if (a.exp >= DF_INF_EXP) {
232
+ /* Yep, inf result */
233
+ float_raise(float_flag_overflow, fp_status);
234
+ float_raise(float_flag_inexact, fp_status);
235
+ switch (fp_status->float_rounding_mode) {
236
+ case float_round_to_zero:
237
+ return maxfinite_float64(a.sign);
238
+ case float_round_up:
239
+ if (a.sign == 0) {
240
+ return infinite_float64(a.sign);
241
+ } else {
242
+ return maxfinite_float64(a.sign);
243
+ }
244
+ case float_round_down:
245
+ if (a.sign != 0) {
246
+ return infinite_float64(a.sign);
247
+ } else {
248
+ return maxfinite_float64(a.sign);
249
+ }
250
+ default:
251
+ return infinite_float64(a.sign);
252
+ }
253
+ }
254
+ /* Underflow? */
255
+ if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
256
+ /* Leading one means: No, we're normal. So, we should be done... */
257
+ Double ret;
258
+ ret.i = 0;
259
+ ret.sign = a.sign;
260
+ ret.exp = a.exp;
261
+ ret.mant = int128_getlo(a.mant);
262
+ return ret.i;
263
+ }
264
+ assert(a.exp == 1);
265
+ Double ret;
266
+ ret.i = 0;
267
+ ret.sign = a.sign;
268
+ ret.exp = 0;
269
+ ret.mant = int128_getlo(a.mant);
270
+ return ret.i;
63
}
271
}
64
272
65
-static struct decoder_info {
273
-GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
66
+static const struct decoder_info {
274
-
67
struct {
275
float64 internal_mpyhh(float64 a, float64 b,
68
uint32_t bits;
276
unsigned long long int accumulated,
69
uint32_t mask;
277
float_status *fp_status)
70
@@ -XXX,XX +XXX,XX @@ void cris_cpu_dump_state(CPUState *cs, FILE *f, int flags)
71
{
72
CRISCPU *cpu = CRIS_CPU(cs);
73
CPUCRISState *env = &cpu->env;
74
- const char **regnames;
75
- const char **pregnames;
76
+ const char * const *regnames;
77
+ const char * const *pregnames;
78
int i;
79
80
if (!env) {
81
diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/cris/translate_v10.c.inc
84
+++ b/target/cris/translate_v10.c.inc
85
@@ -XXX,XX +XXX,XX @@
86
#include "qemu/osdep.h"
87
#include "crisv10-decode.h"
88
89
-static const char *regnames_v10[] =
90
+static const char * const regnames_v10[] =
91
{
92
"$r0", "$r1", "$r2", "$r3",
93
"$r4", "$r5", "$r6", "$r7",
94
@@ -XXX,XX +XXX,XX @@ static const char *regnames_v10[] =
95
"$r12", "$r13", "$sp", "$pc",
96
};
97
98
-static const char *pregnames_v10[] =
99
+static const char * const pregnames_v10[] =
100
{
101
"$bz", "$vr", "$p2", "$p3",
102
"$wz", "$ccr", "$p6-prefix", "$mof",
103
@@ -XXX,XX +XXX,XX @@ static const char *pregnames_v10[] =
104
};
105
106
/* We need this table to handle preg-moves with implicit width. */
107
-static int preg_sizes_v10[] = {
108
+static const int preg_sizes_v10[] = {
109
1, /* bz. */
110
1, /* vr. */
111
1, /* pid. */
112
--
278
--
113
2.25.1
279
2.43.0
1
This value is unused.
1
This structure, with bitfields, is incorrect for big-endian.
2
Use the existing float32_getexp_raw which uses extract32.
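As a quick illustration of why this matters: extract32() addresses the field by explicit bit position and width, so the result does not depend on how the compiler lays out bit-fields on a given host. A minimal sketch (the helper name here is illustrative, not from the patch):

    /* Biased exponent of an IEEE-754 binary32 value: bits [30:23]. */
    static inline uint32_t sf_getexp_raw(uint32_t f32_bits)
    {
        return extract32(f32_bits, 23, 8);
    }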
2
3
3
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
target/cris/translate.c | 2 --
7
target/hexagon/fma_emu.c | 16 +++-------------
8
1 file changed, 2 deletions(-)
8
1 file changed, 3 insertions(+), 13 deletions(-)
9
9
10
diff --git a/target/cris/translate.c b/target/cris/translate.c
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/cris/translate.c
12
--- a/target/hexagon/fma_emu.c
13
+++ b/target/cris/translate.c
13
+++ b/target/hexagon/fma_emu.c
14
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ typedef union {
15
#define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */
15
};
16
#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */
16
} Double;
17
#define DISAS_TB_JUMP DISAS_TARGET_2 /* only pc was modified statically */
17
18
-#define DISAS_SWI DISAS_TARGET_3
18
-typedef union {
19
19
- float f;
20
/* Used by the decoder. */
20
- uint32_t i;
21
#define EXTRACT_FIELD(src, start, end) \
21
- struct {
22
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
22
- uint32_t mant:23;
23
to find the next TB */
23
- uint32_t exp:8;
24
tcg_gen_exit_tb(NULL, 0);
24
- uint32_t sign:1;
25
break;
25
- };
26
- case DISAS_SWI:
26
-} Float;
27
case DISAS_TB_JUMP:
27
-
28
/* nothing more to generate */
28
static uint64_t float64_getmant(float64 f64)
29
break;
29
{
30
Double a = { .i = f64 };
31
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
32
33
int32_t float32_getexp(float32 f32)
34
{
35
- Float a = { .i = f32 };
36
+ int exp = float32_getexp_raw(f32);
37
if (float32_is_normal(f32)) {
38
- return a.exp;
39
+ return exp;
40
}
41
if (float32_is_denormal(f32)) {
42
- return a.exp + 1;
43
+ return exp + 1;
44
}
45
return -1;
46
}
30
--
47
--
31
2.25.1
48
2.43.0
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
1
This structure, with bitfields, is incorrect for big-endian.
2
Use extract64 and deposit64 instead.
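A hedged sketch of the replacement idiom: extract64() and deposit64() name the bit position and width explicitly, so the code behaves identically on little- and big-endian hosts (the packing helper below is illustrative only, not part of the patch):

    /* Assemble an IEEE-754 binary64 pattern: sign[63], exp[62:52], mant[51:0]. */
    static inline uint64_t pack_binary64(unsigned sign, unsigned exp, uint64_t mant)
    {
        uint64_t ret = 0;

        ret = deposit64(ret, 0, 52, mant);
        ret = deposit64(ret, 52, 11, exp);
        ret = deposit64(ret, 63, 1, sign);
        return ret;
    }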
3
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
6
---
4
target/nios2/translate.c | 128 ++++++++++++++++++++-------------------
7
target/hexagon/fma_emu.c | 46 ++++++++++++++--------------------------
5
1 file changed, 65 insertions(+), 63 deletions(-)
8
1 file changed, 16 insertions(+), 30 deletions(-)
6
9
7
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
8
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
9
--- a/target/nios2/translate.c
12
--- a/target/hexagon/fma_emu.c
10
+++ b/target/nios2/translate.c
13
+++ b/target/hexagon/fma_emu.c
11
@@ -XXX,XX +XXX,XX @@ static void gen_exception(DisasContext *dc, uint32_t excp)
14
@@ -XXX,XX +XXX,XX @@
15
16
#define WAY_BIG_EXP 4096
17
18
-typedef union {
19
- double f;
20
- uint64_t i;
21
- struct {
22
- uint64_t mant:52;
23
- uint64_t exp:11;
24
- uint64_t sign:1;
25
- };
26
-} Double;
27
-
28
static uint64_t float64_getmant(float64 f64)
29
{
30
- Double a = { .i = f64 };
31
+ uint64_t mant = extract64(f64, 0, 52);
32
if (float64_is_normal(f64)) {
33
- return a.mant | 1ULL << 52;
34
+ return mant | 1ULL << 52;
35
}
36
if (float64_is_zero(f64)) {
37
return 0;
38
}
39
if (float64_is_denormal(f64)) {
40
- return a.mant;
41
+ return mant;
42
}
43
return ~0ULL;
12
}
44
}
13
45
14
/* generate intermediate code for basic block 'tb'. */
46
int32_t float64_getexp(float64 f64)
15
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
16
+static void nios2_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
17
{
47
{
18
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
48
- Double a = { .i = f64 };
19
CPUNios2State *env = cs->env_ptr;
49
+ int exp = extract64(f64, 52, 11);
20
- DisasContext dc1, *dc = &dc1;
50
if (float64_is_normal(f64)) {
21
- int num_insns;
51
- return a.exp;
22
-
52
+ return exp;
23
- /* Initialize DC */
53
}
24
-
54
if (float64_is_denormal(f64)) {
25
- dc->base.tb = tb;
55
- return a.exp + 1;
26
- dc->base.singlestep_enabled = cs->singlestep_enabled;
56
+ return exp + 1;
27
- dc->base.is_jmp = DISAS_NEXT;
57
}
28
- dc->base.pc_first = tb->pc;
58
return -1;
29
- dc->base.pc_next = tb->pc;
59
}
30
+ int page_insns;
60
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
31
61
/* Return a maximum finite value with the requested sign */
32
dc->mem_idx = cpu_mmu_index(env, false);
62
static float64 accum_round_float64(Accum a, float_status *fp_status)
33
63
{
34
- /* Set up instruction counts */
64
+ uint64_t ret;
35
- num_insns = 0;
36
- if (max_insns > 1) {
37
- int page_insns = (TARGET_PAGE_SIZE - (tb->pc & ~TARGET_PAGE_MASK)) / 4;
38
- if (max_insns > page_insns) {
39
- max_insns = page_insns;
40
- }
41
- }
42
+ /* Bound the number of insns to execute to those left on the page. */
43
+ page_insns = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
44
+ dc->base.max_insns = MIN(page_insns, dc->base.max_insns);
45
+}
46
47
- gen_tb_start(tb);
48
- do {
49
- tcg_gen_insn_start(dc->base.pc_next);
50
- num_insns++;
51
+static void nios2_tr_tb_start(DisasContextBase *db, CPUState *cs)
52
+{
53
+}
54
55
- if (unlikely(cpu_breakpoint_test(cs, dc->base.pc_next, BP_ANY))) {
56
- gen_exception(dc, EXCP_DEBUG);
57
- /* The address covered by the breakpoint must be included in
58
- [tb->pc, tb->pc + tb->size) in order to for it to be
59
- properly cleared -- thus we increment the PC here so that
60
- the logic setting tb->size below does the right thing. */
61
- dc->pc += 4;
62
- break;
63
- }
64
+static void nios2_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
65
+{
66
+ tcg_gen_insn_start(dcbase->pc_next);
67
+}
68
69
- if (num_insns == max_insns && (tb_cflags(tb) & CF_LAST_IO)) {
70
- gen_io_start();
71
- }
72
+static bool nios2_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cs,
73
+ const CPUBreakpoint *bp)
74
+{
75
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
76
77
- dc->pc = dc->base.pc_next;
78
- dc->base.pc_next += 4;
79
+ gen_exception(dc, EXCP_DEBUG);
80
+ /*
81
+ * The address covered by the breakpoint must be included in
82
+ * [tb->pc, tb->pc + tb->size) in order to for it to be
83
+ * properly cleared -- thus we increment the PC here so that
84
+ * the logic setting tb->size below does the right thing.
85
+ */
86
+ dc->base.pc_next += 4;
87
+ return true;
88
+}
89
90
- /* Decode an instruction */
91
- handle_instruction(dc, env);
92
+static void nios2_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
93
+{
94
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
95
+ CPUNios2State *env = cs->env_ptr;
96
97
- /* Translation stops when a conditional branch is encountered.
98
- * Otherwise the subsequent code could get translated several times.
99
- * Also stop translation when a page boundary is reached. This
100
- * ensures prefetch aborts occur at the right place. */
101
- } while (!dc->base.is_jmp &&
102
- !tcg_op_buf_full() &&
103
- num_insns < max_insns);
104
+ dc->pc = dc->base.pc_next;
105
+ dc->base.pc_next += 4;
106
+
65
+
107
+ /* Decode an instruction */
66
if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
108
+ handle_instruction(dc, env);
67
&& ((a.guard | a.round | a.sticky) == 0)) {
109
+}
68
/* result zero */
110
+
69
@@ -XXX,XX +XXX,XX @@ static float64 accum_round_float64(Accum a, float_status *fp_status)
111
+static void nios2_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
70
}
112
+{
113
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
114
115
/* Indicate where the next block should start */
116
switch (dc->base.is_jmp) {
117
- case DISAS_NEXT:
118
+ case DISAS_TOO_MANY:
119
case DISAS_UPDATE:
120
/* Save the current PC back into the CPU register */
121
tcg_gen_movi_tl(cpu_R[R_PC], dc->base.pc_next);
122
tcg_gen_exit_tb(NULL, 0);
123
break;
124
125
- default:
126
case DISAS_JUMP:
127
/* The jump will already have updated the PC register */
128
tcg_gen_exit_tb(NULL, 0);
129
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
130
case DISAS_NORETURN:
131
/* nothing more to generate */
132
break;
133
+
134
+ default:
135
+ g_assert_not_reached();
136
}
71
}
137
+}
72
/* Underflow? */
138
73
- if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
139
- /* End off the block */
74
+ ret = int128_getlo(a.mant);
140
- gen_tb_end(tb, num_insns);
75
+ if (ret & (1ULL << DF_MANTBITS)) {
141
+static void nios2_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
76
/* Leading one means: No, we're normal. So, we should be done... */
142
+{
77
- Double ret;
143
+ qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first));
78
- ret.i = 0;
144
+ log_target_disas(cpu, dcbase->pc_first, dcbase->tb->size);
79
- ret.sign = a.sign;
145
+}
80
- ret.exp = a.exp;
146
81
- ret.mant = int128_getlo(a.mant);
147
- /* Mark instruction starts for the final generated instruction */
82
- return ret.i;
148
- tb->size = dc->base.pc_next - dc->base.pc_first;
83
+ ret = deposit64(ret, 52, 11, a.exp);
149
- tb->icount = num_insns;
84
+ } else {
150
+static const TranslatorOps nios2_tr_ops = {
85
+ assert(a.exp == 1);
151
+ .init_disas_context = nios2_tr_init_disas_context,
86
+ ret = deposit64(ret, 52, 11, 0);
152
+ .tb_start = nios2_tr_tb_start,
87
}
153
+ .insn_start = nios2_tr_insn_start,
88
- assert(a.exp == 1);
154
+ .breakpoint_check = nios2_tr_breakpoint_check,
89
- Double ret;
155
+ .translate_insn = nios2_tr_translate_insn,
90
- ret.i = 0;
156
+ .tb_stop = nios2_tr_tb_stop,
91
- ret.sign = a.sign;
157
+ .disas_log = nios2_tr_disas_log,
92
- ret.exp = 0;
158
+};
93
- ret.mant = int128_getlo(a.mant);
159
94
- return ret.i;
160
-#ifdef DEBUG_DISAS
95
+ ret = deposit64(ret, 63, 1, a.sign);
161
- if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
96
+ return ret;
162
- && qemu_log_in_addr_range(dc->base.pc_first)) {
163
- FILE *logfile = qemu_log_lock();
164
- qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
165
- log_target_disas(cs, tb->pc, tb->size);
166
- qemu_log("\n");
167
- qemu_log_unlock(logfile);
168
- }
169
-#endif
170
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
171
+{
172
+ DisasContext dc;
173
+ translator_loop(&nios2_tr_ops, &dc.base, cs, tb, max_insns);
174
}
97
}
175
98
176
void nios2_cpu_dump_state(CPUState *cs, FILE *f, int flags)
99
float64 internal_mpyhh(float64 a, float64 b,
177
--
100
--
178
2.25.1
101
2.43.0
1
Pass in the input and output size. We currently use 3 of the 5
1
No need to open-code 64x64->128-bit multiplication.
2
possible combinations; the others may be used by new tcg opcodes.
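The 64x64->128-bit replacement mentioned above reduces to the mulu64() helper from qemu/host-utils.h; condensed from the hexagon hunk below:

    static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
    {
        uint64_t l, h;

        mulu64(&l, &h, ai, bi);        /* full 64x64 -> 128-bit product */
        return int128_make128(l, h);   /* low word, then high word */
    }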
3
2
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
tcg/aarch64/tcg-target.c.inc | 42 ++++++++++++++----------------------
6
target/hexagon/fma_emu.c | 32 +++-----------------------------
8
1 file changed, 16 insertions(+), 26 deletions(-)
7
1 file changed, 3 insertions(+), 29 deletions(-)
9
8
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
9
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/aarch64/tcg-target.c.inc
11
--- a/target/hexagon/fma_emu.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
12
+++ b/target/hexagon/fma_emu.c
14
@@ -XXX,XX +XXX,XX @@ typedef enum {
13
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32)
15
/* Data-processing (1 source) instructions. */
14
return -1;
16
I3507_CLZ = 0x5ac01000,
17
I3507_RBIT = 0x5ac00000,
18
- I3507_REV16 = 0x5ac00400,
19
- I3507_REV32 = 0x5ac00800,
20
- I3507_REV64 = 0x5ac00c00,
21
+ I3507_REV = 0x5ac00000, /* + size << 10 */
22
23
/* Data-processing (2 source) instructions. */
24
I3508_LSLV = 0x1ac02000,
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
26
}
27
}
15
}
28
16
29
-static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
17
-static uint32_t int128_getw0(Int128 x)
30
+static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
18
-{
31
+ TCGReg rd, TCGReg rn)
19
- return int128_getlo(x);
32
{
33
- tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
34
-}
20
-}
35
-
21
-
36
-static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
22
-static uint32_t int128_getw1(Int128 x)
37
-{
23
-{
38
- tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
24
- return int128_getlo(x) >> 32;
39
-}
25
-}
40
-
26
-
41
-static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
27
static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
42
-{
28
{
43
- tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
29
- Int128 a, b;
44
+ /* REV, REV16, REV32 */
30
- uint64_t pp0, pp1a, pp1b, pp1s, pp2;
45
+ tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
31
+ uint64_t l, h;
32
33
- a = int128_make64(ai);
34
- b = int128_make64(bi);
35
- pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
36
- pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
37
- pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
38
- pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
39
-
40
- pp1s = pp1a + pp1b;
41
- if ((pp1s < pp1a) || (pp1s < pp1b)) {
42
- pp2 += (1ULL << 32);
43
- }
44
- uint64_t ret_low = pp0 + (pp1s << 32);
45
- if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
46
- pp2 += 1;
47
- }
48
-
49
- return int128_make128(ret_low, pp2 + (pp1s >> 32));
50
+ mulu64(&l, &h, ai, bi);
51
+ return int128_make128(l, h);
46
}
52
}
47
53
48
static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
54
static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
49
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
50
case MO_UW:
51
tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
52
if (bswap) {
53
- tcg_out_rev16(s, data_r, data_r);
54
+ tcg_out_rev(s, TCG_TYPE_I32, MO_16, data_r, data_r);
55
}
56
break;
57
case MO_SW:
58
if (bswap) {
59
tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
60
- tcg_out_rev16(s, data_r, data_r);
61
+ tcg_out_rev(s, TCG_TYPE_I32, MO_16, data_r, data_r);
62
tcg_out_sxt(s, ext, MO_16, data_r, data_r);
63
} else {
64
tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
66
case MO_UL:
67
tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
68
if (bswap) {
69
- tcg_out_rev32(s, data_r, data_r);
70
+ tcg_out_rev(s, TCG_TYPE_I32, MO_32, data_r, data_r);
71
}
72
break;
73
case MO_SL:
74
if (bswap) {
75
tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
76
- tcg_out_rev32(s, data_r, data_r);
77
+ tcg_out_rev(s, TCG_TYPE_I32, MO_32, data_r, data_r);
78
tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
79
} else {
80
tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
81
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
82
case MO_Q:
83
tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
84
if (bswap) {
85
- tcg_out_rev64(s, data_r, data_r);
86
+ tcg_out_rev(s, TCG_TYPE_I64, MO_64, data_r, data_r);
87
}
88
break;
89
default:
90
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
91
break;
92
case MO_16:
93
if (bswap && data_r != TCG_REG_XZR) {
94
- tcg_out_rev16(s, TCG_REG_TMP, data_r);
95
+ tcg_out_rev(s, TCG_TYPE_I32, MO_16, TCG_REG_TMP, data_r);
96
data_r = TCG_REG_TMP;
97
}
98
tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
99
break;
100
case MO_32:
101
if (bswap && data_r != TCG_REG_XZR) {
102
- tcg_out_rev32(s, TCG_REG_TMP, data_r);
103
+ tcg_out_rev(s, TCG_TYPE_I32, MO_32, TCG_REG_TMP, data_r);
104
data_r = TCG_REG_TMP;
105
}
106
tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
107
break;
108
case MO_64:
109
if (bswap && data_r != TCG_REG_XZR) {
110
- tcg_out_rev64(s, TCG_REG_TMP, data_r);
111
+ tcg_out_rev(s, TCG_TYPE_I64, MO_64, TCG_REG_TMP, data_r);
112
data_r = TCG_REG_TMP;
113
}
114
tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
115
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
116
break;
117
118
case INDEX_op_bswap64_i64:
119
- tcg_out_rev64(s, a0, a1);
120
+ tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
121
break;
122
case INDEX_op_bswap32_i64:
123
case INDEX_op_bswap32_i32:
124
- tcg_out_rev32(s, a0, a1);
125
+ tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
126
break;
127
case INDEX_op_bswap16_i64:
128
case INDEX_op_bswap16_i32:
129
- tcg_out_rev16(s, a0, a1);
130
+ tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
131
break;
132
133
case INDEX_op_ext8s_i64:
134
--
55
--
135
2.25.1
56
2.43.0
1
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
1
Initialize x with accumulated via direct assignment,
2
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
2
rather than multiplying by 1.
3
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
target/cris/helper.h | 2 +-
7
target/hexagon/fma_emu.c | 2 +-
6
1 file changed, 1 insertion(+), 1 deletion(-)
8
1 file changed, 1 insertion(+), 1 deletion(-)
7
9
8
diff --git a/target/cris/helper.h b/target/cris/helper.h
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
9
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
10
--- a/target/cris/helper.h
12
--- a/target/hexagon/fma_emu.c
11
+++ b/target/cris/helper.h
13
+++ b/target/hexagon/fma_emu.c
12
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ float64 internal_mpyhh(float64 a, float64 b,
13
-DEF_HELPER_2(raise_exception, void, env, i32)
15
float64_is_infinity(b)) {
14
+DEF_HELPER_2(raise_exception, noreturn, env, i32)
16
return float64_mul(a, b, fp_status);
15
DEF_HELPER_2(tlb_flush_pid, void, env, i32)
17
}
16
DEF_HELPER_2(spc_write, void, env, i32)
18
- x.mant = int128_mul_6464(accumulated, 1);
17
DEF_HELPER_1(rfe, void, env)
19
+ x.mant = int128_make64(accumulated);
20
x.sticky = sticky;
21
prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
22
x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
18
--
23
--
19
2.25.1
24
2.43.0
1
Implement the new semantics in the fallback expansion.
1
Convert all targets simultaneously, as the gen_intermediate_code
2
Change all callers to supply the flags that keep the
2
function disappears from the target. While there are possible
3
semantics unchanged locally.
3
workarounds, they're larger than simply performing the conversion.
4
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
include/tcg/tcg-op.h | 8 +--
8
include/exec/translator.h | 14 --------------
10
target/arm/translate-a64.c | 12 ++--
9
include/hw/core/tcg-cpu-ops.h | 13 +++++++++++++
11
target/arm/translate.c | 2 +-
10
target/alpha/cpu.h | 2 ++
12
target/i386/tcg/translate.c | 2 +-
11
target/arm/internals.h | 2 ++
13
target/mips/tcg/mxu_translate.c | 2 +-
12
target/avr/cpu.h | 2 ++
14
target/s390x/translate.c | 4 +-
13
target/hexagon/cpu.h | 2 ++
15
target/sh4/translate.c | 2 +-
14
target/hppa/cpu.h | 2 ++
16
tcg/tcg-op.c | 121 ++++++++++++++++++++++----------
15
target/i386/tcg/helper-tcg.h | 2 ++
17
8 files changed, 99 insertions(+), 54 deletions(-)
16
target/loongarch/internals.h | 2 ++
17
target/m68k/cpu.h | 2 ++
18
target/microblaze/cpu.h | 2 ++
19
target/mips/tcg/tcg-internal.h | 2 ++
20
target/openrisc/cpu.h | 2 ++
21
target/ppc/cpu.h | 2 ++
22
target/riscv/cpu.h | 3 +++
23
target/rx/cpu.h | 2 ++
24
target/s390x/s390x-internal.h | 2 ++
25
target/sh4/cpu.h | 2 ++
26
target/sparc/cpu.h | 2 ++
27
target/tricore/cpu.h | 2 ++
28
target/xtensa/cpu.h | 2 ++
29
accel/tcg/cpu-exec.c | 8 +++++---
30
accel/tcg/translate-all.c | 8 +++++---
31
target/alpha/cpu.c | 1 +
32
target/alpha/translate.c | 4 ++--
33
target/arm/cpu.c | 1 +
34
target/arm/tcg/cpu-v7m.c | 1 +
35
target/arm/tcg/translate.c | 5 ++---
36
target/avr/cpu.c | 1 +
37
target/avr/translate.c | 6 +++---
38
target/hexagon/cpu.c | 1 +
39
target/hexagon/translate.c | 4 ++--
40
target/hppa/cpu.c | 1 +
41
target/hppa/translate.c | 4 ++--
42
target/i386/tcg/tcg-cpu.c | 1 +
43
target/i386/tcg/translate.c | 5 ++---
44
target/loongarch/cpu.c | 1 +
45
target/loongarch/tcg/translate.c | 4 ++--
46
target/m68k/cpu.c | 1 +
47
target/m68k/translate.c | 4 ++--
48
target/microblaze/cpu.c | 1 +
49
target/microblaze/translate.c | 4 ++--
50
target/mips/cpu.c | 1 +
51
target/mips/tcg/translate.c | 4 ++--
52
target/openrisc/cpu.c | 1 +
53
target/openrisc/translate.c | 4 ++--
54
target/ppc/cpu_init.c | 1 +
55
target/ppc/translate.c | 4 ++--
56
target/riscv/tcg/tcg-cpu.c | 1 +
57
target/riscv/translate.c | 4 ++--
58
target/rx/cpu.c | 1 +
59
target/rx/translate.c | 4 ++--
60
target/s390x/cpu.c | 1 +
61
target/s390x/tcg/translate.c | 4 ++--
62
target/sh4/cpu.c | 1 +
63
target/sh4/translate.c | 4 ++--
64
target/sparc/cpu.c | 1 +
65
target/sparc/translate.c | 4 ++--
66
target/tricore/cpu.c | 1 +
67
target/tricore/translate.c | 5 ++---
68
target/xtensa/cpu.c | 1 +
69
target/xtensa/translate.c | 4 ++--
70
62 files changed, 121 insertions(+), 62 deletions(-)
18
71
19
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
72
diff --git a/include/exec/translator.h b/include/exec/translator.h
20
index XXXXXXX..XXXXXXX 100644
73
index XXXXXXX..XXXXXXX 100644
21
--- a/include/tcg/tcg-op.h
74
--- a/include/exec/translator.h
22
+++ b/include/tcg/tcg-op.h
75
+++ b/include/exec/translator.h
23
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg);
76
@@ -XXX,XX +XXX,XX @@
24
void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg);
77
#include "qemu/bswap.h"
25
void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
78
#include "exec/vaddr.h"
26
void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
79
27
-void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg);
80
-/**
28
+void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags);
81
- * gen_intermediate_code
29
void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
82
- * @cpu: cpu context
30
void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
83
- * @tb: translation block
31
void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
84
- * @max_insns: max number of instructions to translate
32
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg);
85
- * @pc: guest virtual program counter address
33
void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg);
86
- * @host_pc: host physical program counter address
34
void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg);
87
- *
35
void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
88
- * This function must be provided by the target, which should create
36
-void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg);
89
- * the target-specific DisasContext, and then invoke translator_loop.
37
-void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg);
90
- */
38
+void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
91
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
39
+void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
92
- vaddr pc, void *host_pc);
40
void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
93
-
41
void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
94
/**
42
void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
95
* DisasJumpType:
43
@@ -XXX,XX +XXX,XX @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
96
* @DISAS_NEXT: Next instruction in program order.
44
#define tcg_gen_ext32u_tl tcg_gen_mov_i32
97
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
45
#define tcg_gen_ext32s_tl tcg_gen_mov_i32
98
index XXXXXXX..XXXXXXX 100644
46
#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
99
--- a/include/hw/core/tcg-cpu-ops.h
47
-#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
100
+++ b/include/hw/core/tcg-cpu-ops.h
48
+#define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S)
101
@@ -XXX,XX +XXX,XX @@ struct TCGCPUOps {
49
#define tcg_gen_bswap_tl tcg_gen_bswap32_i32
102
* Called when the first CPU is realized.
50
#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
103
*/
51
#define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32
104
void (*initialize)(void);
52
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
105
+ /**
53
index XXXXXXX..XXXXXXX 100644
106
+ * @translate_code: Translate guest instructions to TCGOps
54
--- a/target/arm/translate-a64.c
107
+ * @cpu: cpu context
55
+++ b/target/arm/translate-a64.c
108
+ * @tb: translation block
56
@@ -XXX,XX +XXX,XX @@ static void handle_rev32(DisasContext *s, unsigned int sf,
109
+ * @max_insns: max number of instructions to translate
57
110
+ * @pc: guest virtual program counter address
58
/* bswap32_i64 requires zero high word */
111
+ * @host_pc: host physical program counter address
59
tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
112
+ *
60
- tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
113
+ * This function must be provided by the target, which should create
61
+ tcg_gen_bswap32_i64(tcg_rd, tcg_tmp, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
114
+ * the target-specific DisasContext, and then invoke translator_loop.
62
tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
115
+ */
63
- tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
116
+ void (*translate_code)(CPUState *cpu, TranslationBlock *tb,
64
+ tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
117
+ int *max_insns, vaddr pc, void *host_pc);
65
tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
118
/**
66
119
* @synchronize_from_tb: Synchronize state from a TCG #TranslationBlock
67
tcg_temp_free_i64(tcg_tmp);
120
*
68
} else {
121
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
69
tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
122
index XXXXXXX..XXXXXXX 100644
70
- tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
123
--- a/target/alpha/cpu.h
71
+ tcg_gen_bswap32_i64(tcg_rd, tcg_rd, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
124
+++ b/target/alpha/cpu.h
125
@@ -XXX,XX +XXX,XX @@ enum {
126
};
127
128
void alpha_translate_init(void);
129
+void alpha_translate_code(CPUState *cs, TranslationBlock *tb,
130
+ int *max_insns, vaddr pc, void *host_pc);
131
132
#define CPU_RESOLVING_TYPE TYPE_ALPHA_CPU
133
134
diff --git a/target/arm/internals.h b/target/arm/internals.h
135
index XXXXXXX..XXXXXXX 100644
136
--- a/target/arm/internals.h
137
+++ b/target/arm/internals.h
138
@@ -XXX,XX +XXX,XX @@ void init_cpreg_list(ARMCPU *cpu);
139
140
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
141
void arm_translate_init(void);
142
+void arm_translate_code(CPUState *cs, TranslationBlock *tb,
143
+ int *max_insns, vaddr pc, void *host_pc);
144
145
void arm_cpu_register_gdb_commands(ARMCPU *cpu);
146
void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *,
147
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
148
index XXXXXXX..XXXXXXX 100644
149
--- a/target/avr/cpu.h
150
+++ b/target/avr/cpu.h
151
@@ -XXX,XX +XXX,XX @@ static inline void set_avr_feature(CPUAVRState *env, int feature)
152
}
153
154
void avr_cpu_tcg_init(void);
155
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
156
+ int *max_insns, vaddr pc, void *host_pc);
157
158
int cpu_avr_exec(CPUState *cpu);
159
160
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
161
index XXXXXXX..XXXXXXX 100644
162
--- a/target/hexagon/cpu.h
163
+++ b/target/hexagon/cpu.h
164
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
165
typedef HexagonCPU ArchCPU;
166
167
void hexagon_translate_init(void);
168
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
169
+ int *max_insns, vaddr pc, void *host_pc);
170
171
#include "exec/cpu-all.h"
172
173
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
174
index XXXXXXX..XXXXXXX 100644
175
--- a/target/hppa/cpu.h
176
+++ b/target/hppa/cpu.h
177
@@ -XXX,XX +XXX,XX @@ static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
178
}
179
180
void hppa_translate_init(void);
181
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
182
+ int *max_insns, vaddr pc, void *host_pc);
183
184
#define CPU_RESOLVING_TYPE TYPE_HPPA_CPU
185
186
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/i386/tcg/helper-tcg.h
189
+++ b/target/i386/tcg/helper-tcg.h
190
@@ -XXX,XX +XXX,XX @@ static inline target_long lshift(target_long x, int n)
191
192
/* translate.c */
193
void tcg_x86_init(void);
194
+void x86_translate_code(CPUState *cs, TranslationBlock *tb,
195
+ int *max_insns, vaddr pc, void *host_pc);
196
197
/* excp_helper.c */
198
G_NORETURN void raise_exception(CPUX86State *env, int exception_index);
199
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
200
index XXXXXXX..XXXXXXX 100644
201
--- a/target/loongarch/internals.h
202
+++ b/target/loongarch/internals.h
203
@@ -XXX,XX +XXX,XX @@
204
#define TARGET_VIRT_MASK MAKE_64BIT_MASK(0, TARGET_VIRT_ADDR_SPACE_BITS)
205
206
void loongarch_translate_init(void);
207
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
208
+ int *max_insns, vaddr pc, void *host_pc);
209
210
void G_NORETURN do_raise_exception(CPULoongArchState *env,
211
uint32_t exception,
212
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
213
index XXXXXXX..XXXXXXX 100644
214
--- a/target/m68k/cpu.h
215
+++ b/target/m68k/cpu.h
216
@@ -XXX,XX +XXX,XX @@ int m68k_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
217
int m68k_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
218
219
void m68k_tcg_init(void);
220
+void m68k_translate_code(CPUState *cs, TranslationBlock *tb,
221
+ int *max_insns, vaddr pc, void *host_pc);
222
void m68k_cpu_init_gdb(M68kCPU *cpu);
223
uint32_t cpu_m68k_get_ccr(CPUM68KState *env);
224
void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t);
225
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
226
index XXXXXXX..XXXXXXX 100644
227
--- a/target/microblaze/cpu.h
228
+++ b/target/microblaze/cpu.h
229
@@ -XXX,XX +XXX,XX @@ static inline void mb_cpu_write_msr(CPUMBState *env, uint32_t val)
230
}
231
232
void mb_tcg_init(void);
233
+void mb_translate_code(CPUState *cs, TranslationBlock *tb,
234
+ int *max_insns, vaddr pc, void *host_pc);
235
236
#define CPU_RESOLVING_TYPE TYPE_MICROBLAZE_CPU
237
238
diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h
239
index XXXXXXX..XXXXXXX 100644
240
--- a/target/mips/tcg/tcg-internal.h
241
+++ b/target/mips/tcg/tcg-internal.h
242
@@ -XXX,XX +XXX,XX @@
243
#include "cpu.h"
244
245
void mips_tcg_init(void);
246
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
247
+ int *max_insns, vaddr pc, void *host_pc);
248
249
void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
250
G_NORETURN void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
251
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
252
index XXXXXXX..XXXXXXX 100644
253
--- a/target/openrisc/cpu.h
254
+++ b/target/openrisc/cpu.h
255
@@ -XXX,XX +XXX,XX @@ void openrisc_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
256
int openrisc_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
257
int openrisc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
258
void openrisc_translate_init(void);
259
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
260
+ int *max_insns, vaddr pc, void *host_pc);
261
int print_insn_or1k(bfd_vma addr, disassemble_info *info);
262
263
#ifndef CONFIG_USER_ONLY
264
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
265
index XXXXXXX..XXXXXXX 100644
266
--- a/target/ppc/cpu.h
267
+++ b/target/ppc/cpu.h
268
@@ -XXX,XX +XXX,XX @@ extern const VMStateDescription vmstate_ppc_cpu;
269
270
/*****************************************************************************/
271
void ppc_translate_init(void);
272
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
273
+ int *max_insns, vaddr pc, void *host_pc);
274
275
#if !defined(CONFIG_USER_ONLY)
276
void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
277
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
278
index XXXXXXX..XXXXXXX 100644
279
--- a/target/riscv/cpu.h
280
+++ b/target/riscv/cpu.h
281
@@ -XXX,XX +XXX,XX @@ RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit);
282
void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv, bool virt_en);
283
284
void riscv_translate_init(void);
285
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
286
+ int *max_insns, vaddr pc, void *host_pc);
287
+
288
G_NORETURN void riscv_raise_exception(CPURISCVState *env,
289
uint32_t exception, uintptr_t pc);
290
291
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
292
index XXXXXXX..XXXXXXX 100644
293
--- a/target/rx/cpu.h
294
+++ b/target/rx/cpu.h
295
@@ -XXX,XX +XXX,XX @@ int rx_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
296
int rx_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
297
298
void rx_translate_init(void);
299
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
300
+ int *max_insns, vaddr pc, void *host_pc);
301
void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte);
302
303
#include "exec/cpu-all.h"
304
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
305
index XXXXXXX..XXXXXXX 100644
306
--- a/target/s390x/s390x-internal.h
307
+++ b/target/s390x/s390x-internal.h
308
@@ -XXX,XX +XXX,XX @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3,
309
310
/* translate.c */
311
void s390x_translate_init(void);
312
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
313
+ int *max_insns, vaddr pc, void *host_pc);
314
void s390x_restore_state_to_opc(CPUState *cs,
315
const TranslationBlock *tb,
316
const uint64_t *data);
317
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
318
index XXXXXXX..XXXXXXX 100644
319
--- a/target/sh4/cpu.h
320
+++ b/target/sh4/cpu.h
321
@@ -XXX,XX +XXX,XX @@ G_NORETURN void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
322
uintptr_t retaddr);
323
324
void sh4_translate_init(void);
325
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
326
+ int *max_insns, vaddr pc, void *host_pc);
327
328
#if !defined(CONFIG_USER_ONLY)
329
hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
330
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
331
index XXXXXXX..XXXXXXX 100644
332
--- a/target/sparc/cpu.h
333
+++ b/target/sparc/cpu.h
334
@@ -XXX,XX +XXX,XX @@ int sparc_cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
335
336
/* translate.c */
337
void sparc_tcg_init(void);
338
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
339
+ int *max_insns, vaddr pc, void *host_pc);
340
341
/* fop_helper.c */
342
target_ulong cpu_get_fsr(CPUSPARCState *);
343
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
344
index XXXXXXX..XXXXXXX 100644
345
--- a/target/tricore/cpu.h
346
+++ b/target/tricore/cpu.h
347
@@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, PRIV, 0, 2)
348
349
void cpu_state_reset(CPUTriCoreState *s);
350
void tricore_tcg_init(void);
351
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
352
+ int *max_insns, vaddr pc, void *host_pc);
353
354
static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc,
355
uint64_t *cs_base, uint32_t *flags)
356
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
357
index XXXXXXX..XXXXXXX 100644
358
--- a/target/xtensa/cpu.h
359
+++ b/target/xtensa/cpu.h
360
@@ -XXX,XX +XXX,XX @@ G_NORETURN void xtensa_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
361
362
void xtensa_collect_sr_names(const XtensaConfig *config);
363
void xtensa_translate_init(void);
364
+void xtensa_translate_code(CPUState *cs, TranslationBlock *tb,
365
+ int *max_insns, vaddr pc, void *host_pc);
366
void **xtensa_get_regfile_by_name(const char *name, int entries, int bits);
367
void xtensa_breakpoint_handler(CPUState *cs);
368
void xtensa_register_core(XtensaConfigList *node);
369
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
370
index XXXXXXX..XXXXXXX 100644
371
--- a/accel/tcg/cpu-exec.c
372
+++ b/accel/tcg/cpu-exec.c
373
@@ -XXX,XX +XXX,XX @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
374
375
if (!tcg_target_initialized) {
376
/* Check mandatory TCGCPUOps handlers */
377
+ const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
378
#ifndef CONFIG_USER_ONLY
379
- assert(cpu->cc->tcg_ops->cpu_exec_halt);
380
- assert(cpu->cc->tcg_ops->cpu_exec_interrupt);
381
+ assert(tcg_ops->cpu_exec_halt);
382
+ assert(tcg_ops->cpu_exec_interrupt);
383
#endif /* !CONFIG_USER_ONLY */
384
- cpu->cc->tcg_ops->initialize();
385
+ assert(tcg_ops->translate_code);
386
+ tcg_ops->initialize();
387
tcg_target_initialized = true;
72
}
388
}
73
}
389
74
390
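A minimal sketch, assuming a hypothetical "foo" target and only the hooks introduced in this series, of the mandatory translate_code handler asserted above and of how it is registered; foo_tr_ops and foo_translate_init stand in for the per-target TranslatorOps and init routine, and the real per-target hunks follow below:

    /* target/foo/translate.c -- hypothetical example target */
    static void foo_translate_code(CPUState *cs, TranslationBlock *tb,
                                   int *max_insns, vaddr pc, void *host_pc)
    {
        DisasContext dc = { };
        /* Create the target-specific DisasContext, then run the common loop. */
        translator_loop(cs, tb, max_insns, pc, host_pc, &foo_tr_ops, &dc.base);
    }

    /* target/foo/cpu.c */
    static const TCGCPUOps foo_tcg_ops = {
        .initialize     = foo_translate_init,
        .translate_code = foo_translate_code,
    };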
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
75
@@ -XXX,XX +XXX,XX @@ static void handle_rev(DisasContext *s, int opcode, bool u,
391
index XXXXXXX..XXXXXXX 100644
76
read_vec_element(s, tcg_tmp, rn, i, grp_size);
392
--- a/accel/tcg/translate-all.c
77
switch (grp_size) {
393
+++ b/accel/tcg/translate-all.c
78
case MO_16:
394
@@ -XXX,XX +XXX,XX @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
79
- tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
395
80
+ tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp,
396
tcg_func_start(tcg_ctx);
81
+ TCG_BSWAP_IZ | TCG_BSWAP_OZ);
397
82
break;
398
- tcg_ctx->cpu = env_cpu(env);
83
case MO_32:
399
- gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
84
- tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
400
+ CPUState *cs = env_cpu(env);
85
+ tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp,
401
+ tcg_ctx->cpu = cs;
86
+ TCG_BSWAP_IZ | TCG_BSWAP_OZ);
402
+ cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
87
break;
403
+
88
case MO_64:
404
assert(tb->size != 0);
89
tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
405
tcg_ctx->cpu = NULL;
90
diff --git a/target/arm/translate.c b/target/arm/translate.c
406
*max_insns = tb->icount;
91
index XXXXXXX..XXXXXXX 100644
407
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
92
--- a/target/arm/translate.c
408
/*
93
+++ b/target/arm/translate.c
409
* Overflow of code_gen_buffer, or the current slice of it.
94
@@ -XXX,XX +XXX,XX @@ void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
410
*
95
static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
411
- * TODO: We don't need to re-do gen_intermediate_code, nor
96
{
412
+ * TODO: We don't need to re-do tcg_ops->translate_code, nor
97
tcg_gen_ext16u_i32(var, var);
413
* should we re-do the tcg optimization currently hidden
98
- tcg_gen_bswap16_i32(var, var);
414
* inside tcg_gen_code. All that should be required is to
99
+ tcg_gen_bswap16_i32(var, var, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
415
* flush the TBs, allocate a new TB, re-initialize it per
100
tcg_gen_ext16s_i32(dest, var);
416
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
101
}
417
index XXXXXXX..XXXXXXX 100644
102
418
--- a/target/alpha/cpu.c
419
+++ b/target/alpha/cpu.c
420
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps alpha_sysemu_ops = {
421
422
static const TCGCPUOps alpha_tcg_ops = {
423
.initialize = alpha_translate_init,
424
+ .translate_code = alpha_translate_code,
425
.synchronize_from_tb = alpha_cpu_synchronize_from_tb,
426
.restore_state_to_opc = alpha_restore_state_to_opc,
427
428
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
429
index XXXXXXX..XXXXXXX 100644
430
--- a/target/alpha/translate.c
431
+++ b/target/alpha/translate.c
432
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps alpha_tr_ops = {
433
.tb_stop = alpha_tr_tb_stop,
434
};
435
436
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
437
- vaddr pc, void *host_pc)
438
+void alpha_translate_code(CPUState *cpu, TranslationBlock *tb,
439
+ int *max_insns, vaddr pc, void *host_pc)
440
{
441
DisasContext dc;
442
translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
443
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
444
index XXXXXXX..XXXXXXX 100644
445
--- a/target/arm/cpu.c
446
+++ b/target/arm/cpu.c
447
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps arm_sysemu_ops = {
448
#ifdef CONFIG_TCG
449
static const TCGCPUOps arm_tcg_ops = {
450
.initialize = arm_translate_init,
451
+ .translate_code = arm_translate_code,
452
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
453
.debug_excp_handler = arm_debug_excp_handler,
454
.restore_state_to_opc = arm_restore_state_to_opc,
455
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
456
index XXXXXXX..XXXXXXX 100644
457
--- a/target/arm/tcg/cpu-v7m.c
458
+++ b/target/arm/tcg/cpu-v7m.c
459
@@ -XXX,XX +XXX,XX @@ static void cortex_m55_initfn(Object *obj)
460
461
static const TCGCPUOps arm_v7m_tcg_ops = {
462
.initialize = arm_translate_init,
463
+ .translate_code = arm_translate_code,
464
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
465
.debug_excp_handler = arm_debug_excp_handler,
466
.restore_state_to_opc = arm_restore_state_to_opc,
467
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
468
index XXXXXXX..XXXXXXX 100644
469
--- a/target/arm/tcg/translate.c
470
+++ b/target/arm/tcg/translate.c
471
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps thumb_translator_ops = {
472
.tb_stop = arm_tr_tb_stop,
473
};
474
475
-/* generate intermediate code for basic block 'tb'. */
476
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
477
- vaddr pc, void *host_pc)
478
+void arm_translate_code(CPUState *cpu, TranslationBlock *tb,
479
+ int *max_insns, vaddr pc, void *host_pc)
480
{
481
DisasContext dc = { };
482
const TranslatorOps *ops = &arm_translator_ops;
483
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
484
index XXXXXXX..XXXXXXX 100644
485
--- a/target/avr/cpu.c
486
+++ b/target/avr/cpu.c
487
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps avr_sysemu_ops = {
488
489
static const TCGCPUOps avr_tcg_ops = {
490
.initialize = avr_cpu_tcg_init,
491
+ .translate_code = avr_cpu_translate_code,
492
.synchronize_from_tb = avr_cpu_synchronize_from_tb,
493
.restore_state_to_opc = avr_restore_state_to_opc,
494
.cpu_exec_interrupt = avr_cpu_exec_interrupt,
495
diff --git a/target/avr/translate.c b/target/avr/translate.c
496
index XXXXXXX..XXXXXXX 100644
497
--- a/target/avr/translate.c
498
+++ b/target/avr/translate.c
499
@@ -XXX,XX +XXX,XX @@ static bool trans_WDR(DisasContext *ctx, arg_WDR *a)
500
*
501
* - translate()
502
* - canonicalize_skip()
503
- * - gen_intermediate_code()
504
+ * - translate_code()
505
* - restore_state_to_opc()
506
*
507
*/
508
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps avr_tr_ops = {
509
.tb_stop = avr_tr_tb_stop,
510
};
511
512
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
513
- vaddr pc, void *host_pc)
514
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
515
+ int *max_insns, vaddr pc, void *host_pc)
516
{
517
DisasContext dc = { };
518
translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
519
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
520
index XXXXXXX..XXXXXXX 100644
521
--- a/target/hexagon/cpu.c
522
+++ b/target/hexagon/cpu.c
523
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_init(Object *obj)
524
525
static const TCGCPUOps hexagon_tcg_ops = {
526
.initialize = hexagon_translate_init,
527
+ .translate_code = hexagon_translate_code,
528
.synchronize_from_tb = hexagon_cpu_synchronize_from_tb,
529
.restore_state_to_opc = hexagon_restore_state_to_opc,
530
};
531
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
532
index XXXXXXX..XXXXXXX 100644
533
--- a/target/hexagon/translate.c
534
+++ b/target/hexagon/translate.c
535
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hexagon_tr_ops = {
536
.tb_stop = hexagon_tr_tb_stop,
537
};
538
539
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
540
- vaddr pc, void *host_pc)
541
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
542
+ int *max_insns, vaddr pc, void *host_pc)
543
{
544
DisasContext ctx;
545
546
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
547
index XXXXXXX..XXXXXXX 100644
548
--- a/target/hppa/cpu.c
549
+++ b/target/hppa/cpu.c
550
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps hppa_sysemu_ops = {
551
552
static const TCGCPUOps hppa_tcg_ops = {
553
.initialize = hppa_translate_init,
554
+ .translate_code = hppa_translate_code,
555
.synchronize_from_tb = hppa_cpu_synchronize_from_tb,
556
.restore_state_to_opc = hppa_restore_state_to_opc,
557
558
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
559
index XXXXXXX..XXXXXXX 100644
560
--- a/target/hppa/translate.c
561
+++ b/target/hppa/translate.c
562
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hppa_tr_ops = {
563
#endif
564
};
565
566
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
567
- vaddr pc, void *host_pc)
568
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
569
+ int *max_insns, vaddr pc, void *host_pc)
570
{
571
DisasContext ctx = { };
572
translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
573
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
574
index XXXXXXX..XXXXXXX 100644
575
--- a/target/i386/tcg/tcg-cpu.c
576
+++ b/target/i386/tcg/tcg-cpu.c
577
@@ -XXX,XX +XXX,XX @@ static bool x86_debug_check_breakpoint(CPUState *cs)
578
579
static const TCGCPUOps x86_tcg_ops = {
580
.initialize = tcg_x86_init,
581
+ .translate_code = x86_translate_code,
582
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
583
.restore_state_to_opc = x86_restore_state_to_opc,
584
.cpu_exec_enter = x86_cpu_exec_enter,
103
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
585
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
104
index XXXXXXX..XXXXXXX 100644
586
index XXXXXXX..XXXXXXX 100644
105
--- a/target/i386/tcg/translate.c
587
--- a/target/i386/tcg/translate.c
106
+++ b/target/i386/tcg/translate.c
588
+++ b/target/i386/tcg/translate.c
107
@@ -XXX,XX +XXX,XX @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
589
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps i386_tr_ops = {
108
{
590
.tb_stop = i386_tr_tb_stop,
109
gen_op_mov_v_reg(s, MO_32, s->T0, reg);
591
};
110
tcg_gen_ext32u_tl(s->T0, s->T0);
592
111
- tcg_gen_bswap32_tl(s->T0, s->T0);
593
-/* generate intermediate code for basic block 'tb'. */
112
+ tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
594
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
113
gen_op_mov_reg_v(s, MO_32, reg, s->T0);
595
- vaddr pc, void *host_pc)
114
}
596
+void x86_translate_code(CPUState *cpu, TranslationBlock *tb,
115
break;
597
+ int *max_insns, vaddr pc, void *host_pc)
116
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
598
{
117
index XXXXXXX..XXXXXXX 100644
599
DisasContext dc;
118
--- a/target/mips/tcg/mxu_translate.c
600
119
+++ b/target/mips/tcg/mxu_translate.c
601
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
120
@@ -XXX,XX +XXX,XX @@ static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
602
index XXXXXXX..XXXXXXX 100644
121
603
--- a/target/loongarch/cpu.c
122
if (sel == 1) {
604
+++ b/target/loongarch/cpu.c
123
/* S32LDDR */
605
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)
124
- tcg_gen_bswap32_tl(t1, t1);
606
125
+ tcg_gen_bswap32_tl(t1, t1, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
607
static const TCGCPUOps loongarch_tcg_ops = {
126
}
608
.initialize = loongarch_translate_init,
127
gen_store_mxu_gpr(t1, XRa);
609
+ .translate_code = loongarch_translate_code,
128
610
.synchronize_from_tb = loongarch_cpu_synchronize_from_tb,
129
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
611
.restore_state_to_opc = loongarch_restore_state_to_opc,
130
index XXXXXXX..XXXXXXX 100644
612
131
--- a/target/s390x/translate.c
613
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
132
+++ b/target/s390x/translate.c
614
index XXXXXXX..XXXXXXX 100644
133
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_rosbg(DisasContext *s, DisasOps *o)
615
--- a/target/loongarch/tcg/translate.c
134
616
+++ b/target/loongarch/tcg/translate.c
135
static DisasJumpType op_rev16(DisasContext *s, DisasOps *o)
617
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps loongarch_tr_ops = {
136
{
618
.tb_stop = loongarch_tr_tb_stop,
137
- tcg_gen_bswap16_i64(o->out, o->in2);
619
};
138
+ tcg_gen_bswap16_i64(o->out, o->in2, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
620
139
return DISAS_NEXT;
621
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
140
}
622
- vaddr pc, void *host_pc)
141
623
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
142
static DisasJumpType op_rev32(DisasContext *s, DisasOps *o)
624
+ int *max_insns, vaddr pc, void *host_pc)
143
{
625
{
144
- tcg_gen_bswap32_i64(o->out, o->in2);
626
DisasContext ctx;
145
+ tcg_gen_bswap32_i64(o->out, o->in2, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
627
146
return DISAS_NEXT;
628
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
147
}
629
index XXXXXXX..XXXXXXX 100644
630
--- a/target/m68k/cpu.c
631
+++ b/target/m68k/cpu.c
632
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps m68k_sysemu_ops = {
633
634
static const TCGCPUOps m68k_tcg_ops = {
635
.initialize = m68k_tcg_init,
636
+ .translate_code = m68k_translate_code,
637
.restore_state_to_opc = m68k_restore_state_to_opc,
638
639
#ifndef CONFIG_USER_ONLY
640
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
641
index XXXXXXX..XXXXXXX 100644
642
--- a/target/m68k/translate.c
643
+++ b/target/m68k/translate.c
644
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps m68k_tr_ops = {
645
.tb_stop = m68k_tr_tb_stop,
646
};
647
648
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
649
- vaddr pc, void *host_pc)
650
+void m68k_translate_code(CPUState *cpu, TranslationBlock *tb,
651
+ int *max_insns, vaddr pc, void *host_pc)
652
{
653
DisasContext dc;
654
translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base);
655
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
656
index XXXXXXX..XXXXXXX 100644
657
--- a/target/microblaze/cpu.c
658
+++ b/target/microblaze/cpu.c
659
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps mb_sysemu_ops = {
660
661
static const TCGCPUOps mb_tcg_ops = {
662
.initialize = mb_tcg_init,
663
+ .translate_code = mb_translate_code,
664
.synchronize_from_tb = mb_cpu_synchronize_from_tb,
665
.restore_state_to_opc = mb_restore_state_to_opc,
666
667
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
668
index XXXXXXX..XXXXXXX 100644
669
--- a/target/microblaze/translate.c
670
+++ b/target/microblaze/translate.c
671
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mb_tr_ops = {
672
.tb_stop = mb_tr_tb_stop,
673
};
674
675
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
676
- vaddr pc, void *host_pc)
677
+void mb_translate_code(CPUState *cpu, TranslationBlock *tb,
678
+ int *max_insns, vaddr pc, void *host_pc)
679
{
680
DisasContext dc;
681
translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base);
682
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
683
index XXXXXXX..XXXXXXX 100644
684
--- a/target/mips/cpu.c
685
+++ b/target/mips/cpu.c
686
@@ -XXX,XX +XXX,XX @@ static const Property mips_cpu_properties[] = {
687
#include "hw/core/tcg-cpu-ops.h"
688
static const TCGCPUOps mips_tcg_ops = {
689
.initialize = mips_tcg_init,
690
+ .translate_code = mips_translate_code,
691
.synchronize_from_tb = mips_cpu_synchronize_from_tb,
692
.restore_state_to_opc = mips_restore_state_to_opc,
693
694
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
695
index XXXXXXX..XXXXXXX 100644
696
--- a/target/mips/tcg/translate.c
697
+++ b/target/mips/tcg/translate.c
698
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mips_tr_ops = {
699
.tb_stop = mips_tr_tb_stop,
700
};
701
702
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
703
- vaddr pc, void *host_pc)
704
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
705
+ int *max_insns, vaddr pc, void *host_pc)
706
{
707
DisasContext ctx;
708
709
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
710
index XXXXXXX..XXXXXXX 100644
711
--- a/target/openrisc/cpu.c
712
+++ b/target/openrisc/cpu.c
713
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps openrisc_sysemu_ops = {
714
715
static const TCGCPUOps openrisc_tcg_ops = {
716
.initialize = openrisc_translate_init,
717
+ .translate_code = openrisc_translate_code,
718
.synchronize_from_tb = openrisc_cpu_synchronize_from_tb,
719
.restore_state_to_opc = openrisc_restore_state_to_opc,
720
721
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
722
index XXXXXXX..XXXXXXX 100644
723
--- a/target/openrisc/translate.c
724
+++ b/target/openrisc/translate.c
725
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps openrisc_tr_ops = {
726
.tb_stop = openrisc_tr_tb_stop,
727
};
728
729
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
730
- vaddr pc, void *host_pc)
731
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
732
+ int *max_insns, vaddr pc, void *host_pc)
733
{
734
DisasContext ctx;
735
736
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
737
index XXXXXXX..XXXXXXX 100644
738
--- a/target/ppc/cpu_init.c
739
+++ b/target/ppc/cpu_init.c
740
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps ppc_sysemu_ops = {
741
742
static const TCGCPUOps ppc_tcg_ops = {
743
.initialize = ppc_translate_init,
744
+ .translate_code = ppc_translate_code,
745
.restore_state_to_opc = ppc_restore_state_to_opc,
746
747
#ifdef CONFIG_USER_ONLY
748
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
749
index XXXXXXX..XXXXXXX 100644
750
--- a/target/ppc/translate.c
751
+++ b/target/ppc/translate.c
752
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps ppc_tr_ops = {
753
.tb_stop = ppc_tr_tb_stop,
754
};
755
756
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
757
- vaddr pc, void *host_pc)
758
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
759
+ int *max_insns, vaddr pc, void *host_pc)
760
{
761
DisasContext ctx;
762
763
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
764
index XXXXXXX..XXXXXXX 100644
765
--- a/target/riscv/tcg/tcg-cpu.c
766
+++ b/target/riscv/tcg/tcg-cpu.c
767
@@ -XXX,XX +XXX,XX @@ static void riscv_restore_state_to_opc(CPUState *cs,
768
769
static const TCGCPUOps riscv_tcg_ops = {
770
.initialize = riscv_translate_init,
771
+ .translate_code = riscv_translate_code,
772
.synchronize_from_tb = riscv_cpu_synchronize_from_tb,
773
.restore_state_to_opc = riscv_restore_state_to_opc,
774
775
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
776
index XXXXXXX..XXXXXXX 100644
777
--- a/target/riscv/translate.c
778
+++ b/target/riscv/translate.c
779
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps riscv_tr_ops = {
780
.tb_stop = riscv_tr_tb_stop,
781
};
782
783
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
784
- vaddr pc, void *host_pc)
785
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
786
+ int *max_insns, vaddr pc, void *host_pc)
787
{
788
DisasContext ctx;
789
790
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
791
index XXXXXXX..XXXXXXX 100644
792
--- a/target/rx/cpu.c
793
+++ b/target/rx/cpu.c
794
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps rx_sysemu_ops = {
795
796
static const TCGCPUOps rx_tcg_ops = {
797
.initialize = rx_translate_init,
798
+ .translate_code = rx_translate_code,
799
.synchronize_from_tb = rx_cpu_synchronize_from_tb,
800
.restore_state_to_opc = rx_restore_state_to_opc,
801
.tlb_fill = rx_cpu_tlb_fill,
802
diff --git a/target/rx/translate.c b/target/rx/translate.c
803
index XXXXXXX..XXXXXXX 100644
804
--- a/target/rx/translate.c
805
+++ b/target/rx/translate.c
806
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps rx_tr_ops = {
807
.tb_stop = rx_tr_tb_stop,
808
};
809
810
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
811
- vaddr pc, void *host_pc)
812
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
813
+ int *max_insns, vaddr pc, void *host_pc)
814
{
815
DisasContext dc;
816
817
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
818
index XXXXXXX..XXXXXXX 100644
819
--- a/target/s390x/cpu.c
820
+++ b/target/s390x/cpu.c
821
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc,
822
823
static const TCGCPUOps s390_tcg_ops = {
824
.initialize = s390x_translate_init,
825
+ .translate_code = s390x_translate_code,
826
.restore_state_to_opc = s390x_restore_state_to_opc,
827
828
#ifdef CONFIG_USER_ONLY
829
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
830
index XXXXXXX..XXXXXXX 100644
831
--- a/target/s390x/tcg/translate.c
832
+++ b/target/s390x/tcg/translate.c
833
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps s390x_tr_ops = {
834
.disas_log = s390x_tr_disas_log,
835
};
836
837
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
838
- vaddr pc, void *host_pc)
839
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
840
+ int *max_insns, vaddr pc, void *host_pc)
841
{
842
DisasContext dc;
843
844
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
845
index XXXXXXX..XXXXXXX 100644
846
--- a/target/sh4/cpu.c
847
+++ b/target/sh4/cpu.c
848
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sh4_sysemu_ops = {
849
850
static const TCGCPUOps superh_tcg_ops = {
851
.initialize = sh4_translate_init,
852
+ .translate_code = sh4_translate_code,
853
.synchronize_from_tb = superh_cpu_synchronize_from_tb,
854
.restore_state_to_opc = superh_restore_state_to_opc,
148
855
149
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
856
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
150
index XXXXXXX..XXXXXXX 100644
857
index XXXXXXX..XXXXXXX 100644
151
--- a/target/sh4/translate.c
858
--- a/target/sh4/translate.c
152
+++ b/target/sh4/translate.c
859
+++ b/target/sh4/translate.c
153
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
860
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sh4_tr_ops = {
154
    {
861
.tb_stop = sh4_tr_tb_stop,
155
TCGv low = tcg_temp_new();
862
};
156
     tcg_gen_ext16u_i32(low, REG(B7_4));
863
157
-     tcg_gen_bswap16_i32(low, low);
864
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
158
+     tcg_gen_bswap16_i32(low, low, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
865
- vaddr pc, void *host_pc)
159
tcg_gen_deposit_i32(REG(B11_8), REG(B7_4), low, 0, 16);
866
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
160
     tcg_temp_free(low);
867
+ int *max_insns, vaddr pc, void *host_pc)
161
    }
868
{
162
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
869
DisasContext ctx;
163
index XXXXXXX..XXXXXXX 100644
870
164
--- a/tcg/tcg-op.c
871
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
165
+++ b/tcg/tcg-op.c
872
index XXXXXXX..XXXXXXX 100644
166
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
873
--- a/target/sparc/cpu.c
167
}
874
+++ b/target/sparc/cpu.c
168
}
875
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sparc_sysemu_ops = {
169
876
170
-/* Note: we assume the two high bytes are set to zero */
877
static const TCGCPUOps sparc_tcg_ops = {
171
-void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
878
.initialize = sparc_tcg_init,
172
+void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
879
+ .translate_code = sparc_translate_code,
173
{
880
.synchronize_from_tb = sparc_cpu_synchronize_from_tb,
174
+ /* Only one extension flag may be present. */
881
.restore_state_to_opc = sparc_restore_state_to_opc,
175
+ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
882
176
+
883
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
177
if (TCG_TARGET_HAS_bswap16_i32) {
884
index XXXXXXX..XXXXXXX 100644
178
- tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg,
885
--- a/target/sparc/translate.c
179
- TCG_BSWAP_IZ | TCG_BSWAP_OZ);
886
+++ b/target/sparc/translate.c
180
+ tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
887
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sparc_tr_ops = {
181
} else {
888
.tb_stop = sparc_tr_tb_stop,
182
TCGv_i32 t0 = tcg_temp_new_i32();
889
};
183
+ TCGv_i32 t1 = tcg_temp_new_i32();
890
184
891
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
185
- tcg_gen_ext8u_i32(t0, arg);
892
- vaddr pc, void *host_pc)
186
- tcg_gen_shli_i32(t0, t0, 8);
893
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
187
- tcg_gen_shri_i32(ret, arg, 8);
894
+ int *max_insns, vaddr pc, void *host_pc)
188
- tcg_gen_or_i32(ret, ret, t0);
895
{
189
+ tcg_gen_shri_i32(t0, arg, 8);
896
DisasContext dc = {};
190
+ if (!(flags & TCG_BSWAP_IZ)) {
897
191
+ tcg_gen_ext8u_i32(t0, t0);
898
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
192
+ }
899
index XXXXXXX..XXXXXXX 100644
193
+
900
--- a/target/tricore/cpu.c
194
+ if (flags & TCG_BSWAP_OS) {
901
+++ b/target/tricore/cpu.c
195
+ tcg_gen_shli_i32(t1, arg, 24);
902
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps tricore_sysemu_ops = {
196
+ tcg_gen_sari_i32(t1, t1, 16);
903
197
+ } else if (flags & TCG_BSWAP_OZ) {
904
static const TCGCPUOps tricore_tcg_ops = {
198
+ tcg_gen_ext8u_i32(t1, arg);
905
.initialize = tricore_tcg_init,
199
+ tcg_gen_shli_i32(t1, t1, 8);
906
+ .translate_code = tricore_translate_code,
200
+ } else {
907
.synchronize_from_tb = tricore_cpu_synchronize_from_tb,
201
+ tcg_gen_shli_i32(t1, arg, 8);
908
.restore_state_to_opc = tricore_restore_state_to_opc,
202
+ }
909
.tlb_fill = tricore_cpu_tlb_fill,
203
+
910
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
204
+ tcg_gen_or_i32(ret, t0, t1);
911
index XXXXXXX..XXXXXXX 100644
205
tcg_temp_free_i32(t0);
912
--- a/target/tricore/translate.c
206
+ tcg_temp_free_i32(t1);
913
+++ b/target/tricore/translate.c
207
}
914
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps tricore_tr_ops = {
208
}
915
.tb_stop = tricore_tr_tb_stop,
209
916
};
210
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
917
211
}
918
-
212
}
919
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
213
920
- vaddr pc, void *host_pc)
214
-/* Note: we assume the six high bytes are set to zero */
921
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
215
-void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
922
+ int *max_insns, vaddr pc, void *host_pc)
216
+void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
923
{
217
{
924
DisasContext ctx;
218
+ /* Only one extension flag may be present. */
925
translator_loop(cs, tb, max_insns, pc, host_pc,
219
+ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
926
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
220
+
927
index XXXXXXX..XXXXXXX 100644
221
if (TCG_TARGET_REG_BITS == 32) {
928
--- a/target/xtensa/cpu.c
222
- tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));
929
+++ b/target/xtensa/cpu.c
223
- tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
930
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps xtensa_sysemu_ops = {
224
+ tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg), flags);
931
225
+ if (flags & TCG_BSWAP_OS) {
932
static const TCGCPUOps xtensa_tcg_ops = {
226
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
933
.initialize = xtensa_translate_init,
227
+ } else {
934
+ .translate_code = xtensa_translate_code,
228
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
935
.debug_excp_handler = xtensa_breakpoint_handler,
229
+ }
936
.restore_state_to_opc = xtensa_restore_state_to_opc,
230
} else if (TCG_TARGET_HAS_bswap16_i64) {
937
231
- tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg,
938
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
232
- TCG_BSWAP_IZ | TCG_BSWAP_OZ);
939
index XXXXXXX..XXXXXXX 100644
233
+ tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags);
940
--- a/target/xtensa/translate.c
234
} else {
941
+++ b/target/xtensa/translate.c
235
TCGv_i64 t0 = tcg_temp_new_i64();
942
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps xtensa_translator_ops = {
236
+ TCGv_i64 t1 = tcg_temp_new_i64();
943
.tb_stop = xtensa_tr_tb_stop,
237
944
};
238
- tcg_gen_ext8u_i64(t0, arg);
945
239
- tcg_gen_shli_i64(t0, t0, 8);
946
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
240
- tcg_gen_shri_i64(ret, arg, 8);
947
- vaddr pc, void *host_pc)
241
- tcg_gen_or_i64(ret, ret, t0);
948
+void xtensa_translate_code(CPUState *cpu, TranslationBlock *tb,
242
+ tcg_gen_shri_i64(t0, arg, 8);
949
+ int *max_insns, vaddr pc, void *host_pc)
243
+ if (!(flags & TCG_BSWAP_IZ)) {
950
{
244
+ tcg_gen_ext8u_i64(t0, t0);
951
DisasContext dc = {};
245
+ }
952
translator_loop(cpu, tb, max_insns, pc, host_pc,
246
+
247
+ if (flags & TCG_BSWAP_OS) {
248
+ tcg_gen_shli_i64(t1, arg, 56);
249
+ tcg_gen_sari_i64(t1, t1, 48);
250
+ } else if (flags & TCG_BSWAP_OZ) {
251
+ tcg_gen_ext8u_i64(t1, arg);
252
+ tcg_gen_shli_i64(t1, t1, 8);
253
+ } else {
254
+ tcg_gen_shli_i64(t1, arg, 8);
255
+ }
256
+
257
+ tcg_gen_or_i64(ret, t0, t1);
258
tcg_temp_free_i64(t0);
259
+ tcg_temp_free_i64(t1);
260
}
261
}
262
263
-/* Note: we assume the four high bytes are set to zero */
264
-void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
265
+void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
266
{
267
+ /* Only one extension flag may be present. */
268
+ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
269
+
270
if (TCG_TARGET_REG_BITS == 32) {
271
tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
272
- tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
273
+ if (flags & TCG_BSWAP_OS) {
274
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
275
+ } else {
276
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
277
+ }
278
} else if (TCG_TARGET_HAS_bswap32_i64) {
279
- tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg,
280
- TCG_BSWAP_IZ | TCG_BSWAP_OZ);
281
+ tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags);
282
} else {
283
TCGv_i64 t0 = tcg_temp_new_i64();
284
TCGv_i64 t1 = tcg_temp_new_i64();
285
TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
286
287
- /* arg = ....abcd */
288
- tcg_gen_shri_i64(t0, arg, 8); /* t0 = .....abc */
289
- tcg_gen_and_i64(t1, arg, t2); /* t1 = .....b.d */
290
- tcg_gen_and_i64(t0, t0, t2); /* t0 = .....a.c */
291
- tcg_gen_shli_i64(t1, t1, 8); /* t1 = ....b.d. */
292
- tcg_gen_or_i64(ret, t0, t1); /* ret = ....badc */
293
+ /* arg = xxxxabcd */
294
+ tcg_gen_shri_i64(t0, arg, 8); /* t0 = .xxxxabc */
295
+ tcg_gen_and_i64(t1, arg, t2); /* t1 = .....b.d */
296
+ tcg_gen_and_i64(t0, t0, t2); /* t0 = .....a.c */
297
+ tcg_gen_shli_i64(t1, t1, 8); /* t1 = ....b.d. */
298
+ tcg_gen_or_i64(ret, t0, t1); /* ret = ....badc */
299
300
- tcg_gen_shli_i64(t1, ret, 48); /* t1 = dc...... */
301
- tcg_gen_shri_i64(t0, ret, 16); /* t0 = ......ba */
302
- tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */
303
- tcg_gen_or_i64(ret, t0, t1); /* ret = ....dcba */
304
+ tcg_gen_shli_i64(t1, ret, 48); /* t1 = dc...... */
305
+ tcg_gen_shri_i64(t0, ret, 16); /* t0 = ......ba */
306
+ if (flags & TCG_BSWAP_OS) {
307
+ tcg_gen_sari_i64(t1, t1, 32); /* t1 = ssssdc.. */
308
+ } else {
309
+ tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */
310
+ }
311
+ tcg_gen_or_i64(ret, t0, t1); /* ret = ssssdcba */
312
313
tcg_temp_free_i64(t0);
314
tcg_temp_free_i64(t1);
315
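The 64-bit variant above follows the same convention: with TCG_BSWAP_OS the final sari (or the sari on the high half in the 32-bit-host path) sign-extends the swapped word into bits 63..32, while TCG_BSWAP_OZ zeroes them. A minimal sketch, assuming val is a TCGv_i64:

    tcg_gen_bswap32_i64(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OZ); /* zero-extend high half */
    tcg_gen_bswap32_i64(val, val, TCG_BSWAP_OS);                /* sign-extend high half */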
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
316
if ((orig_memop ^ memop) & MO_BSWAP) {
317
switch (orig_memop & MO_SIZE) {
318
case MO_16:
319
- tcg_gen_bswap16_i32(val, val);
320
+ tcg_gen_bswap16_i32(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
321
if (orig_memop & MO_SIGN) {
322
tcg_gen_ext16s_i32(val, val);
323
}
324
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
325
switch (memop & MO_SIZE) {
326
case MO_16:
327
tcg_gen_ext16u_i32(swap, val);
328
- tcg_gen_bswap16_i32(swap, swap);
329
+ tcg_gen_bswap16_i32(swap, swap, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
330
break;
331
case MO_32:
332
tcg_gen_bswap32_i32(swap, val);
333
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
334
if ((orig_memop ^ memop) & MO_BSWAP) {
335
switch (orig_memop & MO_SIZE) {
336
case MO_16:
337
- tcg_gen_bswap16_i64(val, val);
338
+ tcg_gen_bswap16_i64(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
339
if (orig_memop & MO_SIGN) {
340
tcg_gen_ext16s_i64(val, val);
341
}
342
break;
343
case MO_32:
344
- tcg_gen_bswap32_i64(val, val);
345
+ tcg_gen_bswap32_i64(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
346
if (orig_memop & MO_SIGN) {
347
tcg_gen_ext32s_i64(val, val);
348
}
349
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
350
switch (memop & MO_SIZE) {
351
case MO_16:
352
tcg_gen_ext16u_i64(swap, val);
353
- tcg_gen_bswap16_i64(swap, swap);
354
+ tcg_gen_bswap16_i64(swap, swap, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
355
break;
356
case MO_32:
357
tcg_gen_ext32u_i64(swap, val);
358
- tcg_gen_bswap32_i64(swap, swap);
359
+ tcg_gen_bswap32_i64(swap, swap, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
360
break;
361
case MO_64:
362
tcg_gen_bswap64_i64(swap, val);
363
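In the load paths above, a sign-extended byte-swapped 16-bit access is still emitted as a zero-extending swap followed by an explicit ext16s. A sketch of what the new flags would allow instead (a hypothetical simplification, not something this patch does):

    /* val was loaded zero-extended, so TCG_BSWAP_IZ holds; request the
       sign-extended result directly instead of swapping then extending. */
    tcg_gen_bswap16_i32(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OS);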
--
2.25.1

--
2.43.0