The following changes since commit d530697ca20e19f7a626f4c1c8b26fccd0dc4470:

  Merge tag 'pull-testing-updates-100523-1' of https://gitlab.com/stsquad/qemu into staging (2023-05-10 16:43:01 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230511

for you to fetch changes up to b2d4d6616c22325dff802e0a35092167f2dc2268:

  target/loongarch: Do not include tcg-ldst.h (2023-05-11 06:06:04 +0100)

----------------------------------------------------------------
target/m68k: Fix gen_load_fp regression
accel/tcg: Ensure fairness with icount
disas: Move disas.c into the target-independent source sets
tcg: Use common routines for calling slow path helpers
tcg/*: Cleanups to qemu_ld/st constraints
tcg: Remove TARGET_ALIGNED_ONLY
accel/tcg: Reorg system mode load/store helpers

----------------------------------------------------------------
Jamie Iles (2):
      cpu: expose qemu_cpu_list_lock for lock-guard use
      accel/tcg/tcg-accel-ops-rr: ensure fairness with icount

Richard Henderson (49):
      target/m68k: Fix gen_load_fp for OS_LONG
      accel/tcg: Fix atomic_mmu_lookup for reads
      disas: Fix tabs and braces in disas.c
      disas: Move disas.c to disas/
      disas: Remove target_ulong from the interface
      disas: Remove target-specific headers
      tcg/i386: Introduce prepare_host_addr
      tcg/i386: Use indexed addressing for softmmu fast path
      tcg/aarch64: Introduce prepare_host_addr
      tcg/arm: Introduce prepare_host_addr
      tcg/loongarch64: Introduce prepare_host_addr
      tcg/mips: Introduce prepare_host_addr
      tcg/ppc: Introduce prepare_host_addr
      tcg/riscv: Introduce prepare_host_addr
      tcg/s390x: Introduce prepare_host_addr
      tcg: Add routines for calling slow-path helpers
      tcg/i386: Convert tcg_out_qemu_ld_slow_path
      tcg/i386: Convert tcg_out_qemu_st_slow_path
      tcg/aarch64: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/arm: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/loongarch64: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/mips: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/ppc: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/riscv: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/s390x: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/loongarch64: Simplify constraints on qemu_ld/st
      tcg/mips: Remove MO_BSWAP handling
      tcg/mips: Reorg tlb load within prepare_host_addr
      tcg/mips: Simplify constraints on qemu_ld/st
      tcg/ppc: Reorg tcg_out_tlb_read
      tcg/ppc: Adjust constraints on qemu_ld/st
      tcg/ppc: Remove unused constraints A, B, C, D
      tcg/ppc: Remove unused constraint J
      tcg/riscv: Simplify constraints on qemu_ld/st
      tcg/s390x: Use ALGFR in constructing softmmu host address
      tcg/s390x: Simplify constraints on qemu_ld/st
      target/mips: Add MO_ALIGN to gen_llwp, gen_scwp
      target/mips: Add missing default_tcg_memop_mask
      target/mips: Use MO_ALIGN instead of 0
      target/mips: Remove TARGET_ALIGNED_ONLY
      target/nios2: Remove TARGET_ALIGNED_ONLY
      target/sh4: Use MO_ALIGN where required
      target/sh4: Remove TARGET_ALIGNED_ONLY
      tcg: Remove TARGET_ALIGNED_ONLY
      accel/tcg: Add cpu_in_serial_context
      accel/tcg: Introduce tlb_read_idx
      accel/tcg: Reorg system mode load helpers
      accel/tcg: Reorg system mode store helpers
      target/loongarch: Do not include tcg-ldst.h

Thomas Huth (2):
      disas: Move softmmu specific code to separate file
      disas: Move disas.c into the target-independent source set

 configs/targets/mips-linux-user.mak | 1 -
 configs/targets/mips-softmmu.mak | 1 -
 configs/targets/mips64-linux-user.mak | 1 -
 configs/targets/mips64-softmmu.mak | 1 -
 configs/targets/mips64el-linux-user.mak | 1 -
 configs/targets/mips64el-softmmu.mak | 1 -
 configs/targets/mipsel-linux-user.mak | 1 -
 configs/targets/mipsel-softmmu.mak | 1 -
 configs/targets/mipsn32-linux-user.mak | 1 -
 configs/targets/mipsn32el-linux-user.mak | 1 -
 configs/targets/nios2-softmmu.mak | 1 -
 configs/targets/sh4-linux-user.mak | 1 -
 configs/targets/sh4-softmmu.mak | 1 -
 configs/targets/sh4eb-linux-user.mak | 1 -
 configs/targets/sh4eb-softmmu.mak | 1 -
 meson.build | 3 -
 accel/tcg/internal.h | 9 +
 accel/tcg/tcg-accel-ops-icount.h | 3 +-
 disas/disas-internal.h | 21 +
 include/disas/disas.h | 23 +-
 include/exec/cpu-common.h | 1 +
 include/exec/cpu-defs.h | 7 +-
 include/exec/cpu_ldst.h | 26 +-
 include/exec/memop.h | 13 +-
 include/exec/poison.h | 1 -
 tcg/loongarch64/tcg-target-con-set.h | 2 -
 tcg/loongarch64/tcg-target-con-str.h | 1 -
 tcg/mips/tcg-target-con-set.h | 13 +-
 tcg/mips/tcg-target-con-str.h | 2 -
 tcg/mips/tcg-target.h | 4 +-
 tcg/ppc/tcg-target-con-set.h | 11 +-
 tcg/ppc/tcg-target-con-str.h | 7 -
 tcg/riscv/tcg-target-con-set.h | 2 -
 tcg/riscv/tcg-target-con-str.h | 1 -
 tcg/s390x/tcg-target-con-set.h | 2 -
 tcg/s390x/tcg-target-con-str.h | 1 -
 accel/tcg/cpu-exec-common.c | 3 +
 accel/tcg/cputlb.c | 1113 ++++++++++++++++-------------
 accel/tcg/tb-maint.c | 2 +-
 accel/tcg/tcg-accel-ops-icount.c | 21 +-
 accel/tcg/tcg-accel-ops-rr.c | 37 +-
 bsd-user/elfload.c | 5 +-
 cpus-common.c | 2 +-
 disas/disas-mon.c | 65 ++
 disas.c => disas/disas.c | 109 +--
 linux-user/elfload.c | 18 +-
 migration/dirtyrate.c | 26 +-
 replay/replay.c | 3 +-
 target/loongarch/csr_helper.c | 1 -
 target/loongarch/iocsr_helper.c | 1 -
 target/m68k/translate.c | 1 +
 target/mips/tcg/mxu_translate.c | 3 +-
 target/nios2/translate.c | 10 +
 target/sh4/translate.c | 102 ++-
 tcg/tcg.c | 480 ++++++++++++-
 trace/control-target.c | 9 +-
 target/mips/tcg/micromips_translate.c.inc | 24 +-
 target/mips/tcg/mips16e_translate.c.inc | 18 +-
 target/mips/tcg/nanomips_translate.c.inc | 32 +-
 tcg/aarch64/tcg-target.c.inc | 347 ++++-----
 tcg/arm/tcg-target.c.inc | 455 +++++-------
 tcg/i386/tcg-target.c.inc | 453 +++++-------
 tcg/loongarch64/tcg-target.c.inc | 313 +++-----
 tcg/mips/tcg-target.c.inc | 870 +++++++---------------
 tcg/ppc/tcg-target.c.inc | 512 ++++++-------
 tcg/riscv/tcg-target.c.inc | 304 ++++----
 tcg/s390x/tcg-target.c.inc | 314 ++++----
 disas/meson.build | 6 +-
 68 files changed, 2788 insertions(+), 3039 deletions(-)
 create mode 100644 disas/disas-internal.h
 create mode 100644 disas/disas-mon.c
 rename disas.c => disas/disas.c (79%)

The following changes since commit aa3a285b5bc56a4208b3b57d4a55291e9c260107:

  Merge tag 'mem-2024-12-21' of https://github.com/davidhildenbrand/qemu into staging (2024-12-22 14:33:27 -0500)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241224

for you to fetch changes up to e4a8e093dc74be049f4829831dce76e5edab0003:

  accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core (2024-12-24 08:32:15 -0800)

----------------------------------------------------------------
tcg/optimize: Remove in-flight mask data from OptContext
fpu: Add float*_muladd_scalbn
fpu: Remove float_muladd_halve_result
fpu: Add float_round_nearest_even_max
fpu: Add float_muladd_suppress_add_product_zero
target/hexagon: Use float32_muladd
accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

----------------------------------------------------------------
Ilya Leoshkevich (1):
      tests/tcg: Do not use inttypes.h in multiarch/system/memory.c

Pierrick Bouvier (1):
      plugins: optimize cpu_index code generation

Richard Henderson (70):
      tcg/optimize: Split out finish_bb, finish_ebb
      tcg/optimize: Split out fold_affected_mask
      tcg/optimize: Copy mask writeback to fold_masks
      tcg/optimize: Split out fold_masks_zs
      tcg/optimize: Augment s_mask from z_mask in fold_masks_zs
      tcg/optimize: Change representation of s_mask
      tcg/optimize: Use finish_folding in fold_add, fold_add_vec, fold_addsub2
      tcg/optimize: Introduce const value accessors for TempOptInfo
      tcg/optimize: Use fold_masks_zs in fold_and
      tcg/optimize: Use fold_masks_zs in fold_andc
      tcg/optimize: Use fold_masks_zs in fold_bswap
      tcg/optimize: Use fold_masks_zs in fold_count_zeros
      tcg/optimize: Use fold_masks_z in fold_ctpop
      tcg/optimize: Use fold_and and fold_masks_z in fold_deposit
      tcg/optimize: Compute sign mask in fold_deposit
      tcg/optimize: Use finish_folding in fold_divide
      tcg/optimize: Use finish_folding in fold_dup, fold_dup2
      tcg/optimize: Use fold_masks_s in fold_eqv
      tcg/optimize: Use fold_masks_z in fold_extract
      tcg/optimize: Use finish_folding in fold_extract2
      tcg/optimize: Use fold_masks_zs in fold_exts
      tcg/optimize: Use fold_masks_z in fold_extu
      tcg/optimize: Use fold_masks_zs in fold_movcond
      tcg/optimize: Use finish_folding in fold_mul*
      tcg/optimize: Use fold_masks_s in fold_nand
      tcg/optimize: Use fold_masks_z in fold_neg_no_const
      tcg/optimize: Use fold_masks_s in fold_nor
      tcg/optimize: Use fold_masks_s in fold_not
      tcg/optimize: Use fold_masks_zs in fold_or
      tcg/optimize: Use fold_masks_zs in fold_orc
      tcg/optimize: Use fold_masks_zs in fold_qemu_ld
      tcg/optimize: Return true from fold_qemu_st, fold_tcg_st
      tcg/optimize: Use finish_folding in fold_remainder
      tcg/optimize: Distinguish simplification in fold_setcond_zmask
      tcg/optimize: Use fold_masks_z in fold_setcond
      tcg/optimize: Use fold_masks_s in fold_negsetcond
      tcg/optimize: Use fold_masks_z in fold_setcond2
      tcg/optimize: Use finish_folding in fold_cmp_vec
      tcg/optimize: Use finish_folding in fold_cmpsel_vec
      tcg/optimize: Use fold_masks_zs in fold_sextract
      tcg/optimize: Use fold_masks_zs, fold_masks_s in fold_shift
      tcg/optimize: Simplify sign bit test in fold_shift
      tcg/optimize: Use finish_folding in fold_sub, fold_sub_vec
      tcg/optimize: Use fold_masks_zs in fold_tcg_ld
      tcg/optimize: Use finish_folding in fold_tcg_ld_memcopy
      tcg/optimize: Use fold_masks_zs in fold_xor
      tcg/optimize: Use finish_folding in fold_bitsel_vec
      tcg/optimize: Use finish_folding as default in tcg_optimize
      tcg/optimize: Remove z_mask, s_mask from OptContext
      tcg/optimize: Re-enable sign-mask optimizations
      tcg/optimize: Move fold_bitsel_vec into alphabetic sort
      tcg/optimize: Move fold_cmp_vec, fold_cmpsel_vec into alphabetic sort
      softfloat: Add float{16,32,64}_muladd_scalbn
      target/arm: Use float*_muladd_scalbn
      target/sparc: Use float*_muladd_scalbn
      softfloat: Remove float_muladd_halve_result
      softfloat: Add float_round_nearest_even_max
      softfloat: Add float_muladd_suppress_add_product_zero
      target/hexagon: Use float32_mul in helper_sfmpy
      target/hexagon: Use float32_muladd for helper_sffma
      target/hexagon: Use float32_muladd for helper_sffms
      target/hexagon: Use float32_muladd_scalbn for helper_sffma_sc
      target/hexagon: Use float32_muladd for helper_sffm[as]_lib
      target/hexagon: Remove internal_fmafx
      target/hexagon: Expand GEN_XF_ROUND
      target/hexagon: Remove Float
      target/hexagon: Remove Double
      target/hexagon: Use mulu64 for int128_mul_6464
      target/hexagon: Simplify internal_mpyhh setup
      accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

 include/exec/translator.h | 14 -
 include/fpu/softfloat-types.h | 2 +
 include/fpu/softfloat.h | 14 +-
 include/hw/core/tcg-cpu-ops.h | 13 +
 target/alpha/cpu.h | 2 +
 target/arm/internals.h | 2 +
 target/avr/cpu.h | 2 +
 target/hexagon/cpu.h | 2 +
 target/hexagon/fma_emu.h | 3 -
 target/hppa/cpu.h | 2 +
 target/i386/tcg/helper-tcg.h | 2 +
 target/loongarch/internals.h | 2 +
 target/m68k/cpu.h | 2 +
 target/microblaze/cpu.h | 2 +
 target/mips/tcg/tcg-internal.h | 2 +
 target/openrisc/cpu.h | 2 +
 target/ppc/cpu.h | 2 +
 target/riscv/cpu.h | 3 +
 target/rx/cpu.h | 2 +
 target/s390x/s390x-internal.h | 2 +
 target/sh4/cpu.h | 2 +
 target/sparc/cpu.h | 2 +
 target/sparc/helper.h | 4 +-
 target/tricore/cpu.h | 2 +
 target/xtensa/cpu.h | 2 +
 accel/tcg/cpu-exec.c | 8 +-
 accel/tcg/plugin-gen.c | 9 +
 accel/tcg/translate-all.c | 8 +-
 fpu/softfloat.c | 63 +--
 target/alpha/cpu.c | 1 +
 target/alpha/translate.c | 4 +-
 target/arm/cpu.c | 1 +
 target/arm/tcg/cpu-v7m.c | 1 +
 target/arm/tcg/helper-a64.c | 6 +-
 target/arm/tcg/translate.c | 5 +-
 target/avr/cpu.c | 1 +
 target/avr/translate.c | 6 +-
 target/hexagon/cpu.c | 1 +
 target/hexagon/fma_emu.c | 496 ++++++---------------
 target/hexagon/op_helper.c | 125 ++----
 target/hexagon/translate.c | 4 +-
 target/hppa/cpu.c | 1 +
 target/hppa/translate.c | 4 +-
 target/i386/tcg/tcg-cpu.c | 1 +
 target/i386/tcg/translate.c | 5 +-
 target/loongarch/cpu.c | 1 +
 target/loongarch/tcg/translate.c | 4 +-
 target/m68k/cpu.c | 1 +
 target/m68k/translate.c | 4 +-
 target/microblaze/cpu.c | 1 +
 target/microblaze/translate.c | 4 +-
 target/mips/cpu.c | 1 +
 target/mips/tcg/translate.c | 4 +-
 target/openrisc/cpu.c | 1 +
 target/openrisc/translate.c | 4 +-
 target/ppc/cpu_init.c | 1 +
 target/ppc/translate.c | 4 +-
 target/riscv/tcg/tcg-cpu.c | 1 +
 target/riscv/translate.c | 4 +-
 target/rx/cpu.c | 1 +
 target/rx/translate.c | 4 +-
 target/s390x/cpu.c | 1 +
 target/s390x/tcg/translate.c | 4 +-
 target/sh4/cpu.c | 1 +
 target/sh4/translate.c | 4 +-
 target/sparc/cpu.c | 1 +
 target/sparc/fop_helper.c | 8 +-
 target/sparc/translate.c | 84 ++--
 target/tricore/cpu.c | 1 +
 target/tricore/translate.c | 5 +-
 target/xtensa/cpu.c | 1 +
 target/xtensa/translate.c | 4 +-
 tcg/optimize.c | 857 +++++++++++++++++++-----------
 tests/tcg/multiarch/system/memory.c | 9 +-
 fpu/softfloat-parts.c.inc | 16 +-
 75 files changed, 866 insertions(+), 1009 deletions(-)
1
Use tcg_out_ld_helper_args and tcg_out_ld_helper_ret.
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
2
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
make check-tcg fails on Fedora with the following error message:
4
5
alpha-linux-gnu-gcc [...] qemu/tests/tcg/multiarch/system/memory.c -o memory [...]
6
qemu/tests/tcg/multiarch/system/memory.c:17:10: fatal error: inttypes.h: No such file or directory
7
17 | #include <inttypes.h>
8
| ^~~~~~~~~~~~
9
compilation terminated.
10
11
The reason is that Fedora has cross-compilers, but no cross-glibc
12
headers. Fix by hardcoding the format specifiers and dropping the
13
include.
14
15
An alternative fix would be to introduce a configure check for
16
inttypes.h. But this would make it impossible to use Fedora
17
cross-compilers for softmmu tests, which used to work so far.
18
19
Fixes: ecbcc9ead2f8 ("tests/tcg: add a system test to check memory instrumentation")
20
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
21
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
22
Message-ID: <20241010085906.226249-1-iii@linux.ibm.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
23
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
24
---
6
tcg/i386/tcg-target.c.inc | 71 +++++++++++++++------------------------
25
tests/tcg/multiarch/system/memory.c | 9 ++++-----
7
1 file changed, 28 insertions(+), 43 deletions(-)
26
1 file changed, 4 insertions(+), 5 deletions(-)
8
27
9
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
28
diff --git a/tests/tcg/multiarch/system/memory.c b/tests/tcg/multiarch/system/memory.c
10
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/i386/tcg-target.c.inc
30
--- a/tests/tcg/multiarch/system/memory.c
12
+++ b/tcg/i386/tcg-target.c.inc
31
+++ b/tests/tcg/multiarch/system/memory.c
13
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
32
@@ -XXX,XX +XXX,XX @@
14
[MO_BEUQ] = helper_be_stq_mmu,
33
15
};
34
#include <stdint.h>
16
35
#include <stdbool.h>
17
+/*
36
-#include <inttypes.h>
18
+ * Because i686 has no register parameters and because x86_64 has xchg
37
#include <minilib.h>
19
+ * to handle addr/data register overlap, we have placed all input arguments
38
20
+ * before we need might need a scratch reg.
39
#ifndef CHECK_UNALIGNED
21
+ *
40
@@ -XXX,XX +XXX,XX @@ int main(void)
22
+ * Even then, a scratch is only needed for l->raddr. Rather than expose
41
int i;
23
+ * a general-purpose scratch when we don't actually know it's available,
42
bool ok = true;
24
+ * use the ra_gen hook to load into RAX if needed.
43
25
+ */
44
- ml_printf("Test data start: 0x%"PRIxPTR"\n", &test_data[0]);
26
+#if TCG_TARGET_REG_BITS == 64
45
- ml_printf("Test data end: 0x%"PRIxPTR"\n", &test_data[TEST_SIZE]);
27
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
46
+ ml_printf("Test data start: 0x%lx\n", (unsigned long)&test_data[0]);
28
+{
47
+ ml_printf("Test data end: 0x%lx\n", (unsigned long)&test_data[TEST_SIZE]);
29
+ if (arg < 0) {
48
30
+ arg = TCG_REG_RAX;
49
/* Run through the unsigned tests first */
31
+ }
50
for (i = 0; i < ARRAY_SIZE(init_ufns) && ok; i++) {
32
+ tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr);
51
@@ -XXX,XX +XXX,XX @@ int main(void)
33
+ return arg;
52
ok = do_signed_reads(true);
34
+}
35
+static const TCGLdstHelperParam ldst_helper_param = {
36
+ .ra_gen = ldst_ra_gen
37
+};
38
+#else
39
+static const TCGLdstHelperParam ldst_helper_param = { };
40
+#endif
41
+
42
/*
43
* Generate code for the slow path for a load at the end of block
44
*/
45
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
{
47
- MemOpIdx oi = l->oi;
48
- MemOp opc = get_memop(oi);
49
+ MemOp opc = get_memop(l->oi);
50
tcg_insn_unit **label_ptr = &l->label_ptr[0];
51
52
/* resolve label address */
53
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
54
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
55
}
53
}
56
54
57
- if (TCG_TARGET_REG_BITS == 32) {
55
- ml_printf("Test data read: %"PRId32"\n", test_read_count);
58
- int ofs = 0;
56
- ml_printf("Test data write: %"PRId32"\n", test_write_count);
59
-
57
+ ml_printf("Test data read: %lu\n", (unsigned long)test_read_count);
60
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
58
+ ml_printf("Test data write: %lu\n", (unsigned long)test_write_count);
61
- ofs += 4;
59
ml_printf("Test complete: %s\n", ok ? "PASSED" : "FAILED");
62
-
60
return ok ? 0 : -1;
63
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
64
- ofs += 4;
65
-
66
- if (TARGET_LONG_BITS == 64) {
67
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
68
- ofs += 4;
69
- }
70
-
71
- tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
72
- ofs += 4;
73
-
74
- tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
75
- } else {
76
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
77
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
78
- l->addrlo_reg);
79
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
80
- tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
81
- (uintptr_t)l->raddr);
82
- }
83
-
84
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
85
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
86
+ tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
87
88
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
89
- TCGMovExtend ext[2] = {
90
- { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
91
- .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
92
- { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
93
- .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
94
- };
95
- tcg_out_movext2(s, &ext[0], &ext[1], -1);
96
- } else {
97
- tcg_out_movext(s, l->type, l->datalo_reg,
98
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
99
- }
100
-
101
- /* Jump to the code corresponding to next IR of qemu_st */
102
tcg_out_jmp(s, l->raddr);
103
return true;
104
}
61
}
105
--
62
--
106
2.34.1
63
2.43.0
107
108
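[Aside, not part of either patch above: the memory.c fix works because the
test is only built for targets whose ABIs make unsigned long at least as wide
as a pointer, so casting and printing with a plain %lx specifier is a safe
stand-in for PRIxPTR. A minimal, hypothetical illustration of the same
pattern:]

    /* print a pointer without pulling in <inttypes.h> */
    ml_printf("addr: 0x%lx\n", (unsigned long)(uintptr_t)&test_data[0]);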
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>

When running with a single vcpu, we can return a constant instead of a
load when accessing cpu_index.
A side effect is that all tcg operations using it are optimized, most
notably scoreboard access.
When running a simple loop in user-mode, the speedup is around 20%.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20241128213843.1023080-1-pierrick.bouvier@linaro.org>
---
 accel/tcg/plugin-gen.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -XXX,XX +XXX,XX @@ static void gen_disable_mem_helper(void)
 
 static TCGv_i32 gen_cpu_index(void)
 {
+    /*
+     * Optimize when we run with a single vcpu. All values using cpu_index,
+     * including scoreboard index, will be optimized out.
+     * User-mode calls tb_flush when setting this flag. In system-mode, all
+     * vcpus are created before generating code.
+     */
+    if (!tcg_cflags_has(current_cpu, CF_PARALLEL)) {
+        return tcg_constant_i32(current_cpu->cpu_index);
+    }
     TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
     tcg_gen_ld_i32(cpu_index, tcg_env,
                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
-- 
2.43.0
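[Illustration only, not part of the patch. The "scoreboard access" mentioned
above is the per-vcpu counter storage exposed to TCG plugins; a plugin that
bumps a per-vcpu counter inline at every translation block is exactly the kind
of generated code that wins when cpu_index folds to a constant. A minimal
sketch, assuming the plugin API names from <qemu-plugin.h> as of recent QEMU;
verify the signatures against your tree before relying on them:]

    #include <stdint.h>
    #include <qemu-plugin.h>

    QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

    /* One uint64_t slot per vcpu. */
    static struct qemu_plugin_scoreboard *counts;

    static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
    {
        /*
         * Inline "add 1" to this vcpu's slot. The slot address is derived
         * from cpu_index, which gen_cpu_index() can now constant-fold when
         * there is a single vcpu (CF_PARALLEL clear).
         */
        qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
            tb, QEMU_PLUGIN_INLINE_ADD_U64,
            qemu_plugin_scoreboard_u64(counts), 1);
    }

    QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                               const qemu_info_t *info,
                                               int argc, char **argv)
    {
        counts = qemu_plugin_scoreboard_new(sizeof(uint64_t));
        qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
        return 0;
    }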
1
Instead of trying to unify all operations on uint64_t, use
1
Call them directly from the opcode switch statement in tcg_optimize,
2
mmu_lookup() to perform the basic tlb hit and resolution.
2
rather than in finish_folding based on opcode flags. Adjust folding
3
Create individual functions to handle access by size.
3
of conditional branches to match.
4
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
accel/tcg/cputlb.c | 408 +++++++++++++++++++++------------------------
8
tcg/optimize.c | 47 +++++++++++++++++++++++++++++++----------------
9
1 file changed, 193 insertions(+), 215 deletions(-)
9
1 file changed, 31 insertions(+), 16 deletions(-)
10
10
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
13
--- a/tcg/optimize.c
14
+++ b/accel/tcg/cputlb.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ store_memop(void *haddr, uint64_t val, MemOp op)
15
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
16
}
16
}
17
}
17
}
18
18
19
-static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
19
+static void finish_bb(OptContext *ctx)
20
- MemOpIdx oi, uintptr_t retaddr);
20
+{
21
-
21
+ /* We only optimize memory barriers across basic blocks. */
22
-static void __attribute__((noinline))
22
+ ctx->prev_mb = NULL;
23
-store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
23
+}
24
- uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
24
+
25
- bool big_endian)
25
+static void finish_ebb(OptContext *ctx)
26
+/**
26
+{
27
+ * do_st_mmio_leN:
27
+ finish_bb(ctx);
28
+ * @env: cpu context
28
+ /* We only optimize across extended basic blocks. */
29
+ * @p: translation parameters
29
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
30
+ * @val_le: data to store
30
+ remove_mem_copy_all(ctx);
31
+ * @mmu_idx: virtual address context
31
+}
32
+ * @ra: return address into tcg generated code, or 0
32
+
33
+ *
33
static void finish_folding(OptContext *ctx, TCGOp *op)
34
+ * Store @p->size bytes at @p->addr, which is memory-mapped i/o.
35
+ * The bytes to store are extracted in little-endian order from @val_le;
36
+ * return the bytes of @val_le beyond @p->size that have not been stored.
37
+ */
38
+static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
39
+ uint64_t val_le, int mmu_idx, uintptr_t ra)
40
{
34
{
41
- uintptr_t index, index2;
35
const TCGOpDef *def = &tcg_op_defs[op->opc];
42
- CPUTLBEntry *entry, *entry2;
36
int i, nb_oargs;
43
- target_ulong page1, page2, tlb_addr, tlb_addr2;
44
- MemOpIdx oi;
45
- size_t size2;
46
- int i;
47
+ CPUTLBEntryFull *full = p->full;
48
+ target_ulong addr = p->addr;
49
+ int i, size = p->size;
50
37
51
- /*
38
- /*
52
- * Ensure the second page is in the TLB. Note that the first page
39
- * We only optimize extended basic blocks. If the opcode ends a BB
53
- * is already guaranteed to be filled, and that the second page
40
- * and is not a conditional branch, reset all temp data.
54
- * cannot evict the first. An exception to this rule is PAGE_WRITE_INV
55
- * handling: the first page could have evicted itself.
56
- */
41
- */
57
- page1 = addr & TARGET_PAGE_MASK;
42
- if (def->flags & TCG_OPF_BB_END) {
58
- page2 = (addr + size) & TARGET_PAGE_MASK;
43
- ctx->prev_mb = NULL;
59
- size2 = (addr + size) & ~TARGET_PAGE_MASK;
44
- if (!(def->flags & TCG_OPF_COND_BRANCH)) {
60
- index2 = tlb_index(env, mmu_idx, page2);
45
- memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
61
- entry2 = tlb_entry(env, mmu_idx, page2);
46
- remove_mem_copy_all(ctx);
62
-
63
- tlb_addr2 = tlb_addr_write(entry2);
64
- if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
65
- if (!victim_tlb_hit(env, mmu_idx, index2, MMU_DATA_STORE, page2)) {
66
- tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
67
- mmu_idx, retaddr);
68
- index2 = tlb_index(env, mmu_idx, page2);
69
- entry2 = tlb_entry(env, mmu_idx, page2);
70
- }
71
- tlb_addr2 = tlb_addr_write(entry2);
72
+ QEMU_IOTHREAD_LOCK_GUARD();
73
+ for (i = 0; i < size; i++, val_le >>= 8) {
74
+ io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
75
}
76
+ return val_le;
77
+}
78
79
- index = tlb_index(env, mmu_idx, addr);
80
- entry = tlb_entry(env, mmu_idx, addr);
81
- tlb_addr = tlb_addr_write(entry);
82
+/**
83
+ * do_st_bytes_leN:
84
+ * @p: translation parameters
85
+ * @val_le: data to store
86
+ *
87
+ * Store @p->size bytes at @p->haddr, which is RAM.
88
+ * The bytes to store are extracted in little-endian order from @val_le;
89
+ * return the bytes of @val_le beyond @p->size that have not been stored.
90
+ */
91
+static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
92
+{
93
+ uint8_t *haddr = p->haddr;
94
+ int i, size = p->size;
95
96
- /*
97
- * Handle watchpoints. Since this may trap, all checks
98
- * must happen before any store.
99
- */
100
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
101
- cpu_check_watchpoint(env_cpu(env), addr, size - size2,
102
- env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
103
- BP_MEM_WRITE, retaddr);
104
- }
105
- if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
106
- cpu_check_watchpoint(env_cpu(env), page2, size2,
107
- env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
108
- BP_MEM_WRITE, retaddr);
109
+ for (i = 0; i < size; i++, val_le >>= 8) {
110
+ haddr[i] = val_le;
111
}
112
+ return val_le;
113
+}
114
115
- /*
116
- * XXX: not efficient, but simple.
117
- * This loop must go in the forward direction to avoid issues
118
- * with self-modifying code in Windows 64-bit.
119
- */
120
- oi = make_memop_idx(MO_UB, mmu_idx);
121
- if (big_endian) {
122
- for (i = 0; i < size; ++i) {
123
- /* Big-endian extract. */
124
- uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
125
- full_stb_mmu(env, addr + i, val8, oi, retaddr);
126
- }
127
+/*
128
+ * Wrapper for the above.
129
+ */
130
+static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
131
+ uint64_t val_le, int mmu_idx, uintptr_t ra)
132
+{
133
+ if (unlikely(p->flags & TLB_MMIO)) {
134
+ return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
135
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
136
+ return val_le >> (p->size * 8);
137
} else {
138
- for (i = 0; i < size; ++i) {
139
- /* Little-endian extract. */
140
- uint8_t val8 = val >> (i * 8);
141
- full_stb_mmu(env, addr + i, val8, oi, retaddr);
142
- }
143
+ return do_st_bytes_leN(p, val_le);
144
}
145
}
146
147
-static inline void QEMU_ALWAYS_INLINE
148
-store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
149
- MemOpIdx oi, uintptr_t retaddr, MemOp op)
150
+static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
151
+ int mmu_idx, uintptr_t ra)
152
{
153
- const unsigned a_bits = get_alignment_bits(get_memop(oi));
154
- const size_t size = memop_size(op);
155
- uintptr_t mmu_idx = get_mmuidx(oi);
156
- uintptr_t index;
157
- CPUTLBEntry *entry;
158
- target_ulong tlb_addr;
159
- void *haddr;
160
-
161
- tcg_debug_assert(mmu_idx < NB_MMU_MODES);
162
-
163
- /* Handle CPU specific unaligned behaviour */
164
- if (addr & ((1 << a_bits) - 1)) {
165
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
166
- mmu_idx, retaddr);
167
+ if (unlikely(p->flags & TLB_MMIO)) {
168
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
169
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
170
+ /* nothing */
171
+ } else {
172
+ *(uint8_t *)p->haddr = val;
173
}
174
-
175
- index = tlb_index(env, mmu_idx, addr);
176
- entry = tlb_entry(env, mmu_idx, addr);
177
- tlb_addr = tlb_addr_write(entry);
178
-
179
- /* If the TLB entry is for a different page, reload and try again. */
180
- if (!tlb_hit(tlb_addr, addr)) {
181
- if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
182
- addr & TARGET_PAGE_MASK)) {
183
- tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
184
- mmu_idx, retaddr);
185
- index = tlb_index(env, mmu_idx, addr);
186
- entry = tlb_entry(env, mmu_idx, addr);
187
- }
188
- tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
189
- }
190
-
191
- /* Handle anything that isn't just a straight memory access. */
192
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
193
- CPUTLBEntryFull *full;
194
- bool need_swap;
195
-
196
- /* For anything that is unaligned, recurse through byte stores. */
197
- if ((addr & (size - 1)) != 0) {
198
- goto do_unaligned_access;
199
- }
200
-
201
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
202
-
203
- /* Handle watchpoints. */
204
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
205
- /* On watchpoint hit, this will longjmp out. */
206
- cpu_check_watchpoint(env_cpu(env), addr, size,
207
- full->attrs, BP_MEM_WRITE, retaddr);
208
- }
209
-
210
- need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
211
-
212
- /* Handle I/O access. */
213
- if (tlb_addr & TLB_MMIO) {
214
- io_writex(env, full, mmu_idx, val, addr, retaddr,
215
- op ^ (need_swap * MO_BSWAP));
216
- return;
217
- }
218
-
219
- /* Ignore writes to ROM. */
220
- if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
221
- return;
222
- }
223
-
224
- /* Handle clean RAM pages. */
225
- if (tlb_addr & TLB_NOTDIRTY) {
226
- notdirty_write(env_cpu(env), addr, size, full, retaddr);
227
- }
228
-
229
- haddr = (void *)((uintptr_t)addr + entry->addend);
230
-
231
- /*
232
- * Keep these two store_memop separate to ensure that the compiler
233
- * is able to fold the entire function to a single instruction.
234
- * There is a build-time assert inside to remind you of this. ;-)
235
- */
236
- if (unlikely(need_swap)) {
237
- store_memop(haddr, val, op ^ MO_BSWAP);
238
- } else {
239
- store_memop(haddr, val, op);
240
- }
47
- }
241
- return;
48
- return;
242
- }
49
- }
243
-
50
-
244
- /* Handle slow unaligned access (it spans two pages or IO). */
51
nb_oargs = def->nb_oargs;
245
- if (size > 1
52
for (i = 0; i < nb_oargs; i++) {
246
- && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
53
TCGTemp *ts = arg_temp(op->args[i]);
247
- >= TARGET_PAGE_SIZE)) {
54
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
248
- do_unaligned_access:
55
if (i > 0) {
249
- store_helper_unaligned(env, addr, val, retaddr, size,
56
op->opc = INDEX_op_br;
250
- mmu_idx, memop_big_endian(op));
57
op->args[0] = op->args[3];
251
- return;
58
+ finish_ebb(ctx);
252
- }
59
+ } else {
253
-
60
+ finish_bb(ctx);
254
- haddr = (void *)((uintptr_t)addr + entry->addend);
61
}
255
- store_memop(haddr, val, op);
62
- return false;
63
+ return true;
256
}
64
}
257
65
258
-static void __attribute__((noinline))
66
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
259
-full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
67
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
260
- MemOpIdx oi, uintptr_t retaddr)
68
}
261
+static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
69
op->opc = INDEX_op_br;
262
+ int mmu_idx, MemOp memop, uintptr_t ra)
70
op->args[0] = label;
263
{
71
- break;
264
- validate_memop(oi, MO_UB);
72
+ finish_ebb(ctx);
265
- store_helper(env, addr, val, oi, retaddr, MO_UB);
73
+ return true;
266
+ if (unlikely(p->flags & TLB_MMIO)) {
74
}
267
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
75
- return false;
268
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
269
+ /* nothing */
270
+ } else {
271
+ /* Swap to host endian if necessary, then store. */
272
+ if (memop & MO_BSWAP) {
273
+ val = bswap16(val);
274
+ }
275
+ store_memop(p->haddr, val, MO_UW);
276
+ }
277
+}
278
+
76
+
279
+static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
77
+ finish_bb(ctx);
280
+ int mmu_idx, MemOp memop, uintptr_t ra)
78
+ return true;
281
+{
282
+ if (unlikely(p->flags & TLB_MMIO)) {
283
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
284
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
285
+ /* nothing */
286
+ } else {
287
+ /* Swap to host endian if necessary, then store. */
288
+ if (memop & MO_BSWAP) {
289
+ val = bswap32(val);
290
+ }
291
+ store_memop(p->haddr, val, MO_UL);
292
+ }
293
+}
294
+
295
+static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
296
+ int mmu_idx, MemOp memop, uintptr_t ra)
297
+{
298
+ if (unlikely(p->flags & TLB_MMIO)) {
299
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
300
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
301
+ /* nothing */
302
+ } else {
303
+ /* Swap to host endian if necessary, then store. */
304
+ if (memop & MO_BSWAP) {
305
+ val = bswap64(val);
306
+ }
307
+ store_memop(p->haddr, val, MO_UQ);
308
+ }
309
}
79
}
310
80
311
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
81
static bool fold_bswap(OptContext *ctx, TCGOp *op)
312
- MemOpIdx oi, uintptr_t retaddr)
82
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
+ MemOpIdx oi, uintptr_t ra)
83
CASE_OP_32_64_VEC(xor):
314
{
84
done = fold_xor(&ctx, op);
315
- full_stb_mmu(env, addr, val, oi, retaddr);
85
break;
316
+ MMULookupLocals l;
86
+ case INDEX_op_set_label:
317
+ bool crosspage;
87
+ case INDEX_op_br:
318
+
88
+ case INDEX_op_exit_tb:
319
+ validate_memop(oi, MO_UB);
89
+ case INDEX_op_goto_tb:
320
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
90
+ case INDEX_op_goto_ptr:
321
+ tcg_debug_assert(!crosspage);
91
+ finish_ebb(&ctx);
322
+
92
+ done = true;
323
+ do_st_1(env, &l.page[0], val, l.mmu_idx, ra);
93
+ break;
324
}
94
default:
325
95
break;
326
-static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
96
}
327
- MemOpIdx oi, uintptr_t retaddr)
328
+static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
329
+ MemOpIdx oi, uintptr_t ra)
330
{
331
- validate_memop(oi, MO_LEUW);
332
- store_helper(env, addr, val, oi, retaddr, MO_LEUW);
333
+ MMULookupLocals l;
334
+ bool crosspage;
335
+ uint8_t a, b;
336
+
337
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
338
+ if (likely(!crosspage)) {
339
+ do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
340
+ return;
341
+ }
342
+
343
+ if ((l.memop & MO_BSWAP) == MO_LE) {
344
+ a = val, b = val >> 8;
345
+ } else {
346
+ b = val, a = val >> 8;
347
+ }
348
+ do_st_1(env, &l.page[0], a, l.mmu_idx, ra);
349
+ do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
350
}
351
352
void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
353
MemOpIdx oi, uintptr_t retaddr)
354
{
355
- full_le_stw_mmu(env, addr, val, oi, retaddr);
356
-}
357
-
358
-static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
359
- MemOpIdx oi, uintptr_t retaddr)
360
-{
361
- validate_memop(oi, MO_BEUW);
362
- store_helper(env, addr, val, oi, retaddr, MO_BEUW);
363
+ validate_memop(oi, MO_LEUW);
364
+ do_st2_mmu(env, addr, val, oi, retaddr);
365
}
366
367
void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
368
MemOpIdx oi, uintptr_t retaddr)
369
{
370
- full_be_stw_mmu(env, addr, val, oi, retaddr);
371
+ validate_memop(oi, MO_BEUW);
372
+ do_st2_mmu(env, addr, val, oi, retaddr);
373
}
374
375
-static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
376
- MemOpIdx oi, uintptr_t retaddr)
377
+static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
378
+ MemOpIdx oi, uintptr_t ra)
379
{
380
- validate_memop(oi, MO_LEUL);
381
- store_helper(env, addr, val, oi, retaddr, MO_LEUL);
382
+ MMULookupLocals l;
383
+ bool crosspage;
384
+
385
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
386
+ if (likely(!crosspage)) {
387
+ do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
388
+ return;
389
+ }
390
+
391
+ /* Swap to little endian for simplicity, then store by bytes. */
392
+ if ((l.memop & MO_BSWAP) != MO_LE) {
393
+ val = bswap32(val);
394
+ }
395
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
396
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
397
}
398
399
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
400
MemOpIdx oi, uintptr_t retaddr)
401
{
402
- full_le_stl_mmu(env, addr, val, oi, retaddr);
403
-}
404
-
405
-static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
406
- MemOpIdx oi, uintptr_t retaddr)
407
-{
408
- validate_memop(oi, MO_BEUL);
409
- store_helper(env, addr, val, oi, retaddr, MO_BEUL);
410
+ validate_memop(oi, MO_LEUL);
411
+ do_st4_mmu(env, addr, val, oi, retaddr);
412
}
413
414
void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
415
MemOpIdx oi, uintptr_t retaddr)
416
{
417
- full_be_stl_mmu(env, addr, val, oi, retaddr);
418
+ validate_memop(oi, MO_BEUL);
419
+ do_st4_mmu(env, addr, val, oi, retaddr);
420
+}
421
+
422
+static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
423
+ MemOpIdx oi, uintptr_t ra)
424
+{
425
+ MMULookupLocals l;
426
+ bool crosspage;
427
+
428
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
429
+ if (likely(!crosspage)) {
430
+ do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
431
+ return;
432
+ }
433
+
434
+ /* Swap to little endian for simplicity, then store by bytes. */
435
+ if ((l.memop & MO_BSWAP) != MO_LE) {
436
+ val = bswap64(val);
437
+ }
438
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
439
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
440
}
441
442
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
443
MemOpIdx oi, uintptr_t retaddr)
444
{
445
validate_memop(oi, MO_LEUQ);
446
- store_helper(env, addr, val, oi, retaddr, MO_LEUQ);
447
+ do_st8_mmu(env, addr, val, oi, retaddr);
448
}
449
450
void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
451
MemOpIdx oi, uintptr_t retaddr)
452
{
453
validate_memop(oi, MO_BEUQ);
454
- store_helper(env, addr, val, oi, retaddr, MO_BEUQ);
455
+ do_st8_mmu(env, addr, val, oi, retaddr);
456
}
457
458
/*
459
* Store Helpers for cpu_ldst.h
460
*/
461
462
-typedef void FullStoreHelper(CPUArchState *env, target_ulong addr,
463
- uint64_t val, MemOpIdx oi, uintptr_t retaddr);
464
-
465
-static inline void cpu_store_helper(CPUArchState *env, target_ulong addr,
466
- uint64_t val, MemOpIdx oi, uintptr_t ra,
467
- FullStoreHelper *full_store)
468
+static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
469
{
470
- full_store(env, addr, val, oi, ra);
471
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
472
}
473
474
void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
475
MemOpIdx oi, uintptr_t retaddr)
476
{
477
- cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu);
478
+ helper_ret_stb_mmu(env, addr, val, oi, retaddr);
479
+ plugin_store_cb(env, addr, oi);
480
}
481
482
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
483
MemOpIdx oi, uintptr_t retaddr)
484
{
485
- cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu);
486
+ helper_be_stw_mmu(env, addr, val, oi, retaddr);
487
+ plugin_store_cb(env, addr, oi);
488
}
489
490
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
491
MemOpIdx oi, uintptr_t retaddr)
492
{
493
- cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu);
494
+ helper_be_stl_mmu(env, addr, val, oi, retaddr);
495
+ plugin_store_cb(env, addr, oi);
496
}
497
498
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
499
MemOpIdx oi, uintptr_t retaddr)
500
{
501
- cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu);
502
+ helper_be_stq_mmu(env, addr, val, oi, retaddr);
503
+ plugin_store_cb(env, addr, oi);
504
}
505
506
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
507
MemOpIdx oi, uintptr_t retaddr)
508
{
509
- cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu);
510
+ helper_le_stw_mmu(env, addr, val, oi, retaddr);
511
+ plugin_store_cb(env, addr, oi);
512
}
513
514
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
515
MemOpIdx oi, uintptr_t retaddr)
516
{
517
- cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu);
518
+ helper_le_stl_mmu(env, addr, val, oi, retaddr);
519
+ plugin_store_cb(env, addr, oi);
520
}
521
522
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
523
MemOpIdx oi, uintptr_t retaddr)
524
{
525
- cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
526
+ helper_le_stq_mmu(env, addr, val, oi, retaddr);
527
+ plugin_store_cb(env, addr, oi);
528
}
529
530
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
531
--
97
--
532
2.34.1
98
2.43.0
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
1
There are only a few logical operations which can compute
2
and tcg_out_st_helper_args.
2
an "affected" mask. Split out handling of this optimization
3
to a separate function, only to be called when applicable.
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Remove the a_mask field from OptContext, as the mask is
5
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
6
no longer stored anywhere.
7
8
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
10
---
8
tcg/ppc/tcg-target.c.inc | 88 ++++++++++++----------------------------
11
tcg/optimize.c | 42 +++++++++++++++++++++++++++---------------
9
1 file changed, 26 insertions(+), 62 deletions(-)
12
1 file changed, 27 insertions(+), 15 deletions(-)
10
13
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
16
--- a/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target.c.inc
17
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
18
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
16
[MO_BEUQ] = helper_be_stq_mmu,
19
QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
17
};
20
18
21
/* In flight values from optimization. */
19
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
22
- uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
20
+{
23
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
21
+ if (arg < 0) {
24
uint64_t s_mask; /* mask of clrsb(value) bits */
22
+ arg = TCG_REG_TMP1;
25
TCGType type;
23
+ }
26
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
24
+ tcg_out32(s, MFSPR | RT(arg) | LR);
27
25
+ return arg;
28
static bool fold_masks(OptContext *ctx, TCGOp *op)
29
{
30
- uint64_t a_mask = ctx->a_mask;
31
uint64_t z_mask = ctx->z_mask;
32
uint64_t s_mask = ctx->s_mask;
33
34
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
35
* type changing opcodes.
36
*/
37
if (ctx->type == TCG_TYPE_I32) {
38
- a_mask = (int32_t)a_mask;
39
z_mask = (int32_t)z_mask;
40
s_mask |= MAKE_64BIT_MASK(32, 32);
41
ctx->z_mask = z_mask;
42
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
43
if (z_mask == 0) {
44
return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
45
}
46
+ return false;
26
+}
47
+}
27
+
48
+
28
+/*
49
+/*
29
+ * For the purposes of ppc32 sorting 4 input registers into 4 argument
50
+ * An "affected" mask bit is 0 if and only if the result is identical
30
+ * registers, there is an outside chance we would require 3 temps.
51
+ * to the first input. Thus if the entire mask is 0, the operation
31
+ * Because of constraints, no inputs are in r3, and env will not be
52
+ * is equivalent to a copy.
32
+ * placed into r3 until after the sorting is done, and is thus free.
33
+ */
53
+ */
34
+static const TCGLdstHelperParam ldst_helper_param = {
54
+static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
35
+ .ra_gen = ldst_ra_gen,
55
+{
36
+ .ntmp = 3,
56
+ if (ctx->type == TCG_TYPE_I32) {
37
+ .tmp = { TCG_REG_TMP1, TCG_REG_R0, TCG_REG_R3 }
57
+ a_mask = (uint32_t)a_mask;
38
+};
58
+ }
39
+
59
if (a_mask == 0) {
40
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
60
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
41
{
42
- MemOpIdx oi = lb->oi;
43
- MemOp opc = get_memop(oi);
44
- TCGReg hi, lo, arg = TCG_REG_R3;
45
+ MemOp opc = get_memop(lb->oi);
46
47
if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
48
return false;
49
}
61
}
50
62
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
51
- tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
63
* Known-zeros does not imply known-ones. Therefore unless
52
-
64
* arg2 is constant, we can't infer affected bits from it.
53
- lo = lb->addrlo_reg;
65
*/
54
- hi = lb->addrhi_reg;
66
- if (arg_is_const(op->args[2])) {
55
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
67
- ctx->a_mask = z1 & ~z2;
56
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
68
+ if (arg_is_const(op->args[2]) &&
57
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
69
+ fold_affected_mask(ctx, op, z1 & ~z2)) {
58
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
70
+ return true;
59
- } else {
60
- /* If the address needed to be zero-extended, we'll have already
61
- placed it in R4. The only remaining case is 64-bit guest. */
62
- tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
63
- }
64
-
65
- tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
66
- tcg_out32(s, MFSPR | RT(arg) | LR);
67
-
68
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
69
tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
70
-
71
- lo = lb->datalo_reg;
72
- hi = lb->datahi_reg;
73
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
74
- tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
75
- tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
76
- } else {
77
- tcg_out_movext(s, lb->type, lo,
78
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_R3);
79
- }
80
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
81
82
tcg_out_b(s, 0, lb->raddr);
83
return true;
84
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
85
86
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
87
{
88
- MemOpIdx oi = lb->oi;
89
- MemOp opc = get_memop(oi);
90
- MemOp s_bits = opc & MO_SIZE;
91
- TCGReg hi, lo, arg = TCG_REG_R3;
92
+ MemOp opc = get_memop(lb->oi);
93
94
if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
95
return false;
96
}
71
}
97
72
98
- tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
73
return fold_masks(ctx, op);
99
-
74
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
100
- lo = lb->addrlo_reg;
75
*/
101
- hi = lb->addrhi_reg;
76
if (arg_is_const(op->args[2])) {
102
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
77
uint64_t z2 = ~arg_info(op->args[2])->z_mask;
103
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
78
- ctx->a_mask = z1 & ~z2;
104
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
79
+ if (fold_affected_mask(ctx, op, z1 & ~z2)) {
105
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
80
+ return true;
106
- } else {
81
+ }
107
- /* If the address needed to be zero-extended, we'll have already
82
z1 &= z2;
108
- placed it in R4. The only remaining case is 64-bit guest. */
83
}
109
- tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
84
ctx->z_mask = z1;
110
- }
85
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
111
-
86
112
- lo = lb->datalo_reg;
87
z_mask_old = arg_info(op->args[1])->z_mask;
113
- hi = lb->datahi_reg;
88
z_mask = extract64(z_mask_old, pos, len);
114
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
89
- if (pos == 0) {
115
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
90
- ctx->a_mask = z_mask_old ^ z_mask;
116
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
91
+ if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
117
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
92
+ return true;
118
- } else {
93
}
119
- tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
94
ctx->z_mask = z_mask;
120
- arg++, lb->type, s_bits, lo);
95
ctx->s_mask = smask_from_zmask(z_mask);
121
- }
96
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
122
-
97
123
- tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
98
ctx->z_mask = z_mask;
124
- tcg_out32(s, MFSPR | RT(arg) | LR);
99
ctx->s_mask = s_mask;
125
-
100
- if (!type_change) {
126
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
101
- ctx->a_mask = s_mask & ~s_mask_old;
127
tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
102
+ if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
128
103
+ return true;
129
tcg_out_b(s, 0, lb->raddr);
104
}
105
106
return fold_masks(ctx, op);
107
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
108
109
ctx->z_mask = z_mask;
110
ctx->s_mask = smask_from_zmask(z_mask);
111
- if (!type_change) {
112
- ctx->a_mask = z_mask_old ^ z_mask;
113
+ if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
114
+ return true;
115
}
116
return fold_masks(ctx, op);
117
}
118
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
119
s_mask |= MAKE_64BIT_MASK(len, 64 - len);
120
ctx->s_mask = s_mask;
121
122
- if (pos == 0) {
123
- ctx->a_mask = s_mask & ~s_mask_old;
124
+ if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
125
+ return true;
126
}
127
128
return fold_masks(ctx, op);
129
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
130
}
131
132
/* Assume all bits affected, no bits known zero, no sign reps. */
133
- ctx.a_mask = -1;
134
ctx.z_mask = -1;
135
ctx.s_mask = 0;
136
130
--
137
--
131
2.34.1
138
2.43.0
132
133
1
These constraints have not been used for quite some time.
1
Use of fold_masks should be restricted to those opcodes that
2
can reliably make use of it -- those with a single output,
3
and from higher-level folders that set up the masks.
4
Prepare for conversion of each folder in turn.
2
5
3
Fixes: 77b73de67632 ("Use rem/div[u]_i32 drop div[u]2_i32")
6
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/ppc/tcg-target-con-str.h | 4 ----
9
tcg/optimize.c | 17 ++++++++++++++---
10
1 file changed, 4 deletions(-)
10
1 file changed, 14 insertions(+), 3 deletions(-)
11
11
12
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/ppc/tcg-target-con-str.h
14
--- a/tcg/optimize.c
15
+++ b/tcg/ppc/tcg-target-con-str.h
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
17
*/
17
{
18
REGS('r', ALL_GENERAL_REGS)
18
uint64_t z_mask = ctx->z_mask;
19
REGS('v', ALL_VECTOR_REGS)
19
uint64_t s_mask = ctx->s_mask;
20
-REGS('A', 1u << TCG_REG_R3)
20
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
21
-REGS('B', 1u << TCG_REG_R4)
21
+ TCGTemp *ts;
22
-REGS('C', 1u << TCG_REG_R5)
22
+ TempOptInfo *ti;
23
-REGS('D', 1u << TCG_REG_R6)
23
+
24
+ /* Only single-output opcodes are supported here. */
25
+ tcg_debug_assert(def->nb_oargs == 1);
26
27
/*
28
* 32-bit ops generate 32-bit results, which for the purpose of
29
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
30
if (ctx->type == TCG_TYPE_I32) {
31
z_mask = (int32_t)z_mask;
32
s_mask |= MAKE_64BIT_MASK(32, 32);
33
- ctx->z_mask = z_mask;
34
- ctx->s_mask = s_mask;
35
}
36
37
if (z_mask == 0) {
38
return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
39
}
40
- return false;
41
+
42
+ ts = arg_temp(op->args[0]);
43
+ reset_ts(ctx, ts);
44
+
45
+ ti = ts_info(ts);
46
+ ti->z_mask = z_mask;
47
+ ti->s_mask = s_mask;
48
+ return true;
49
}
24
50
25
/*
51
/*
26
* Define constraint letters for constants:
27
--
52
--
28
2.34.1
53
2.43.0
29
30
1
Like cpu_in_exclusive_context, but also true if
1
Add a routine to which masks can be passed directly, rather than
2
there is no other cpu against which we could race.
2
storing them into OptContext. To be used in upcoming patches.
3
3
4
Use it in tb_flush as a direct replacement.
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Use it in cpu_loop_exit_atomic to ensure that there
6
is no loop against cpu_exec_step_atomic.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
6
---
13
accel/tcg/internal.h | 9 +++++++++
7
tcg/optimize.c | 15 ++++++++++++---
14
accel/tcg/cpu-exec-common.c | 3 +++
8
1 file changed, 12 insertions(+), 3 deletions(-)
15
accel/tcg/tb-maint.c | 2 +-
16
3 files changed, 13 insertions(+), 1 deletion(-)
17
9
18
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
19
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
20
--- a/accel/tcg/internal.h
12
--- a/tcg/optimize.c
21
+++ b/accel/tcg/internal.h
13
+++ b/tcg/optimize.c
22
@@ -XXX,XX +XXX,XX @@ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
23
}
15
return fold_const2(ctx, op);
24
}
16
}
25
17
18
-static bool fold_masks(OptContext *ctx, TCGOp *op)
26
+/*
19
+/*
27
+ * Return true if CS is not running in parallel with other cpus, either
20
+ * Record "zero" and "sign" masks for the single output of @op.
28
+ * because there are no other cpus or we are within an exclusive context.
21
+ * See TempOptInfo definition of z_mask and s_mask.
22
+ * If z_mask allows, fold the output to constant zero.
29
+ */
23
+ */
30
+static inline bool cpu_in_serial_context(CPUState *cs)
24
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
25
+ uint64_t z_mask, uint64_t s_mask)
26
{
27
- uint64_t z_mask = ctx->z_mask;
28
- uint64_t s_mask = ctx->s_mask;
29
const TCGOpDef *def = &tcg_op_defs[op->opc];
30
TCGTemp *ts;
31
TempOptInfo *ti;
32
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
33
return true;
34
}
35
36
+static bool fold_masks(OptContext *ctx, TCGOp *op)
31
+{
37
+{
32
+ return !(cs->tcg_cflags & CF_PARALLEL) || cpu_in_exclusive_context(cs);
38
+ return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
33
+}
39
+}
34
+
40
+
35
extern int64_t max_delay;
41
/*
36
extern int64_t max_advance;
42
* An "affected" mask bit is 0 if and only if the result is identical
37
43
* to the first input. Thus if the entire mask is 0, the operation
38
diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/accel/tcg/cpu-exec-common.c
41
+++ b/accel/tcg/cpu-exec-common.c
42
@@ -XXX,XX +XXX,XX @@
43
#include "sysemu/tcg.h"
44
#include "exec/exec-all.h"
45
#include "qemu/plugin.h"
46
+#include "internal.h"
47
48
bool tcg_allowed;
49
50
@@ -XXX,XX +XXX,XX @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
51
52
void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
53
{
54
+ /* Prevent looping if already executing in a serial context. */
55
+ g_assert(!cpu_in_serial_context(cpu));
56
cpu->exception_index = EXCP_ATOMIC;
57
cpu_loop_exit_restore(cpu, pc);
58
}
59
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/accel/tcg/tb-maint.c
62
+++ b/accel/tcg/tb-maint.c
63
@@ -XXX,XX +XXX,XX @@ void tb_flush(CPUState *cpu)
64
if (tcg_enabled()) {
65
unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
66
67
- if (cpu_in_exclusive_context(cpu)) {
68
+ if (cpu_in_serial_context(cpu)) {
69
do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
70
} else {
71
async_safe_run_on_cpu(cpu, do_tb_flush,
72
--
44
--
73
2.34.1
45
2.43.0
74
75
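For the tcg/optimize.c side of this comparison, the TCG_TYPE_I32 canonicalization inside fold_masks_zs() is easiest to see with a concrete number. The snippet below is a standalone illustration only, not part of the series: it copies the two statements from that branch, expands MAKE_64BIT_MASK locally instead of pulling in QEMU headers, and the sample z_mask is an arbitrary choice.

/* Standalone illustration of the TCG_TYPE_I32 branch of fold_masks_zs():
 * 32-bit results are tracked sign-extended to 64 bits, so both masks
 * are widened accordingly.  Sample value chosen for the example. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MAKE_64BIT_MASK(shift, length) \
    (((~0ULL) >> (64 - (length))) << (shift))

int main(void)
{
    uint64_t z_mask = 0x80000000;   /* only bit 31 of the i32 result may be set */
    uint64_t s_mask = 0;            /* no known redundant sign bits             */

    z_mask = (int32_t)z_mask;           /* sign-extend the zero mask    */
    s_mask |= MAKE_64BIT_MASK(32, 32);  /* the high half mirrors bit 31 */

    printf("z_mask = %016" PRIx64 "\n", z_mask);   /* ffffffff80000000 */
    printf("s_mask = %016" PRIx64 "\n", s_mask);   /* ffffffff00000000 */
    return 0;
}

The widening matches the comment at the top of fold_masks_zs() about how 32-bit results are tracked by the optimizer.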
1
While performing the load in the delay slot of the call to the common
1
Consider the passed s_mask to be a minimum deduced from
2
bswap helper function is cute, it is not worth the added complexity.
2
either existing s_mask or from a sign-extension operation.
3
We may be able to deduce more from the set of known zeros.
4
Remove identical logic from several opcode folders.
3
5
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
8
---
7
tcg/mips/tcg-target.h | 4 +-
9
tcg/optimize.c | 21 ++++++---------------
8
tcg/mips/tcg-target.c.inc | 284 ++++++--------------------------------
10
1 file changed, 6 insertions(+), 15 deletions(-)
9
2 files changed, 48 insertions(+), 240 deletions(-)
10
11
11
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/mips/tcg-target.h
14
--- a/tcg/optimize.c
14
+++ b/tcg/mips/tcg-target.h
15
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
16
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
16
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
17
* Record "zero" and "sign" masks for the single output of @op.
17
#endif
18
* See TempOptInfo definition of z_mask and s_mask.
18
19
* If z_mask allows, fold the output to constant zero.
19
-#define TCG_TARGET_DEFAULT_MO (0)
20
+ * The passed s_mask may be augmented by z_mask.
20
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
21
*/
21
+#define TCG_TARGET_DEFAULT_MO 0
22
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
22
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
23
uint64_t z_mask, uint64_t s_mask)
23
24
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
24
#define TCG_TARGET_NEED_LDST_LABELS
25
25
26
ti = ts_info(ts);
26
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
27
ti->z_mask = z_mask;
27
index XXXXXXX..XXXXXXX 100644
28
- ti->s_mask = s_mask;
28
--- a/tcg/mips/tcg-target.c.inc
29
+ ti->s_mask = s_mask | smask_from_zmask(z_mask);
29
+++ b/tcg/mips/tcg-target.c.inc
30
return true;
30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
31
}
31
}
32
32
33
#if defined(CONFIG_SOFTMMU)
33
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
34
-static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
35
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
36
[MO_UB] = helper_ret_ldub_mmu,
37
[MO_SB] = helper_ret_ldsb_mmu,
38
- [MO_LEUW] = helper_le_lduw_mmu,
39
- [MO_LESW] = helper_le_ldsw_mmu,
40
- [MO_LEUL] = helper_le_ldul_mmu,
41
- [MO_LEUQ] = helper_le_ldq_mmu,
42
- [MO_BEUW] = helper_be_lduw_mmu,
43
- [MO_BESW] = helper_be_ldsw_mmu,
44
- [MO_BEUL] = helper_be_ldul_mmu,
45
- [MO_BEUQ] = helper_be_ldq_mmu,
46
-#if TCG_TARGET_REG_BITS == 64
47
- [MO_LESL] = helper_le_ldsl_mmu,
48
- [MO_BESL] = helper_be_ldsl_mmu,
49
+#if HOST_BIG_ENDIAN
50
+ [MO_UW] = helper_be_lduw_mmu,
51
+ [MO_SW] = helper_be_ldsw_mmu,
52
+ [MO_UL] = helper_be_ldul_mmu,
53
+ [MO_SL] = helper_be_ldsl_mmu,
54
+ [MO_UQ] = helper_be_ldq_mmu,
55
+#else
56
+ [MO_UW] = helper_le_lduw_mmu,
57
+ [MO_SW] = helper_le_ldsw_mmu,
58
+ [MO_UL] = helper_le_ldul_mmu,
59
+ [MO_UQ] = helper_le_ldq_mmu,
60
+ [MO_SL] = helper_le_ldsl_mmu,
61
#endif
62
};
63
64
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
65
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
66
[MO_UB] = helper_ret_stb_mmu,
67
- [MO_LEUW] = helper_le_stw_mmu,
68
- [MO_LEUL] = helper_le_stl_mmu,
69
- [MO_LEUQ] = helper_le_stq_mmu,
70
- [MO_BEUW] = helper_be_stw_mmu,
71
- [MO_BEUL] = helper_be_stl_mmu,
72
- [MO_BEUQ] = helper_be_stq_mmu,
73
+#if HOST_BIG_ENDIAN
74
+ [MO_UW] = helper_be_stw_mmu,
75
+ [MO_UL] = helper_be_stl_mmu,
76
+ [MO_UQ] = helper_be_stq_mmu,
77
+#else
78
+ [MO_UW] = helper_le_stw_mmu,
79
+ [MO_UL] = helper_le_stl_mmu,
80
+ [MO_UQ] = helper_le_stq_mmu,
81
+#endif
82
};
83
84
/* We have four temps, we might as well expose three of them. */
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
86
87
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
88
89
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
90
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
91
/* delay slot */
92
tcg_out_nop(s);
93
94
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
95
96
tcg_out_st_helper_args(s, l, &ldst_helper_param);
97
98
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false);
99
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
100
/* delay slot */
101
tcg_out_nop(s);
102
103
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
104
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
105
TCGReg base, MemOp opc, TCGType type)
106
{
107
- switch (opc & (MO_SSIZE | MO_BSWAP)) {
108
+ switch (opc & MO_SSIZE) {
109
case MO_UB:
110
tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
111
break;
112
case MO_SB:
113
tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
114
break;
115
- case MO_UW | MO_BSWAP:
116
- tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
117
- tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
118
- break;
119
case MO_UW:
120
tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
121
break;
122
- case MO_SW | MO_BSWAP:
123
- tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
124
- tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS);
125
- break;
126
case MO_SW:
127
tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
128
break;
129
- case MO_UL | MO_BSWAP:
130
- if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
131
- if (use_mips32r2_instructions) {
132
- tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
133
- tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
134
- } else {
135
- tcg_out_bswap_subr(s, bswap32u_addr);
136
- /* delay slot */
137
- tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0);
138
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
139
- }
140
- break;
141
- }
142
- /* FALLTHRU */
143
- case MO_SL | MO_BSWAP:
144
- if (use_mips32r2_instructions) {
145
- tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
146
- tcg_out_bswap32(s, lo, lo, 0);
147
- } else {
148
- tcg_out_bswap_subr(s, bswap32_addr);
149
- /* delay slot */
150
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
151
- tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_TMP3);
152
- }
153
- break;
154
case MO_UL:
155
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
156
tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
158
case MO_SL:
159
tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
160
break;
161
- case MO_UQ | MO_BSWAP:
162
- if (TCG_TARGET_REG_BITS == 64) {
163
- if (use_mips32r2_instructions) {
164
- tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
165
- tcg_out_bswap64(s, lo, lo);
166
- } else {
167
- tcg_out_bswap_subr(s, bswap64_addr);
168
- /* delay slot */
169
- tcg_out_opc_imm(s, OPC_LD, TCG_TMP0, base, 0);
170
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
171
- }
172
- } else if (use_mips32r2_instructions) {
173
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
174
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4);
175
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
176
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
177
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
178
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
179
- } else {
180
- tcg_out_bswap_subr(s, bswap32_addr);
181
- /* delay slot */
182
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
183
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 4);
184
- tcg_out_bswap_subr(s, bswap32_addr);
185
- /* delay slot */
186
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
187
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
188
- }
189
- break;
190
case MO_UQ:
191
/* Prefer to load from offset 0 first, but allow for overlap. */
192
if (TCG_TARGET_REG_BITS == 64) {
193
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
194
const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL;
195
const MIPSInsn ld1 = MIPS_BE ? OPC_LDL : OPC_LDR;
196
const MIPSInsn ld2 = MIPS_BE ? OPC_LDR : OPC_LDL;
197
+ bool sgn = opc & MO_SIGN;
198
199
- bool sgn = (opc & MO_SIGN);
200
-
201
- switch (opc & (MO_SSIZE | MO_BSWAP)) {
202
- case MO_SW | MO_BE:
203
- case MO_UW | MO_BE:
204
- tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0);
205
- tcg_out_opc_imm(s, OPC_LBU, lo, base, 1);
206
- if (use_mips32r2_instructions) {
207
- tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
208
- } else {
209
- tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
210
- tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1);
211
- }
212
- break;
213
-
214
- case MO_SW | MO_LE:
215
- case MO_UW | MO_LE:
216
- if (use_mips32r2_instructions && lo != base) {
217
+ switch (opc & MO_SIZE) {
218
+ case MO_16:
219
+ if (HOST_BIG_ENDIAN) {
220
+ tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0);
221
+ tcg_out_opc_imm(s, OPC_LBU, lo, base, 1);
222
+ if (use_mips32r2_instructions) {
223
+ tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
224
+ } else {
225
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
226
+ tcg_out_opc_reg(s, OPC_OR, lo, lo, TCG_TMP0);
227
+ }
228
+ } else if (use_mips32r2_instructions && lo != base) {
229
tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
230
tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 1);
231
tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
232
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
233
}
234
break;
235
236
- case MO_SL:
237
- case MO_UL:
238
+ case MO_32:
239
tcg_out_opc_imm(s, lw1, lo, base, 0);
240
tcg_out_opc_imm(s, lw2, lo, base, 3);
241
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn) {
242
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
243
}
244
break;
245
246
- case MO_UL | MO_BSWAP:
247
- case MO_SL | MO_BSWAP:
248
- if (use_mips32r2_instructions) {
249
- tcg_out_opc_imm(s, lw1, lo, base, 0);
250
- tcg_out_opc_imm(s, lw2, lo, base, 3);
251
- tcg_out_bswap32(s, lo, lo,
252
- TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64
253
- ? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0);
254
- } else {
255
- const tcg_insn_unit *subr =
256
- (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn
257
- ? bswap32u_addr : bswap32_addr);
258
-
259
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0);
260
- tcg_out_bswap_subr(s, subr);
261
- /* delay slot */
262
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3);
263
- tcg_out_mov(s, type, lo, TCG_TMP3);
264
- }
265
- break;
266
-
267
- case MO_UQ:
268
+ case MO_64:
269
if (TCG_TARGET_REG_BITS == 64) {
270
tcg_out_opc_imm(s, ld1, lo, base, 0);
271
tcg_out_opc_imm(s, ld2, lo, base, 7);
272
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
273
}
274
break;
275
276
- case MO_UQ | MO_BSWAP:
277
- if (TCG_TARGET_REG_BITS == 64) {
278
- if (use_mips32r2_instructions) {
279
- tcg_out_opc_imm(s, ld1, lo, base, 0);
280
- tcg_out_opc_imm(s, ld2, lo, base, 7);
281
- tcg_out_bswap64(s, lo, lo);
282
- } else {
283
- tcg_out_opc_imm(s, ld1, TCG_TMP0, base, 0);
284
- tcg_out_bswap_subr(s, bswap64_addr);
285
- /* delay slot */
286
- tcg_out_opc_imm(s, ld2, TCG_TMP0, base, 7);
287
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
288
- }
289
- } else if (use_mips32r2_instructions) {
290
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0);
291
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3);
292
- tcg_out_opc_imm(s, lw1, TCG_TMP1, base, 4 + 0);
293
- tcg_out_opc_imm(s, lw2, TCG_TMP1, base, 4 + 3);
294
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
295
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
296
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
297
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
298
- } else {
299
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0);
300
- tcg_out_bswap_subr(s, bswap32_addr);
301
- /* delay slot */
302
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3);
303
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 4 + 0);
304
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
305
- tcg_out_bswap_subr(s, bswap32_addr);
306
- /* delay slot */
307
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 4 + 3);
308
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
309
- }
310
- break;
311
-
312
default:
34
default:
313
g_assert_not_reached();
35
g_assert_not_reached();
314
}
36
}
315
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
37
- s_mask = smask_from_zmask(z_mask);
316
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
38
317
TCGReg base, MemOp opc)
39
+ s_mask = 0;
318
{
40
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
319
- /* Don't clutter the code below with checks to avoid bswapping ZERO. */
41
case TCG_BSWAP_OZ:
320
- if ((lo | hi) == 0) {
321
- opc &= ~MO_BSWAP;
322
- }
323
-
324
- switch (opc & (MO_SIZE | MO_BSWAP)) {
325
+ switch (opc & MO_SIZE) {
326
case MO_8:
327
tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
328
break;
42
break;
329
-
43
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
330
- case MO_16 | MO_BSWAP:
44
default:
331
- tcg_out_bswap16(s, TCG_TMP1, lo, 0);
45
/* The high bits are undefined: force all bits above the sign to 1. */
332
- lo = TCG_TMP1;
46
z_mask |= sign << 1;
333
- /* FALLTHRU */
47
- s_mask = 0;
334
case MO_16:
335
tcg_out_opc_imm(s, OPC_SH, lo, base, 0);
336
break;
48
break;
337
-
49
}
338
- case MO_32 | MO_BSWAP:
50
ctx->z_mask = z_mask;
339
- tcg_out_bswap32(s, TCG_TMP3, lo, 0);
51
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
340
- lo = TCG_TMP3;
52
g_assert_not_reached();
341
- /* FALLTHRU */
53
}
342
case MO_32:
54
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
343
tcg_out_opc_imm(s, OPC_SW, lo, base, 0);
55
- ctx->s_mask = smask_from_zmask(ctx->z_mask);
344
break;
56
return false;
345
-
57
}
346
- case MO_64 | MO_BSWAP:
58
347
- if (TCG_TARGET_REG_BITS == 64) {
59
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
348
- tcg_out_bswap64(s, TCG_TMP3, lo);
349
- tcg_out_opc_imm(s, OPC_SD, TCG_TMP3, base, 0);
350
- } else if (use_mips32r2_instructions) {
351
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi);
352
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo);
353
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
354
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
355
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0);
356
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4);
357
- } else {
358
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
359
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0);
360
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
361
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4);
362
- }
363
- break;
364
case MO_64:
365
if (TCG_TARGET_REG_BITS == 64) {
366
tcg_out_opc_imm(s, OPC_SD, lo, base, 0);
367
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
368
tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
369
}
370
break;
371
-
372
default:
60
default:
373
g_assert_not_reached();
61
g_assert_not_reached();
374
}
62
}
375
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
63
- ctx->s_mask = smask_from_zmask(ctx->z_mask);
376
const MIPSInsn sd1 = MIPS_BE ? OPC_SDL : OPC_SDR;
64
return false;
377
const MIPSInsn sd2 = MIPS_BE ? OPC_SDR : OPC_SDL;
65
}
378
66
379
- /* Don't clutter the code below with checks to avoid bswapping ZERO. */
67
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
380
- if ((lo | hi) == 0) {
68
return true;
381
- opc &= ~MO_BSWAP;
69
}
382
- }
70
ctx->z_mask = z_mask;
383
-
71
- ctx->s_mask = smask_from_zmask(z_mask);
384
- switch (opc & (MO_SIZE | MO_BSWAP)) {
72
385
- case MO_16 | MO_BE:
73
return fold_masks(ctx, op);
386
+ switch (opc & MO_SIZE) {
74
}
387
+ case MO_16:
75
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
388
tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8);
76
}
389
- tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 0);
77
390
- tcg_out_opc_imm(s, OPC_SB, lo, base, 1);
78
ctx->z_mask = z_mask;
391
+ tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? TCG_TMP0 : lo, base, 0);
79
- ctx->s_mask = smask_from_zmask(z_mask);
392
+ tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? lo : TCG_TMP0, base, 1);
80
if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
84
int width = 8 * memop_size(mop);
85
86
if (width < 64) {
87
- ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
88
- if (!(mop & MO_SIGN)) {
89
+ if (mop & MO_SIGN) {
90
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
91
+ } else {
92
ctx->z_mask = MAKE_64BIT_MASK(0, width);
93
- ctx->s_mask <<= 1;
94
}
95
}
96
97
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
98
fold_setcond_tst_pow2(ctx, op, false);
99
100
ctx->z_mask = 1;
101
- ctx->s_mask = smask_from_zmask(1);
102
return false;
103
}
104
105
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
106
}
107
108
ctx->z_mask = 1;
109
- ctx->s_mask = smask_from_zmask(1);
110
return false;
111
112
do_setcond_const:
113
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
393
break;
114
break;
394
115
CASE_OP_32_64(ld8u):
395
- case MO_16 | MO_LE:
116
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
396
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8);
117
- ctx->s_mask = MAKE_64BIT_MASK(9, 55);
397
- tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
398
- tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 1);
399
- break;
400
-
401
- case MO_32 | MO_BSWAP:
402
- tcg_out_bswap32(s, TCG_TMP3, lo, 0);
403
- lo = TCG_TMP3;
404
- /* fall through */
405
case MO_32:
406
tcg_out_opc_imm(s, sw1, lo, base, 0);
407
tcg_out_opc_imm(s, sw2, lo, base, 3);
408
break;
118
break;
409
119
CASE_OP_32_64(ld16s):
410
- case MO_64 | MO_BSWAP:
120
ctx->s_mask = MAKE_64BIT_MASK(16, 48);
411
- if (TCG_TARGET_REG_BITS == 64) {
121
break;
412
- tcg_out_bswap64(s, TCG_TMP3, lo);
122
CASE_OP_32_64(ld16u):
413
- lo = TCG_TMP3;
123
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
414
- } else if (use_mips32r2_instructions) {
124
- ctx->s_mask = MAKE_64BIT_MASK(17, 47);
415
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? hi : lo);
125
break;
416
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? lo : hi);
126
case INDEX_op_ld32s_i64:
417
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
127
ctx->s_mask = MAKE_64BIT_MASK(32, 32);
418
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
128
break;
419
- hi = MIPS_BE ? TCG_TMP0 : TCG_TMP1;
129
case INDEX_op_ld32u_i64:
420
- lo = MIPS_BE ? TCG_TMP1 : TCG_TMP0;
130
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
421
- } else {
131
- ctx->s_mask = MAKE_64BIT_MASK(33, 31);
422
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
132
break;
423
- tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 0 + 0);
133
default:
424
- tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 0 + 3);
134
g_assert_not_reached();
425
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
426
- tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 4 + 0);
427
- tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 4 + 3);
428
- break;
429
- }
430
- /* fall through */
431
case MO_64:
432
if (TCG_TARGET_REG_BITS == 64) {
433
tcg_out_opc_imm(s, sd1, lo, base, 0);
434
--
135
--
435
2.34.1
136
2.43.0
436
437
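For the tcg/optimize.c half of the comparison above, the effect of ti->s_mask = s_mask | smask_from_zmask(z_mask) is clearer on a concrete value. The snippet below is a standalone sketch rather than code from the series: __builtin_clzll stands in for QEMU's clz64, and the sample masks are assumptions picked for illustration.

/* Standalone sketch: known zeros on the left imply redundant sign bits,
 * so the recorded s_mask is at least what smask_from_zmask() derives
 * from z_mask, even when the caller passes s_mask == 0. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the helper as it exists at this point in the series. */
static uint64_t smask_from_zmask(uint64_t zmask)
{
    int rep = __builtin_clzll(zmask);   /* stand-in for QEMU's clz64() */
    if (rep == 0) {
        return 0;
    }
    rep -= 1;
    return ~(~0ull >> rep);
}

int main(void)
{
    uint64_t z_mask = 0xffff;   /* e.g. a 16-bit zero-extending load */
    uint64_t s_mask = 0;        /* caller deduced no sign information */

    uint64_t stored = s_mask | smask_from_zmask(z_mask);

    /* Bits 63..17 set: at least 47 copies of the (zero) sign bit. */
    printf("s_mask = %016" PRIx64 "\n", stored);   /* fffffffffffe0000 */
    return 0;
}

This is the "deduce more from the set of known zeros" step from the commit message; the explicit helper goes away again once the later representation change folds the same deduction into canonicalization.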
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
1
Change the representation from sign bit repetitions to all bits equal
2
and tcg_out_st_helper_args. This allows our local
2
to the sign bit, including the sign bit itself.
3
tcg_out_arg_* infrastructure to be removed.
4
3
5
We are no longer filling the call or return branch
4
The previous format has a problem in that it is difficult to recreate
6
delay slots, nor are we tail-calling for the store,
5
a valid sign mask after a shift operation: the "repetitions" part of
7
but this seems a small price to pay.
6
the previous format meant that applying the same shift as for the value
7
led to an off-by-one value.
8
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
The new format, including the sign bit itself, means that the sign mask
10
can be manipulated in exactly the same way as the value, and canonicalization
11
is easier.
12
13
Canonicalize the s_mask in fold_masks_zs, rather than requiring callers
14
to do so. Treat 0 as a non-canonical but typeless input for no sign
15
information, which will be reset as appropriate for the data type.
16
We can easily fold in the data from z_mask while canonicalizing.
17
18
Temporarily disable optimizations using s_mask while each operation is
19
converted to use fold_masks_zs and to the new form.
20
21
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
22
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
23
---
12
tcg/mips/tcg-target.c.inc | 154 ++++++--------------------------------
24
tcg/optimize.c | 64 ++++++++++++--------------------------------------
13
1 file changed, 22 insertions(+), 132 deletions(-)
25
1 file changed, 15 insertions(+), 49 deletions(-)
14
26
15
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
27
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/mips/tcg-target.c.inc
29
--- a/tcg/optimize.c
18
+++ b/tcg/mips/tcg-target.c.inc
30
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
31
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
20
[MO_BEUQ] = helper_be_stq_mmu,
32
QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
21
};
33
uint64_t val;
22
34
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
23
-/* Helper routines for marshalling helper function arguments into
35
- uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
24
- * the correct registers and stack.
36
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
25
- * I is where we want to put this argument, and is updated and returned
37
} TempOptInfo;
26
- * for the next call. ARG is the argument itself.
38
27
- *
39
typedef struct OptContext {
28
- * We provide routines for arguments which are: immediate, 32 bit
40
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
29
- * value in register, 16 and 8 bit values in register (which must be zero
41
30
- * extended before use) and 64 bit value in a lo:hi register pair.
42
/* In flight values from optimization. */
31
- */
43
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
32
-
44
- uint64_t s_mask; /* mask of clrsb(value) bits */
33
-static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
45
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
46
TCGType type;
47
} OptContext;
48
49
-/* Calculate the smask for a specific value. */
50
-static uint64_t smask_from_value(uint64_t value)
34
-{
51
-{
35
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
52
- int rep = clrsb64(value);
36
- tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
53
- return ~(~0ull >> rep);
37
- } else {
38
- /* For N32 and N64, the initial offset is different. But there
39
- we also have 8 argument register so we don't run out here. */
40
- tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
41
- tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
42
- }
43
- return i + 1;
44
-}
54
-}
45
-
55
-
46
-static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg)
56
-/*
57
- * Calculate the smask for a given set of known-zeros.
58
- * If there are lots of zeros on the left, we can consider the remainder
59
- * an unsigned field, and thus the corresponding signed field is one bit
60
- * larger.
61
- */
62
-static uint64_t smask_from_zmask(uint64_t zmask)
47
-{
63
-{
48
- TCGReg tmp = TCG_TMP0;
64
- /*
49
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
65
- * Only the 0 bits are significant for zmask, thus the msb itself
50
- tmp = tcg_target_call_iarg_regs[i];
66
- * must be zero, else we have no sign information.
67
- */
68
- int rep = clz64(zmask);
69
- if (rep == 0) {
70
- return 0;
51
- }
71
- }
52
- tcg_out_ext8u(s, tmp, arg);
72
- rep -= 1;
53
- return tcg_out_call_iarg_reg(s, i, tmp);
73
- return ~(~0ull >> rep);
54
-}
74
-}
55
-
75
-
56
-static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg)
76
-/*
77
- * Recreate a properly left-aligned smask after manipulation.
78
- * Some bit-shuffling, particularly shifts and rotates, may
79
- * retain sign bits on the left, but may scatter disconnected
80
- * sign bits on the right. Retain only what remains to the left.
81
- */
82
-static uint64_t smask_from_smask(int64_t smask)
57
-{
83
-{
58
- TCGReg tmp = TCG_TMP0;
84
- /* Only the 1 bits are significant for smask */
59
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
85
- return smask_from_zmask(~smask);
60
- tmp = tcg_target_call_iarg_regs[i];
61
- }
62
- tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff);
63
- return tcg_out_call_iarg_reg(s, i, tmp);
64
-}
86
-}
65
-
87
-
66
-static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
88
static inline TempOptInfo *ts_info(TCGTemp *ts)
67
-{
68
- TCGReg tmp = TCG_TMP0;
69
- if (arg == 0) {
70
- tmp = TCG_REG_ZERO;
71
- } else {
72
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
73
- tmp = tcg_target_call_iarg_regs[i];
74
- }
75
- tcg_out_movi(s, TCG_TYPE_REG, tmp, arg);
76
- }
77
- return tcg_out_call_iarg_reg(s, i, tmp);
78
-}
79
-
80
-static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
81
-{
82
- tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
83
- i = (i + 1) & ~1;
84
- i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
85
- i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
86
- return i;
87
-}
88
+/* We have four temps, we might as well expose three of them. */
89
+static const TCGLdstHelperParam ldst_helper_param = {
90
+ .ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
91
+};
92
93
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
94
{
89
{
95
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
90
return ts->state_ptr;
96
- MemOpIdx oi = l->oi;
91
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
97
- MemOp opc = get_memop(oi);
92
ti->is_const = true;
98
- TCGReg v0;
93
ti->val = ts->val;
99
- int i;
94
ti->z_mask = ts->val;
100
+ MemOp opc = get_memop(l->oi);
95
- ti->s_mask = smask_from_value(ts->val);
101
96
+ ti->s_mask = INT64_MIN >> clrsb64(ts->val);
102
/* resolve label address */
97
} else {
103
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
98
ti->is_const = false;
104
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
99
ti->z_mask = -1;
105
return false;
100
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
101
*/
102
if (i == 0) {
103
ts_info(ts)->z_mask = ctx->z_mask;
104
- ts_info(ts)->s_mask = ctx->s_mask;
105
}
106
}
106
}
107
107
}
108
- i = 1;
108
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
109
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
109
* The passed s_mask may be augmented by z_mask.
110
- i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
110
*/
111
- } else {
111
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
112
- i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
112
- uint64_t z_mask, uint64_t s_mask)
113
- }
113
+ uint64_t z_mask, int64_t s_mask)
114
- i = tcg_out_call_iarg_imm(s, i, oi);
114
{
115
- i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr);
115
const TCGOpDef *def = &tcg_op_defs[op->opc];
116
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
116
TCGTemp *ts;
117
TempOptInfo *ti;
118
+ int rep;
119
120
/* Only single-output opcodes are supported here. */
121
tcg_debug_assert(def->nb_oargs == 1);
122
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
123
*/
124
if (ctx->type == TCG_TYPE_I32) {
125
z_mask = (int32_t)z_mask;
126
- s_mask |= MAKE_64BIT_MASK(32, 32);
127
+ s_mask |= INT32_MIN;
128
}
129
130
if (z_mask == 0) {
131
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
132
133
ti = ts_info(ts);
134
ti->z_mask = z_mask;
135
- ti->s_mask = s_mask | smask_from_zmask(z_mask);
117
+
136
+
118
tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
137
+ /* Canonicalize s_mask and incorporate data from z_mask. */
119
/* delay slot */
138
+ rep = clz64(~s_mask);
120
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
139
+ rep = MAX(rep, clz64(z_mask));
121
+ tcg_out_nop(s);
140
+ rep = MAX(rep - 1, 0);
122
141
+ ti->s_mask = INT64_MIN >> rep;
123
- v0 = l->datalo_reg;
142
+
124
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
125
- /* We eliminated V0 from the possible output registers, so it
126
- cannot be clobbered here. So we must move V1 first. */
127
- if (MIPS_BE) {
128
- tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1);
129
- v0 = l->datahi_reg;
130
- } else {
131
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
132
- }
133
- }
134
+ tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
135
136
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
137
if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
138
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
139
}
140
141
/* delay slot */
142
- if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) {
143
- /* we always sign-extend 32-bit loads */
144
- tcg_out_ext32s(s, v0, TCG_REG_V0);
145
- } else {
146
- tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO);
147
- }
148
+ tcg_out_nop(s);
149
return true;
143
return true;
150
}
144
}
151
145
152
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
146
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
153
{
147
154
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
148
ctx->z_mask = z_mask;
155
- MemOpIdx oi = l->oi;
149
ctx->s_mask = s_mask;
156
- MemOp opc = get_memop(oi);
150
- if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
157
- MemOp s_bits = opc & MO_SIZE;
151
+ if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
158
- int i;
152
return true;
159
+ MemOp opc = get_memop(l->oi);
160
161
/* resolve label address */
162
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
163
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
164
return false;
165
}
153
}
166
154
167
- i = 1;
155
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
168
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
156
s_mask |= MAKE_64BIT_MASK(len, 64 - len);
169
- i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
157
ctx->s_mask = s_mask;
170
- } else {
158
171
- i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
159
- if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
172
- }
160
+ if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
173
- switch (s_bits) {
161
return true;
174
- case MO_8:
162
}
175
- i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg);
163
176
- break;
164
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
177
- case MO_16:
165
ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
178
- i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg);
166
179
- break;
167
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
180
- case MO_32:
168
- ctx->s_mask = smask_from_smask(s_mask);
181
- i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
169
182
- break;
170
return fold_masks(ctx, op);
183
- case MO_64:
171
}
184
- if (TCG_TARGET_REG_BITS == 32) {
185
- i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
186
- } else {
187
- i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
188
- }
189
- break;
190
- default:
191
- g_assert_not_reached();
192
- }
193
- i = tcg_out_call_iarg_imm(s, i, oi);
194
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
195
196
- /* Tail call to the store helper. Thus force the return address
197
- computation to take place in the return address register. */
198
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr);
199
- i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA);
200
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true);
201
+ tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false);
202
/* delay slot */
203
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
204
+ tcg_out_nop(s);
205
+
206
+ tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
207
+ if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
208
+ return false;
209
+ }
210
+
211
+ /* delay slot */
212
+ tcg_out_nop(s);
213
return true;
214
}
215
216
--
172
--
217
2.34.1
173
2.43.0
218
219
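For the tcg/optimize.c half above, the two s_mask encodings can be compared on a single sample value. This is a standalone sketch under stated assumptions, not code from the series: __builtin_clrsbll stands in for QEMU's clrsb64, the sample value is arbitrary, and the right shift of a negative operand assumes the usual GCC/Clang arithmetic-shift behaviour.

/* Standalone sketch: old s_mask (left-aligned clrsb(value) bits, msb
 * excluded) versus new s_mask (every bit equal to the msb, msb included)
 * for one sample value. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int64_t val = 0xff;                  /* fits in 8 unsigned bits */
    int rep = __builtin_clrsbll(val);    /* 55 redundant sign bits  */

    uint64_t s_old = ~(~0ull >> rep);               /* bits 63..9 set */
    uint64_t s_new = (uint64_t)(INT64_MIN >> rep);  /* bits 63..8 set */

    /* The new form shifts like the value does: after a sar by 1 the
     * result fits in 7 bits, and (s_new >> 1) is already the canonical
     * mask for it (bits 63..7). */
    uint64_t s_new_sar1 = (uint64_t)((int64_t)s_new >> 1);

    printf("old      %016" PRIx64 "\n", s_old);      /* fffffffffffffe00 */
    printf("new      %016" PRIx64 "\n", s_new);      /* ffffffffffffff00 */
    printf("new >> 1 %016" PRIx64 "\n", s_new_sar1); /* ffffffffffffff80 */
    return 0;
}

With the new encoding a shift applied to the value can be applied to the mask unchanged, which is consistent with this patch dropping the smask_from_smask() re-canonicalization from fold_shift.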
1
Instead of playing with offsetof in various places, use
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
MMUAccessType to index an array. This is easily defined
3
instead of the previous dummy padding array in the union.
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
3
---
10
include/exec/cpu-defs.h | 7 ++-
4
tcg/optimize.c | 9 +++++----
11
include/exec/cpu_ldst.h | 26 ++++++++--
5
1 file changed, 5 insertions(+), 4 deletions(-)
12
accel/tcg/cputlb.c | 104 +++++++++++++---------------------------
13
3 files changed, 59 insertions(+), 78 deletions(-)
14
6
15
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
17
--- a/include/exec/cpu-defs.h
9
--- a/tcg/optimize.c
18
+++ b/include/exec/cpu-defs.h
10
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntry {
11
@@ -XXX,XX +XXX,XX @@ static void finish_ebb(OptContext *ctx)
20
use the corresponding iotlb value. */
12
remove_mem_copy_all(ctx);
21
uintptr_t addend;
13
}
22
};
14
23
- /* padding to get a power of two size */
15
-static void finish_folding(OptContext *ctx, TCGOp *op)
24
- uint8_t dummy[1 << CPU_TLB_ENTRY_BITS];
16
+static bool finish_folding(OptContext *ctx, TCGOp *op)
25
+ /*
26
+ * Padding to get a power of two size, as well as index
27
+ * access to addr_{read,write,code}.
28
+ */
29
+ target_ulong addr_idx[(1 << CPU_TLB_ENTRY_BITS) / TARGET_LONG_SIZE];
30
};
31
} CPUTLBEntry;
32
33
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/include/exec/cpu_ldst.h
36
+++ b/include/exec/cpu_ldst.h
37
@@ -XXX,XX +XXX,XX @@ static inline void clear_helper_retaddr(void)
38
/* Needed for TCG_OVERSIZED_GUEST */
39
#include "tcg/tcg.h"
40
41
+static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry,
42
+ MMUAccessType access_type)
43
+{
44
+ /* Do not rearrange the CPUTLBEntry structure members. */
45
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
46
+ MMU_DATA_LOAD * TARGET_LONG_SIZE);
47
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
48
+ MMU_DATA_STORE * TARGET_LONG_SIZE);
49
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
50
+ MMU_INST_FETCH * TARGET_LONG_SIZE);
51
+
52
+ const target_ulong *ptr = &entry->addr_idx[access_type];
53
+#if TCG_OVERSIZED_GUEST
54
+ return *ptr;
55
+#else
56
+ /* ofs might correspond to .addr_write, so use qatomic_read */
57
+ return qatomic_read(ptr);
58
+#endif
59
+}
60
+
61
static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
62
{
17
{
63
-#if TCG_OVERSIZED_GUEST
18
const TCGOpDef *def = &tcg_op_defs[op->opc];
64
- return entry->addr_write;
19
int i, nb_oargs;
65
-#else
20
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
66
- return qatomic_read(&entry->addr_write);
21
ts_info(ts)->z_mask = ctx->z_mask;
67
-#endif
22
}
68
+ return tlb_read_idx(entry, MMU_DATA_STORE);
23
}
24
+ return true;
69
}
25
}
70
26
71
/* Find the TLB index corresponding to the mmu_idx + address pair. */
27
/*
72
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
28
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
73
index XXXXXXX..XXXXXXX 100644
29
fold_xi_to_x(ctx, op, 0)) {
74
--- a/accel/tcg/cputlb.c
30
return true;
75
+++ b/accel/tcg/cputlb.c
76
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
77
}
31
}
32
- return false;
33
+ return finish_folding(ctx, op);
78
}
34
}
79
35
80
-static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
36
/* We cannot as yet do_constant_folding with vectors. */
81
-{
37
@@ -XXX,XX +XXX,XX @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op)
82
-#if TCG_OVERSIZED_GUEST
38
fold_xi_to_x(ctx, op, 0)) {
83
- return *(target_ulong *)((uintptr_t)entry + ofs);
39
return true;
84
-#else
40
}
85
- /* ofs might correspond to .addr_write, so use qatomic_read */
41
- return false;
86
- return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
42
+ return finish_folding(ctx, op);
87
-#endif
88
-}
89
-
90
/* Return true if ADDR is present in the victim tlb, and has been copied
91
back to the main tlb. */
92
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
93
- size_t elt_ofs, target_ulong page)
94
+ MMUAccessType access_type, target_ulong page)
95
{
96
size_t vidx;
97
98
assert_cpu_is_self(env_cpu(env));
99
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
100
CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
101
- target_ulong cmp;
102
-
103
- /* elt_ofs might correspond to .addr_write, so use qatomic_read */
104
-#if TCG_OVERSIZED_GUEST
105
- cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
106
-#else
107
- cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
108
-#endif
109
+ target_ulong cmp = tlb_read_idx(vtlb, access_type);
110
111
if (cmp == page) {
112
/* Found entry in victim tlb, swap tlb and iotlb. */
113
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
114
return false;
115
}
43
}
116
44
117
-/* Macro to call the above, with local variables from the use context. */
45
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
118
-#define VICTIM_TLB_HIT(TY, ADDR) \
46
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
119
- victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
47
op->args[4] = arg_new_constant(ctx, bl);
120
- (ADDR) & TARGET_PAGE_MASK)
48
op->args[5] = arg_new_constant(ctx, bh);
121
-
122
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
123
CPUTLBEntryFull *full, uintptr_t retaddr)
124
{
125
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
126
{
127
uintptr_t index = tlb_index(env, mmu_idx, addr);
128
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
129
- target_ulong tlb_addr, page_addr;
130
- size_t elt_ofs;
131
- int flags;
132
+ target_ulong tlb_addr = tlb_read_idx(entry, access_type);
133
+ target_ulong page_addr = addr & TARGET_PAGE_MASK;
134
+ int flags = TLB_FLAGS_MASK;
135
136
- switch (access_type) {
137
- case MMU_DATA_LOAD:
138
- elt_ofs = offsetof(CPUTLBEntry, addr_read);
139
- break;
140
- case MMU_DATA_STORE:
141
- elt_ofs = offsetof(CPUTLBEntry, addr_write);
142
- break;
143
- case MMU_INST_FETCH:
144
- elt_ofs = offsetof(CPUTLBEntry, addr_code);
145
- break;
146
- default:
147
- g_assert_not_reached();
148
- }
149
- tlb_addr = tlb_read_ofs(entry, elt_ofs);
150
-
151
- flags = TLB_FLAGS_MASK;
152
- page_addr = addr & TARGET_PAGE_MASK;
153
if (!tlb_hit_page(tlb_addr, page_addr)) {
154
- if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
155
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type, page_addr)) {
156
CPUState *cs = env_cpu(env);
157
158
if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
159
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
160
*/
161
flags &= ~TLB_INVALID_MASK;
162
}
163
- tlb_addr = tlb_read_ofs(entry, elt_ofs);
164
+ tlb_addr = tlb_read_idx(entry, access_type);
165
}
49
}
166
flags &= tlb_addr;
50
- return false;
167
51
+ return finish_folding(ctx, op);
168
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
169
if (prot & PAGE_WRITE) {
170
tlb_addr = tlb_addr_write(tlbe);
171
if (!tlb_hit(tlb_addr, addr)) {
172
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
173
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
174
+ addr & TARGET_PAGE_MASK)) {
175
tlb_fill(env_cpu(env), addr, size,
176
MMU_DATA_STORE, mmu_idx, retaddr);
177
index = tlb_index(env, mmu_idx, addr);
178
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
179
} else /* if (prot & PAGE_READ) */ {
180
tlb_addr = tlbe->addr_read;
181
if (!tlb_hit(tlb_addr, addr)) {
182
- if (!VICTIM_TLB_HIT(addr_read, addr)) {
183
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_LOAD,
184
+ addr & TARGET_PAGE_MASK)) {
185
tlb_fill(env_cpu(env), addr, size,
186
MMU_DATA_LOAD, mmu_idx, retaddr);
187
index = tlb_index(env, mmu_idx, addr);
188
@@ -XXX,XX +XXX,XX @@ load_memop(const void *haddr, MemOp op)
189
190
static inline uint64_t QEMU_ALWAYS_INLINE
191
load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
192
- uintptr_t retaddr, MemOp op, bool code_read,
193
+ uintptr_t retaddr, MemOp op, MMUAccessType access_type,
194
FullLoadHelper *full_load)
195
{
196
- const size_t tlb_off = code_read ?
197
- offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
198
- const MMUAccessType access_type =
199
- code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
200
const unsigned a_bits = get_alignment_bits(get_memop(oi));
201
const size_t size = memop_size(op);
202
uintptr_t mmu_idx = get_mmuidx(oi);
203
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
204
205
index = tlb_index(env, mmu_idx, addr);
206
entry = tlb_entry(env, mmu_idx, addr);
207
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
208
+ tlb_addr = tlb_read_idx(entry, access_type);
209
210
/* If the TLB entry is for a different page, reload and try again. */
211
if (!tlb_hit(tlb_addr, addr)) {
212
- if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
213
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type,
214
addr & TARGET_PAGE_MASK)) {
215
tlb_fill(env_cpu(env), addr, size,
216
access_type, mmu_idx, retaddr);
217
index = tlb_index(env, mmu_idx, addr);
218
entry = tlb_entry(env, mmu_idx, addr);
219
}
220
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
221
+ tlb_addr = tlb_read_idx(entry, access_type);
222
tlb_addr &= ~TLB_INVALID_MASK;
223
}
224
225
@@ -XXX,XX +XXX,XX @@ static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
226
MemOpIdx oi, uintptr_t retaddr)
227
{
228
validate_memop(oi, MO_UB);
229
- return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
230
+ return load_helper(env, addr, oi, retaddr, MO_UB, MMU_DATA_LOAD,
231
+ full_ldub_mmu);
232
}
52
}
233
53
234
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
54
static bool fold_add2(OptContext *ctx, TCGOp *op)
235
@@ -XXX,XX +XXX,XX @@ static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
236
MemOpIdx oi, uintptr_t retaddr)
237
{
238
validate_memop(oi, MO_LEUW);
239
- return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
240
+ return load_helper(env, addr, oi, retaddr, MO_LEUW, MMU_DATA_LOAD,
241
full_le_lduw_mmu);
242
}
243
244
@@ -XXX,XX +XXX,XX @@ static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
245
MemOpIdx oi, uintptr_t retaddr)
246
{
247
validate_memop(oi, MO_BEUW);
248
- return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
249
+ return load_helper(env, addr, oi, retaddr, MO_BEUW, MMU_DATA_LOAD,
250
full_be_lduw_mmu);
251
}
252
253
@@ -XXX,XX +XXX,XX @@ static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
254
MemOpIdx oi, uintptr_t retaddr)
255
{
256
validate_memop(oi, MO_LEUL);
257
- return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
258
+ return load_helper(env, addr, oi, retaddr, MO_LEUL, MMU_DATA_LOAD,
259
full_le_ldul_mmu);
260
}
261
262
@@ -XXX,XX +XXX,XX @@ static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
263
MemOpIdx oi, uintptr_t retaddr)
264
{
265
validate_memop(oi, MO_BEUL);
266
- return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
267
+ return load_helper(env, addr, oi, retaddr, MO_BEUL, MMU_DATA_LOAD,
268
full_be_ldul_mmu);
269
}
270
271
@@ -XXX,XX +XXX,XX @@ uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
272
MemOpIdx oi, uintptr_t retaddr)
273
{
274
validate_memop(oi, MO_LEUQ);
275
- return load_helper(env, addr, oi, retaddr, MO_LEUQ, false,
276
+ return load_helper(env, addr, oi, retaddr, MO_LEUQ, MMU_DATA_LOAD,
277
helper_le_ldq_mmu);
278
}
279
280
@@ -XXX,XX +XXX,XX @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
281
MemOpIdx oi, uintptr_t retaddr)
282
{
283
validate_memop(oi, MO_BEUQ);
284
- return load_helper(env, addr, oi, retaddr, MO_BEUQ, false,
285
+ return load_helper(env, addr, oi, retaddr, MO_BEUQ, MMU_DATA_LOAD,
286
helper_be_ldq_mmu);
287
}
288
289
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
290
uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
291
bool big_endian)
292
{
293
- const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
294
uintptr_t index, index2;
295
CPUTLBEntry *entry, *entry2;
296
target_ulong page1, page2, tlb_addr, tlb_addr2;
297
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
298
299
tlb_addr2 = tlb_addr_write(entry2);
300
if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
301
- if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
302
+ if (!victim_tlb_hit(env, mmu_idx, index2, MMU_DATA_STORE, page2)) {
303
tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
304
mmu_idx, retaddr);
305
index2 = tlb_index(env, mmu_idx, page2);
306
@@ -XXX,XX +XXX,XX @@ static inline void QEMU_ALWAYS_INLINE
307
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
308
MemOpIdx oi, uintptr_t retaddr, MemOp op)
309
{
310
- const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
311
const unsigned a_bits = get_alignment_bits(get_memop(oi));
312
const size_t size = memop_size(op);
313
uintptr_t mmu_idx = get_mmuidx(oi);
314
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
315
316
/* If the TLB entry is for a different page, reload and try again. */
317
if (!tlb_hit(tlb_addr, addr)) {
318
- if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
319
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
320
addr & TARGET_PAGE_MASK)) {
321
tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
322
mmu_idx, retaddr);
323
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
324
static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
325
MemOpIdx oi, uintptr_t retaddr)
326
{
327
- return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
328
+ return load_helper(env, addr, oi, retaddr, MO_8,
329
+ MMU_INST_FETCH, full_ldub_code);
330
}
331
332
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
333
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
334
static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
335
MemOpIdx oi, uintptr_t retaddr)
336
{
337
- return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
338
+ return load_helper(env, addr, oi, retaddr, MO_TEUW,
339
+ MMU_INST_FETCH, full_lduw_code);
340
}
341
342
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
343
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
344
static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
345
MemOpIdx oi, uintptr_t retaddr)
346
{
347
- return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
348
+ return load_helper(env, addr, oi, retaddr, MO_TEUL,
349
+ MMU_INST_FETCH, full_ldl_code);
350
}
351
352
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
353
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
354
static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
355
MemOpIdx oi, uintptr_t retaddr)
356
{
357
- return load_helper(env, addr, oi, retaddr, MO_TEUQ, true, full_ldq_code);
358
+ return load_helper(env, addr, oi, retaddr, MO_TEUQ,
359
+ MMU_INST_FETCH, full_ldq_code);
360
}
361
362
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
363
--
55
--
364
2.34.1
56
2.43.0
365
366
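For the cputlb half of the comparison above, the layout trick behind tlb_read_idx() can be shown with a cut-down structure. The sketch below is illustrative only: TLBEntrySketch is an invented, simplified stand-in for CPUTLBEntry (plain uint64_t instead of target_ulong, no atomics, no TLB flag handling), and only the MMUAccessType values and the build-time offset checks correspond to the real code.

/* Standalone sketch of indexing the named TLB comparators with
 * MMUAccessType: the named members and the addr_idx[] array overlay
 * each other, and static asserts pin the enum values to the offsets. */
#include <assert.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef enum {
    MMU_DATA_LOAD  = 0,
    MMU_DATA_STORE = 1,
    MMU_INST_FETCH = 2,
} MMUAccessType;

typedef struct {
    union {
        struct {
            uint64_t addr_read;
            uint64_t addr_write;
            uint64_t addr_code;
            uint64_t addend;
        };
        uint64_t addr_idx[4];   /* index access to addr_{read,write,code} */
    };
} TLBEntrySketch;

static_assert(offsetof(TLBEntrySketch, addr_read) ==
              MMU_DATA_LOAD * sizeof(uint64_t), "layout");
static_assert(offsetof(TLBEntrySketch, addr_write) ==
              MMU_DATA_STORE * sizeof(uint64_t), "layout");
static_assert(offsetof(TLBEntrySketch, addr_code) ==
              MMU_INST_FETCH * sizeof(uint64_t), "layout");

/* Replaces the old offsetof()-based style of lookup. */
static uint64_t tlb_read_idx(const TLBEntrySketch *entry,
                             MMUAccessType access_type)
{
    return entry->addr_idx[access_type];
}

int main(void)
{
    TLBEntrySketch e = { .addr_read = 0x1000, .addr_write = 0x2000,
                         .addr_code = 0x3000 };

    printf("%" PRIx64 " %" PRIx64 " %" PRIx64 "\n",
           tlb_read_idx(&e, MMU_DATA_LOAD),
           tlb_read_idx(&e, MMU_DATA_STORE),
           tlb_read_idx(&e, MMU_INST_FETCH));
    return 0;
}

The static asserts are the load-bearing part: they mirror the QEMU_BUILD_BUG_ON checks added in the patch and are what make indexing by access type equivalent to the old offsetof-based lookups.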
1
Add tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
1
Introduce ti_is_const, ti_const_val, ti_is_const_val.
2
and tcg_out_st_helper_args. These and their subroutines
3
use the existing knowledge of the host function call abi
4
to load the function call arguments and return results.
5
2
6
These will be used to simplify the backends in turn.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
4
---
11
tcg/tcg.c | 475 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
5
tcg/optimize.c | 20 +++++++++++++++++---
12
1 file changed, 471 insertions(+), 4 deletions(-)
6
1 file changed, 17 insertions(+), 3 deletions(-)
13
7
14
diff --git a/tcg/tcg.c b/tcg/tcg.c
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/tcg.c
10
--- a/tcg/optimize.c
17
+++ b/tcg/tcg.c
11
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
12
@@ -XXX,XX +XXX,XX @@ static inline TempOptInfo *arg_info(TCGArg arg)
19
static int tcg_out_ldst_finalize(TCGContext *s);
13
return ts_info(arg_temp(arg));
20
#endif
21
22
+typedef struct TCGLdstHelperParam {
23
+ TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
24
+ unsigned ntmp;
25
+ int tmp[3];
26
+} TCGLdstHelperParam;
27
+
28
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
29
+ const TCGLdstHelperParam *p)
30
+ __attribute__((unused));
31
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
32
+ bool load_sign, const TCGLdstHelperParam *p)
33
+ __attribute__((unused));
34
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
35
+ const TCGLdstHelperParam *p)
36
+ __attribute__((unused));
37
+
38
TCGContext tcg_init_ctx;
39
__thread TCGContext *tcg_ctx;
40
41
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
42
siglongjmp(s->jmp_trans, -2);
43
}
14
}
44
15
45
+/*
16
+static inline bool ti_is_const(TempOptInfo *ti)
46
+ * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
47
+ * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
48
+ *
49
+ * However, tcg_out_helper_load_slots reuses this field to hold an
50
+ * argument slot number (which may designate a argument register or an
51
+ * argument stack slot), converting to TCGReg once all arguments that
52
+ * are destined for the stack are processed.
53
+ */
54
typedef struct TCGMovExtend {
55
- TCGReg dst;
56
+ unsigned dst;
57
TCGReg src;
58
TCGType dst_type;
59
TCGType src_type;
60
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
61
* between the sources and destinations.
62
*/
63
64
-static void __attribute__((unused))
65
-tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
66
- const TCGMovExtend *i2, int scratch)
67
+static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
68
+ const TCGMovExtend *i2, int scratch)
69
{
70
TCGReg src1 = i1->src;
71
TCGReg src2 = i2->src;
72
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo all_helpers[] = {
73
};
74
static GHashTable *helper_table;
75
76
+/*
77
+ * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
78
+ * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
79
+ * We only use these for layout in tcg_out_ld_helper_ret and
80
+ * tcg_out_st_helper_args, and share them between several of
81
+ * the helpers, with the end result that it's easier to build manually.
82
+ */
83
+
84
+#if TCG_TARGET_REG_BITS == 32
85
+# define dh_typecode_ttl dh_typecode_i32
86
+#else
87
+# define dh_typecode_ttl dh_typecode_i64
88
+#endif
89
+
90
+static TCGHelperInfo info_helper_ld32_mmu = {
91
+ .flags = TCG_CALL_NO_WG,
92
+ .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
93
+ | dh_typemask(env, 1)
94
+ | dh_typemask(tl, 2) /* target_ulong addr */
95
+ | dh_typemask(i32, 3) /* unsigned oi */
96
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
97
+};
98
+
99
+static TCGHelperInfo info_helper_ld64_mmu = {
100
+ .flags = TCG_CALL_NO_WG,
101
+ .typemask = dh_typemask(i64, 0) /* return uint64_t */
102
+ | dh_typemask(env, 1)
103
+ | dh_typemask(tl, 2) /* target_ulong addr */
104
+ | dh_typemask(i32, 3) /* unsigned oi */
105
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
106
+};
107
+
108
+static TCGHelperInfo info_helper_st32_mmu = {
109
+ .flags = TCG_CALL_NO_WG,
110
+ .typemask = dh_typemask(void, 0)
111
+ | dh_typemask(env, 1)
112
+ | dh_typemask(tl, 2) /* target_ulong addr */
113
+ | dh_typemask(i32, 3) /* uint32_t data */
114
+ | dh_typemask(i32, 4) /* unsigned oi */
115
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
116
+};
117
+
118
+static TCGHelperInfo info_helper_st64_mmu = {
119
+ .flags = TCG_CALL_NO_WG,
120
+ .typemask = dh_typemask(void, 0)
121
+ | dh_typemask(env, 1)
122
+ | dh_typemask(tl, 2) /* target_ulong addr */
123
+ | dh_typemask(i64, 3) /* uint64_t data */
124
+ | dh_typemask(i32, 4) /* unsigned oi */
125
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
126
+};
127
+
128
#ifdef CONFIG_TCG_INTERPRETER
129
static ffi_type *typecode_to_ffi(int argmask)
130
{
131
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
132
(gpointer)&all_helpers[i]);
133
}
134
135
+ init_call_layout(&info_helper_ld32_mmu);
136
+ init_call_layout(&info_helper_ld64_mmu);
137
+ init_call_layout(&info_helper_st32_mmu);
138
+ init_call_layout(&info_helper_st64_mmu);
139
+
140
#ifdef CONFIG_TCG_INTERPRETER
141
init_ffi_layouts();
142
#endif
143
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
144
}
145
}
146
147
+/*
148
+ * Similarly for qemu_ld/st slow path helpers.
149
+ * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
150
+ * using only the provided backend tcg_out_* functions.
151
+ */
152
+
153
+static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
154
+{
17
+{
155
+ int ofs = arg_slot_stk_ofs(slot);
18
+ return ti->is_const;
156
+
157
+ /*
158
+ * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not
159
+ * require extension to uint64_t, adjust the address for uint32_t.
160
+ */
161
+ if (HOST_BIG_ENDIAN &&
162
+ TCG_TARGET_REG_BITS == 64 &&
163
+ type == TCG_TYPE_I32) {
164
+ ofs += 4;
165
+ }
166
+ return ofs;
167
+}
19
+}
168
+
20
+
169
+static void tcg_out_helper_load_regs(TCGContext *s,
21
+static inline uint64_t ti_const_val(TempOptInfo *ti)
170
+ unsigned nmov, TCGMovExtend *mov,
171
+ unsigned ntmp, const int *tmp)
172
+{
22
+{
173
+ switch (nmov) {
23
+ return ti->val;
174
+ default:
175
+ /* The backend must have provided enough temps for the worst case. */
176
+ tcg_debug_assert(ntmp + 1 >= nmov);
177
+
178
+ for (unsigned i = nmov - 1; i >= 2; --i) {
179
+ TCGReg dst = mov[i].dst;
180
+
181
+ for (unsigned j = 0; j < i; ++j) {
182
+ if (dst == mov[j].src) {
183
+ /*
184
+ * Conflict.
185
+ * Copy the source to a temporary, recurse for the
186
+ * remaining moves, perform the extension from our
187
+ * scratch on the way out.
188
+ */
189
+ TCGReg scratch = tmp[--ntmp];
190
+ tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
191
+ mov[i].src = scratch;
192
+
193
+ tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
194
+ tcg_out_movext1(s, &mov[i]);
195
+ return;
196
+ }
197
+ }
198
+
199
+ /* No conflicts: perform this move and continue. */
200
+ tcg_out_movext1(s, &mov[i]);
201
+ }
202
+ /* fall through for the final two moves */
203
+
204
+ case 2:
205
+ tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
206
+ return;
207
+ case 1:
208
+ tcg_out_movext1(s, mov);
209
+ return;
210
+ case 0:
211
+ g_assert_not_reached();
212
+ }
213
+}
24
+}
214
+
25
+
215
+static void tcg_out_helper_load_slots(TCGContext *s,
26
+static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
216
+ unsigned nmov, TCGMovExtend *mov,
217
+ const TCGLdstHelperParam *parm)
218
+{
27
+{
219
+ unsigned i;
28
+ return ti_is_const(ti) && ti_const_val(ti) == val;
220
+
221
+ /*
222
+ * Start from the end, storing to the stack first.
223
+ * This frees those registers, so we need not consider overlap.
224
+ */
225
+ for (i = nmov; i-- > 0; ) {
226
+ unsigned slot = mov[i].dst;
227
+
228
+ if (arg_slot_reg_p(slot)) {
229
+ goto found_reg;
230
+ }
231
+
232
+ TCGReg src = mov[i].src;
233
+ TCGType dst_type = mov[i].dst_type;
234
+ MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
235
+
236
+ /* The argument is going onto the stack; extend into scratch. */
237
+ if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
238
+ tcg_debug_assert(parm->ntmp != 0);
239
+ mov[i].dst = src = parm->tmp[0];
240
+ tcg_out_movext1(s, &mov[i]);
241
+ }
242
+
243
+ tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
244
+ tcg_out_helper_stk_ofs(dst_type, slot));
245
+ }
246
+ return;
247
+
248
+ found_reg:
249
+ /*
250
+ * The remaining arguments are in registers.
251
+ * Convert slot numbers to argument registers.
252
+ */
253
+ nmov = i + 1;
254
+ for (i = 0; i < nmov; ++i) {
255
+ mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
256
+ }
257
+ tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
258
+}
29
+}
259
+
30
+
260
+static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
31
static inline bool ts_is_const(TCGTemp *ts)
261
+ TCGType type, tcg_target_long imm,
32
{
262
+ const TCGLdstHelperParam *parm)
33
- return ts_info(ts)->is_const;
263
+{
34
+ return ti_is_const(ts_info(ts));
264
+ if (arg_slot_reg_p(slot)) {
35
}
265
+ tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
36
266
+ } else {
37
static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
267
+ int ofs = tcg_out_helper_stk_ofs(type, slot);
38
{
268
+ if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
39
- TempOptInfo *ti = ts_info(ts);
269
+ tcg_debug_assert(parm->ntmp != 0);
40
- return ti->is_const && ti->val == val;
270
+ tcg_out_movi(s, type, parm->tmp[0], imm);
41
+ return ti_is_const_val(ts_info(ts), val);
271
+ tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
42
}
272
+ }
43
273
+ }
44
static inline bool arg_is_const(TCGArg arg)
274
+}
275
+
276
+static void tcg_out_helper_load_common_args(TCGContext *s,
277
+ const TCGLabelQemuLdst *ldst,
278
+ const TCGLdstHelperParam *parm,
279
+ const TCGHelperInfo *info,
280
+ unsigned next_arg)
281
+{
282
+ TCGMovExtend ptr_mov = {
283
+ .dst_type = TCG_TYPE_PTR,
284
+ .src_type = TCG_TYPE_PTR,
285
+ .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
286
+ };
287
+ const TCGCallArgumentLoc *loc = &info->in[0];
288
+ TCGType type;
289
+ unsigned slot;
290
+ tcg_target_ulong imm;
291
+
292
+ /*
293
+ * Handle env, which is always first.
294
+ */
295
+ ptr_mov.dst = loc->arg_slot;
296
+ ptr_mov.src = TCG_AREG0;
297
+ tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
298
+
299
+ /*
300
+ * Handle oi.
301
+ */
302
+ imm = ldst->oi;
303
+ loc = &info->in[next_arg];
304
+ type = TCG_TYPE_I32;
305
+ switch (loc->kind) {
306
+ case TCG_CALL_ARG_NORMAL:
307
+ break;
308
+ case TCG_CALL_ARG_EXTEND_U:
309
+ case TCG_CALL_ARG_EXTEND_S:
310
+ /* No extension required for MemOpIdx. */
311
+ tcg_debug_assert(imm <= INT32_MAX);
312
+ type = TCG_TYPE_REG;
313
+ break;
314
+ default:
315
+ g_assert_not_reached();
316
+ }
317
+ tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
318
+ next_arg++;
319
+
320
+ /*
321
+ * Handle ra.
322
+ */
323
+ loc = &info->in[next_arg];
324
+ slot = loc->arg_slot;
325
+ if (parm->ra_gen) {
326
+ int arg_reg = -1;
327
+ TCGReg ra_reg;
328
+
329
+ if (arg_slot_reg_p(slot)) {
330
+ arg_reg = tcg_target_call_iarg_regs[slot];
331
+ }
332
+ ra_reg = parm->ra_gen(s, ldst, arg_reg);
333
+
334
+ ptr_mov.dst = slot;
335
+ ptr_mov.src = ra_reg;
336
+ tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
337
+ } else {
338
+ imm = (uintptr_t)ldst->raddr;
339
+ tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
340
+ }
341
+}
342
+
343
+static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
344
+ const TCGCallArgumentLoc *loc,
345
+ TCGType dst_type, TCGType src_type,
346
+ TCGReg lo, TCGReg hi)
347
+{
348
+ if (dst_type <= TCG_TYPE_REG) {
349
+ MemOp src_ext;
350
+
351
+ switch (loc->kind) {
352
+ case TCG_CALL_ARG_NORMAL:
353
+ src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
354
+ break;
355
+ case TCG_CALL_ARG_EXTEND_U:
356
+ dst_type = TCG_TYPE_REG;
357
+ src_ext = MO_UL;
358
+ break;
359
+ case TCG_CALL_ARG_EXTEND_S:
360
+ dst_type = TCG_TYPE_REG;
361
+ src_ext = MO_SL;
362
+ break;
363
+ default:
364
+ g_assert_not_reached();
365
+ }
366
+
367
+ mov[0].dst = loc->arg_slot;
368
+ mov[0].dst_type = dst_type;
369
+ mov[0].src = lo;
370
+ mov[0].src_type = src_type;
371
+ mov[0].src_ext = src_ext;
372
+ return 1;
373
+ }
374
+
375
+ assert(TCG_TARGET_REG_BITS == 32);
376
+
377
+ mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
378
+ mov[0].src = lo;
379
+ mov[0].dst_type = TCG_TYPE_I32;
380
+ mov[0].src_type = TCG_TYPE_I32;
381
+ mov[0].src_ext = MO_32;
382
+
383
+ mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
384
+ mov[1].src = hi;
385
+ mov[1].dst_type = TCG_TYPE_I32;
386
+ mov[1].src_type = TCG_TYPE_I32;
387
+ mov[1].src_ext = MO_32;
388
+
389
+ return 2;
390
+}
391
+
392
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
393
+ const TCGLdstHelperParam *parm)
394
+{
395
+ const TCGHelperInfo *info;
396
+ const TCGCallArgumentLoc *loc;
397
+ TCGMovExtend mov[2];
398
+ unsigned next_arg, nmov;
399
+ MemOp mop = get_memop(ldst->oi);
400
+
401
+ switch (mop & MO_SIZE) {
402
+ case MO_8:
403
+ case MO_16:
404
+ case MO_32:
405
+ info = &info_helper_ld32_mmu;
406
+ break;
407
+ case MO_64:
408
+ info = &info_helper_ld64_mmu;
409
+ break;
410
+ default:
411
+ g_assert_not_reached();
412
+ }
413
+
414
+ /* Defer env argument. */
415
+ next_arg = 1;
416
+
417
+ loc = &info->in[next_arg];
418
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
419
+ ldst->addrlo_reg, ldst->addrhi_reg);
420
+ next_arg += nmov;
421
+
422
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
423
+
424
+ /* No special attention for 32 and 64-bit return values. */
425
+ tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
426
+
427
+ tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
428
+}
429
+
430
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
431
+ bool load_sign,
432
+ const TCGLdstHelperParam *parm)
433
+{
434
+ TCGMovExtend mov[2];
435
+
436
+ if (ldst->type <= TCG_TYPE_REG) {
437
+ MemOp mop = get_memop(ldst->oi);
438
+
439
+ mov[0].dst = ldst->datalo_reg;
440
+ mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
441
+ mov[0].dst_type = ldst->type;
442
+ mov[0].src_type = TCG_TYPE_REG;
443
+
444
+ /*
445
+ * If load_sign, then we allowed the helper to perform the
446
+ * appropriate sign extension to tcg_target_ulong, and all
447
+ * we need now is a plain move.
448
+ *
449
+ * If not, then we expect the relevant extension
450
+ * instruction to be no more expensive than a move, and
451
+ * we thus save the icache etc by only using one of two
452
+ * helper functions.
453
+ */
454
+ if (load_sign || !(mop & MO_SIGN)) {
455
+ if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
456
+ mov[0].src_ext = MO_32;
457
+ } else {
458
+ mov[0].src_ext = MO_64;
459
+ }
460
+ } else {
461
+ mov[0].src_ext = mop & MO_SSIZE;
462
+ }
463
+ tcg_out_movext1(s, mov);
464
+ } else {
465
+ assert(TCG_TARGET_REG_BITS == 32);
466
+
467
+ mov[0].dst = ldst->datalo_reg;
468
+ mov[0].src =
469
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
470
+ mov[0].dst_type = TCG_TYPE_I32;
471
+ mov[0].src_type = TCG_TYPE_I32;
472
+ mov[0].src_ext = MO_32;
473
+
474
+ mov[1].dst = ldst->datahi_reg;
475
+ mov[1].src =
476
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
477
+ mov[1].dst_type = TCG_TYPE_REG;
478
+ mov[1].src_type = TCG_TYPE_REG;
479
+ mov[1].src_ext = MO_32;
480
+
481
+ tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
482
+ }
483
+}
484
+
485
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
486
+ const TCGLdstHelperParam *parm)
487
+{
488
+ const TCGHelperInfo *info;
489
+ const TCGCallArgumentLoc *loc;
490
+ TCGMovExtend mov[4];
491
+ TCGType data_type;
492
+ unsigned next_arg, nmov, n;
493
+ MemOp mop = get_memop(ldst->oi);
494
+
495
+ switch (mop & MO_SIZE) {
496
+ case MO_8:
497
+ case MO_16:
498
+ case MO_32:
499
+ info = &info_helper_st32_mmu;
500
+ data_type = TCG_TYPE_I32;
501
+ break;
502
+ case MO_64:
503
+ info = &info_helper_st64_mmu;
504
+ data_type = TCG_TYPE_I64;
505
+ break;
506
+ default:
507
+ g_assert_not_reached();
508
+ }
509
+
510
+ /* Defer env argument. */
511
+ next_arg = 1;
512
+ nmov = 0;
513
+
514
+ /* Handle addr argument. */
515
+ loc = &info->in[next_arg];
516
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
517
+ ldst->addrlo_reg, ldst->addrhi_reg);
518
+ next_arg += n;
519
+ nmov += n;
520
+
521
+ /* Handle data argument. */
522
+ loc = &info->in[next_arg];
523
+ n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
524
+ ldst->datalo_reg, ldst->datahi_reg);
525
+ next_arg += n;
526
+ nmov += n;
527
+ tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
528
+
529
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
530
+ tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
531
+}
532
+
533
#ifdef CONFIG_PROFILER
534
535
/* avoid copy/paste errors */
536
--
45
--
537
2.34.1
46
2.43.0
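
A note on the conflict-resolution loop in tcg_out_helper_load_regs above: the
shape of the recursion is easier to see outside of TCG. Below is a minimal
standalone sketch of the same idea, with plain integers standing in for TCGReg
and a trivial emit_mov in place of tcg_out_mov/tcg_out_movext1; every name in
it is illustrative rather than QEMU API, and it collapses the movext2/movext1
tail cases into the plain loop.

/* Standalone sketch: batch register moves with one scratch register.
 * Walk the moves from the end; if a destination would clobber a source
 * still needed by an earlier move, park that source in the scratch,
 * recurse over the earlier moves, then finish the parked move.
 */
#include <assert.h>

#define NREGS   8
#define SCRATCH 7

typedef struct { int dst, src; } Move;

static int regs[NREGS];

static void emit_mov(int dst, int src)
{
    regs[dst] = regs[src];
}

static void emit_moves(unsigned nmov, Move *mov)
{
    for (unsigned i = nmov; i-- > 0; ) {
        for (unsigned j = 0; j < i; ++j) {
            if (mov[i].dst == mov[j].src) {
                /* Conflict: copy the source aside and recurse. */
                emit_mov(SCRATCH, mov[i].src);
                mov[i].src = SCRATCH;
                emit_moves(i, mov);
                emit_mov(mov[i].dst, mov[i].src);
                return;
            }
        }
        /* No conflict: perform this move and continue. */
        emit_mov(mov[i].dst, mov[i].src);
    }
}

int main(void)
{
    for (int i = 0; i < NREGS; ++i) {
        regs[i] = 100 + i;
    }
    /* r0 <- r1 and r1 <- r0 overlap; the scratch breaks the cycle. */
    Move mov[2] = { { .dst = 0, .src = 1 }, { .dst = 1, .src = 0 } };
    emit_moves(2, mov);
    assert(regs[0] == 101 && regs[1] == 100);
    return 0;
}

The only point is the ordering: work from the last move backwards, and spill a
conflicting source to the scratch before recursing over the earlier moves.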
538
539
1
Adjust the softmmu tlb to use R0+R1, not any of the normally available
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
registers. Since we handle overlap between inputs and helper arguments,
2
Sink mask computation below fold_affected_mask early exit.
3
we can allow any allocatable reg.
4
3
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
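
For the fold_and change described above (avoiding the OptContext slots and
passing the masks straight to fold_masks_zs), the combination rules are simple
enough to check with plain integers. A rough standalone illustration with
made-up masks, not QEMU data structures:

/* z_mask: bits that may still be non-zero.  AND can only clear bits,
 * so the result's z_mask is the intersection of the inputs' z_masks.
 * s_mask: bits known to repeat the sign bit.  A result bit is
 * guaranteed to repeat the result's sign where both inputs repeat
 * theirs, hence the AND of the two s_masks.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t z1 = 0x00000000000000ffull;    /* op1: at most the low 8 bits set */
    uint64_t z2 = 0x000000000000003full;    /* op2: at most the low 6 bits set */
    uint64_t s1 = 0xffffffffffffff00ull;    /* op1: bits 8..63 repeat the sign */
    uint64_t s2 = 0xffffffffffff0000ull;    /* op2: bits 16..63 repeat the sign */

    uint64_t z_mask = z1 & z2;              /* result fits in the low 6 bits */
    uint64_t s_mask = s1 & s2;              /* only common repetitions survive */

    assert(z_mask == 0x3full);
    assert(s_mask == 0xffffffffffff0000ull);
    return 0;
}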
7
---
6
---
8
tcg/s390x/tcg-target-con-set.h | 2 --
7
tcg/optimize.c | 30 ++++++++++++++++--------------
9
tcg/s390x/tcg-target-con-str.h | 1 -
8
1 file changed, 16 insertions(+), 14 deletions(-)
10
tcg/s390x/tcg-target.c.inc | 36 ++++++++++++----------------------
11
3 files changed, 12 insertions(+), 27 deletions(-)
12
9
13
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/s390x/tcg-target-con-set.h
12
--- a/tcg/optimize.c
16
+++ b/tcg/s390x/tcg-target-con-set.h
13
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static bool fold_add2(OptContext *ctx, TCGOp *op)
18
* tcg-target-con-str.h; the constraint combination is inclusive or.
15
19
*/
16
static bool fold_and(OptContext *ctx, TCGOp *op)
20
C_O0_I1(r)
17
{
21
-C_O0_I2(L, L)
18
- uint64_t z1, z2;
22
C_O0_I2(r, r)
19
+ uint64_t z1, z2, z_mask, s_mask;
23
C_O0_I2(r, ri)
20
+ TempOptInfo *t1, *t2;
24
C_O0_I2(r, rA)
21
25
C_O0_I2(v, r)
22
if (fold_const2_commutative(ctx, op) ||
26
-C_O1_I1(r, L)
23
fold_xi_to_i(ctx, op, 0) ||
27
C_O1_I1(r, r)
24
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
28
C_O1_I1(v, r)
25
return true;
29
C_O1_I1(v, v)
26
}
30
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
27
31
index XXXXXXX..XXXXXXX 100644
28
- z1 = arg_info(op->args[1])->z_mask;
32
--- a/tcg/s390x/tcg-target-con-str.h
29
- z2 = arg_info(op->args[2])->z_mask;
33
+++ b/tcg/s390x/tcg-target-con-str.h
30
- ctx->z_mask = z1 & z2;
34
@@ -XXX,XX +XXX,XX @@
35
* REGS(letter, register_mask)
36
*/
37
REGS('r', ALL_GENERAL_REGS)
38
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
39
REGS('v', ALL_VECTOR_REGS)
40
REGS('o', 0xaaaa) /* odd numbered general regs */
41
42
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/s390x/tcg-target.c.inc
45
+++ b/tcg/s390x/tcg-target.c.inc
46
@@ -XXX,XX +XXX,XX @@
47
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
48
#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
49
50
-/*
51
- * For softmmu, we need to avoid conflicts with the first 3
52
- * argument registers to perform the tlb lookup, and to call
53
- * the helper function.
54
- */
55
-#ifdef CONFIG_SOFTMMU
56
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
57
-#else
58
-#define SOFTMMU_RESERVE_REGS 0
59
-#endif
60
-
31
-
61
-
32
- /*
62
/* Several places within the instruction set 0 means "no register"
33
- * Sign repetitions are perforce all identical, whether they are 1 or 0.
63
rather than TCG_REG_R0. */
34
- * Bitwise operations preserve the relative quantity of the repetitions.
64
#define TCG_REG_NONE 0
35
- */
65
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
36
- ctx->s_mask = arg_info(op->args[1])->s_mask
66
ldst->oi = oi;
37
- & arg_info(op->args[2])->s_mask;
67
ldst->addrlo_reg = addr_reg;
38
+ t1 = arg_info(op->args[1]);
68
39
+ t2 = arg_info(op->args[2]);
69
- tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
40
+ z1 = t1->z_mask;
70
+ tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
41
+ z2 = t2->z_mask;
71
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
72
73
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
74
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
75
- tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
76
- tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
77
+ tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
78
+ tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
79
42
80
/*
43
/*
81
* For aligned accesses, we check the first byte and include the alignment
44
* Known-zeros does not imply known-ones. Therefore unless
82
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
* arg2 is constant, we can't infer affected bits from it.
83
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
46
*/
84
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
47
- if (arg_is_const(op->args[2]) &&
85
if (a_off == 0) {
48
- fold_affected_mask(ctx, op, z1 & ~z2)) {
86
- tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
49
+ if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
87
+ tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
50
return true;
88
} else {
89
- tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
90
- tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
91
+ tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
92
+ tgen_andi(s, TCG_TYPE_TL, TCG_REG_R0, tlb_mask);
93
}
51
}
94
52
95
if (is_ld) {
53
- return fold_masks(ctx, op);
96
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
54
+ z_mask = z1 & z2;
97
ofs = offsetof(CPUTLBEntry, addr_write);
55
+
98
}
56
+ /*
99
if (TARGET_LONG_BITS == 32) {
57
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
100
- tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
58
+ * Bitwise operations preserve the relative quantity of the repetitions.
101
+ tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
59
+ */
102
} else {
60
+ s_mask = t1->s_mask & t2->s_mask;
103
- tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
61
+
104
+ tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
62
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
105
}
63
}
106
64
107
tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
65
static bool fold_andc(OptContext *ctx, TCGOp *op)
108
ldst->label_ptr[0] = s->code_ptr++;
109
110
- h->index = TCG_REG_R2;
111
- tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE,
112
+ h->index = TCG_TMP0;
113
+ tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
114
offsetof(CPUTLBEntry, addend));
115
116
if (TARGET_LONG_BITS == 32) {
117
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
118
119
case INDEX_op_qemu_ld_i32:
120
case INDEX_op_qemu_ld_i64:
121
- return C_O1_I1(r, L);
122
+ return C_O1_I1(r, r);
123
case INDEX_op_qemu_st_i64:
124
case INDEX_op_qemu_st_i32:
125
- return C_O0_I2(L, L);
126
+ return C_O0_I2(r, r);
127
128
case INDEX_op_deposit_i32:
129
case INDEX_op_deposit_i64:
130
--
66
--
131
2.34.1
67
2.43.0
132
133
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Avoid double inversion of the value of the second const operand.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
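
The double inversion being avoided is easiest to see with concrete numbers.
A rough standalone sketch; the values are made up and only the identity
matters:

/* For x & ~c with a constant c, the result can have a bit set only
 * where x may be non-zero and c does not clear it: z_result = z_x & ~c.
 * The old code first built ~c and then negated it again inside the
 * affected-bits test; using c directly gives the same masks in one step.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t z_x = 0x00ffull;               /* x: only the low 8 bits may be set */
    uint64_t c   = 0x0f0full;               /* constant second operand */

    uint64_t old_z2       = ~c;             /* old: invert ... */
    uint64_t old_affected = z_x & ~old_z2;  /* ... and invert again */
    uint64_t new_affected = z_x & c;        /* new: use the value directly */

    assert(old_affected == new_affected);
    assert((z_x & old_z2) == (z_x & ~c));   /* resulting z_mask is unchanged */
    return 0;
}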
6
---
7
tcg/optimize.c | 21 +++++++++++----------
8
1 file changed, 11 insertions(+), 10 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
15
16
static bool fold_andc(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t z1;
19
+ uint64_t z_mask, s_mask;
20
+ TempOptInfo *t1, *t2;
21
22
if (fold_const2(ctx, op) ||
23
fold_xx_to_i(ctx, op, 0) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
25
return true;
26
}
27
28
- z1 = arg_info(op->args[1])->z_mask;
29
+ t1 = arg_info(op->args[1]);
30
+ t2 = arg_info(op->args[2]);
31
+ z_mask = t1->z_mask;
32
33
/*
34
* Known-zeros does not imply known-ones. Therefore unless
35
* arg2 is constant, we can't infer anything from it.
36
*/
37
- if (arg_is_const(op->args[2])) {
38
- uint64_t z2 = ~arg_info(op->args[2])->z_mask;
39
- if (fold_affected_mask(ctx, op, z1 & ~z2)) {
40
+ if (ti_is_const(t2)) {
41
+ uint64_t v2 = ti_const_val(t2);
42
+ if (fold_affected_mask(ctx, op, z_mask & v2)) {
43
return true;
44
}
45
- z1 &= z2;
46
+ z_mask &= ~v2;
47
}
48
- ctx->z_mask = z1;
49
50
- ctx->s_mask = arg_info(op->args[1])->s_mask
51
- & arg_info(op->args[2])->s_mask;
52
- return fold_masks(ctx, op);
53
+ s_mask = t1->s_mask & t2->s_mask;
54
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
55
}
56
57
static bool fold_brcond(OptContext *ctx, TCGOp *op)
58
--
59
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Always set s_mask along the BSWAP_OS path, since the result is
3
being explicitly sign-extended.
1
4
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
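
The reason s_mask can be set unconditionally on the BSWAP_OS path is that the
output is sign-extended by construction, whether the sign bit ends up 0 or 1.
A small standalone sketch of that fact for a 16-bit swap (hand-written, not
QEMU code):

/* After swapping the two bytes and converting through int16_t, bits
 * 15..63 of the result are all copies of bit 15 -- for a clear sign
 * bit just as much as for a set one -- which is exactly what a
 * non-trivial s_mask records.
 */
#include <assert.h>
#include <stdint.h>

static int64_t bswap16_os(uint16_t x)
{
    uint16_t swapped = (uint16_t)((x << 8) | (x >> 8));
    return (int16_t)swapped;                 /* explicit sign extension */
}

int main(void)
{
    assert(bswap16_os(0x3412) == 0x1234);                          /* sign bit 0 */
    assert((uint64_t)bswap16_os(0x34f2) == 0xfffffffffffff234ull); /* sign bit 1 */
    return 0;
}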
7
---
8
tcg/optimize.c | 21 ++++++++++-----------
9
1 file changed, 10 insertions(+), 11 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
16
static bool fold_bswap(OptContext *ctx, TCGOp *op)
17
{
18
uint64_t z_mask, s_mask, sign;
19
+ TempOptInfo *t1 = arg_info(op->args[1]);
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t = arg_info(op->args[1])->val;
23
-
24
- t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
25
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
26
+ if (ti_is_const(t1)) {
27
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
28
+ do_constant_folding(op->opc, ctx->type,
29
+ ti_const_val(t1),
30
+ op->args[2]));
31
}
32
33
- z_mask = arg_info(op->args[1])->z_mask;
34
-
35
+ z_mask = t1->z_mask;
36
switch (op->opc) {
37
case INDEX_op_bswap16_i32:
38
case INDEX_op_bswap16_i64:
39
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
40
/* If the sign bit may be 1, force all the bits above to 1. */
41
if (z_mask & sign) {
42
z_mask |= sign;
43
- s_mask = sign << 1;
44
}
45
+ /* The value and therefore s_mask is explicitly sign-extended. */
46
+ s_mask = sign;
47
break;
48
default:
49
/* The high bits are undefined: force all bits above the sign to 1. */
50
z_mask |= sign << 1;
51
break;
52
}
53
- ctx->z_mask = z_mask;
54
- ctx->s_mask = s_mask;
55
56
- return fold_masks(ctx, op);
57
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
58
}
59
60
static bool fold_call(OptContext *ctx, TCGOp *op)
61
--
62
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Compute s_mask from the union of the maximum count and the
3
op2 fallback for op1 being zero.
1
4
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
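
The s_mask computation here merges the two possible outcomes of the op. A rough
standalone sketch with hypothetical fallback masks; the variable names are
illustrative, not QEMU's:

/* clz/ctz(x, fallback): if x != 0 the result is a count in [0, 63], so
 * everything above bit 5 is known zero and trivially repeats the (zero)
 * sign bit; if x == 0 the result is the fallback operand.  Combining
 * the cases: union the possible-one bits, intersect the guaranteed
 * sign repetitions.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t count_z = 63;                       /* a count needs bits 0..5 only */
    uint64_t count_s = ~count_z;                 /* bits 6..63 are known zero */

    uint64_t fb_z = 0x00000000000000ffull;       /* hypothetical fallback masks */
    uint64_t fb_s = 0xffffffffffffff00ull;

    uint64_t z_mask = count_z | fb_z;
    uint64_t s_mask = count_s & fb_s;

    assert(z_mask == 0xffull);
    assert(s_mask == 0xffffffffffffff00ull);
    return 0;
}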
7
---
8
tcg/optimize.c | 15 ++++++++++-----
9
1 file changed, 10 insertions(+), 5 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
16
17
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
18
{
19
- uint64_t z_mask;
20
+ uint64_t z_mask, s_mask;
21
+ TempOptInfo *t1 = arg_info(op->args[1]);
22
+ TempOptInfo *t2 = arg_info(op->args[2]);
23
24
- if (arg_is_const(op->args[1])) {
25
- uint64_t t = arg_info(op->args[1])->val;
26
+ if (ti_is_const(t1)) {
27
+ uint64_t t = ti_const_val(t1);
28
29
if (t != 0) {
30
t = do_constant_folding(op->opc, ctx->type, t, 0);
31
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
32
default:
33
g_assert_not_reached();
34
}
35
- ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
36
- return false;
37
+ s_mask = ~z_mask;
38
+ z_mask |= t2->z_mask;
39
+ s_mask &= t2->s_mask;
40
+
41
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
42
}
43
44
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
45
--
46
2.43.0
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
1
Add fold_masks_z as a trivial wrapper around fold_masks_zs.
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
2
Avoid the use of the OptContext slots.
3
into one function that returns HostAddress and TCGLabelQemuLdst structures.
4
3
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/ppc/tcg-target.c.inc | 381 ++++++++++++++++++---------------------
7
tcg/optimize.c | 13 ++++++++++---
9
1 file changed, 172 insertions(+), 209 deletions(-)
8
1 file changed, 10 insertions(+), 3 deletions(-)
10
9
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
12
--- a/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target.c.inc
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
16
[MO_BEUQ] = helper_be_stq_mmu,
17
};
18
19
-/* We expect to use a 16-bit negative offset from ENV. */
20
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
22
-
23
-/* Perform the TLB load and compare. Places the result of the comparison
24
- in CR7, loads the addend of the TLB into R3, and returns the register
25
- containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
26
-
27
-static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
28
- TCGReg addrlo, TCGReg addrhi,
29
- int mem_index, bool is_read)
30
-{
31
- int cmp_off
32
- = (is_read
33
- ? offsetof(CPUTLBEntry, addr_read)
34
- : offsetof(CPUTLBEntry, addr_write));
35
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
36
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
37
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
38
- unsigned s_bits = opc & MO_SIZE;
39
- unsigned a_bits = get_alignment_bits(opc);
40
-
41
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
42
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
43
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
44
-
45
- /* Extract the page index, shifted into place for tlb index. */
46
- if (TCG_TARGET_REG_BITS == 32) {
47
- tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
48
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
49
- } else {
50
- tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
51
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
52
- }
53
- tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
54
-
55
- /* Load the TLB comparator. */
56
- if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
57
- uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
58
- ? LWZUX : LDUX);
59
- tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
60
- } else {
61
- tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
62
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
63
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
64
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
65
- } else {
66
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
67
- }
68
- }
69
-
70
- /* Load the TLB addend for use on the fast path. Do this asap
71
- to minimize any load use delay. */
72
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
73
- offsetof(CPUTLBEntry, addend));
74
-
75
- /* Clear the non-page, non-alignment bits from the address */
76
- if (TCG_TARGET_REG_BITS == 32) {
77
- /* We don't support unaligned accesses on 32-bits.
78
- * Preserve the bottom bits and thus trigger a comparison
79
- * failure on unaligned accesses.
80
- */
81
- if (a_bits < s_bits) {
82
- a_bits = s_bits;
83
- }
84
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
85
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
86
- } else {
87
- TCGReg t = addrlo;
88
-
89
- /* If the access is unaligned, we need to make sure we fail if we
90
- * cross a page boundary. The trick is to add the access size-1
91
- * to the address before masking the low bits. That will make the
92
- * address overflow to the next page if we cross a page boundary,
93
- * which will then force a mismatch of the TLB compare.
94
- */
95
- if (a_bits < s_bits) {
96
- unsigned a_mask = (1 << a_bits) - 1;
97
- unsigned s_mask = (1 << s_bits) - 1;
98
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
99
- t = TCG_REG_R0;
100
- }
101
-
102
- /* Mask the address for the requested alignment. */
103
- if (TARGET_LONG_BITS == 32) {
104
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
105
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
106
- /* Zero-extend the address for use in the final address. */
107
- tcg_out_ext32u(s, TCG_REG_R4, addrlo);
108
- addrlo = TCG_REG_R4;
109
- } else if (a_bits == 0) {
110
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
111
- } else {
112
- tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
113
- 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
114
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
115
- }
116
- }
117
-
118
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
119
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
120
- 0, 7, TCG_TYPE_I32);
121
- tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
122
- tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
123
- } else {
124
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
125
- 0, 7, TCG_TYPE_TL);
126
- }
127
-
128
- return addrlo;
129
-}
130
-
131
-/* Record the context of a call to the out of line helper code for the slow
132
- path for a load or store, so that we can later generate the correct
133
- helper code. */
134
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
135
- TCGType type, MemOpIdx oi,
136
- TCGReg datalo_reg, TCGReg datahi_reg,
137
- TCGReg addrlo_reg, TCGReg addrhi_reg,
138
- tcg_insn_unit *raddr, tcg_insn_unit *lptr)
139
-{
140
- TCGLabelQemuLdst *label = new_ldst_label(s);
141
-
142
- label->is_ld = is_ld;
143
- label->type = type;
144
- label->oi = oi;
145
- label->datalo_reg = datalo_reg;
146
- label->datahi_reg = datahi_reg;
147
- label->addrlo_reg = addrlo_reg;
148
- label->addrhi_reg = addrhi_reg;
149
- label->raddr = tcg_splitwx_to_rx(raddr);
150
- label->label_ptr[0] = lptr;
151
-}
152
-
153
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
154
{
155
MemOpIdx oi = lb->oi;
156
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
157
return true;
15
return true;
158
}
16
}
159
#else
17
160
-
18
+static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
161
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
162
- TCGReg addrhi, unsigned a_bits)
163
-{
164
- unsigned a_mask = (1 << a_bits) - 1;
165
- TCGLabelQemuLdst *label = new_ldst_label(s);
166
-
167
- label->is_ld = is_ld;
168
- label->addrlo_reg = addrlo;
169
- label->addrhi_reg = addrhi;
170
-
171
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
172
- tcg_debug_assert(a_bits < 16);
173
- tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
174
-
175
- label->label_ptr[0] = s->code_ptr;
176
- tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
177
-
178
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
179
-}
180
-
181
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
182
{
183
if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
184
@@ -XXX,XX +XXX,XX @@ typedef struct {
185
TCGReg index;
186
} HostAddress;
187
188
+/*
189
+ * For softmmu, perform the TLB load and compare.
190
+ * For useronly, perform any required alignment tests.
191
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
192
+ * is required and fill in @h with the host address for the fast path.
193
+ */
194
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
195
+ TCGReg addrlo, TCGReg addrhi,
196
+ MemOpIdx oi, bool is_ld)
197
+{
19
+{
198
+ TCGLabelQemuLdst *ldst = NULL;
20
+ return fold_masks_zs(ctx, op, z_mask, 0);
199
+ MemOp opc = get_memop(oi);
200
+ unsigned a_bits = get_alignment_bits(opc);
201
+
202
+#ifdef CONFIG_SOFTMMU
203
+ int mem_index = get_mmuidx(oi);
204
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
205
+ : offsetof(CPUTLBEntry, addr_write);
206
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
207
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
208
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
209
+ unsigned s_bits = opc & MO_SIZE;
210
+
211
+ ldst = new_ldst_label(s);
212
+ ldst->is_ld = is_ld;
213
+ ldst->oi = oi;
214
+ ldst->addrlo_reg = addrlo;
215
+ ldst->addrhi_reg = addrhi;
216
+
217
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
218
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
219
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
220
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
221
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
222
+
223
+ /* Extract the page index, shifted into place for tlb index. */
224
+ if (TCG_TARGET_REG_BITS == 32) {
225
+ tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
226
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
227
+ } else {
228
+ tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
229
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
230
+ }
231
+ tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
232
+
233
+ /* Load the TLB comparator. */
234
+ if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
235
+ uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
236
+ ? LWZUX : LDUX);
237
+ tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
238
+ } else {
239
+ tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
240
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
241
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
242
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
243
+ } else {
244
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
245
+ }
246
+ }
247
+
248
+ /*
249
+ * Load the TLB addend for use on the fast path.
250
+ * Do this asap to minimize any load use delay.
251
+ */
252
+ h->base = TCG_REG_R3;
253
+ tcg_out_ld(s, TCG_TYPE_PTR, h->base, TCG_REG_R3,
254
+ offsetof(CPUTLBEntry, addend));
255
+
256
+ /* Clear the non-page, non-alignment bits from the address */
257
+ if (TCG_TARGET_REG_BITS == 32) {
258
+ /*
259
+ * We don't support unaligned accesses on 32-bits.
260
+ * Preserve the bottom bits and thus trigger a comparison
261
+ * failure on unaligned accesses.
262
+ */
263
+ if (a_bits < s_bits) {
264
+ a_bits = s_bits;
265
+ }
266
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
267
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
268
+ } else {
269
+ TCGReg t = addrlo;
270
+
271
+ /*
272
+ * If the access is unaligned, we need to make sure we fail if we
273
+ * cross a page boundary. The trick is to add the access size-1
274
+ * to the address before masking the low bits. That will make the
275
+ * address overflow to the next page if we cross a page boundary,
276
+ * which will then force a mismatch of the TLB compare.
277
+ */
278
+ if (a_bits < s_bits) {
279
+ unsigned a_mask = (1 << a_bits) - 1;
280
+ unsigned s_mask = (1 << s_bits) - 1;
281
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
282
+ t = TCG_REG_R0;
283
+ }
284
+
285
+ /* Mask the address for the requested alignment. */
286
+ if (TARGET_LONG_BITS == 32) {
287
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
288
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
289
+ /* Zero-extend the address for use in the final address. */
290
+ tcg_out_ext32u(s, TCG_REG_R4, addrlo);
291
+ addrlo = TCG_REG_R4;
292
+ } else if (a_bits == 0) {
293
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
294
+ } else {
295
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
296
+ 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
297
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
298
+ }
299
+ }
300
+ h->index = addrlo;
301
+
302
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
303
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
304
+ 0, 7, TCG_TYPE_I32);
305
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
306
+ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
307
+ } else {
308
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
309
+ 0, 7, TCG_TYPE_TL);
310
+ }
311
+
312
+ /* Load a pointer into the current opcode w/conditional branch-link. */
313
+ ldst->label_ptr[0] = s->code_ptr;
314
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
315
+#else
316
+ if (a_bits) {
317
+ ldst = new_ldst_label(s);
318
+ ldst->is_ld = is_ld;
319
+ ldst->oi = oi;
320
+ ldst->addrlo_reg = addrlo;
321
+ ldst->addrhi_reg = addrhi;
322
+
323
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
324
+ tcg_debug_assert(a_bits < 16);
325
+ tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
326
+
327
+ ldst->label_ptr[0] = s->code_ptr;
328
+ tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
329
+ }
330
+
331
+ h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
332
+ h->index = addrlo;
333
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
334
+ tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
335
+ h->index = TCG_REG_TMP1;
336
+ }
337
+#endif
338
+
339
+ return ldst;
340
+}
21
+}
341
+
22
+
342
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
343
TCGReg addrlo, TCGReg addrhi,
344
MemOpIdx oi, TCGType data_type)
345
{
24
{
346
MemOp opc = get_memop(oi);
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
347
- MemOp s_bits = opc & MO_SIZE;
26
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
348
+ TCGLabelQemuLdst *ldst;
27
349
HostAddress h;
28
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
350
29
{
351
-#ifdef CONFIG_SOFTMMU
30
+ uint64_t z_mask;
352
- tcg_insn_unit *label_ptr;
31
+
353
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
32
if (fold_const1(ctx, op)) {
354
33
return true;
355
- h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true);
356
- h.base = TCG_REG_R3;
357
-
358
- /* Load a pointer into the current opcode w/conditional branch-link. */
359
- label_ptr = s->code_ptr;
360
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
361
-#else /* !CONFIG_SOFTMMU */
362
- unsigned a_bits = get_alignment_bits(opc);
363
- if (a_bits) {
364
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
365
- }
366
- h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
367
- h.index = addrlo;
368
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
369
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
370
- h.index = TCG_REG_TMP1;
371
- }
372
-#endif
373
-
374
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
375
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
376
if (opc & MO_BSWAP) {
377
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
378
tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
379
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
380
}
381
}
34
}
382
35
383
-#ifdef CONFIG_SOFTMMU
36
switch (ctx->type) {
384
- add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
37
case TCG_TYPE_I32:
385
- addrlo, addrhi, s->code_ptr, label_ptr);
38
- ctx->z_mask = 32 | 31;
386
-#endif
39
+ z_mask = 32 | 31;
387
+ if (ldst) {
40
break;
388
+ ldst->type = data_type;
41
case TCG_TYPE_I64:
389
+ ldst->datalo_reg = datalo;
42
- ctx->z_mask = 64 | 63;
390
+ ldst->datahi_reg = datahi;
43
+ z_mask = 64 | 63;
391
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
44
break;
392
+ }
45
default:
46
g_assert_not_reached();
47
}
48
- return false;
49
+ return fold_masks_z(ctx, op, z_mask);
393
}
50
}
394
51
395
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
52
static bool fold_deposit(OptContext *ctx, TCGOp *op)
396
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
397
MemOpIdx oi, TCGType data_type)
398
{
399
MemOp opc = get_memop(oi);
400
- MemOp s_bits = opc & MO_SIZE;
401
+ TCGLabelQemuLdst *ldst;
402
HostAddress h;
403
404
-#ifdef CONFIG_SOFTMMU
405
- tcg_insn_unit *label_ptr;
406
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
407
408
- h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false);
409
- h.base = TCG_REG_R3;
410
-
411
- /* Load a pointer into the current opcode w/conditional branch-link. */
412
- label_ptr = s->code_ptr;
413
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
414
-#else /* !CONFIG_SOFTMMU */
415
- unsigned a_bits = get_alignment_bits(opc);
416
- if (a_bits) {
417
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
418
- }
419
- h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
420
- h.index = addrlo;
421
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
422
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
423
- h.index = TCG_REG_TMP1;
424
- }
425
-#endif
426
-
427
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
428
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
429
if (opc & MO_BSWAP) {
430
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
431
tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
432
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
433
}
434
}
435
436
-#ifdef CONFIG_SOFTMMU
437
- add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
438
- addrlo, addrhi, s->code_ptr, label_ptr);
439
-#endif
440
+ if (ldst) {
441
+ ldst->type = data_type;
442
+ ldst->datalo_reg = datalo;
443
+ ldst->datahi_reg = datahi;
444
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
445
+ }
446
}
447
448
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
449
--
53
--
450
2.34.1
54
2.43.0
451
452
diff view generated by jsdifflib
1
Compare the address vs the tlb entry with sign-extended values.
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
This simplifies the page+alignment mask constant, and the
2
When we fold to and, use fold_and.
3
generation of the last byte address for the misaligned test.
4
3
5
Move the tlb addend load up, and the zero-extension down.
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
7
This frees up a register, which allows us use TMP3 as the returned base
8
address register instead of A0, which we were using as a 5th temporary.
9
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
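
On the simplified mask constant in the mips change above: once both the guest
address and the TLB comparator are kept sign-extended in host registers, the
same 32-bit page+alignment mask works after sign extension as well, so the
address no longer needs zero-extending before the compare. A rough standalone
illustration assuming a 4KiB page and byte alignment, much simplified relative
to the real code:

#include <assert.h>
#include <stdint.h>

/* Portable 32->64-bit sign extension. */
static int64_t sext32(uint32_t x)
{
    return (int64_t)(x ^ 0x80000000u) - 0x80000000ll;
}

int main(void)
{
    uint32_t addr = 0x80000010;                 /* "negative" guest address */
    uint32_t tag  = addr & 0xfffff000u;         /* 32-bit page tag */
    int64_t mask64 = -0x1000;                   /* sign-extension of 0xfffff000 */

    int hit32 = (addr & 0xfffff000u) == tag;
    int hit64 = (sext32(addr) & mask64) == sext32(tag);

    assert(hit32 && hit64);                     /* same answer either way */
    assert((sext32(addr) & mask64) != sext32(tag ^ 0x1000u));   /* misses still miss */
    return 0;
}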
12
---
6
---
13
tcg/mips/tcg-target.c.inc | 38 ++++++++++++++++++--------------------
7
tcg/optimize.c | 35 +++++++++++++++++------------------
14
1 file changed, 18 insertions(+), 20 deletions(-)
8
1 file changed, 17 insertions(+), 18 deletions(-)
15
9
16
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/mips/tcg-target.c.inc
12
--- a/tcg/optimize.c
19
+++ b/tcg/mips/tcg-target.c.inc
13
+++ b/tcg/optimize.c
20
@@ -XXX,XX +XXX,XX @@ typedef enum {
14
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
21
ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
15
22
ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
16
static bool fold_deposit(OptContext *ctx, TCGOp *op)
23
? OPC_SRL : OPC_DSRL,
17
{
24
+ ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
18
+ TempOptInfo *t1 = arg_info(op->args[1]);
25
+ ? OPC_ADDIU : OPC_DADDIU,
19
+ TempOptInfo *t2 = arg_info(op->args[2]);
26
} MIPSInsn;
20
+ int ofs = op->args[3];
27
21
+ int len = op->args[4];
28
/*
22
TCGOpcode and_opc;
29
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
+ uint64_t z_mask;
30
int add_off = offsetof(CPUTLBEntry, addend);
24
31
int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
25
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
32
: offsetof(CPUTLBEntry, addr_write);
26
- uint64_t t1 = arg_info(op->args[1])->val;
33
- target_ulong tlb_mask;
27
- uint64_t t2 = arg_info(op->args[2])->val;
34
28
-
35
ldst = new_ldst_label(s);
29
- t1 = deposit64(t1, op->args[3], op->args[4], t2);
36
ldst->is_ld = is_ld;
30
- return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
37
ldst->oi = oi;
31
+ if (ti_is_const(t1) && ti_is_const(t2)) {
38
ldst->addrlo_reg = addrlo;
32
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
39
ldst->addrhi_reg = addrhi;
33
+ deposit64(ti_const_val(t1), ofs, len,
40
- base = TCG_REG_A0;
34
+ ti_const_val(t2)));
41
42
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
43
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
46
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
47
} else {
48
- tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
49
- : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
50
- TCG_TMP0, TCG_TMP3, cmp_off);
51
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off);
52
}
35
}
53
36
54
- /* Zero extend a 32-bit guest address for a 64-bit host. */
37
switch (ctx->type) {
55
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
38
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
56
- tcg_out_ext32u(s, base, addrlo);
57
- addrlo = base;
58
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
59
+ /* Load the tlb addend for the fast path. */
60
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
61
}
39
}
62
40
63
/*
41
/* Inserting a value into zero at offset 0. */
64
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
42
- if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
65
* For unaligned accesses, compare against the end of the access to
43
- uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
66
* verify that it does not cross a page boundary.
44
+ if (ti_is_const_val(t1, 0) && ofs == 0) {
67
*/
45
+ uint64_t mask = MAKE_64BIT_MASK(0, len);
68
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
46
69
- tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
47
op->opc = and_opc;
70
- if (a_mask >= s_mask) {
48
op->args[1] = op->args[2];
71
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
49
op->args[2] = arg_new_constant(ctx, mask);
72
- } else {
50
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
73
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrlo, s_mask - a_mask);
51
- return false;
74
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
52
+ return fold_and(ctx, op);
75
+ if (a_mask < s_mask) {
76
+ tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask);
77
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
78
+ } else {
79
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
80
}
53
}
81
54
82
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
55
/* Inserting zero into a value. */
83
- /* Load the tlb addend for the fast path. */
56
- if (arg_is_const_val(op->args[2], 0)) {
84
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
57
- uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
85
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
58
+ if (ti_is_const_val(t2, 0)) {
86
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
59
+ uint64_t mask = deposit64(-1, ofs, len, 0);
87
+ tcg_out_ext32u(s, TCG_TMP2, addrlo);
60
88
+ addrlo = TCG_TMP2;
61
op->opc = and_opc;
62
op->args[2] = arg_new_constant(ctx, mask);
63
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
64
- return false;
65
+ return fold_and(ctx, op);
89
}
66
}
90
67
91
ldst->label_ptr[0] = s->code_ptr;
68
- ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
92
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
69
- op->args[3], op->args[4],
93
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
70
- arg_info(op->args[2])->z_mask);
94
71
- return false;
95
/* Load the tlb addend for the fast path. */
72
+ z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
96
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
73
+ return fold_masks_z(ctx, op, z_mask);
97
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
74
}
98
75
99
ldst->label_ptr[1] = s->code_ptr;
76
static bool fold_divide(OptContext *ctx, TCGOp *op)
100
tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
101
}
102
103
/* delay slot */
104
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrlo);
105
+ base = TCG_TMP3;
106
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo);
107
#else
108
if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
109
ldst = new_ldst_label(s);
110
--
77
--
111
2.34.1
78
2.43.0
112
113
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
1
The input which overlaps the sign bit of the output can
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
2
have its input s_mask propagated to the output s_mask.
3
into one function that returns HostAddress and TCGLabelQemuLdst structures.
4
3
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
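
On the ofs + len == width case added below in fold_deposit: when the deposited
field reaches the sign bit of the output, the second operand's sign
repetitions, shifted left by ofs, become sign repetitions of the result. A
standalone sketch with a hand-rolled deposit64 and helpers, illustrative only:

#include <assert.h>
#include <stdint.h>

/* Hand-rolled deposit: insert the low @len bits of @val at @ofs in @dst. */
static uint64_t deposit64(uint64_t dst, int ofs, int len, uint64_t val)
{
    uint64_t mask = (~0ull >> (64 - len)) << ofs;
    return (dst & ~mask) | ((val << ofs) & mask);
}

/* Do all bits selected by @mask equal bit 63 of @v? */
static int repeats_sign(uint64_t v, uint64_t mask)
{
    uint64_t sign = -(v >> 63);              /* 0 or all-ones */
    return ((v ^ sign) & mask) == 0;
}

int main(void)
{
    uint64_t t1 = 0x00000000deadbeefull;
    uint64_t t2 = (uint64_t)(int64_t)-42;    /* sign-extended small value */
    uint64_t t2_s_mask = ~0ull << 15;        /* bits 15..63 repeat t2's sign */
    int ofs = 32, len = 32;                  /* ofs + len == 64: top of result */

    assert(repeats_sign(t2, t2_s_mask));

    uint64_t res = deposit64(t1, ofs, len, t2);
    assert(repeats_sign(res, t2_s_mask << ofs));  /* bits 47..63 repeat res's sign */
    return 0;
}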
7
---
6
---
8
tcg/aarch64/tcg-target.c.inc | 313 +++++++++++++++--------------------
7
tcg/optimize.c | 14 ++++++++++++--
9
1 file changed, 133 insertions(+), 180 deletions(-)
8
1 file changed, 12 insertions(+), 2 deletions(-)
10
9
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/aarch64/tcg-target.c.inc
12
--- a/tcg/optimize.c
14
+++ b/tcg/aarch64/tcg-target.c.inc
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
16
tcg_out_goto(s, lb->raddr);
15
TempOptInfo *t2 = arg_info(op->args[2]);
17
return true;
16
int ofs = op->args[3];
18
}
17
int len = op->args[4];
19
-
18
+ int width;
20
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
19
TCGOpcode and_opc;
21
- TCGType ext, TCGReg data_reg, TCGReg addr_reg,
20
- uint64_t z_mask;
22
- tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
21
+ uint64_t z_mask, s_mask;
23
-{
22
24
- TCGLabelQemuLdst *label = new_ldst_label(s);
23
if (ti_is_const(t1) && ti_is_const(t2)) {
25
-
24
return tcg_opt_gen_movi(ctx, op, op->args[0],
26
- label->is_ld = is_ld;
25
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
27
- label->oi = oi;
26
switch (ctx->type) {
28
- label->type = ext;
27
case TCG_TYPE_I32:
29
- label->datalo_reg = data_reg;
28
and_opc = INDEX_op_and_i32;
30
- label->addrlo_reg = addr_reg;
29
+ width = 32;
31
- label->raddr = tcg_splitwx_to_rx(raddr);
30
break;
32
- label->label_ptr[0] = label_ptr;
31
case TCG_TYPE_I64:
33
-}
32
and_opc = INDEX_op_and_i64;
34
-
33
+ width = 64;
35
-/* We expect to use a 7-bit scaled negative offset from ENV. */
34
break;
36
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
35
default:
37
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
36
g_assert_not_reached();
38
-
37
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
39
-/* These offsets are built into the LDP below. */
38
return fold_and(ctx, op);
40
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
39
}
41
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
40
42
-
41
+ /* The s_mask from the top portion of the deposit is still valid. */
43
-/* Load and compare a TLB entry, emitting the conditional jump to the
42
+ if (ofs + len == width) {
44
- slow path for the failure case, which will be patched later when finalizing
43
+ s_mask = t2->s_mask << ofs;
45
- the slow path. Generated code returns the host addend in X1,
46
- clobbers X0,X2,X3,TMP. */
47
-static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
48
- tcg_insn_unit **label_ptr, int mem_index,
49
- bool is_read)
50
-{
51
- unsigned a_bits = get_alignment_bits(opc);
52
- unsigned s_bits = opc & MO_SIZE;
53
- unsigned a_mask = (1u << a_bits) - 1;
54
- unsigned s_mask = (1u << s_bits) - 1;
55
- TCGReg x3;
56
- TCGType mask_type;
57
- uint64_t compare_mask;
58
-
59
- mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
60
- ? TCG_TYPE_I64 : TCG_TYPE_I32);
61
-
62
- /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
63
- tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
64
- TLB_MASK_TABLE_OFS(mem_index), 1, 0);
65
-
66
- /* Extract the TLB index from the address into X0. */
67
- tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
68
- TCG_REG_X0, TCG_REG_X0, addr_reg,
69
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
70
-
71
- /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
72
- tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
73
-
74
- /* Load the tlb comparator into X0, and the fast path addend into X1. */
75
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
76
- ? offsetof(CPUTLBEntry, addr_read)
77
- : offsetof(CPUTLBEntry, addr_write));
78
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
79
- offsetof(CPUTLBEntry, addend));
80
-
81
- /* For aligned accesses, we check the first byte and include the alignment
82
- bits within the address. For unaligned access, we check that we don't
83
- cross pages using the address of the last byte of the access. */
84
- if (a_bits >= s_bits) {
85
- x3 = addr_reg;
86
- } else {
87
- tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
88
- TCG_REG_X3, addr_reg, s_mask - a_mask);
89
- x3 = TCG_REG_X3;
90
- }
91
- compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
92
-
93
- /* Store the page mask part of the address into X3. */
94
- tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
95
- TCG_REG_X3, x3, compare_mask);
96
-
97
- /* Perform the address comparison. */
98
- tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
99
-
100
- /* If not equal, we jump to the slow path. */
101
- *label_ptr = s->code_ptr;
102
- tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
103
-}
104
-
105
#else
106
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
107
- unsigned a_bits)
108
-{
109
- unsigned a_mask = (1 << a_bits) - 1;
110
- TCGLabelQemuLdst *label = new_ldst_label(s);
111
-
112
- label->is_ld = is_ld;
113
- label->addrlo_reg = addr_reg;
114
-
115
- /* tst addr, #mask */
116
- tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
117
-
118
- label->label_ptr[0] = s->code_ptr;
119
-
120
- /* b.ne slow_path */
121
- tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
122
-
123
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
124
-}
125
-
126
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
127
{
128
if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
129
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
130
}
131
#endif /* CONFIG_SOFTMMU */
132
133
+/*
134
+ * For softmmu, perform the TLB load and compare.
135
+ * For useronly, perform any required alignment tests.
136
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
137
+ * is required and fill in @h with the host address for the fast path.
138
+ */
139
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
140
+ TCGReg addr_reg, MemOpIdx oi,
141
+ bool is_ld)
142
+{
143
+ TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
144
+ TCGLabelQemuLdst *ldst = NULL;
145
+ MemOp opc = get_memop(oi);
146
+ unsigned a_bits = get_alignment_bits(opc);
147
+ unsigned a_mask = (1u << a_bits) - 1;
148
+
149
+#ifdef CONFIG_SOFTMMU
150
+ unsigned s_bits = opc & MO_SIZE;
151
+ unsigned s_mask = (1u << s_bits) - 1;
152
+ unsigned mem_index = get_mmuidx(oi);
153
+ TCGReg x3;
154
+ TCGType mask_type;
155
+ uint64_t compare_mask;
156
+
157
+ ldst = new_ldst_label(s);
158
+ ldst->is_ld = is_ld;
159
+ ldst->oi = oi;
160
+ ldst->addrlo_reg = addr_reg;
161
+
162
+ mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
163
+ ? TCG_TYPE_I64 : TCG_TYPE_I32);
164
+
165
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
166
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
167
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
168
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
169
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
170
+ tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
171
+ TLB_MASK_TABLE_OFS(mem_index), 1, 0);
172
+
173
+ /* Extract the TLB index from the address into X0. */
174
+ tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
175
+ TCG_REG_X0, TCG_REG_X0, addr_reg,
176
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
177
+
178
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
179
+ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
180
+
181
+ /* Load the tlb comparator into X0, and the fast path addend into X1. */
182
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
183
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
184
+ : offsetof(CPUTLBEntry, addr_write));
185
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
186
+ offsetof(CPUTLBEntry, addend));
187
+
188
+ /*
189
+ * For aligned accesses, we check the first byte and include the alignment
190
+ * bits within the address. For unaligned access, we check that we don't
191
+ * cross pages using the address of the last byte of the access.
192
+ */
193
+ if (a_bits >= s_bits) {
194
+ x3 = addr_reg;
195
+ } else {
44
+ } else {
196
+ tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
45
+ s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
197
+ TCG_REG_X3, addr_reg, s_mask - a_mask);
198
+ x3 = TCG_REG_X3;
199
+ }
200
+ compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
201
+
202
+ /* Store the page mask part of the address into X3. */
203
+ tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
204
+ TCG_REG_X3, x3, compare_mask);
205
+
206
+ /* Perform the address comparison. */
207
+ tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
208
+
209
+ /* If not equal, we jump to the slow path. */
210
+ ldst->label_ptr[0] = s->code_ptr;
211
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
212
+
213
+ *h = (HostAddress){
214
+ .base = TCG_REG_X1,
215
+ .index = addr_reg,
216
+ .index_ext = addr_type
217
+ };
218
+#else
219
+ if (a_mask) {
220
+ ldst = new_ldst_label(s);
221
+
222
+ ldst->is_ld = is_ld;
223
+ ldst->oi = oi;
224
+ ldst->addrlo_reg = addr_reg;
225
+
226
+ /* tst addr, #mask */
227
+ tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
228
+
229
+ /* b.ne slow_path */
230
+ ldst->label_ptr[0] = s->code_ptr;
231
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
232
+ }
46
+ }
233
+
47
+
234
+ if (USE_GUEST_BASE) {
48
z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
235
+ *h = (HostAddress){
49
- return fold_masks_z(ctx, op, z_mask);
236
+ .base = TCG_REG_GUEST_BASE,
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
237
+ .index = addr_reg,
238
+ .index_ext = addr_type
239
+ };
240
+ } else {
241
+ *h = (HostAddress){
242
+ .base = addr_reg,
243
+ .index = TCG_REG_XZR,
244
+ .index_ext = TCG_TYPE_I64
245
+ };
246
+ }
247
+#endif
248
+
249
+ return ldst;
250
+}
251
+
252
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
253
TCGReg data_r, HostAddress h)
254
{
255
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
256
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
257
MemOpIdx oi, TCGType data_type)
258
{
259
- MemOp memop = get_memop(oi);
260
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
261
+ TCGLabelQemuLdst *ldst;
262
HostAddress h;
263
264
- /* Byte swapping is left to middle-end expansion. */
265
- tcg_debug_assert((memop & MO_BSWAP) == 0);
266
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
267
+ tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
268
269
-#ifdef CONFIG_SOFTMMU
270
- tcg_insn_unit *label_ptr;
271
-
272
- tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1);
273
-
274
- h = (HostAddress){
275
- .base = TCG_REG_X1,
276
- .index = addr_reg,
277
- .index_ext = addr_type
278
- };
279
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
280
-
281
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
282
- s->code_ptr, label_ptr);
283
-#else /* !CONFIG_SOFTMMU */
284
- unsigned a_bits = get_alignment_bits(memop);
285
- if (a_bits) {
286
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
287
+ if (ldst) {
288
+ ldst->type = data_type;
289
+ ldst->datalo_reg = data_reg;
290
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
291
}
292
- if (USE_GUEST_BASE) {
293
- h = (HostAddress){
294
- .base = TCG_REG_GUEST_BASE,
295
- .index = addr_reg,
296
- .index_ext = addr_type
297
- };
298
- } else {
299
- h = (HostAddress){
300
- .base = addr_reg,
301
- .index = TCG_REG_XZR,
302
- .index_ext = TCG_TYPE_I64
303
- };
304
- }
305
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
306
-#endif /* CONFIG_SOFTMMU */
307
}
51
}
308
52
309
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
53
static bool fold_divide(OptContext *ctx, TCGOp *op)
310
MemOpIdx oi, TCGType data_type)
311
{
312
- MemOp memop = get_memop(oi);
313
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
314
+ TCGLabelQemuLdst *ldst;
315
HostAddress h;
316
317
- /* Byte swapping is left to middle-end expansion. */
318
- tcg_debug_assert((memop & MO_BSWAP) == 0);
319
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
320
+ tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
321
322
-#ifdef CONFIG_SOFTMMU
323
- tcg_insn_unit *label_ptr;
324
-
325
- tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0);
326
-
327
- h = (HostAddress){
328
- .base = TCG_REG_X1,
329
- .index = addr_reg,
330
- .index_ext = addr_type
331
- };
332
- tcg_out_qemu_st_direct(s, memop, data_reg, h);
333
-
334
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
335
- s->code_ptr, label_ptr);
336
-#else /* !CONFIG_SOFTMMU */
337
- unsigned a_bits = get_alignment_bits(memop);
338
- if (a_bits) {
339
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
340
+ if (ldst) {
341
+ ldst->type = data_type;
342
+ ldst->datalo_reg = data_reg;
343
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
344
}
345
- if (USE_GUEST_BASE) {
346
- h = (HostAddress){
347
- .base = TCG_REG_GUEST_BASE,
348
- .index = addr_reg,
349
- .index_ext = addr_type
350
- };
351
- } else {
352
- h = (HostAddress){
353
- .base = addr_reg,
354
- .index = TCG_REG_XZR,
355
- .index_ext = TCG_TYPE_I64
356
- };
357
- }
358
- tcg_out_qemu_st_direct(s, memop, data_reg, h);
359
-#endif /* CONFIG_SOFTMMU */
360
}
361
362
static const tcg_insn_unit *tb_ret_addr;
363
--
54
--
364
2.34.1
55
2.43.0
365
366
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
12
fold_xi_to_x(ctx, op, 1)) {
13
return true;
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_dup(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 4 ++--
5
1 file changed, 2 insertions(+), 2 deletions(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_dup(OptContext *ctx, TCGOp *op)
12
t = dup_const(TCGOP_VECE(op), t);
13
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_dup2(OptContext *ctx, TCGOp *op)
20
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
21
op->opc = INDEX_op_dup_vec;
22
TCGOP_VECE(op) = MO_32;
23
}
24
- return false;
25
+ return finish_folding(ctx, op);
26
}
27
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
29
--
30
2.43.0
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
1
Add fold_masks_s as a trivial wrapper around fold_masks_zs.
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
2
Avoid the use of the OptContext slots.
3
into one function that returns TCGReg and TCGLabelQemuLdst.
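In outline, the converted load path reduces to the following shape (condensed
from the riscv hunk below, not a standalone program; the store side is
symmetric and the other backends follow the same pattern):

    ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
    if (ldst) {
        /* A slow path was emitted: record what its code must reload. */
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }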
4
3
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/riscv/tcg-target.c.inc | 253 +++++++++++++++++--------------------
7
tcg/optimize.c | 13 ++++++++++---
9
1 file changed, 114 insertions(+), 139 deletions(-)
8
1 file changed, 10 insertions(+), 3 deletions(-)
10
9
11
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/riscv/tcg-target.c.inc
12
--- a/tcg/optimize.c
14
+++ b/tcg/riscv/tcg-target.c.inc
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
16
#endif
15
return fold_masks_zs(ctx, op, z_mask, 0);
17
};
18
19
-/* We expect to use a 12-bit negative offset from ENV. */
20
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
22
-
23
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
24
{
25
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
26
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
27
tcg_debug_assert(ok);
28
}
16
}
29
17
30
-static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, MemOpIdx oi,
18
+static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
31
- tcg_insn_unit **label_ptr, bool is_load)
32
-{
33
- MemOp opc = get_memop(oi);
34
- unsigned s_bits = opc & MO_SIZE;
35
- unsigned a_bits = get_alignment_bits(opc);
36
- tcg_target_long compare_mask;
37
- int mem_index = get_mmuidx(oi);
38
- int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
39
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
40
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
41
- TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
42
-
43
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
44
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
45
-
46
- tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr,
47
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
48
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
49
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
50
-
51
- /* Load the tlb comparator and the addend. */
52
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
53
- is_load ? offsetof(CPUTLBEntry, addr_read)
54
- : offsetof(CPUTLBEntry, addr_write));
55
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
56
- offsetof(CPUTLBEntry, addend));
57
-
58
- /* We don't support unaligned accesses. */
59
- if (a_bits < s_bits) {
60
- a_bits = s_bits;
61
- }
62
- /* Clear the non-page, non-alignment bits from the address. */
63
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
64
- if (compare_mask == sextreg(compare_mask, 0, 12)) {
65
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr, compare_mask);
66
- } else {
67
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
68
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr);
69
- }
70
-
71
- /* Compare masked address with the TLB entry. */
72
- label_ptr[0] = s->code_ptr;
73
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
74
-
75
- /* TLB Hit - translate address using addend. */
76
- if (TARGET_LONG_BITS == 32) {
77
- tcg_out_ext32u(s, TCG_REG_TMP0, addr);
78
- addr = TCG_REG_TMP0;
79
- }
80
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr);
81
- return TCG_REG_TMP0;
82
-}
83
-
84
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
85
- TCGType data_type, TCGReg data_reg,
86
- TCGReg addr_reg, void *raddr,
87
- tcg_insn_unit **label_ptr)
88
-{
89
- TCGLabelQemuLdst *label = new_ldst_label(s);
90
-
91
- label->is_ld = is_ld;
92
- label->oi = oi;
93
- label->type = data_type;
94
- label->datalo_reg = data_reg;
95
- label->addrlo_reg = addr_reg;
96
- label->raddr = tcg_splitwx_to_rx(raddr);
97
- label->label_ptr[0] = label_ptr[0];
98
-}
99
-
100
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
101
{
102
MemOpIdx oi = l->oi;
103
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
104
return true;
105
}
106
#else
107
-
108
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
109
- unsigned a_bits)
110
-{
111
- unsigned a_mask = (1 << a_bits) - 1;
112
- TCGLabelQemuLdst *l = new_ldst_label(s);
113
-
114
- l->is_ld = is_ld;
115
- l->addrlo_reg = addr_reg;
116
-
117
- /* We are expecting a_bits to max out at 7, so we can always use andi. */
118
- tcg_debug_assert(a_bits < 12);
119
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
120
-
121
- l->label_ptr[0] = s->code_ptr;
122
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
123
-
124
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
125
-}
126
-
127
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
128
{
129
/* resolve label address */
130
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
131
{
132
return tcg_out_fail_alignment(s, l);
133
}
134
-
135
#endif /* CONFIG_SOFTMMU */
136
137
+/*
138
+ * For softmmu, perform the TLB load and compare.
139
+ * For useronly, perform any required alignment tests.
140
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
141
+ * is required and fill in @h with the host address for the fast path.
142
+ */
143
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
144
+ TCGReg addr_reg, MemOpIdx oi,
145
+ bool is_ld)
146
+{
19
+{
147
+ TCGLabelQemuLdst *ldst = NULL;
20
+ return fold_masks_zs(ctx, op, -1, s_mask);
148
+ MemOp opc = get_memop(oi);
149
+ unsigned a_bits = get_alignment_bits(opc);
150
+ unsigned a_mask = (1u << a_bits) - 1;
151
+
152
+#ifdef CONFIG_SOFTMMU
153
+ unsigned s_bits = opc & MO_SIZE;
154
+ int mem_index = get_mmuidx(oi);
155
+ int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
156
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
157
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
158
+ TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
159
+ tcg_target_long compare_mask;
160
+
161
+ ldst = new_ldst_label(s);
162
+ ldst->is_ld = is_ld;
163
+ ldst->oi = oi;
164
+ ldst->addrlo_reg = addr_reg;
165
+
166
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
167
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
168
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
169
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
170
+
171
+ tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
172
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
173
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
174
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
175
+
176
+ /* Load the tlb comparator and the addend. */
177
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
178
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
179
+ : offsetof(CPUTLBEntry, addr_write));
180
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
181
+ offsetof(CPUTLBEntry, addend));
182
+
183
+ /* We don't support unaligned accesses. */
184
+ if (a_bits < s_bits) {
185
+ a_bits = s_bits;
186
+ }
187
+ /* Clear the non-page, non-alignment bits from the address. */
188
+ compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask;
189
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
190
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask);
191
+ } else {
192
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
193
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
194
+ }
195
+
196
+ /* Compare masked address with the TLB entry. */
197
+ ldst->label_ptr[0] = s->code_ptr;
198
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
199
+
200
+ /* TLB Hit - translate address using addend. */
201
+ if (TARGET_LONG_BITS == 32) {
202
+ tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
203
+ addr_reg = TCG_REG_TMP0;
204
+ }
205
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg);
206
+ *pbase = TCG_REG_TMP0;
207
+#else
208
+ if (a_mask) {
209
+ ldst = new_ldst_label(s);
210
+ ldst->is_ld = is_ld;
211
+ ldst->oi = oi;
212
+ ldst->addrlo_reg = addr_reg;
213
+
214
+ /* We are expecting a_bits max 7, so we can always use andi. */
215
+ tcg_debug_assert(a_bits < 12);
216
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
217
+
218
+ ldst->label_ptr[0] = s->code_ptr;
219
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
220
+ }
221
+
222
+ TCGReg base = addr_reg;
223
+ if (TARGET_LONG_BITS == 32) {
224
+ tcg_out_ext32u(s, TCG_REG_TMP0, base);
225
+ base = TCG_REG_TMP0;
226
+ }
227
+ if (guest_base != 0) {
228
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
229
+ base = TCG_REG_TMP0;
230
+ }
231
+ *pbase = base;
232
+#endif
233
+
234
+ return ldst;
235
+}
21
+}
236
+
22
+
237
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
238
TCGReg base, MemOp opc, TCGType type)
239
{
24
{
240
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
241
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
26
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
242
MemOpIdx oi, TCGType data_type)
27
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
243
{
29
{
244
- MemOp opc = get_memop(oi);
30
+ uint64_t s_mask;
245
+ TCGLabelQemuLdst *ldst;
31
+
246
TCGReg base;
32
if (fold_const2_commutative(ctx, op) ||
247
33
fold_xi_to_x(ctx, op, -1) ||
248
-#if defined(CONFIG_SOFTMMU)
34
fold_xi_to_not(ctx, op, 0)) {
249
- tcg_insn_unit *label_ptr[1];
35
return true;
250
+ ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
251
+ tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
252
253
- base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
254
- tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
255
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
256
- s->code_ptr, label_ptr);
257
-#else
258
- unsigned a_bits = get_alignment_bits(opc);
259
- if (a_bits) {
260
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
261
+ if (ldst) {
262
+ ldst->type = data_type;
263
+ ldst->datalo_reg = data_reg;
264
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
265
}
36
}
266
- base = addr_reg;
37
267
- if (TARGET_LONG_BITS == 32) {
38
- ctx->s_mask = arg_info(op->args[1])->s_mask
268
- tcg_out_ext32u(s, TCG_REG_TMP0, base);
39
- & arg_info(op->args[2])->s_mask;
269
- base = TCG_REG_TMP0;
40
- return false;
270
- }
41
+ s_mask = arg_info(op->args[1])->s_mask
271
- if (guest_base != 0) {
42
+ & arg_info(op->args[2])->s_mask;
272
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
43
+ return fold_masks_s(ctx, op, s_mask);
273
- base = TCG_REG_TMP0;
274
- }
275
- tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
276
-#endif
277
}
44
}
278
45
279
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
46
static bool fold_extract(OptContext *ctx, TCGOp *op)
280
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
281
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
282
MemOpIdx oi, TCGType data_type)
283
{
284
- MemOp opc = get_memop(oi);
285
+ TCGLabelQemuLdst *ldst;
286
TCGReg base;
287
288
-#if defined(CONFIG_SOFTMMU)
289
- tcg_insn_unit *label_ptr[1];
290
+ ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
291
+ tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));
292
293
- base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
294
- tcg_out_qemu_st_direct(s, data_reg, base, opc);
295
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
296
- s->code_ptr, label_ptr);
297
-#else
298
- unsigned a_bits = get_alignment_bits(opc);
299
- if (a_bits) {
300
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
301
+ if (ldst) {
302
+ ldst->type = data_type;
303
+ ldst->datalo_reg = data_reg;
304
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
305
}
306
- base = addr_reg;
307
- if (TARGET_LONG_BITS == 32) {
308
- tcg_out_ext32u(s, TCG_REG_TMP0, base);
309
- base = TCG_REG_TMP0;
310
- }
311
- if (guest_base != 0) {
312
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
313
- base = TCG_REG_TMP0;
314
- }
315
- tcg_out_qemu_st_direct(s, data_reg, base, opc);
316
-#endif
317
}
318
319
static const tcg_insn_unit *tb_ret_addr;
320
--
47
--
321
2.34.1
48
2.43.0
322
323
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 15 ++++++---------
7
1 file changed, 6 insertions(+), 9 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
14
static bool fold_extract(OptContext *ctx, TCGOp *op)
15
{
16
uint64_t z_mask_old, z_mask;
17
+ TempOptInfo *t1 = arg_info(op->args[1]);
18
int pos = op->args[2];
19
int len = op->args[3];
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t;
23
-
24
- t = arg_info(op->args[1])->val;
25
- t = extract64(t, pos, len);
26
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
27
+ if (ti_is_const(t1)) {
28
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
29
+ extract64(ti_const_val(t1), pos, len));
30
}
31
32
- z_mask_old = arg_info(op->args[1])->z_mask;
33
+ z_mask_old = t1->z_mask;
34
z_mask = extract64(z_mask_old, pos, len);
35
if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
36
return true;
37
}
38
- ctx->z_mask = z_mask;
39
40
- return fold_masks(ctx, op);
41
+ return fold_masks_z(ctx, op, z_mask);
42
}
43
44
static bool fold_extract2(OptContext *ctx, TCGOp *op)
45
--
46
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
12
}
13
return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_exts(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
1
The softmmu tlb uses TCG_REG_TMP[0-2], not any of the normally available
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
registers. Now that we handle overlap between inputs and helper arguments,
2
Explicitly sign-extend z_mask instead of doing that manually.
3
we can allow any allocatable reg.
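The sign-extension change mentioned above amounts to letting a narrowing cast
do the extension. A standalone illustration in plain C (not part of the patch;
the narrowing conversion is implementation-defined in ISO C but two's
complement in practice, which is what the code relies on):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t z = 0x80;                                    /* bit 7 set */
        /* manual: OR in the high bits when the sign bit is set */
        uint64_t manual = z | ((z & 0x80) ? ~(uint64_t)0x7f : 0);
        /* cast: sign-extend through int8_t, as the new code does */
        uint64_t cast = (uint64_t)(int64_t)(int8_t)z;
        assert(manual == cast && cast == 0xffffffffffffff80ull);
        return 0;
    }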
4
3
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
tcg/riscv/tcg-target-con-set.h | 2 --
7
tcg/optimize.c | 29 ++++++++++++-----------------
10
tcg/riscv/tcg-target-con-str.h | 1 -
8
1 file changed, 12 insertions(+), 17 deletions(-)
11
tcg/riscv/tcg-target.c.inc | 16 +++-------------
12
3 files changed, 3 insertions(+), 16 deletions(-)
13
9
14
diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/riscv/tcg-target-con-set.h
12
--- a/tcg/optimize.c
17
+++ b/tcg/riscv/tcg-target-con-set.h
13
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
19
* tcg-target-con-str.h; the constraint combination is inclusive or.
15
20
*/
16
static bool fold_exts(OptContext *ctx, TCGOp *op)
21
C_O0_I1(r)
17
{
22
-C_O0_I2(LZ, L)
18
- uint64_t s_mask_old, s_mask, z_mask, sign;
23
C_O0_I2(rZ, r)
19
+ uint64_t s_mask_old, s_mask, z_mask;
24
C_O0_I2(rZ, rZ)
20
bool type_change = false;
25
-C_O1_I1(r, L)
21
+ TempOptInfo *t1;
26
C_O1_I1(r, r)
22
27
C_O1_I2(r, r, ri)
23
if (fold_const1(ctx, op)) {
28
C_O1_I2(r, r, rI)
24
return true;
29
diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h
25
}
30
index XXXXXXX..XXXXXXX 100644
26
31
--- a/tcg/riscv/tcg-target-con-str.h
27
- z_mask = arg_info(op->args[1])->z_mask;
32
+++ b/tcg/riscv/tcg-target-con-str.h
28
- s_mask = arg_info(op->args[1])->s_mask;
33
@@ -XXX,XX +XXX,XX @@
29
+ t1 = arg_info(op->args[1]);
34
* REGS(letter, register_mask)
30
+ z_mask = t1->z_mask;
35
*/
31
+ s_mask = t1->s_mask;
36
REGS('r', ALL_GENERAL_REGS)
32
s_mask_old = s_mask;
37
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
33
38
34
switch (op->opc) {
39
/*
35
CASE_OP_32_64(ext8s):
40
* Define constraint letters for constants:
36
- sign = INT8_MIN;
41
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
37
- z_mask = (uint8_t)z_mask;
42
index XXXXXXX..XXXXXXX 100644
38
+ s_mask |= INT8_MIN;
43
--- a/tcg/riscv/tcg-target.c.inc
39
+ z_mask = (int8_t)z_mask;
44
+++ b/tcg/riscv/tcg-target.c.inc
40
break;
45
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
41
CASE_OP_32_64(ext16s):
46
#define TCG_CT_CONST_N12 0x400
42
- sign = INT16_MIN;
47
#define TCG_CT_CONST_M12 0x800
43
- z_mask = (uint16_t)z_mask;
48
44
+ s_mask |= INT16_MIN;
49
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
45
+ z_mask = (int16_t)z_mask;
50
-/*
46
break;
51
- * For softmmu, we need to avoid conflicts with the first 5
47
case INDEX_op_ext_i32_i64:
52
- * argument registers to call the helper. Some of these are
48
type_change = true;
53
- * also used for the tlb lookup.
49
QEMU_FALLTHROUGH;
54
- */
50
case INDEX_op_ext32s_i64:
55
-#ifdef CONFIG_SOFTMMU
51
- sign = INT32_MIN;
56
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
52
- z_mask = (uint32_t)z_mask;
57
-#else
53
+ s_mask |= INT32_MIN;
58
-#define SOFTMMU_RESERVE_REGS 0
54
+ z_mask = (int32_t)z_mask;
59
-#endif
55
break;
60
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
61
62
#define sextreg sextract64
63
64
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
65
66
case INDEX_op_qemu_ld_i32:
67
case INDEX_op_qemu_ld_i64:
68
- return C_O1_I1(r, L);
69
+ return C_O1_I1(r, r);
70
case INDEX_op_qemu_st_i32:
71
case INDEX_op_qemu_st_i64:
72
- return C_O0_I2(LZ, L);
73
+ return C_O0_I2(rZ, r);
74
75
default:
56
default:
76
g_assert_not_reached();
57
g_assert_not_reached();
58
}
59
60
- if (z_mask & sign) {
61
- z_mask |= sign;
62
- }
63
- s_mask |= sign << 1;
64
-
65
- ctx->z_mask = z_mask;
66
- ctx->s_mask = s_mask;
67
if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
68
return true;
69
}
70
71
- return fold_masks(ctx, op);
72
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
73
}
74
75
static bool fold_extu(OptContext *ctx, TCGOp *op)
77
--
76
--
78
2.34.1
77
2.43.0
79
80
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 4 ++--
7
1 file changed, 2 insertions(+), 2 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
14
g_assert_not_reached();
15
}
16
17
- ctx->z_mask = z_mask;
18
if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
19
return true;
20
}
21
- return fold_masks(ctx, op);
22
+
23
+ return fold_masks_z(ctx, op, z_mask);
24
}
25
26
static bool fold_mb(OptContext *ctx, TCGOp *op)
27
--
28
2.43.0
1
From: Jamie Iles <quic_jiles@quicinc.com>
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
2
3
The round-robin scheduler will iterate over the CPU list with an
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
assigned budget until the next timer expiry and may exit early because
5
of a TB exit. This is fine under normal operation but with icount
6
enabled and SMP it is possible for a CPU to be starved of run time and
7
the system live-locks.
8
9
For example, booting a riscv64 platform with '-icount
10
shift=0,align=off,sleep=on -smp 2' we observe a livelock once the kernel
11
has timers enabled and starts performing TLB shootdowns. In this case
12
we have CPU 0 in M-mode with interrupts disabled sending an IPI to CPU
13
1. As we enter the TCG loop, we assign the icount budget up to the next
14
timer interrupt to CPU 0 and begin executing. The guest sits in a busy
15
loop, exhausting all of the budget before we try to execute CPU 1, which
16
is the target of the IPI but is left with no budget with which to
17
execute, and the process repeats.
18
19
We try here to add some fairness by splitting the budget across all of
20
the CPUs on the thread fairly before entering each one. The CPU count
21
is cached on CPU list generation ID to avoid iterating the list on each
22
loop iteration. With this change it is possible to boot an SMP rv64
23
guest with icount enabled and no hangs.
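A compact sketch of the split described above (illustration only;
percpu_budget is a stand-in name for the icount_percpu_budget helper added
below, and the real code additionally clamps the result against
icount_get_limit() when each vCPU is prepared):

    #include <assert.h>
    #include <stdint.h>

    static int64_t percpu_budget(int64_t limit, int cpu_count)
    {
        int64_t timeslice = limit / cpu_count;
        return timeslice ? timeslice : limit;  /* never hand out a zero budget */
    }

    int main(void)
    {
        assert(percpu_budget(10000, 2) == 5000);  /* even split across 2 vCPUs */
        assert(percpu_budget(1, 2) == 1);         /* too small to split: use it all */
        return 0;
    }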
24
25
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
26
Tested-by: Peter Maydell <peter.maydell@linaro.org>
27
Signed-off-by: Jamie Iles <quic_jiles@quicinc.com>
28
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
29
Message-Id: <20230427020925.51003-3-quic_jiles@quicinc.com>
30
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
31
---
5
---
32
accel/tcg/tcg-accel-ops-icount.h | 3 ++-
6
tcg/optimize.c | 19 +++++++++++--------
33
accel/tcg/tcg-accel-ops-icount.c | 21 ++++++++++++++----
7
1 file changed, 11 insertions(+), 8 deletions(-)
34
accel/tcg/tcg-accel-ops-rr.c | 37 +++++++++++++++++++++++++++++++-
35
replay/replay.c | 3 +--
36
4 files changed, 56 insertions(+), 8 deletions(-)
37
8
38
diff --git a/accel/tcg/tcg-accel-ops-icount.h b/accel/tcg/tcg-accel-ops-icount.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
39
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
40
--- a/accel/tcg/tcg-accel-ops-icount.h
11
--- a/tcg/optimize.c
41
+++ b/accel/tcg/tcg-accel-ops-icount.h
12
+++ b/tcg/optimize.c
42
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
43
#define TCG_ACCEL_OPS_ICOUNT_H
14
44
15
static bool fold_movcond(OptContext *ctx, TCGOp *op)
45
void icount_handle_deadline(void);
16
{
46
-void icount_prepare_for_run(CPUState *cpu);
17
+ uint64_t z_mask, s_mask;
47
+void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget);
18
+ TempOptInfo *tt, *ft;
48
+int64_t icount_percpu_budget(int cpu_count);
19
int i;
49
void icount_process_data(CPUState *cpu);
20
50
21
/* If true and false values are the same, eliminate the cmp. */
51
void icount_handle_interrupt(CPUState *cpu, int mask);
22
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
52
diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c
23
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
53
index XXXXXXX..XXXXXXX 100644
54
--- a/accel/tcg/tcg-accel-ops-icount.c
55
+++ b/accel/tcg/tcg-accel-ops-icount.c
56
@@ -XXX,XX +XXX,XX @@ void icount_handle_deadline(void)
57
}
24
}
58
}
25
59
26
- ctx->z_mask = arg_info(op->args[3])->z_mask
60
-void icount_prepare_for_run(CPUState *cpu)
27
- | arg_info(op->args[4])->z_mask;
61
+/* Distribute the budget evenly across all CPUs */
28
- ctx->s_mask = arg_info(op->args[3])->s_mask
62
+int64_t icount_percpu_budget(int cpu_count)
29
- & arg_info(op->args[4])->s_mask;
63
+{
30
+ tt = arg_info(op->args[3]);
64
+ int64_t limit = icount_get_limit();
31
+ ft = arg_info(op->args[4]);
65
+ int64_t timeslice = limit / cpu_count;
32
+ z_mask = tt->z_mask | ft->z_mask;
66
+
33
+ s_mask = tt->s_mask & ft->s_mask;
67
+ if (timeslice == 0) {
34
68
+ timeslice = limit;
35
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
69
+ }
36
- uint64_t tv = arg_info(op->args[3])->val;
70
+
37
- uint64_t fv = arg_info(op->args[4])->val;
71
+ return timeslice;
38
+ if (ti_is_const(tt) && ti_is_const(ft)) {
72
+}
39
+ uint64_t tv = ti_const_val(tt);
73
+
40
+ uint64_t fv = ti_const_val(ft);
74
+void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget)
41
TCGOpcode opc, negopc = 0;
75
{
42
TCGCond cond = op->args[5];
76
int insns_left;
43
77
44
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
78
@@ -XXX,XX +XXX,XX @@ void icount_prepare_for_run(CPUState *cpu)
79
g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
80
g_assert(cpu->icount_extra == 0);
81
82
- cpu->icount_budget = icount_get_limit();
83
+ replay_mutex_lock();
84
+
85
+ cpu->icount_budget = MIN(icount_get_limit(), cpu_budget);
86
insns_left = MIN(0xffff, cpu->icount_budget);
87
cpu_neg(cpu)->icount_decr.u16.low = insns_left;
88
cpu->icount_extra = cpu->icount_budget - insns_left;
89
90
- replay_mutex_lock();
91
-
92
if (cpu->icount_budget == 0) {
93
/*
94
* We're called without the iothread lock, so must take it while
95
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
96
index XXXXXXX..XXXXXXX 100644
97
--- a/accel/tcg/tcg-accel-ops-rr.c
98
+++ b/accel/tcg/tcg-accel-ops-rr.c
99
@@ -XXX,XX +XXX,XX @@
100
*/
101
102
#include "qemu/osdep.h"
103
+#include "qemu/lockable.h"
104
#include "sysemu/tcg.h"
105
#include "sysemu/replay.h"
106
#include "sysemu/cpu-timers.h"
107
@@ -XXX,XX +XXX,XX @@ static void rr_force_rcu(Notifier *notify, void *data)
108
rr_kick_next_cpu();
109
}
110
111
+/*
112
+ * Calculate the number of CPUs that we will process in a single iteration of
113
+ * the main CPU thread loop so that we can fairly distribute the instruction
114
+ * count across CPUs.
115
+ *
116
+ * The CPU count is cached based on the CPU list generation ID to avoid
117
+ * iterating the list every time.
118
+ */
119
+static int rr_cpu_count(void)
120
+{
121
+ static unsigned int last_gen_id = ~0;
122
+ static int cpu_count;
123
+ CPUState *cpu;
124
+
125
+ QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
126
+
127
+ if (cpu_list_generation_id_get() != last_gen_id) {
128
+ cpu_count = 0;
129
+ CPU_FOREACH(cpu) {
130
+ ++cpu_count;
131
+ }
132
+ last_gen_id = cpu_list_generation_id_get();
133
+ }
134
+
135
+ return cpu_count;
136
+}
137
+
138
/*
139
* In the single-threaded case each vCPU is simulated in turn. If
140
* there is more than a single vCPU we create a simple timer to kick
141
@@ -XXX,XX +XXX,XX @@ static void *rr_cpu_thread_fn(void *arg)
142
cpu->exit_request = 1;
143
144
while (1) {
145
+ /* Only used for icount_enabled() */
146
+ int64_t cpu_budget = 0;
147
+
148
qemu_mutex_unlock_iothread();
149
replay_mutex_lock();
150
qemu_mutex_lock_iothread();
151
152
if (icount_enabled()) {
153
+ int cpu_count = rr_cpu_count();
154
+
155
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
156
icount_account_warp_timer();
157
/*
158
@@ -XXX,XX +XXX,XX @@ static void *rr_cpu_thread_fn(void *arg)
159
* waking up the I/O thread and waiting for completion.
160
*/
161
icount_handle_deadline();
162
+
163
+ cpu_budget = icount_percpu_budget(cpu_count);
164
}
165
166
replay_mutex_unlock();
167
@@ -XXX,XX +XXX,XX @@ static void *rr_cpu_thread_fn(void *arg)
168
169
qemu_mutex_unlock_iothread();
170
if (icount_enabled()) {
171
- icount_prepare_for_run(cpu);
172
+ icount_prepare_for_run(cpu, cpu_budget);
173
}
174
r = tcg_cpus_exec(cpu);
175
if (icount_enabled()) {
176
diff --git a/replay/replay.c b/replay/replay.c
177
index XXXXXXX..XXXXXXX 100644
178
--- a/replay/replay.c
179
+++ b/replay/replay.c
180
@@ -XXX,XX +XXX,XX @@ uint64_t replay_get_current_icount(void)
181
int replay_get_instructions(void)
182
{
183
int res = 0;
184
- replay_mutex_lock();
185
+ g_assert(replay_mutex_locked());
186
if (replay_next_event_is(EVENT_INSTRUCTION)) {
187
res = replay_state.instruction_count;
188
if (replay_break_icount != -1LL) {
189
@@ -XXX,XX +XXX,XX @@ int replay_get_instructions(void)
190
}
45
}
191
}
46
}
192
}
47
}
193
- replay_mutex_unlock();
48
- return false;
194
return res;
49
+
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
195
}
51
}
196
52
53
static bool fold_mul(OptContext *ctx, TCGOp *op)
197
--
54
--
198
2.34.1
55
2.43.0
199
200
1
Use uint64_t for the pc, and size_t for the size.
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 6 +++---
5
1 file changed, 3 insertions(+), 3 deletions(-)
2
6
3
Reviewed-by: Thomas Huth <thuth@redhat.com>
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-Id: <20230503072331.1747057-81-richard.henderson@linaro.org>
6
---
7
include/disas/disas.h | 17 ++++++-----------
8
bsd-user/elfload.c | 5 +++--
9
disas/disas.c | 19 +++++++++----------
10
linux-user/elfload.c | 5 +++--
11
4 files changed, 21 insertions(+), 25 deletions(-)
12
13
diff --git a/include/disas/disas.h b/include/disas/disas.h
14
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
15
--- a/include/disas/disas.h
9
--- a/tcg/optimize.c
16
+++ b/include/disas/disas.h
10
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@
11
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
18
#include "cpu.h"
12
fold_xi_to_x(ctx, op, 1)) {
19
13
return true;
20
/* Disassemble this for me please... (debugging). */
14
}
21
-void disas(FILE *out, const void *code, unsigned long size);
15
- return false;
22
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
16
+ return finish_folding(ctx, op);
23
- target_ulong size);
24
+void disas(FILE *out, const void *code, size_t size);
25
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
26
27
-void monitor_disas(Monitor *mon, CPUState *cpu,
28
- target_ulong pc, int nb_insn, int is_physical);
29
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
30
+ int nb_insn, bool is_physical);
31
32
char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
33
34
/* Look up symbol for debugging purpose. Returns "" if unknown. */
35
-const char *lookup_symbol(target_ulong orig_addr);
36
+const char *lookup_symbol(uint64_t orig_addr);
37
#endif
38
39
struct syminfo;
40
struct elf32_sym;
41
struct elf64_sym;
42
43
-#if defined(CONFIG_USER_ONLY)
44
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr);
45
-#else
46
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr);
47
-#endif
48
+typedef const char *(*lookup_symbol_t)(struct syminfo *s, uint64_t orig_addr);
49
50
struct syminfo {
51
lookup_symbol_t lookup_symbol;
52
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/bsd-user/elfload.c
55
+++ b/bsd-user/elfload.c
56
@@ -XXX,XX +XXX,XX @@ static abi_ulong load_elf_interp(struct elfhdr *interp_elf_ex,
57
58
static int symfind(const void *s0, const void *s1)
59
{
60
- target_ulong addr = *(target_ulong *)s0;
61
+ __typeof(sym->st_value) addr = *(uint64_t *)s0;
62
struct elf_sym *sym = (struct elf_sym *)s1;
63
int result = 0;
64
+
65
if (addr < sym->st_value) {
66
result = -1;
67
} else if (addr >= sym->st_value + sym->st_size) {
68
@@ -XXX,XX +XXX,XX @@ static int symfind(const void *s0, const void *s1)
69
return result;
70
}
17
}
71
18
72
-static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
19
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
73
+static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
20
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
74
{
21
fold_xi_to_i(ctx, op, 0)) {
75
#if ELF_CLASS == ELFCLASS32
22
return true;
76
struct elf_sym *syms = s->disas_symtab.elf32;
23
}
77
diff --git a/disas/disas.c b/disas/disas.c
24
- return false;
78
index XXXXXXX..XXXXXXX 100644
25
+ return finish_folding(ctx, op);
79
--- a/disas/disas.c
80
+++ b/disas/disas.c
81
@@ -XXX,XX +XXX,XX @@ static void initialize_debug_host(CPUDebug *s)
82
}
26
}
83
27
84
/* Disassemble this for me please... (debugging). */
28
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
85
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
29
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
86
- target_ulong size)
30
tcg_opt_gen_movi(ctx, op2, rh, h);
87
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
31
return true;
88
{
89
- target_ulong pc;
90
+ uint64_t pc;
91
int count;
92
CPUDebug s;
93
94
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
95
}
32
}
96
33
- return false;
97
for (pc = code; size > 0; pc += count, size -= count) {
34
+ return finish_folding(ctx, op);
98
- fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
99
+ fprintf(out, "0x%08" PRIx64 ": ", pc);
100
count = s.info.print_insn(pc, &s.info);
101
fprintf(out, "\n");
102
if (count < 0) {
103
@@ -XXX,XX +XXX,XX @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
104
}
35
}
105
36
106
/* Disassemble this for me please... (debugging). */
37
static bool fold_nand(OptContext *ctx, TCGOp *op)
107
-void disas(FILE *out, const void *code, unsigned long size)
108
+void disas(FILE *out, const void *code, size_t size)
109
{
110
uintptr_t pc;
111
int count;
112
@@ -XXX,XX +XXX,XX @@ void disas(FILE *out, const void *code, unsigned long size)
113
}
114
115
/* Look up symbol for debugging purpose. Returns "" if unknown. */
116
-const char *lookup_symbol(target_ulong orig_addr)
117
+const char *lookup_symbol(uint64_t orig_addr)
118
{
119
const char *symbol = "";
120
struct syminfo *s;
121
@@ -XXX,XX +XXX,XX @@ physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
122
}
123
124
/* Disassembler for the monitor. */
125
-void monitor_disas(Monitor *mon, CPUState *cpu,
126
- target_ulong pc, int nb_insn, int is_physical)
127
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
128
+ int nb_insn, bool is_physical)
129
{
130
int count, i;
131
CPUDebug s;
132
@@ -XXX,XX +XXX,XX @@ void monitor_disas(Monitor *mon, CPUState *cpu,
133
}
134
135
if (!s.info.print_insn) {
136
- monitor_printf(mon, "0x" TARGET_FMT_lx
137
+ monitor_printf(mon, "0x%08" PRIx64
138
": Asm output not supported on this arch\n", pc);
139
return;
140
}
141
142
for (i = 0; i < nb_insn; i++) {
143
- g_string_append_printf(ds, "0x" TARGET_FMT_lx ": ", pc);
144
+ g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
145
count = s.info.print_insn(pc, &s.info);
146
g_string_append_c(ds, '\n');
147
if (count < 0) {
148
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/linux-user/elfload.c
151
+++ b/linux-user/elfload.c
152
@@ -XXX,XX +XXX,XX @@ static void load_elf_interp(const char *filename, struct image_info *info,
153
154
static int symfind(const void *s0, const void *s1)
155
{
156
- target_ulong addr = *(target_ulong *)s0;
157
struct elf_sym *sym = (struct elf_sym *)s1;
158
+ __typeof(sym->st_value) addr = *(uint64_t *)s0;
159
int result = 0;
160
+
161
if (addr < sym->st_value) {
162
result = -1;
163
} else if (addr >= sym->st_value + sym->st_size) {
164
@@ -XXX,XX +XXX,XX @@ static int symfind(const void *s0, const void *s1)
165
return result;
166
}
167
168
-static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
169
+static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
170
{
171
#if ELF_CLASS == ELFCLASS32
172
struct elf_sym *syms = s->disas_symtab.elf32;
173
--
38
--
174
2.34.1
39
2.43.0
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 8 +++++---
7
1 file changed, 5 insertions(+), 3 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
14
15
static bool fold_nand(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t s_mask;
18
+
19
if (fold_const2_commutative(ctx, op) ||
20
fold_xi_to_not(ctx, op, -1)) {
21
return true;
22
}
23
24
- ctx->s_mask = arg_info(op->args[1])->s_mask
25
- & arg_info(op->args[2])->s_mask;
26
- return false;
27
+ s_mask = arg_info(op->args[1])->s_mask
28
+ & arg_info(op->args[2])->s_mask;
29
+ return fold_masks_s(ctx, op, s_mask);
30
}
31
32
static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
33
--
34
2.43.0
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 9 ++-------
7
1 file changed, 2 insertions(+), 7 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
14
{
15
/* Set to 1 all bits to the left of the rightmost. */
16
uint64_t z_mask = arg_info(op->args[1])->z_mask;
17
- ctx->z_mask = -(z_mask & -z_mask);
18
+ z_mask = -(z_mask & -z_mask);
19
20
- /*
21
- * Because of fold_sub_to_neg, we want to always return true,
22
- * via finish_folding.
23
- */
24
- finish_folding(ctx, op);
25
- return true;
26
+ return fold_masks_z(ctx, op, z_mask);
27
}
28
29
static bool fold_neg(OptContext *ctx, TCGOp *op)
30
--
31
2.43.0
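A standalone illustration of the -(z_mask & -z_mask) identity kept in
fold_neg_no_const above (the helper name is made up for the example):

    #include <assert.h>
    #include <stdint.h>

    /* Set the lowest set bit of x and every bit above it. */
    static uint64_t bits_from_lowest_set(uint64_t x)
    {
        return -(x & -x);   /* x & -x isolates the lowest set bit */
    }

    int main(void)
    {
        /* lowest set bit of 0x28 is 0x08, so bits 3..63 are set */
        assert(bits_from_lowest_set(0x28) == 0xfffffffffffffff8ull);
        return 0;
    }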
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 8 +++++---
7
1 file changed, 5 insertions(+), 3 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
14
15
static bool fold_nor(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t s_mask;
18
+
19
if (fold_const2_commutative(ctx, op) ||
20
fold_xi_to_not(ctx, op, 0)) {
21
return true;
22
}
23
24
- ctx->s_mask = arg_info(op->args[1])->s_mask
25
- & arg_info(op->args[2])->s_mask;
26
- return false;
27
+ s_mask = arg_info(op->args[1])->s_mask
28
+ & arg_info(op->args[2])->s_mask;
29
+ return fold_masks_s(ctx, op, s_mask);
30
}
31
32
static bool fold_not(OptContext *ctx, TCGOp *op)
33
--
34
2.43.0
1
Merge tcg_out_tlb_load, add_qemu_ldst_label,
1
Avoid the use of the OptContext slots.
2
tcg_out_test_alignment, and some code that lived in both
3
tcg_out_qemu_ld and tcg_out_qemu_st into one function
4
that returns HostAddress and TCGLabelQemuLdst structures.
5
2
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
tcg/i386/tcg-target.c.inc | 346 ++++++++++++++++----------------------
6
tcg/optimize.c | 7 +------
10
1 file changed, 145 insertions(+), 201 deletions(-)
7
1 file changed, 1 insertion(+), 6 deletions(-)
11
8
12
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/i386/tcg-target.c.inc
11
--- a/tcg/optimize.c
15
+++ b/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
13
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
17
[MO_BEUQ] = helper_be_stq_mmu,
14
if (fold_const1(ctx, op)) {
18
};
15
return true;
19
20
-/* Perform the TLB load and compare.
21
-
22
- Inputs:
23
- ADDRLO and ADDRHI contain the low and high part of the address.
24
-
25
- MEM_INDEX and S_BITS are the memory context and log2 size of the load.
26
-
27
- WHICH is the offset into the CPUTLBEntry structure of the slot to read.
28
- This should be offsetof addr_read or addr_write.
29
-
30
- Outputs:
31
- LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
32
- positions of the displacements of forward jumps to the TLB miss case.
33
-
34
- Second argument register is loaded with the low part of the address.
35
- In the TLB hit case, it has been adjusted as indicated by the TLB
36
- and so is a host address. In the TLB miss case, it continues to
37
- hold a guest address.
38
-
39
- First argument register is clobbered. */
40
-
41
-static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
42
- int mem_index, MemOp opc,
43
- tcg_insn_unit **label_ptr, int which)
44
-{
45
- TCGType ttype = TCG_TYPE_I32;
46
- TCGType tlbtype = TCG_TYPE_I32;
47
- int trexw = 0, hrexw = 0, tlbrexw = 0;
48
- unsigned a_bits = get_alignment_bits(opc);
49
- unsigned s_bits = opc & MO_SIZE;
50
- unsigned a_mask = (1 << a_bits) - 1;
51
- unsigned s_mask = (1 << s_bits) - 1;
52
- target_ulong tlb_mask;
53
-
54
- if (TCG_TARGET_REG_BITS == 64) {
55
- if (TARGET_LONG_BITS == 64) {
56
- ttype = TCG_TYPE_I64;
57
- trexw = P_REXW;
58
- }
59
- if (TCG_TYPE_PTR == TCG_TYPE_I64) {
60
- hrexw = P_REXW;
61
- if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
62
- tlbtype = TCG_TYPE_I64;
63
- tlbrexw = P_REXW;
64
- }
65
- }
66
- }
67
-
68
- tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
69
- tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
70
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
71
-
72
- tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
73
- TLB_MASK_TABLE_OFS(mem_index) +
74
- offsetof(CPUTLBDescFast, mask));
75
-
76
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
77
- TLB_MASK_TABLE_OFS(mem_index) +
78
- offsetof(CPUTLBDescFast, table));
79
-
80
- /* If the required alignment is at least as large as the access, simply
81
- copy the address and mask. For lesser alignments, check that we don't
82
- cross pages for the complete access. */
83
- if (a_bits >= s_bits) {
84
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
85
- } else {
86
- tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
87
- addrlo, s_mask - a_mask);
88
- }
89
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
90
- tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
91
-
92
- /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
93
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
94
- TCG_REG_L1, TCG_REG_L0, which);
95
-
96
- /* Prepare for both the fast path add of the tlb addend, and the slow
97
- path function argument setup. */
98
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
99
-
100
- /* jne slow_path */
101
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
102
- label_ptr[0] = s->code_ptr;
103
- s->code_ptr += 4;
104
-
105
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
106
- /* cmp 4(TCG_REG_L0), addrhi */
107
- tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, which + 4);
108
-
109
- /* jne slow_path */
110
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
111
- label_ptr[1] = s->code_ptr;
112
- s->code_ptr += 4;
113
- }
114
-
115
- /* TLB Hit. */
116
-
117
- /* add addend(TCG_REG_L0), TCG_REG_L1 */
118
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L1, TCG_REG_L0,
119
- offsetof(CPUTLBEntry, addend));
120
-}
121
-
122
-/*
123
- * Record the context of a call to the out of line helper code for the slow path
124
- * for a load or store, so that we can later generate the correct helper code
125
- */
126
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
127
- TCGType type, MemOpIdx oi,
128
- TCGReg datalo, TCGReg datahi,
129
- TCGReg addrlo, TCGReg addrhi,
130
- tcg_insn_unit *raddr,
131
- tcg_insn_unit **label_ptr)
132
-{
133
- TCGLabelQemuLdst *label = new_ldst_label(s);
134
-
135
- label->is_ld = is_ld;
136
- label->oi = oi;
137
- label->type = type;
138
- label->datalo_reg = datalo;
139
- label->datahi_reg = datahi;
140
- label->addrlo_reg = addrlo;
141
- label->addrhi_reg = addrhi;
142
- label->raddr = tcg_splitwx_to_rx(raddr);
143
- label->label_ptr[0] = label_ptr[0];
144
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
145
- label->label_ptr[1] = label_ptr[1];
146
- }
147
-}
148
-
149
/*
150
* Generate code for the slow path for a load at the end of block
151
*/
152
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
153
return true;
154
}
155
#else
156
-
157
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
158
- TCGReg addrhi, unsigned a_bits)
159
-{
160
- unsigned a_mask = (1 << a_bits) - 1;
161
- TCGLabelQemuLdst *label;
162
-
163
- tcg_out_testi(s, addrlo, a_mask);
164
- /* jne slow_path */
165
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
166
-
167
- label = new_ldst_label(s);
168
- label->is_ld = is_ld;
169
- label->addrlo_reg = addrlo;
170
- label->addrhi_reg = addrhi;
171
- label->raddr = tcg_splitwx_to_rx(s->code_ptr + 4);
172
- label->label_ptr[0] = s->code_ptr;
173
-
174
- s->code_ptr += 4;
175
-}
176
-
177
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
178
{
179
/* resolve label address */
180
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
181
#endif /* setup_guest_base_seg */
182
#endif /* SOFTMMU */
183
184
+/*
185
+ * For softmmu, perform the TLB load and compare.
186
+ * For useronly, perform any required alignment tests.
187
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
188
+ * is required and fill in @h with the host address for the fast path.
189
+ */
190
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
191
+ TCGReg addrlo, TCGReg addrhi,
192
+ MemOpIdx oi, bool is_ld)
193
+{
194
+ TCGLabelQemuLdst *ldst = NULL;
195
+ MemOp opc = get_memop(oi);
196
+ unsigned a_bits = get_alignment_bits(opc);
197
+ unsigned a_mask = (1 << a_bits) - 1;
198
+
199
+#ifdef CONFIG_SOFTMMU
200
+ int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
201
+ : offsetof(CPUTLBEntry, addr_write);
202
+ TCGType ttype = TCG_TYPE_I32;
203
+ TCGType tlbtype = TCG_TYPE_I32;
204
+ int trexw = 0, hrexw = 0, tlbrexw = 0;
205
+ unsigned mem_index = get_mmuidx(oi);
206
+ unsigned s_bits = opc & MO_SIZE;
207
+ unsigned s_mask = (1 << s_bits) - 1;
208
+ target_ulong tlb_mask;
209
+
210
+ ldst = new_ldst_label(s);
211
+ ldst->is_ld = is_ld;
212
+ ldst->oi = oi;
213
+ ldst->addrlo_reg = addrlo;
214
+ ldst->addrhi_reg = addrhi;
215
+
216
+ if (TCG_TARGET_REG_BITS == 64) {
217
+ if (TARGET_LONG_BITS == 64) {
218
+ ttype = TCG_TYPE_I64;
219
+ trexw = P_REXW;
220
+ }
221
+ if (TCG_TYPE_PTR == TCG_TYPE_I64) {
222
+ hrexw = P_REXW;
223
+ if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
224
+ tlbtype = TCG_TYPE_I64;
225
+ tlbrexw = P_REXW;
226
+ }
227
+ }
228
+ }
229
+
230
+ tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
231
+ tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
232
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
233
+
234
+ tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
235
+ TLB_MASK_TABLE_OFS(mem_index) +
236
+ offsetof(CPUTLBDescFast, mask));
237
+
238
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
239
+ TLB_MASK_TABLE_OFS(mem_index) +
240
+ offsetof(CPUTLBDescFast, table));
241
+
242
+ /*
243
+ * If the required alignment is at least as large as the access, simply
244
+ * copy the address and mask. For lesser alignments, check that we don't
245
+ * cross pages for the complete access.
246
+ */
247
+ if (a_bits >= s_bits) {
248
+ tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
249
+ } else {
250
+ tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
251
+ addrlo, s_mask - a_mask);
252
+ }
253
+ tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
254
+ tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
255
+
256
+ /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
257
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
258
+ TCG_REG_L1, TCG_REG_L0, cmp_ofs);
259
+
260
+ /*
261
+ * Prepare for both the fast path add of the tlb addend, and the slow
262
+ * path function argument setup.
263
+ */
264
+ *h = (HostAddress) {
265
+ .base = TCG_REG_L1,
266
+ .index = -1
267
+ };
268
+ tcg_out_mov(s, ttype, h->base, addrlo);
269
+
270
+ /* jne slow_path */
271
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
272
+ ldst->label_ptr[0] = s->code_ptr;
273
+ s->code_ptr += 4;
274
+
275
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
276
+ /* cmp 4(TCG_REG_L0), addrhi */
277
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
278
+
279
+ /* jne slow_path */
280
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
281
+ ldst->label_ptr[1] = s->code_ptr;
282
+ s->code_ptr += 4;
283
+ }
284
+
285
+ /* TLB Hit. */
286
+
287
+ /* add addend(TCG_REG_L0), TCG_REG_L1 */
288
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, h->base, TCG_REG_L0,
289
+ offsetof(CPUTLBEntry, addend));
290
+#else
291
+ if (a_bits) {
292
+ ldst = new_ldst_label(s);
293
+
294
+ ldst->is_ld = is_ld;
295
+ ldst->oi = oi;
296
+ ldst->addrlo_reg = addrlo;
297
+ ldst->addrhi_reg = addrhi;
298
+
299
+ tcg_out_testi(s, addrlo, a_mask);
300
+ /* jne slow_path */
301
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
302
+ ldst->label_ptr[0] = s->code_ptr;
303
+ s->code_ptr += 4;
304
+ }
305
+
306
+ *h = x86_guest_base;
307
+ h->base = addrlo;
308
+#endif
309
+
310
+ return ldst;
311
+}
312
+
313
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
314
HostAddress h, TCGType type, MemOp memop)
315
{
316
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
317
TCGReg addrlo, TCGReg addrhi,
318
MemOpIdx oi, TCGType data_type)
319
{
320
- MemOp opc = get_memop(oi);
321
+ TCGLabelQemuLdst *ldst;
322
HostAddress h;
323
324
-#if defined(CONFIG_SOFTMMU)
325
- tcg_insn_unit *label_ptr[2];
326
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
327
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi));
328
329
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
330
- label_ptr, offsetof(CPUTLBEntry, addr_read));
331
-
332
- /* TLB Hit. */
333
- h.base = TCG_REG_L1;
334
- h.index = -1;
335
- h.ofs = 0;
336
- h.seg = 0;
337
- tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
338
-
339
- /* Record the current context of a load into ldst label */
340
- add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
341
- addrlo, addrhi, s->code_ptr, label_ptr);
342
-#else
343
- unsigned a_bits = get_alignment_bits(opc);
344
- if (a_bits) {
345
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
346
+ if (ldst) {
347
+ ldst->type = data_type;
348
+ ldst->datalo_reg = datalo;
349
+ ldst->datahi_reg = datahi;
350
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
351
}
16
}
352
-
17
-
353
- h = x86_guest_base;
18
- ctx->s_mask = arg_info(op->args[1])->s_mask;
354
- h.base = addrlo;
19
-
355
- tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
20
- /* Because of fold_to_not, we want to always return true, via finish. */
356
-#endif
21
- finish_folding(ctx, op);
22
- return true;
23
+ return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
357
}
24
}
358
25
359
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
26
static bool fold_or(OptContext *ctx, TCGOp *op)
360
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
361
TCGReg addrlo, TCGReg addrhi,
362
MemOpIdx oi, TCGType data_type)
363
{
364
- MemOp opc = get_memop(oi);
365
+ TCGLabelQemuLdst *ldst;
366
HostAddress h;
367
368
-#if defined(CONFIG_SOFTMMU)
369
- tcg_insn_unit *label_ptr[2];
370
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
371
+ tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi));
372
373
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
374
- label_ptr, offsetof(CPUTLBEntry, addr_write));
375
-
376
- /* TLB Hit. */
377
- h.base = TCG_REG_L1;
378
- h.index = -1;
379
- h.ofs = 0;
380
- h.seg = 0;
381
- tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
382
-
383
- /* Record the current context of a store into ldst label */
384
- add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
385
- addrlo, addrhi, s->code_ptr, label_ptr);
386
-#else
387
- unsigned a_bits = get_alignment_bits(opc);
388
- if (a_bits) {
389
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
390
+ if (ldst) {
391
+ ldst->type = data_type;
392
+ ldst->datalo_reg = datalo;
393
+ ldst->datahi_reg = datahi;
394
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
395
}
396
-
397
- h = x86_guest_base;
398
- h.base = addrlo;
399
-
400
- tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
401
-#endif
402
}
403
404
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
405
--
27
--
406
2.34.1
28
2.43.0
407
408
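A note on the combined alignment and page-crossing test emitted by prepare_host_addr above: adding s_mask - a_mask to the address before masking forces a TLB-tag mismatch whenever the access would spill into the next page. A rough standalone illustration, with an assumed 4 KiB page size and an 8-byte, byte-aligned access (not the actual QEMU constants):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        const uint64_t page_mask = ~(uint64_t)0xfff;   /* assume 4 KiB pages */
        unsigned s_mask = 7;                           /* 8-byte access */
        unsigned a_mask = 0;                           /* byte alignment required */
        uint64_t in_page  = 0x1000ff0;                 /* access stays in one page */
        uint64_t crossing = 0x1000ffd;                 /* access spills into the next page */

        /* tag = (addr + (s_mask - a_mask)) & (page_mask | a_mask) */
        uint64_t tag_in    = (in_page  + (s_mask - a_mask)) & (page_mask | a_mask);
        uint64_t tag_cross = (crossing + (s_mask - a_mask)) & (page_mask | a_mask);

        assert(tag_in    == (in_page  & page_mask));   /* matches the TLB tag -> fast path */
        assert(tag_cross != (crossing & page_mask));   /* mismatch forces the slow path */
        return 0;
    }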
Avoid the use of the OptContext slots. Find TempOptInfo once.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
 
 static bool fold_or(OptContext *ctx, TCGOp *op)
 {
+    uint64_t z_mask, s_mask;
+    TempOptInfo *t1, *t2;
+
     if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_x(ctx, op, 0) ||
         fold_xx_to_x(ctx, op)) {
         return true;
     }
 
-    ctx->z_mask = arg_info(op->args[1])->z_mask
-                | arg_info(op->args[2])->z_mask;
-    ctx->s_mask = arg_info(op->args[1])->s_mask
-                & arg_info(op->args[2])->s_mask;
-    return fold_masks(ctx, op);
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+    z_mask = t1->z_mask | t2->z_mask;
+    s_mask = t1->s_mask & t2->s_mask;
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_orc(OptContext *ctx, TCGOp *op)
--
2.43.0
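A brief aside on the mask bookkeeping in the fold_or conversion above: a bit may be nonzero in x | y if it may be nonzero in either input, while a high bit is only a guaranteed copy of the sign if it is guaranteed in both inputs. A toy check of that reasoning (the harness and constants are illustrative, not QEMU code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* x is known to fit in 8 bits, y in 4 bits. */
        uint64_t zx = 0xff, zy = 0xf;
        /* Bits that may be nonzero in x | y. */
        assert((zx | zy) == 0xff);

        /* Suppose x's top 56 bits and y's top 48 bits are known sign copies;
         * only the intersection (top 48) is guaranteed for x | y. */
        uint64_t sx = ~(uint64_t)0xff, sy = ~(uint64_t)0xffff;
        assert((sx & sy) == ~(uint64_t)0xffff);
        return 0;
    }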
1
Rather than zero-extend the guest address into a register,
1
Avoid the use of the OptContext slots.
2
use an add instruction which zero-extends the second input.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
tcg/s390x/tcg-target.c.inc | 8 +++++---
6
tcg/optimize.c | 8 +++++---
8
1 file changed, 5 insertions(+), 3 deletions(-)
7
1 file changed, 5 insertions(+), 3 deletions(-)
9
8
10
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/s390x/tcg-target.c.inc
11
--- a/tcg/optimize.c
13
+++ b/tcg/s390x/tcg-target.c.inc
12
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
13
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
15
RRE_ALGR = 0xb90a,
14
16
RRE_ALCR = 0xb998,
15
static bool fold_orc(OptContext *ctx, TCGOp *op)
17
RRE_ALCGR = 0xb988,
16
{
18
+ RRE_ALGFR = 0xb91a,
17
+ uint64_t s_mask;
19
RRE_CGR = 0xb920,
18
+
20
RRE_CLGR = 0xb921,
19
if (fold_const2(ctx, op) ||
21
RRE_DLGR = 0xb987,
20
fold_xx_to_i(ctx, op, -1) ||
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
21
fold_xi_to_x(ctx, op, -1) ||
23
tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE,
22
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
24
offsetof(CPUTLBEntry, addend));
23
return true;
25
26
- h->base = addr_reg;
27
if (TARGET_LONG_BITS == 32) {
28
- tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
29
- h->base = TCG_REG_R3;
30
+ tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
31
+ h->base = TCG_REG_NONE;
32
+ } else {
33
+ h->base = addr_reg;
34
}
24
}
35
h->disp = 0;
25
36
#else
26
- ctx->s_mask = arg_info(op->args[1])->s_mask
27
- & arg_info(op->args[2])->s_mask;
28
- return false;
29
+ s_mask = arg_info(op->args[1])->s_mask
30
+ & arg_info(op->args[2])->s_mask;
31
+ return fold_masks_s(ctx, op, s_mask);
32
}
33
34
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
37
--
35
--
38
2.34.1
36
2.43.0
39
40
1
Allocate TCG_REG_TMP2. Use R0, TMP1, TMP2 instead of any of
1
Avoid the use of the OptContext slots.
2
the normally allocated registers for the tlb load.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Be careful not to call fold_masks_zs when the memory operation
5
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
4
is wide enough to require multiple outputs, so split into two
5
functions: fold_qemu_ld_1reg and fold_qemu_ld_2reg.
6
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
9
---
8
tcg/ppc/tcg-target.c.inc | 78 ++++++++++++++++++++++++----------------
10
tcg/optimize.c | 26 +++++++++++++++++++++-----
9
1 file changed, 47 insertions(+), 31 deletions(-)
11
1 file changed, 21 insertions(+), 5 deletions(-)
10
12
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
15
--- a/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target.c.inc
16
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@
17
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
16
#else
18
return fold_masks_s(ctx, op, s_mask);
17
# define TCG_REG_TMP1 TCG_REG_R12
19
}
18
#endif
20
19
+#define TCG_REG_TMP2 TCG_REG_R11
21
-static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
20
22
+static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
21
#define TCG_VEC_TMP1 TCG_REG_V0
23
{
22
#define TCG_VEC_TMP2 TCG_REG_V1
24
const TCGOpDef *def = &tcg_op_defs[op->opc];
23
@@ -XXX,XX +XXX,XX @@ static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
25
MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
24
/*
26
MemOp mop = get_memop(oi);
25
* For the purposes of ppc32 sorting 4 input registers into 4 argument
27
int width = 8 * memop_size(mop);
26
* registers, there is an outside chance we would require 3 temps.
28
+ uint64_t z_mask = -1, s_mask = 0;
27
- * Because of constraints, no inputs are in r3, and env will not be
29
28
- * placed into r3 until after the sorting is done, and is thus free.
30
if (width < 64) {
29
*/
31
if (mop & MO_SIGN) {
30
static const TCGLdstHelperParam ldst_helper_param = {
32
- ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
31
.ra_gen = ldst_ra_gen,
33
+ s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
32
.ntmp = 3,
33
- .tmp = { TCG_REG_TMP1, TCG_REG_R0, TCG_REG_R3 }
34
+ .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
35
};
36
37
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
38
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
39
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
40
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
41
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
42
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
43
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
44
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
45
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
46
47
/* Extract the page index, shifted into place for tlb index. */
48
if (TCG_TARGET_REG_BITS == 32) {
49
- tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
50
+ tcg_out_shri32(s, TCG_REG_R0, addrlo,
51
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
52
} else {
53
- tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
54
+ tcg_out_shri64(s, TCG_REG_R0, addrlo,
55
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
56
}
57
- tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
58
+ tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
59
60
- /* Load the TLB comparator. */
61
+ /* Load the (low part) TLB comparator into TMP2. */
62
if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
63
uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
64
? LWZUX : LDUX);
65
- tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
66
+ tcg_out32(s, lxu | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
67
} else {
68
- tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
69
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
70
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
71
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
72
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
73
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2,
74
+ TCG_REG_TMP1, cmp_off + 4 * HOST_BIG_ENDIAN);
75
} else {
34
} else {
76
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
35
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
77
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
36
+ z_mask = MAKE_64BIT_MASK(0, width);
78
}
37
}
79
}
38
}
80
39
81
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
40
/* Opcodes that touch guest memory stop the mb optimization. */
82
* Load the TLB addend for use on the fast path.
41
ctx->prev_mb = NULL;
83
* Do this asap to minimize any load use delay.
42
- return false;
84
*/
85
- h->base = TCG_REG_R3;
86
- tcg_out_ld(s, TCG_TYPE_PTR, h->base, TCG_REG_R3,
87
- offsetof(CPUTLBEntry, addend));
88
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
89
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
90
+ offsetof(CPUTLBEntry, addend));
91
+ }
92
93
- /* Clear the non-page, non-alignment bits from the address */
94
+ /* Clear the non-page, non-alignment bits from the address in R0. */
95
if (TCG_TARGET_REG_BITS == 32) {
96
/*
97
* We don't support unaligned accesses on 32-bits.
98
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
99
if (TARGET_LONG_BITS == 32) {
100
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
101
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
102
- /* Zero-extend the address for use in the final address. */
103
- tcg_out_ext32u(s, TCG_REG_R4, addrlo);
104
- addrlo = TCG_REG_R4;
105
} else if (a_bits == 0) {
106
tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
107
} else {
108
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
109
tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
110
}
111
}
112
- h->index = addrlo;
113
114
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
115
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
116
+ /* Low part comparison into cr7. */
117
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
118
0, 7, TCG_TYPE_I32);
119
- tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
120
+
43
+
121
+ /* Load the high part TLB comparator into TMP2. */
44
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
122
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
45
+}
123
+ cmp_off + 4 * !HOST_BIG_ENDIAN);
124
+
46
+
125
+ /* Load addend, deferred for this case. */
47
+static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
126
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
48
+{
127
+ offsetof(CPUTLBEntry, addend));
49
+ /* Opcodes that touch guest memory stop the mb optimization. */
128
+
50
+ ctx->prev_mb = NULL;
129
+ /* High part comparison into cr6. */
51
+ return finish_folding(ctx, op);
130
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32);
131
+
132
+ /* Combine comparisons into cr7. */
133
tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
134
} else {
135
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
136
+ /* Full comparison into cr7. */
137
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
138
0, 7, TCG_TYPE_TL);
139
}
140
141
/* Load a pointer into the current opcode w/conditional branch-link. */
142
ldst->label_ptr[0] = s->code_ptr;
143
tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
144
+
145
+ h->base = TCG_REG_TMP1;
146
#else
147
if (a_bits) {
148
ldst = new_ldst_label(s);
149
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
150
}
151
152
h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
153
- h->index = addrlo;
154
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
155
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
156
- h->index = TCG_REG_TMP1;
157
- }
158
#endif
159
160
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
161
+ /* Zero-extend the guest address for use in the host address. */
162
+ tcg_out_ext32u(s, TCG_REG_R0, addrlo);
163
+ h->index = TCG_REG_R0;
164
+ } else {
165
+ h->index = addrlo;
166
+ }
167
+
168
return ldst;
169
}
52
}
170
53
171
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
54
static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
172
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
55
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
173
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
56
break;
174
#endif
57
case INDEX_op_qemu_ld_a32_i32:
175
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
58
case INDEX_op_qemu_ld_a64_i32:
176
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
59
+ done = fold_qemu_ld_1reg(&ctx, op);
177
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
60
+ break;
178
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
61
case INDEX_op_qemu_ld_a32_i64:
179
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
62
case INDEX_op_qemu_ld_a64_i64:
180
if (USE_REG_TB) {
63
+ if (TCG_TARGET_REG_BITS == 64) {
64
+ done = fold_qemu_ld_1reg(&ctx, op);
65
+ break;
66
+ }
67
+ QEMU_FALLTHROUGH;
68
case INDEX_op_qemu_ld_a32_i128:
69
case INDEX_op_qemu_ld_a64_i128:
70
- done = fold_qemu_ld(&ctx, op);
71
+ done = fold_qemu_ld_2reg(&ctx, op);
72
break;
73
case INDEX_op_qemu_st8_a32_i32:
74
case INDEX_op_qemu_st8_a64_i32:
181
--
75
--
182
2.34.1
76
2.43.0
183
184
diff view generated by jsdifflib
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
1
Stores have no output operands, and so need no further work.
2
and tcg_out_st_helper_args.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
tcg/loongarch64/tcg-target.c.inc | 37 ++++++++++----------------------
6
tcg/optimize.c | 11 +++++------
8
1 file changed, 11 insertions(+), 26 deletions(-)
7
1 file changed, 5 insertions(+), 6 deletions(-)
9
8
10
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/loongarch64/tcg-target.c.inc
11
--- a/tcg/optimize.c
13
+++ b/tcg/loongarch64/tcg-target.c.inc
12
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
15
return reloc_br_sd10k16(s->code_ptr - 1, target);
14
{
15
/* Opcodes that touch guest memory stop the mb optimization. */
16
ctx->prev_mb = NULL;
17
- return false;
18
+ return true;
16
}
19
}
17
20
18
+static const TCGLdstHelperParam ldst_helper_param = {
21
static bool fold_remainder(OptContext *ctx, TCGOp *op)
19
+ .ntmp = 1, .tmp = { TCG_REG_TMP0 }
22
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
20
+};
23
21
+
24
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
22
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
25
remove_mem_copy_all(ctx);
23
{
26
- return false;
24
- MemOpIdx oi = l->oi;
27
+ return true;
25
- MemOp opc = get_memop(oi);
26
- MemOp size = opc & MO_SIZE;
27
+ MemOp opc = get_memop(l->oi);
28
29
/* resolve label address */
30
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
31
return false;
32
}
28
}
33
29
34
- /* call load helper */
30
switch (op->opc) {
35
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
31
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
32
g_assert_not_reached();
37
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi);
33
}
38
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr);
34
remove_mem_copy_in(ctx, ofs, ofs + lm1);
39
-
35
- return false;
40
- tcg_out_call_int(s, qemu_ld_helpers[size], false);
36
+ return true;
41
-
42
- tcg_out_movext(s, l->type, l->datalo_reg,
43
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_A0);
44
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
45
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false);
46
+ tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
47
return tcg_out_goto(s, l->raddr);
48
}
37
}
49
38
50
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
39
static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
51
{
40
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
52
- MemOpIdx oi = l->oi;
41
TCGType type;
53
- MemOp opc = get_memop(oi);
42
54
- MemOp size = opc & MO_SIZE;
43
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
55
+ MemOp opc = get_memop(l->oi);
44
- fold_tcg_st(ctx, op);
56
45
- return false;
57
/* resolve label address */
46
+ return fold_tcg_st(ctx, op);
58
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
59
return false;
60
}
47
}
61
48
62
- /* call store helper */
49
src = arg_temp(op->args[0]);
63
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
50
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
64
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
51
last = ofs + tcg_type_size(type) - 1;
65
- tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I32 : TCG_TYPE_I32, TCG_REG_A2,
52
remove_mem_copy_in(ctx, ofs, last);
66
- l->type, size, l->datalo_reg);
53
record_mem_copy(ctx, type, src, ofs, last);
67
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi);
54
- return false;
68
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr);
55
+ return true;
69
-
70
- tcg_out_call_int(s, qemu_st_helpers[size], false);
71
-
72
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
73
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
74
return tcg_out_goto(s, l->raddr);
75
}
56
}
76
#else
57
58
static bool fold_xor(OptContext *ctx, TCGOp *op)
77
--
59
--
78
2.34.1
60
2.43.0
79
80
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
         fold_xx_to_i(ctx, op, 0)) {
         return true;
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
--
2.43.0
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, and some code that lived
1
Change return from bool to int; distinguish between
2
in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that
2
complete folding, simplification, and no change.
3
returns HostAddress and TCGLabelQemuLdst structures.
4
3
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/arm/tcg-target.c.inc | 351 ++++++++++++++++++---------------------
7
tcg/optimize.c | 22 ++++++++++++++--------
9
1 file changed, 159 insertions(+), 192 deletions(-)
8
1 file changed, 14 insertions(+), 8 deletions(-)
10
9
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
12
--- a/tcg/optimize.c
14
+++ b/tcg/arm/tcg-target.c.inc
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
16
}
15
return finish_folding(ctx, op);
17
}
16
}
18
17
19
-#define TLB_SHIFT    (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
18
-static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
20
-
19
+/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
21
-/* We expect to use an 9-bit sign-magnitude negative offset from ENV. */
20
+static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
22
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
23
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
24
-
25
-/* These offsets are built into the LDRD below. */
26
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
27
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
28
-
29
-/* Load and compare a TLB entry, leaving the flags set. Returns the register
30
- containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
31
-
32
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
33
- MemOp opc, int mem_index, bool is_load)
34
-{
35
- int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
36
- : offsetof(CPUTLBEntry, addr_write));
37
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
38
- unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
39
- unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
40
- TCGReg t_addr;
41
-
42
- /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
43
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
44
-
45
- /* Extract the tlb index from the address into R0. */
46
- tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
47
- SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
48
-
49
- /*
50
- * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
51
- * Load the tlb comparator into R2/R3 and the fast path addend into R1.
52
- */
53
- if (cmp_off == 0) {
54
- if (TARGET_LONG_BITS == 64) {
55
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
56
- } else {
57
- tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
58
- }
59
- } else {
60
- tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
61
- TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
62
- if (TARGET_LONG_BITS == 64) {
63
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
64
- } else {
65
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
66
- }
67
- }
68
-
69
- /* Load the tlb addend. */
70
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
71
- offsetof(CPUTLBEntry, addend));
72
-
73
- /*
74
- * Check alignment, check comparators.
75
- * Do this in 2-4 insns. Use MOVW for v7, if possible,
76
- * to reduce the number of sequential conditional instructions.
77
- * Almost all guests have at least 4k pages, which means that we need
78
- * to clear at least 9 bits even for an 8-byte memory, which means it
79
- * isn't worth checking for an immediate operand for BIC.
80
- *
81
- * For unaligned accesses, test the page of the last unit of alignment.
82
- * This leaves the least significant alignment bits unchanged, and of
83
- * course must be zero.
84
- */
85
- t_addr = addrlo;
86
- if (a_mask < s_mask) {
87
- t_addr = TCG_REG_R0;
88
- tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
89
- addrlo, s_mask - a_mask);
90
- }
91
- if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
92
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
93
- tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
94
- t_addr, TCG_REG_TMP, 0);
95
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
96
- } else {
97
- if (a_mask) {
98
- tcg_debug_assert(a_mask <= 0xff);
99
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
100
- }
101
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
102
- SHIFT_IMM_LSR(TARGET_PAGE_BITS));
103
- tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
104
- 0, TCG_REG_R2, TCG_REG_TMP,
105
- SHIFT_IMM_LSL(TARGET_PAGE_BITS));
106
- }
107
-
108
- if (TARGET_LONG_BITS == 64) {
109
- tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
110
- }
111
-
112
- return TCG_REG_R1;
113
-}
114
-
115
-/* Record the context of a call to the out of line helper code for the slow
116
- path for a load or store, so that we can later generate the correct
117
- helper code. */
118
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
119
- MemOpIdx oi, TCGType type,
120
- TCGReg datalo, TCGReg datahi,
121
- TCGReg addrlo, TCGReg addrhi,
122
- tcg_insn_unit *raddr,
123
- tcg_insn_unit *label_ptr)
124
-{
125
- TCGLabelQemuLdst *label = new_ldst_label(s);
126
-
127
- label->is_ld = is_ld;
128
- label->oi = oi;
129
- label->type = type;
130
- label->datalo_reg = datalo;
131
- label->datahi_reg = datahi;
132
- label->addrlo_reg = addrlo;
133
- label->addrhi_reg = addrhi;
134
- label->raddr = tcg_splitwx_to_rx(raddr);
135
- label->label_ptr[0] = label_ptr;
136
-}
137
-
138
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
139
{
21
{
140
TCGReg argreg;
22
uint64_t a_zmask, b_val;
141
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
23
TCGCond cond;
142
return true;
24
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
143
}
25
op->opc = xor_opc;
144
#else
26
op->args[2] = arg_new_constant(ctx, 1);
145
-
27
}
146
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
28
- return false;
147
- TCGReg addrhi, unsigned a_bits)
29
+ return -1;
148
-{
30
}
149
- unsigned a_mask = (1 << a_bits) - 1;
150
- TCGLabelQemuLdst *label = new_ldst_label(s);
151
-
152
- label->is_ld = is_ld;
153
- label->addrlo_reg = addrlo;
154
- label->addrhi_reg = addrhi;
155
-
156
- /* We are expecting a_bits to max out at 7, and can easily support 8. */
157
- tcg_debug_assert(a_mask <= 0xff);
158
- /* tst addr, #mask */
159
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
160
-
161
- /* blne slow_path */
162
- label->label_ptr[0] = s->code_ptr;
163
- tcg_out_bl_imm(s, COND_NE, 0);
164
-
165
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
166
-}
167
-
168
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
169
{
170
if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
171
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
172
}
173
#endif /* SOFTMMU */
174
175
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
176
+ TCGReg addrlo, TCGReg addrhi,
177
+ MemOpIdx oi, bool is_ld)
178
+{
179
+ TCGLabelQemuLdst *ldst = NULL;
180
+ MemOp opc = get_memop(oi);
181
+ MemOp a_bits = get_alignment_bits(opc);
182
+ unsigned a_mask = (1 << a_bits) - 1;
183
+
184
+#ifdef CONFIG_SOFTMMU
185
+ int mem_index = get_mmuidx(oi);
186
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
187
+ : offsetof(CPUTLBEntry, addr_write);
188
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
189
+ unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
190
+ TCGReg t_addr;
191
+
192
+ ldst = new_ldst_label(s);
193
+ ldst->is_ld = is_ld;
194
+ ldst->oi = oi;
195
+ ldst->addrlo_reg = addrlo;
196
+ ldst->addrhi_reg = addrhi;
197
+
198
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
199
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
200
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
201
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
202
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
203
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
204
+
205
+ /* Extract the tlb index from the address into R0. */
206
+ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
207
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
208
+
209
+ /*
210
+ * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
211
+ * Load the tlb comparator into R2/R3 and the fast path addend into R1.
212
+ */
213
+ if (cmp_off == 0) {
214
+ if (TARGET_LONG_BITS == 64) {
215
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
216
+ } else {
217
+ tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
218
+ }
219
+ } else {
220
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
221
+ TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
222
+ if (TARGET_LONG_BITS == 64) {
223
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
224
+ } else {
225
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
226
+ }
227
+ }
228
+
229
+ /* Load the tlb addend. */
230
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
231
+ offsetof(CPUTLBEntry, addend));
232
+
233
+ /*
234
+ * Check alignment, check comparators.
235
+ * Do this in 2-4 insns. Use MOVW for v7, if possible,
236
+ * to reduce the number of sequential conditional instructions.
237
+ * Almost all guests have at least 4k pages, which means that we need
238
+ * to clear at least 9 bits even for an 8-byte memory, which means it
239
+ * isn't worth checking for an immediate operand for BIC.
240
+ *
241
+ * For unaligned accesses, test the page of the last unit of alignment.
242
+ * This leaves the least significant alignment bits unchanged, and of
243
+ * course must be zero.
244
+ */
245
+ t_addr = addrlo;
246
+ if (a_mask < s_mask) {
247
+ t_addr = TCG_REG_R0;
248
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
249
+ addrlo, s_mask - a_mask);
250
+ }
251
+ if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
252
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
253
+ tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
254
+ t_addr, TCG_REG_TMP, 0);
255
+ tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
256
+ } else {
257
+ if (a_mask) {
258
+ tcg_debug_assert(a_mask <= 0xff);
259
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
260
+ }
261
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
262
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS));
263
+ tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
264
+ 0, TCG_REG_R2, TCG_REG_TMP,
265
+ SHIFT_IMM_LSL(TARGET_PAGE_BITS));
266
+ }
267
+
268
+ if (TARGET_LONG_BITS == 64) {
269
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
270
+ }
271
+
272
+ *h = (HostAddress){
273
+ .cond = COND_AL,
274
+ .base = addrlo,
275
+ .index = TCG_REG_R1,
276
+ .index_scratch = true,
277
+ };
278
+#else
279
+ if (a_mask) {
280
+ ldst = new_ldst_label(s);
281
+ ldst->is_ld = is_ld;
282
+ ldst->oi = oi;
283
+ ldst->addrlo_reg = addrlo;
284
+ ldst->addrhi_reg = addrhi;
285
+
286
+ /* We are expecting a_bits to max out at 7 */
287
+ tcg_debug_assert(a_mask <= 0xff);
288
+ /* tst addr, #mask */
289
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
290
+ }
291
+
292
+ *h = (HostAddress){
293
+ .cond = COND_AL,
294
+ .base = addrlo,
295
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
296
+ .index_scratch = false,
297
+ };
298
+#endif
299
+
300
+ return ldst;
301
+}
302
+
303
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
304
TCGReg datahi, HostAddress h)
305
{
306
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
307
MemOpIdx oi, TCGType data_type)
308
{
309
MemOp opc = get_memop(oi);
310
+ TCGLabelQemuLdst *ldst;
311
HostAddress h;
312
313
-#ifdef CONFIG_SOFTMMU
314
- h.cond = COND_AL;
315
- h.base = addrlo;
316
- h.index_scratch = true;
317
- h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
318
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
319
+ if (ldst) {
320
+ ldst->type = data_type;
321
+ ldst->datalo_reg = datalo;
322
+ ldst->datahi_reg = datahi;
323
324
- /*
325
- * This a conditional BL only to load a pointer within this opcode into
326
- * LR for the slow path. We will not be using the value for a tail call.
327
- */
328
- tcg_insn_unit *label_ptr = s->code_ptr;
329
- tcg_out_bl_imm(s, COND_NE, 0);
330
+ /*
331
+ * This a conditional BL only to load a pointer within this
332
+ * opcode into LR for the slow path. We will not be using
333
+ * the value for a tail call.
334
+ */
335
+ ldst->label_ptr[0] = s->code_ptr;
336
+ tcg_out_bl_imm(s, COND_NE, 0);
337
338
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
339
-
340
- add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
341
- addrlo, addrhi, s->code_ptr, label_ptr);
342
-#else
343
- unsigned a_bits = get_alignment_bits(opc);
344
- if (a_bits) {
345
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
346
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
347
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
348
+ } else {
349
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
350
}
31
}
351
-
32
-
352
- h.cond = COND_AL;
33
- return false;
353
- h.base = addrlo;
34
+ return 0;
354
- h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
355
- h.index_scratch = false;
356
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
357
-#endif
358
}
35
}
359
36
360
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
37
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
361
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
38
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
362
MemOpIdx oi, TCGType data_type)
39
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
363
{
40
}
364
MemOp opc = get_memop(oi);
41
365
+ TCGLabelQemuLdst *ldst;
42
- if (fold_setcond_zmask(ctx, op, false)) {
366
HostAddress h;
43
+ i = fold_setcond_zmask(ctx, op, false);
367
44
+ if (i > 0) {
368
-#ifdef CONFIG_SOFTMMU
45
return true;
369
- h.cond = COND_EQ;
46
}
370
- h.base = addrlo;
47
- fold_setcond_tst_pow2(ctx, op, false);
371
- h.index_scratch = true;
48
+ if (i == 0) {
372
- h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
49
+ fold_setcond_tst_pow2(ctx, op, false);
373
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
374
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
375
+ if (ldst) {
376
+ ldst->type = data_type;
377
+ ldst->datalo_reg = datalo;
378
+ ldst->datahi_reg = datahi;
379
380
- /* The conditional call must come last, as we're going to return here. */
381
- tcg_insn_unit *label_ptr = s->code_ptr;
382
- tcg_out_bl_imm(s, COND_NE, 0);
383
-
384
- add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
385
- addrlo, addrhi, s->code_ptr, label_ptr);
386
-#else
387
- unsigned a_bits = get_alignment_bits(opc);
388
-
389
- h.cond = COND_AL;
390
- if (a_bits) {
391
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
392
h.cond = COND_EQ;
393
- }
394
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
395
396
- h.base = addrlo;
397
- h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
398
- h.index_scratch = false;
399
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
400
-#endif
401
+ /* The conditional call is last, as we're going to return here. */
402
+ ldst->label_ptr[0] = s->code_ptr;
403
+ tcg_out_bl_imm(s, COND_NE, 0);
404
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
405
+ } else {
406
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
407
+ }
50
+ }
408
}
51
409
52
ctx->z_mask = 1;
410
static void tcg_out_epilogue(TCGContext *s);
53
return false;
54
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
55
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
56
}
57
58
- if (fold_setcond_zmask(ctx, op, true)) {
59
+ i = fold_setcond_zmask(ctx, op, true);
60
+ if (i > 0) {
61
return true;
62
}
63
- fold_setcond_tst_pow2(ctx, op, true);
64
+ if (i == 0) {
65
+ fold_setcond_tst_pow2(ctx, op, true);
66
+ }
67
68
/* Value is {0,-1} so all bits are repetitions of the sign. */
69
ctx->s_mask = -1;
411
--
70
--
412
2.34.1
71
2.43.0
413
414
Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
         fold_setcond_tst_pow2(ctx, op, false);
     }
 
-    ctx->z_mask = 1;
-    return false;
+    return fold_masks_z(ctx, op, 1);
 }
 
 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
--
2.43.0
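The fold_masks_z(ctx, op, 1) in the fold_setcond patch above records that a setcond result is always 0 or 1, so only bit 0 can ever be nonzero. A minimal sketch of the property being encoded:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (uint64_t a = 0; a < 4; a++) {
            for (uint64_t b = 0; b < 4; b++) {
                uint64_t r = (a < b);              /* setcond-style result */
                assert((r & ~(uint64_t)1) == 0);   /* bits above bit 0 are known zero */
            }
        }
        return 0;
    }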
Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
     }
 
     /* Value is {0,-1} so all bits are repetitions of the sign. */
-    ctx->s_mask = -1;
-    return false;
+    return fold_masks_s(ctx, op, -1);
 }
 
 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
--
2.43.0
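Similarly, the fold_negsetcond change above passes -1 as the sign mask because a negsetcond result is either 0 or -1, making every bit a copy of the sign bit. Sketch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (int64_t a = -2; a < 2; a++) {
            for (int64_t b = -2; b < 2; b++) {
                int64_t r = -(a < b);            /* negsetcond-style result */
                assert(r == 0 || r == -1);       /* all 64 bits repeat the sign */
            }
        }
        return 0;
    }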
Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
         return fold_setcond(ctx, op);
     }
 
-    ctx->z_mask = 1;
-    return false;
+    return fold_masks_z(ctx, op, 1);
 
  do_setcond_const:
     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
--
2.43.0
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
         op->args[3] = tcg_swap_cond(op->args[3]);
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
--
2.43.0
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
         op->args[5] = tcg_invert_cond(op->args[5]);
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_sextract(OptContext *ctx, TCGOp *op)
--
2.43.0
1
This header is supposed to be private to tcg and in fact
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
does not need to be included here at all.
3
2
4
Reviewed-by: Song Gao <gaosong@loongson.cn>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/loongarch/csr_helper.c | 1 -
6
tcg/optimize.c | 24 +++++++++---------------
9
target/loongarch/iocsr_helper.c | 1 -
7
1 file changed, 9 insertions(+), 15 deletions(-)
10
2 files changed, 2 deletions(-)
11
8
12
diff --git a/target/loongarch/csr_helper.c b/target/loongarch/csr_helper.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/target/loongarch/csr_helper.c
11
--- a/tcg/optimize.c
15
+++ b/target/loongarch/csr_helper.c
12
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
17
#include "exec/cpu_ldst.h"
14
static bool fold_sextract(OptContext *ctx, TCGOp *op)
18
#include "hw/irq.h"
19
#include "cpu-csr.h"
20
-#include "tcg/tcg-ldst.h"
21
22
target_ulong helper_csrrd_pgd(CPULoongArchState *env)
23
{
15
{
24
diff --git a/target/loongarch/iocsr_helper.c b/target/loongarch/iocsr_helper.c
16
uint64_t z_mask, s_mask, s_mask_old;
25
index XXXXXXX..XXXXXXX 100644
17
+ TempOptInfo *t1 = arg_info(op->args[1]);
26
--- a/target/loongarch/iocsr_helper.c
18
int pos = op->args[2];
27
+++ b/target/loongarch/iocsr_helper.c
19
int len = op->args[3];
28
@@ -XXX,XX +XXX,XX @@
20
29
#include "exec/helper-proto.h"
21
- if (arg_is_const(op->args[1])) {
30
#include "exec/exec-all.h"
22
- uint64_t t;
31
#include "exec/cpu_ldst.h"
23
-
32
-#include "tcg/tcg-ldst.h"
24
- t = arg_info(op->args[1])->val;
33
25
- t = sextract64(t, pos, len);
34
#define GET_MEMTXATTRS(cas) \
26
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
35
((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})
27
+ if (ti_is_const(t1)) {
28
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
29
+ sextract64(ti_const_val(t1), pos, len));
30
}
31
32
- z_mask = arg_info(op->args[1])->z_mask;
33
- z_mask = sextract64(z_mask, pos, len);
34
- ctx->z_mask = z_mask;
35
-
36
- s_mask_old = arg_info(op->args[1])->s_mask;
37
- s_mask = sextract64(s_mask_old, pos, len);
38
- s_mask |= MAKE_64BIT_MASK(len, 64 - len);
39
- ctx->s_mask = s_mask;
40
+ s_mask_old = t1->s_mask;
41
+ s_mask = s_mask_old >> pos;
42
+ s_mask |= -1ull << (len - 1);
43
44
if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
45
return true;
46
}
47
48
- return fold_masks(ctx, op);
49
+ z_mask = sextract64(t1->z_mask, pos, len);
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
51
}
52
53
static bool fold_shift(OptContext *ctx, TCGOp *op)
36
--
54
--
37
2.34.1
55
2.43.0
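For the fold_sextract rework above, the new sign mask claims that every result bit from position len-1 upward repeats the extracted field's sign. A standalone sketch; sextract64_demo is a hypothetical stand-in for the real sextract64 helper:

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical stand-in: sign-extend a len-bit field starting at pos. */
    static int64_t sextract64_demo(uint64_t value, int pos, int len)
    {
        return (int64_t)(value << (64 - pos - len)) >> (64 - len);
    }

    int main(void)
    {
        int pos = 8, len = 4;
        for (uint64_t v = 0; v < 0x10000; v += 0x123) {
            int64_t r = sextract64_demo(v, pos, len);
            /* Bits len-1 and above of the result all equal the field's sign. */
            assert(r >> (len - 1) == 0 || r >> (len - 1) == -1);
        }
        return 0;
    }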
1
Since tcg_out_{ld,st}_helper_args, the slow path no longer requires
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
the address argument to be set up by the tlb load sequence. Use a
3
plain load for the addend and indexed addressing with the original
4
input address register.
5
2
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
tcg/i386/tcg-target.c.inc | 25 ++++++++++---------------
6
tcg/optimize.c | 27 ++++++++++++++-------------
10
1 file changed, 10 insertions(+), 15 deletions(-)
7
1 file changed, 14 insertions(+), 13 deletions(-)
11
8
12
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/i386/tcg-target.c.inc
11
--- a/tcg/optimize.c
15
+++ b/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
17
tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
14
static bool fold_shift(OptContext *ctx, TCGOp *op)
18
} else {
15
{
19
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
16
uint64_t s_mask, z_mask, sign;
20
- /* The second argument is already loaded with addrlo. */
17
+ TempOptInfo *t1, *t2;
21
+ tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
18
22
+ l->addrlo_reg);
19
if (fold_const2(ctx, op) ||
23
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
20
fold_ix_to_i(ctx, op, 0) ||
24
tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
21
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
25
(uintptr_t)l->raddr);
22
return true;
26
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
23
}
27
tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
24
28
} else {
25
- s_mask = arg_info(op->args[1])->s_mask;
29
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
26
- z_mask = arg_info(op->args[1])->z_mask;
30
- /* The second argument is already loaded with addrlo. */
27
+ t1 = arg_info(op->args[1]);
31
+ tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
28
+ t2 = arg_info(op->args[2]);
32
+ l->addrlo_reg);
29
+ s_mask = t1->s_mask;
33
tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
30
+ z_mask = t1->z_mask;
34
tcg_target_call_iarg_regs[2], l->datalo_reg);
31
35
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
32
- if (arg_is_const(op->args[2])) {
36
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
33
- int sh = arg_info(op->args[2])->val;
37
tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
38
TCG_REG_L1, TCG_REG_L0, cmp_ofs);
39
40
- /*
41
- * Prepare for both the fast path add of the tlb addend, and the slow
42
- * path function argument setup.
43
- */
44
- *h = (HostAddress) {
45
- .base = TCG_REG_L1,
46
- .index = -1
47
- };
48
- tcg_out_mov(s, ttype, h->base, addrlo);
49
-
34
-
50
/* jne slow_path */
35
- ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
51
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
36
+ if (ti_is_const(t2)) {
52
ldst->label_ptr[0] = s->code_ptr;
37
+ int sh = ti_const_val(t2);
53
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
38
39
+ z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
40
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
41
42
- return fold_masks(ctx, op);
43
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
54
}
44
}
55
45
56
/* TLB Hit. */
46
switch (op->opc) {
57
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
47
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
58
+ offsetof(CPUTLBEntry, addend));
48
* Arithmetic right shift will not reduce the number of
59
49
* input sign repetitions.
60
- /* add addend(TCG_REG_L0), TCG_REG_L1 */
50
*/
61
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, h->base, TCG_REG_L0,
51
- ctx->s_mask = s_mask;
62
- offsetof(CPUTLBEntry, addend));
52
- break;
63
+ *h = (HostAddress) {
53
+ return fold_masks_s(ctx, op, s_mask);
64
+ .base = addrlo,
54
CASE_OP_32_64(shr):
65
+ .index = TCG_REG_L0,
55
/*
66
+ };
56
* If the sign bit is known zero, then logical right shift
67
#else
57
- * will not reduced the number of input sign repetitions.
68
if (a_bits) {
58
+ * will not reduce the number of input sign repetitions.
69
ldst = new_ldst_label(s);
59
*/
60
- sign = (s_mask & -s_mask) >> 1;
61
+ sign = -s_mask;
62
if (sign && !(z_mask & sign)) {
63
- ctx->s_mask = s_mask;
64
+ return fold_masks_s(ctx, op, s_mask);
65
}
66
break;
67
default:
68
break;
69
}
70
71
- return false;
72
+ return finish_folding(ctx, op);
73
}
74
75
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
70
--
76
--
71
2.34.1
77
2.43.0
72
73
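The fold_shift hunk above keeps the sign-repetition count for a logical right shift once the sign bit is known zero, because logical and arithmetic right shifts then agree. Illustration for a value whose top bit is clear:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t x = 0x00ffee0011223344ull;     /* sign bit known zero */
        for (int k = 0; k < 64; k++) {
            assert((x >> k) == (uint64_t)((int64_t)x >> k));
        }
        return 0;
    }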
1
Case was accidentally dropped in b7a94da9550b.
1
Merge the two conditions, sign != 0 && !(z_mask & sign),
2
by testing ~z_mask & sign. If sign == 0, the logical and
3
will produce false.
2
4
3
Tested-by: Laurent Vivier <laurent@vivier.eu>
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
target/m68k/translate.c | 1 +
8
tcg/optimize.c | 5 ++---
9
1 file changed, 1 insertion(+)
9
1 file changed, 2 insertions(+), 3 deletions(-)
10
10
11
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/m68k/translate.c
13
--- a/tcg/optimize.c
14
+++ b/target/m68k/translate.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void gen_load_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp,
15
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
16
switch (opsize) {
16
17
case OS_BYTE:
17
static bool fold_shift(OptContext *ctx, TCGOp *op)
18
case OS_WORD:
18
{
19
+ case OS_LONG:
19
- uint64_t s_mask, z_mask, sign;
20
tcg_gen_qemu_ld_tl(tmp, addr, index, opsize | MO_SIGN | MO_TE);
20
+ uint64_t s_mask, z_mask;
21
gen_helper_exts32(cpu_env, fp, tmp);
21
TempOptInfo *t1, *t2;
22
23
if (fold_const2(ctx, op) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
25
* If the sign bit is known zero, then logical right shift
26
* will not reduce the number of input sign repetitions.
27
*/
28
- sign = -s_mask;
29
- if (sign && !(z_mask & sign)) {
30
+ if (~z_mask & -s_mask) {
31
return fold_masks_s(ctx, op, s_mask);
32
}
22
break;
33
break;
23
--
34
--
24
2.34.1
35
2.43.0
25
26
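On the condition merge described above: ~z_mask & sign is nonzero exactly when some bit of the sign probe lies outside the may-be-nonzero mask, and it is trivially zero when sign itself is zero, which is why the separate sign != 0 guard can be dropped. A toy check (illustrative values only):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t z_mask = 0x00ffffffffffffffull;   /* bits that may be nonzero */

        /* sign == 0: the merged test is automatically false... */
        assert((~z_mask & (uint64_t)0) == 0);

        /* ...and a sign probe entirely outside z_mask makes it true. */
        uint64_t sign = 0xff00000000000000ull;
        assert((z_mask & sign) == 0 && (~z_mask & sign) != 0);
        return 0;
    }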
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
1
Duplicate fold_sub_vec into fold_sub instead of calling it,
2
and tcg_out_st_helper_args.
2
now that fold_sub_vec always returns true.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/s390x/tcg-target.c.inc | 35 ++++++++++-------------------------
7
tcg/optimize.c | 9 ++++++---
8
1 file changed, 10 insertions(+), 25 deletions(-)
8
1 file changed, 6 insertions(+), 3 deletions(-)
9
9
10
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/s390x/tcg-target.c.inc
12
--- a/tcg/optimize.c
13
+++ b/tcg/s390x/tcg-target.c.inc
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
14
@@ -XXX,XX +XXX,XX @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
15
fold_sub_to_neg(ctx, op)) {
16
return true;
17
}
18
- return false;
19
+ return finish_folding(ctx, op);
15
}
20
}
16
21
17
#if defined(CONFIG_SOFTMMU)
22
static bool fold_sub(OptContext *ctx, TCGOp *op)
18
+static const TCGLdstHelperParam ldst_helper_param = {
19
+ .ntmp = 1, .tmp = { TCG_TMP0 }
20
+};
21
+
22
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
23
{
23
{
24
- TCGReg addr_reg = lb->addrlo_reg;
24
- if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
25
- TCGReg data_reg = lb->datalo_reg;
25
+ if (fold_const2(ctx, op) ||
26
- MemOpIdx oi = lb->oi;
26
+ fold_xx_to_i(ctx, op, 0) ||
27
- MemOp opc = get_memop(oi);
27
+ fold_xi_to_x(ctx, op, 0) ||
28
+ MemOp opc = get_memop(lb->oi);
28
+ fold_sub_to_neg(ctx, op)) {
29
29
return true;
30
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
31
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
32
return false;
33
}
30
}
34
31
35
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
32
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
36
- if (TARGET_LONG_BITS == 64) {
33
? INDEX_op_add_i32 : INDEX_op_add_i64);
37
- tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
34
op->args[2] = arg_new_constant(ctx, -val);
38
- }
39
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
40
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
41
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
42
- tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
43
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
44
+ tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
45
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
46
47
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
48
return true;
49
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
50
51
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
52
{
53
- TCGReg addr_reg = lb->addrlo_reg;
54
- TCGReg data_reg = lb->datalo_reg;
55
- MemOpIdx oi = lb->oi;
56
- MemOp opc = get_memop(oi);
57
- MemOp size = opc & MO_SIZE;
58
+ MemOp opc = get_memop(lb->oi);
59
60
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
61
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
62
return false;
63
}
35
}
64
36
- return false;
65
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
37
+ return finish_folding(ctx, op);
66
- if (TARGET_LONG_BITS == 64) {
38
}
67
- tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
39
68
- }
40
static bool fold_sub2(OptContext *ctx, TCGOp *op)
69
- tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
70
- TCG_REG_R4, lb->type, size, data_reg);
71
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
72
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
73
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
74
tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
75
76
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
77
--
41
--
78
2.34.1
42
2.43.0
79
80
1
The softmmu tlb uses TCG_REG_TMP[0-3], not any of the normally available
1
Avoid the use of the OptContext slots.
2
registers. Now that we handle overlap between inputs and helper arguments,
3
and have eliminated use of A0, we can allow any allocatable reg.
4
2
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/mips/tcg-target-con-set.h | 13 +++++--------
6
tcg/optimize.c | 16 +++++++++-------
9
tcg/mips/tcg-target-con-str.h | 2 --
7
1 file changed, 9 insertions(+), 7 deletions(-)
10
tcg/mips/tcg-target.c.inc | 30 ++++++++----------------------
11
3 files changed, 13 insertions(+), 32 deletions(-)
12
8
13
diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/mips/tcg-target-con-set.h
11
--- a/tcg/optimize.c
16
+++ b/tcg/mips/tcg-target-con-set.h
12
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2(OptContext *ctx, TCGOp *op)
18
C_O0_I1(r)
14
19
C_O0_I2(rZ, r)
15
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
20
C_O0_I2(rZ, rZ)
21
-C_O0_I2(SZ, S)
22
-C_O0_I3(SZ, S, S)
23
-C_O0_I3(SZ, SZ, S)
24
+C_O0_I3(rZ, r, r)
25
+C_O0_I3(rZ, rZ, r)
26
C_O0_I4(rZ, rZ, rZ, rZ)
27
-C_O0_I4(SZ, SZ, S, S)
28
-C_O1_I1(r, L)
29
+C_O0_I4(rZ, rZ, r, r)
30
C_O1_I1(r, r)
31
C_O1_I2(r, 0, rZ)
32
-C_O1_I2(r, L, L)
33
+C_O1_I2(r, r, r)
34
C_O1_I2(r, r, ri)
35
C_O1_I2(r, r, rI)
36
C_O1_I2(r, r, rIK)
37
@@ -XXX,XX +XXX,XX @@ C_O1_I2(r, rZ, rN)
38
C_O1_I2(r, rZ, rZ)
39
C_O1_I4(r, rZ, rZ, rZ, 0)
40
C_O1_I4(r, rZ, rZ, rZ, rZ)
41
-C_O2_I1(r, r, L)
42
-C_O2_I2(r, r, L, L)
43
+C_O2_I1(r, r, r)
44
C_O2_I2(r, r, r, r)
45
C_O2_I4(r, r, rZ, rZ, rN, rN)
46
diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/tcg/mips/tcg-target-con-str.h
49
+++ b/tcg/mips/tcg-target-con-str.h
50
@@ -XXX,XX +XXX,XX @@
51
* REGS(letter, register_mask)
52
*/
53
REGS('r', ALL_GENERAL_REGS)
54
-REGS('L', ALL_QLOAD_REGS)
55
-REGS('S', ALL_QSTORE_REGS)
56
57
/*
58
* Define constraint letters for constants:
59
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
60
index XXXXXXX..XXXXXXX 100644
61
--- a/tcg/mips/tcg-target.c.inc
62
+++ b/tcg/mips/tcg-target.c.inc
63
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
64
#define TCG_CT_CONST_WSZ 0x2000 /* word size */
65
66
#define ALL_GENERAL_REGS 0xffffffffu
67
-#define NOA0_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_A0))
68
-
69
-#ifdef CONFIG_SOFTMMU
70
-#define ALL_QLOAD_REGS \
71
- (NOA0_REGS & ~((TCG_TARGET_REG_BITS < TARGET_LONG_BITS) << TCG_REG_A2))
72
-#define ALL_QSTORE_REGS \
73
- (NOA0_REGS & ~(TCG_TARGET_REG_BITS < TARGET_LONG_BITS \
74
- ? (1 << TCG_REG_A2) | (1 << TCG_REG_A3) \
75
- : (1 << TCG_REG_A1)))
76
-#else
77
-#define ALL_QLOAD_REGS NOA0_REGS
78
-#define ALL_QSTORE_REGS NOA0_REGS
79
-#endif
80
-
81
82
static bool is_p2m1(tcg_target_long val)
83
{
16
{
84
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
17
+ uint64_t z_mask = -1, s_mask = 0;
85
18
+
86
case INDEX_op_qemu_ld_i32:
19
/* We can't do any folding with a load, but we can record bits. */
87
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
20
switch (op->opc) {
88
- ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
21
CASE_OP_32_64(ld8s):
89
+ ? C_O1_I1(r, r) : C_O1_I2(r, r, r));
22
- ctx->s_mask = MAKE_64BIT_MASK(8, 56);
90
case INDEX_op_qemu_st_i32:
23
+ s_mask = INT8_MIN;
91
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
24
break;
92
- ? C_O0_I2(SZ, S) : C_O0_I3(SZ, S, S));
25
CASE_OP_32_64(ld8u):
93
+ ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r));
26
- ctx->z_mask = MAKE_64BIT_MASK(0, 8);
94
case INDEX_op_qemu_ld_i64:
27
+ z_mask = MAKE_64BIT_MASK(0, 8);
95
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
28
break;
96
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, L)
29
CASE_OP_32_64(ld16s):
97
- : C_O2_I2(r, r, L, L));
30
- ctx->s_mask = MAKE_64BIT_MASK(16, 48);
98
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
31
+ s_mask = INT16_MIN;
99
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
32
break;
100
+ : C_O2_I2(r, r, r, r));
33
CASE_OP_32_64(ld16u):
101
case INDEX_op_qemu_st_i64:
34
- ctx->z_mask = MAKE_64BIT_MASK(0, 16);
102
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(SZ, S)
35
+ z_mask = MAKE_64BIT_MASK(0, 16);
103
- : TARGET_LONG_BITS == 32 ? C_O0_I3(SZ, SZ, S)
36
break;
104
- : C_O0_I4(SZ, SZ, S, S));
37
case INDEX_op_ld32s_i64:
105
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
38
- ctx->s_mask = MAKE_64BIT_MASK(32, 32);
106
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r)
39
+ s_mask = INT32_MIN;
107
+ : C_O0_I4(rZ, rZ, r, r));
40
break;
108
41
case INDEX_op_ld32u_i64:
42
- ctx->z_mask = MAKE_64BIT_MASK(0, 32);
43
+ z_mask = MAKE_64BIT_MASK(0, 32);
44
break;
109
default:
45
default:
110
g_assert_not_reached();
46
g_assert_not_reached();
47
}
48
- return false;
49
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
50
}
51
52
static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
111
--
53
--
112
2.34.1
54
2.43.0
113
114
1
Fix these before moving the file, for checkpatch.pl.
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
2
6
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-Id: <20230510170812.663149-1-richard.henderson@linaro.org>
6
---
7
disas.c | 11 ++++++-----
8
1 file changed, 6 insertions(+), 5 deletions(-)
9
10
diff --git a/disas.c b/disas.c
11
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
12
--- a/disas.c
9
--- a/tcg/optimize.c
13
+++ b/disas.c
10
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
11
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
12
TCGType type;
13
14
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
15
- return false;
16
+ return finish_folding(ctx, op);
15
}
17
}
16
18
17
for (pc = code; size > 0; pc += count, size -= count) {
19
type = ctx->type;
18
-    fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
19
-    count = s.info.print_insn(pc, &s.info);
20
-    fprintf(out, "\n");
21
-    if (count < 0)
22
-     break;
23
+ fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
24
+ count = s.info.print_insn(pc, &s.info);
25
+ fprintf(out, "\n");
26
+ if (count < 0) {
27
+ break;
28
+ }
29
if (size < count) {
30
fprintf(out,
31
"Disassembler disagrees with translator over instruction "
32
--
20
--
33
2.34.1
21
2.43.0
diff view generated by jsdifflib
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
and tcg_out_st_helper_args.
2
Remove fold_masks as the function becomes unused.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/aarch64/tcg-target.c.inc | 40 +++++++++++++++---------------------
7
tcg/optimize.c | 18 ++++++++----------
8
1 file changed, 16 insertions(+), 24 deletions(-)
8
1 file changed, 8 insertions(+), 10 deletions(-)
9
9
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/aarch64/tcg-target.c.inc
12
--- a/tcg/optimize.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
15
}
15
return fold_masks_zs(ctx, op, -1, s_mask);
16
}
16
}
17
17
18
-static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
18
-static bool fold_masks(OptContext *ctx, TCGOp *op)
19
-{
19
-{
20
- ptrdiff_t offset = tcg_pcrel_diff(s, target);
20
- return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
21
- tcg_debug_assert(offset == sextract64(offset, 0, 21));
22
- tcg_out_insn(s, 3406, ADR, rd, offset);
23
-}
21
-}
24
-
22
-
25
typedef struct {
23
/*
26
TCGReg base;
24
* An "affected" mask bit is 0 if and only if the result is identical
27
TCGReg index;
25
* to the first input. Thus if the entire mask is 0, the operation
28
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
26
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
29
#endif
27
30
};
28
static bool fold_xor(OptContext *ctx, TCGOp *op)
31
29
{
32
+static const TCGLdstHelperParam ldst_helper_param = {
30
+ uint64_t z_mask, s_mask;
33
+ .ntmp = 1, .tmp = { TCG_REG_TMP }
31
+ TempOptInfo *t1, *t2;
34
+};
35
+
32
+
36
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
33
if (fold_const2_commutative(ctx, op) ||
37
{
34
fold_xx_to_i(ctx, op, 0) ||
38
- MemOpIdx oi = lb->oi;
35
fold_xi_to_x(ctx, op, 0) ||
39
- MemOp opc = get_memop(oi);
36
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
40
+ MemOp opc = get_memop(lb->oi);
37
return true;
41
42
if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
43
return false;
44
}
38
}
45
39
46
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
40
- ctx->z_mask = arg_info(op->args[1])->z_mask
47
- tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
41
- | arg_info(op->args[2])->z_mask;
48
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
42
- ctx->s_mask = arg_info(op->args[1])->s_mask
49
- tcg_out_adr(s, TCG_REG_X3, lb->raddr);
43
- & arg_info(op->args[2])->s_mask;
50
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
44
- return fold_masks(ctx, op);
51
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
45
+ t1 = arg_info(op->args[1]);
52
-
46
+ t2 = arg_info(op->args[2]);
53
- tcg_out_movext(s, lb->type, lb->datalo_reg,
47
+ z_mask = t1->z_mask | t2->z_mask;
54
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0);
48
+ s_mask = t1->s_mask & t2->s_mask;
55
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
49
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
56
tcg_out_goto(s, lb->raddr);
57
return true;
58
}
50
}
59
51
60
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
52
static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
61
{
62
- MemOpIdx oi = lb->oi;
63
- MemOp opc = get_memop(oi);
64
- MemOp size = opc & MO_SIZE;
65
+ MemOp opc = get_memop(lb->oi);
66
67
if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
68
return false;
69
}
70
71
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
72
- tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
73
- tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
74
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
75
- tcg_out_adr(s, TCG_REG_X4, lb->raddr);
76
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
77
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
78
tcg_out_goto(s, lb->raddr);
79
return true;
80
}
81
#else
82
+static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
83
+{
84
+ ptrdiff_t offset = tcg_pcrel_diff(s, target);
85
+ tcg_debug_assert(offset == sextract64(offset, 0, 21));
86
+ tcg_out_insn(s, 3406, ADR, rd, offset);
87
+}
88
+
89
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
90
{
91
if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
92
--
53
--
93
2.34.1
54
2.43.0
94
95
diff view generated by jsdifflib
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
3
---
3
configs/targets/mips-linux-user.mak | 1 -
4
tcg/optimize.c | 2 +-
4
configs/targets/mips-softmmu.mak | 1 -
5
1 file changed, 1 insertion(+), 1 deletion(-)
5
configs/targets/mips64-linux-user.mak | 1 -
6
configs/targets/mips64-softmmu.mak | 1 -
7
configs/targets/mips64el-linux-user.mak | 1 -
8
configs/targets/mips64el-softmmu.mak | 1 -
9
configs/targets/mipsel-linux-user.mak | 1 -
10
configs/targets/mipsel-softmmu.mak | 1 -
11
configs/targets/mipsn32-linux-user.mak | 1 -
12
configs/targets/mipsn32el-linux-user.mak | 1 -
13
10 files changed, 10 deletions(-)
14
6
15
diff --git a/configs/targets/mips-linux-user.mak b/configs/targets/mips-linux-user.mak
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
17
--- a/configs/targets/mips-linux-user.mak
9
--- a/tcg/optimize.c
18
+++ b/configs/targets/mips-linux-user.mak
10
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@ TARGET_ARCH=mips
11
@@ -XXX,XX +XXX,XX @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
20
TARGET_ABI_MIPSO32=y
12
return fold_orc(ctx, op);
21
TARGET_SYSTBL_ABI=o32
13
}
22
TARGET_SYSTBL=syscall_o32.tbl
14
}
23
-TARGET_ALIGNED_ONLY=y
15
- return false;
24
TARGET_BIG_ENDIAN=y
16
+ return finish_folding(ctx, op);
25
diff --git a/configs/targets/mips-softmmu.mak b/configs/targets/mips-softmmu.mak
17
}
26
index XXXXXXX..XXXXXXX 100644
18
27
--- a/configs/targets/mips-softmmu.mak
19
/* Propagate constants and copies, fold constant expressions. */
28
+++ b/configs/targets/mips-softmmu.mak
29
@@ -XXX,XX +XXX,XX @@
30
TARGET_ARCH=mips
31
-TARGET_ALIGNED_ONLY=y
32
TARGET_BIG_ENDIAN=y
33
TARGET_SUPPORTS_MTTCG=y
34
diff --git a/configs/targets/mips64-linux-user.mak b/configs/targets/mips64-linux-user.mak
35
index XXXXXXX..XXXXXXX 100644
36
--- a/configs/targets/mips64-linux-user.mak
37
+++ b/configs/targets/mips64-linux-user.mak
38
@@ -XXX,XX +XXX,XX @@ TARGET_ABI_MIPSN64=y
39
TARGET_BASE_ARCH=mips
40
TARGET_SYSTBL_ABI=n64
41
TARGET_SYSTBL=syscall_n64.tbl
42
-TARGET_ALIGNED_ONLY=y
43
TARGET_BIG_ENDIAN=y
44
diff --git a/configs/targets/mips64-softmmu.mak b/configs/targets/mips64-softmmu.mak
45
index XXXXXXX..XXXXXXX 100644
46
--- a/configs/targets/mips64-softmmu.mak
47
+++ b/configs/targets/mips64-softmmu.mak
48
@@ -XXX,XX +XXX,XX @@
49
TARGET_ARCH=mips64
50
TARGET_BASE_ARCH=mips
51
-TARGET_ALIGNED_ONLY=y
52
TARGET_BIG_ENDIAN=y
53
diff --git a/configs/targets/mips64el-linux-user.mak b/configs/targets/mips64el-linux-user.mak
54
index XXXXXXX..XXXXXXX 100644
55
--- a/configs/targets/mips64el-linux-user.mak
56
+++ b/configs/targets/mips64el-linux-user.mak
57
@@ -XXX,XX +XXX,XX @@ TARGET_ABI_MIPSN64=y
58
TARGET_BASE_ARCH=mips
59
TARGET_SYSTBL_ABI=n64
60
TARGET_SYSTBL=syscall_n64.tbl
61
-TARGET_ALIGNED_ONLY=y
62
diff --git a/configs/targets/mips64el-softmmu.mak b/configs/targets/mips64el-softmmu.mak
63
index XXXXXXX..XXXXXXX 100644
64
--- a/configs/targets/mips64el-softmmu.mak
65
+++ b/configs/targets/mips64el-softmmu.mak
66
@@ -XXX,XX +XXX,XX @@
67
TARGET_ARCH=mips64
68
TARGET_BASE_ARCH=mips
69
-TARGET_ALIGNED_ONLY=y
70
TARGET_NEED_FDT=y
71
diff --git a/configs/targets/mipsel-linux-user.mak b/configs/targets/mipsel-linux-user.mak
72
index XXXXXXX..XXXXXXX 100644
73
--- a/configs/targets/mipsel-linux-user.mak
74
+++ b/configs/targets/mipsel-linux-user.mak
75
@@ -XXX,XX +XXX,XX @@ TARGET_ARCH=mips
76
TARGET_ABI_MIPSO32=y
77
TARGET_SYSTBL_ABI=o32
78
TARGET_SYSTBL=syscall_o32.tbl
79
-TARGET_ALIGNED_ONLY=y
80
diff --git a/configs/targets/mipsel-softmmu.mak b/configs/targets/mipsel-softmmu.mak
81
index XXXXXXX..XXXXXXX 100644
82
--- a/configs/targets/mipsel-softmmu.mak
83
+++ b/configs/targets/mipsel-softmmu.mak
84
@@ -XXX,XX +XXX,XX @@
85
TARGET_ARCH=mips
86
-TARGET_ALIGNED_ONLY=y
87
TARGET_SUPPORTS_MTTCG=y
88
diff --git a/configs/targets/mipsn32-linux-user.mak b/configs/targets/mipsn32-linux-user.mak
89
index XXXXXXX..XXXXXXX 100644
90
--- a/configs/targets/mipsn32-linux-user.mak
91
+++ b/configs/targets/mipsn32-linux-user.mak
92
@@ -XXX,XX +XXX,XX @@ TARGET_ABI32=y
93
TARGET_BASE_ARCH=mips
94
TARGET_SYSTBL_ABI=n32
95
TARGET_SYSTBL=syscall_n32.tbl
96
-TARGET_ALIGNED_ONLY=y
97
TARGET_BIG_ENDIAN=y
98
diff --git a/configs/targets/mipsn32el-linux-user.mak b/configs/targets/mipsn32el-linux-user.mak
99
index XXXXXXX..XXXXXXX 100644
100
--- a/configs/targets/mipsn32el-linux-user.mak
101
+++ b/configs/targets/mipsn32el-linux-user.mak
102
@@ -XXX,XX +XXX,XX @@ TARGET_ABI32=y
103
TARGET_BASE_ARCH=mips
104
TARGET_SYSTBL_ABI=n32
105
TARGET_SYSTBL=syscall_n32.tbl
106
-TARGET_ALIGNED_ONLY=y
107
--
20
--
108
2.34.1
21
2.43.0
1
Memory operations that are not already aligned, or otherwise
1
All non-default cases now finish folding within each function.
2
marked up, require addition of ctx->default_tcg_memop_mask.
2
Do the same with the default case and assert it is done after.
3
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
target/mips/tcg/mxu_translate.c | 3 ++-
7
tcg/optimize.c | 6 ++----
7
target/mips/tcg/micromips_translate.c.inc | 24 ++++++++++++++--------
8
1 file changed, 2 insertions(+), 4 deletions(-)
8
target/mips/tcg/mips16e_translate.c.inc | 18 ++++++++++------
9
target/mips/tcg/nanomips_translate.c.inc | 25 +++++++++++------------
10
4 files changed, 42 insertions(+), 28 deletions(-)
11
9
12
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/target/mips/tcg/mxu_translate.c
12
--- a/tcg/optimize.c
15
+++ b/target/mips/tcg/mxu_translate.c
13
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
14
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
17
tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
15
done = true;
16
break;
17
default:
18
+ done = finish_folding(&ctx, op);
19
break;
20
}
21
-
22
- if (!done) {
23
- finish_folding(&ctx, op);
24
- }
25
+ tcg_debug_assert(done);
18
}
26
}
19
tcg_gen_add_tl(t1, t0, t1);
20
- tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_TESL ^ (sel * MO_BSWAP));
21
+ tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) |
22
+ ctx->default_tcg_memop_mask);
23
24
gen_store_mxu_gpr(t1, XRa);
25
}
27
}
26
diff --git a/target/mips/tcg/micromips_translate.c.inc b/target/mips/tcg/micromips_translate.c.inc
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/mips/tcg/micromips_translate.c.inc
29
+++ b/target/mips/tcg/micromips_translate.c.inc
30
@@ -XXX,XX +XXX,XX @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd,
31
gen_reserved_instruction(ctx);
32
return;
33
}
34
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
35
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL |
36
+ ctx->default_tcg_memop_mask);
37
gen_store_gpr(t1, rd);
38
tcg_gen_movi_tl(t1, 4);
39
gen_op_addr_add(ctx, t0, t0, t1);
40
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
41
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL |
42
+ ctx->default_tcg_memop_mask);
43
gen_store_gpr(t1, rd + 1);
44
break;
45
case SWP:
46
gen_load_gpr(t1, rd);
47
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
48
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
49
+ ctx->default_tcg_memop_mask);
50
tcg_gen_movi_tl(t1, 4);
51
gen_op_addr_add(ctx, t0, t0, t1);
52
gen_load_gpr(t1, rd + 1);
53
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
54
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
55
+ ctx->default_tcg_memop_mask);
56
break;
57
#ifdef TARGET_MIPS64
58
case LDP:
59
@@ -XXX,XX +XXX,XX @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd,
60
gen_reserved_instruction(ctx);
61
return;
62
}
63
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
64
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
65
+ ctx->default_tcg_memop_mask);
66
gen_store_gpr(t1, rd);
67
tcg_gen_movi_tl(t1, 8);
68
gen_op_addr_add(ctx, t0, t0, t1);
69
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
70
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
71
+ ctx->default_tcg_memop_mask);
72
gen_store_gpr(t1, rd + 1);
73
break;
74
case SDP:
75
gen_load_gpr(t1, rd);
76
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
77
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
78
+ ctx->default_tcg_memop_mask);
79
tcg_gen_movi_tl(t1, 8);
80
gen_op_addr_add(ctx, t0, t0, t1);
81
gen_load_gpr(t1, rd + 1);
82
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
83
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
84
+ ctx->default_tcg_memop_mask);
85
break;
86
#endif
87
}
88
diff --git a/target/mips/tcg/mips16e_translate.c.inc b/target/mips/tcg/mips16e_translate.c.inc
89
index XXXXXXX..XXXXXXX 100644
90
--- a/target/mips/tcg/mips16e_translate.c.inc
91
+++ b/target/mips/tcg/mips16e_translate.c.inc
92
@@ -XXX,XX +XXX,XX @@ static void gen_mips16_save(DisasContext *ctx,
93
case 4:
94
gen_base_offset_addr(ctx, t0, 29, 12);
95
gen_load_gpr(t1, 7);
96
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
97
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
98
+ ctx->default_tcg_memop_mask);
99
/* Fall through */
100
case 3:
101
gen_base_offset_addr(ctx, t0, 29, 8);
102
gen_load_gpr(t1, 6);
103
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
104
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
105
+ ctx->default_tcg_memop_mask);
106
/* Fall through */
107
case 2:
108
gen_base_offset_addr(ctx, t0, 29, 4);
109
gen_load_gpr(t1, 5);
110
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
111
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
112
+ ctx->default_tcg_memop_mask);
113
/* Fall through */
114
case 1:
115
gen_base_offset_addr(ctx, t0, 29, 0);
116
gen_load_gpr(t1, 4);
117
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
118
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
119
+ ctx->default_tcg_memop_mask);
120
}
121
122
gen_load_gpr(t0, 29);
123
@@ -XXX,XX +XXX,XX @@ static void gen_mips16_save(DisasContext *ctx,
124
tcg_gen_movi_tl(t2, -4); \
125
gen_op_addr_add(ctx, t0, t0, t2); \
126
gen_load_gpr(t1, reg); \
127
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); \
128
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | \
129
+ ctx->default_tcg_memop_mask); \
130
} while (0)
131
132
if (do_ra) {
133
@@ -XXX,XX +XXX,XX @@ static void gen_mips16_restore(DisasContext *ctx,
134
#define DECR_AND_LOAD(reg) do { \
135
tcg_gen_movi_tl(t2, -4); \
136
gen_op_addr_add(ctx, t0, t0, t2); \
137
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); \
138
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | \
139
+ ctx->default_tcg_memop_mask); \
140
gen_store_gpr(t1, reg); \
141
} while (0)
142
143
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
144
index XXXXXXX..XXXXXXX 100644
145
--- a/target/mips/tcg/nanomips_translate.c.inc
146
+++ b/target/mips/tcg/nanomips_translate.c.inc
147
@@ -XXX,XX +XXX,XX @@ static void gen_p_lsx(DisasContext *ctx, int rd, int rs, int rt)
148
149
switch (extract32(ctx->opcode, 7, 4)) {
150
case NM_LBX:
151
- tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
152
- MO_SB);
153
+ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
154
gen_store_gpr(t0, rd);
155
break;
156
case NM_LHX:
157
/*case NM_LHXS:*/
158
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
159
- MO_TESW);
160
+ MO_TESW | ctx->default_tcg_memop_mask);
161
gen_store_gpr(t0, rd);
162
break;
163
case NM_LWX:
164
/*case NM_LWXS:*/
165
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
166
- MO_TESL);
167
+ MO_TESL | ctx->default_tcg_memop_mask);
168
gen_store_gpr(t0, rd);
169
break;
170
case NM_LBUX:
171
- tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
172
- MO_UB);
173
+ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_UB);
174
gen_store_gpr(t0, rd);
175
break;
176
case NM_LHUX:
177
/*case NM_LHUXS:*/
178
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
179
- MO_TEUW);
180
+ MO_TEUW | ctx->default_tcg_memop_mask);
181
gen_store_gpr(t0, rd);
182
break;
183
case NM_SBX:
184
check_nms(ctx);
185
gen_load_gpr(t1, rd);
186
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
187
- MO_8);
188
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_8);
189
break;
190
case NM_SHX:
191
/*case NM_SHXS:*/
192
check_nms(ctx);
193
gen_load_gpr(t1, rd);
194
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
195
- MO_TEUW);
196
+ MO_TEUW | ctx->default_tcg_memop_mask);
197
break;
198
case NM_SWX:
199
/*case NM_SWXS:*/
200
check_nms(ctx);
201
gen_load_gpr(t1, rd);
202
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
203
- MO_TEUL);
204
+ MO_TEUL | ctx->default_tcg_memop_mask);
205
break;
206
case NM_LWC1X:
207
/*case NM_LWC1XS:*/
208
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
209
addr_off);
210
211
tcg_gen_movi_tl(t0, addr);
212
- tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx, MO_TESL);
213
+ tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx,
214
+ MO_TESL | ctx->default_tcg_memop_mask);
215
}
216
break;
217
case NM_SWPC48:
218
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
219
tcg_gen_movi_tl(t0, addr);
220
gen_load_gpr(t1, rt);
221
222
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
223
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
224
+ MO_TEUL | ctx->default_tcg_memop_mask);
225
}
226
break;
227
default:
228
--
28
--
229
2.34.1
29
2.43.0
1
Use tcg_out_st_helper_args. This eliminates the use of a tail call to
1
All mask setting is now done with parameters via fold_masks_*.
2
the store helper. This may or may not be an improvement, depending on
3
the call/return branch prediction of the host microarchitecture.
4
2
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/i386/tcg-target.c.inc | 57 +++------------------------------------
6
tcg/optimize.c | 13 -------------
9
1 file changed, 4 insertions(+), 53 deletions(-)
7
1 file changed, 13 deletions(-)
10
8
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
11
--- a/tcg/optimize.c
14
+++ b/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
13
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
16
*/
14
QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
17
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
15
18
{
16
/* In flight values from optimization. */
19
- MemOpIdx oi = l->oi;
17
- uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
20
- MemOp opc = get_memop(oi);
18
- uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
21
- MemOp s_bits = opc & MO_SIZE;
19
TCGType type;
22
+ MemOp opc = get_memop(l->oi);
20
} OptContext;
23
tcg_insn_unit **label_ptr = &l->label_ptr[0];
21
24
- TCGReg retaddr;
22
@@ -XXX,XX +XXX,XX @@ static bool finish_folding(OptContext *ctx, TCGOp *op)
25
23
for (i = 0; i < nb_oargs; i++) {
26
/* resolve label address */
24
TCGTemp *ts = arg_temp(op->args[i]);
27
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
25
reset_ts(ctx, ts);
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
26
- /*
29
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
27
- * Save the corresponding known-zero/sign bits mask for the
28
- * first output argument (only one supported so far).
29
- */
30
- if (i == 0) {
31
- ts_info(ts)->z_mask = ctx->z_mask;
32
- }
30
}
33
}
31
32
- if (TCG_TARGET_REG_BITS == 32) {
33
- int ofs = 0;
34
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
35
+ tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
36
37
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
38
- ofs += 4;
39
-
40
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
41
- ofs += 4;
42
-
43
- if (TARGET_LONG_BITS == 64) {
44
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
45
- ofs += 4;
46
- }
47
-
48
- tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
49
- ofs += 4;
50
-
51
- if (s_bits == MO_64) {
52
- tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
53
- ofs += 4;
54
- }
55
-
56
- tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
57
- ofs += 4;
58
-
59
- retaddr = TCG_REG_EAX;
60
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
61
- tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
62
- } else {
63
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
64
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
65
- l->addrlo_reg);
66
- tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
67
- tcg_target_call_iarg_regs[2], l->datalo_reg);
68
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
69
-
70
- if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
71
- retaddr = tcg_target_call_iarg_regs[4];
72
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
73
- } else {
74
- retaddr = TCG_REG_RAX;
75
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
76
- tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
77
- TCG_TARGET_CALL_STACK_OFFSET);
78
- }
79
- }
80
-
81
- /* "Tail call" to the helper, with the return address back inline. */
82
- tcg_out_push(s, retaddr);
83
- tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
84
+ tcg_out_jmp(s, l->raddr);
85
return true;
34
return true;
86
}
35
}
87
#else
36
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
37
ctx.type = TCG_TYPE_I32;
38
}
39
40
- /* Assume all bits affected, no bits known zero, no sign reps. */
41
- ctx.z_mask = -1;
42
- ctx.s_mask = 0;
43
-
44
/*
45
* Process each opcode.
46
* Sorted alphabetically by opcode as much as possible.
88
--
47
--
89
2.34.1
48
2.43.0
90
91
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
1
All instances of s_mask have been converted to the new
2
and tcg_out_st_helper_args.
2
representation. We can now re-enable usage.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/riscv/tcg-target.c.inc | 37 ++++++++++---------------------------
7
tcg/optimize.c | 4 ++--
9
1 file changed, 10 insertions(+), 27 deletions(-)
8
1 file changed, 2 insertions(+), 2 deletions(-)
10
9
11
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/riscv/tcg-target.c.inc
12
--- a/tcg/optimize.c
14
+++ b/tcg/riscv/tcg-target.c.inc
13
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
16
tcg_debug_assert(ok);
15
g_assert_not_reached();
17
}
18
19
+/* We have three temps, we might as well expose them. */
20
+static const TCGLdstHelperParam ldst_helper_param = {
21
+ .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
22
+};
23
+
24
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
25
{
26
- MemOpIdx oi = l->oi;
27
- MemOp opc = get_memop(oi);
28
- TCGReg a0 = tcg_target_call_iarg_regs[0];
29
- TCGReg a1 = tcg_target_call_iarg_regs[1];
30
- TCGReg a2 = tcg_target_call_iarg_regs[2];
31
- TCGReg a3 = tcg_target_call_iarg_regs[3];
32
+ MemOp opc = get_memop(l->oi);
33
34
/* resolve label address */
35
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
37
}
16
}
38
17
39
/* call load helper */
18
- if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
40
- tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
19
+ if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
41
- tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
20
return true;
42
- tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
43
- tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
44
-
45
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
46
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
47
- tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0);
48
+ tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
49
50
tcg_out_goto(s, l->raddr);
51
return true;
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
53
54
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
55
{
56
- MemOpIdx oi = l->oi;
57
- MemOp opc = get_memop(oi);
58
- MemOp s_bits = opc & MO_SIZE;
59
- TCGReg a0 = tcg_target_call_iarg_regs[0];
60
- TCGReg a1 = tcg_target_call_iarg_regs[1];
61
- TCGReg a2 = tcg_target_call_iarg_regs[2];
62
- TCGReg a3 = tcg_target_call_iarg_regs[3];
63
- TCGReg a4 = tcg_target_call_iarg_regs[4];
64
+ MemOp opc = get_memop(l->oi);
65
66
/* resolve label address */
67
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
69
}
21
}
70
22
71
/* call store helper */
23
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
72
- tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
24
s_mask = s_mask_old >> pos;
73
- tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
25
s_mask |= -1ull << (len - 1);
74
- tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, a2,
26
75
- l->type, s_bits, l->datalo_reg);
27
- if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
76
- tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
28
+ if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
77
- tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
29
return true;
78
-
30
}
79
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
31
80
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
81
82
tcg_out_goto(s, l->raddr);
83
--
32
--
84
2.34.1
33
2.43.0
85
86
1
Instead of trying to unify all operations on uint64_t, pull out
1
The big comment just above says functions should be sorted.
2
mmu_lookup() to perform the basic tlb hit and resolution.
2
Add forward declarations as needed.
3
Create individual functions to handle access by size.
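As a small standalone sketch of the byte reassembly (illustrative only, not from the patch; the byte values and the two-bytes-per-page split are made up, while the accumulation and final swap mirror do_ld_beN() and do_ld4_mmu() in the diff below):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t bswap32_(uint32_t x)
    {
        return (x >> 24) | ((x >> 8) & 0xff00) |
               ((x << 8) & 0xff0000) | (x << 24);
    }

    int main(void)
    {
        /* Guest bytes in address order, split across a page boundary. */
        uint8_t page0[2] = { 0x11, 0x22 };
        uint8_t page1[2] = { 0x33, 0x44 };
        uint64_t ret = 0;

        for (int i = 0; i < 2; i++) {       /* accumulate the first page */
            ret = (ret << 8) | page0[i];
        }
        for (int i = 0; i < 2; i++) {       /* then the second page */
            ret = (ret << 8) | page1[i];
        }
        assert(ret == 0x11223344);          /* big-endian view of the bytes */

        /* For MO_LE the caller swaps, matching a direct LE 32-bit load. */
        assert(bswap32_(ret) == 0x44332211);
        return 0;
    }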
4
3
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
accel/tcg/cputlb.c | 645 +++++++++++++++++++++++++++++----------------
7
tcg/optimize.c | 114 +++++++++++++++++++++++++------------------------
10
1 file changed, 424 insertions(+), 221 deletions(-)
8
1 file changed, 59 insertions(+), 55 deletions(-)
11
9
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
12
--- a/tcg/optimize.c
15
+++ b/accel/tcg/cputlb.c
13
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
17
15
* 3) those that produce information about the result value.
18
#endif
16
*/
19
17
20
+/*
18
+static bool fold_or(OptContext *ctx, TCGOp *op);
21
+ * Probe for a load/store operation.
19
+static bool fold_orc(OptContext *ctx, TCGOp *op);
22
+ * Return the host address and flags.
20
+static bool fold_xor(OptContext *ctx, TCGOp *op);
23
+ */
24
+
21
+
25
+typedef struct MMULookupPageData {
22
static bool fold_add(OptContext *ctx, TCGOp *op)
26
+ CPUTLBEntryFull *full;
23
{
27
+ void *haddr;
24
if (fold_const2_commutative(ctx, op) ||
28
+ target_ulong addr;
25
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
29
+ int flags;
26
return fold_masks_zs(ctx, op, z_mask, s_mask);
30
+ int size;
27
}
31
+} MMULookupPageData;
28
32
+
29
+static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
33
+typedef struct MMULookupLocals {
34
+ MMULookupPageData page[2];
35
+ MemOp memop;
36
+ int mmu_idx;
37
+} MMULookupLocals;
38
+
39
+/**
40
+ * mmu_lookup1: translate one page
41
+ * @env: cpu context
42
+ * @data: lookup parameters
43
+ * @mmu_idx: virtual address context
44
+ * @access_type: load/store/code
45
+ * @ra: return address into tcg generated code, or 0
46
+ *
47
+ * Resolve the translation for the one page at @data.addr, filling in
48
+ * the rest of @data with the results. If the translation fails,
49
+ * tlb_fill will longjmp out. Return true if the softmmu tlb for
50
+ * @mmu_idx may have resized.
51
+ */
52
+static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data,
53
+ int mmu_idx, MMUAccessType access_type, uintptr_t ra)
54
+{
30
+{
55
+ target_ulong addr = data->addr;
31
+ /* If true and false values are the same, eliminate the cmp. */
56
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
32
+ if (args_are_copies(op->args[2], op->args[3])) {
57
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
33
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
58
+ target_ulong tlb_addr = tlb_read_idx(entry, access_type);
59
+ bool maybe_resized = false;
60
+
61
+ /* If the TLB entry is for a different page, reload and try again. */
62
+ if (!tlb_hit(tlb_addr, addr)) {
63
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type,
64
+ addr & TARGET_PAGE_MASK)) {
65
+ tlb_fill(env_cpu(env), addr, data->size, access_type, mmu_idx, ra);
66
+ maybe_resized = true;
67
+ index = tlb_index(env, mmu_idx, addr);
68
+ entry = tlb_entry(env, mmu_idx, addr);
69
+ }
70
+ tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK;
71
+ }
34
+ }
72
+
35
+
73
+ data->flags = tlb_addr & TLB_FLAGS_MASK;
36
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
74
+ data->full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
37
+ uint64_t tv = arg_info(op->args[2])->val;
75
+ /* Compute haddr speculatively; depending on flags it might be invalid. */
38
+ uint64_t fv = arg_info(op->args[3])->val;
76
+ data->haddr = (void *)((uintptr_t)addr + entry->addend);
77
+
39
+
78
+ return maybe_resized;
40
+ if (tv == -1 && fv == 0) {
41
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
42
+ }
43
+ if (tv == 0 && fv == -1) {
44
+ if (TCG_TARGET_HAS_not_vec) {
45
+ op->opc = INDEX_op_not_vec;
46
+ return fold_not(ctx, op);
47
+ } else {
48
+ op->opc = INDEX_op_xor_vec;
49
+ op->args[2] = arg_new_constant(ctx, -1);
50
+ return fold_xor(ctx, op);
51
+ }
52
+ }
53
+ }
54
+ if (arg_is_const(op->args[2])) {
55
+ uint64_t tv = arg_info(op->args[2])->val;
56
+ if (tv == -1) {
57
+ op->opc = INDEX_op_or_vec;
58
+ op->args[2] = op->args[3];
59
+ return fold_or(ctx, op);
60
+ }
61
+ if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
62
+ op->opc = INDEX_op_andc_vec;
63
+ op->args[2] = op->args[1];
64
+ op->args[1] = op->args[3];
65
+ return fold_andc(ctx, op);
66
+ }
67
+ }
68
+ if (arg_is_const(op->args[3])) {
69
+ uint64_t fv = arg_info(op->args[3])->val;
70
+ if (fv == 0) {
71
+ op->opc = INDEX_op_and_vec;
72
+ return fold_and(ctx, op);
73
+ }
74
+ if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
75
+ op->opc = INDEX_op_orc_vec;
76
+ op->args[2] = op->args[1];
77
+ op->args[1] = op->args[3];
78
+ return fold_orc(ctx, op);
79
+ }
80
+ }
81
+ return finish_folding(ctx, op);
79
+}
82
+}
80
+
83
+
81
+/**
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
82
+ * mmu_watch_or_dirty
85
{
83
+ * @env: cpu context
86
int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
84
+ * @data: lookup parameters
87
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
85
+ * @access_type: load/store/code
88
return fold_masks_zs(ctx, op, z_mask, s_mask);
86
+ * @ra: return address into tcg generated code, or 0
87
+ *
88
+ * Trigger watchpoints for @data.addr:@data.size;
89
+ * record writes to protected clean pages.
90
+ */
91
+static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data,
92
+ MMUAccessType access_type, uintptr_t ra)
93
+{
94
+ CPUTLBEntryFull *full = data->full;
95
+ target_ulong addr = data->addr;
96
+ int flags = data->flags;
97
+ int size = data->size;
98
+
99
+ /* On watchpoint hit, this will longjmp out. */
100
+ if (flags & TLB_WATCHPOINT) {
101
+ int wp = access_type == MMU_DATA_STORE ? BP_MEM_WRITE : BP_MEM_READ;
102
+ cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs, wp, ra);
103
+ flags &= ~TLB_WATCHPOINT;
104
+ }
105
+
106
+ /* Note that notdirty is only set for writes. */
107
+ if (flags & TLB_NOTDIRTY) {
108
+ notdirty_write(env_cpu(env), addr, size, full, ra);
109
+ flags &= ~TLB_NOTDIRTY;
110
+ }
111
+ data->flags = flags;
112
+}
113
+
114
+/**
115
+ * mmu_lookup: translate page(s)
116
+ * @env: cpu context
117
+ * @addr: virtual address
118
+ * @oi: combined mmu_idx and MemOp
119
+ * @ra: return address into tcg generated code, or 0
120
+ * @access_type: load/store/code
121
+ * @l: output result
122
+ *
123
+ * Resolve the translation for the page(s) beginning at @addr, for MemOp.size
124
+ * bytes. Return true if the lookup crosses a page boundary.
125
+ */
126
+static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi,
127
+ uintptr_t ra, MMUAccessType type, MMULookupLocals *l)
128
+{
129
+ unsigned a_bits;
130
+ bool crosspage;
131
+ int flags;
132
+
133
+ l->memop = get_memop(oi);
134
+ l->mmu_idx = get_mmuidx(oi);
135
+
136
+ tcg_debug_assert(l->mmu_idx < NB_MMU_MODES);
137
+
138
+ /* Handle CPU specific unaligned behaviour */
139
+ a_bits = get_alignment_bits(l->memop);
140
+ if (addr & ((1 << a_bits) - 1)) {
141
+ cpu_unaligned_access(env_cpu(env), addr, type, l->mmu_idx, ra);
142
+ }
143
+
144
+ l->page[0].addr = addr;
145
+ l->page[0].size = memop_size(l->memop);
146
+ l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK;
147
+ l->page[1].size = 0;
148
+ crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK;
149
+
150
+ if (likely(!crosspage)) {
151
+ mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra);
152
+
153
+ flags = l->page[0].flags;
154
+ if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
155
+ mmu_watch_or_dirty(env, &l->page[0], type, ra);
156
+ }
157
+ if (unlikely(flags & TLB_BSWAP)) {
158
+ l->memop ^= MO_BSWAP;
159
+ }
160
+ } else {
161
+ /* Finish compute of page crossing. */
162
+ int size0 = l->page[1].addr - addr;
163
+ l->page[1].size = l->page[0].size - size0;
164
+ l->page[0].size = size0;
165
+
166
+ /*
167
+ * Lookup both pages, recognizing exceptions from either. If the
168
+ * second lookup potentially resized, refresh first CPUTLBEntryFull.
169
+ */
170
+ mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra);
171
+ if (mmu_lookup1(env, &l->page[1], l->mmu_idx, type, ra)) {
172
+ uintptr_t index = tlb_index(env, l->mmu_idx, addr);
173
+ l->page[0].full = &env_tlb(env)->d[l->mmu_idx].fulltlb[index];
174
+ }
175
+
176
+ flags = l->page[0].flags | l->page[1].flags;
177
+ if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
178
+ mmu_watch_or_dirty(env, &l->page[0], type, ra);
179
+ mmu_watch_or_dirty(env, &l->page[1], type, ra);
180
+ }
181
+
182
+ /*
183
+ * Since target/sparc is the only user of TLB_BSWAP, and all
184
+ * Sparc accesses are aligned, any treatment across two pages
185
+ * would be arbitrary. Refuse it until there's a use.
186
+ */
187
+ tcg_debug_assert((flags & TLB_BSWAP) == 0);
188
+ }
189
+
190
+ return crosspage;
191
+}
192
+
193
/*
194
* Probe for an atomic operation. Do not allow unaligned operations,
195
* or io operations to proceed. Return the host address.
196
@@ -XXX,XX +XXX,XX @@ load_memop(const void *haddr, MemOp op)
197
}
198
}
89
}
199
90
200
-static inline uint64_t QEMU_ALWAYS_INLINE
91
-static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
201
-load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
202
- uintptr_t retaddr, MemOp op, MMUAccessType access_type,
203
- FullLoadHelper *full_load)
204
-{
92
-{
205
- const unsigned a_bits = get_alignment_bits(get_memop(oi));
93
- /* If true and false values are the same, eliminate the cmp. */
206
- const size_t size = memop_size(op);
94
- if (args_are_copies(op->args[2], op->args[3])) {
207
- uintptr_t mmu_idx = get_mmuidx(oi);
95
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
208
- uintptr_t index;
209
- CPUTLBEntry *entry;
210
- target_ulong tlb_addr;
211
- void *haddr;
212
- uint64_t res;
213
-
214
- tcg_debug_assert(mmu_idx < NB_MMU_MODES);
215
-
216
- /* Handle CPU specific unaligned behaviour */
217
- if (addr & ((1 << a_bits) - 1)) {
218
- cpu_unaligned_access(env_cpu(env), addr, access_type,
219
- mmu_idx, retaddr);
220
- }
96
- }
221
-
97
-
222
- index = tlb_index(env, mmu_idx, addr);
98
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
223
- entry = tlb_entry(env, mmu_idx, addr);
99
- uint64_t tv = arg_info(op->args[2])->val;
224
- tlb_addr = tlb_read_idx(entry, access_type);
100
- uint64_t fv = arg_info(op->args[3])->val;
225
-
101
-
226
- /* If the TLB entry is for a different page, reload and try again. */
102
- if (tv == -1 && fv == 0) {
227
- if (!tlb_hit(tlb_addr, addr)) {
103
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
228
- if (!victim_tlb_hit(env, mmu_idx, index, access_type,
229
- addr & TARGET_PAGE_MASK)) {
230
- tlb_fill(env_cpu(env), addr, size,
231
- access_type, mmu_idx, retaddr);
232
- index = tlb_index(env, mmu_idx, addr);
233
- entry = tlb_entry(env, mmu_idx, addr);
234
- }
104
- }
235
- tlb_addr = tlb_read_idx(entry, access_type);
105
- if (tv == 0 && fv == -1) {
236
- tlb_addr &= ~TLB_INVALID_MASK;
106
- if (TCG_TARGET_HAS_not_vec) {
107
- op->opc = INDEX_op_not_vec;
108
- return fold_not(ctx, op);
109
- } else {
110
- op->opc = INDEX_op_xor_vec;
111
- op->args[2] = arg_new_constant(ctx, -1);
112
- return fold_xor(ctx, op);
113
- }
114
- }
237
- }
115
- }
238
-
116
- if (arg_is_const(op->args[2])) {
239
- /* Handle anything that isn't just a straight memory access. */
117
- uint64_t tv = arg_info(op->args[2])->val;
240
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
118
- if (tv == -1) {
241
- CPUTLBEntryFull *full;
119
- op->opc = INDEX_op_or_vec;
242
- bool need_swap;
120
- op->args[2] = op->args[3];
243
-
121
- return fold_or(ctx, op);
244
- /* For anything that is unaligned, recurse through full_load. */
245
- if ((addr & (size - 1)) != 0) {
246
- goto do_unaligned_access;
247
- }
122
- }
248
-
123
- if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
249
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
124
- op->opc = INDEX_op_andc_vec;
250
-
125
- op->args[2] = op->args[1];
251
- /* Handle watchpoints. */
126
- op->args[1] = op->args[3];
252
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
127
- return fold_andc(ctx, op);
253
- /* On watchpoint hit, this will longjmp out. */
254
- cpu_check_watchpoint(env_cpu(env), addr, size,
255
- full->attrs, BP_MEM_READ, retaddr);
256
- }
128
- }
257
-
129
- }
258
- need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
130
- if (arg_is_const(op->args[3])) {
259
-
131
- uint64_t fv = arg_info(op->args[3])->val;
260
- /* Handle I/O access. */
132
- if (fv == 0) {
261
- if (likely(tlb_addr & TLB_MMIO)) {
133
- op->opc = INDEX_op_and_vec;
262
- return io_readx(env, full, mmu_idx, addr, retaddr,
134
- return fold_and(ctx, op);
263
- access_type, op ^ (need_swap * MO_BSWAP));
264
- }
135
- }
265
-
136
- if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
266
- haddr = (void *)((uintptr_t)addr + entry->addend);
137
- op->opc = INDEX_op_orc_vec;
267
-
138
- op->args[2] = op->args[1];
268
- /*
139
- op->args[1] = op->args[3];
269
- * Keep these two load_memop separate to ensure that the compiler
140
- return fold_orc(ctx, op);
270
- * is able to fold the entire function to a single instruction.
271
- * There is a build-time assert inside to remind you of this. ;-)
272
- */
273
- if (unlikely(need_swap)) {
274
- return load_memop(haddr, op ^ MO_BSWAP);
275
- }
141
- }
276
- return load_memop(haddr, op);
277
- }
142
- }
278
-
143
- return finish_folding(ctx, op);
279
- /* Handle slow unaligned access (it spans two pages or IO). */
280
- if (size > 1
281
- && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
282
- >= TARGET_PAGE_SIZE)) {
283
- target_ulong addr1, addr2;
284
- uint64_t r1, r2;
285
- unsigned shift;
286
- do_unaligned_access:
287
- addr1 = addr & ~((target_ulong)size - 1);
288
- addr2 = addr1 + size;
289
- r1 = full_load(env, addr1, oi, retaddr);
290
- r2 = full_load(env, addr2, oi, retaddr);
291
- shift = (addr & (size - 1)) * 8;
292
-
293
- if (memop_big_endian(op)) {
294
- /* Big-endian combine. */
295
- res = (r1 << shift) | (r2 >> ((size * 8) - shift));
296
- } else {
297
- /* Little-endian combine. */
298
- res = (r1 >> shift) | (r2 << ((size * 8) - shift));
299
- }
300
- return res & MAKE_64BIT_MASK(0, size * 8);
301
- }
302
-
303
- haddr = (void *)((uintptr_t)addr + entry->addend);
304
- return load_memop(haddr, op);
305
-}
144
-}
306
-
145
-
307
/*
146
/* Propagate constants and copies, fold constant expressions. */
308
* For the benefit of TCG generated code, we want to avoid the
147
void tcg_optimize(TCGContext *s)
309
* complication of ABI-specific return type promotion and always
310
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
311
* We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
312
*/
313
314
-static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
315
- MemOpIdx oi, uintptr_t retaddr)
316
+/**
317
+ * do_ld_mmio_beN:
318
+ * @env: cpu context
319
+ * @p: translation parameters
320
+ * @ret_be: accumulated data
321
+ * @mmu_idx: virtual address context
322
+ * @ra: return address into tcg generated code, or 0
323
+ *
324
+ * Load @p->size bytes from @p->addr, which is memory-mapped i/o.
325
+ * The bytes are concatenated in big-endian order with @ret_be.
326
+ */
327
+static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p,
328
+ uint64_t ret_be, int mmu_idx,
329
+ MMUAccessType type, uintptr_t ra)
330
{
148
{
331
- validate_memop(oi, MO_UB);
332
- return load_helper(env, addr, oi, retaddr, MO_UB, MMU_DATA_LOAD,
333
- full_ldub_mmu);
334
+ CPUTLBEntryFull *full = p->full;
335
+ target_ulong addr = p->addr;
336
+ int i, size = p->size;
337
+
338
+ QEMU_IOTHREAD_LOCK_GUARD();
339
+ for (i = 0; i < size; i++) {
340
+ uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB);
341
+ ret_be = (ret_be << 8) | x;
342
+ }
343
+ return ret_be;
344
+}
345
+
346
+/**
347
+ * do_ld_bytes_beN
348
+ * @p: translation parameters
349
+ * @ret_be: accumulated data
350
+ *
351
+ * Load @p->size bytes from @p->haddr, which is RAM.
352
+ * The bytes are concatenated in big-endian order with @ret_be.
353
+ */
354
+static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
355
+{
356
+ uint8_t *haddr = p->haddr;
357
+ int i, size = p->size;
358
+
359
+ for (i = 0; i < size; i++) {
360
+ ret_be = (ret_be << 8) | haddr[i];
361
+ }
362
+ return ret_be;
363
+}
364
+
365
+/*
366
+ * Wrapper for the above.
367
+ */
368
+static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
369
+ uint64_t ret_be, int mmu_idx,
370
+ MMUAccessType type, uintptr_t ra)
371
+{
372
+ if (unlikely(p->flags & TLB_MMIO)) {
373
+ return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
374
+ } else {
375
+ return do_ld_bytes_beN(p, ret_be);
376
+ }
377
+}
378
+
379
+static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
380
+ MMUAccessType type, uintptr_t ra)
381
+{
382
+ if (unlikely(p->flags & TLB_MMIO)) {
383
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB);
384
+ } else {
385
+ return *(uint8_t *)p->haddr;
386
+ }
387
+}
388
+
389
+static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
390
+ MMUAccessType type, MemOp memop, uintptr_t ra)
391
+{
392
+ uint64_t ret;
393
+
394
+ if (unlikely(p->flags & TLB_MMIO)) {
395
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
396
+ }
397
+
398
+ /* Perform the load host endian, then swap if necessary. */
399
+ ret = load_memop(p->haddr, MO_UW);
400
+ if (memop & MO_BSWAP) {
401
+ ret = bswap16(ret);
402
+ }
403
+ return ret;
404
+}
405
+
406
+static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
407
+ MMUAccessType type, MemOp memop, uintptr_t ra)
408
+{
409
+ uint32_t ret;
410
+
411
+ if (unlikely(p->flags & TLB_MMIO)) {
412
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
413
+ }
414
+
415
+ /* Perform the load host endian. */
416
+ ret = load_memop(p->haddr, MO_UL);
417
+ if (memop & MO_BSWAP) {
418
+ ret = bswap32(ret);
419
+ }
420
+ return ret;
421
+}
422
+
423
+static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
424
+ MMUAccessType type, MemOp memop, uintptr_t ra)
425
+{
426
+ uint64_t ret;
427
+
428
+ if (unlikely(p->flags & TLB_MMIO)) {
429
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
430
+ }
431
+
432
+ /* Perform the load host endian. */
433
+ ret = load_memop(p->haddr, MO_UQ);
434
+ if (memop & MO_BSWAP) {
435
+ ret = bswap64(ret);
436
+ }
437
+ return ret;
438
+}
439
+
440
+static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
441
+ uintptr_t ra, MMUAccessType access_type)
442
+{
443
+ MMULookupLocals l;
444
+ bool crosspage;
445
+
446
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
447
+ tcg_debug_assert(!crosspage);
448
+
449
+ return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
450
}
451
452
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
453
MemOpIdx oi, uintptr_t retaddr)
454
{
455
- return full_ldub_mmu(env, addr, oi, retaddr);
456
+ validate_memop(oi, MO_UB);
457
+ return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
458
}
459
460
-static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
461
- MemOpIdx oi, uintptr_t retaddr)
462
+static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
463
+ uintptr_t ra, MMUAccessType access_type)
464
{
465
- validate_memop(oi, MO_LEUW);
466
- return load_helper(env, addr, oi, retaddr, MO_LEUW, MMU_DATA_LOAD,
467
- full_le_lduw_mmu);
468
+ MMULookupLocals l;
469
+ bool crosspage;
470
+ uint16_t ret;
471
+ uint8_t a, b;
472
+
473
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
474
+ if (likely(!crosspage)) {
475
+ return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
476
+ }
477
+
478
+ a = do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
479
+ b = do_ld_1(env, &l.page[1], l.mmu_idx, access_type, ra);
480
+
481
+ if ((l.memop & MO_BSWAP) == MO_LE) {
482
+ ret = a | (b << 8);
483
+ } else {
484
+ ret = b | (a << 8);
485
+ }
486
+ return ret;
487
}
488
489
tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
490
MemOpIdx oi, uintptr_t retaddr)
491
{
492
- return full_le_lduw_mmu(env, addr, oi, retaddr);
493
-}
494
-
495
-static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
496
- MemOpIdx oi, uintptr_t retaddr)
497
-{
498
- validate_memop(oi, MO_BEUW);
499
- return load_helper(env, addr, oi, retaddr, MO_BEUW, MMU_DATA_LOAD,
500
- full_be_lduw_mmu);
501
+ validate_memop(oi, MO_LEUW);
502
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
503
}
504
505
tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
506
MemOpIdx oi, uintptr_t retaddr)
507
{
508
- return full_be_lduw_mmu(env, addr, oi, retaddr);
509
+ validate_memop(oi, MO_BEUW);
510
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
511
}
512
513
-static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
514
- MemOpIdx oi, uintptr_t retaddr)
515
+static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
516
+ uintptr_t ra, MMUAccessType access_type)
517
{
518
- validate_memop(oi, MO_LEUL);
519
- return load_helper(env, addr, oi, retaddr, MO_LEUL, MMU_DATA_LOAD,
520
- full_le_ldul_mmu);
521
+ MMULookupLocals l;
522
+ bool crosspage;
523
+ uint32_t ret;
524
+
525
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
526
+ if (likely(!crosspage)) {
527
+ return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
528
+ }
529
+
530
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
531
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
532
+ if ((l.memop & MO_BSWAP) == MO_LE) {
533
+ ret = bswap32(ret);
534
+ }
535
+ return ret;
536
}
537
538
tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
539
MemOpIdx oi, uintptr_t retaddr)
540
{
541
- return full_le_ldul_mmu(env, addr, oi, retaddr);
542
-}
543
-
544
-static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
545
- MemOpIdx oi, uintptr_t retaddr)
546
-{
547
- validate_memop(oi, MO_BEUL);
548
- return load_helper(env, addr, oi, retaddr, MO_BEUL, MMU_DATA_LOAD,
549
- full_be_ldul_mmu);
550
+ validate_memop(oi, MO_LEUL);
551
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
552
}
553
554
tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
555
MemOpIdx oi, uintptr_t retaddr)
556
{
557
- return full_be_ldul_mmu(env, addr, oi, retaddr);
558
+ validate_memop(oi, MO_BEUL);
559
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
560
+}
561
+
562
+static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
563
+ uintptr_t ra, MMUAccessType access_type)
564
+{
565
+ MMULookupLocals l;
566
+ bool crosspage;
567
+ uint64_t ret;
568
+
569
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
570
+ if (likely(!crosspage)) {
571
+ return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
572
+ }
573
+
574
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
575
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
576
+ if ((l.memop & MO_BSWAP) == MO_LE) {
577
+ ret = bswap64(ret);
578
+ }
579
+ return ret;
580
}
581
582
uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
583
MemOpIdx oi, uintptr_t retaddr)
584
{
585
validate_memop(oi, MO_LEUQ);
586
- return load_helper(env, addr, oi, retaddr, MO_LEUQ, MMU_DATA_LOAD,
587
- helper_le_ldq_mmu);
588
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
589
}
590
591
uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
592
MemOpIdx oi, uintptr_t retaddr)
593
{
594
validate_memop(oi, MO_BEUQ);
595
- return load_helper(env, addr, oi, retaddr, MO_BEUQ, MMU_DATA_LOAD,
596
- helper_be_ldq_mmu);
597
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
598
}
599
600
/*
601
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
602
* Load helpers for cpu_ldst.h.
603
*/
604
605
-static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
606
- MemOpIdx oi, uintptr_t retaddr,
607
- FullLoadHelper *full_load)
608
+static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
609
{
610
- uint64_t ret;
611
-
612
- ret = full_load(env, addr, oi, retaddr);
613
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
614
- return ret;
615
}
616
617
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
618
{
619
- return cpu_load_helper(env, addr, oi, ra, full_ldub_mmu);
620
+ uint8_t ret;
621
+
622
+ validate_memop(oi, MO_UB);
623
+ ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
624
+ plugin_load_cb(env, addr, oi);
625
+ return ret;
626
}
627
628
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
629
MemOpIdx oi, uintptr_t ra)
630
{
631
- return cpu_load_helper(env, addr, oi, ra, full_be_lduw_mmu);
632
+ uint16_t ret;
633
+
634
+ validate_memop(oi, MO_BEUW);
635
+ ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
636
+ plugin_load_cb(env, addr, oi);
637
+ return ret;
638
}
639
640
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
641
MemOpIdx oi, uintptr_t ra)
642
{
643
- return cpu_load_helper(env, addr, oi, ra, full_be_ldul_mmu);
644
+ uint32_t ret;
645
+
646
+ validate_memop(oi, MO_BEUL);
647
+ ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
648
+ plugin_load_cb(env, addr, oi);
649
+ return ret;
650
}
651
652
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
653
MemOpIdx oi, uintptr_t ra)
654
{
655
- return cpu_load_helper(env, addr, oi, ra, helper_be_ldq_mmu);
656
+ uint64_t ret;
657
+
658
+ validate_memop(oi, MO_BEUQ);
659
+ ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
660
+ plugin_load_cb(env, addr, oi);
661
+ return ret;
662
}
663
664
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
665
MemOpIdx oi, uintptr_t ra)
666
{
667
- return cpu_load_helper(env, addr, oi, ra, full_le_lduw_mmu);
668
+ uint16_t ret;
669
+
670
+ validate_memop(oi, MO_LEUW);
671
+ ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
672
+ plugin_load_cb(env, addr, oi);
673
+ return ret;
674
}
675
676
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
677
MemOpIdx oi, uintptr_t ra)
678
{
679
- return cpu_load_helper(env, addr, oi, ra, full_le_ldul_mmu);
680
+ uint32_t ret;
681
+
682
+ validate_memop(oi, MO_LEUL);
683
+ ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
684
+ plugin_load_cb(env, addr, oi);
685
+ return ret;
686
}
687
688
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
689
MemOpIdx oi, uintptr_t ra)
690
{
691
- return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
692
+ uint64_t ret;
693
+
694
+ validate_memop(oi, MO_LEUQ);
695
+ ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
696
+ plugin_load_cb(env, addr, oi);
697
+ return ret;
698
}
699
700
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
701
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
702
703
/* Code access functions. */
704
705
-static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
706
- MemOpIdx oi, uintptr_t retaddr)
707
-{
708
- return load_helper(env, addr, oi, retaddr, MO_8,
709
- MMU_INST_FETCH, full_ldub_code);
710
-}
711
-
712
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
713
{
714
MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
715
- return full_ldub_code(env, addr, oi, 0);
716
-}
717
-
718
-static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
719
- MemOpIdx oi, uintptr_t retaddr)
720
-{
721
- return load_helper(env, addr, oi, retaddr, MO_TEUW,
722
- MMU_INST_FETCH, full_lduw_code);
723
+ return do_ld1_mmu(env, addr, oi, 0, MMU_INST_FETCH);
724
}
725
726
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
727
{
728
MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
729
- return full_lduw_code(env, addr, oi, 0);
730
-}
731
-
732
-static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
733
- MemOpIdx oi, uintptr_t retaddr)
734
-{
735
- return load_helper(env, addr, oi, retaddr, MO_TEUL,
736
- MMU_INST_FETCH, full_ldl_code);
737
+ return do_ld2_mmu(env, addr, oi, 0, MMU_INST_FETCH);
738
}
739
740
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
741
{
742
MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
743
- return full_ldl_code(env, addr, oi, 0);
744
-}
745
-
746
-static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
747
- MemOpIdx oi, uintptr_t retaddr)
748
-{
749
- return load_helper(env, addr, oi, retaddr, MO_TEUQ,
750
- MMU_INST_FETCH, full_ldq_code);
751
+ return do_ld4_mmu(env, addr, oi, 0, MMU_INST_FETCH);
752
}
753
754
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
755
{
756
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
757
- return full_ldq_code(env, addr, oi, 0);
758
+ return do_ld8_mmu(env, addr, oi, 0, MMU_INST_FETCH);
759
}
760
761
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
762
MemOpIdx oi, uintptr_t retaddr)
763
{
764
- return full_ldub_code(env, addr, oi, retaddr);
765
+ return do_ld1_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
766
}
767
768
uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
769
MemOpIdx oi, uintptr_t retaddr)
770
{
771
- MemOp mop = get_memop(oi);
772
- int idx = get_mmuidx(oi);
773
- uint16_t ret;
774
-
775
- ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
776
- if ((mop & MO_BSWAP) != MO_TE) {
777
- ret = bswap16(ret);
778
- }
779
- return ret;
780
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
781
}
782
783
uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
784
MemOpIdx oi, uintptr_t retaddr)
785
{
786
- MemOp mop = get_memop(oi);
787
- int idx = get_mmuidx(oi);
788
- uint32_t ret;
789
-
790
- ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
791
- if ((mop & MO_BSWAP) != MO_TE) {
792
- ret = bswap32(ret);
793
- }
794
- return ret;
795
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
796
}
797
798
uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
799
MemOpIdx oi, uintptr_t retaddr)
800
{
801
- MemOp mop = get_memop(oi);
802
- int idx = get_mmuidx(oi);
803
- uint64_t ret;
804
-
805
- ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
806
- if ((mop & MO_BSWAP) != MO_TE) {
807
- ret = bswap64(ret);
808
- }
809
- return ret;
810
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
811
}
812
--
149
--
813
2.34.1
150
2.43.0
814
815
1
From: Thomas Huth <thuth@redhat.com>
1
The big comment just above says functions should be sorted; move fold_cmp_vec and fold_cmpsel_vec into their sorted positions.
2
2
3
We'd like to move disas.c into the common code source set, where
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
CONFIG_USER_ONLY is not available anymore. So we have to move
5
the related code into a separate file instead.
6
7
Signed-off-by: Thomas Huth <thuth@redhat.com>
8
Message-Id: <20230508133745.109463-2-thuth@redhat.com>
9
[rth: Type change done in a separate patch]
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
5
---
12
disas/disas-internal.h | 21 ++++++++++++
6
tcg/optimize.c | 60 +++++++++++++++++++++++++-------------------------
13
disas/disas-mon.c | 65 ++++++++++++++++++++++++++++++++++++
7
1 file changed, 30 insertions(+), 30 deletions(-)
14
disas/disas.c | 76 ++++--------------------------------------
15
disas/meson.build | 1 +
16
4 files changed, 93 insertions(+), 70 deletions(-)
17
create mode 100644 disas/disas-internal.h
18
create mode 100644 disas/disas-mon.c
19
8
20
diff --git a/disas/disas-internal.h b/disas/disas-internal.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
21
new file mode 100644
10
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX
11
--- a/tcg/optimize.c
23
--- /dev/null
12
+++ b/tcg/optimize.c
24
+++ b/disas/disas-internal.h
13
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
25
@@ -XXX,XX +XXX,XX @@
14
return true;
26
+/*
15
}
27
+ * Definitions used internally in the disassembly code
16
28
+ *
17
+static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
29
+ * SPDX-License-Identifier: GPL-2.0-or-later
30
+ */
31
+
32
+#ifndef DISAS_INTERNAL_H
33
+#define DISAS_INTERNAL_H
34
+
35
+#include "disas/dis-asm.h"
36
+
37
+typedef struct CPUDebug {
38
+ struct disassemble_info info;
39
+ CPUState *cpu;
40
+} CPUDebug;
41
+
42
+void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu);
43
+int disas_gstring_printf(FILE *stream, const char *fmt, ...)
44
+ G_GNUC_PRINTF(2, 3);
45
+
46
+#endif
47
diff --git a/disas/disas-mon.c b/disas/disas-mon.c
48
new file mode 100644
49
index XXXXXXX..XXXXXXX
50
--- /dev/null
51
+++ b/disas/disas-mon.c
52
@@ -XXX,XX +XXX,XX @@
53
+/*
54
+ * Functions related to disassembly from the monitor
55
+ *
56
+ * SPDX-License-Identifier: GPL-2.0-or-later
57
+ */
58
+
59
+#include "qemu/osdep.h"
60
+#include "disas-internal.h"
61
+#include "disas/disas.h"
62
+#include "exec/memory.h"
63
+#include "hw/core/cpu.h"
64
+#include "monitor/monitor.h"
65
+
66
+static int
67
+physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
68
+ struct disassemble_info *info)
69
+{
18
+{
70
+ CPUDebug *s = container_of(info, CPUDebug, info);
19
+ /* Canonicalize the comparison to put immediate second. */
71
+ MemTxResult res;
20
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
72
+
21
+ op->args[3] = tcg_swap_cond(op->args[3]);
73
+ res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
22
+ }
74
+ myaddr, length);
23
+ return finish_folding(ctx, op);
75
+ return res == MEMTX_OK ? 0 : EIO;
76
+}
24
+}
77
+
25
+
78
+/* Disassembler for the monitor. */
26
+static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
79
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
80
+ int nb_insn, bool is_physical)
81
+{
27
+{
82
+ int count, i;
28
+ /* If true and false values are the same, eliminate the cmp. */
83
+ CPUDebug s;
29
+ if (args_are_copies(op->args[3], op->args[4])) {
84
+ g_autoptr(GString) ds = g_string_new("");
30
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
85
+
86
+ disas_initialize_debug_target(&s, cpu);
87
+ s.info.fprintf_func = disas_gstring_printf;
88
+ s.info.stream = (FILE *)ds; /* abuse this slot */
89
+
90
+ if (is_physical) {
91
+ s.info.read_memory_func = physical_read_memory;
92
+ }
93
+ s.info.buffer_vma = pc;
94
+
95
+ if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
96
+ monitor_puts(mon, ds->str);
97
+ return;
98
+ }
31
+ }
99
+
32
+
100
+ if (!s.info.print_insn) {
33
+ /* Canonicalize the comparison to put immediate second. */
101
+ monitor_printf(mon, "0x%08" PRIx64
34
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
102
+ ": Asm output not supported on this arch\n", pc);
35
+ op->args[5] = tcg_swap_cond(op->args[5]);
103
+ return;
104
+ }
36
+ }
37
+ /*
38
+ * Canonicalize the "false" input reg to match the destination,
39
+ * so that the tcg backend can implement "move if true".
40
+ */
41
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
42
+ op->args[5] = tcg_invert_cond(op->args[5]);
43
+ }
44
+ return finish_folding(ctx, op);
45
+}
105
+
46
+
106
+ for (i = 0; i < nb_insn; i++) {
47
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
107
+ g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
48
{
108
+ count = s.info.print_insn(pc, &s.info);
49
uint64_t z_mask, s_mask;
109
+ g_string_append_c(ds, '\n');
50
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
110
+ if (count < 0) {
51
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
111
+ break;
112
+ }
113
+ pc += count;
114
+ }
115
+
116
+ monitor_puts(mon, ds->str);
117
+}
118
diff --git a/disas/disas.c b/disas/disas.c
119
index XXXXXXX..XXXXXXX 100644
120
--- a/disas/disas.c
121
+++ b/disas/disas.c
122
@@ -XXX,XX +XXX,XX @@
123
/* General "disassemble this chunk" code. Used for debugging. */
124
#include "qemu/osdep.h"
125
-#include "disas/dis-asm.h"
126
+#include "disas/disas-internal.h"
127
#include "elf.h"
128
#include "qemu/qemu-print.h"
129
#include "disas/disas.h"
130
@@ -XXX,XX +XXX,XX @@
131
#include "hw/core/cpu.h"
132
#include "exec/memory.h"
133
134
-typedef struct CPUDebug {
135
- struct disassemble_info info;
136
- CPUState *cpu;
137
-} CPUDebug;
138
-
139
/* Filled in by elfload.c. Simplistic, but will do for now. */
140
struct syminfo *syminfos = NULL;
141
142
@@ -XXX,XX +XXX,XX @@ static void initialize_debug(CPUDebug *s)
143
s->info.symbol_at_address_func = symbol_at_address;
144
}
52
}
145
53
146
-static void initialize_debug_target(CPUDebug *s, CPUState *cpu)
54
-static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
147
+void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu)
148
{
149
initialize_debug(s);
150
151
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
152
int count;
153
CPUDebug s;
154
155
- initialize_debug_target(&s, cpu);
156
+ disas_initialize_debug_target(&s, cpu);
157
s.info.fprintf_func = fprintf;
158
s.info.stream = out;
159
s.info.buffer_vma = code;
160
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
161
}
162
}
163
164
-static int G_GNUC_PRINTF(2, 3)
165
-gstring_printf(FILE *stream, const char *fmt, ...)
166
+int disas_gstring_printf(FILE *stream, const char *fmt, ...)
167
{
168
/* We abuse the FILE parameter to pass a GString. */
169
GString *s = (GString *)stream;
170
@@ -XXX,XX +XXX,XX @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
171
CPUDebug s;
172
GString *ds = g_string_new(NULL);
173
174
- initialize_debug_target(&s, cpu);
175
- s.info.fprintf_func = gstring_printf;
176
+ disas_initialize_debug_target(&s, cpu);
177
+ s.info.fprintf_func = disas_gstring_printf;
178
s.info.stream = (FILE *)ds; /* abuse this slot */
179
s.info.buffer_vma = addr;
180
s.info.buffer_length = size;
181
@@ -XXX,XX +XXX,XX @@ const char *lookup_symbol(uint64_t orig_addr)
182
183
return symbol;
184
}
185
-
186
-#if !defined(CONFIG_USER_ONLY)
187
-
188
-#include "monitor/monitor.h"
189
-
190
-static int
191
-physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
192
- struct disassemble_info *info)
193
-{
55
-{
194
- CPUDebug *s = container_of(info, CPUDebug, info);
56
- /* Canonicalize the comparison to put immediate second. */
195
- MemTxResult res;
57
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
196
-
58
- op->args[3] = tcg_swap_cond(op->args[3]);
197
- res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
59
- }
198
- myaddr, length);
60
- return finish_folding(ctx, op);
199
- return res == MEMTX_OK ? 0 : EIO;
200
-}
61
-}
201
-
62
-
202
-/* Disassembler for the monitor. */
63
-static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
203
-void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
204
- int nb_insn, bool is_physical)
205
-{
64
-{
206
- int count, i;
65
- /* If true and false values are the same, eliminate the cmp. */
207
- CPUDebug s;
66
- if (args_are_copies(op->args[3], op->args[4])) {
208
- g_autoptr(GString) ds = g_string_new("");
67
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
209
-
210
- initialize_debug_target(&s, cpu);
211
- s.info.fprintf_func = gstring_printf;
212
- s.info.stream = (FILE *)ds; /* abuse this slot */
213
-
214
- if (is_physical) {
215
- s.info.read_memory_func = physical_read_memory;
216
- }
217
- s.info.buffer_vma = pc;
218
-
219
- if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
220
- monitor_puts(mon, ds->str);
221
- return;
222
- }
68
- }
223
-
69
-
224
- if (!s.info.print_insn) {
70
- /* Canonicalize the comparison to put immediate second. */
225
- monitor_printf(mon, "0x%08" PRIx64
71
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
226
- ": Asm output not supported on this arch\n", pc);
72
- op->args[5] = tcg_swap_cond(op->args[5]);
227
- return;
228
- }
73
- }
74
- /*
75
- * Canonicalize the "false" input reg to match the destination,
76
- * so that the tcg backend can implement "move if true".
77
- */
78
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
79
- op->args[5] = tcg_invert_cond(op->args[5]);
80
- }
81
- return finish_folding(ctx, op);
82
-}
229
-
83
-
230
- for (i = 0; i < nb_insn; i++) {
84
static bool fold_sextract(OptContext *ctx, TCGOp *op)
231
- g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
85
{
232
- count = s.info.print_insn(pc, &s.info);
86
uint64_t z_mask, s_mask, s_mask_old;
233
- g_string_append_c(ds, '\n');
234
- if (count < 0) {
235
- break;
236
- }
237
- pc += count;
238
- }
239
-
240
- monitor_puts(mon, ds->str);
241
-}
242
-#endif
243
diff --git a/disas/meson.build b/disas/meson.build
244
index XXXXXXX..XXXXXXX 100644
245
--- a/disas/meson.build
246
+++ b/disas/meson.build
247
@@ -XXX,XX +XXX,XX @@ common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
248
common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
249
common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
250
251
+softmmu_ss.add(files('disas-mon.c'))
252
specific_ss.add(files('disas.c'), capstone)
253
--
87
--
254
2.34.1
88
2.43.0
1
We currently have a flag, float_muladd_halve_result, to scale
2
the result by 2**-1. Extend this to handle arbitrary scaling.
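
[Illustrative aside, not part of the patch] A minimal sketch of how the new
interface composes, assuming only the declarations added to
include/fpu/softfloat.h below; the helper name is made up for the example.
The scale argument multiplies the fused result by 2**scale before the single
rounding, so a scale of -1 reproduces float_muladd_halve_result:

    #include "qemu/osdep.h"
    #include "fpu/softfloat.h"

    /* (a * b + c) * 2**-1 with one rounding. */
    static float64 fmadd_halved(float64 a, float64 b, float64 c,
                                float_status *st)
    {
        return float64_muladd_scalbn(a, b, c, -1, 0, st);
    }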
3
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
6
---
4
configs/targets/sh4-linux-user.mak | 1 -
7
include/fpu/softfloat.h | 6 ++++
5
configs/targets/sh4-softmmu.mak | 1 -
8
fpu/softfloat.c | 58 ++++++++++++++++++++++-----------------
6
configs/targets/sh4eb-linux-user.mak | 1 -
9
fpu/softfloat-parts.c.inc | 7 +++--
7
configs/targets/sh4eb-softmmu.mak | 1 -
10
3 files changed, 44 insertions(+), 27 deletions(-)
8
4 files changed, 4 deletions(-)
11
9
12
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
10
diff --git a/configs/targets/sh4-linux-user.mak b/configs/targets/sh4-linux-user.mak
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/configs/targets/sh4-linux-user.mak
14
--- a/include/fpu/softfloat.h
13
+++ b/configs/targets/sh4-linux-user.mak
15
+++ b/include/fpu/softfloat.h
14
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ float16 float16_add(float16, float16, float_status *status);
15
TARGET_ARCH=sh4
17
float16 float16_sub(float16, float16, float_status *status);
16
TARGET_SYSTBL_ABI=common
18
float16 float16_mul(float16, float16, float_status *status);
17
TARGET_SYSTBL=syscall.tbl
19
float16 float16_muladd(float16, float16, float16, int, float_status *status);
18
-TARGET_ALIGNED_ONLY=y
20
+float16 float16_muladd_scalbn(float16, float16, float16,
19
TARGET_HAS_BFLT=y
21
+ int, int, float_status *status);
20
diff --git a/configs/targets/sh4-softmmu.mak b/configs/targets/sh4-softmmu.mak
22
float16 float16_div(float16, float16, float_status *status);
23
float16 float16_scalbn(float16, int, float_status *status);
24
float16 float16_min(float16, float16, float_status *status);
25
@@ -XXX,XX +XXX,XX @@ float32 float32_mul(float32, float32, float_status *status);
26
float32 float32_div(float32, float32, float_status *status);
27
float32 float32_rem(float32, float32, float_status *status);
28
float32 float32_muladd(float32, float32, float32, int, float_status *status);
29
+float32 float32_muladd_scalbn(float32, float32, float32,
30
+ int, int, float_status *status);
31
float32 float32_sqrt(float32, float_status *status);
32
float32 float32_exp2(float32, float_status *status);
33
float32 float32_log2(float32, float_status *status);
34
@@ -XXX,XX +XXX,XX @@ float64 float64_mul(float64, float64, float_status *status);
35
float64 float64_div(float64, float64, float_status *status);
36
float64 float64_rem(float64, float64, float_status *status);
37
float64 float64_muladd(float64, float64, float64, int, float_status *status);
38
+float64 float64_muladd_scalbn(float64, float64, float64,
39
+ int, int, float_status *status);
40
float64 float64_sqrt(float64, float_status *status);
41
float64 float64_log2(float64, float_status *status);
42
FloatRelation float64_compare(float64, float64, float_status *status);
43
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
21
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
22
--- a/configs/targets/sh4-softmmu.mak
45
--- a/fpu/softfloat.c
23
+++ b/configs/targets/sh4-softmmu.mak
46
+++ b/fpu/softfloat.c
24
@@ -1,2 +1 @@
47
@@ -XXX,XX +XXX,XX @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
25
TARGET_ARCH=sh4
48
#define parts_mul(A, B, S) \
26
-TARGET_ALIGNED_ONLY=y
49
PARTS_GENERIC_64_128(mul, A)(A, B, S)
27
diff --git a/configs/targets/sh4eb-linux-user.mak b/configs/targets/sh4eb-linux-user.mak
50
51
-static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
52
- FloatParts64 *c, int flags,
53
- float_status *s);
54
-static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
55
- FloatParts128 *c, int flags,
56
- float_status *s);
57
+static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
58
+ FloatParts64 *c, int scale,
59
+ int flags, float_status *s);
60
+static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
61
+ FloatParts128 *c, int scale,
62
+ int flags, float_status *s);
63
64
-#define parts_muladd(A, B, C, Z, S) \
65
- PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
66
+#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
67
+ PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
68
69
static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
70
float_status *s);
71
@@ -XXX,XX +XXX,XX @@ floatx80_mul(floatx80 a, floatx80 b, float_status *status)
72
* Fused multiply-add
73
*/
74
75
-float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
76
- int flags, float_status *status)
77
+float16 QEMU_FLATTEN
78
+float16_muladd_scalbn(float16 a, float16 b, float16 c,
79
+ int scale, int flags, float_status *status)
80
{
81
FloatParts64 pa, pb, pc, *pr;
82
83
float16_unpack_canonical(&pa, a, status);
84
float16_unpack_canonical(&pb, b, status);
85
float16_unpack_canonical(&pc, c, status);
86
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
87
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
88
89
return float16_round_pack_canonical(pr, status);
90
}
91
92
-static float32 QEMU_SOFTFLOAT_ATTR
93
-soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
94
- float_status *status)
95
+float16 float16_muladd(float16 a, float16 b, float16 c,
96
+ int flags, float_status *status)
97
+{
98
+ return float16_muladd_scalbn(a, b, c, 0, flags, status);
99
+}
100
+
101
+float32 QEMU_SOFTFLOAT_ATTR
102
+float32_muladd_scalbn(float32 a, float32 b, float32 c,
103
+ int scale, int flags, float_status *status)
104
{
105
FloatParts64 pa, pb, pc, *pr;
106
107
float32_unpack_canonical(&pa, a, status);
108
float32_unpack_canonical(&pb, b, status);
109
float32_unpack_canonical(&pc, c, status);
110
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
111
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
112
113
return float32_round_pack_canonical(pr, status);
114
}
115
116
-static float64 QEMU_SOFTFLOAT_ATTR
117
-soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
118
- float_status *status)
119
+float64 QEMU_SOFTFLOAT_ATTR
120
+float64_muladd_scalbn(float64 a, float64 b, float64 c,
121
+ int scale, int flags, float_status *status)
122
{
123
FloatParts64 pa, pb, pc, *pr;
124
125
float64_unpack_canonical(&pa, a, status);
126
float64_unpack_canonical(&pb, b, status);
127
float64_unpack_canonical(&pc, c, status);
128
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
129
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
130
131
return float64_round_pack_canonical(pr, status);
132
}
133
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
134
return ur.s;
135
136
soft:
137
- return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
138
+ return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
139
}
140
141
float64 QEMU_FLATTEN
142
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
143
return ur.s;
144
145
soft:
146
- return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
147
+ return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
148
}
149
150
float64 float64r32_muladd(float64 a, float64 b, float64 c,
151
@@ -XXX,XX +XXX,XX @@ float64 float64r32_muladd(float64 a, float64 b, float64 c,
152
float64_unpack_canonical(&pa, a, status);
153
float64_unpack_canonical(&pb, b, status);
154
float64_unpack_canonical(&pc, c, status);
155
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
156
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
157
158
return float64r32_round_pack_canonical(pr, status);
159
}
160
@@ -XXX,XX +XXX,XX @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
161
bfloat16_unpack_canonical(&pa, a, status);
162
bfloat16_unpack_canonical(&pb, b, status);
163
bfloat16_unpack_canonical(&pc, c, status);
164
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
165
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
166
167
return bfloat16_round_pack_canonical(pr, status);
168
}
169
@@ -XXX,XX +XXX,XX @@ float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
170
float128_unpack_canonical(&pa, a, status);
171
float128_unpack_canonical(&pb, b, status);
172
float128_unpack_canonical(&pc, c, status);
173
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
174
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
175
176
return float128_round_pack_canonical(pr, status);
177
}
178
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)
179
180
float64_unpack_canonical(&rp, float64_one, status);
181
for (i = 0 ; i < 15 ; i++) {
182
+
183
float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
184
- rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
185
+ rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
186
xnp = *parts_mul(&xnp, &xp, status);
187
}
188
189
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
28
index XXXXXXX..XXXXXXX 100644
190
index XXXXXXX..XXXXXXX 100644
29
--- a/configs/targets/sh4eb-linux-user.mak
191
--- a/fpu/softfloat-parts.c.inc
30
+++ b/configs/targets/sh4eb-linux-user.mak
192
+++ b/fpu/softfloat-parts.c.inc
31
@@ -XXX,XX +XXX,XX @@
193
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
32
TARGET_ARCH=sh4
194
* Requires A and C extracted into a double-sized structure to provide the
33
TARGET_SYSTBL_ABI=common
195
* extra space for the widening multiply.
34
TARGET_SYSTBL=syscall.tbl
196
*/
35
-TARGET_ALIGNED_ONLY=y
197
-static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
36
TARGET_BIG_ENDIAN=y
198
- FloatPartsN *c, int flags, float_status *s)
37
TARGET_HAS_BFLT=y
199
+static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
38
diff --git a/configs/targets/sh4eb-softmmu.mak b/configs/targets/sh4eb-softmmu.mak
200
+ FloatPartsN *c, int scale,
39
index XXXXXXX..XXXXXXX 100644
201
+ int flags, float_status *s)
40
--- a/configs/targets/sh4eb-softmmu.mak
202
{
41
+++ b/configs/targets/sh4eb-softmmu.mak
203
int ab_mask, abc_mask;
42
@@ -XXX,XX +XXX,XX @@
204
FloatPartsW p_widen, c_widen;
43
TARGET_ARCH=sh4
205
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
44
-TARGET_ALIGNED_ONLY=y
206
a->exp = p_widen.exp;
45
TARGET_BIG_ENDIAN=y
207
208
return_normal:
209
+ /* TODO: Replace all use of float_muladd_halve_result with scale. */
210
if (flags & float_muladd_halve_result) {
211
a->exp -= 1;
212
}
213
+ a->exp += scale;
214
finish_sign:
215
if (flags & float_muladd_negate_result) {
216
a->sign ^= 1;
46
--
217
--
47
2.34.1
218
2.43.0
48
219
49
220
1
In gen_ldx/gen_stx, the only two locations for memory operations,
1
Use the scalbn interface instead of float_muladd_halve_result.
2
mark the operation as either aligned (softmmu) or unaligned
3
(user-only, as if emulated by the kernel).
4
2
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
configs/targets/nios2-softmmu.mak | 1 -
6
target/arm/tcg/helper-a64.c | 6 +++---
9
target/nios2/translate.c | 10 ++++++++++
7
1 file changed, 3 insertions(+), 3 deletions(-)
10
2 files changed, 10 insertions(+), 1 deletion(-)
11
8
12
diff --git a/configs/targets/nios2-softmmu.mak b/configs/targets/nios2-softmmu.mak
9
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/configs/targets/nios2-softmmu.mak
11
--- a/target/arm/tcg/helper-a64.c
15
+++ b/configs/targets/nios2-softmmu.mak
12
+++ b/target/arm/tcg/helper-a64.c
16
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
17
TARGET_ARCH=nios2
14
(float16_is_infinity(b) && float16_is_zero(a))) {
18
-TARGET_ALIGNED_ONLY=y
15
return float16_one_point_five;
19
TARGET_NEED_FDT=y
16
}
20
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
17
- return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
21
index XXXXXXX..XXXXXXX 100644
18
+ return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
22
--- a/target/nios2/translate.c
23
+++ b/target/nios2/translate.c
24
@@ -XXX,XX +XXX,XX @@ static void gen_ldx(DisasContext *dc, uint32_t code, uint32_t flags)
25
TCGv data = dest_gpr(dc, instr.b);
26
27
tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s);
28
+#ifdef CONFIG_USER_ONLY
29
+ flags |= MO_UNALN;
30
+#else
31
+ flags |= MO_ALIGN;
32
+#endif
33
tcg_gen_qemu_ld_tl(data, addr, dc->mem_idx, flags);
34
}
19
}
35
20
36
@@ -XXX,XX +XXX,XX @@ static void gen_stx(DisasContext *dc, uint32_t code, uint32_t flags)
21
float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
37
22
@@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
38
TCGv addr = tcg_temp_new();
23
(float32_is_infinity(b) && float32_is_zero(a))) {
39
tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s);
24
return float32_one_point_five;
40
+#ifdef CONFIG_USER_ONLY
25
}
41
+ flags |= MO_UNALN;
26
- return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
42
+#else
27
+ return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
43
+ flags |= MO_ALIGN;
44
+#endif
45
tcg_gen_qemu_st_tl(val, addr, dc->mem_idx, flags);
46
}
28
}
47
29
30
float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
31
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
32
(float64_is_infinity(b) && float64_is_zero(a))) {
33
return float64_one_point_five;
34
}
35
- return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
36
+ return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
37
}
38
39
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
48
--
40
--
49
2.34.1
41
2.43.0
50
42
51
43
1
All uses have now been expunged.
1
Use the scalbn interface instead of float_muladd_halve_result.
2
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
include/exec/memop.h | 13 ++-----------
6
target/sparc/helper.h | 4 +-
7
include/exec/poison.h | 1 -
7
target/sparc/fop_helper.c | 8 ++--
8
tcg/tcg.c | 5 -----
8
target/sparc/translate.c | 80 +++++++++++++++++++++++----------------
9
3 files changed, 2 insertions(+), 17 deletions(-)
9
3 files changed, 54 insertions(+), 38 deletions(-)
10
10
11
diff --git a/include/exec/memop.h b/include/exec/memop.h
11
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/include/exec/memop.h
13
--- a/target/sparc/helper.h
14
+++ b/include/exec/memop.h
14
+++ b/target/sparc/helper.h
15
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
15
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
16
* MO_UNALN accesses are never checked for alignment.
16
DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
17
* MO_ALIGN accesses will result in a call to the CPU's
17
DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
18
* do_unaligned_access hook if the guest address is not aligned.
18
DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
19
- * The default depends on whether the target CPU defines
19
-DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32)
20
- * TARGET_ALIGNED_ONLY.
20
+DEF_HELPER_FLAGS_6(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, s32, i32)
21
*
21
DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64)
22
* Some architectures (e.g. ARMv8) need the address which is aligned
22
DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
23
* to a size more than the size of the memory access.
23
24
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32)
25
*/
25
DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
26
MO_ASHIFT = 5,
26
DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
27
MO_AMASK = 0x7 << MO_ASHIFT,
27
DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
28
-#ifdef NEED_CPU_H
28
-DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32)
29
-#ifdef TARGET_ALIGNED_ONLY
29
+DEF_HELPER_FLAGS_6(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, s32, i32)
30
- MO_ALIGN = 0,
30
DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32)
31
- MO_UNALN = MO_AMASK,
31
DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
32
-#else
32
33
- MO_ALIGN = MO_AMASK,
33
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
34
- MO_UNALN = 0,
35
-#endif
36
-#endif
37
+ MO_UNALN = 0,
38
MO_ALIGN_2 = 1 << MO_ASHIFT,
39
MO_ALIGN_4 = 2 << MO_ASHIFT,
40
MO_ALIGN_8 = 3 << MO_ASHIFT,
41
MO_ALIGN_16 = 4 << MO_ASHIFT,
42
MO_ALIGN_32 = 5 << MO_ASHIFT,
43
MO_ALIGN_64 = 6 << MO_ASHIFT,
44
+ MO_ALIGN = MO_AMASK,
45
46
/* Combinations of the above, for ease of use. */
47
MO_UB = MO_8,
48
diff --git a/include/exec/poison.h b/include/exec/poison.h
49
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
50
--- a/include/exec/poison.h
35
--- a/target/sparc/fop_helper.c
51
+++ b/include/exec/poison.h
36
+++ b/target/sparc/fop_helper.c
52
@@ -XXX,XX +XXX,XX @@
37
@@ -XXX,XX +XXX,XX @@ Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src)
53
#pragma GCC poison TARGET_TRICORE
38
}
54
#pragma GCC poison TARGET_XTENSA
39
55
40
float32 helper_fmadds(CPUSPARCState *env, float32 s1,
56
-#pragma GCC poison TARGET_ALIGNED_ONLY
41
- float32 s2, float32 s3, uint32_t op)
57
#pragma GCC poison TARGET_HAS_BFLT
42
+ float32 s2, float32 s3, int32_t sc, uint32_t op)
58
#pragma GCC poison TARGET_NAME
43
{
59
#pragma GCC poison TARGET_SUPPORTS_MTTCG
44
- float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status);
60
diff --git a/tcg/tcg.c b/tcg/tcg.c
45
+ float32 ret = float32_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
46
check_ieee_exceptions(env, GETPC());
47
return ret;
48
}
49
50
float64 helper_fmaddd(CPUSPARCState *env, float64 s1,
51
- float64 s2, float64 s3, uint32_t op)
52
+ float64 s2, float64 s3, int32_t sc, uint32_t op)
53
{
54
- float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status);
55
+ float64 ret = float64_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
56
check_ieee_exceptions(env, GETPC());
57
return ret;
58
}
59
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
61
index XXXXXXX..XXXXXXX 100644
60
index XXXXXXX..XXXXXXX 100644
62
--- a/tcg/tcg.c
61
--- a/target/sparc/translate.c
63
+++ b/tcg/tcg.c
62
+++ b/target/sparc/translate.c
64
@@ -XXX,XX +XXX,XX @@ static const char * const ldst_name[] =
63
@@ -XXX,XX +XXX,XX @@ static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
65
};
64
66
65
static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
67
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
66
{
68
-#ifdef TARGET_ALIGNED_ONLY
67
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
69
[MO_UNALN >> MO_ASHIFT] = "un+",
68
+ TCGv_i32 z = tcg_constant_i32(0);
70
- [MO_ALIGN >> MO_ASHIFT] = "",
69
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, z);
71
-#else
70
}
72
- [MO_UNALN >> MO_ASHIFT] = "",
71
73
[MO_ALIGN >> MO_ASHIFT] = "al+",
72
static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
74
-#endif
73
{
75
[MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
74
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
76
[MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
75
+ TCGv_i32 z = tcg_constant_i32(0);
77
[MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
76
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, z);
77
}
78
79
static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
80
{
81
- int op = float_muladd_negate_c;
82
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
83
+ TCGv_i32 z = tcg_constant_i32(0);
84
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
85
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
86
}
87
88
static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
89
{
90
- int op = float_muladd_negate_c;
91
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
92
+ TCGv_i32 z = tcg_constant_i32(0);
93
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
94
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
95
}
96
97
static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
98
{
99
- int op = float_muladd_negate_c | float_muladd_negate_result;
100
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
101
+ TCGv_i32 z = tcg_constant_i32(0);
102
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
103
+ float_muladd_negate_result);
104
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
105
}
106
107
static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
108
{
109
- int op = float_muladd_negate_c | float_muladd_negate_result;
110
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
111
+ TCGv_i32 z = tcg_constant_i32(0);
112
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
113
+ float_muladd_negate_result);
114
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
115
}
116
117
static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
118
{
119
- int op = float_muladd_negate_result;
120
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
121
+ TCGv_i32 z = tcg_constant_i32(0);
122
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
123
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
124
}
125
126
static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
127
{
128
- int op = float_muladd_negate_result;
129
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
130
+ TCGv_i32 z = tcg_constant_i32(0);
131
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
132
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
133
}
134
135
/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. */
136
static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
137
{
138
- TCGv_i32 one = tcg_constant_i32(float32_one);
139
- int op = float_muladd_halve_result;
140
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
141
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
142
+ TCGv_i32 mone = tcg_constant_i32(-1);
143
+ TCGv_i32 op = tcg_constant_i32(0);
144
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
145
}
146
147
static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
148
{
149
- TCGv_i64 one = tcg_constant_i64(float64_one);
150
- int op = float_muladd_halve_result;
151
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
152
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
153
+ TCGv_i32 mone = tcg_constant_i32(-1);
154
+ TCGv_i32 op = tcg_constant_i32(0);
155
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
156
}
157
158
/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */
159
static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
160
{
161
- TCGv_i32 one = tcg_constant_i32(float32_one);
162
- int op = float_muladd_negate_c | float_muladd_halve_result;
163
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
164
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
165
+ TCGv_i32 mone = tcg_constant_i32(-1);
166
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
167
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
168
}
169
170
static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
171
{
172
- TCGv_i64 one = tcg_constant_i64(float64_one);
173
- int op = float_muladd_negate_c | float_muladd_halve_result;
174
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
175
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
176
+ TCGv_i32 mone = tcg_constant_i32(-1);
177
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
178
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
179
}
180
181
/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */
182
static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
183
{
184
- TCGv_i32 one = tcg_constant_i32(float32_one);
185
- int op = float_muladd_negate_result | float_muladd_halve_result;
186
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
187
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
188
+ TCGv_i32 mone = tcg_constant_i32(-1);
189
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
190
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
191
}
192
193
static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
194
{
195
- TCGv_i64 one = tcg_constant_i64(float64_one);
196
- int op = float_muladd_negate_result | float_muladd_halve_result;
197
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
198
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
199
+ TCGv_i32 mone = tcg_constant_i32(-1);
200
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
201
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
202
}
203
204
static void gen_op_fpexception_im(DisasContext *dc, int ftt)
78
--
205
--
79
2.34.1
206
2.43.0
80
207
81
208
1
The opposite of MO_UNALN is MO_ALIGN.
1
All uses have been converted to float*_muladd_scalbn.
2
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
target/mips/tcg/nanomips_translate.c.inc | 2 +-
6
include/fpu/softfloat.h | 3 ---
7
1 file changed, 1 insertion(+), 1 deletion(-)
7
fpu/softfloat.c | 6 ------
8
fpu/softfloat-parts.c.inc | 4 ----
9
3 files changed, 13 deletions(-)
8
10
9
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
11
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
10
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
11
--- a/target/mips/tcg/nanomips_translate.c.inc
13
--- a/include/fpu/softfloat.h
12
+++ b/target/mips/tcg/nanomips_translate.c.inc
14
+++ b/include/fpu/softfloat.h
13
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
15
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
14
TCGv va = tcg_temp_new();
16
| Using these differs from negating an input or output before calling
15
TCGv t1 = tcg_temp_new();
17
| the muladd function in that this means that a NaN doesn't have its
16
MemOp memop = (extract32(ctx->opcode, 8, 3)) ==
18
| sign bit inverted before it is propagated.
17
- NM_P_LS_UAWM ? MO_UNALN : 0;
19
-| We also support halving the result before rounding, as a special
18
+ NM_P_LS_UAWM ? MO_UNALN : MO_ALIGN;
20
-| case to support the ARM fused-sqrt-step instruction FRSQRTS.
19
21
*----------------------------------------------------------------------------*/
20
count = (count == 0) ? 8 : count;
22
enum {
21
while (counter != count) {
23
float_muladd_negate_c = 1,
24
float_muladd_negate_product = 2,
25
float_muladd_negate_result = 4,
26
- float_muladd_halve_result = 8,
27
};
28
29
/*----------------------------------------------------------------------------
30
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/fpu/softfloat.c
33
+++ b/fpu/softfloat.c
34
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
35
if (unlikely(!can_use_fpu(s))) {
36
goto soft;
37
}
38
- if (unlikely(flags & float_muladd_halve_result)) {
39
- goto soft;
40
- }
41
42
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
43
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
44
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
45
if (unlikely(!can_use_fpu(s))) {
46
goto soft;
47
}
48
- if (unlikely(flags & float_muladd_halve_result)) {
49
- goto soft;
50
- }
51
52
float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
53
if (unlikely(!f64_is_zon3(ua, ub, uc))) {
54
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
55
index XXXXXXX..XXXXXXX 100644
56
--- a/fpu/softfloat-parts.c.inc
57
+++ b/fpu/softfloat-parts.c.inc
58
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
59
a->exp = p_widen.exp;
60
61
return_normal:
62
- /* TODO: Replace all use of float_muladd_halve_result with scale. */
63
- if (flags & float_muladd_halve_result) {
64
- a->exp -= 1;
65
- }
66
a->exp += scale;
67
finish_sign:
68
if (flags & float_muladd_negate_result) {
22
--
69
--
23
2.34.1
70
2.43.0
24
71
25
72
1
These are atomic operations, so mark them as requiring alignment.
1
This rounding mode is used by Hexagon.
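
[Illustrative aside, not part of the patch] A hedged sketch of what the new
mode does: results still round to nearest-even, but on overflow the value is
expected to saturate at the largest finite number instead of becoming
infinity (the function name below is made up for the example):

    #include "qemu/osdep.h"
    #include "fpu/softfloat.h"

    static float32 square_saturating(float32 x)
    {
        float_status st = {
            .float_rounding_mode = float_round_nearest_even_max,
        };
        /* Overflow saturates at the largest finite float32, not +inf. */
        return float32_mul(x, x, &st);
    }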
2
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
4
---
5
target/mips/tcg/nanomips_translate.c.inc | 5 +++--
5
include/fpu/softfloat-types.h | 2 ++
6
1 file changed, 3 insertions(+), 2 deletions(-)
6
fpu/softfloat-parts.c.inc | 3 +++
7
2 files changed, 5 insertions(+)
7
8
8
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
9
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
9
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
10
--- a/target/mips/tcg/nanomips_translate.c.inc
11
--- a/include/fpu/softfloat-types.h
11
+++ b/target/mips/tcg/nanomips_translate.c.inc
12
+++ b/include/fpu/softfloat-types.h
12
@@ -XXX,XX +XXX,XX @@ static void gen_llwp(DisasContext *ctx, uint32_t base, int16_t offset,
13
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
13
TCGv tmp2 = tcg_temp_new();
14
float_round_to_odd = 5,
14
15
/* Not an IEEE rounding mode: round to closest odd, overflow to inf */
15
gen_base_offset_addr(ctx, taddr, base, offset);
16
float_round_to_odd_inf = 6,
16
- tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ);
17
+ /* Not an IEEE rounding mode: round to nearest even, overflow to max */
17
+ tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ | MO_ALIGN);
18
+ float_round_nearest_even_max = 7,
18
if (cpu_is_bigendian(ctx)) {
19
} FloatRoundMode;
19
tcg_gen_extr_i64_tl(tmp2, tmp1, tval);
20
20
} else {
21
/*
21
@@ -XXX,XX +XXX,XX @@ static void gen_scwp(DisasContext *ctx, uint32_t base, int16_t offset,
22
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
22
23
index XXXXXXX..XXXXXXX 100644
23
tcg_gen_ld_i64(llval, cpu_env, offsetof(CPUMIPSState, llval_wp));
24
--- a/fpu/softfloat-parts.c.inc
24
tcg_gen_atomic_cmpxchg_i64(val, taddr, llval, tval,
25
+++ b/fpu/softfloat-parts.c.inc
25
- eva ? MIPS_HFLAG_UM : ctx->mem_idx, MO_64);
26
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
26
+ eva ? MIPS_HFLAG_UM : ctx->mem_idx,
27
int exp, flags = 0;
27
+ MO_64 | MO_ALIGN);
28
28
if (reg1 != 0) {
29
switch (s->float_rounding_mode) {
29
tcg_gen_movi_tl(cpu_gpr[reg1], 1);
30
+ case float_round_nearest_even_max:
30
}
31
+ overflow_norm = true;
32
+ /* fall through */
33
case float_round_nearest_even:
34
if (N > 64 && frac_lsb == 0) {
35
inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
31
--
36
--
32
2.34.1
37
2.43.0
1
From: Thomas Huth <thuth@redhat.com>
1
Certain Hexagon instructions suppress changes to the result
2
when the product of fma() is a true zero.
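
[Illustrative aside, not part of the patch] A sketch of the intended
behaviour, assuming the flag added to include/fpu/softfloat.h below: when
the product is a true zero, the addend is returned unchanged, so the sign
of a zero addend is preserved (the expected results are stated as
assumptions):

    #include "qemu/osdep.h"
    #include "fpu/softfloat.h"

    static void suppress_demo(void)
    {
        float_status st = { 0 };                  /* nearest-even rounding */
        float32 c = make_float32(0x80000000);     /* -0.0f */

        /* (+0 * 1) + (-0) rounds to +0 under the default rules... */
        float32 r1 = float32_muladd(float32_zero, float32_one, c, 0, &st);

        /* ...but with the new flag the true-zero product leaves c alone,
         * so the result is expected to be -0. */
        float32 r2 = float32_muladd(float32_zero, float32_one, c,
                                    float_muladd_suppress_add_product_zero,
                                    &st);
        (void)r1;
        (void)r2;
    }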
2
3
3
By using target_words_bigendian() instead of an ifdef,
4
we can build this code once.
5
6
Signed-off-by: Thomas Huth <thuth@redhat.com>
7
Message-Id: <20230508133745.109463-3-thuth@redhat.com>
8
[rth: Type change done in a separate patch]
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
5
---
11
disas/disas.c | 10 +++++-----
6
include/fpu/softfloat.h | 5 +++++
12
disas/meson.build | 3 ++-
7
fpu/softfloat.c | 3 +++
13
2 files changed, 7 insertions(+), 6 deletions(-)
8
fpu/softfloat-parts.c.inc | 4 +++-
9
3 files changed, 11 insertions(+), 1 deletion(-)
14
10
15
diff --git a/disas/disas.c b/disas/disas.c
11
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
16
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
17
--- a/disas/disas.c
13
--- a/include/fpu/softfloat.h
18
+++ b/disas/disas.c
14
+++ b/include/fpu/softfloat.h
19
@@ -XXX,XX +XXX,XX @@ void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu)
15
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
20
s->cpu = cpu;
16
| Using these differs from negating an input or output before calling
21
s->info.read_memory_func = target_read_memory;
17
| the muladd function in that this means that a NaN doesn't have its
22
s->info.print_address_func = print_address;
18
| sign bit inverted before it is propagated.
23
-#if TARGET_BIG_ENDIAN
19
+|
24
- s->info.endian = BFD_ENDIAN_BIG;
20
+| With float_muladd_suppress_add_product_zero, if A or B is zero
25
-#else
21
+| such that the product is a true zero, then return C without addition.
26
- s->info.endian = BFD_ENDIAN_LITTLE;
22
+| This preserves the sign of C when C is +/- 0. Used for Hexagon.
27
-#endif
23
*----------------------------------------------------------------------------*/
28
+ if (target_words_bigendian()) {
24
enum {
29
+ s->info.endian = BFD_ENDIAN_BIG;
25
float_muladd_negate_c = 1,
30
+ } else {
26
float_muladd_negate_product = 2,
31
+ s->info.endian = BFD_ENDIAN_LITTLE;
27
float_muladd_negate_result = 4,
28
+ float_muladd_suppress_add_product_zero = 8,
29
};
30
31
/*----------------------------------------------------------------------------
32
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/fpu/softfloat.c
35
+++ b/fpu/softfloat.c
36
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
37
if (unlikely(!can_use_fpu(s))) {
38
goto soft;
39
}
40
+ if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
41
+ goto soft;
32
+ }
42
+ }
33
43
34
CPUClass *cc = CPU_GET_CLASS(cpu);
44
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
35
if (cc->disas_set_info) {
45
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
36
diff --git a/disas/meson.build b/disas/meson.build
46
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
37
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
38
--- a/disas/meson.build
48
--- a/fpu/softfloat-parts.c.inc
39
+++ b/disas/meson.build
49
+++ b/fpu/softfloat-parts.c.inc
40
@@ -XXX,XX +XXX,XX @@ common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
50
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
41
common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
51
goto return_normal;
42
common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
52
}
43
common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
53
if (c->cls == float_class_zero) {
44
+common_ss.add(files('disas.c'))
54
- if (a->sign != c->sign) {
45
55
+ if (flags & float_muladd_suppress_add_product_zero) {
46
softmmu_ss.add(files('disas-mon.c'))
56
+ a->sign = c->sign;
47
-specific_ss.add(files('disas.c'), capstone)
57
+ } else if (a->sign != c->sign) {
48
+specific_ss.add(capstone)
58
goto return_sub_zero;
59
}
60
goto return_zero;
49
--
61
--
50
2.34.1
62
2.43.0
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
1
There are no special cases for this instruction.
2
tcg_prepare_user_ldst, and some code that lived in both tcg_out_qemu_ld
2
Remove internal_mpyf as unused.
3
and tcg_out_qemu_st into one function that returns HostAddress and
4
TCGLabelQemuLdst structures.
5
3
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
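For reference, the shape every backend converges on after this conversion; a
condensed sketch of the new s390x tcg_out_qemu_ld() from the diff below (the
store side is identical apart from is_ld being false):

    static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                                MemOpIdx oi, TCGType data_type)
    {
        TCGLabelQemuLdst *ldst;
        HostAddress h;

        /* TLB load+compare (softmmu) or alignment test (user-only). */
        ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
        tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h);

        if (ldst) {
            /* A slow path was emitted: record what its helper call needs. */
            ldst->type = data_type;
            ldst->datalo_reg = data_reg;
            ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
        }
    }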
8
---
6
---
9
tcg/s390x/tcg-target.c.inc | 263 ++++++++++++++++---------------------
7
target/hexagon/fma_emu.h | 1 -
10
1 file changed, 113 insertions(+), 150 deletions(-)
8
target/hexagon/fma_emu.c | 8 --------
9
target/hexagon/op_helper.c | 2 +-
10
3 files changed, 1 insertion(+), 10 deletions(-)
11
11
12
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
12
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/s390x/tcg-target.c.inc
14
--- a/target/hexagon/fma_emu.h
15
+++ b/tcg/s390x/tcg-target.c.inc
15
+++ b/target/hexagon/fma_emu.h
16
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
16
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32);
17
float32 infinite_float32(uint8_t sign);
18
float32 internal_fmafx(float32 a, float32 b, float32 c,
19
int scale, float_status *fp_status);
20
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status);
21
float64 internal_mpyhh(float64 a, float64 b,
22
unsigned long long int accumulated,
23
float_status *fp_status);
24
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/target/hexagon/fma_emu.c
27
+++ b/target/hexagon/fma_emu.c
28
@@ -XXX,XX +XXX,XX @@ float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
29
return accum_round_float32(result, fp_status);
17
}
30
}
18
31
19
#if defined(CONFIG_SOFTMMU)
32
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
20
-/* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
22
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
23
-
24
-/* Load and compare a TLB entry, leaving the flags set. Loads the TLB
25
- addend into R2. Returns a register with the santitized guest address. */
26
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
27
- int mem_index, bool is_ld)
28
-{
33
-{
29
- unsigned s_bits = opc & MO_SIZE;
34
- if (float32_is_zero(a) || float32_is_zero(b)) {
30
- unsigned a_bits = get_alignment_bits(opc);
35
- return float32_mul(a, b, fp_status);
31
- unsigned s_mask = (1 << s_bits) - 1;
32
- unsigned a_mask = (1 << a_bits) - 1;
33
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
34
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
35
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
36
- int ofs, a_off;
37
- uint64_t tlb_mask;
38
-
39
- tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
40
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
41
- tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
42
- tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
43
-
44
- /* For aligned accesses, we check the first byte and include the alignment
45
- bits within the address. For unaligned access, we check that we don't
46
- cross pages using the address of the last byte of the access. */
47
- a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
48
- tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
49
- if (a_off == 0) {
50
- tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
51
- } else {
52
- tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
53
- tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
54
- }
36
- }
55
-
37
- return internal_fmafx(a, b, float32_zero, 0, fp_status);
56
- if (is_ld) {
57
- ofs = offsetof(CPUTLBEntry, addr_read);
58
- } else {
59
- ofs = offsetof(CPUTLBEntry, addr_write);
60
- }
61
- if (TARGET_LONG_BITS == 32) {
62
- tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
63
- } else {
64
- tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
65
- }
66
-
67
- tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
68
- offsetof(CPUTLBEntry, addend));
69
-
70
- if (TARGET_LONG_BITS == 32) {
71
- tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
72
- return TCG_REG_R3;
73
- }
74
- return addr_reg;
75
-}
38
-}
76
-
39
-
77
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
40
float64 internal_mpyhh(float64 a, float64 b,
78
- TCGType type, TCGReg data, TCGReg addr,
41
unsigned long long int accumulated,
79
- tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
42
float_status *fp_status)
80
-{
43
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
81
- TCGLabelQemuLdst *label = new_ldst_label(s);
44
index XXXXXXX..XXXXXXX 100644
82
-
45
--- a/target/hexagon/op_helper.c
83
- label->is_ld = is_ld;
46
+++ b/target/hexagon/op_helper.c
84
- label->oi = oi;
47
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
85
- label->type = type;
86
- label->datalo_reg = data;
87
- label->addrlo_reg = addr;
88
- label->raddr = tcg_splitwx_to_rx(raddr);
89
- label->label_ptr[0] = label_ptr;
90
-}
91
-
92
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
93
{
48
{
94
TCGReg addr_reg = lb->addrlo_reg;
49
float32 RdV;
95
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
50
arch_fpop_start(env);
96
return true;
51
- RdV = internal_mpyf(RsV, RtV, &env->fp_status);
52
+ RdV = float32_mul(RsV, RtV, &env->fp_status);
53
arch_fpop_end(env);
54
return RdV;
97
}
55
}
98
#else
99
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
100
- TCGReg addrlo, unsigned a_bits)
101
-{
102
- unsigned a_mask = (1 << a_bits) - 1;
103
- TCGLabelQemuLdst *l = new_ldst_label(s);
104
-
105
- l->is_ld = is_ld;
106
- l->addrlo_reg = addrlo;
107
-
108
- /* We are expecting a_bits to max out at 7, much lower than TMLL. */
109
- tcg_debug_assert(a_bits < 16);
110
- tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
111
-
112
- tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
113
- l->label_ptr[0] = s->code_ptr;
114
- s->code_ptr += 1;
115
-
116
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
117
-}
118
-
119
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
120
{
121
if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
122
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
123
{
124
return tcg_out_fail_alignment(s, l);
125
}
126
+#endif /* CONFIG_SOFTMMU */
127
128
-static HostAddress tcg_prepare_user_ldst(TCGContext *s, TCGReg addr_reg)
129
+/*
130
+ * For softmmu, perform the TLB load and compare.
131
+ * For useronly, perform any required alignment tests.
132
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
133
+ * is required and fill in @h with the host address for the fast path.
134
+ */
135
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
136
+ TCGReg addr_reg, MemOpIdx oi,
137
+ bool is_ld)
138
{
139
- TCGReg index;
140
- int disp;
141
+ TCGLabelQemuLdst *ldst = NULL;
142
+ MemOp opc = get_memop(oi);
143
+ unsigned a_bits = get_alignment_bits(opc);
144
+ unsigned a_mask = (1u << a_bits) - 1;
145
146
+#ifdef CONFIG_SOFTMMU
147
+ unsigned s_bits = opc & MO_SIZE;
148
+ unsigned s_mask = (1 << s_bits) - 1;
149
+ int mem_index = get_mmuidx(oi);
150
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
151
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
152
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
153
+ int ofs, a_off;
154
+ uint64_t tlb_mask;
155
+
156
+ ldst = new_ldst_label(s);
157
+ ldst->is_ld = is_ld;
158
+ ldst->oi = oi;
159
+ ldst->addrlo_reg = addr_reg;
160
+
161
+ tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
162
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
163
+
164
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
165
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
166
+ tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
167
+ tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
168
+
169
+ /*
170
+ * For aligned accesses, we check the first byte and include the alignment
171
+ * bits within the address. For unaligned access, we check that we don't
172
+ * cross pages using the address of the last byte of the access.
173
+ */
174
+ a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
175
+ tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
176
+ if (a_off == 0) {
177
+ tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
178
+ } else {
179
+ tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
180
+ tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
181
+ }
182
+
183
+ if (is_ld) {
184
+ ofs = offsetof(CPUTLBEntry, addr_read);
185
+ } else {
186
+ ofs = offsetof(CPUTLBEntry, addr_write);
187
+ }
188
+ if (TARGET_LONG_BITS == 32) {
189
+ tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
190
+ } else {
191
+ tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
192
+ }
193
+
194
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
195
+ ldst->label_ptr[0] = s->code_ptr++;
196
+
197
+ h->index = TCG_REG_R2;
198
+ tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE,
199
+ offsetof(CPUTLBEntry, addend));
200
+
201
+ h->base = addr_reg;
202
+ if (TARGET_LONG_BITS == 32) {
203
+ tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
204
+ h->base = TCG_REG_R3;
205
+ }
206
+ h->disp = 0;
207
+#else
208
+ if (a_mask) {
209
+ ldst = new_ldst_label(s);
210
+ ldst->is_ld = is_ld;
211
+ ldst->oi = oi;
212
+ ldst->addrlo_reg = addr_reg;
213
+
214
+ /* We are expecting a_bits to max out at 7, much lower than TMLL. */
215
+ tcg_debug_assert(a_bits < 16);
216
+ tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
217
+
218
+ tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
219
+ ldst->label_ptr[0] = s->code_ptr++;
220
+ }
221
+
222
+ h->base = addr_reg;
223
if (TARGET_LONG_BITS == 32) {
224
tcg_out_ext32u(s, TCG_TMP0, addr_reg);
225
- addr_reg = TCG_TMP0;
226
+ h->base = TCG_TMP0;
227
}
228
if (guest_base < 0x80000) {
229
- index = TCG_REG_NONE;
230
- disp = guest_base;
231
+ h->index = TCG_REG_NONE;
232
+ h->disp = guest_base;
233
} else {
234
- index = TCG_GUEST_BASE_REG;
235
- disp = 0;
236
+ h->index = TCG_GUEST_BASE_REG;
237
+ h->disp = 0;
238
}
239
- return (HostAddress){ .base = addr_reg, .index = index, .disp = disp };
240
+#endif
241
+
242
+ return ldst;
243
}
244
-#endif /* CONFIG_SOFTMMU */
245
246
static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
247
MemOpIdx oi, TCGType data_type)
248
{
249
- MemOp opc = get_memop(oi);
250
+ TCGLabelQemuLdst *ldst;
251
HostAddress h;
252
253
-#ifdef CONFIG_SOFTMMU
254
- unsigned mem_index = get_mmuidx(oi);
255
- tcg_insn_unit *label_ptr;
256
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
257
+ tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h);
258
259
- h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
260
- h.index = TCG_REG_R2;
261
- h.disp = 0;
262
-
263
- tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
264
- label_ptr = s->code_ptr;
265
- s->code_ptr += 1;
266
-
267
- tcg_out_qemu_ld_direct(s, opc, data_reg, h);
268
-
269
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
270
- s->code_ptr, label_ptr);
271
-#else
272
- unsigned a_bits = get_alignment_bits(opc);
273
-
274
- if (a_bits) {
275
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
276
+ if (ldst) {
277
+ ldst->type = data_type;
278
+ ldst->datalo_reg = data_reg;
279
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
280
}
281
- h = tcg_prepare_user_ldst(s, addr_reg);
282
- tcg_out_qemu_ld_direct(s, opc, data_reg, h);
283
-#endif
284
}
285
286
static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
287
MemOpIdx oi, TCGType data_type)
288
{
289
- MemOp opc = get_memop(oi);
290
+ TCGLabelQemuLdst *ldst;
291
HostAddress h;
292
293
-#ifdef CONFIG_SOFTMMU
294
- unsigned mem_index = get_mmuidx(oi);
295
- tcg_insn_unit *label_ptr;
296
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
297
+ tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
298
299
- h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
300
- h.index = TCG_REG_R2;
301
- h.disp = 0;
302
-
303
- tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
304
- label_ptr = s->code_ptr;
305
- s->code_ptr += 1;
306
-
307
- tcg_out_qemu_st_direct(s, opc, data_reg, h);
308
-
309
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
310
- s->code_ptr, label_ptr);
311
-#else
312
- unsigned a_bits = get_alignment_bits(opc);
313
-
314
- if (a_bits) {
315
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
316
+ if (ldst) {
317
+ ldst->type = data_type;
318
+ ldst->datalo_reg = data_reg;
319
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
320
}
321
- h = tcg_prepare_user_ldst(s, addr_reg);
322
- tcg_out_qemu_st_direct(s, opc, data_reg, h);
323
-#endif
324
}
325
326
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
327
--
56
--
328
2.34.1
57
2.43.0
329
330
1
Never used since its introduction.
1
There are no special cases for this instruction.
2
2
3
Fixes: 3d582c6179c ("tcg-ppc64: Rearrange integer constant constraints")
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
tcg/ppc/tcg-target-con-str.h | 1 -
6
target/hexagon/op_helper.c | 2 +-
8
tcg/ppc/tcg-target.c.inc | 3 ---
7
1 file changed, 1 insertion(+), 1 deletion(-)
9
2 files changed, 4 deletions(-)
10
8
11
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
9
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target-con-str.h
11
--- a/target/hexagon/op_helper.c
14
+++ b/tcg/ppc/tcg-target-con-str.h
12
+++ b/target/hexagon/op_helper.c
15
@@ -XXX,XX +XXX,XX @@ REGS('v', ALL_VECTOR_REGS)
13
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
16
* CONST(letter, TCG_CT_CONST_* bit set)
14
float32 RsV, float32 RtV)
17
*/
15
{
18
CONST('I', TCG_CT_CONST_S16)
16
arch_fpop_start(env);
19
-CONST('J', TCG_CT_CONST_U16)
17
- RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
20
CONST('M', TCG_CT_CONST_MONE)
18
+ RxV = float32_muladd(RsV, RtV, RxV, 0, &env->fp_status);
21
CONST('T', TCG_CT_CONST_S32)
19
arch_fpop_end(env);
22
CONST('U', TCG_CT_CONST_U32)
20
return RxV;
23
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
21
}
24
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/ppc/tcg-target.c.inc
26
+++ b/tcg/ppc/tcg-target.c.inc
27
@@ -XXX,XX +XXX,XX @@
28
#define SZR (TCG_TARGET_REG_BITS / 8)
29
30
#define TCG_CT_CONST_S16 0x100
31
-#define TCG_CT_CONST_U16 0x200
32
#define TCG_CT_CONST_S32 0x400
33
#define TCG_CT_CONST_U32 0x800
34
#define TCG_CT_CONST_ZERO 0x1000
35
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
36
37
if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
38
return 1;
39
- } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
40
- return 1;
41
} else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
42
return 1;
43
} else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
44
--
22
--
45
2.34.1
23
2.43.0
46
47
1
The softmmu tlb uses TCG_REG_{TMP1,TMP2,R0}, not any of the normally
1
There are no special cases for this instruction. Since Hexagon
2
available registers. Now that we handle overlap between inputs and
2
always uses default-nan mode, explicitly negating the first
3
helper arguments, we can allow any allocatable reg.
3
input is unnecessary. Use float_muladd_negate_product instead.
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
6
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
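A hedged illustration of why the two forms agree (hypothetical helper, using
float32_muladd for both sides; with default-NaN mode the difference in NaN
sign handling between them cannot be observed):

    static float32 fms_two_ways(float32 a, float32 b, float32 c,
                                bool use_flag, float_status *st)
    {
        if (use_flag) {
            /* Let softfloat negate the product internally. */
            return float32_muladd(a, b, c, float_muladd_negate_product, st);
        } else {
            /* Negate the first input by hand, as the old code did. */
            float32 neg_a = float32_set_sign(a, float32_is_neg(a) ? 0 : 1);
            return float32_muladd(neg_a, b, c, 0, st);
        }
    }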
8
---
7
---
9
tcg/ppc/tcg-target-con-set.h | 11 ++++-------
8
target/hexagon/op_helper.c | 5 ++---
10
tcg/ppc/tcg-target-con-str.h | 2 --
9
1 file changed, 2 insertions(+), 3 deletions(-)
11
tcg/ppc/tcg-target.c.inc | 32 ++++++++++----------------------
12
3 files changed, 14 insertions(+), 31 deletions(-)
13
10
14
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
11
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
15
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/ppc/tcg-target-con-set.h
13
--- a/target/hexagon/op_helper.c
17
+++ b/tcg/ppc/tcg-target-con-set.h
14
+++ b/target/hexagon/op_helper.c
18
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
19
C_O0_I1(r)
16
float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
20
C_O0_I2(r, r)
17
float32 RsV, float32 RtV)
21
C_O0_I2(r, ri)
18
{
22
-C_O0_I2(S, S)
19
- float32 neg_RsV;
23
C_O0_I2(v, r)
20
arch_fpop_start(env);
24
-C_O0_I3(S, S, S)
21
- neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
25
+C_O0_I3(r, r, r)
22
- RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
26
C_O0_I4(r, r, ri, ri)
23
+ RxV = float32_muladd(RsV, RtV, RxV, float_muladd_negate_product,
27
-C_O0_I4(S, S, S, S)
24
+ &env->fp_status);
28
-C_O1_I1(r, L)
25
arch_fpop_end(env);
29
+C_O0_I4(r, r, r, r)
26
return RxV;
30
C_O1_I1(r, r)
27
}
31
C_O1_I1(v, r)
32
C_O1_I1(v, v)
33
C_O1_I1(v, vr)
34
C_O1_I2(r, 0, rZ)
35
-C_O1_I2(r, L, L)
36
C_O1_I2(r, rI, ri)
37
C_O1_I2(r, rI, rT)
38
C_O1_I2(r, r, r)
39
@@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v)
40
C_O1_I3(v, v, v, v)
41
C_O1_I4(r, r, ri, rZ, rZ)
42
C_O1_I4(r, r, r, ri, ri)
43
-C_O2_I1(L, L, L)
44
-C_O2_I2(L, L, L, L)
45
+C_O2_I1(r, r, r)
46
+C_O2_I2(r, r, r, r)
47
C_O2_I4(r, r, rI, rZM, r, r)
48
C_O2_I4(r, r, r, r, rI, rZM)
49
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
50
index XXXXXXX..XXXXXXX 100644
51
--- a/tcg/ppc/tcg-target-con-str.h
52
+++ b/tcg/ppc/tcg-target-con-str.h
53
@@ -XXX,XX +XXX,XX @@ REGS('A', 1u << TCG_REG_R3)
54
REGS('B', 1u << TCG_REG_R4)
55
REGS('C', 1u << TCG_REG_R5)
56
REGS('D', 1u << TCG_REG_R6)
57
-REGS('L', ALL_QLOAD_REGS)
58
-REGS('S', ALL_QSTORE_REGS)
59
60
/*
61
* Define constraint letters for constants:
62
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
63
index XXXXXXX..XXXXXXX 100644
64
--- a/tcg/ppc/tcg-target.c.inc
65
+++ b/tcg/ppc/tcg-target.c.inc
66
@@ -XXX,XX +XXX,XX @@
67
#define ALL_GENERAL_REGS 0xffffffffu
68
#define ALL_VECTOR_REGS 0xffffffff00000000ull
69
70
-#ifdef CONFIG_SOFTMMU
71
-#define ALL_QLOAD_REGS \
72
- (ALL_GENERAL_REGS & \
73
- ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
74
-#define ALL_QSTORE_REGS \
75
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
76
- (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
77
-#else
78
-#define ALL_QLOAD_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
79
-#define ALL_QSTORE_REGS ALL_QLOAD_REGS
80
-#endif
81
-
82
TCGPowerISA have_isa;
83
static bool have_isel;
84
bool have_altivec;
85
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
86
87
case INDEX_op_qemu_ld_i32:
88
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
89
- ? C_O1_I1(r, L)
90
- : C_O1_I2(r, L, L));
91
+ ? C_O1_I1(r, r)
92
+ : C_O1_I2(r, r, r));
93
94
case INDEX_op_qemu_st_i32:
95
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
96
- ? C_O0_I2(S, S)
97
- : C_O0_I3(S, S, S));
98
+ ? C_O0_I2(r, r)
99
+ : C_O0_I3(r, r, r));
100
101
case INDEX_op_qemu_ld_i64:
102
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
103
- : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
104
- : C_O2_I2(L, L, L, L));
105
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
106
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
107
+ : C_O2_I2(r, r, r, r));
108
109
case INDEX_op_qemu_st_i64:
110
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
111
- : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
112
- : C_O0_I4(S, S, S, S));
113
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
114
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
115
+ : C_O0_I4(r, r, r, r));
116
117
case INDEX_op_add_vec:
118
case INDEX_op_sub_vec:
119
--
28
--
120
2.34.1
29
2.43.0
121
122
1
The softmmu tlb uses TCG_REG_TMP[0-2], not any of the normally available
1
This instruction has a special case that 0 * x + c returns c
2
registers. Now that we handle overlap between inputs and helper arguments,
2
without the normal sign folding that comes with 0 + -0.
3
we can allow any allocatable reg.
3
Use the new float_muladd_suppress_add_product_zero to
4
describe this.
4
5
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
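A small worked example of the sign folding the flag avoids (hypothetical demo
function, assuming QEMU's softfloat headers and glib's g_assert):

    static void demo_sign_folding(float_status *st)
    {
        float32 c = float32_set_sign(float32_zero, 1);      /* -0.0f */
        float32 plain = float32_muladd(float32_zero, float32_one, c, 0, st);
        float32 kept  = float32_muladd(float32_zero, float32_one, c,
                                       float_muladd_suppress_add_product_zero,
                                       st);
        /*
         * plain is +0.0: the true-zero product (+0 * 1) is added to -0.0,
         * and (+0) + (-0) folds to +0 under round-to-nearest-even.
         * kept is -0.0: c is returned without the addition.
         */
        g_assert(!float32_is_neg(plain) && float32_is_neg(kept));
    }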
7
---
8
---
8
tcg/loongarch64/tcg-target-con-set.h | 2 --
9
target/hexagon/op_helper.c | 11 +++--------
9
tcg/loongarch64/tcg-target-con-str.h | 1 -
10
1 file changed, 3 insertions(+), 8 deletions(-)
10
tcg/loongarch64/tcg-target.c.inc | 23 ++++-------------------
11
3 files changed, 4 insertions(+), 22 deletions(-)
12
11
13
diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
12
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
14
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/loongarch64/tcg-target-con-set.h
14
--- a/target/hexagon/op_helper.c
16
+++ b/tcg/loongarch64/tcg-target-con-set.h
15
+++ b/target/hexagon/op_helper.c
17
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
18
C_O0_I1(r)
17
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
19
C_O0_I2(rZ, r)
18
float32 RsV, float32 RtV, float32 PuV)
20
C_O0_I2(rZ, rZ)
21
-C_O0_I2(LZ, L)
22
C_O1_I1(r, r)
23
-C_O1_I1(r, L)
24
C_O1_I2(r, r, rC)
25
C_O1_I2(r, r, ri)
26
C_O1_I2(r, r, rI)
27
diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tcg/loongarch64/tcg-target-con-str.h
30
+++ b/tcg/loongarch64/tcg-target-con-str.h
31
@@ -XXX,XX +XXX,XX @@
32
* REGS(letter, register_mask)
33
*/
34
REGS('r', ALL_GENERAL_REGS)
35
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
36
37
/*
38
* Define constraint letters for constants:
39
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
40
index XXXXXXX..XXXXXXX 100644
41
--- a/tcg/loongarch64/tcg-target.c.inc
42
+++ b/tcg/loongarch64/tcg-target.c.inc
43
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
44
#define TCG_CT_CONST_C12 0x1000
45
#define TCG_CT_CONST_WSZ 0x2000
46
47
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
48
-/*
49
- * For softmmu, we need to avoid conflicts with the first 5
50
- * argument registers to call the helper. Some of these are
51
- * also used for the tlb lookup.
52
- */
53
-#ifdef CONFIG_SOFTMMU
54
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
55
-#else
56
-#define SOFTMMU_RESERVE_REGS 0
57
-#endif
58
-
59
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
60
61
static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
62
{
19
{
63
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
20
- size4s_t tmp;
64
case INDEX_op_st32_i64:
21
arch_fpop_start(env);
65
case INDEX_op_st_i32:
22
- RxV = check_nan(RxV, RxV, &env->fp_status);
66
case INDEX_op_st_i64:
23
- RxV = check_nan(RxV, RsV, &env->fp_status);
67
+ case INDEX_op_qemu_st_i32:
24
- RxV = check_nan(RxV, RtV, &env->fp_status);
68
+ case INDEX_op_qemu_st_i64:
25
- tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
69
return C_O0_I2(rZ, r);
26
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
70
27
- RxV = tmp;
71
case INDEX_op_brcond_i32:
28
- }
72
case INDEX_op_brcond_i64:
29
+ RxV = float32_muladd_scalbn(RsV, RtV, RxV, fSXTN(8, 64, PuV),
73
return C_O0_I2(rZ, rZ);
30
+ float_muladd_suppress_add_product_zero,
74
31
+ &env->fp_status);
75
- case INDEX_op_qemu_st_i32:
32
arch_fpop_end(env);
76
- case INDEX_op_qemu_st_i64:
33
return RxV;
77
- return C_O0_I2(LZ, L);
34
}
78
-
79
case INDEX_op_ext8s_i32:
80
case INDEX_op_ext8s_i64:
81
case INDEX_op_ext8u_i32:
82
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
83
case INDEX_op_ld32u_i64:
84
case INDEX_op_ld_i32:
85
case INDEX_op_ld_i64:
86
- return C_O1_I1(r, r);
87
-
88
case INDEX_op_qemu_ld_i32:
89
case INDEX_op_qemu_ld_i64:
90
- return C_O1_I1(r, L);
91
+ return C_O1_I1(r, r);
92
93
case INDEX_op_andc_i32:
94
case INDEX_op_andc_i64:
95
--
35
--
96
2.34.1
36
2.43.0
97
98
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
1
There are multiple special cases for this instruction.
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
2
(1) Saturating to the normal maximum instead of overflowing to infinity is
3
into one function that returns HostAddress and TCGLabelQemuLdst structures.
3
handled by the new float_round_nearest_even_max rounding mode.
4
(2) The 0 * n + c special case is handled by the new
5
float_muladd_suppress_add_product_zero flag.
6
(3) The Inf - Inf -> 0 special case can be detected after the fact
7
by examining float_flag_invalid_isi.
4
8
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
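A minimal sketch of special case (3) in isolation (hypothetical helper name;
assumes the accrued exception flags are clear on entry, as the full
do_sffma_lib() in the diff below arranges):

    static float32 fma_inf_minus_inf_is_zero(float32 a, float32 b, float32 c,
                                             float_status *st)
    {
        float32 r = float32_muladd(a, b, c, 0, st);
        int fl = get_float_exception_flags(st);

        if (fl & float_flag_invalid_isi) {
            r = float32_zero;              /* Inf - Inf yields 0, not NaN */
        }
        set_float_exception_flags(0, st);  /* the insn suppresses flags */
        return r;
    }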
7
---
11
---
8
tcg/mips/tcg-target.c.inc | 404 ++++++++++++++++----------------------
12
target/hexagon/op_helper.c | 105 +++++++++----------------------------
9
1 file changed, 172 insertions(+), 232 deletions(-)
13
1 file changed, 26 insertions(+), 79 deletions(-)
10
14
11
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
15
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
12
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/mips/tcg-target.c.inc
17
--- a/target/hexagon/op_helper.c
14
+++ b/tcg/mips/tcg-target.c.inc
18
+++ b/target/hexagon/op_helper.c
15
@@ -XXX,XX +XXX,XX @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
19
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
16
return i;
20
return RxV;
17
}
21
}
18
22
19
-/* We expect to use a 16-bit negative offset from ENV. */
23
-static bool is_zero_prod(float32 a, float32 b)
20
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
22
-
23
-/*
24
- * Perform the tlb comparison operation.
25
- * The complete host address is placed in BASE.
26
- * Clobbers TMP0, TMP1, TMP2, TMP3.
27
- */
28
-static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
29
- TCGReg addrh, MemOpIdx oi,
30
- tcg_insn_unit *label_ptr[2], bool is_load)
31
-{
24
-{
32
- MemOp opc = get_memop(oi);
25
- return ((float32_is_zero(a) && is_finite(b)) ||
33
- unsigned a_bits = get_alignment_bits(opc);
26
- (float32_is_zero(b) && is_finite(a)));
34
- unsigned s_bits = opc & MO_SIZE;
35
- unsigned a_mask = (1 << a_bits) - 1;
36
- unsigned s_mask = (1 << s_bits) - 1;
37
- int mem_index = get_mmuidx(oi);
38
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
39
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
40
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
41
- int add_off = offsetof(CPUTLBEntry, addend);
42
- int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
43
- : offsetof(CPUTLBEntry, addr_write));
44
- target_ulong tlb_mask;
45
-
46
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
47
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
48
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
49
-
50
- /* Extract the TLB index from the address into TMP3. */
51
- tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrl,
52
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
53
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
54
-
55
- /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
56
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
57
-
58
- /* Load the (low-half) tlb comparator. */
59
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
60
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
61
- } else {
62
- tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
63
- : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
64
- TCG_TMP0, TCG_TMP3, cmp_off);
65
- }
66
-
67
- /* Zero extend a 32-bit guest address for a 64-bit host. */
68
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
69
- tcg_out_ext32u(s, base, addrl);
70
- addrl = base;
71
- }
72
-
73
- /*
74
- * Mask the page bits, keeping the alignment bits to compare against.
75
- * For unaligned accesses, compare against the end of the access to
76
- * verify that it does not cross a page boundary.
77
- */
78
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
79
- tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
80
- if (a_mask >= s_mask) {
81
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
82
- } else {
83
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask);
84
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
85
- }
86
-
87
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
88
- /* Load the tlb addend for the fast path. */
89
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
90
- }
91
-
92
- label_ptr[0] = s->code_ptr;
93
- tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
94
-
95
- /* Load and test the high half tlb comparator. */
96
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
97
- /* delay slot */
98
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
99
-
100
- /* Load the tlb addend for the fast path. */
101
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
102
-
103
- label_ptr[1] = s->code_ptr;
104
- tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
105
- }
106
-
107
- /* delay slot */
108
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl);
109
-}
27
-}
110
-
28
-
111
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
29
-static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
112
- TCGType ext,
113
- TCGReg datalo, TCGReg datahi,
114
- TCGReg addrlo, TCGReg addrhi,
115
- void *raddr, tcg_insn_unit *label_ptr[2])
116
-{
30
-{
117
- TCGLabelQemuLdst *label = new_ldst_label(s);
31
- float32 ret = dst;
118
-
32
- if (float32_is_any_nan(x)) {
119
- label->is_ld = is_ld;
33
- if (extract32(x, 22, 1) == 0) {
120
- label->oi = oi;
34
- float_raise(float_flag_invalid, fp_status);
121
- label->type = ext;
35
- }
122
- label->datalo_reg = datalo;
36
- ret = make_float32(0xffffffff); /* nan */
123
- label->datahi_reg = datahi;
124
- label->addrlo_reg = addrlo;
125
- label->addrhi_reg = addrhi;
126
- label->raddr = tcg_splitwx_to_rx(raddr);
127
- label->label_ptr[0] = label_ptr[0];
128
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
129
- label->label_ptr[1] = label_ptr[1];
130
- }
37
- }
38
- return ret;
131
-}
39
-}
132
-
40
-
133
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
41
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
42
float32 RsV, float32 RtV, float32 PuV)
134
{
43
{
135
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
44
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
136
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
45
return RxV;
137
}
46
}
138
47
139
#else
48
-static bool is_inf_prod(int32_t a, int32_t b)
140
-
49
+static float32 do_sffma_lib(CPUHexagonState *env, float32 RxV,
141
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
50
+ float32 RsV, float32 RtV, int negate)
142
- TCGReg addrhi, unsigned a_bits)
143
-{
144
- unsigned a_mask = (1 << a_bits) - 1;
145
- TCGLabelQemuLdst *l = new_ldst_label(s);
146
-
147
- l->is_ld = is_ld;
148
- l->addrlo_reg = addrlo;
149
- l->addrhi_reg = addrhi;
150
-
151
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
152
- tcg_debug_assert(a_bits < 16);
153
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
154
-
155
- l->label_ptr[0] = s->code_ptr;
156
- if (use_mips32r6_instructions) {
157
- tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
158
- } else {
159
- tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
160
- tcg_out_nop(s);
161
- }
162
-
163
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
164
-}
165
-
166
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
167
{
51
{
168
void *target;
52
- return (float32_is_infinity(a) && float32_is_infinity(b)) ||
169
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
53
- (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
170
}
54
- (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
171
#endif /* SOFTMMU */
55
+ int flags;
172
173
+typedef struct {
174
+ TCGReg base;
175
+ MemOp align;
176
+} HostAddress;
177
+
56
+
178
+/*
57
+ arch_fpop_start(env);
179
+ * For softmmu, perform the TLB load and compare.
180
+ * For useronly, perform any required alignment tests.
181
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
182
+ * is required and fill in @h with the host address for the fast path.
183
+ */
184
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
185
+ TCGReg addrlo, TCGReg addrhi,
186
+ MemOpIdx oi, bool is_ld)
187
+{
188
+ TCGLabelQemuLdst *ldst = NULL;
189
+ MemOp opc = get_memop(oi);
190
+ unsigned a_bits = get_alignment_bits(opc);
191
+ unsigned s_bits = opc & MO_SIZE;
192
+ unsigned a_mask = (1 << a_bits) - 1;
193
+ TCGReg base;
194
+
58
+
195
+#ifdef CONFIG_SOFTMMU
59
+ set_float_rounding_mode(float_round_nearest_even_max, &env->fp_status);
196
+ unsigned s_mask = (1 << s_bits) - 1;
60
+ RxV = float32_muladd(RsV, RtV, RxV,
197
+ int mem_index = get_mmuidx(oi);
61
+ negate | float_muladd_suppress_add_product_zero,
198
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
62
+ &env->fp_status);
199
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
200
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
201
+ int add_off = offsetof(CPUTLBEntry, addend);
202
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
203
+ : offsetof(CPUTLBEntry, addr_write);
204
+ target_ulong tlb_mask;
205
+
63
+
206
+ ldst = new_ldst_label(s);
64
+ flags = get_float_exception_flags(&env->fp_status);
207
+ ldst->is_ld = is_ld;
65
+ if (flags) {
208
+ ldst->oi = oi;
66
+ /* Flags are suppressed by this instruction. */
209
+ ldst->addrlo_reg = addrlo;
67
+ set_float_exception_flags(0, &env->fp_status);
210
+ ldst->addrhi_reg = addrhi;
211
+ base = TCG_REG_A0;
212
+
68
+
213
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
69
+ /* Return 0 for Inf - Inf. */
214
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
70
+ if (flags & float_flag_invalid_isi) {
215
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
71
+ RxV = 0;
216
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
217
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
218
+
219
+ /* Extract the TLB index from the address into TMP3. */
220
+ tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo,
221
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
222
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
223
+
224
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
225
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
226
+
227
+ /* Load the (low-half) tlb comparator. */
228
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
229
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
230
+ } else {
231
+ tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
232
+ : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
233
+ TCG_TMP0, TCG_TMP3, cmp_off);
234
+ }
235
+
236
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
237
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
238
+ tcg_out_ext32u(s, base, addrlo);
239
+ addrlo = base;
240
+ }
241
+
242
+ /*
243
+ * Mask the page bits, keeping the alignment bits to compare against.
244
+ * For unaligned accesses, compare against the end of the access to
245
+ * verify that it does not cross a page boundary.
246
+ */
247
+ tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
248
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
249
+ if (a_mask >= s_mask) {
250
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
251
+ } else {
252
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrlo, s_mask - a_mask);
253
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
254
+ }
255
+
256
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
257
+ /* Load the tlb addend for the fast path. */
258
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
259
+ }
260
+
261
+ ldst->label_ptr[0] = s->code_ptr;
262
+ tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
263
+
264
+ /* Load and test the high half tlb comparator. */
265
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
266
+ /* delay slot */
267
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
268
+
269
+ /* Load the tlb addend for the fast path. */
270
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
271
+
272
+ ldst->label_ptr[1] = s->code_ptr;
273
+ tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
274
+ }
275
+
276
+ /* delay slot */
277
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrlo);
278
+#else
279
+ if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
280
+ ldst = new_ldst_label(s);
281
+
282
+ ldst->is_ld = is_ld;
283
+ ldst->oi = oi;
284
+ ldst->addrlo_reg = addrlo;
285
+ ldst->addrhi_reg = addrhi;
286
+
287
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
288
+ tcg_debug_assert(a_bits < 16);
289
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
290
+
291
+ ldst->label_ptr[0] = s->code_ptr;
292
+ if (use_mips32r6_instructions) {
293
+ tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
294
+ } else {
295
+ tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
296
+ tcg_out_nop(s);
297
+ }
72
+ }
298
+ }
73
+ }
299
+
74
+
300
+ base = addrlo;
75
+ arch_fpop_end(env);
301
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
76
+ return RxV;
302
+ tcg_out_ext32u(s, TCG_REG_A0, base);
77
}
303
+ base = TCG_REG_A0;
78
304
+ }
79
float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
305
+ if (guest_base) {
80
float32 RsV, float32 RtV)
306
+ if (guest_base == (int16_t)guest_base) {
307
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
308
+ } else {
309
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
310
+ TCG_GUEST_BASE_REG);
311
+ }
312
+ base = TCG_REG_A0;
313
+ }
314
+#endif
315
+
316
+ h->base = base;
317
+ h->align = a_bits;
318
+ return ldst;
319
+}
320
+
321
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
322
TCGReg base, MemOp opc, TCGType type)
323
{
81
{
324
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
82
- bool infinp;
325
MemOpIdx oi, TCGType data_type)
83
- bool infminusinf;
84
- float32 tmp;
85
-
86
- arch_fpop_start(env);
87
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
88
- infminusinf = float32_is_infinity(RxV) &&
89
- is_inf_prod(RsV, RtV) &&
90
- (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
91
- infinp = float32_is_infinity(RxV) ||
92
- float32_is_infinity(RtV) ||
93
- float32_is_infinity(RsV);
94
- RxV = check_nan(RxV, RxV, &env->fp_status);
95
- RxV = check_nan(RxV, RsV, &env->fp_status);
96
- RxV = check_nan(RxV, RtV, &env->fp_status);
97
- tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
98
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
99
- RxV = tmp;
100
- }
101
- set_float_exception_flags(0, &env->fp_status);
102
- if (float32_is_infinity(RxV) && !infinp) {
103
- RxV = RxV - 1;
104
- }
105
- if (infminusinf) {
106
- RxV = 0;
107
- }
108
- arch_fpop_end(env);
109
- return RxV;
110
+ return do_sffma_lib(env, RxV, RsV, RtV, 0);
111
}
112
113
float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
114
float32 RsV, float32 RtV)
326
{
115
{
327
MemOp opc = get_memop(oi);
116
- bool infinp;
328
- unsigned a_bits = get_alignment_bits(opc);
117
- bool infminusinf;
329
- unsigned s_bits = opc & MO_SIZE;
118
- float32 tmp;
330
- TCGReg base;
119
-
331
+ TCGLabelQemuLdst *ldst;
120
- arch_fpop_start(env);
332
+ HostAddress h;
121
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
333
122
- infminusinf = float32_is_infinity(RxV) &&
334
- /*
123
- is_inf_prod(RsV, RtV) &&
335
- * R6 removes the left/right instructions but requires the
124
- (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
336
- * system to support misaligned memory accesses.
125
- infinp = float32_is_infinity(RxV) ||
337
- */
126
- float32_is_infinity(RtV) ||
338
-#if defined(CONFIG_SOFTMMU)
127
- float32_is_infinity(RsV);
339
- tcg_insn_unit *label_ptr[2];
128
- RxV = check_nan(RxV, RxV, &env->fp_status);
340
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
129
- RxV = check_nan(RxV, RsV, &env->fp_status);
341
130
- RxV = check_nan(RxV, RtV, &env->fp_status);
342
- base = TCG_REG_A0;
131
- float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
343
- tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 1);
132
- tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
344
- if (use_mips32r6_instructions || a_bits >= s_bits) {
133
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
345
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
134
- RxV = tmp;
346
+ if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
347
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
348
} else {
349
- tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
350
+ tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type);
351
}
352
- add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
353
- addrlo, addrhi, s->code_ptr, label_ptr);
354
-#else
355
- base = addrlo;
356
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
357
- tcg_out_ext32u(s, TCG_REG_A0, base);
358
- base = TCG_REG_A0;
359
+
360
+ if (ldst) {
361
+ ldst->type = data_type;
362
+ ldst->datalo_reg = datalo;
363
+ ldst->datahi_reg = datahi;
364
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
365
}
366
- if (guest_base) {
367
- if (guest_base == (int16_t)guest_base) {
368
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
369
- } else {
370
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
371
- TCG_GUEST_BASE_REG);
372
- }
373
- base = TCG_REG_A0;
374
- }
135
- }
375
- if (use_mips32r6_instructions) {
136
- set_float_exception_flags(0, &env->fp_status);
376
- if (a_bits) {
137
- if (float32_is_infinity(RxV) && !infinp) {
377
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
138
- RxV = RxV - 1;
378
- }
379
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
380
- } else {
381
- if (a_bits && a_bits != s_bits) {
382
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
383
- }
384
- if (a_bits >= s_bits) {
385
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
386
- } else {
387
- tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
388
- }
389
- }
139
- }
390
-#endif
140
- if (infminusinf) {
141
- RxV = 0;
142
- }
143
- arch_fpop_end(env);
144
- return RxV;
145
+ return do_sffma_lib(env, RxV, RsV, RtV, float_muladd_negate_product);
391
}
146
}
392
147
393
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
148
float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
394
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
395
MemOpIdx oi, TCGType data_type)
396
{
397
MemOp opc = get_memop(oi);
398
- unsigned a_bits = get_alignment_bits(opc);
399
- unsigned s_bits = opc & MO_SIZE;
400
- TCGReg base;
401
+ TCGLabelQemuLdst *ldst;
402
+ HostAddress h;
403
404
- /*
405
- * R6 removes the left/right instructions but requires the
406
- * system to support misaligned memory accesses.
407
- */
408
-#if defined(CONFIG_SOFTMMU)
409
- tcg_insn_unit *label_ptr[2];
410
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
411
412
- base = TCG_REG_A0;
413
- tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 0);
414
- if (use_mips32r6_instructions || a_bits >= s_bits) {
415
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
416
+ if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
417
+ tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
418
} else {
419
- tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
420
+ tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc);
421
}
422
- add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
423
- addrlo, addrhi, s->code_ptr, label_ptr);
424
-#else
425
- base = addrlo;
426
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
427
- tcg_out_ext32u(s, TCG_REG_A0, base);
428
- base = TCG_REG_A0;
429
+
430
+ if (ldst) {
431
+ ldst->type = data_type;
432
+ ldst->datalo_reg = datalo;
433
+ ldst->datahi_reg = datahi;
434
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
435
}
436
- if (guest_base) {
437
- if (guest_base == (int16_t)guest_base) {
438
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
439
- } else {
440
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
441
- TCG_GUEST_BASE_REG);
442
- }
443
- base = TCG_REG_A0;
444
- }
445
- if (use_mips32r6_instructions) {
446
- if (a_bits) {
447
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
448
- }
449
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
450
- } else {
451
- if (a_bits && a_bits != s_bits) {
452
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
453
- }
454
- if (a_bits >= s_bits) {
455
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
456
- } else {
457
- tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
458
- }
459
- }
460
-#endif
461
}
462
463
static void tcg_out_mb(TCGContext *s, TCGArg a0)
464
--
149
--
465
2.34.1
150
2.43.0
466
467
1
Reviewed-by: Thomas Huth <thuth@redhat.com>
1
The function is now unused.
2
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Message-Id: <20230503072331.1747057-83-richard.henderson@linaro.org>
4
---
5
---
5
include/disas/disas.h | 6 ------
6
target/hexagon/fma_emu.h | 2 -
6
disas/disas.c | 3 ++-
7
target/hexagon/fma_emu.c | 171 ---------------------------------------
7
2 files changed, 2 insertions(+), 7 deletions(-)
8
2 files changed, 173 deletions(-)
8
9
9
diff --git a/include/disas/disas.h b/include/disas/disas.h
10
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
10
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
11
--- a/include/disas/disas.h
12
--- a/target/hexagon/fma_emu.h
12
+++ b/include/disas/disas.h
13
+++ b/target/hexagon/fma_emu.h
13
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static inline uint32_t float32_getexp_raw(float32 f32)
14
#ifndef QEMU_DISAS_H
15
}
15
#define QEMU_DISAS_H
16
int32_t float32_getexp(float32 f32);
16
17
float32 infinite_float32(uint8_t sign);
17
-#include "exec/hwaddr.h"
18
-float32 internal_fmafx(float32 a, float32 b, float32 c,
18
-
19
- int scale, float_status *fp_status);
19
-#ifdef NEED_CPU_H
20
float64 internal_mpyhh(float64 a, float64 b,
20
-#include "cpu.h"
21
unsigned long long int accumulated,
21
-
22
float_status *fp_status);
22
/* Disassemble this for me please... (debugging). */
23
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
23
void disas(FILE *out, const void *code, size_t size);
24
void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
25
@@ -XXX,XX +XXX,XX @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
26
27
/* Look up symbol for debugging purpose. Returns "" if unknown. */
28
const char *lookup_symbol(uint64_t orig_addr);
29
-#endif
30
31
struct syminfo;
32
struct elf32_sym;
33
diff --git a/disas/disas.c b/disas/disas.c
34
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
35
--- a/disas/disas.c
25
--- a/target/hexagon/fma_emu.c
36
+++ b/disas/disas.c
26
+++ b/target/hexagon/fma_emu.c
37
@@ -XXX,XX +XXX,XX @@
27
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
38
#include "disas/dis-asm.h"
28
return -1;
39
#include "elf.h"
29
}
40
#include "qemu/qemu-print.h"
30
41
-
31
-static uint64_t float32_getmant(float32 f32)
42
#include "disas/disas.h"
32
-{
43
#include "disas/capstone.h"
33
- Float a = { .i = f32 };
44
+#include "hw/core/cpu.h"
34
- if (float32_is_normal(f32)) {
45
+#include "exec/memory.h"
35
- return a.mant | 1ULL << 23;
46
36
- }
47
typedef struct CPUDebug {
37
- if (float32_is_zero(f32)) {
48
struct disassemble_info info;
38
- return 0;
39
- }
40
- if (float32_is_denormal(f32)) {
41
- return a.mant;
42
- }
43
- return ~0ULL;
44
-}
45
-
46
int32_t float32_getexp(float32 f32)
47
{
48
Float a = { .i = f32 };
49
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
50
}
51
52
/* Return a maximum finite value with the requested sign */
53
-static float32 maxfinite_float32(uint8_t sign)
54
-{
55
- if (sign) {
56
- return make_float32(SF_MINUS_MAXF);
57
- } else {
58
- return make_float32(SF_MAXF);
59
- }
60
-}
61
-
62
-/* Return a zero value with requested sign */
63
-static float32 zero_float32(uint8_t sign)
64
-{
65
- if (sign) {
66
- return make_float32(0x80000000);
67
- } else {
68
- return float32_zero;
69
- }
70
-}
71
-
72
#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
73
static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
74
{ \
75
@@ -XXX,XX +XXX,XX @@ static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
76
}
77
78
GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
79
-GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
80
-
81
-static bool is_inf_prod(float64 a, float64 b)
82
-{
83
- return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
84
- (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
85
- (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
86
-}
87
-
88
-static float64 special_fma(float64 a, float64 b, float64 c,
89
- float_status *fp_status)
90
-{
91
- float64 ret = make_float64(0);
92
-
93
- /*
94
- * If A multiplied by B is an exact infinity and C is also an infinity
95
- * but with the opposite sign, FMA returns NaN and raises invalid.
96
- */
97
- uint8_t a_sign = float64_is_neg(a);
98
- uint8_t b_sign = float64_is_neg(b);
99
- uint8_t c_sign = float64_is_neg(c);
100
- if (is_inf_prod(a, b) && float64_is_infinity(c)) {
101
- if ((a_sign ^ b_sign) != c_sign) {
102
- ret = make_float64(DF_NAN);
103
- float_raise(float_flag_invalid, fp_status);
104
- return ret;
105
- }
106
- }
107
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
108
- (float64_is_zero(a) && float64_is_infinity(b))) {
109
- ret = make_float64(DF_NAN);
110
- float_raise(float_flag_invalid, fp_status);
111
- return ret;
112
- }
113
- /*
114
- * If none of the above checks are true and C is a NaN,
115
- * a NaN shall be returned
116
- * If A or B are NaN, a NAN shall be returned.
117
- */
118
- if (float64_is_any_nan(a) ||
119
- float64_is_any_nan(b) ||
120
- float64_is_any_nan(c)) {
121
- if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
122
- float_raise(float_flag_invalid, fp_status);
123
- }
124
- if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
125
- float_raise(float_flag_invalid, fp_status);
126
- }
127
- if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
128
- float_raise(float_flag_invalid, fp_status);
129
- }
130
- ret = make_float64(DF_NAN);
131
- return ret;
132
- }
133
- /*
134
- * We have checked for adding opposite-signed infinities.
135
- * Other infinities return infinity with the correct sign
136
- */
137
- if (float64_is_infinity(c)) {
138
- ret = infinite_float64(c_sign);
139
- return ret;
140
- }
141
- if (float64_is_infinity(a) || float64_is_infinity(b)) {
142
- ret = infinite_float64(a_sign ^ b_sign);
143
- return ret;
144
- }
145
- g_assert_not_reached();
146
-}
147
-
148
-static float32 special_fmaf(float32 a, float32 b, float32 c,
149
- float_status *fp_status)
150
-{
151
- float64 aa, bb, cc;
152
- aa = float32_to_float64(a, fp_status);
153
- bb = float32_to_float64(b, fp_status);
154
- cc = float32_to_float64(c, fp_status);
155
- return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
156
-}
157
-
158
-float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
159
- float_status *fp_status)
160
-{
161
- Accum prod;
162
- Accum acc;
163
- Accum result;
164
- accum_init(&prod);
165
- accum_init(&acc);
166
- accum_init(&result);
167
-
168
- uint8_t a_sign = float32_is_neg(a);
169
- uint8_t b_sign = float32_is_neg(b);
170
- uint8_t c_sign = float32_is_neg(c);
171
- if (float32_is_infinity(a) ||
172
- float32_is_infinity(b) ||
173
- float32_is_infinity(c)) {
174
- return special_fmaf(a, b, c, fp_status);
175
- }
176
- if (float32_is_any_nan(a) ||
177
- float32_is_any_nan(b) ||
178
- float32_is_any_nan(c)) {
179
- return special_fmaf(a, b, c, fp_status);
180
- }
181
- if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
182
- float32 tmp = float32_mul(a, b, fp_status);
183
- tmp = float32_add(tmp, c, fp_status);
184
- return tmp;
185
- }
186
-
187
- /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
188
- prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
189
-
190
- /*
191
- * Note: extracting the mantissa into an int is multiplying by
192
- * 2**23, so adjust here
193
- */
194
- prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
195
- prod.sign = a_sign ^ b_sign;
196
- if (float32_is_zero(a) || float32_is_zero(b)) {
197
- prod.exp = -2 * WAY_BIG_EXP;
198
- }
199
- if ((scale > 0) && float32_is_denormal(c)) {
200
- acc.mant = int128_mul_6464(0, 0);
201
- acc.exp = -WAY_BIG_EXP;
202
- acc.sign = c_sign;
203
- acc.sticky = 1;
204
- result = accum_add(prod, acc);
205
- } else if (!float32_is_zero(c)) {
206
- acc.mant = int128_mul_6464(float32_getmant(c), 1);
207
- acc.exp = float32_getexp(c);
208
- acc.sign = c_sign;
209
- result = accum_add(prod, acc);
210
- } else {
211
- result = prod;
212
- }
213
- result.exp += scale;
214
- return accum_round_float32(result, fp_status);
215
-}
216
217
float64 internal_mpyhh(float64 a, float64 b,
218
unsigned long long int accumulated,
49
--
219
--
50
2.34.1
220
2.43.0
1
From: Jamie Iles <quic_jiles@quicinc.com>
1
This massive macro is now only used once.
2
Expand it for use only by float64.
2
3
3
Expose qemu_cpu_list_lock globally so that we can use
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
WITH_QEMU_LOCK_GUARD and QEMU_LOCK_GUARD to simplify a few code paths
5
now and in future.
6
7
Signed-off-by: Jamie Iles <quic_jiles@quicinc.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-Id: <20230427020925.51003-2-quic_jiles@quicinc.com>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
6
---
13
include/exec/cpu-common.h | 1 +
7
target/hexagon/fma_emu.c | 255 +++++++++++++++++++--------------------
14
cpus-common.c | 2 +-
8
1 file changed, 127 insertions(+), 128 deletions(-)
15
linux-user/elfload.c | 13 +++++++------
16
migration/dirtyrate.c | 26 +++++++++++++-------------
17
trace/control-target.c | 9 ++++-----
18
5 files changed, 26 insertions(+), 25 deletions(-)
19
9
20
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
21
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
22
--- a/include/exec/cpu-common.h
12
--- a/target/hexagon/fma_emu.c
23
+++ b/include/exec/cpu-common.h
13
+++ b/target/hexagon/fma_emu.c
24
@@ -XXX,XX +XXX,XX @@ extern intptr_t qemu_host_page_mask;
14
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
25
#define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size())
26
27
/* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */
28
+extern QemuMutex qemu_cpu_list_lock;
29
void qemu_init_cpu_list(void);
30
void cpu_list_lock(void);
31
void cpu_list_unlock(void);
32
diff --git a/cpus-common.c b/cpus-common.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/cpus-common.c
35
+++ b/cpus-common.c
36
@@ -XXX,XX +XXX,XX @@
37
#include "qemu/lockable.h"
38
#include "trace/trace-root.h"
39
40
-static QemuMutex qemu_cpu_list_lock;
41
+QemuMutex qemu_cpu_list_lock;
42
static QemuCond exclusive_cond;
43
static QemuCond exclusive_resume;
44
static QemuCond qemu_work_cond;
45
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/linux-user/elfload.c
48
+++ b/linux-user/elfload.c
49
@@ -XXX,XX +XXX,XX @@
50
#include "qemu/guest-random.h"
51
#include "qemu/units.h"
52
#include "qemu/selfmap.h"
53
+#include "qemu/lockable.h"
54
#include "qapi/error.h"
55
#include "qemu/error-report.h"
56
#include "target_signal.h"
57
@@ -XXX,XX +XXX,XX @@ static int fill_note_info(struct elf_note_info *info,
58
info->notes_size += note_size(&info->notes[i]);
59
60
/* read and fill status of all threads */
61
- cpu_list_lock();
62
- CPU_FOREACH(cpu) {
63
- if (cpu == thread_cpu) {
64
- continue;
65
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
66
+ CPU_FOREACH(cpu) {
67
+ if (cpu == thread_cpu) {
68
+ continue;
69
+ }
70
+ fill_thread_info(info, cpu->env_ptr);
71
}
72
- fill_thread_info(info, cpu->env_ptr);
73
}
74
- cpu_list_unlock();
75
76
return (0);
77
}
15
}
78
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
16
79
index XXXXXXX..XXXXXXX 100644
17
/* Return a maximum finite value with the requested sign */
80
--- a/migration/dirtyrate.c
18
-#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
81
+++ b/migration/dirtyrate.c
19
-static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
82
@@ -XXX,XX +XXX,XX @@ int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
20
-{ \
83
retry:
21
- if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
84
init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
22
- && ((a.guard | a.round | a.sticky) == 0)) { \
85
23
- /* result zero */ \
86
- cpu_list_lock();
24
- switch (fp_status->float_rounding_mode) { \
87
- gen_id = cpu_list_generation_id_get();
25
- case float_round_down: \
88
- records = vcpu_dirty_stat_alloc(stat);
26
- return zero_##SUFFIX(1); \
89
- vcpu_dirty_stat_collect(stat, records, true);
27
- default: \
90
- cpu_list_unlock();
28
- return zero_##SUFFIX(0); \
91
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
29
- } \
92
+ gen_id = cpu_list_generation_id_get();
30
- } \
93
+ records = vcpu_dirty_stat_alloc(stat);
31
- /* Normalize right */ \
94
+ vcpu_dirty_stat_collect(stat, records, true);
32
- /* We want MANTBITS bits of mantissa plus the leading one. */ \
95
+ }
33
- /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
96
34
- /* So we need to normalize right while the high word is non-zero and \
97
duration = dirty_stat_wait(calc_time_ms, init_time_ms);
35
- * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
98
36
- while ((int128_gethi(a.mant) != 0) || \
99
global_dirty_log_sync(flag, one_shot);
37
- ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
100
38
- a = accum_norm_right(a, 1); \
101
- cpu_list_lock();
39
- } \
102
- if (gen_id != cpu_list_generation_id_get()) {
40
- /* \
103
- g_free(records);
41
- * OK, now normalize left \
104
- g_free(stat->rates);
42
- * We want to normalize left until we have a leading one in bit 24 \
105
- cpu_list_unlock();
43
- * Theoretically, we only need to shift a maximum of one to the left if we \
106
- goto retry;
44
- * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
107
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
45
- * should be 0 \
108
+ if (gen_id != cpu_list_generation_id_get()) {
46
- */ \
109
+ g_free(records);
47
- while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
110
+ g_free(stat->rates);
48
- a = accum_norm_left(a); \
111
+ cpu_list_unlock();
49
- } \
112
+ goto retry;
50
- /* \
113
+ }
51
- * OK, now we might need to denormalize because of potential underflow. \
114
+ vcpu_dirty_stat_collect(stat, records, false);
52
- * We need to do this before rounding, and rounding might make us normal \
115
}
53
- * again \
116
- vcpu_dirty_stat_collect(stat, records, false);
54
- */ \
117
- cpu_list_unlock();
55
- while (a.exp <= 0) { \
118
56
- a = accum_norm_right(a, 1 - a.exp); \
119
for (i = 0; i < stat->nvcpu; i++) {
57
- /* \
120
dirtyrate = do_calculate_dirtyrate(records[i], duration);
58
- * Do we have underflow? \
121
diff --git a/trace/control-target.c b/trace/control-target.c
59
- * That's when we get an inexact answer because we ran out of bits \
122
index XXXXXXX..XXXXXXX 100644
60
- * in a denormal. \
123
--- a/trace/control-target.c
61
- */ \
124
+++ b/trace/control-target.c
62
- if (a.guard || a.round || a.sticky) { \
125
@@ -XXX,XX +XXX,XX @@
63
- float_raise(float_flag_underflow, fp_status); \
126
*/
64
- } \
127
65
- } \
128
#include "qemu/osdep.h"
66
- /* OK, we're relatively canonical... now we need to round */ \
129
+#include "qemu/lockable.h"
67
- if (a.guard || a.round || a.sticky) { \
130
#include "cpu.h"
68
- float_raise(float_flag_inexact, fp_status); \
131
#include "trace/trace-root.h"
69
- switch (fp_status->float_rounding_mode) { \
132
#include "trace/control.h"
70
- case float_round_to_zero: \
133
@@ -XXX,XX +XXX,XX @@ static bool adding_first_cpu1(void)
71
- /* Chop and we're done */ \
134
72
- break; \
135
static bool adding_first_cpu(void)
73
- case float_round_up: \
136
{
74
- if (a.sign == 0) { \
137
- bool res;
75
- a.mant = int128_add(a.mant, int128_one()); \
138
- cpu_list_lock();
76
- } \
139
- res = adding_first_cpu1();
77
- break; \
140
- cpu_list_unlock();
78
- case float_round_down: \
141
- return res;
79
- if (a.sign != 0) { \
142
+ QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
80
- a.mant = int128_add(a.mant, int128_one()); \
143
+
81
- } \
144
+ return adding_first_cpu1();
82
- break; \
83
- default: \
84
- if (a.round || a.sticky) { \
85
- /* round up if guard is 1, down if guard is zero */ \
86
- a.mant = int128_add(a.mant, int128_make64(a.guard)); \
87
- } else if (a.guard) { \
88
- /* exactly .5, round up if odd */ \
89
- a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
90
- } \
91
- break; \
92
- } \
93
- } \
94
- /* \
95
- * OK, now we might have carried all the way up. \
96
- * So we might need to shr once \
97
- * at least we know that the lsb should be zero if we rounded and \
98
- * got a carry out... \
99
- */ \
100
- if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
101
- a = accum_norm_right(a, 1); \
102
- } \
103
- /* Overflow? */ \
104
- if (a.exp >= INF_EXP) { \
105
- /* Yep, inf result */ \
106
- float_raise(float_flag_overflow, fp_status); \
107
- float_raise(float_flag_inexact, fp_status); \
108
- switch (fp_status->float_rounding_mode) { \
109
- case float_round_to_zero: \
110
- return maxfinite_##SUFFIX(a.sign); \
111
- case float_round_up: \
112
- if (a.sign == 0) { \
113
- return infinite_##SUFFIX(a.sign); \
114
- } else { \
115
- return maxfinite_##SUFFIX(a.sign); \
116
- } \
117
- case float_round_down: \
118
- if (a.sign != 0) { \
119
- return infinite_##SUFFIX(a.sign); \
120
- } else { \
121
- return maxfinite_##SUFFIX(a.sign); \
122
- } \
123
- default: \
124
- return infinite_##SUFFIX(a.sign); \
125
- } \
126
- } \
127
- /* Underflow? */ \
128
- if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
129
- /* Leading one means: No, we're normal. So, we should be done... */ \
130
- INTERNAL_TYPE ret; \
131
- ret.i = 0; \
132
- ret.sign = a.sign; \
133
- ret.exp = a.exp; \
134
- ret.mant = int128_getlo(a.mant); \
135
- return ret.i; \
136
- } \
137
- assert(a.exp == 1); \
138
- INTERNAL_TYPE ret; \
139
- ret.i = 0; \
140
- ret.sign = a.sign; \
141
- ret.exp = 0; \
142
- ret.mant = int128_getlo(a.mant); \
143
- return ret.i; \
144
+static float64 accum_round_float64(Accum a, float_status *fp_status)
145
+{
146
+ if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
147
+ && ((a.guard | a.round | a.sticky) == 0)) {
148
+ /* result zero */
149
+ switch (fp_status->float_rounding_mode) {
150
+ case float_round_down:
151
+ return zero_float64(1);
152
+ default:
153
+ return zero_float64(0);
154
+ }
155
+ }
156
+ /*
157
+ * Normalize right
158
+ * We want DF_MANTBITS bits of mantissa plus the leading one.
159
+ * That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF
160
+ * So we need to normalize right while the high word is non-zero and
161
+ * while the low word is nonzero when masked with 0xffe0_0000_0000_0000
162
+ */
163
+ while ((int128_gethi(a.mant) != 0) ||
164
+ ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) {
165
+ a = accum_norm_right(a, 1);
166
+ }
167
+ /*
168
+ * OK, now normalize left
169
+ * We want to normalize left until we have a leading one in bit 24
170
+ * Theoretically, we only need to shift a maximum of one to the left if we
171
+ * shifted out lots of bits from B, or if we had no shift / 1 shift sticky
172
+ * should be 0
173
+ */
174
+ while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) {
175
+ a = accum_norm_left(a);
176
+ }
177
+ /*
178
+ * OK, now we might need to denormalize because of potential underflow.
179
+ * We need to do this before rounding, and rounding might make us normal
180
+ * again
181
+ */
182
+ while (a.exp <= 0) {
183
+ a = accum_norm_right(a, 1 - a.exp);
184
+ /*
185
+ * Do we have underflow?
186
+ * That's when we get an inexact answer because we ran out of bits
187
+ * in a denormal.
188
+ */
189
+ if (a.guard || a.round || a.sticky) {
190
+ float_raise(float_flag_underflow, fp_status);
191
+ }
192
+ }
193
+ /* OK, we're relatively canonical... now we need to round */
194
+ if (a.guard || a.round || a.sticky) {
195
+ float_raise(float_flag_inexact, fp_status);
196
+ switch (fp_status->float_rounding_mode) {
197
+ case float_round_to_zero:
198
+ /* Chop and we're done */
199
+ break;
200
+ case float_round_up:
201
+ if (a.sign == 0) {
202
+ a.mant = int128_add(a.mant, int128_one());
203
+ }
204
+ break;
205
+ case float_round_down:
206
+ if (a.sign != 0) {
207
+ a.mant = int128_add(a.mant, int128_one());
208
+ }
209
+ break;
210
+ default:
211
+ if (a.round || a.sticky) {
212
+ /* round up if guard is 1, down if guard is zero */
213
+ a.mant = int128_add(a.mant, int128_make64(a.guard));
214
+ } else if (a.guard) {
215
+ /* exactly .5, round up if odd */
216
+ a.mant = int128_add(a.mant, int128_and(a.mant, int128_one()));
217
+ }
218
+ break;
219
+ }
220
+ }
221
+ /*
222
+ * OK, now we might have carried all the way up.
223
+ * So we might need to shr once
224
+ * at least we know that the lsb should be zero if we rounded and
225
+ * got a carry out...
226
+ */
227
+ if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) {
228
+ a = accum_norm_right(a, 1);
229
+ }
230
+ /* Overflow? */
231
+ if (a.exp >= DF_INF_EXP) {
232
+ /* Yep, inf result */
233
+ float_raise(float_flag_overflow, fp_status);
234
+ float_raise(float_flag_inexact, fp_status);
235
+ switch (fp_status->float_rounding_mode) {
236
+ case float_round_to_zero:
237
+ return maxfinite_float64(a.sign);
238
+ case float_round_up:
239
+ if (a.sign == 0) {
240
+ return infinite_float64(a.sign);
241
+ } else {
242
+ return maxfinite_float64(a.sign);
243
+ }
244
+ case float_round_down:
245
+ if (a.sign != 0) {
246
+ return infinite_float64(a.sign);
247
+ } else {
248
+ return maxfinite_float64(a.sign);
249
+ }
250
+ default:
251
+ return infinite_float64(a.sign);
252
+ }
253
+ }
254
+ /* Underflow? */
255
+ if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
256
+ /* Leading one means: No, we're normal. So, we should be done... */
257
+ Double ret;
258
+ ret.i = 0;
259
+ ret.sign = a.sign;
260
+ ret.exp = a.exp;
261
+ ret.mant = int128_getlo(a.mant);
262
+ return ret.i;
263
+ }
264
+ assert(a.exp == 1);
265
+ Double ret;
266
+ ret.i = 0;
267
+ ret.sign = a.sign;
268
+ ret.exp = 0;
269
+ ret.mant = int128_getlo(a.mant);
270
+ return ret.i;
145
}
271
}
146
272
147
void trace_init_vcpu(CPUState *vcpu)
273
-GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
274
-
275
float64 internal_mpyhh(float64 a, float64 b,
276
unsigned long long int accumulated,
277
float_status *fp_status)
148
--
278
--
149
2.34.1
279
2.43.0
150
151
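For context on the qemu_cpu_list_lock patch above: the conversion replaces open-coded
cpu_list_lock()/cpu_list_unlock() pairs with QEMU's scoped lock guards from
"qemu/lockable.h". A minimal sketch of that pattern follows; the lock and the two
wrapper functions are illustrative only and not taken from the patch:

    #include "qemu/osdep.h"
    #include "qemu/thread.h"
    #include "qemu/lockable.h"

    /* Hypothetical mutex; assumed to be initialized elsewhere with qemu_mutex_init(). */
    static QemuMutex example_lock;

    static void guarded_block(void)
    {
        /* The mutex is released automatically when the block is left. */
        WITH_QEMU_LOCK_GUARD(&example_lock) {
            /* ... code that must run with example_lock held ... */
        }
    }

    static int guarded_function(void)
    {
        /* The mutex is released automatically when the function returns. */
        QEMU_LOCK_GUARD(&example_lock);
        return 0;
    }

Unlike explicit lock/unlock calls, the guards cannot leak the lock on an early
return or goto, which is the simplification the commit message refers to.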
1
Reviewed-by: Thomas Huth <thuth@redhat.com>
1
This structure, with bitfields, is incorrect for big-endian.
2
Use the existing float32_getexp_raw, which uses extract32.
3
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Message-Id: <20230503072331.1747057-80-richard.henderson@linaro.org>
4
---
6
---
5
meson.build | 3 ---
7
target/hexagon/fma_emu.c | 16 +++-------------
6
disas.c => disas/disas.c | 0
8
1 file changed, 3 insertions(+), 13 deletions(-)
7
disas/meson.build | 4 +++-
8
3 files changed, 3 insertions(+), 4 deletions(-)
9
rename disas.c => disas/disas.c (100%)
10
9
11
diff --git a/meson.build b/meson.build
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/meson.build
12
--- a/target/hexagon/fma_emu.c
14
+++ b/meson.build
13
+++ b/target/hexagon/fma_emu.c
15
@@ -XXX,XX +XXX,XX @@ specific_ss.add(files('cpu.c'))
14
@@ -XXX,XX +XXX,XX @@ typedef union {
16
15
};
17
subdir('softmmu')
16
} Double;
18
17
19
-common_ss.add(capstone)
18
-typedef union {
20
-specific_ss.add(files('disas.c'), capstone)
19
- float f;
20
- uint32_t i;
21
- struct {
22
- uint32_t mant:23;
23
- uint32_t exp:8;
24
- uint32_t sign:1;
25
- };
26
-} Float;
21
-
27
-
22
# Work around a gcc bug/misfeature wherein constant propagation looks
28
static uint64_t float64_getmant(float64 f64)
23
# through an alias:
29
{
24
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99696
30
Double a = { .i = f64 };
25
diff --git a/disas.c b/disas/disas.c
31
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
26
similarity index 100%
32
27
rename from disas.c
33
int32_t float32_getexp(float32 f32)
28
rename to disas/disas.c
34
{
29
diff --git a/disas/meson.build b/disas/meson.build
35
- Float a = { .i = f32 };
30
index XXXXXXX..XXXXXXX 100644
36
+ int exp = float32_getexp_raw(f32);
31
--- a/disas/meson.build
37
if (float32_is_normal(f32)) {
32
+++ b/disas/meson.build
38
- return a.exp;
33
@@ -XXX,XX +XXX,XX @@ common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files('riscv.c'))
39
+ return exp;
34
common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
40
}
35
common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
41
if (float32_is_denormal(f32)) {
36
common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
42
- return a.exp + 1;
37
-common_ss.add(when: capstone, if_true: files('capstone.c'))
43
+ return exp + 1;
38
+common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
44
}
39
+
45
return -1;
40
+specific_ss.add(files('disas.c'), capstone)
46
}
41
--
47
--
42
2.34.1
48
2.43.0
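For reference, the problem fixed in the fma_emu.c hunk above is that the C standard
leaves bitfield allocation order to the implementation, so the removed Float union
only matched the IEEE single-precision layout on little-endian hosts. Extracting the
fields from the raw bit pattern with extract32() is layout independent. A minimal
sketch with illustrative helper names (the real code reuses float32_getexp_raw()):

    #include "qemu/osdep.h"
    #include "qemu/bitops.h"

    /* IEEE single precision: bit 31 sign, bits 30..23 exponent, bits 22..0 mantissa. */
    static inline int example_float32_exp_raw(uint32_t bits)
    {
        return extract32(bits, 23, 8);   /* biased exponent field */
    }

    static inline int example_float32_sign(uint32_t bits)
    {
        return extract32(bits, 31, 1);   /* sign bit */
    }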
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
1
This structure, with bitfields, is incorrect for big-endian.
2
tcg_out_zext_addr_if_32_bit, and some code that lived in both
2
Use extract64 and deposit64 instead.
3
tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns
4
HostAddress and TCGLabelQemuLdst structures.
5
3
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
tcg/loongarch64/tcg-target.c.inc | 255 +++++++++++++------------------
7
target/hexagon/fma_emu.c | 46 ++++++++++++++--------------------------
10
1 file changed, 105 insertions(+), 150 deletions(-)
8
1 file changed, 16 insertions(+), 30 deletions(-)
11
9
12
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/loongarch64/tcg-target.c.inc
12
--- a/target/hexagon/fma_emu.c
15
+++ b/tcg/loongarch64/tcg-target.c.inc
13
+++ b/target/hexagon/fma_emu.c
16
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[4] = {
14
@@ -XXX,XX +XXX,XX @@
17
[MO_64] = helper_le_stq_mmu,
15
18
};
16
#define WAY_BIG_EXP 4096
19
17
20
-/* We expect to use a 12-bit negative offset from ENV. */
18
-typedef union {
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
19
- double f;
22
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
20
- uint64_t i;
21
- struct {
22
- uint64_t mant:52;
23
- uint64_t exp:11;
24
- uint64_t sign:1;
25
- };
26
-} Double;
23
-
27
-
24
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
28
static uint64_t float64_getmant(float64 f64)
25
{
29
{
26
tcg_out_opc_b(s, 0);
30
- Double a = { .i = f64 };
27
return reloc_br_sd10k16(s->code_ptr - 1, target);
31
+ uint64_t mant = extract64(f64, 0, 52);
32
if (float64_is_normal(f64)) {
33
- return a.mant | 1ULL << 52;
34
+ return mant | 1ULL << 52;
35
}
36
if (float64_is_zero(f64)) {
37
return 0;
38
}
39
if (float64_is_denormal(f64)) {
40
- return a.mant;
41
+ return mant;
42
}
43
return ~0ULL;
28
}
44
}
29
45
30
-/*
46
int32_t float64_getexp(float64 f64)
31
- * Emits common code for TLB addend lookup, that eventually loads the
32
- * addend in TCG_REG_TMP2.
33
- */
34
-static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, MemOpIdx oi,
35
- tcg_insn_unit **label_ptr, bool is_load)
36
-{
37
- MemOp opc = get_memop(oi);
38
- unsigned s_bits = opc & MO_SIZE;
39
- unsigned a_bits = get_alignment_bits(opc);
40
- tcg_target_long compare_mask;
41
- int mem_index = get_mmuidx(oi);
42
- int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
43
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
44
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
45
-
46
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
47
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
48
-
49
- tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl,
50
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
51
- tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
52
- tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
53
-
54
- /* Load the tlb comparator and the addend. */
55
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
56
- is_load ? offsetof(CPUTLBEntry, addr_read)
57
- : offsetof(CPUTLBEntry, addr_write));
58
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
59
- offsetof(CPUTLBEntry, addend));
60
-
61
- /* We don't support unaligned accesses. */
62
- if (a_bits < s_bits) {
63
- a_bits = s_bits;
64
- }
65
- /* Clear the non-page, non-alignment bits from the address. */
66
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
67
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
68
- tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
69
-
70
- /* Compare masked address with the TLB entry. */
71
- label_ptr[0] = s->code_ptr;
72
- tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
73
-
74
- /* TLB Hit - addend in TCG_REG_TMP2, ready for use. */
75
-}
76
-
77
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
78
- TCGType type,
79
- TCGReg datalo, TCGReg addrlo,
80
- void *raddr, tcg_insn_unit **label_ptr)
81
-{
82
- TCGLabelQemuLdst *label = new_ldst_label(s);
83
-
84
- label->is_ld = is_ld;
85
- label->oi = oi;
86
- label->type = type;
87
- label->datalo_reg = datalo;
88
- label->datahi_reg = 0; /* unused */
89
- label->addrlo_reg = addrlo;
90
- label->addrhi_reg = 0; /* unused */
91
- label->raddr = tcg_splitwx_to_rx(raddr);
92
- label->label_ptr[0] = label_ptr[0];
93
-}
94
-
95
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
96
{
47
{
97
MemOpIdx oi = l->oi;
48
- Double a = { .i = f64 };
98
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
49
+ int exp = extract64(f64, 52, 11);
99
return tcg_out_goto(s, l->raddr);
50
if (float64_is_normal(f64)) {
51
- return a.exp;
52
+ return exp;
53
}
54
if (float64_is_denormal(f64)) {
55
- return a.exp + 1;
56
+ return exp + 1;
57
}
58
return -1;
100
}
59
}
101
#else
60
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
102
-
61
/* Return a maximum finite value with the requested sign */
103
-/*
62
static float64 accum_round_float64(Accum a, float_status *fp_status)
104
- * Alignment helpers for user-mode emulation
105
- */
106
-
107
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
108
- unsigned a_bits)
109
-{
110
- TCGLabelQemuLdst *l = new_ldst_label(s);
111
-
112
- l->is_ld = is_ld;
113
- l->addrlo_reg = addr_reg;
114
-
115
- /*
116
- * Without micro-architecture details, we don't know which of bstrpick or
117
- * andi is faster, so use bstrpick as it's not constrained by imm field
118
- * width. (Not to say alignments >= 2^12 are going to happen any time
119
- * soon, though)
120
- */
121
- tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
122
-
123
- l->label_ptr[0] = s->code_ptr;
124
- tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
125
-
126
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
127
-}
128
-
129
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
130
{
63
{
131
/* resolve label address */
64
+ uint64_t ret;
132
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
133
134
#endif /* CONFIG_SOFTMMU */
135
136
-/*
137
- * `ext32u` the address register into the temp register given,
138
- * if target is 32-bit, no-op otherwise.
139
- *
140
- * Returns the address register ready for use with TLB addend.
141
- */
142
-static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s,
143
- TCGReg addr, TCGReg tmp)
144
-{
145
- if (TARGET_LONG_BITS == 32) {
146
- tcg_out_ext32u(s, tmp, addr);
147
- return tmp;
148
- }
149
- return addr;
150
-}
151
-
152
typedef struct {
153
TCGReg base;
154
TCGReg index;
155
} HostAddress;
156
157
+/*
158
+ * For softmmu, perform the TLB load and compare.
159
+ * For useronly, perform any required alignment tests.
160
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
161
+ * is required and fill in @h with the host address for the fast path.
162
+ */
163
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
164
+ TCGReg addr_reg, MemOpIdx oi,
165
+ bool is_ld)
166
+{
167
+ TCGLabelQemuLdst *ldst = NULL;
168
+ MemOp opc = get_memop(oi);
169
+ unsigned a_bits = get_alignment_bits(opc);
170
+
65
+
171
+#ifdef CONFIG_SOFTMMU
66
if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
172
+ unsigned s_bits = opc & MO_SIZE;
67
&& ((a.guard | a.round | a.sticky) == 0)) {
173
+ int mem_index = get_mmuidx(oi);
68
/* result zero */
174
+ int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
69
@@ -XXX,XX +XXX,XX @@ static float64 accum_round_float64(Accum a, float_status *fp_status)
175
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
70
}
176
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
71
}
177
+ tcg_target_long compare_mask;
72
/* Underflow? */
178
+
73
- if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
179
+ ldst = new_ldst_label(s);
74
+ ret = int128_getlo(a.mant);
180
+ ldst->is_ld = is_ld;
75
+ if (ret & (1ULL << DF_MANTBITS)) {
181
+ ldst->oi = oi;
76
/* Leading one means: No, we're normal. So, we should be done... */
182
+ ldst->addrlo_reg = addr_reg;
77
- Double ret;
183
+
78
- ret.i = 0;
184
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
79
- ret.sign = a.sign;
185
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
80
- ret.exp = a.exp;
186
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
81
- ret.mant = int128_getlo(a.mant);
187
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
82
- return ret.i;
188
+
83
+ ret = deposit64(ret, 52, 11, a.exp);
189
+ tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
190
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
191
+ tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
192
+ tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
193
+
194
+ /* Load the tlb comparator and the addend. */
195
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
196
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
197
+ : offsetof(CPUTLBEntry, addr_write));
198
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
199
+ offsetof(CPUTLBEntry, addend));
200
+
201
+ /* We don't support unaligned accesses. */
202
+ if (a_bits < s_bits) {
203
+ a_bits = s_bits;
204
+ }
205
+ /* Clear the non-page, non-alignment bits from the address. */
206
+ compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
207
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
208
+ tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
209
+
210
+ /* Compare masked address with the TLB entry. */
211
+ ldst->label_ptr[0] = s->code_ptr;
212
+ tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
213
+
214
+ h->index = TCG_REG_TMP2;
215
+#else
216
+ if (a_bits) {
217
+ ldst = new_ldst_label(s);
218
+
219
+ ldst->is_ld = is_ld;
220
+ ldst->oi = oi;
221
+ ldst->addrlo_reg = addr_reg;
222
+
223
+ /*
224
+ * Without micro-architecture details, we don't know which of
225
+ * bstrpick or andi is faster, so use bstrpick as it's not
226
+ * constrained by imm field width. Not to say alignments >= 2^12
227
+ * are going to happen any time soon.
228
+ */
229
+ tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
230
+
231
+ ldst->label_ptr[0] = s->code_ptr;
232
+ tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
233
+ }
234
+
235
+ h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
236
+#endif
237
+
238
+ if (TARGET_LONG_BITS == 32) {
239
+ h->base = TCG_REG_TMP0;
240
+ tcg_out_ext32u(s, h->base, addr_reg);
241
+ } else {
84
+ } else {
242
+ h->base = addr_reg;
85
+ assert(a.exp == 1);
243
+ }
86
+ ret = deposit64(ret, 52, 11, 0);
244
+
245
+ return ldst;
246
+}
247
+
248
static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
249
TCGReg rd, HostAddress h)
250
{
251
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
252
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
253
MemOpIdx oi, TCGType data_type)
254
{
255
- MemOp opc = get_memop(oi);
256
+ TCGLabelQemuLdst *ldst;
257
HostAddress h;
258
259
-#ifdef CONFIG_SOFTMMU
260
- tcg_insn_unit *label_ptr[1];
261
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
262
+ tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h);
263
264
- tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
265
- h.index = TCG_REG_TMP2;
266
-#else
267
- unsigned a_bits = get_alignment_bits(opc);
268
- if (a_bits) {
269
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
270
+ if (ldst) {
271
+ ldst->type = data_type;
272
+ ldst->datalo_reg = data_reg;
273
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
274
}
87
}
275
- h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
88
- assert(a.exp == 1);
276
-#endif
89
- Double ret;
277
-
90
- ret.i = 0;
278
- h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
91
- ret.sign = a.sign;
279
- tcg_out_qemu_ld_indexed(s, opc, data_type, data_reg, h);
92
- ret.exp = 0;
280
-
93
- ret.mant = int128_getlo(a.mant);
281
-#ifdef CONFIG_SOFTMMU
94
- return ret.i;
282
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
95
+ ret = deposit64(ret, 63, 1, a.sign);
283
- s->code_ptr, label_ptr);
96
+ return ret;
284
-#endif
285
}
97
}
286
98
287
static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
99
float64 internal_mpyhh(float64 a, float64 b,
288
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
289
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
290
MemOpIdx oi, TCGType data_type)
291
{
292
- MemOp opc = get_memop(oi);
293
+ TCGLabelQemuLdst *ldst;
294
HostAddress h;
295
296
-#ifdef CONFIG_SOFTMMU
297
- tcg_insn_unit *label_ptr[1];
298
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
299
+ tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h);
300
301
- tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
302
- h.index = TCG_REG_TMP2;
303
-#else
304
- unsigned a_bits = get_alignment_bits(opc);
305
- if (a_bits) {
306
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
307
+ if (ldst) {
308
+ ldst->type = data_type;
309
+ ldst->datalo_reg = data_reg;
310
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
311
}
312
- h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
313
-#endif
314
-
315
- h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
316
- tcg_out_qemu_st_indexed(s, opc, data_reg, h);
317
-
318
-#ifdef CONFIG_SOFTMMU
319
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
320
- s->code_ptr, label_ptr);
321
-#endif
322
}
323
324
/*
325
--
100
--
326
2.34.1
101
2.43.0
327
328
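The same endianness concern applies to the Double union removed above: extract64()
and deposit64() operate on the raw 64-bit pattern and do not depend on how the host
compiler orders bitfields. A minimal sketch of packing a double-precision result the
way the reworked accum_round_float64() does (the helper name is illustrative):

    #include "qemu/osdep.h"
    #include "qemu/bitops.h"

    /* IEEE double precision: bit 63 sign, bits 62..52 exponent, bits 51..0 mantissa. */
    static inline uint64_t example_pack_float64(int sign, int exp, uint64_t mant)
    {
        uint64_t bits = 0;

        bits = deposit64(bits, 0, 52, mant);   /* mantissa */
        bits = deposit64(bits, 52, 11, exp);   /* biased exponent */
        bits = deposit64(bits, 63, 1, sign);   /* sign */
        return bits;
    }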
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
1
No need to open-code 64x64->128-bit multiplication.
2
and tcg_out_st_helper_args. This allows our local
3
tcg_out_arg_* infrastructure to be removed.
4
2
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/arm/tcg-target.c.inc | 140 +++++----------------------------------
6
target/hexagon/fma_emu.c | 32 +++-----------------------------
9
1 file changed, 18 insertions(+), 122 deletions(-)
7
1 file changed, 3 insertions(+), 29 deletions(-)
10
8
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
9
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
11
--- a/target/hexagon/fma_emu.c
14
+++ b/tcg/arm/tcg-target.c.inc
12
+++ b/target/hexagon/fma_emu.c
15
@@ -XXX,XX +XXX,XX @@ tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm)
13
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32)
16
tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
14
return -1;
17
}
15
}
18
16
19
-static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
17
-static uint32_t int128_getw0(Int128 x)
20
- TCGReg rn, int imm8)
21
+static void __attribute__((unused))
22
+tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, int imm8)
23
{
24
tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
25
}
26
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
27
tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, 0xff);
28
}
29
30
-static void __attribute__((unused))
31
-tcg_out_ext8u_cond(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
32
-{
18
-{
33
- tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
19
- return int128_getlo(x);
34
-}
20
-}
35
-
21
-
36
static void tcg_out_ext16s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn)
22
-static uint32_t int128_getw1(Int128 x)
37
{
38
/* sxth */
39
tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | (rd << 12) | rn);
40
}
41
42
-static void tcg_out_ext16u_cond(TCGContext *s, ARMCond cond,
43
- TCGReg rd, TCGReg rn)
44
-{
23
-{
45
- /* uxth */
24
- return int128_getlo(x) >> 32;
46
- tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
47
-}
25
-}
48
-
26
-
49
static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
27
static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
50
{
28
{
51
- tcg_out_ext16u_cond(s, COND_AL, rd, rn);
29
- Int128 a, b;
52
+ /* uxth */
30
- uint64_t pp0, pp1a, pp1b, pp1s, pp2;
53
+ tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | (rd << 12) | rn);
31
+ uint64_t l, h;
54
}
32
55
33
- a = int128_make64(ai);
56
static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
34
- b = int128_make64(bi);
57
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
35
- pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
58
#endif
36
- pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
59
};
37
- pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
60
38
- pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
61
-/* Helper routines for marshalling helper function arguments into
62
- * the correct registers and stack.
63
- * argreg is where we want to put this argument, arg is the argument itself.
64
- * Return value is the updated argreg ready for the next call.
65
- * Note that argreg 0..3 is real registers, 4+ on stack.
66
- *
67
- * We provide routines for arguments which are: immediate, 32 bit
68
- * value in register, 16 and 8 bit values in register (which must be zero
69
- * extended before use) and 64 bit value in a lo:hi register pair.
70
- */
71
-#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
72
-static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
73
-{ \
74
- if (argreg < 4) { \
75
- MOV_ARG(s, COND_AL, argreg, arg); \
76
- } else { \
77
- int ofs = (argreg - 4) * 4; \
78
- EXT_ARG; \
79
- tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
80
- tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
81
- } \
82
- return argreg + 1; \
83
-}
84
-
39
-
85
-DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
40
- pp1s = pp1a + pp1b;
86
- (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
41
- if ((pp1s < pp1a) || (pp1s < pp1b)) {
87
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u_cond,
42
- pp2 += (1ULL << 32);
88
- (tcg_out_ext8u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
89
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u_cond,
90
- (tcg_out_ext16u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
91
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
92
-
93
-static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
94
- TCGReg arglo, TCGReg arghi)
95
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
96
{
97
- /* 64 bit arguments must go in even/odd register pairs
98
- * and in 8-aligned stack slots.
99
- */
100
- if (argreg & 1) {
101
- argreg++;
102
- }
43
- }
103
- if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) {
44
- uint64_t ret_low = pp0 + (pp1s << 32);
104
- tcg_out_strd_8(s, COND_AL, arglo,
45
- if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
105
- TCG_REG_CALL_STACK, (argreg - 4) * 4);
46
- pp2 += 1;
106
- return argreg + 2;
107
- } else {
108
- argreg = tcg_out_arg_reg32(s, argreg, arglo);
109
- argreg = tcg_out_arg_reg32(s, argreg, arghi);
110
- return argreg;
111
- }
112
+ /* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
113
+ return TCG_REG_R14;
114
}
115
116
+static const TCGLdstHelperParam ldst_helper_param = {
117
+ .ra_gen = ldst_ra_gen,
118
+ .ntmp = 1,
119
+ .tmp = { TCG_REG_TMP },
120
+};
121
+
122
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
123
{
124
- TCGReg argreg;
125
- MemOpIdx oi = lb->oi;
126
- MemOp opc = get_memop(oi);
127
+ MemOp opc = get_memop(lb->oi);
128
129
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
130
return false;
131
}
132
133
- argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
134
- if (TARGET_LONG_BITS == 64) {
135
- argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
136
- } else {
137
- argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
138
- }
139
- argreg = tcg_out_arg_imm32(s, argreg, oi);
140
- argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
141
-
142
- /* Use the canonical unsigned helpers and minimize icache usage. */
143
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
144
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
145
-
146
- if ((opc & MO_SIZE) == MO_64) {
147
- TCGMovExtend ext[2] = {
148
- { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
149
- .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
150
- { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
151
- .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
152
- };
153
- tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
154
- } else {
155
- tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
156
- TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
157
- }
158
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
159
160
tcg_out_goto(s, COND_AL, lb->raddr);
161
return true;
162
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
163
164
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
165
{
166
- TCGReg argreg, datalo, datahi;
167
- MemOpIdx oi = lb->oi;
168
- MemOp opc = get_memop(oi);
169
+ MemOp opc = get_memop(lb->oi);
170
171
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
172
return false;
173
}
174
175
- argreg = TCG_REG_R0;
176
- argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
177
- if (TARGET_LONG_BITS == 64) {
178
- argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
179
- } else {
180
- argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
181
- }
47
- }
182
-
48
-
183
- datalo = lb->datalo_reg;
49
- return int128_make128(ret_low, pp2 + (pp1s >> 32));
184
- datahi = lb->datahi_reg;
50
+ mulu64(&l, &h, ai, bi);
185
- switch (opc & MO_SIZE) {
51
+ return int128_make128(l, h);
186
- case MO_8:
52
}
187
- argreg = tcg_out_arg_reg8(s, argreg, datalo);
53
188
- break;
54
static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
189
- case MO_16:
190
- argreg = tcg_out_arg_reg16(s, argreg, datalo);
191
- break;
192
- case MO_32:
193
- default:
194
- argreg = tcg_out_arg_reg32(s, argreg, datalo);
195
- break;
196
- case MO_64:
197
- argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
198
- break;
199
- }
200
-
201
- argreg = tcg_out_arg_imm32(s, argreg, oi);
202
- argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
203
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
204
205
/* Tail-call to the helper, which will return to the fast path. */
206
tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
207
--
55
--
208
2.34.1
56
2.43.0
209
210
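The int128_mul_6464() rewrite above drops the hand-written 32x32 partial-product
arithmetic in favour of the host-utils 64x64->128-bit multiply. A minimal standalone
sketch of the same operation (the function name is illustrative; the patch modifies
the existing helper in place):

    #include "qemu/osdep.h"
    #include "qemu/host-utils.h"
    #include "qemu/int128.h"

    /* Unsigned 64x64 -> 128-bit multiply, returned as an Int128. */
    static inline Int128 example_mul_6464(uint64_t a, uint64_t b)
    {
        uint64_t lo, hi;

        mulu64(&lo, &hi, a, b);         /* hi:lo = a * b */
        return int128_make128(lo, hi);
    }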
1
A copy-paste bug had us looking at the victim cache for writes.
1
Initialize x with accumulated via direct assignment,
2
rather than multiplying by 1.
2
3
3
Cc: qemu-stable@nongnu.org
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Reported-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Fixes: 08dff435e2 ("tcg: Probe the proper permissions for atomic ops")
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Message-Id: <20230505204049.352469-1-richard.henderson@linaro.org>
10
---
6
---
11
accel/tcg/cputlb.c | 2 +-
7
target/hexagon/fma_emu.c | 2 +-
12
1 file changed, 1 insertion(+), 1 deletion(-)
8
1 file changed, 1 insertion(+), 1 deletion(-)
13
9
14
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/accel/tcg/cputlb.c
12
--- a/target/hexagon/fma_emu.c
17
+++ b/accel/tcg/cputlb.c
13
+++ b/target/hexagon/fma_emu.c
18
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
14
@@ -XXX,XX +XXX,XX @@ float64 internal_mpyhh(float64 a, float64 b,
19
} else /* if (prot & PAGE_READ) */ {
15
float64_is_infinity(b)) {
20
tlb_addr = tlbe->addr_read;
16
return float64_mul(a, b, fp_status);
21
if (!tlb_hit(tlb_addr, addr)) {
17
}
22
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
18
- x.mant = int128_mul_6464(accumulated, 1);
23
+ if (!VICTIM_TLB_HIT(addr_read, addr)) {
19
+ x.mant = int128_make64(accumulated);
24
tlb_fill(env_cpu(env), addr, size,
20
x.sticky = sticky;
25
MMU_DATA_LOAD, mmu_idx, retaddr);
21
prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
26
index = tlb_index(env, mmu_idx, addr);
22
x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
27
--
23
--
28
2.34.1
24
2.43.0
29
30
1
Mark all memory operations that are not already marked with UNALIGN.
1
Convert all targets simultaneously, as the gen_intermediate_code
2
function disappears from the target. While there are possible
3
workarounds, they're larger than simply performing the conversion.
2
4
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
7
---
6
target/sh4/translate.c | 102 ++++++++++++++++++++++++++---------------
8
include/exec/translator.h | 14 --------------
7
1 file changed, 66 insertions(+), 36 deletions(-)
9
include/hw/core/tcg-cpu-ops.h | 13 +++++++++++++
10
target/alpha/cpu.h | 2 ++
11
target/arm/internals.h | 2 ++
12
target/avr/cpu.h | 2 ++
13
target/hexagon/cpu.h | 2 ++
14
target/hppa/cpu.h | 2 ++
15
target/i386/tcg/helper-tcg.h | 2 ++
16
target/loongarch/internals.h | 2 ++
17
target/m68k/cpu.h | 2 ++
18
target/microblaze/cpu.h | 2 ++
19
target/mips/tcg/tcg-internal.h | 2 ++
20
target/openrisc/cpu.h | 2 ++
21
target/ppc/cpu.h | 2 ++
22
target/riscv/cpu.h | 3 +++
23
target/rx/cpu.h | 2 ++
24
target/s390x/s390x-internal.h | 2 ++
25
target/sh4/cpu.h | 2 ++
26
target/sparc/cpu.h | 2 ++
27
target/tricore/cpu.h | 2 ++
28
target/xtensa/cpu.h | 2 ++
29
accel/tcg/cpu-exec.c | 8 +++++---
30
accel/tcg/translate-all.c | 8 +++++---
31
target/alpha/cpu.c | 1 +
32
target/alpha/translate.c | 4 ++--
33
target/arm/cpu.c | 1 +
34
target/arm/tcg/cpu-v7m.c | 1 +
35
target/arm/tcg/translate.c | 5 ++---
36
target/avr/cpu.c | 1 +
37
target/avr/translate.c | 6 +++---
38
target/hexagon/cpu.c | 1 +
39
target/hexagon/translate.c | 4 ++--
40
target/hppa/cpu.c | 1 +
41
target/hppa/translate.c | 4 ++--
42
target/i386/tcg/tcg-cpu.c | 1 +
43
target/i386/tcg/translate.c | 5 ++---
44
target/loongarch/cpu.c | 1 +
45
target/loongarch/tcg/translate.c | 4 ++--
46
target/m68k/cpu.c | 1 +
47
target/m68k/translate.c | 4 ++--
48
target/microblaze/cpu.c | 1 +
49
target/microblaze/translate.c | 4 ++--
50
target/mips/cpu.c | 1 +
51
target/mips/tcg/translate.c | 4 ++--
52
target/openrisc/cpu.c | 1 +
53
target/openrisc/translate.c | 4 ++--
54
target/ppc/cpu_init.c | 1 +
55
target/ppc/translate.c | 4 ++--
56
target/riscv/tcg/tcg-cpu.c | 1 +
57
target/riscv/translate.c | 4 ++--
58
target/rx/cpu.c | 1 +
59
target/rx/translate.c | 4 ++--
60
target/s390x/cpu.c | 1 +
61
target/s390x/tcg/translate.c | 4 ++--
62
target/sh4/cpu.c | 1 +
63
target/sh4/translate.c | 4 ++--
64
target/sparc/cpu.c | 1 +
65
target/sparc/translate.c | 4 ++--
66
target/tricore/cpu.c | 1 +
67
target/tricore/translate.c | 5 ++---
68
target/xtensa/cpu.c | 1 +
69
target/xtensa/translate.c | 4 ++--
70
62 files changed, 121 insertions(+), 62 deletions(-)
8
71
72
diff --git a/include/exec/translator.h b/include/exec/translator.h
73
index XXXXXXX..XXXXXXX 100644
74
--- a/include/exec/translator.h
75
+++ b/include/exec/translator.h
76
@@ -XXX,XX +XXX,XX @@
77
#include "qemu/bswap.h"
78
#include "exec/vaddr.h"
79
80
-/**
81
- * gen_intermediate_code
82
- * @cpu: cpu context
83
- * @tb: translation block
84
- * @max_insns: max number of instructions to translate
85
- * @pc: guest virtual program counter address
86
- * @host_pc: host physical program counter address
87
- *
88
- * This function must be provided by the target, which should create
89
- * the target-specific DisasContext, and then invoke translator_loop.
90
- */
91
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
92
- vaddr pc, void *host_pc);
93
-
94
/**
95
* DisasJumpType:
96
* @DISAS_NEXT: Next instruction in program order.
97
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
98
index XXXXXXX..XXXXXXX 100644
99
--- a/include/hw/core/tcg-cpu-ops.h
100
+++ b/include/hw/core/tcg-cpu-ops.h
101
@@ -XXX,XX +XXX,XX @@ struct TCGCPUOps {
102
* Called when the first CPU is realized.
103
*/
104
void (*initialize)(void);
105
+ /**
106
+ * @translate_code: Translate guest instructions to TCGOps
107
+ * @cpu: cpu context
108
+ * @tb: translation block
109
+ * @max_insns: max number of instructions to translate
110
+ * @pc: guest virtual program counter address
111
+ * @host_pc: host physical program counter address
112
+ *
113
+ * This function must be provided by the target, which should create
114
+ * the target-specific DisasContext, and then invoke translator_loop.
115
+ */
116
+ void (*translate_code)(CPUState *cpu, TranslationBlock *tb,
117
+ int *max_insns, vaddr pc, void *host_pc);
118
/**
119
* @synchronize_from_tb: Synchronize state from a TCG #TranslationBlock
120
*
121
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
122
index XXXXXXX..XXXXXXX 100644
123
--- a/target/alpha/cpu.h
124
+++ b/target/alpha/cpu.h
125
@@ -XXX,XX +XXX,XX @@ enum {
126
};
127
128
void alpha_translate_init(void);
129
+void alpha_translate_code(CPUState *cs, TranslationBlock *tb,
130
+ int *max_insns, vaddr pc, void *host_pc);
131
132
#define CPU_RESOLVING_TYPE TYPE_ALPHA_CPU
133
134
diff --git a/target/arm/internals.h b/target/arm/internals.h
135
index XXXXXXX..XXXXXXX 100644
136
--- a/target/arm/internals.h
137
+++ b/target/arm/internals.h
138
@@ -XXX,XX +XXX,XX @@ void init_cpreg_list(ARMCPU *cpu);
139
140
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
141
void arm_translate_init(void);
142
+void arm_translate_code(CPUState *cs, TranslationBlock *tb,
143
+ int *max_insns, vaddr pc, void *host_pc);
144
145
void arm_cpu_register_gdb_commands(ARMCPU *cpu);
146
void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *,
147
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
148
index XXXXXXX..XXXXXXX 100644
149
--- a/target/avr/cpu.h
150
+++ b/target/avr/cpu.h
151
@@ -XXX,XX +XXX,XX @@ static inline void set_avr_feature(CPUAVRState *env, int feature)
152
}
153
154
void avr_cpu_tcg_init(void);
155
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
156
+ int *max_insns, vaddr pc, void *host_pc);
157
158
int cpu_avr_exec(CPUState *cpu);
159
160
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
161
index XXXXXXX..XXXXXXX 100644
162
--- a/target/hexagon/cpu.h
163
+++ b/target/hexagon/cpu.h
164
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
165
typedef HexagonCPU ArchCPU;
166
167
void hexagon_translate_init(void);
168
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
169
+ int *max_insns, vaddr pc, void *host_pc);
170
171
#include "exec/cpu-all.h"
172
173
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
174
index XXXXXXX..XXXXXXX 100644
175
--- a/target/hppa/cpu.h
176
+++ b/target/hppa/cpu.h
177
@@ -XXX,XX +XXX,XX @@ static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
178
}
179
180
void hppa_translate_init(void);
181
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
182
+ int *max_insns, vaddr pc, void *host_pc);
183
184
#define CPU_RESOLVING_TYPE TYPE_HPPA_CPU
185
186
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/i386/tcg/helper-tcg.h
189
+++ b/target/i386/tcg/helper-tcg.h
190
@@ -XXX,XX +XXX,XX @@ static inline target_long lshift(target_long x, int n)
191
192
/* translate.c */
193
void tcg_x86_init(void);
194
+void x86_translate_code(CPUState *cs, TranslationBlock *tb,
195
+ int *max_insns, vaddr pc, void *host_pc);
196
197
/* excp_helper.c */
198
G_NORETURN void raise_exception(CPUX86State *env, int exception_index);
199
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
200
index XXXXXXX..XXXXXXX 100644
201
--- a/target/loongarch/internals.h
202
+++ b/target/loongarch/internals.h
203
@@ -XXX,XX +XXX,XX @@
204
#define TARGET_VIRT_MASK MAKE_64BIT_MASK(0, TARGET_VIRT_ADDR_SPACE_BITS)
205
206
void loongarch_translate_init(void);
207
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
208
+ int *max_insns, vaddr pc, void *host_pc);
209
210
void G_NORETURN do_raise_exception(CPULoongArchState *env,
211
uint32_t exception,
212
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
213
index XXXXXXX..XXXXXXX 100644
214
--- a/target/m68k/cpu.h
215
+++ b/target/m68k/cpu.h
216
@@ -XXX,XX +XXX,XX @@ int m68k_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
217
int m68k_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
218
219
void m68k_tcg_init(void);
220
+void m68k_translate_code(CPUState *cs, TranslationBlock *tb,
221
+ int *max_insns, vaddr pc, void *host_pc);
222
void m68k_cpu_init_gdb(M68kCPU *cpu);
223
uint32_t cpu_m68k_get_ccr(CPUM68KState *env);
224
void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t);
225
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
226
index XXXXXXX..XXXXXXX 100644
227
--- a/target/microblaze/cpu.h
228
+++ b/target/microblaze/cpu.h
229
@@ -XXX,XX +XXX,XX @@ static inline void mb_cpu_write_msr(CPUMBState *env, uint32_t val)
230
}
231
232
void mb_tcg_init(void);
233
+void mb_translate_code(CPUState *cs, TranslationBlock *tb,
234
+ int *max_insns, vaddr pc, void *host_pc);
235
236
#define CPU_RESOLVING_TYPE TYPE_MICROBLAZE_CPU
237
238
diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h
239
index XXXXXXX..XXXXXXX 100644
240
--- a/target/mips/tcg/tcg-internal.h
241
+++ b/target/mips/tcg/tcg-internal.h
242
@@ -XXX,XX +XXX,XX @@
243
#include "cpu.h"
244
245
void mips_tcg_init(void);
246
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
247
+ int *max_insns, vaddr pc, void *host_pc);
248
249
void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
250
G_NORETURN void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
251
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
252
index XXXXXXX..XXXXXXX 100644
253
--- a/target/openrisc/cpu.h
254
+++ b/target/openrisc/cpu.h
255
@@ -XXX,XX +XXX,XX @@ void openrisc_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
256
int openrisc_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
257
int openrisc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
258
void openrisc_translate_init(void);
259
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
260
+ int *max_insns, vaddr pc, void *host_pc);
261
int print_insn_or1k(bfd_vma addr, disassemble_info *info);
262
263
#ifndef CONFIG_USER_ONLY
264
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
265
index XXXXXXX..XXXXXXX 100644
266
--- a/target/ppc/cpu.h
267
+++ b/target/ppc/cpu.h
268
@@ -XXX,XX +XXX,XX @@ extern const VMStateDescription vmstate_ppc_cpu;
269
270
/*****************************************************************************/
271
void ppc_translate_init(void);
272
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
273
+ int *max_insns, vaddr pc, void *host_pc);
274
275
#if !defined(CONFIG_USER_ONLY)
276
void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
277
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
278
index XXXXXXX..XXXXXXX 100644
279
--- a/target/riscv/cpu.h
280
+++ b/target/riscv/cpu.h
281
@@ -XXX,XX +XXX,XX @@ RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit);
282
void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv, bool virt_en);
283
284
void riscv_translate_init(void);
285
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
286
+ int *max_insns, vaddr pc, void *host_pc);
287
+
288
G_NORETURN void riscv_raise_exception(CPURISCVState *env,
289
uint32_t exception, uintptr_t pc);
290
291
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
292
index XXXXXXX..XXXXXXX 100644
293
--- a/target/rx/cpu.h
294
+++ b/target/rx/cpu.h
295
@@ -XXX,XX +XXX,XX @@ int rx_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
296
int rx_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
297
298
void rx_translate_init(void);
299
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
300
+ int *max_insns, vaddr pc, void *host_pc);
301
void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte);
302
303
#include "exec/cpu-all.h"
304
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
305
index XXXXXXX..XXXXXXX 100644
306
--- a/target/s390x/s390x-internal.h
307
+++ b/target/s390x/s390x-internal.h
308
@@ -XXX,XX +XXX,XX @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3,
309
310
/* translate.c */
311
void s390x_translate_init(void);
312
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
313
+ int *max_insns, vaddr pc, void *host_pc);
314
void s390x_restore_state_to_opc(CPUState *cs,
315
const TranslationBlock *tb,
316
const uint64_t *data);
317
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
318
index XXXXXXX..XXXXXXX 100644
319
--- a/target/sh4/cpu.h
320
+++ b/target/sh4/cpu.h
321
@@ -XXX,XX +XXX,XX @@ G_NORETURN void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
322
uintptr_t retaddr);
323
324
void sh4_translate_init(void);
325
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
326
+ int *max_insns, vaddr pc, void *host_pc);
327
328
#if !defined(CONFIG_USER_ONLY)
329
hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
330
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
331
index XXXXXXX..XXXXXXX 100644
332
--- a/target/sparc/cpu.h
333
+++ b/target/sparc/cpu.h
334
@@ -XXX,XX +XXX,XX @@ int sparc_cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
335
336
/* translate.c */
337
void sparc_tcg_init(void);
338
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
339
+ int *max_insns, vaddr pc, void *host_pc);
340
341
/* fop_helper.c */
342
target_ulong cpu_get_fsr(CPUSPARCState *);
343
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
344
index XXXXXXX..XXXXXXX 100644
345
--- a/target/tricore/cpu.h
346
+++ b/target/tricore/cpu.h
347
@@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, PRIV, 0, 2)
348
349
void cpu_state_reset(CPUTriCoreState *s);
350
void tricore_tcg_init(void);
351
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
352
+ int *max_insns, vaddr pc, void *host_pc);
353
354
static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc,
355
uint64_t *cs_base, uint32_t *flags)
356
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
357
index XXXXXXX..XXXXXXX 100644
358
--- a/target/xtensa/cpu.h
359
+++ b/target/xtensa/cpu.h
360
@@ -XXX,XX +XXX,XX @@ G_NORETURN void xtensa_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
361
362
void xtensa_collect_sr_names(const XtensaConfig *config);
363
void xtensa_translate_init(void);
364
+void xtensa_translate_code(CPUState *cs, TranslationBlock *tb,
365
+ int *max_insns, vaddr pc, void *host_pc);
366
void **xtensa_get_regfile_by_name(const char *name, int entries, int bits);
367
void xtensa_breakpoint_handler(CPUState *cs);
368
void xtensa_register_core(XtensaConfigList *node);
369
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
370
index XXXXXXX..XXXXXXX 100644
371
--- a/accel/tcg/cpu-exec.c
372
+++ b/accel/tcg/cpu-exec.c
373
@@ -XXX,XX +XXX,XX @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
374
375
if (!tcg_target_initialized) {
376
/* Check mandatory TCGCPUOps handlers */
377
+ const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
378
#ifndef CONFIG_USER_ONLY
379
- assert(cpu->cc->tcg_ops->cpu_exec_halt);
380
- assert(cpu->cc->tcg_ops->cpu_exec_interrupt);
381
+ assert(tcg_ops->cpu_exec_halt);
382
+ assert(tcg_ops->cpu_exec_interrupt);
383
#endif /* !CONFIG_USER_ONLY */
384
- cpu->cc->tcg_ops->initialize();
385
+ assert(tcg_ops->translate_code);
386
+ tcg_ops->initialize();
387
tcg_target_initialized = true;
388
}
389
390
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
391
index XXXXXXX..XXXXXXX 100644
392
--- a/accel/tcg/translate-all.c
393
+++ b/accel/tcg/translate-all.c
394
@@ -XXX,XX +XXX,XX @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
395
396
tcg_func_start(tcg_ctx);
397
398
- tcg_ctx->cpu = env_cpu(env);
399
- gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
400
+ CPUState *cs = env_cpu(env);
401
+ tcg_ctx->cpu = cs;
402
+ cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
403
+
404
assert(tb->size != 0);
405
tcg_ctx->cpu = NULL;
406
*max_insns = tb->icount;
407
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
408
/*
409
* Overflow of code_gen_buffer, or the current slice of it.
410
*
411
- * TODO: We don't need to re-do gen_intermediate_code, nor
412
+ * TODO: We don't need to re-do tcg_ops->translate_code, nor
413
* should we re-do the tcg optimization currently hidden
414
* inside tcg_gen_code. All that should be required is to
415
* flush the TBs, allocate a new TB, re-initialize it per
416
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
417
index XXXXXXX..XXXXXXX 100644
418
--- a/target/alpha/cpu.c
419
+++ b/target/alpha/cpu.c
420
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps alpha_sysemu_ops = {
421
422
static const TCGCPUOps alpha_tcg_ops = {
423
.initialize = alpha_translate_init,
424
+ .translate_code = alpha_translate_code,
425
.synchronize_from_tb = alpha_cpu_synchronize_from_tb,
426
.restore_state_to_opc = alpha_restore_state_to_opc,
427
428
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
429
index XXXXXXX..XXXXXXX 100644
430
--- a/target/alpha/translate.c
431
+++ b/target/alpha/translate.c
432
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps alpha_tr_ops = {
433
.tb_stop = alpha_tr_tb_stop,
434
};
435
436
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
437
- vaddr pc, void *host_pc)
438
+void alpha_translate_code(CPUState *cpu, TranslationBlock *tb,
439
+ int *max_insns, vaddr pc, void *host_pc)
440
{
441
DisasContext dc;
442
translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
443
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
444
index XXXXXXX..XXXXXXX 100644
445
--- a/target/arm/cpu.c
446
+++ b/target/arm/cpu.c
447
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps arm_sysemu_ops = {
448
#ifdef CONFIG_TCG
449
static const TCGCPUOps arm_tcg_ops = {
450
.initialize = arm_translate_init,
451
+ .translate_code = arm_translate_code,
452
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
453
.debug_excp_handler = arm_debug_excp_handler,
454
.restore_state_to_opc = arm_restore_state_to_opc,
455
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
456
index XXXXXXX..XXXXXXX 100644
457
--- a/target/arm/tcg/cpu-v7m.c
458
+++ b/target/arm/tcg/cpu-v7m.c
459
@@ -XXX,XX +XXX,XX @@ static void cortex_m55_initfn(Object *obj)
460
461
static const TCGCPUOps arm_v7m_tcg_ops = {
462
.initialize = arm_translate_init,
463
+ .translate_code = arm_translate_code,
464
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
465
.debug_excp_handler = arm_debug_excp_handler,
466
.restore_state_to_opc = arm_restore_state_to_opc,
467
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
468
index XXXXXXX..XXXXXXX 100644
469
--- a/target/arm/tcg/translate.c
470
+++ b/target/arm/tcg/translate.c
471
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps thumb_translator_ops = {
472
.tb_stop = arm_tr_tb_stop,
473
};
474
475
-/* generate intermediate code for basic block 'tb'. */
476
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
477
- vaddr pc, void *host_pc)
478
+void arm_translate_code(CPUState *cpu, TranslationBlock *tb,
479
+ int *max_insns, vaddr pc, void *host_pc)
480
{
481
DisasContext dc = { };
482
const TranslatorOps *ops = &arm_translator_ops;
483
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
484
index XXXXXXX..XXXXXXX 100644
485
--- a/target/avr/cpu.c
486
+++ b/target/avr/cpu.c
487
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps avr_sysemu_ops = {
488
489
static const TCGCPUOps avr_tcg_ops = {
490
.initialize = avr_cpu_tcg_init,
491
+ .translate_code = avr_cpu_translate_code,
492
.synchronize_from_tb = avr_cpu_synchronize_from_tb,
493
.restore_state_to_opc = avr_restore_state_to_opc,
494
.cpu_exec_interrupt = avr_cpu_exec_interrupt,
495
diff --git a/target/avr/translate.c b/target/avr/translate.c
496
index XXXXXXX..XXXXXXX 100644
497
--- a/target/avr/translate.c
498
+++ b/target/avr/translate.c
499
@@ -XXX,XX +XXX,XX @@ static bool trans_WDR(DisasContext *ctx, arg_WDR *a)
500
*
501
* - translate()
502
* - canonicalize_skip()
503
- * - gen_intermediate_code()
504
+ * - translate_code()
505
* - restore_state_to_opc()
506
*
507
*/
508
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps avr_tr_ops = {
509
.tb_stop = avr_tr_tb_stop,
510
};
511
512
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
513
- vaddr pc, void *host_pc)
514
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
515
+ int *max_insns, vaddr pc, void *host_pc)
516
{
517
DisasContext dc = { };
518
translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
519
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
520
index XXXXXXX..XXXXXXX 100644
521
--- a/target/hexagon/cpu.c
522
+++ b/target/hexagon/cpu.c
523
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_init(Object *obj)
524
525
static const TCGCPUOps hexagon_tcg_ops = {
526
.initialize = hexagon_translate_init,
527
+ .translate_code = hexagon_translate_code,
528
.synchronize_from_tb = hexagon_cpu_synchronize_from_tb,
529
.restore_state_to_opc = hexagon_restore_state_to_opc,
530
};
531
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
532
index XXXXXXX..XXXXXXX 100644
533
--- a/target/hexagon/translate.c
534
+++ b/target/hexagon/translate.c
535
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hexagon_tr_ops = {
536
.tb_stop = hexagon_tr_tb_stop,
537
};
538
539
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
540
- vaddr pc, void *host_pc)
541
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
542
+ int *max_insns, vaddr pc, void *host_pc)
543
{
544
DisasContext ctx;
545
546
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
547
index XXXXXXX..XXXXXXX 100644
548
--- a/target/hppa/cpu.c
549
+++ b/target/hppa/cpu.c
550
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps hppa_sysemu_ops = {
551
552
static const TCGCPUOps hppa_tcg_ops = {
553
.initialize = hppa_translate_init,
554
+ .translate_code = hppa_translate_code,
555
.synchronize_from_tb = hppa_cpu_synchronize_from_tb,
556
.restore_state_to_opc = hppa_restore_state_to_opc,
557
558
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
559
index XXXXXXX..XXXXXXX 100644
560
--- a/target/hppa/translate.c
561
+++ b/target/hppa/translate.c
562
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hppa_tr_ops = {
563
#endif
564
};
565
566
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
567
- vaddr pc, void *host_pc)
568
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
569
+ int *max_insns, vaddr pc, void *host_pc)
570
{
571
DisasContext ctx = { };
572
translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
573
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
574
index XXXXXXX..XXXXXXX 100644
575
--- a/target/i386/tcg/tcg-cpu.c
576
+++ b/target/i386/tcg/tcg-cpu.c
577
@@ -XXX,XX +XXX,XX @@ static bool x86_debug_check_breakpoint(CPUState *cs)
578
579
static const TCGCPUOps x86_tcg_ops = {
580
.initialize = tcg_x86_init,
581
+ .translate_code = x86_translate_code,
582
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
583
.restore_state_to_opc = x86_restore_state_to_opc,
584
.cpu_exec_enter = x86_cpu_exec_enter,
585
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
586
index XXXXXXX..XXXXXXX 100644
587
--- a/target/i386/tcg/translate.c
588
+++ b/target/i386/tcg/translate.c
589
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps i386_tr_ops = {
590
.tb_stop = i386_tr_tb_stop,
591
};
592
593
-/* generate intermediate code for basic block 'tb'. */
594
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
595
- vaddr pc, void *host_pc)
596
+void x86_translate_code(CPUState *cpu, TranslationBlock *tb,
597
+ int *max_insns, vaddr pc, void *host_pc)
598
{
599
DisasContext dc;
600
601
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
602
index XXXXXXX..XXXXXXX 100644
603
--- a/target/loongarch/cpu.c
604
+++ b/target/loongarch/cpu.c
605
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)
606
607
static const TCGCPUOps loongarch_tcg_ops = {
608
.initialize = loongarch_translate_init,
609
+ .translate_code = loongarch_translate_code,
610
.synchronize_from_tb = loongarch_cpu_synchronize_from_tb,
611
.restore_state_to_opc = loongarch_restore_state_to_opc,
612
613
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
614
index XXXXXXX..XXXXXXX 100644
615
--- a/target/loongarch/tcg/translate.c
616
+++ b/target/loongarch/tcg/translate.c
617
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps loongarch_tr_ops = {
618
.tb_stop = loongarch_tr_tb_stop,
619
};
620
621
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
622
- vaddr pc, void *host_pc)
623
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
624
+ int *max_insns, vaddr pc, void *host_pc)
625
{
626
DisasContext ctx;
627
628
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
629
index XXXXXXX..XXXXXXX 100644
630
--- a/target/m68k/cpu.c
631
+++ b/target/m68k/cpu.c
632
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps m68k_sysemu_ops = {
633
634
static const TCGCPUOps m68k_tcg_ops = {
635
.initialize = m68k_tcg_init,
636
+ .translate_code = m68k_translate_code,
637
.restore_state_to_opc = m68k_restore_state_to_opc,
638
639
#ifndef CONFIG_USER_ONLY
640
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
641
index XXXXXXX..XXXXXXX 100644
642
--- a/target/m68k/translate.c
643
+++ b/target/m68k/translate.c
644
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps m68k_tr_ops = {
645
.tb_stop = m68k_tr_tb_stop,
646
};
647
648
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
649
- vaddr pc, void *host_pc)
650
+void m68k_translate_code(CPUState *cpu, TranslationBlock *tb,
651
+ int *max_insns, vaddr pc, void *host_pc)
652
{
653
DisasContext dc;
654
translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base);
655
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
656
index XXXXXXX..XXXXXXX 100644
657
--- a/target/microblaze/cpu.c
658
+++ b/target/microblaze/cpu.c
659
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps mb_sysemu_ops = {
660
661
static const TCGCPUOps mb_tcg_ops = {
662
.initialize = mb_tcg_init,
663
+ .translate_code = mb_translate_code,
664
.synchronize_from_tb = mb_cpu_synchronize_from_tb,
665
.restore_state_to_opc = mb_restore_state_to_opc,
666
667
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
668
index XXXXXXX..XXXXXXX 100644
669
--- a/target/microblaze/translate.c
670
+++ b/target/microblaze/translate.c
671
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mb_tr_ops = {
672
.tb_stop = mb_tr_tb_stop,
673
};
674
675
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
676
- vaddr pc, void *host_pc)
677
+void mb_translate_code(CPUState *cpu, TranslationBlock *tb,
678
+ int *max_insns, vaddr pc, void *host_pc)
679
{
680
DisasContext dc;
681
translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base);
682
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
683
index XXXXXXX..XXXXXXX 100644
684
--- a/target/mips/cpu.c
685
+++ b/target/mips/cpu.c
686
@@ -XXX,XX +XXX,XX @@ static const Property mips_cpu_properties[] = {
687
#include "hw/core/tcg-cpu-ops.h"
688
static const TCGCPUOps mips_tcg_ops = {
689
.initialize = mips_tcg_init,
690
+ .translate_code = mips_translate_code,
691
.synchronize_from_tb = mips_cpu_synchronize_from_tb,
692
.restore_state_to_opc = mips_restore_state_to_opc,
693
694
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
695
index XXXXXXX..XXXXXXX 100644
696
--- a/target/mips/tcg/translate.c
697
+++ b/target/mips/tcg/translate.c
698
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mips_tr_ops = {
699
.tb_stop = mips_tr_tb_stop,
700
};
701
702
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
703
- vaddr pc, void *host_pc)
704
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
705
+ int *max_insns, vaddr pc, void *host_pc)
706
{
707
DisasContext ctx;
708
709
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
710
index XXXXXXX..XXXXXXX 100644
711
--- a/target/openrisc/cpu.c
712
+++ b/target/openrisc/cpu.c
713
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps openrisc_sysemu_ops = {
714
715
static const TCGCPUOps openrisc_tcg_ops = {
716
.initialize = openrisc_translate_init,
717
+ .translate_code = openrisc_translate_code,
718
.synchronize_from_tb = openrisc_cpu_synchronize_from_tb,
719
.restore_state_to_opc = openrisc_restore_state_to_opc,
720
721
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
722
index XXXXXXX..XXXXXXX 100644
723
--- a/target/openrisc/translate.c
724
+++ b/target/openrisc/translate.c
725
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps openrisc_tr_ops = {
726
.tb_stop = openrisc_tr_tb_stop,
727
};
728
729
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
730
- vaddr pc, void *host_pc)
731
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
732
+ int *max_insns, vaddr pc, void *host_pc)
733
{
734
DisasContext ctx;
735
736
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
737
index XXXXXXX..XXXXXXX 100644
738
--- a/target/ppc/cpu_init.c
739
+++ b/target/ppc/cpu_init.c
740
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps ppc_sysemu_ops = {
741
742
static const TCGCPUOps ppc_tcg_ops = {
743
.initialize = ppc_translate_init,
744
+ .translate_code = ppc_translate_code,
745
.restore_state_to_opc = ppc_restore_state_to_opc,
746
747
#ifdef CONFIG_USER_ONLY
748
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
749
index XXXXXXX..XXXXXXX 100644
750
--- a/target/ppc/translate.c
751
+++ b/target/ppc/translate.c
752
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps ppc_tr_ops = {
753
.tb_stop = ppc_tr_tb_stop,
754
};
755
756
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
757
- vaddr pc, void *host_pc)
758
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
759
+ int *max_insns, vaddr pc, void *host_pc)
760
{
761
DisasContext ctx;
762
763
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
764
index XXXXXXX..XXXXXXX 100644
765
--- a/target/riscv/tcg/tcg-cpu.c
766
+++ b/target/riscv/tcg/tcg-cpu.c
767
@@ -XXX,XX +XXX,XX @@ static void riscv_restore_state_to_opc(CPUState *cs,
768
769
static const TCGCPUOps riscv_tcg_ops = {
770
.initialize = riscv_translate_init,
771
+ .translate_code = riscv_translate_code,
772
.synchronize_from_tb = riscv_cpu_synchronize_from_tb,
773
.restore_state_to_opc = riscv_restore_state_to_opc,
774
775
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
776
index XXXXXXX..XXXXXXX 100644
777
--- a/target/riscv/translate.c
778
+++ b/target/riscv/translate.c
779
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps riscv_tr_ops = {
780
.tb_stop = riscv_tr_tb_stop,
781
};
782
783
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
784
- vaddr pc, void *host_pc)
785
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
786
+ int *max_insns, vaddr pc, void *host_pc)
787
{
788
DisasContext ctx;
789
790
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
791
index XXXXXXX..XXXXXXX 100644
792
--- a/target/rx/cpu.c
793
+++ b/target/rx/cpu.c
794
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps rx_sysemu_ops = {
795
796
static const TCGCPUOps rx_tcg_ops = {
797
.initialize = rx_translate_init,
798
+ .translate_code = rx_translate_code,
799
.synchronize_from_tb = rx_cpu_synchronize_from_tb,
800
.restore_state_to_opc = rx_restore_state_to_opc,
801
.tlb_fill = rx_cpu_tlb_fill,
802
diff --git a/target/rx/translate.c b/target/rx/translate.c
803
index XXXXXXX..XXXXXXX 100644
804
--- a/target/rx/translate.c
805
+++ b/target/rx/translate.c
806
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps rx_tr_ops = {
807
.tb_stop = rx_tr_tb_stop,
808
};
809
810
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
811
- vaddr pc, void *host_pc)
812
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
813
+ int *max_insns, vaddr pc, void *host_pc)
814
{
815
DisasContext dc;
816
817
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
818
index XXXXXXX..XXXXXXX 100644
819
--- a/target/s390x/cpu.c
820
+++ b/target/s390x/cpu.c
821
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc,
822
823
static const TCGCPUOps s390_tcg_ops = {
824
.initialize = s390x_translate_init,
825
+ .translate_code = s390x_translate_code,
826
.restore_state_to_opc = s390x_restore_state_to_opc,
827
828
#ifdef CONFIG_USER_ONLY
829
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
830
index XXXXXXX..XXXXXXX 100644
831
--- a/target/s390x/tcg/translate.c
832
+++ b/target/s390x/tcg/translate.c
833
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps s390x_tr_ops = {
834
.disas_log = s390x_tr_disas_log,
835
};
836
837
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
838
- vaddr pc, void *host_pc)
839
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
840
+ int *max_insns, vaddr pc, void *host_pc)
841
{
842
DisasContext dc;
843
844
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
845
index XXXXXXX..XXXXXXX 100644
846
--- a/target/sh4/cpu.c
847
+++ b/target/sh4/cpu.c
848
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sh4_sysemu_ops = {
849
850
static const TCGCPUOps superh_tcg_ops = {
851
.initialize = sh4_translate_init,
852
+ .translate_code = sh4_translate_code,
853
.synchronize_from_tb = superh_cpu_synchronize_from_tb,
854
.restore_state_to_opc = superh_restore_state_to_opc,
855
9
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
856
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
10
index XXXXXXX..XXXXXXX 100644
857
index XXXXXXX..XXXXXXX 100644
11
--- a/target/sh4/translate.c
858
--- a/target/sh4/translate.c
12
+++ b/target/sh4/translate.c
859
+++ b/target/sh4/translate.c
13
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
860
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sh4_tr_ops = {
14
case 0x9000:        /* mov.w @(disp,PC),Rn */
861
.tb_stop = sh4_tr_tb_stop,
15
    {
862
};
16
TCGv addr = tcg_constant_i32(ctx->base.pc_next + 4 + B7_0 * 2);
863
17
- tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
864
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
18
+ tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
865
- vaddr pc, void *host_pc)
19
+ MO_TESW | MO_ALIGN);
866
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
20
    }
867
+ int *max_insns, vaddr pc, void *host_pc)
21
    return;
868
{
22
case 0xd000:        /* mov.l @(disp,PC),Rn */
869
DisasContext ctx;
23
    {
870
24
TCGv addr = tcg_constant_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3);
871
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
25
- tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
872
index XXXXXXX..XXXXXXX 100644
26
+ tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
873
--- a/target/sparc/cpu.c
27
+ MO_TESL | MO_ALIGN);
874
+++ b/target/sparc/cpu.c
28
    }
875
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sparc_sysemu_ops = {
29
    return;
876
30
case 0x7000:        /* add #imm,Rn */
877
static const TCGCPUOps sparc_tcg_ops = {
31
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
878
.initialize = sparc_tcg_init,
32
    {
879
+ .translate_code = sparc_translate_code,
33
     TCGv arg0, arg1;
880
.synchronize_from_tb = sparc_cpu_synchronize_from_tb,
34
     arg0 = tcg_temp_new();
881
.restore_state_to_opc = sparc_restore_state_to_opc,
35
- tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
882
36
+ tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx,
883
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
37
+ MO_TESL | MO_ALIGN);
884
index XXXXXXX..XXXXXXX 100644
38
     arg1 = tcg_temp_new();
885
--- a/target/sparc/translate.c
39
- tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
886
+++ b/target/sparc/translate.c
40
+ tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx,
887
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sparc_tr_ops = {
41
+ MO_TESL | MO_ALIGN);
888
.tb_stop = sparc_tr_tb_stop,
42
gen_helper_macl(cpu_env, arg0, arg1);
889
};
43
     tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
890
44
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
891
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
45
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
892
- vaddr pc, void *host_pc)
46
    {
893
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
47
     TCGv arg0, arg1;
894
+ int *max_insns, vaddr pc, void *host_pc)
48
     arg0 = tcg_temp_new();
895
{
49
- tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
896
DisasContext dc = {};
50
+ tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx,
897
51
+ MO_TESL | MO_ALIGN);
898
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
52
     arg1 = tcg_temp_new();
899
index XXXXXXX..XXXXXXX 100644
53
- tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
900
--- a/target/tricore/cpu.c
54
+ tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx,
901
+++ b/target/tricore/cpu.c
55
+ MO_TESL | MO_ALIGN);
902
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps tricore_sysemu_ops = {
56
gen_helper_macw(cpu_env, arg0, arg1);
903
57
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2);
904
static const TCGCPUOps tricore_tcg_ops = {
58
     tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
905
.initialize = tricore_tcg_init,
59
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
906
+ .translate_code = tricore_translate_code,
60
if (ctx->tbflags & FPSCR_SZ) {
907
.synchronize_from_tb = tricore_cpu_synchronize_from_tb,
61
TCGv_i64 fp = tcg_temp_new_i64();
908
.restore_state_to_opc = tricore_restore_state_to_opc,
62
gen_load_fpr64(ctx, fp, XHACK(B7_4));
909
.tlb_fill = tricore_cpu_tlb_fill,
63
- tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEUQ);
910
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
64
+ tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx,
911
index XXXXXXX..XXXXXXX 100644
65
+ MO_TEUQ | MO_ALIGN);
912
--- a/target/tricore/translate.c
66
    } else {
913
+++ b/target/tricore/translate.c
67
- tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
914
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps tricore_tr_ops = {
68
+ tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx,
915
.tb_stop = tricore_tr_tb_stop,
69
+ MO_TEUL | MO_ALIGN);
916
};
70
    }
917
71
    return;
918
-
72
case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
919
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
73
    CHECK_FPU_ENABLED
920
- vaddr pc, void *host_pc)
74
if (ctx->tbflags & FPSCR_SZ) {
921
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
75
TCGv_i64 fp = tcg_temp_new_i64();
922
+ int *max_insns, vaddr pc, void *host_pc)
76
- tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
923
{
77
+ tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx,
924
DisasContext ctx;
78
+ MO_TEUQ | MO_ALIGN);
925
translator_loop(cs, tb, max_insns, pc, host_pc,
79
gen_store_fpr64(ctx, fp, XHACK(B11_8));
926
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
80
    } else {
927
index XXXXXXX..XXXXXXX 100644
81
- tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
928
--- a/target/xtensa/cpu.c
82
+ tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx,
929
+++ b/target/xtensa/cpu.c
83
+ MO_TEUL | MO_ALIGN);
930
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps xtensa_sysemu_ops = {
84
    }
931
85
    return;
932
static const TCGCPUOps xtensa_tcg_ops = {
86
case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
933
.initialize = xtensa_translate_init,
87
    CHECK_FPU_ENABLED
934
+ .translate_code = xtensa_translate_code,
88
if (ctx->tbflags & FPSCR_SZ) {
935
.debug_excp_handler = xtensa_breakpoint_handler,
89
TCGv_i64 fp = tcg_temp_new_i64();
936
.restore_state_to_opc = xtensa_restore_state_to_opc,
90
- tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
937
91
+ tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx,
938
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
92
+ MO_TEUQ | MO_ALIGN);
939
index XXXXXXX..XXXXXXX 100644
93
gen_store_fpr64(ctx, fp, XHACK(B11_8));
940
--- a/target/xtensa/translate.c
94
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8);
941
+++ b/target/xtensa/translate.c
95
    } else {
942
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps xtensa_translator_ops = {
96
- tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
943
.tb_stop = xtensa_tr_tb_stop,
97
+ tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx,
944
};
98
+ MO_TEUL | MO_ALIGN);
945
99
     tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
946
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
100
    }
947
- vaddr pc, void *host_pc)
101
    return;
948
+void xtensa_translate_code(CPUState *cpu, TranslationBlock *tb,
102
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
949
+ int *max_insns, vaddr pc, void *host_pc)
103
TCGv_i64 fp = tcg_temp_new_i64();
950
{
104
gen_load_fpr64(ctx, fp, XHACK(B7_4));
951
DisasContext dc = {};
105
tcg_gen_subi_i32(addr, REG(B11_8), 8);
952
translator_loop(cpu, tb, max_insns, pc, host_pc,
106
- tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
107
+ tcg_gen_qemu_st_i64(fp, addr, ctx->memidx,
108
+ MO_TEUQ | MO_ALIGN);
109
} else {
110
tcg_gen_subi_i32(addr, REG(B11_8), 4);
111
- tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
112
+ tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx,
113
+ MO_TEUL | MO_ALIGN);
114
}
115
tcg_gen_mov_i32(REG(B11_8), addr);
116
}
117
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
118
     tcg_gen_add_i32(addr, REG(B7_4), REG(0));
119
if (ctx->tbflags & FPSCR_SZ) {
120
TCGv_i64 fp = tcg_temp_new_i64();
121
- tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEUQ);
122
+ tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx,
123
+ MO_TEUQ | MO_ALIGN);
124
gen_store_fpr64(ctx, fp, XHACK(B11_8));
125
     } else {
126
- tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL);
127
+ tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx,
128
+ MO_TEUL | MO_ALIGN);
129
     }
130
    }
131
    return;
132
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
133
if (ctx->tbflags & FPSCR_SZ) {
134
TCGv_i64 fp = tcg_temp_new_i64();
135
gen_load_fpr64(ctx, fp, XHACK(B7_4));
136
- tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
137
+ tcg_gen_qemu_st_i64(fp, addr, ctx->memidx,
138
+ MO_TEUQ | MO_ALIGN);
139
     } else {
140
- tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
141
+ tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx,
142
+ MO_TEUL | MO_ALIGN);
143
     }
144
    }
145
    return;
146
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
147
    {
148
     TCGv addr = tcg_temp_new();
149
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
150
- tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW);
151
+ tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW | MO_ALIGN);
152
    }
153
    return;
154
case 0xc600:        /* mov.l @(disp,GBR),R0 */
155
    {
156
     TCGv addr = tcg_temp_new();
157
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
158
- tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL);
159
+ tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL | MO_ALIGN);
160
    }
161
    return;
162
case 0xc000:        /* mov.b R0,@(disp,GBR) */
163
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
164
    {
165
     TCGv addr = tcg_temp_new();
166
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
167
- tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW);
168
+ tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW | MO_ALIGN);
169
    }
170
    return;
171
case 0xc200:        /* mov.l R0,@(disp,GBR) */
172
    {
173
     TCGv addr = tcg_temp_new();
174
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
175
- tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL);
176
+ tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL | MO_ALIGN);
177
    }
178
    return;
179
case 0x8000:        /* mov.b R0,@(disp,Rn) */
180
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
181
    return;
182
case 0x4087:        /* ldc.l @Rm+,Rn_BANK */
183
    CHECK_PRIVILEGED
184
- tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, MO_TESL);
185
+ tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx,
186
+ MO_TESL | MO_ALIGN);
187
    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
188
    return;
189
case 0x0082:        /* stc Rm_BANK,Rn */
190
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
191
    {
192
     TCGv addr = tcg_temp_new();
193
     tcg_gen_subi_i32(addr, REG(B11_8), 4);
194
- tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, MO_TEUL);
195
+ tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx,
196
+ MO_TEUL | MO_ALIGN);
197
     tcg_gen_mov_i32(REG(B11_8), addr);
198
    }
199
    return;
200
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
201
    CHECK_PRIVILEGED
202
    {
203
     TCGv val = tcg_temp_new();
204
- tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL);
205
+ tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx,
206
+ MO_TESL | MO_ALIGN);
207
tcg_gen_andi_i32(val, val, 0x700083f3);
208
gen_write_sr(val);
209
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
210
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
211
TCGv val = tcg_temp_new();
212
     tcg_gen_subi_i32(addr, REG(B11_8), 4);
213
gen_read_sr(val);
214
- tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
215
+ tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN);
216
     tcg_gen_mov_i32(REG(B11_8), addr);
217
    }
218
    return;
219
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
220
return;                            \
221
case ldpnum:                            \
222
prechk                             \
223
- tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, MO_TESL); \
224
+ tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, \
225
+ MO_TESL | MO_ALIGN); \
226
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);        \
227
return;
228
#define ST(reg,stnum,stpnum,prechk)        \
229
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
230
{                                \
231
    TCGv addr = tcg_temp_new();                \
232
    tcg_gen_subi_i32(addr, REG(B11_8), 4);            \
233
- tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, MO_TEUL); \
234
+ tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, \
235
+ MO_TEUL | MO_ALIGN); \
236
    tcg_gen_mov_i32(REG(B11_8), addr);            \
237
}                                \
238
return;
239
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
240
    CHECK_FPU_ENABLED
241
    {
242
     TCGv addr = tcg_temp_new();
243
- tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, MO_TESL);
244
+ tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx,
245
+ MO_TESL | MO_ALIGN);
246
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
247
gen_helper_ld_fpscr(cpu_env, addr);
248
ctx->base.is_jmp = DISAS_STOP;
249
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
250
     tcg_gen_andi_i32(val, cpu_fpscr, 0x003fffff);
251
     addr = tcg_temp_new();
252
     tcg_gen_subi_i32(addr, REG(B11_8), 4);
253
- tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
254
+ tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN);
255
     tcg_gen_mov_i32(REG(B11_8), addr);
256
    }
257
    return;
258
case 0x00c3:        /* movca.l R0,@Rm */
259
{
260
TCGv val = tcg_temp_new();
261
- tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TEUL);
262
+ tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx,
263
+ MO_TEUL | MO_ALIGN);
264
gen_helper_movcal(cpu_env, REG(B11_8), val);
265
- tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
266
+ tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx,
267
+ MO_TEUL | MO_ALIGN);
268
}
269
ctx->has_movcal = 1;
270
    return;
271
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
272
cpu_lock_addr, fail);
273
tmp = tcg_temp_new();
274
tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value,
275
- REG(0), ctx->memidx, MO_TEUL);
276
+ REG(0), ctx->memidx,
277
+ MO_TEUL | MO_ALIGN);
278
tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value);
279
} else {
280
tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail);
281
- tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
282
+ tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx,
283
+ MO_TEUL | MO_ALIGN);
284
tcg_gen_movi_i32(cpu_sr_t, 1);
285
}
286
tcg_gen_br(done);
287
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
288
if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
289
TCGv tmp = tcg_temp_new();
290
tcg_gen_mov_i32(tmp, REG(B11_8));
291
- tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
292
+ tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
293
+ MO_TESL | MO_ALIGN);
294
tcg_gen_mov_i32(cpu_lock_value, REG(0));
295
tcg_gen_mov_i32(cpu_lock_addr, tmp);
296
} else {
297
- tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
298
+ tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
299
+ MO_TESL | MO_ALIGN);
300
tcg_gen_movi_i32(cpu_lock_addr, 0);
301
}
302
return;
303
--
953
--
304
2.34.1
954
2.43.0
305
955
306
956