The following changes since commit aa3a285b5bc56a4208b3b57d4a55291e9c260107:

  Merge tag 'mem-2024-12-21' of https://github.com/davidhildenbrand/qemu into staging (2024-12-22 14:33:27 -0500)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241224

for you to fetch changes up to e4a8e093dc74be049f4829831dce76e5edab0003:

  accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core (2024-12-24 08:32:15 -0800)

----------------------------------------------------------------
tcg/optimize: Remove in-flight mask data from OptContext
fpu: Add float*_muladd_scalbn
fpu: Remove float_muladd_halve_result
fpu: Add float_round_nearest_even_max
fpu: Add float_muladd_suppress_add_product_zero
target/hexagon: Use float32_muladd
accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

----------------------------------------------------------------
Ilya Leoshkevich (1):
      tests/tcg: Do not use inttypes.h in multiarch/system/memory.c

Pierrick Bouvier (1):
      plugins: optimize cpu_index code generation

Richard Henderson (70):
      tcg/optimize: Split out finish_bb, finish_ebb
      tcg/optimize: Split out fold_affected_mask
      tcg/optimize: Copy mask writeback to fold_masks
      tcg/optimize: Split out fold_masks_zs
      tcg/optimize: Augment s_mask from z_mask in fold_masks_zs
      tcg/optimize: Change representation of s_mask
      tcg/optimize: Use finish_folding in fold_add, fold_add_vec, fold_addsub2
      tcg/optimize: Introduce const value accessors for TempOptInfo
      tcg/optimize: Use fold_masks_zs in fold_and
      tcg/optimize: Use fold_masks_zs in fold_andc
      tcg/optimize: Use fold_masks_zs in fold_bswap
      tcg/optimize: Use fold_masks_zs in fold_count_zeros
      tcg/optimize: Use fold_masks_z in fold_ctpop
      tcg/optimize: Use fold_and and fold_masks_z in fold_deposit
      tcg/optimize: Compute sign mask in fold_deposit
      tcg/optimize: Use finish_folding in fold_divide
      tcg/optimize: Use finish_folding in fold_dup, fold_dup2
      tcg/optimize: Use fold_masks_s in fold_eqv
      tcg/optimize: Use fold_masks_z in fold_extract
      tcg/optimize: Use finish_folding in fold_extract2
      tcg/optimize: Use fold_masks_zs in fold_exts
      tcg/optimize: Use fold_masks_z in fold_extu
      tcg/optimize: Use fold_masks_zs in fold_movcond
      tcg/optimize: Use finish_folding in fold_mul*
      tcg/optimize: Use fold_masks_s in fold_nand
      tcg/optimize: Use fold_masks_z in fold_neg_no_const
      tcg/optimize: Use fold_masks_s in fold_nor
      tcg/optimize: Use fold_masks_s in fold_not
      tcg/optimize: Use fold_masks_zs in fold_or
      tcg/optimize: Use fold_masks_zs in fold_orc
      tcg/optimize: Use fold_masks_zs in fold_qemu_ld
      tcg/optimize: Return true from fold_qemu_st, fold_tcg_st
      tcg/optimize: Use finish_folding in fold_remainder
      tcg/optimize: Distinguish simplification in fold_setcond_zmask
      tcg/optimize: Use fold_masks_z in fold_setcond
      tcg/optimize: Use fold_masks_s in fold_negsetcond
      tcg/optimize: Use fold_masks_z in fold_setcond2
      tcg/optimize: Use finish_folding in fold_cmp_vec
      tcg/optimize: Use finish_folding in fold_cmpsel_vec
      tcg/optimize: Use fold_masks_zs in fold_sextract
      tcg/optimize: Use fold_masks_zs, fold_masks_s in fold_shift
      tcg/optimize: Simplify sign bit test in fold_shift
      tcg/optimize: Use finish_folding in fold_sub, fold_sub_vec
      tcg/optimize: Use fold_masks_zs in fold_tcg_ld
      tcg/optimize: Use finish_folding in fold_tcg_ld_memcopy
      tcg/optimize: Use fold_masks_zs in fold_xor
      tcg/optimize: Use finish_folding in fold_bitsel_vec
      tcg/optimize: Use finish_folding as default in tcg_optimize
      tcg/optimize: Remove z_mask, s_mask from OptContext
      tcg/optimize: Re-enable sign-mask optimizations
      tcg/optimize: Move fold_bitsel_vec into alphabetic sort
      tcg/optimize: Move fold_cmp_vec, fold_cmpsel_vec into alphabetic sort
      softfloat: Add float{16,32,64}_muladd_scalbn
      target/arm: Use float*_muladd_scalbn
      target/sparc: Use float*_muladd_scalbn
      softfloat: Remove float_muladd_halve_result
      softfloat: Add float_round_nearest_even_max
      softfloat: Add float_muladd_suppress_add_product_zero
      target/hexagon: Use float32_mul in helper_sfmpy
      target/hexagon: Use float32_muladd for helper_sffma
      target/hexagon: Use float32_muladd for helper_sffms
      target/hexagon: Use float32_muladd_scalbn for helper_sffma_sc
      target/hexagon: Use float32_muladd for helper_sffm[as]_lib
      target/hexagon: Remove internal_fmafx
      target/hexagon: Expand GEN_XF_ROUND
      target/hexagon: Remove Float
      target/hexagon: Remove Double
      target/hexagon: Use mulu64 for int128_mul_6464
      target/hexagon: Simplify internal_mpyhh setup
      accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

 include/exec/translator.h | 14 -
 include/fpu/softfloat-types.h | 2 +
 include/fpu/softfloat.h | 14 +-
 include/hw/core/tcg-cpu-ops.h | 13 +
 target/alpha/cpu.h | 2 +
 target/arm/internals.h | 2 +
 target/avr/cpu.h | 2 +
 target/hexagon/cpu.h | 2 +
 target/hexagon/fma_emu.h | 3 -
 target/hppa/cpu.h | 2 +
 target/i386/tcg/helper-tcg.h | 2 +
 target/loongarch/internals.h | 2 +
 target/m68k/cpu.h | 2 +
 target/microblaze/cpu.h | 2 +
 target/mips/tcg/tcg-internal.h | 2 +
 target/openrisc/cpu.h | 2 +
 target/ppc/cpu.h | 2 +
 target/riscv/cpu.h | 3 +
 target/rx/cpu.h | 2 +
 target/s390x/s390x-internal.h | 2 +
 target/sh4/cpu.h | 2 +
 target/sparc/cpu.h | 2 +
 target/sparc/helper.h | 4 +-
 target/tricore/cpu.h | 2 +
 target/xtensa/cpu.h | 2 +
 accel/tcg/cpu-exec.c | 8 +-
 accel/tcg/plugin-gen.c | 9 +
 accel/tcg/translate-all.c | 8 +-
 fpu/softfloat.c | 63 +--
 target/alpha/cpu.c | 1 +
 target/alpha/translate.c | 4 +-
 target/arm/cpu.c | 1 +
 target/arm/tcg/cpu-v7m.c | 1 +
 target/arm/tcg/helper-a64.c | 6 +-
 target/arm/tcg/translate.c | 5 +-
 target/avr/cpu.c | 1 +
 target/avr/translate.c | 6 +-
 target/hexagon/cpu.c | 1 +
 target/hexagon/fma_emu.c | 496 ++++++---------------
 target/hexagon/op_helper.c | 125 ++----
 target/hexagon/translate.c | 4 +-
 target/hppa/cpu.c | 1 +
 target/hppa/translate.c | 4 +-
 target/i386/tcg/tcg-cpu.c | 1 +
 target/i386/tcg/translate.c | 5 +-
 target/loongarch/cpu.c | 1 +
 target/loongarch/tcg/translate.c | 4 +-
 target/m68k/cpu.c | 1 +
 target/m68k/translate.c | 4 +-
 target/microblaze/cpu.c | 1 +
 target/microblaze/translate.c | 4 +-
 target/mips/cpu.c | 1 +
 target/mips/tcg/translate.c | 4 +-
 target/openrisc/cpu.c | 1 +
 target/openrisc/translate.c | 4 +-
 target/ppc/cpu_init.c | 1 +
 target/ppc/translate.c | 4 +-
 target/riscv/tcg/tcg-cpu.c | 1 +
 target/riscv/translate.c | 4 +-
 target/rx/cpu.c | 1 +
 target/rx/translate.c | 4 +-
 target/s390x/cpu.c | 1 +
 target/s390x/tcg/translate.c | 4 +-
 target/sh4/cpu.c | 1 +
 target/sh4/translate.c | 4 +-
 target/sparc/cpu.c | 1 +
 target/sparc/fop_helper.c | 8 +-
 target/sparc/translate.c | 84 ++--
 target/tricore/cpu.c | 1 +
 target/tricore/translate.c | 5 +-
 target/xtensa/cpu.c | 1 +
 target/xtensa/translate.c | 4 +-
 tcg/optimize.c | 857 +++++++++++++++++++-----------------
 tests/tcg/multiarch/system/memory.c | 9 +-
 fpu/softfloat-parts.c.inc | 16 +-
 75 files changed, 866 insertions(+), 1009 deletions(-)

From: Ilya Leoshkevich <iii@linux.ibm.com>

make check-tcg fails on Fedora with the following error message:

  alpha-linux-gnu-gcc [...] qemu/tests/tcg/multiarch/system/memory.c -o memory [...]
  qemu/tests/tcg/multiarch/system/memory.c:17:10: fatal error: inttypes.h: No such file or directory
     17 | #include <inttypes.h>
        |          ^~~~~~~~~~~~
  compilation terminated.

The reason is that Fedora has cross-compilers, but no cross-glibc
headers. Fix by hardcoding the format specifiers and dropping the
include.

An alternative fix would be to introduce a configure check for
inttypes.h. But this would make it impossible to use Fedora
cross-compilers for softmmu tests, which used to work so far.

Fixes: ecbcc9ead2f8 ("tests/tcg: add a system test to check memory instrumentation")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010085906.226249-1-iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/tcg/multiarch/system/memory.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/tcg/multiarch/system/memory.c b/tests/tcg/multiarch/system/memory.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/tcg/multiarch/system/memory.c
+++ b/tests/tcg/multiarch/system/memory.c
@@ -XXX,XX +XXX,XX @@
 
 #include <stdint.h>
 #include <stdbool.h>
-#include <inttypes.h>
 #include <minilib.h>
 
 #ifndef CHECK_UNALIGNED
@@ -XXX,XX +XXX,XX @@ int main(void)
     int i;
     bool ok = true;
 
-    ml_printf("Test data start: 0x%"PRIxPTR"\n", &test_data[0]);
-    ml_printf("Test data end: 0x%"PRIxPTR"\n", &test_data[TEST_SIZE]);
+    ml_printf("Test data start: 0x%lx\n", (unsigned long)&test_data[0]);
+    ml_printf("Test data end: 0x%lx\n", (unsigned long)&test_data[TEST_SIZE]);
 
     /* Run through the unsigned tests first */
     for (i = 0; i < ARRAY_SIZE(init_ufns) && ok; i++) {
@@ -XXX,XX +XXX,XX @@ int main(void)
         ok = do_signed_reads(true);
     }
 
-    ml_printf("Test data read: %"PRId32"\n", test_read_count);
-    ml_printf("Test data write: %"PRId32"\n", test_write_count);
+    ml_printf("Test data read: %lu\n", (unsigned long)test_read_count);
+    ml_printf("Test data write: %lu\n", (unsigned long)test_write_count);
     ml_printf("Test complete: %s\n", ok ? "PASSED" : "FAILED");
     return ok ? 0 : -1;
 }
--
2.43.0

From: Pierrick Bouvier <pierrick.bouvier@linaro.org>

When running with a single vcpu, we can return a constant instead of a
load when accessing cpu_index.
A side effect is that all tcg operations using it are optimized, most
notably scoreboard access.
When running a simple loop in user-mode, the speedup is around 20%.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20241128213843.1023080-1-pierrick.bouvier@linaro.org>
---
 accel/tcg/plugin-gen.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -XXX,XX +XXX,XX @@ static void gen_disable_mem_helper(void)
 
 static TCGv_i32 gen_cpu_index(void)
 {
+    /*
+     * Optimize when we run with a single vcpu. All values using cpu_index,
+     * including scoreboard index, will be optimized out.
+     * User-mode calls tb_flush when setting this flag. In system-mode, all
+     * vcpus are created before generating code.
+     */
+    if (!tcg_cflags_has(current_cpu, CF_PARALLEL)) {
+        return tcg_constant_i32(current_cpu->cpu_index);
+    }
     TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
     tcg_gen_ld_i32(cpu_index, tcg_env,
                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
--
2.43.0

Call them directly from the opcode switch statement in tcg_optimize,
rather than in finish_folding based on opcode flags.  Adjust folding
of conditional branches to match.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 47 +++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
     }
 }
 
+static void finish_bb(OptContext *ctx)
+{
+    /* We only optimize memory barriers across basic blocks. */
+    ctx->prev_mb = NULL;
+}
+
+static void finish_ebb(OptContext *ctx)
+{
+    finish_bb(ctx);
+    /* We only optimize across extended basic blocks. */
+    memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+    remove_mem_copy_all(ctx);
+}
+
 static void finish_folding(OptContext *ctx, TCGOp *op)
 {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     int i, nb_oargs;
 
-    /*
-     * We only optimize extended basic blocks.  If the opcode ends a BB
-     * and is not a conditional branch, reset all temp data.
-     */
-    if (def->flags & TCG_OPF_BB_END) {
-        ctx->prev_mb = NULL;
-        if (!(def->flags & TCG_OPF_COND_BRANCH)) {
-            memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
-            remove_mem_copy_all(ctx);
-        }
-        return;
-    }
-
     nb_oargs = def->nb_oargs;
     for (i = 0; i < nb_oargs; i++) {
         TCGTemp *ts = arg_temp(op->args[i]);
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
     if (i > 0) {
         op->opc = INDEX_op_br;
         op->args[0] = op->args[3];
+        finish_ebb(ctx);
+    } else {
+        finish_bb(ctx);
     }
-    return false;
+    return true;
 }
 
 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
         }
         op->opc = INDEX_op_br;
         op->args[0] = label;
-        break;
+        finish_ebb(ctx);
+        return true;
     }
-    return false;
+
+    finish_bb(ctx);
+    return true;
 }
 
 static bool fold_bswap(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64_VEC(xor):
             done = fold_xor(&ctx, op);
             break;
+        case INDEX_op_set_label:
+        case INDEX_op_br:
+        case INDEX_op_exit_tb:
+        case INDEX_op_goto_tb:
+        case INDEX_op_goto_ptr:
+            finish_ebb(&ctx);
+            done = true;
+            break;
         default:
             break;
         }
--
2.43.0

There are only a few logical operations which can compute
an "affected" mask. Split out handling of this optimization
to a separate function, only to be called when applicable.

Remove the a_mask field from OptContext, as the mask is
no longer stored anywhere.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
     QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
 
     /* In flight values from optimization. */
-    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
     uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
     uint64_t s_mask;  /* mask of clrsb(value) bits */
     TCGType type;
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 
 static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
-    uint64_t a_mask = ctx->a_mask;
     uint64_t z_mask = ctx->z_mask;
     uint64_t s_mask = ctx->s_mask;
 
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
      * type changing opcodes.
      */
     if (ctx->type == TCG_TYPE_I32) {
-        a_mask = (int32_t)a_mask;
         z_mask = (int32_t)z_mask;
         s_mask |= MAKE_64BIT_MASK(32, 32);
         ctx->z_mask = z_mask;
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     if (z_mask == 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
     }
+    return false;
+}
+
+/*
+ * An "affected" mask bit is 0 if and only if the result is identical
+ * to the first input.  Thus if the entire mask is 0, the operation
+ * is equivalent to a copy.
+ */
+static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
+{
+    if (ctx->type == TCG_TYPE_I32) {
+        a_mask = (uint32_t)a_mask;
+    }
     if (a_mask == 0) {
         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
      * Known-zeros does not imply known-ones.  Therefore unless
      * arg2 is constant, we can't infer affected bits from it.
      */
-    if (arg_is_const(op->args[2])) {
-        ctx->a_mask = z1 & ~z2;
+    if (arg_is_const(op->args[2]) &&
+        fold_affected_mask(ctx, op, z1 & ~z2)) {
+        return true;
     }
 
     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
      */
     if (arg_is_const(op->args[2])) {
         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
-        ctx->a_mask = z1 & ~z2;
+        if (fold_affected_mask(ctx, op, z1 & ~z2)) {
+            return true;
+        }
         z1 &= z2;
     }
     ctx->z_mask = z1;
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
 
     z_mask_old = arg_info(op->args[1])->z_mask;
     z_mask = extract64(z_mask_old, pos, len);
-    if (pos == 0) {
-        ctx->a_mask = z_mask_old ^ z_mask;
+    if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
+        return true;
     }
     ctx->z_mask = z_mask;
     ctx->s_mask = smask_from_zmask(z_mask);
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
 
     ctx->z_mask = z_mask;
     ctx->s_mask = s_mask;
-    if (!type_change) {
-        ctx->a_mask = s_mask & ~s_mask_old;
+    if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+        return true;
     }
 
     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
 
     ctx->z_mask = z_mask;
     ctx->s_mask = smask_from_zmask(z_mask);
-    if (!type_change) {
-        ctx->a_mask = z_mask_old ^ z_mask;
+    if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
+        return true;
     }
     return fold_masks(ctx, op);
 }
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
     ctx->s_mask = s_mask;
 
-    if (pos == 0) {
-        ctx->a_mask = s_mask & ~s_mask_old;
+    if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+        return true;
     }
 
     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         }
 
         /* Assume all bits affected, no bits known zero, no sign reps. */
-        ctx.a_mask = -1;
         ctx.z_mask = -1;
         ctx.s_mask = 0;
--
2.43.0

Use of fold_masks should be restricted to those opcodes that
can reliably make use of it -- those with a single output,
and from higher-level folders that set up the masks.
Prepare for conversion of each folder in turn.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
     uint64_t z_mask = ctx->z_mask;
     uint64_t s_mask = ctx->s_mask;
+    const TCGOpDef *def = &tcg_op_defs[op->opc];
+    TCGTemp *ts;
+    TempOptInfo *ti;
+
+    /* Only single-output opcodes are supported here. */
+    tcg_debug_assert(def->nb_oargs == 1);
 
     /*
      * 32-bit ops generate 32-bit results, which for the purpose of
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     if (ctx->type == TCG_TYPE_I32) {
         z_mask = (int32_t)z_mask;
         s_mask |= MAKE_64BIT_MASK(32, 32);
-        ctx->z_mask = z_mask;
-        ctx->s_mask = s_mask;
     }
 
     if (z_mask == 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
     }
-    return false;
+
+    ts = arg_temp(op->args[0]);
+    reset_ts(ctx, ts);
+
+    ti = ts_info(ts);
+    ti->z_mask = z_mask;
+    ti->s_mask = s_mask;
+    return true;
 }
 
 /*
--
2.43.0

Add a routine to which masks can be passed directly, rather than
storing them into OptContext.  To be used in upcoming patches.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
     return fold_const2(ctx, op);
 }
 
-static bool fold_masks(OptContext *ctx, TCGOp *op)
+/*
+ * Record "zero" and "sign" masks for the single output of @op.
+ * See TempOptInfo definition of z_mask and s_mask.
+ * If z_mask allows, fold the output to constant zero.
+ */
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
+                          uint64_t z_mask, uint64_t s_mask)
 {
-    uint64_t z_mask = ctx->z_mask;
-    uint64_t s_mask = ctx->s_mask;
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     TCGTemp *ts;
     TempOptInfo *ti;
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     return true;
 }
 
+static bool fold_masks(OptContext *ctx, TCGOp *op)
+{
+    return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
+}
+
 /*
  * An "affected" mask bit is 0 if and only if the result is identical
  * to the first input.  Thus if the entire mask is 0, the operation
--
2.43.0

Consider the passed s_mask to be a minimum deduced from
either existing s_mask or from a sign-extension operation.
We may be able to deduce more from the set of known zeros.
Remove identical logic from several opcode folders.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
  * Record "zero" and "sign" masks for the single output of @op.
  * See TempOptInfo definition of z_mask and s_mask.
  * If z_mask allows, fold the output to constant zero.
+ * The passed s_mask may be augmented by z_mask.
  */
 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                           uint64_t z_mask, uint64_t s_mask)
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
 
     ti = ts_info(ts);
     ti->z_mask = z_mask;
-    ti->s_mask = s_mask;
+    ti->s_mask = s_mask | smask_from_zmask(z_mask);
     return true;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
144
+ case INDEX_op_shls_vec:
145
+ case INDEX_op_shlv_vec:
146
+ case INDEX_op_shri_vec:
147
+ case INDEX_op_shrs_vec:
148
+ case INDEX_op_shrv_vec:
149
case INDEX_op_sub_vec:
150
case INDEX_op_xor_vec:
151
return 1;
152
case INDEX_op_cmp_vec:
153
+ case INDEX_op_rotrv_vec:
154
return -1;
155
case INDEX_op_mul_vec:
156
return vece < MO_64;
157
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
158
TCGArg a0, ...)
159
{
160
va_list va;
161
- TCGv_vec v0, v1, v2;
162
+ TCGv_vec v0, v1, v2, t0;
163
164
va_start(va, a0);
165
v0 = temp_tcgv_vec(arg_temp(a0));
166
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
167
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
168
break;
169
170
+ case INDEX_op_rotrv_vec:
171
+ t0 = tcg_temp_new_vec(type);
172
+ tcg_gen_neg_vec(vece, t0, v2);
173
+ tcg_gen_rotlv_vec(vece, v0, v1, t0);
174
+ tcg_temp_free_vec(t0);
175
+ break;
176
+
177
default:
34
default:
178
g_assert_not_reached();
35
g_assert_not_reached();
179
}
36
}
180
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
37
- s_mask = smask_from_zmask(z_mask);
181
case INDEX_op_abs_vec:
38
182
case INDEX_op_neg_vec:
39
+ s_mask = 0;
183
case INDEX_op_not_vec:
40
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
184
+ case INDEX_op_rotli_vec:
41
case TCG_BSWAP_OZ:
185
+ case INDEX_op_sari_vec:
42
break;
186
+ case INDEX_op_shli_vec:
43
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
187
+ case INDEX_op_shri_vec:
44
default:
188
return C_O1_I1(v, v);
45
/* The high bits are undefined: force all bits above the sign to 1. */
189
case INDEX_op_add_vec:
46
z_mask |= sign << 1;
190
case INDEX_op_sub_vec:
47
- s_mask = 0;
191
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
48
break;
192
case INDEX_op_xor_vec:
49
}
193
case INDEX_op_cmp_vec:
50
ctx->z_mask = z_mask;
194
case INDEX_op_mul_vec:
51
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
195
+ case INDEX_op_rotlv_vec:
52
g_assert_not_reached();
196
+ case INDEX_op_rotrv_vec:
53
}
197
+ case INDEX_op_shlv_vec:
54
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
198
+ case INDEX_op_shrv_vec:
55
- ctx->s_mask = smask_from_zmask(ctx->z_mask);
199
+ case INDEX_op_sarv_vec:
56
return false;
200
return C_O1_I2(v, v, v);
57
}
201
+ case INDEX_op_rotls_vec:
58
202
+ case INDEX_op_shls_vec:
59
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
203
+ case INDEX_op_shrs_vec:
60
default:
204
+ case INDEX_op_sars_vec:
61
g_assert_not_reached();
205
+ return C_O1_I2(v, v, r);
62
}
206
63
- ctx->s_mask = smask_from_zmask(ctx->z_mask);
64
return false;
65
}
66
67
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
68
return true;
69
}
70
ctx->z_mask = z_mask;
71
- ctx->s_mask = smask_from_zmask(z_mask);
72
73
return fold_masks(ctx, op);
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
76
}
77
78
ctx->z_mask = z_mask;
79
- ctx->s_mask = smask_from_zmask(z_mask);
80
if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
84
int width = 8 * memop_size(mop);
85
86
if (width < 64) {
87
- ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
88
- if (!(mop & MO_SIGN)) {
89
+ if (mop & MO_SIGN) {
90
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
91
+ } else {
92
ctx->z_mask = MAKE_64BIT_MASK(0, width);
93
- ctx->s_mask <<= 1;
94
}
95
}
96
97
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
98
fold_setcond_tst_pow2(ctx, op, false);
99
100
ctx->z_mask = 1;
101
- ctx->s_mask = smask_from_zmask(1);
102
return false;
103
}
104
105
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
106
}
107
108
ctx->z_mask = 1;
109
- ctx->s_mask = smask_from_zmask(1);
110
return false;
111
112
do_setcond_const:
113
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
114
break;
115
CASE_OP_32_64(ld8u):
116
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
117
- ctx->s_mask = MAKE_64BIT_MASK(9, 55);
118
break;
119
CASE_OP_32_64(ld16s):
120
ctx->s_mask = MAKE_64BIT_MASK(16, 48);
121
break;
122
CASE_OP_32_64(ld16u):
123
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
124
- ctx->s_mask = MAKE_64BIT_MASK(17, 47);
125
break;
126
case INDEX_op_ld32s_i64:
127
ctx->s_mask = MAKE_64BIT_MASK(32, 32);
128
break;
129
case INDEX_op_ld32u_i64:
130
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
131
- ctx->s_mask = MAKE_64BIT_MASK(33, 31);
132
break;
207
default:
133
default:
208
g_assert_not_reached();
134
g_assert_not_reached();
209
--
135
--
210
2.25.1
136
2.43.0
211
212
New patch

Change the representation from sign bit repetitions to all bits equal
to the sign bit, including the sign bit itself.

The previous format has a problem in that it is difficult to recreate
a valid sign mask after a shift operation: the "repetitions" part of
the previous format meant that applying the same shift as for the value
lead to an off-by-one value.

The new format, including the sign bit itself, means that the sign mask
can be manipulated in exactly the same way as the value, canonicalization
is easier.

Canonicalize the s_mask in fold_masks_zs, rather than requiring callers
to do so. Treat 0 as a non-canonical but typeless input for no sign
information, which will be reset as appropriate for the data type.
We can easily fold in the data from z_mask while canonicalizing.

Temporarily disable optimizations using s_mask while each operation is
converted to use fold_masks_zs and to the new form.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 64 ++++++++++++--------------------------------------
1 file changed, 15 insertions(+), 49 deletions(-)
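For this cover text only, a standalone sketch of the canonicalization that the hunk below adds to fold_masks_zs. The names clz64_safe and canonical_s_mask are made up here; clz64_safe stands in for QEMU's clz64 (which returns 64 for zero), and the final shift assumes the usual arithmetic right shift of negative values:

    #include <stdint.h>

    static int clz64_safe(uint64_t x)
    {
        return x ? __builtin_clzll(x) : 64;
    }

    static int64_t canonical_s_mask(uint64_t z_mask, int64_t s_mask)
    {
        int rep = clz64_safe(~(uint64_t)s_mask);   /* bits already known to match the msb */
        int zrep = clz64_safe(z_mask);             /* leading bits known to be zero */

        if (zrep > rep) {
            rep = zrep;
        }
        rep = rep > 0 ? rep - 1 : 0;
        return INT64_MIN >> rep;    /* arithmetic shift replicates the msb downwards */
    }

    /* Example: for the constant -2, clrsb64(-2) is 62, so init_ts_info below
     * stores INT64_MIN >> 62 == 0xfffffffffffffffe: every bit except bit 0
     * is known to equal the sign bit. */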
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
uint64_t val;
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
- uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
} TempOptInfo;

typedef struct OptContext {
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {

/* In flight values from optimization. */
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
- uint64_t s_mask; /* mask of clrsb(value) bits */
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
TCGType type;
} OptContext;

-/* Calculate the smask for a specific value. */
-static uint64_t smask_from_value(uint64_t value)
-{
- int rep = clrsb64(value);
- return ~(~0ull >> rep);
-}
-
-/*
- * Calculate the smask for a given set of known-zeros.
- * If there are lots of zeros on the left, we can consider the remainder
- * an unsigned field, and thus the corresponding signed field is one bit
- * larger.
- */
-static uint64_t smask_from_zmask(uint64_t zmask)
-{
- /*
- * Only the 0 bits are significant for zmask, thus the msb itself
- * must be zero, else we have no sign information.
- */
- int rep = clz64(zmask);
- if (rep == 0) {
- return 0;
- }
- rep -= 1;
- return ~(~0ull >> rep);
-}
-
-/*
- * Recreate a properly left-aligned smask after manipulation.
- * Some bit-shuffling, particularly shifts and rotates, may
- * retain sign bits on the left, but may scatter disconnected
- * sign bits on the right. Retain only what remains to the left.
- */
-static uint64_t smask_from_smask(int64_t smask)
-{
- /* Only the 1 bits are significant for smask */
- return smask_from_zmask(~smask);
-}
-
static inline TempOptInfo *ts_info(TCGTemp *ts)
{
return ts->state_ptr;
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
ti->is_const = true;
ti->val = ts->val;
ti->z_mask = ts->val;
- ti->s_mask = smask_from_value(ts->val);
+ ti->s_mask = INT64_MIN >> clrsb64(ts->val);
} else {
ti->is_const = false;
ti->z_mask = -1;
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
*/
if (i == 0) {
ts_info(ts)->z_mask = ctx->z_mask;
- ts_info(ts)->s_mask = ctx->s_mask;
}
}
}
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
* The passed s_mask may be augmented by z_mask.
*/
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
- uint64_t z_mask, uint64_t s_mask)
+ uint64_t z_mask, int64_t s_mask)
{
const TCGOpDef *def = &tcg_op_defs[op->opc];
TCGTemp *ts;
TempOptInfo *ti;
+ int rep;

/* Only single-output opcodes are supported here. */
tcg_debug_assert(def->nb_oargs == 1);
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
*/
if (ctx->type == TCG_TYPE_I32) {
z_mask = (int32_t)z_mask;
- s_mask |= MAKE_64BIT_MASK(32, 32);
+ s_mask |= INT32_MIN;
}

if (z_mask == 0) {
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,

ti = ts_info(ts);
ti->z_mask = z_mask;
- ti->s_mask = s_mask | smask_from_zmask(z_mask);
+
+ /* Canonicalize s_mask and incorporate data from z_mask. */
+ rep = clz64(~s_mask);
+ rep = MAX(rep, clz64(z_mask));
+ rep = MAX(rep - 1, 0);
+ ti->s_mask = INT64_MIN >> rep;
+
return true;
}

@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)

ctx->z_mask = z_mask;
ctx->s_mask = s_mask;
- if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+ if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
return true;
}

@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
s_mask |= MAKE_64BIT_MASK(len, 64 - len);
ctx->s_mask = s_mask;

- if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+ if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
return true;
}

@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);

s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
- ctx->s_mask = smask_from_smask(s_mask);

return fold_masks(ctx, op);
}
--
2.43.0
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)

We're about to move this out of tcg.h, so rename it
as we did when moving MemOp.

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/atomic_template.h | 24 +++++------
include/tcg/tcg.h | 74 ++++++++++++++++-----------------
accel/tcg/cputlb.c | 78 +++++++++++++++++------------------
accel/tcg/user-exec.c | 2 +-
target/arm/helper-a64.c | 16 +++----
target/arm/m_helper.c | 2 +-
target/i386/tcg/mem_helper.c | 4 +-
target/m68k/op_helper.c | 2 +-
target/mips/tcg/msa_helper.c | 6 +--
target/s390x/tcg/mem_helper.c | 20 ++++-----
target/sparc/ldst_helper.c | 2 +-
tcg/optimize.c | 2 +-
tcg/tcg-op.c | 12 +++---
tcg/tcg.c | 2 +-
tcg/tci.c | 14 +++----
accel/tcg/atomic_common.c.inc | 6 +--
tcg/aarch64/tcg-target.c.inc | 14 +++----
tcg/arm/tcg-target.c.inc | 10 ++---
tcg/i386/tcg-target.c.inc | 10 ++---
tcg/mips/tcg-target.c.inc | 12 +++---
tcg/ppc/tcg-target.c.inc | 10 ++---
tcg/riscv/tcg-target.c.inc | 16 +++----
tcg/s390/tcg-target.c.inc | 10 ++---
tcg/sparc/tcg-target.c.inc | 4 +-
tcg/tcg-ldst.c.inc | 2 +-
25 files changed, 177 insertions(+), 177 deletions(-)
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/accel/tcg/atomic_template.h
38
+++ b/accel/tcg/atomic_template.h
39
@@ -XXX,XX +XXX,XX @@
40
41
ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
42
ABI_TYPE cmpv, ABI_TYPE newv,
43
- TCGMemOpIdx oi, uintptr_t retaddr)
44
+ MemOpIdx oi, uintptr_t retaddr)
45
{
46
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
47
PAGE_READ | PAGE_WRITE, retaddr);
48
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
49
#if DATA_SIZE >= 16
50
#if HAVE_ATOMIC128
51
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
52
- TCGMemOpIdx oi, uintptr_t retaddr)
53
+ MemOpIdx oi, uintptr_t retaddr)
54
{
55
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
56
PAGE_READ, retaddr);
57
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
58
}
59
60
void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
61
- TCGMemOpIdx oi, uintptr_t retaddr)
62
+ MemOpIdx oi, uintptr_t retaddr)
63
{
64
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
65
PAGE_WRITE, retaddr);
66
@@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
67
#endif
68
#else
69
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
70
- TCGMemOpIdx oi, uintptr_t retaddr)
71
+ MemOpIdx oi, uintptr_t retaddr)
72
{
73
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
74
PAGE_READ | PAGE_WRITE, retaddr);
75
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
76
77
#define GEN_ATOMIC_HELPER(X) \
78
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
79
- ABI_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) \
80
+ ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
81
{ \
82
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
83
PAGE_READ | PAGE_WRITE, retaddr); \
84
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER(xor_fetch)
85
*/
86
#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
87
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
88
- ABI_TYPE xval, TCGMemOpIdx oi, uintptr_t retaddr) \
89
+ ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
90
{ \
91
XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
92
PAGE_READ | PAGE_WRITE, retaddr); \
93
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
94
95
ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
96
ABI_TYPE cmpv, ABI_TYPE newv,
97
- TCGMemOpIdx oi, uintptr_t retaddr)
98
+ MemOpIdx oi, uintptr_t retaddr)
99
{
100
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
101
PAGE_READ | PAGE_WRITE, retaddr);
102
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
103
#if DATA_SIZE >= 16
104
#if HAVE_ATOMIC128
105
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
106
- TCGMemOpIdx oi, uintptr_t retaddr)
107
+ MemOpIdx oi, uintptr_t retaddr)
108
{
109
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
110
PAGE_READ, retaddr);
111
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
112
}
113
114
void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
115
- TCGMemOpIdx oi, uintptr_t retaddr)
116
+ MemOpIdx oi, uintptr_t retaddr)
117
{
118
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
119
PAGE_WRITE, retaddr);
120
@@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
121
#endif
122
#else
123
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
124
- TCGMemOpIdx oi, uintptr_t retaddr)
125
+ MemOpIdx oi, uintptr_t retaddr)
126
{
127
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
128
PAGE_READ | PAGE_WRITE, retaddr);
129
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
130
131
#define GEN_ATOMIC_HELPER(X) \
132
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
133
- ABI_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) \
134
+ ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
135
{ \
136
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
137
PAGE_READ | PAGE_WRITE, retaddr); \
138
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER(xor_fetch)
139
*/
140
#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
141
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
142
- ABI_TYPE xval, TCGMemOpIdx oi, uintptr_t retaddr) \
143
+ ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
144
{ \
145
XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
146
PAGE_READ | PAGE_WRITE, retaddr); \
147
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
148
index XXXXXXX..XXXXXXX 100644
149
--- a/include/tcg/tcg.h
150
+++ b/include/tcg/tcg.h
151
@@ -XXX,XX +XXX,XX @@ static inline size_t tcg_current_code_size(TCGContext *s)
152
}
153
154
/* Combine the MemOp and mmu_idx parameters into a single value. */
155
-typedef uint32_t TCGMemOpIdx;
156
+typedef uint32_t MemOpIdx;
157
158
/**
159
* make_memop_idx
160
@@ -XXX,XX +XXX,XX @@ typedef uint32_t TCGMemOpIdx;
161
*
162
* Encode these values into a single parameter.
163
*/
164
-static inline TCGMemOpIdx make_memop_idx(MemOp op, unsigned idx)
165
+static inline MemOpIdx make_memop_idx(MemOp op, unsigned idx)
166
{
167
tcg_debug_assert(idx <= 15);
168
return (op << 4) | idx;
169
@@ -XXX,XX +XXX,XX @@ static inline TCGMemOpIdx make_memop_idx(MemOp op, unsigned idx)
170
*
171
* Extract the memory operation from the combined value.
172
*/
173
-static inline MemOp get_memop(TCGMemOpIdx oi)
174
+static inline MemOp get_memop(MemOpIdx oi)
175
{
176
return oi >> 4;
177
}
178
@@ -XXX,XX +XXX,XX @@ static inline MemOp get_memop(TCGMemOpIdx oi)
179
*
180
* Extract the mmu index from the combined value.
181
*/
182
-static inline unsigned get_mmuidx(TCGMemOpIdx oi)
183
+static inline unsigned get_mmuidx(MemOpIdx oi)
184
{
185
return oi & 15;
186
}
187
@@ -XXX,XX +XXX,XX @@ uint64_t dup_const(unsigned vece, uint64_t c);
188
#ifdef CONFIG_SOFTMMU
189
/* Value zero-extended to tcg register size. */
190
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
191
- TCGMemOpIdx oi, uintptr_t retaddr);
192
+ MemOpIdx oi, uintptr_t retaddr);
193
tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
194
- TCGMemOpIdx oi, uintptr_t retaddr);
195
+ MemOpIdx oi, uintptr_t retaddr);
196
tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
197
- TCGMemOpIdx oi, uintptr_t retaddr);
198
+ MemOpIdx oi, uintptr_t retaddr);
199
uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
200
- TCGMemOpIdx oi, uintptr_t retaddr);
201
+ MemOpIdx oi, uintptr_t retaddr);
202
tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
203
- TCGMemOpIdx oi, uintptr_t retaddr);
204
+ MemOpIdx oi, uintptr_t retaddr);
205
tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
206
- TCGMemOpIdx oi, uintptr_t retaddr);
207
+ MemOpIdx oi, uintptr_t retaddr);
208
uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
209
- TCGMemOpIdx oi, uintptr_t retaddr);
210
+ MemOpIdx oi, uintptr_t retaddr);
211
212
/* Value sign-extended to tcg register size. */
213
tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
214
- TCGMemOpIdx oi, uintptr_t retaddr);
215
+ MemOpIdx oi, uintptr_t retaddr);
216
tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
217
- TCGMemOpIdx oi, uintptr_t retaddr);
218
+ MemOpIdx oi, uintptr_t retaddr);
219
tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
220
- TCGMemOpIdx oi, uintptr_t retaddr);
221
+ MemOpIdx oi, uintptr_t retaddr);
222
tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
223
- TCGMemOpIdx oi, uintptr_t retaddr);
224
+ MemOpIdx oi, uintptr_t retaddr);
225
tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
226
- TCGMemOpIdx oi, uintptr_t retaddr);
227
+ MemOpIdx oi, uintptr_t retaddr);
228
229
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
230
- TCGMemOpIdx oi, uintptr_t retaddr);
231
+ MemOpIdx oi, uintptr_t retaddr);
232
void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
233
- TCGMemOpIdx oi, uintptr_t retaddr);
234
+ MemOpIdx oi, uintptr_t retaddr);
235
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
236
- TCGMemOpIdx oi, uintptr_t retaddr);
237
+ MemOpIdx oi, uintptr_t retaddr);
238
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
239
- TCGMemOpIdx oi, uintptr_t retaddr);
240
+ MemOpIdx oi, uintptr_t retaddr);
241
void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
242
- TCGMemOpIdx oi, uintptr_t retaddr);
243
+ MemOpIdx oi, uintptr_t retaddr);
244
void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
245
- TCGMemOpIdx oi, uintptr_t retaddr);
246
+ MemOpIdx oi, uintptr_t retaddr);
247
void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
248
- TCGMemOpIdx oi, uintptr_t retaddr);
249
+ MemOpIdx oi, uintptr_t retaddr);
250
251
/* Temporary aliases until backends are converted. */
252
#ifdef TARGET_WORDS_BIGENDIAN
253
@@ -XXX,XX +XXX,XX @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
254
255
uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
256
uint32_t cmpv, uint32_t newv,
257
- TCGMemOpIdx oi, uintptr_t retaddr);
258
+ MemOpIdx oi, uintptr_t retaddr);
259
uint32_t cpu_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr,
260
uint32_t cmpv, uint32_t newv,
261
- TCGMemOpIdx oi, uintptr_t retaddr);
262
+ MemOpIdx oi, uintptr_t retaddr);
263
uint32_t cpu_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr,
264
uint32_t cmpv, uint32_t newv,
265
- TCGMemOpIdx oi, uintptr_t retaddr);
266
+ MemOpIdx oi, uintptr_t retaddr);
267
uint64_t cpu_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr,
268
uint64_t cmpv, uint64_t newv,
269
- TCGMemOpIdx oi, uintptr_t retaddr);
270
+ MemOpIdx oi, uintptr_t retaddr);
271
uint32_t cpu_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr,
272
uint32_t cmpv, uint32_t newv,
273
- TCGMemOpIdx oi, uintptr_t retaddr);
274
+ MemOpIdx oi, uintptr_t retaddr);
275
uint32_t cpu_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr,
276
uint32_t cmpv, uint32_t newv,
277
- TCGMemOpIdx oi, uintptr_t retaddr);
278
+ MemOpIdx oi, uintptr_t retaddr);
279
uint64_t cpu_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr,
280
uint64_t cmpv, uint64_t newv,
281
- TCGMemOpIdx oi, uintptr_t retaddr);
282
+ MemOpIdx oi, uintptr_t retaddr);
283
284
#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX) \
285
TYPE cpu_atomic_ ## NAME ## SUFFIX ## _mmu \
286
(CPUArchState *env, target_ulong addr, TYPE val, \
287
- TCGMemOpIdx oi, uintptr_t retaddr);
288
+ MemOpIdx oi, uintptr_t retaddr);
289
290
#ifdef CONFIG_ATOMIC64
291
#define GEN_ATOMIC_HELPER_ALL(NAME) \
292
@@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_ALL(xchg)
293
294
Int128 cpu_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
295
Int128 cmpv, Int128 newv,
296
- TCGMemOpIdx oi, uintptr_t retaddr);
297
+ MemOpIdx oi, uintptr_t retaddr);
298
Int128 cpu_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr,
299
Int128 cmpv, Int128 newv,
300
- TCGMemOpIdx oi, uintptr_t retaddr);
301
+ MemOpIdx oi, uintptr_t retaddr);
302
303
Int128 cpu_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
304
- TCGMemOpIdx oi, uintptr_t retaddr);
305
+ MemOpIdx oi, uintptr_t retaddr);
306
Int128 cpu_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr,
307
- TCGMemOpIdx oi, uintptr_t retaddr);
308
+ MemOpIdx oi, uintptr_t retaddr);
309
void cpu_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
310
- TCGMemOpIdx oi, uintptr_t retaddr);
311
+ MemOpIdx oi, uintptr_t retaddr);
312
void cpu_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
313
- TCGMemOpIdx oi, uintptr_t retaddr);
314
+ MemOpIdx oi, uintptr_t retaddr);
315
316
#ifdef CONFIG_DEBUG_TCG
317
void tcg_assert_listed_vecop(TCGOpcode);
318
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
319
index XXXXXXX..XXXXXXX 100644
320
--- a/accel/tcg/cputlb.c
321
+++ b/accel/tcg/cputlb.c
322
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
323
* @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
324
*/
325
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
326
- TCGMemOpIdx oi, int size, int prot,
327
+ MemOpIdx oi, int size, int prot,
328
uintptr_t retaddr)
329
{
330
size_t mmu_idx = get_mmuidx(oi);
331
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
332
*/
333
334
typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
335
- TCGMemOpIdx oi, uintptr_t retaddr);
336
+ MemOpIdx oi, uintptr_t retaddr);
337
338
static inline uint64_t QEMU_ALWAYS_INLINE
339
load_memop(const void *haddr, MemOp op)
340
@@ -XXX,XX +XXX,XX @@ load_memop(const void *haddr, MemOp op)
341
}
342
343
static inline uint64_t QEMU_ALWAYS_INLINE
344
-load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
345
+load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
346
uintptr_t retaddr, MemOp op, bool code_read,
347
FullLoadHelper *full_load)
348
{
349
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
350
*/
351
352
static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
353
- TCGMemOpIdx oi, uintptr_t retaddr)
354
+ MemOpIdx oi, uintptr_t retaddr)
355
{
356
return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
357
}
358
359
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
360
- TCGMemOpIdx oi, uintptr_t retaddr)
361
+ MemOpIdx oi, uintptr_t retaddr)
362
{
363
return full_ldub_mmu(env, addr, oi, retaddr);
364
}
365
366
static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
367
- TCGMemOpIdx oi, uintptr_t retaddr)
368
+ MemOpIdx oi, uintptr_t retaddr)
369
{
370
return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
371
full_le_lduw_mmu);
372
}
373
374
tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
375
- TCGMemOpIdx oi, uintptr_t retaddr)
376
+ MemOpIdx oi, uintptr_t retaddr)
377
{
378
return full_le_lduw_mmu(env, addr, oi, retaddr);
379
}
380
381
static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
382
- TCGMemOpIdx oi, uintptr_t retaddr)
383
+ MemOpIdx oi, uintptr_t retaddr)
384
{
385
return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
386
full_be_lduw_mmu);
387
}
388
389
tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
390
- TCGMemOpIdx oi, uintptr_t retaddr)
391
+ MemOpIdx oi, uintptr_t retaddr)
392
{
393
return full_be_lduw_mmu(env, addr, oi, retaddr);
394
}
395
396
static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
397
- TCGMemOpIdx oi, uintptr_t retaddr)
398
+ MemOpIdx oi, uintptr_t retaddr)
399
{
400
return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
401
full_le_ldul_mmu);
402
}
403
404
tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
405
- TCGMemOpIdx oi, uintptr_t retaddr)
406
+ MemOpIdx oi, uintptr_t retaddr)
407
{
408
return full_le_ldul_mmu(env, addr, oi, retaddr);
409
}
410
411
static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
412
- TCGMemOpIdx oi, uintptr_t retaddr)
413
+ MemOpIdx oi, uintptr_t retaddr)
414
{
415
return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
416
full_be_ldul_mmu);
417
}
418
419
tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
420
- TCGMemOpIdx oi, uintptr_t retaddr)
421
+ MemOpIdx oi, uintptr_t retaddr)
422
{
423
return full_be_ldul_mmu(env, addr, oi, retaddr);
424
}
425
426
uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
427
- TCGMemOpIdx oi, uintptr_t retaddr)
428
+ MemOpIdx oi, uintptr_t retaddr)
429
{
430
return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
431
helper_le_ldq_mmu);
432
}
433
434
uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
435
- TCGMemOpIdx oi, uintptr_t retaddr)
436
+ MemOpIdx oi, uintptr_t retaddr)
437
{
438
return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
439
helper_be_ldq_mmu);
440
@@ -XXX,XX +XXX,XX @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
441
442
443
tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
444
- TCGMemOpIdx oi, uintptr_t retaddr)
445
+ MemOpIdx oi, uintptr_t retaddr)
446
{
447
return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
448
}
449
450
tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
451
- TCGMemOpIdx oi, uintptr_t retaddr)
452
+ MemOpIdx oi, uintptr_t retaddr)
453
{
454
return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
455
}
456
457
tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
458
- TCGMemOpIdx oi, uintptr_t retaddr)
459
+ MemOpIdx oi, uintptr_t retaddr)
460
{
461
return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
462
}
463
464
tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
465
- TCGMemOpIdx oi, uintptr_t retaddr)
466
+ MemOpIdx oi, uintptr_t retaddr)
467
{
468
return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
469
}
470
471
tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
472
- TCGMemOpIdx oi, uintptr_t retaddr)
473
+ MemOpIdx oi, uintptr_t retaddr)
474
{
475
return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
476
}
477
@@ -XXX,XX +XXX,XX @@ static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
478
MemOp op, FullLoadHelper *full_load)
479
{
480
uint16_t meminfo;
481
- TCGMemOpIdx oi;
482
+ MemOpIdx oi;
483
uint64_t ret;
484
485
meminfo = trace_mem_get_info(op, mmu_idx, false);
486
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
487
uintptr_t index, index2;
488
CPUTLBEntry *entry, *entry2;
489
target_ulong page2, tlb_addr, tlb_addr2;
490
- TCGMemOpIdx oi;
491
+ MemOpIdx oi;
492
size_t size2;
493
int i;
494
495
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
496
497
static inline void QEMU_ALWAYS_INLINE
498
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
499
- TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
500
+ MemOpIdx oi, uintptr_t retaddr, MemOp op)
501
{
502
uintptr_t mmu_idx = get_mmuidx(oi);
503
uintptr_t index = tlb_index(env, mmu_idx, addr);
504
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
505
506
void __attribute__((noinline))
507
helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
508
- TCGMemOpIdx oi, uintptr_t retaddr)
509
+ MemOpIdx oi, uintptr_t retaddr)
510
{
511
store_helper(env, addr, val, oi, retaddr, MO_UB);
512
}
513
514
void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
515
- TCGMemOpIdx oi, uintptr_t retaddr)
516
+ MemOpIdx oi, uintptr_t retaddr)
517
{
518
store_helper(env, addr, val, oi, retaddr, MO_LEUW);
519
}
520
521
void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
522
- TCGMemOpIdx oi, uintptr_t retaddr)
523
+ MemOpIdx oi, uintptr_t retaddr)
524
{
525
store_helper(env, addr, val, oi, retaddr, MO_BEUW);
526
}
527
528
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
529
- TCGMemOpIdx oi, uintptr_t retaddr)
530
+ MemOpIdx oi, uintptr_t retaddr)
531
{
532
store_helper(env, addr, val, oi, retaddr, MO_LEUL);
533
}
534
535
void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
536
- TCGMemOpIdx oi, uintptr_t retaddr)
537
+ MemOpIdx oi, uintptr_t retaddr)
538
{
539
store_helper(env, addr, val, oi, retaddr, MO_BEUL);
540
}
541
542
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
543
- TCGMemOpIdx oi, uintptr_t retaddr)
544
+ MemOpIdx oi, uintptr_t retaddr)
545
{
546
store_helper(env, addr, val, oi, retaddr, MO_LEQ);
547
}
548
549
void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
550
- TCGMemOpIdx oi, uintptr_t retaddr)
551
+ MemOpIdx oi, uintptr_t retaddr)
552
{
553
store_helper(env, addr, val, oi, retaddr, MO_BEQ);
554
}
555
@@ -XXX,XX +XXX,XX @@ static inline void QEMU_ALWAYS_INLINE
556
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
557
int mmu_idx, uintptr_t retaddr, MemOp op)
558
{
559
- TCGMemOpIdx oi;
560
+ MemOpIdx oi;
561
uint16_t meminfo;
562
563
meminfo = trace_mem_get_info(op, mmu_idx, true);
564
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
565
/* Code access functions. */
566
567
static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
568
- TCGMemOpIdx oi, uintptr_t retaddr)
569
+ MemOpIdx oi, uintptr_t retaddr)
570
{
571
return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
572
}
573
574
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
575
{
576
- TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
577
+ MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
578
return full_ldub_code(env, addr, oi, 0);
579
}
580
581
static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
582
- TCGMemOpIdx oi, uintptr_t retaddr)
583
+ MemOpIdx oi, uintptr_t retaddr)
584
{
585
return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
586
}
587
588
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
589
{
590
- TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
591
+ MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
592
return full_lduw_code(env, addr, oi, 0);
593
}
594
595
static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
596
- TCGMemOpIdx oi, uintptr_t retaddr)
597
+ MemOpIdx oi, uintptr_t retaddr)
598
{
599
return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
600
}
601
602
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
603
{
604
- TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
605
+ MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
606
return full_ldl_code(env, addr, oi, 0);
607
}
608
609
static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
610
- TCGMemOpIdx oi, uintptr_t retaddr)
611
+ MemOpIdx oi, uintptr_t retaddr)
612
{
613
return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
614
}
615
616
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
617
{
618
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
619
+ MemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
620
return full_ldq_code(env, addr, oi, 0);
621
}
622
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
623
index XXXXXXX..XXXXXXX 100644
624
--- a/accel/tcg/user-exec.c
625
+++ b/accel/tcg/user-exec.c
626
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
627
* @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
628
*/
629
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
630
- TCGMemOpIdx oi, int size, int prot,
631
+ MemOpIdx oi, int size, int prot,
632
uintptr_t retaddr)
633
{
634
/* Enforce qemu required alignment. */
635
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
636
index XXXXXXX..XXXXXXX 100644
637
--- a/target/arm/helper-a64.c
638
+++ b/target/arm/helper-a64.c
639
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
640
clear_helper_retaddr();
641
#else
642
int mem_idx = cpu_mmu_index(env, false);
643
- TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
644
- TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
645
+ MemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
646
+ MemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
647
648
o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
649
o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
650
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
651
uintptr_t ra = GETPC();
652
bool success;
653
int mem_idx;
654
- TCGMemOpIdx oi;
655
+ MemOpIdx oi;
656
657
assert(HAVE_CMPXCHG128);
658
659
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
660
clear_helper_retaddr();
661
#else
662
int mem_idx = cpu_mmu_index(env, false);
663
- TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
664
- TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
665
+ MemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
666
+ MemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
667
668
o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
669
o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
670
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
671
uintptr_t ra = GETPC();
672
bool success;
673
int mem_idx;
674
- TCGMemOpIdx oi;
675
+ MemOpIdx oi;
676
677
assert(HAVE_CMPXCHG128);
678
679
@@ -XXX,XX +XXX,XX @@ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
680
Int128 oldv, cmpv, newv;
681
uintptr_t ra = GETPC();
682
int mem_idx;
683
- TCGMemOpIdx oi;
684
+ MemOpIdx oi;
685
686
assert(HAVE_CMPXCHG128);
687
688
@@ -XXX,XX +XXX,XX @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
689
Int128 oldv, cmpv, newv;
690
uintptr_t ra = GETPC();
691
int mem_idx;
692
- TCGMemOpIdx oi;
693
+ MemOpIdx oi;
694
695
assert(HAVE_CMPXCHG128);
696
697
diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
698
index XXXXXXX..XXXXXXX 100644
699
--- a/target/arm/m_helper.c
700
+++ b/target/arm/m_helper.c
701
@@ -XXX,XX +XXX,XX @@ static bool do_v7m_function_return(ARMCPU *cpu)
702
703
{
704
bool threadmode, spsel;
705
- TCGMemOpIdx oi;
706
+ MemOpIdx oi;
707
ARMMMUIdx mmu_idx;
708
uint32_t *frame_sp_p;
709
uint32_t frameptr;
710
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
711
index XXXXXXX..XXXXXXX 100644
712
--- a/target/i386/tcg/mem_helper.c
713
+++ b/target/i386/tcg/mem_helper.c
714
@@ -XXX,XX +XXX,XX @@ void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
715
{
716
uintptr_t ra = GETPC();
717
int mem_idx = cpu_mmu_index(env, false);
718
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx);
719
+ MemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx);
720
oldv = cpu_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra);
721
}
722
723
@@ -XXX,XX +XXX,XX @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
724
Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
725
726
int mem_idx = cpu_mmu_index(env, false);
727
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
728
+ MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
729
Int128 oldv = cpu_atomic_cmpxchgo_le_mmu(env, a0, cmpv, newv, oi, ra);
730
731
if (int128_eq(oldv, cmpv)) {
732
diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c
733
index XXXXXXX..XXXXXXX 100644
734
--- a/target/m68k/op_helper.c
735
+++ b/target/m68k/op_helper.c
736
@@ -XXX,XX +XXX,XX @@ static void do_cas2l(CPUM68KState *env, uint32_t regs, uint32_t a1, uint32_t a2,
737
uintptr_t ra = GETPC();
738
#if defined(CONFIG_ATOMIC64)
739
int mmu_idx = cpu_mmu_index(env, 0);
740
- TCGMemOpIdx oi = make_memop_idx(MO_BEQ, mmu_idx);
741
+ MemOpIdx oi = make_memop_idx(MO_BEQ, mmu_idx);
742
#endif
743
744
if (parallel) {
745
diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c
746
index XXXXXXX..XXXXXXX 100644
747
--- a/target/mips/tcg/msa_helper.c
748
+++ b/target/mips/tcg/msa_helper.c
749
@@ -XXX,XX +XXX,XX @@ void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
750
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
751
752
#if !defined(CONFIG_USER_ONLY)
753
-#define MEMOP_IDX(DF) \
754
- TCGMemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN, \
755
- cpu_mmu_index(env, false));
756
+#define MEMOP_IDX(DF) \
757
+ MemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN, \
758
+ cpu_mmu_index(env, false));
759
#else
760
#define MEMOP_IDX(DF)
761
#endif
762
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
763
index XXXXXXX..XXXXXXX 100644
764
--- a/target/s390x/tcg/mem_helper.c
765
+++ b/target/s390x/tcg/mem_helper.c
766
@@ -XXX,XX +XXX,XX @@ static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
767
g_assert(haddr);
768
memset(haddr, byte, size);
769
#else
770
- TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
771
+ MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
772
int i;
773
774
if (likely(haddr)) {
775
@@ -XXX,XX +XXX,XX @@ static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
776
#ifdef CONFIG_USER_ONLY
777
return ldub_p(*haddr + offset);
778
#else
779
- TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
780
+ MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
781
uint8_t byte;
782
783
if (likely(*haddr)) {
784
@@ -XXX,XX +XXX,XX @@ static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
785
#ifdef CONFIG_USER_ONLY
786
stb_p(*haddr + offset, byte);
787
#else
788
- TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
789
+ MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
790
791
if (likely(*haddr)) {
792
stb_p(*haddr + offset, byte);
793
@@ -XXX,XX +XXX,XX @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
794
Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
795
Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
796
int mem_idx;
797
- TCGMemOpIdx oi;
798
+ MemOpIdx oi;
799
Int128 oldv;
800
bool fail;
801
802
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
803
uint32_t *haddr = g2h(env_cpu(env), a1);
804
ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
805
#else
806
- TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
807
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
808
ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
809
#endif
810
} else {
811
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
812
813
if (parallel) {
814
#ifdef CONFIG_ATOMIC64
815
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
816
+ MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
817
ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
818
#else
819
/* Note that we asserted !parallel above. */
820
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
821
cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
822
cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
823
} else if (HAVE_CMPXCHG128) {
824
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
825
+ MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
826
ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
827
cc = !int128_eq(ov, cv);
828
} else {
829
@@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
830
cpu_stq_data_ra(env, a2 + 0, svh, ra);
831
cpu_stq_data_ra(env, a2 + 8, svl, ra);
832
} else if (HAVE_ATOMIC128) {
833
- TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
834
+ MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
835
Int128 sv = int128_make128(svl, svh);
836
cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
837
} else {
838
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
839
uintptr_t ra = GETPC();
840
uint64_t hi, lo;
841
int mem_idx;
842
- TCGMemOpIdx oi;
843
+ MemOpIdx oi;
844
Int128 v;
845
846
assert(HAVE_ATOMIC128);
847
@@ -XXX,XX +XXX,XX @@ void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
848
{
849
uintptr_t ra = GETPC();
850
int mem_idx;
851
- TCGMemOpIdx oi;
852
+ MemOpIdx oi;
853
Int128 v;
854
855
assert(HAVE_ATOMIC128);
856
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
857
index XXXXXXX..XXXXXXX 100644
858
--- a/target/sparc/ldst_helper.c
859
+++ b/target/sparc/ldst_helper.c
860
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
861
case ASI_SNF:
862
case ASI_SNFL:
863
{
864
- TCGMemOpIdx oi;
865
+ MemOpIdx oi;
866
int idx = (env->pstate & PS_PRIV
867
? (asi & 1 ? MMU_KERNEL_SECONDARY_IDX : MMU_KERNEL_IDX)
868
: (asi & 1 ? MMU_USER_SECONDARY_IDX : MMU_USER_IDX));
869
diff --git a/tcg/optimize.c b/tcg/optimize.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
870
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
871
--- a/tcg/optimize.c
9
--- a/tcg/optimize.c
872
+++ b/tcg/optimize.c
10
+++ b/tcg/optimize.c
873
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
11
@@ -XXX,XX +XXX,XX @@ static void finish_ebb(OptContext *ctx)
874
12
remove_mem_copy_all(ctx);
875
CASE_OP_32_64(qemu_ld):
13
}
876
{
14
877
- TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
15
-static void finish_folding(OptContext *ctx, TCGOp *op)
878
+ MemOpIdx oi = op->args[nb_oargs + nb_iargs];
16
+static bool finish_folding(OptContext *ctx, TCGOp *op)
879
MemOp mop = get_memop(oi);
880
if (!(mop & MO_SIGN)) {
881
mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
882
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
883
index XXXXXXX..XXXXXXX 100644
884
--- a/tcg/tcg-op.c
885
+++ b/tcg/tcg-op.c
886
@@ -XXX,XX +XXX,XX @@ static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
887
static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
888
MemOp memop, TCGArg idx)
889
{
17
{
890
- TCGMemOpIdx oi = make_memop_idx(memop, idx);
18
const TCGOpDef *def = &tcg_op_defs[op->opc];
891
+ MemOpIdx oi = make_memop_idx(memop, idx);
19
int i, nb_oargs;
892
#if TARGET_LONG_BITS == 32
20
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
893
tcg_gen_op3i_i32(opc, val, addr, oi);
21
ts_info(ts)->z_mask = ctx->z_mask;
894
#else
22
}
895
@@ -XXX,XX +XXX,XX @@ static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
23
}
896
static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
24
+ return true;
897
MemOp memop, TCGArg idx)
898
{
899
- TCGMemOpIdx oi = make_memop_idx(memop, idx);
900
+ MemOpIdx oi = make_memop_idx(memop, idx);
901
#if TARGET_LONG_BITS == 32
902
if (TCG_TARGET_REG_BITS == 32) {
903
tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
904
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
905
tcg_temp_free_i32(t1);
906
} else {
907
gen_atomic_cx_i32 gen;
908
- TCGMemOpIdx oi;
909
+ MemOpIdx oi;
910
911
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
912
tcg_debug_assert(gen != NULL);
913
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
914
} else if ((memop & MO_SIZE) == MO_64) {
915
#ifdef CONFIG_ATOMIC64
916
gen_atomic_cx_i64 gen;
917
- TCGMemOpIdx oi;
918
+ MemOpIdx oi;
919
920
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
921
tcg_debug_assert(gen != NULL);
922
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
923
TCGArg idx, MemOp memop, void * const table[])
924
{
925
gen_atomic_op_i32 gen;
926
- TCGMemOpIdx oi;
927
+ MemOpIdx oi;
928
929
memop = tcg_canonicalize_memop(memop, 0, 0);
930
931
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
932
if ((memop & MO_SIZE) == MO_64) {
933
#ifdef CONFIG_ATOMIC64
934
gen_atomic_op_i64 gen;
935
- TCGMemOpIdx oi;
936
+ MemOpIdx oi;
937
938
gen = table[memop & (MO_SIZE | MO_BSWAP)];
939
tcg_debug_assert(gen != NULL);
940
diff --git a/tcg/tcg.c b/tcg/tcg.c
941
index XXXXXXX..XXXXXXX 100644
942
--- a/tcg/tcg.c
943
+++ b/tcg/tcg.c
944
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs)
945
case INDEX_op_qemu_ld_i64:
946
case INDEX_op_qemu_st_i64:
947
{
948
- TCGMemOpIdx oi = op->args[k++];
949
+ MemOpIdx oi = op->args[k++];
950
MemOp op = get_memop(oi);
951
unsigned ix = get_mmuidx(oi);
952
953
diff --git a/tcg/tci.c b/tcg/tci.c
954
index XXXXXXX..XXXXXXX 100644
955
--- a/tcg/tci.c
956
+++ b/tcg/tci.c
957
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_uint64(uint32_t high, uint32_t low)
958
* i = immediate (uint32_t)
959
* I = immediate (tcg_target_ulong)
960
* l = label or pointer
961
- * m = immediate (TCGMemOpIdx)
962
+ * m = immediate (MemOpIdx)
963
* n = immediate (call return length)
964
* r = register
965
* s = signed ldst offset
966
@@ -XXX,XX +XXX,XX @@ static void tci_args_ri(uint32_t insn, TCGReg *r0, tcg_target_ulong *i1)
967
}
25
}
968
26
969
static void tci_args_rrm(uint32_t insn, TCGReg *r0,
27
/*
970
- TCGReg *r1, TCGMemOpIdx *m2)
28
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
971
+ TCGReg *r1, MemOpIdx *m2)
29
fold_xi_to_x(ctx, op, 0)) {
972
{
30
return true;
973
*r0 = extract32(insn, 8, 4);
31
}
974
*r1 = extract32(insn, 12, 4);
32
- return false;
975
@@ -XXX,XX +XXX,XX @@ static void tci_args_rrrc(uint32_t insn,
33
+ return finish_folding(ctx, op);
976
}
34
}
977
35
978
static void tci_args_rrrm(uint32_t insn,
36
/* We cannot as yet do_constant_folding with vectors. */
979
- TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGMemOpIdx *m3)
37
@@ -XXX,XX +XXX,XX @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op)
980
+ TCGReg *r0, TCGReg *r1, TCGReg *r2, MemOpIdx *m3)
38
fold_xi_to_x(ctx, op, 0)) {
981
{
39
return true;
982
*r0 = extract32(insn, 8, 4);
40
}
983
*r1 = extract32(insn, 12, 4);
41
- return false;
984
@@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
42
+ return finish_folding(ctx, op);
985
}
43
}
986
44
987
static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
45
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
988
- TCGMemOpIdx oi, const void *tb_ptr)
46
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
989
+ MemOpIdx oi, const void *tb_ptr)
47
op->args[4] = arg_new_constant(ctx, bl);
990
{
48
op->args[5] = arg_new_constant(ctx, bh);
991
MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
49
}
992
uintptr_t ra = (uintptr_t)tb_ptr;
50
- return false;
993
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
51
+ return finish_folding(ctx, op);
994
}
52
}
995
53
996
static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
54
static bool fold_add2(OptContext *ctx, TCGOp *op)
997
- TCGMemOpIdx oi, const void *tb_ptr)
998
+ MemOpIdx oi, const void *tb_ptr)
999
{
1000
MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
1001
uintptr_t ra = (uintptr_t)tb_ptr;
1002
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
1003
uint32_t tmp32;
1004
uint64_t tmp64;
1005
uint64_t T1, T2;
1006
- TCGMemOpIdx oi;
1007
+ MemOpIdx oi;
1008
int32_t ofs;
1009
void *ptr;
1010
1011
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
1012
tcg_target_ulong i1;
1013
int32_t s2;
1014
TCGCond c;
1015
- TCGMemOpIdx oi;
1016
+ MemOpIdx oi;
1017
uint8_t pos, len;
1018
void *ptr;
1019
1020
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
1021
index XXXXXXX..XXXXXXX 100644
1022
--- a/accel/tcg/atomic_common.c.inc
1023
+++ b/accel/tcg/atomic_common.c.inc
1024
@@ -XXX,XX +XXX,XX @@
1025
*/
1026
1027
static uint16_t atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr,
1028
- TCGMemOpIdx oi)
1029
+ MemOpIdx oi)
1030
{
1031
CPUState *cpu = env_cpu(env);
1032
uint16_t info = trace_mem_get_info(get_memop(oi), get_mmuidx(oi), false);
1033
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
1034
1035
#if HAVE_ATOMIC128
1036
static uint16_t atomic_trace_ld_pre(CPUArchState *env, target_ulong addr,
1037
- TCGMemOpIdx oi)
1038
+ MemOpIdx oi)
1039
{
1040
uint16_t info = trace_mem_get_info(get_memop(oi), get_mmuidx(oi), false);
1041
1042
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
1043
}
1044
1045
static uint16_t atomic_trace_st_pre(CPUArchState *env, target_ulong addr,
1046
- TCGMemOpIdx oi)
1047
+ MemOpIdx oi)
1048
{
1049
uint16_t info = trace_mem_get_info(get_memop(oi), get_mmuidx(oi), true);
1050
1051
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
1052
index XXXXXXX..XXXXXXX 100644
1053
--- a/tcg/aarch64/tcg-target.c.inc
1054
+++ b/tcg/aarch64/tcg-target.c.inc
1055
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1056
#include "../tcg-ldst.c.inc"
1057
1058
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1059
- * TCGMemOpIdx oi, uintptr_t ra)
1060
+ * MemOpIdx oi, uintptr_t ra)
1061
*/
1062
static void * const qemu_ld_helpers[MO_SIZE + 1] = {
1063
[MO_8] = helper_ret_ldub_mmu,
1064
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[MO_SIZE + 1] = {
1065
};
1066
1067
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1068
- * uintxx_t val, TCGMemOpIdx oi,
1069
+ * uintxx_t val, MemOpIdx oi,
1070
* uintptr_t ra)
1071
*/
1072
static void * const qemu_st_helpers[MO_SIZE + 1] = {
1073
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1074
1075
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1076
{
1077
- TCGMemOpIdx oi = lb->oi;
1078
+ MemOpIdx oi = lb->oi;
1079
MemOp opc = get_memop(oi);
1080
MemOp size = opc & MO_SIZE;
1081
1082
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1083
1084
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1085
{
1086
- TCGMemOpIdx oi = lb->oi;
1087
+ MemOpIdx oi = lb->oi;
1088
MemOp opc = get_memop(oi);
1089
MemOp size = opc & MO_SIZE;
1090
1091
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1092
return true;
1093
}
1094
1095
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1096
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1097
TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1098
tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1099
{
1100
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1101
}
1102
1103
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1104
- TCGMemOpIdx oi, TCGType ext)
1105
+ MemOpIdx oi, TCGType ext)
1106
{
1107
MemOp memop = get_memop(oi);
1108
const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1109
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1110
}
1111
1112
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1113
- TCGMemOpIdx oi)
1114
+ MemOpIdx oi)
1115
{
1116
MemOp memop = get_memop(oi);
1117
const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1118
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
1119
index XXXXXXX..XXXXXXX 100644
1120
--- a/tcg/arm/tcg-target.c.inc
1121
+++ b/tcg/arm/tcg-target.c.inc
1122
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1123
/* Record the context of a call to the out of line helper code for the slow
1124
path for a load or store, so that we can later generate the correct
1125
helper code. */
1126
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1127
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1128
TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1129
TCGReg addrhi, tcg_insn_unit *raddr,
1130
tcg_insn_unit *label_ptr)
1131
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1132
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1133
{
1134
TCGReg argreg, datalo, datahi;
1135
- TCGMemOpIdx oi = lb->oi;
1136
+ MemOpIdx oi = lb->oi;
1137
MemOp opc = get_memop(oi);
1138
void *func;
1139
1140
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1141
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1142
{
1143
TCGReg argreg, datalo, datahi;
1144
- TCGMemOpIdx oi = lb->oi;
1145
+ MemOpIdx oi = lb->oi;
1146
MemOp opc = get_memop(oi);
1147
1148
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1149
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
1150
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1151
{
1152
TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1153
- TCGMemOpIdx oi;
1154
+ MemOpIdx oi;
1155
MemOp opc;
1156
#ifdef CONFIG_SOFTMMU
1157
int mem_index;
1158
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
1159
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1160
{
1161
TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1162
- TCGMemOpIdx oi;
1163
+ MemOpIdx oi;
1164
MemOp opc;
1165
#ifdef CONFIG_SOFTMMU
1166
int mem_index;
1167
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
1168
index XXXXXXX..XXXXXXX 100644
1169
--- a/tcg/i386/tcg-target.c.inc
1170
+++ b/tcg/i386/tcg-target.c.inc
1171
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1172
* for a load or store, so that we can later generate the correct helper code
1173
*/
1174
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64,
1175
- TCGMemOpIdx oi,
1176
+ MemOpIdx oi,
1177
TCGReg datalo, TCGReg datahi,
1178
TCGReg addrlo, TCGReg addrhi,
1179
tcg_insn_unit *raddr,
1180
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64,
1181
*/
1182
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1183
{
1184
- TCGMemOpIdx oi = l->oi;
1185
+ MemOpIdx oi = l->oi;
1186
MemOp opc = get_memop(oi);
1187
TCGReg data_reg;
1188
tcg_insn_unit **label_ptr = &l->label_ptr[0];
1189
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1190
*/
1191
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1192
{
1193
- TCGMemOpIdx oi = l->oi;
1194
+ MemOpIdx oi = l->oi;
1195
MemOp opc = get_memop(oi);
1196
MemOp s_bits = opc & MO_SIZE;
1197
tcg_insn_unit **label_ptr = &l->label_ptr[0];
1198
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1199
{
1200
TCGReg datalo, datahi, addrlo;
1201
TCGReg addrhi __attribute__((unused));
1202
- TCGMemOpIdx oi;
1203
+ MemOpIdx oi;
1204
MemOp opc;
1205
#if defined(CONFIG_SOFTMMU)
1206
int mem_index;
1207
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1208
{
1209
TCGReg datalo, datahi, addrlo;
1210
TCGReg addrhi __attribute__((unused));
1211
- TCGMemOpIdx oi;
1212
+ MemOpIdx oi;
1213
MemOp opc;
1214
#if defined(CONFIG_SOFTMMU)
1215
int mem_index;
1216
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
1217
index XXXXXXX..XXXXXXX 100644
1218
--- a/tcg/mips/tcg-target.c.inc
1219
+++ b/tcg/mips/tcg-target.c.inc
1220
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
1221
* Clobbers TMP0, TMP1, TMP2, TMP3.
1222
*/
1223
static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
1224
- TCGReg addrh, TCGMemOpIdx oi,
1225
+ TCGReg addrh, MemOpIdx oi,
1226
tcg_insn_unit *label_ptr[2], bool is_load)
1227
{
1228
MemOp opc = get_memop(oi);
1229
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
1230
tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl);
1231
}
1232
1233
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
1234
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
1235
TCGType ext,
1236
TCGReg datalo, TCGReg datahi,
1237
TCGReg addrlo, TCGReg addrhi,
1238
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
1239
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1240
{
1241
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
1242
- TCGMemOpIdx oi = l->oi;
1243
+ MemOpIdx oi = l->oi;
1244
MemOp opc = get_memop(oi);
1245
TCGReg v0;
1246
int i;
1247
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1248
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1249
{
1250
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
1251
- TCGMemOpIdx oi = l->oi;
1252
+ MemOpIdx oi = l->oi;
1253
MemOp opc = get_memop(oi);
1254
MemOp s_bits = opc & MO_SIZE;
1255
int i;
1256
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
1257
{
1258
TCGReg addr_regl, addr_regh __attribute__((unused));
1259
TCGReg data_regl, data_regh;
1260
- TCGMemOpIdx oi;
1261
+ MemOpIdx oi;
1262
MemOp opc;
1263
#if defined(CONFIG_SOFTMMU)
1264
tcg_insn_unit *label_ptr[2];
1265
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
1266
{
1267
TCGReg addr_regl, addr_regh __attribute__((unused));
1268
TCGReg data_regl, data_regh;
1269
- TCGMemOpIdx oi;
1270
+ MemOpIdx oi;
1271
MemOp opc;
1272
#if defined(CONFIG_SOFTMMU)
1273
tcg_insn_unit *label_ptr[2];
1274
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
1275
index XXXXXXX..XXXXXXX 100644
1276
--- a/tcg/ppc/tcg-target.c.inc
1277
+++ b/tcg/ppc/tcg-target.c.inc
1278
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
1279
/* Record the context of a call to the out of line helper code for the slow
1280
path for a load or store, so that we can later generate the correct
1281
helper code. */
1282
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1283
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1284
TCGReg datalo_reg, TCGReg datahi_reg,
1285
TCGReg addrlo_reg, TCGReg addrhi_reg,
1286
tcg_insn_unit *raddr, tcg_insn_unit *lptr)
1287
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1288
1289
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1290
{
1291
- TCGMemOpIdx oi = lb->oi;
1292
+ MemOpIdx oi = lb->oi;
1293
MemOp opc = get_memop(oi);
1294
TCGReg hi, lo, arg = TCG_REG_R3;
1295
1296
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1297
1298
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1299
{
1300
- TCGMemOpIdx oi = lb->oi;
1301
+ MemOpIdx oi = lb->oi;
1302
MemOp opc = get_memop(oi);
1303
MemOp s_bits = opc & MO_SIZE;
1304
TCGReg hi, lo, arg = TCG_REG_R3;
1305
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
1306
{
1307
TCGReg datalo, datahi, addrlo, rbase;
1308
TCGReg addrhi __attribute__((unused));
1309
- TCGMemOpIdx oi;
1310
+ MemOpIdx oi;
1311
MemOp opc, s_bits;
1312
#ifdef CONFIG_SOFTMMU
1313
int mem_index;
1314
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
1315
{
1316
TCGReg datalo, datahi, addrlo, rbase;
1317
TCGReg addrhi __attribute__((unused));
1318
- TCGMemOpIdx oi;
1319
+ MemOpIdx oi;
1320
MemOp opc, s_bits;
1321
#ifdef CONFIG_SOFTMMU
1322
int mem_index;
1323
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
1324
index XXXXXXX..XXXXXXX 100644
1325
--- a/tcg/riscv/tcg-target.c.inc
1326
+++ b/tcg/riscv/tcg-target.c.inc
1327
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
1328
#include "../tcg-ldst.c.inc"
1329
1330
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1331
- * TCGMemOpIdx oi, uintptr_t ra)
1332
+ * MemOpIdx oi, uintptr_t ra)
1333
*/
1334
static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
1335
[MO_UB] = helper_ret_ldub_mmu,
1336
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
1337
};
1338
1339
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1340
- * uintxx_t val, TCGMemOpIdx oi,
1341
+ * uintxx_t val, MemOpIdx oi,
1342
* uintptr_t ra)
1343
*/
1344
static void * const qemu_st_helpers[MO_SIZE + 1] = {
1345
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1346
}
1347
1348
static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
1349
- TCGReg addrh, TCGMemOpIdx oi,
1350
+ TCGReg addrh, MemOpIdx oi,
1351
tcg_insn_unit **label_ptr, bool is_load)
1352
{
1353
MemOp opc = get_memop(oi);
1354
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
1355
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
1356
}
1357
1358
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
1359
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
1360
TCGType ext,
1361
TCGReg datalo, TCGReg datahi,
1362
TCGReg addrlo, TCGReg addrhi,
1363
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
1364
1365
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1366
{
1367
- TCGMemOpIdx oi = l->oi;
1368
+ MemOpIdx oi = l->oi;
1369
MemOp opc = get_memop(oi);
1370
TCGReg a0 = tcg_target_call_iarg_regs[0];
1371
TCGReg a1 = tcg_target_call_iarg_regs[1];
1372
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1373
1374
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1375
{
1376
- TCGMemOpIdx oi = l->oi;
1377
+ MemOpIdx oi = l->oi;
1378
MemOp opc = get_memop(oi);
1379
MemOp s_bits = opc & MO_SIZE;
1380
TCGReg a0 = tcg_target_call_iarg_regs[0];
1381
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
1382
{
1383
TCGReg addr_regl, addr_regh __attribute__((unused));
1384
TCGReg data_regl, data_regh;
1385
- TCGMemOpIdx oi;
1386
+ MemOpIdx oi;
1387
MemOp opc;
1388
#if defined(CONFIG_SOFTMMU)
1389
tcg_insn_unit *label_ptr[1];
1390
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
1391
{
1392
TCGReg addr_regl, addr_regh __attribute__((unused));
1393
TCGReg data_regl, data_regh;
1394
- TCGMemOpIdx oi;
1395
+ MemOpIdx oi;
1396
MemOp opc;
1397
#if defined(CONFIG_SOFTMMU)
1398
tcg_insn_unit *label_ptr[1];
1399
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
1400
index XXXXXXX..XXXXXXX 100644
1401
--- a/tcg/s390/tcg-target.c.inc
1402
+++ b/tcg/s390/tcg-target.c.inc
1403
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1404
return addr_reg;
1405
}
1406
1407
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1408
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1409
TCGReg data, TCGReg addr,
1410
tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1411
{
1412
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1413
{
1414
TCGReg addr_reg = lb->addrlo_reg;
1415
TCGReg data_reg = lb->datalo_reg;
1416
- TCGMemOpIdx oi = lb->oi;
1417
+ MemOpIdx oi = lb->oi;
1418
MemOp opc = get_memop(oi);
1419
1420
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1421
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1422
{
1423
TCGReg addr_reg = lb->addrlo_reg;
1424
TCGReg data_reg = lb->datalo_reg;
1425
- TCGMemOpIdx oi = lb->oi;
1426
+ MemOpIdx oi = lb->oi;
1427
MemOp opc = get_memop(oi);
1428
1429
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1430
@@ -XXX,XX +XXX,XX @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1431
#endif /* CONFIG_SOFTMMU */
1432
1433
static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1434
- TCGMemOpIdx oi)
1435
+ MemOpIdx oi)
1436
{
1437
MemOp opc = get_memop(oi);
1438
#ifdef CONFIG_SOFTMMU
1439
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1440
}
1441
1442
static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1443
- TCGMemOpIdx oi)
1444
+ MemOpIdx oi)
1445
{
1446
MemOp opc = get_memop(oi);
1447
#ifdef CONFIG_SOFTMMU
1448
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
1449
index XXXXXXX..XXXXXXX 100644
1450
--- a/tcg/sparc/tcg-target.c.inc
1451
+++ b/tcg/sparc/tcg-target.c.inc
1452
@@ -XXX,XX +XXX,XX @@ static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
1453
};
1454
1455
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
1456
- TCGMemOpIdx oi, bool is_64)
1457
+ MemOpIdx oi, bool is_64)
1458
{
1459
MemOp memop = get_memop(oi);
1460
#ifdef CONFIG_SOFTMMU
1461
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
1462
}
1463
1464
static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
1465
- TCGMemOpIdx oi)
1466
+ MemOpIdx oi)
1467
{
1468
MemOp memop = get_memop(oi);
1469
#ifdef CONFIG_SOFTMMU
1470
diff --git a/tcg/tcg-ldst.c.inc b/tcg/tcg-ldst.c.inc
1471
index XXXXXXX..XXXXXXX 100644
1472
--- a/tcg/tcg-ldst.c.inc
1473
+++ b/tcg/tcg-ldst.c.inc
1474
@@ -XXX,XX +XXX,XX @@
1475
1476
typedef struct TCGLabelQemuLdst {
1477
bool is_ld; /* qemu_ld: true, qemu_st: false */
1478
- TCGMemOpIdx oi;
1479
+ MemOpIdx oi;
1480
TCGType type; /* result type of a load */
1481
TCGReg addrlo_reg; /* reg index for low word of guest virtual addr */
1482
TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */
1483
Add registers and function stubs. The functionality
is disabled via squashing s390_facilities[2] to 0.

We must still include results for the mandatory opcodes in
tcg_target_op_def, as all opcodes are checked during tcg init.

Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 4 +
tcg/s390x/tcg-target-con-str.h | 1 +
tcg/s390x/tcg-target.h | 35 ++++++++-
tcg/s390x/tcg-target.opc.h | 12 +++
tcg/s390x/tcg-target.c.inc | 137 ++++++++++++++++++++++++++++++++-
5 files changed, 184 insertions(+), 5 deletions(-)
create mode 100644 tcg/s390x/tcg-target.opc.h

Introduce ti_is_const, ti_const_val, ti_is_const_val.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
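As a rough usage sketch (the caller below is hypothetical; only ti_is_const, ti_const_val, ti_is_const_val and arg_info come from the patch itself), later folds can look the TempOptInfo up once and test it directly:

    TempOptInfo *t2 = arg_info(op->args[2]);

    if (ti_is_const_val(t2, 0)) {
        /* operand 2 is known to be constant 0 */
    } else if (ti_is_const(t2)) {
        uint64_t v2 = ti_const_val(t2);   /* some other known constant */
        /* ... fold using v2 ... */
    }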
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
19
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
20
--- a/tcg/s390x/tcg-target-con-set.h
10
--- a/tcg/optimize.c
21
+++ b/tcg/s390x/tcg-target-con-set.h
11
+++ b/tcg/optimize.c
22
@@ -XXX,XX +XXX,XX @@ C_O0_I1(r)
12
@@ -XXX,XX +XXX,XX @@ static inline TempOptInfo *arg_info(TCGArg arg)
23
C_O0_I2(L, L)
13
return ts_info(arg_temp(arg));
24
C_O0_I2(r, r)
25
C_O0_I2(r, ri)
26
+C_O0_I2(v, r)
27
C_O1_I1(r, L)
28
C_O1_I1(r, r)
29
+C_O1_I1(v, r)
30
+C_O1_I1(v, vr)
31
C_O1_I2(r, 0, ri)
32
C_O1_I2(r, 0, rI)
33
C_O1_I2(r, 0, rJ)
34
C_O1_I2(r, r, ri)
35
C_O1_I2(r, rZ, r)
36
+C_O1_I2(v, v, v)
37
C_O1_I4(r, r, ri, r, 0)
38
C_O1_I4(r, r, ri, rI, 0)
39
C_O2_I2(b, a, 0, r)
40
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
41
index XXXXXXX..XXXXXXX 100644
42
--- a/tcg/s390x/tcg-target-con-str.h
43
+++ b/tcg/s390x/tcg-target-con-str.h
44
@@ -XXX,XX +XXX,XX @@
45
*/
46
REGS('r', ALL_GENERAL_REGS)
47
REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
48
+REGS('v', ALL_VECTOR_REGS)
49
/*
50
* A (single) even/odd pair for division.
51
* TODO: Add something to the register allocator to allow
52
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tcg/s390x/tcg-target.h
55
+++ b/tcg/s390x/tcg-target.h
56
@@ -XXX,XX +XXX,XX @@ typedef enum TCGReg {
57
TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
58
TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15,
59
60
+ TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
61
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
62
+ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
63
+ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
64
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
65
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
66
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
67
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
68
+
69
TCG_AREG0 = TCG_REG_R10,
70
TCG_REG_CALL_STACK = TCG_REG_R15
71
} TCGReg;
72
73
-#define TCG_TARGET_NB_REGS 16
74
+#define TCG_TARGET_NB_REGS 64
75
76
/* A list of relevant facilities used by this translator. Some of these
77
are required for proper operation, and these are checked at startup. */
78
@@ -XXX,XX +XXX,XX @@ typedef enum TCGReg {
79
#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
80
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
81
#define FACILITY_LOAD_ON_COND2 53
82
+#define FACILITY_VECTOR 129
83
84
-extern uint64_t s390_facilities[1];
85
+extern uint64_t s390_facilities[3];
86
87
#define HAVE_FACILITY(X) \
88
((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
89
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[1];
90
#define TCG_TARGET_HAS_muluh_i64 0
91
#define TCG_TARGET_HAS_mulsh_i64 0
92
93
+#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
94
+#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
95
+#define TCG_TARGET_HAS_v256 0
96
+
97
+#define TCG_TARGET_HAS_andc_vec 0
98
+#define TCG_TARGET_HAS_orc_vec 0
99
+#define TCG_TARGET_HAS_not_vec 0
100
+#define TCG_TARGET_HAS_neg_vec 0
101
+#define TCG_TARGET_HAS_abs_vec 0
102
+#define TCG_TARGET_HAS_roti_vec 0
103
+#define TCG_TARGET_HAS_rots_vec 0
104
+#define TCG_TARGET_HAS_rotv_vec 0
105
+#define TCG_TARGET_HAS_shi_vec 0
106
+#define TCG_TARGET_HAS_shs_vec 0
107
+#define TCG_TARGET_HAS_shv_vec 0
108
+#define TCG_TARGET_HAS_mul_vec 0
109
+#define TCG_TARGET_HAS_sat_vec 0
110
+#define TCG_TARGET_HAS_minmax_vec 0
111
+#define TCG_TARGET_HAS_bitsel_vec 0
112
+#define TCG_TARGET_HAS_cmpsel_vec 0
113
+
114
/* used for function call generation */
115
#define TCG_TARGET_STACK_ALIGN        8
116
#define TCG_TARGET_CALL_STACK_OFFSET    160
117
diff --git a/tcg/s390x/tcg-target.opc.h b/tcg/s390x/tcg-target.opc.h
118
new file mode 100644
119
index XXXXXXX..XXXXXXX
120
--- /dev/null
121
+++ b/tcg/s390x/tcg-target.opc.h
122
@@ -XXX,XX +XXX,XX @@
123
+/*
124
+ * Copyright (c) 2021 Linaro
125
+ *
126
+ * This work is licensed under the terms of the GNU GPL, version 2 or
127
+ * (at your option) any later version.
128
+ *
129
+ * See the COPYING file in the top-level directory for details.
130
+ *
131
+ * Target-specific opcodes for host vector expansion. These will be
132
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
133
+ * consider these to be UNSPEC with names.
134
+ */
135
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
136
index XXXXXXX..XXXXXXX 100644
137
--- a/tcg/s390x/tcg-target.c.inc
138
+++ b/tcg/s390x/tcg-target.c.inc
139
@@ -XXX,XX +XXX,XX @@
140
#define TCG_CT_CONST_ZERO 0x800
141
142
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
143
+#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
144
+
145
/*
146
* For softmmu, we need to avoid conflicts with the first 3
147
* argument registers to perform the tlb lookup, and to call
148
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
149
150
#ifdef CONFIG_DEBUG_TCG
151
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
152
- "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
153
- "%r8", "%r9", "%r10" "%r11" "%r12" "%r13" "%r14" "%r15"
154
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
155
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
156
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
157
+ "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
158
+ "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
159
+ "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
160
+ "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
161
};
162
#endif
163
164
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
165
TCG_REG_R4,
166
TCG_REG_R3,
167
TCG_REG_R2,
168
+
169
+ /* V8-V15 are call saved, and omitted. */
170
+ TCG_REG_V0,
171
+ TCG_REG_V1,
172
+ TCG_REG_V2,
173
+ TCG_REG_V3,
174
+ TCG_REG_V4,
175
+ TCG_REG_V5,
176
+ TCG_REG_V6,
177
+ TCG_REG_V7,
178
+ TCG_REG_V16,
179
+ TCG_REG_V17,
180
+ TCG_REG_V18,
181
+ TCG_REG_V19,
182
+ TCG_REG_V20,
183
+ TCG_REG_V21,
184
+ TCG_REG_V22,
185
+ TCG_REG_V23,
186
+ TCG_REG_V24,
187
+ TCG_REG_V25,
188
+ TCG_REG_V26,
189
+ TCG_REG_V27,
190
+ TCG_REG_V28,
191
+ TCG_REG_V29,
192
+ TCG_REG_V30,
193
+ TCG_REG_V31,
194
};
195
196
static const int tcg_target_call_iarg_regs[] = {
197
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
198
#endif
199
200
static const tcg_insn_unit *tb_ret_addr;
201
-uint64_t s390_facilities[1];
202
+uint64_t s390_facilities[3];
203
204
static bool patch_reloc(tcg_insn_unit *src_rw, int type,
205
intptr_t value, intptr_t addend)
206
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
207
}
208
}
14
}
209
15
210
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
16
+static inline bool ti_is_const(TempOptInfo *ti)
211
+ TCGReg dst, TCGReg src)
212
+{
17
+{
213
+ g_assert_not_reached();
18
+ return ti->is_const;
214
+}
19
+}
215
+
20
+
216
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
21
+static inline uint64_t ti_const_val(TempOptInfo *ti)
217
+ TCGReg dst, TCGReg base, intptr_t offset)
218
+{
22
+{
219
+ g_assert_not_reached();
23
+ return ti->val;
220
+}
24
+}
221
+
25
+
222
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
26
+static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
223
+ TCGReg dst, int64_t val)
224
+{
27
+{
225
+ g_assert_not_reached();
28
+ return ti_is_const(ti) && ti_const_val(ti) == val;
226
+}
29
+}
227
+
30
+
228
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
31
static inline bool ts_is_const(TCGTemp *ts)
229
+ unsigned vecl, unsigned vece,
230
+ const TCGArg *args, const int *const_args)
231
+{
232
+ g_assert_not_reached();
233
+}
234
+
235
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
236
+{
237
+ return 0;
238
+}
239
+
240
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
241
+ TCGArg a0, ...)
242
+{
243
+ g_assert_not_reached();
244
+}
245
+
246
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
247
{
32
{
248
switch (op) {
33
- return ts_info(ts)->is_const;
249
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
34
+ return ti_is_const(ts_info(ts));
250
? C_O2_I4(r, r, 0, 1, rA, r)
251
: C_O2_I4(r, r, 0, 1, r, r));
252
253
+ case INDEX_op_st_vec:
254
+ return C_O0_I2(v, r);
255
+ case INDEX_op_ld_vec:
256
+ case INDEX_op_dupm_vec:
257
+ return C_O1_I1(v, r);
258
+ case INDEX_op_dup_vec:
259
+ return C_O1_I1(v, vr);
260
+ case INDEX_op_add_vec:
261
+ case INDEX_op_sub_vec:
262
+ case INDEX_op_and_vec:
263
+ case INDEX_op_or_vec:
264
+ case INDEX_op_xor_vec:
265
+ case INDEX_op_cmp_vec:
266
+ return C_O1_I2(v, v, v);
267
+
268
default:
269
g_assert_not_reached();
270
}
271
}
35
}
272
36
273
+/*
37
static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
274
+ * Mainline glibc added HWCAP_S390_VX before it was kernel abi.
275
+ * Some distros have fixed this up locally, others have not.
276
+ */
277
+#ifndef HWCAP_S390_VXRS
278
+#define HWCAP_S390_VXRS 2048
279
+#endif
280
+
281
static void query_s390_facilities(void)
282
{
38
{
283
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
39
- TempOptInfo *ti = ts_info(ts);
284
@@ -XXX,XX +XXX,XX @@ static void query_s390_facilities(void)
40
- return ti->is_const && ti->val == val;
285
asm volatile(".word 0xb2b0,0x1000"
41
+ return ti_is_const_val(ts_info(ts), val);
286
: "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
287
}
288
+
289
+ /*
290
+ * Use of vector registers requires os support beyond the facility bit.
291
+ * If the kernel does not advertise support, disable the facility bits.
292
+ * There is nothing else we currently care about in the 3rd word, so
293
+ * disable VECTOR with one store.
294
+ */
295
+ if (1 || !(hwcap & HWCAP_S390_VXRS)) {
296
+ s390_facilities[2] = 0;
297
+ }
298
}
42
}
299
43
300
static void tcg_target_init(TCGContext *s)
44
static inline bool arg_is_const(TCGArg arg)
301
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
302
303
tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
304
tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
305
+ if (HAVE_FACILITY(VECTOR)) {
306
+ tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
307
+ tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
308
+ }
309
310
tcg_target_call_clobber_regs = 0;
311
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
312
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
313
/* The return register can be considered call-clobbered. */
314
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
315
316
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
317
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
318
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
319
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
320
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
321
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
322
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
323
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
324
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
325
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
326
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
327
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
328
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
329
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
330
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
331
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
332
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
333
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
334
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
335
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
336
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
337
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
338
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
339
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
340
+
341
s->reserved_regs = 0;
342
tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
343
/* XXX many insns can't be used with R0, so we better avoid it for now */
344
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Sink mask computation below fold_affected_mask early exit.
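As a minimal sketch of the mask rule being computed here (names as in the hunk below, illustration only): for an AND, a bit known to be zero in either input is zero in the result, and only sign repetitions common to both inputs survive.

    z_mask = t1->z_mask & t2->z_mask;   /* known-possible-one bits shrink  */
    s_mask = t1->s_mask & t2->s_mask;   /* common sign-bit repetitions     */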
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 30 ++++++++++++++++--------------
8
1 file changed, 16 insertions(+), 14 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_add2(OptContext *ctx, TCGOp *op)
15
16
static bool fold_and(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t z1, z2;
19
+ uint64_t z1, z2, z_mask, s_mask;
20
+ TempOptInfo *t1, *t2;
21
22
if (fold_const2_commutative(ctx, op) ||
23
fold_xi_to_i(ctx, op, 0) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
25
return true;
26
}
27
28
- z1 = arg_info(op->args[1])->z_mask;
29
- z2 = arg_info(op->args[2])->z_mask;
30
- ctx->z_mask = z1 & z2;
31
-
32
- /*
33
- * Sign repetitions are perforce all identical, whether they are 1 or 0.
34
- * Bitwise operations preserve the relative quantity of the repetitions.
35
- */
36
- ctx->s_mask = arg_info(op->args[1])->s_mask
37
- & arg_info(op->args[2])->s_mask;
38
+ t1 = arg_info(op->args[1]);
39
+ t2 = arg_info(op->args[2]);
40
+ z1 = t1->z_mask;
41
+ z2 = t2->z_mask;
42
43
/*
44
* Known-zeros does not imply known-ones. Therefore unless
45
* arg2 is constant, we can't infer affected bits from it.
46
*/
47
- if (arg_is_const(op->args[2]) &&
48
- fold_affected_mask(ctx, op, z1 & ~z2)) {
49
+ if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
50
return true;
51
}
52
53
- return fold_masks(ctx, op);
54
+ z_mask = z1 & z2;
55
+
56
+ /*
57
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
58
+ * Bitwise operations preserve the relative quantity of the repetitions.
59
+ */
60
+ s_mask = t1->s_mask & t2->s_mask;
61
+
62
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
63
}
64
65
static bool fold_andc(OptContext *ctx, TCGOp *op)
66
--
67
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Avoid double inversion of the value of second const operand.
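For context (illustration only, names as in the hunk below): andc computes x & ~y, so when the second operand is a known constant v2 the bits set in v2 are simply cleared from the known-possible-ones mask, with no intermediate inversion.

    uint64_t v2 = ti_const_val(t2);
    z_mask &= ~v2;                      /* bits set in v2 cannot survive */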
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 21 +++++++++++----------
8
1 file changed, 11 insertions(+), 10 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
15
16
static bool fold_andc(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t z1;
19
+ uint64_t z_mask, s_mask;
20
+ TempOptInfo *t1, *t2;
21
22
if (fold_const2(ctx, op) ||
23
fold_xx_to_i(ctx, op, 0) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
25
return true;
26
}
27
28
- z1 = arg_info(op->args[1])->z_mask;
29
+ t1 = arg_info(op->args[1]);
30
+ t2 = arg_info(op->args[2]);
31
+ z_mask = t1->z_mask;
32
33
/*
34
* Known-zeros does not imply known-ones. Therefore unless
35
* arg2 is constant, we can't infer anything from it.
36
*/
37
- if (arg_is_const(op->args[2])) {
38
- uint64_t z2 = ~arg_info(op->args[2])->z_mask;
39
- if (fold_affected_mask(ctx, op, z1 & ~z2)) {
40
+ if (ti_is_const(t2)) {
41
+ uint64_t v2 = ti_const_val(t2);
42
+ if (fold_affected_mask(ctx, op, z_mask & v2)) {
43
return true;
44
}
45
- z1 &= z2;
46
+ z_mask &= ~v2;
47
}
48
- ctx->z_mask = z1;
49
50
- ctx->s_mask = arg_info(op->args[1])->s_mask
51
- & arg_info(op->args[2])->s_mask;
52
- return fold_masks(ctx, op);
53
+ s_mask = t1->s_mask & t2->s_mask;
54
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
55
}
56
57
static bool fold_brcond(OptContext *ctx, TCGOp *op)
58
--
59
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Always set s_mask along the BSWAP_OS path, since the result is
3
being explicitly sign-extended.
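A scalar sketch of the BSWAP_OS case (plain C, not part of the patch): after the swap the value is sign-extended, so every bit above bit 15 repeats bit 15, which is what the wider s_mask now records unconditionally.

    static int64_t bswap16_os(uint16_t x)
    {
        uint16_t swapped = (uint16_t)((x << 8) | (x >> 8));
        return (int16_t)swapped;        /* explicit sign-extension */
    }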
1
4
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 21 ++++++++++-----------
9
1 file changed, 10 insertions(+), 11 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
16
static bool fold_bswap(OptContext *ctx, TCGOp *op)
17
{
18
uint64_t z_mask, s_mask, sign;
19
+ TempOptInfo *t1 = arg_info(op->args[1]);
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t = arg_info(op->args[1])->val;
23
-
24
- t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
25
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
26
+ if (ti_is_const(t1)) {
27
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
28
+ do_constant_folding(op->opc, ctx->type,
29
+ ti_const_val(t1),
30
+ op->args[2]));
31
}
32
33
- z_mask = arg_info(op->args[1])->z_mask;
34
-
35
+ z_mask = t1->z_mask;
36
switch (op->opc) {
37
case INDEX_op_bswap16_i32:
38
case INDEX_op_bswap16_i64:
39
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
40
/* If the sign bit may be 1, force all the bits above to 1. */
41
if (z_mask & sign) {
42
z_mask |= sign;
43
- s_mask = sign << 1;
44
}
45
+ /* The value and therefore s_mask is explicitly sign-extended. */
46
+ s_mask = sign;
47
break;
48
default:
49
/* The high bits are undefined: force all bits above the sign to 1. */
50
z_mask |= sign << 1;
51
break;
52
}
53
- ctx->z_mask = z_mask;
54
- ctx->s_mask = s_mask;
55
56
- return fold_masks(ctx, op);
57
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
58
}
59
60
static bool fold_call(OptContext *ctx, TCGOp *op)
61
--
62
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Compute s_mask from the union of the maximum count and the
3
op2 fallback for op1 being zero.
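Roughly, for the 64-bit case (illustration only, t2 as in the hunk below): a nonzero input gives a count in 0..63, and a zero input makes the operation return op2 instead, so both contribute to the result masks.

    uint64_t z_mask = 63;               /* nonzero input: count fits in 6 bits   */
    uint64_t s_mask = ~z_mask;          /* high bits then repeat the (zero) sign */
    z_mask |= t2->z_mask;               /* zero input: op2 is returned as-is     */
    s_mask &= t2->s_mask;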
1
4
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 15 ++++++++++-----
9
1 file changed, 10 insertions(+), 5 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
16
17
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
18
{
19
- uint64_t z_mask;
20
+ uint64_t z_mask, s_mask;
21
+ TempOptInfo *t1 = arg_info(op->args[1]);
22
+ TempOptInfo *t2 = arg_info(op->args[2]);
23
24
- if (arg_is_const(op->args[1])) {
25
- uint64_t t = arg_info(op->args[1])->val;
26
+ if (ti_is_const(t1)) {
27
+ uint64_t t = ti_const_val(t1);
28
29
if (t != 0) {
30
t = do_constant_folding(op->opc, ctx->type, t, 0);
31
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
32
default:
33
g_assert_not_reached();
34
}
35
- ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
36
- return false;
37
+ s_mask = ~z_mask;
38
+ z_mask |= t2->z_mask;
39
+ s_mask &= t2->s_mask;
40
+
41
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
42
}
43
44
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
45
--
46
2.43.0
1
The unsigned saturations are handled via generic code
1
Add fold_masks_z as a trivial wrapper around fold_masks_zs.
2
using min/max. The signed saturations are expanded using
2
Avoid the use of the OptContext slots.
3
double-sized arithmetic and a saturating pack.
4
3
5
Since all operations are done via expansion, do not
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
actually set TCG_TARGET_HAS_sat_vec.
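As a scalar analogue of that expansion (plain C with <stdint.h>, illustration only): widen each element, do the arithmetic at double width, then clamp while narrowing again, which is what the unpack / wider add / saturating pack sequence does per lane.

    static int8_t ssadd8(int8_t a, int8_t b)
    {
        int16_t t = (int16_t)a + (int16_t)b;    /* double-width add */
        if (t > INT8_MAX) {
            t = INT8_MAX;                       /* saturate high    */
        } else if (t < INT8_MIN) {
            t = INT8_MIN;                       /* saturate low     */
        }
        return (int8_t)t;                       /* narrow ("pack")  */
    }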
7
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
6
---
10
tcg/s390x/tcg-target.opc.h | 3 ++
7
tcg/optimize.c | 13 ++++++++++---
11
tcg/s390x/tcg-target.c.inc | 63 ++++++++++++++++++++++++++++++++++++++
8
1 file changed, 10 insertions(+), 3 deletions(-)
12
2 files changed, 66 insertions(+)
13
9
14
diff --git a/tcg/s390x/tcg-target.opc.h b/tcg/s390x/tcg-target.opc.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/s390x/tcg-target.opc.h
12
--- a/tcg/optimize.c
17
+++ b/tcg/s390x/tcg-target.opc.h
13
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
19
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
15
return true;
20
* consider these to be UNSPEC with names.
21
*/
22
+DEF(s390_vuph_vec, 1, 1, 0, IMPLVEC)
23
+DEF(s390_vupl_vec, 1, 1, 0, IMPLVEC)
24
+DEF(s390_vpks_vec, 1, 2, 0, IMPLVEC)
25
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
26
index XXXXXXX..XXXXXXX 100644
27
--- a/tcg/s390x/tcg-target.c.inc
28
+++ b/tcg/s390x/tcg-target.c.inc
29
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
30
VRRc_VNO = 0xe76b,
31
VRRc_VO = 0xe76a,
32
VRRc_VOC = 0xe76f,
33
+ VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
34
VRRc_VS = 0xe7f7,
35
+ VRRa_VUPH = 0xe7d7,
36
+ VRRa_VUPL = 0xe7d6,
37
VRRc_VX = 0xe76d,
38
VRRf_VLVGP = 0xe762,
39
40
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
41
}
42
break;
43
44
+ case INDEX_op_s390_vuph_vec:
45
+ tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
46
+ break;
47
+ case INDEX_op_s390_vupl_vec:
48
+ tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
49
+ break;
50
+ case INDEX_op_s390_vpks_vec:
51
+ tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
52
+ break;
53
+
54
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
55
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
56
default:
57
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
58
return -1;
59
case INDEX_op_mul_vec:
60
return vece < MO_64;
61
+ case INDEX_op_ssadd_vec:
62
+ case INDEX_op_sssub_vec:
63
+ return vece < MO_64 ? -1 : 0;
64
default:
65
return 0;
66
}
67
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
68
}
69
}
16
}
70
17
71
+static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
18
+static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
72
+ TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
73
+{
19
+{
74
+ TCGv_vec h1 = tcg_temp_new_vec(type);
20
+ return fold_masks_zs(ctx, op, z_mask, 0);
75
+ TCGv_vec h2 = tcg_temp_new_vec(type);
76
+ TCGv_vec l1 = tcg_temp_new_vec(type);
77
+ TCGv_vec l2 = tcg_temp_new_vec(type);
78
+
79
+ tcg_debug_assert (vece < MO_64);
80
+
81
+ /* Unpack with sign-extension. */
82
+ vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
83
+ tcgv_vec_arg(h1), tcgv_vec_arg(v1));
84
+ vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
85
+ tcgv_vec_arg(h2), tcgv_vec_arg(v2));
86
+
87
+ vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
88
+ tcgv_vec_arg(l1), tcgv_vec_arg(v1));
89
+ vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
90
+ tcgv_vec_arg(l2), tcgv_vec_arg(v2));
91
+
92
+ /* Arithmetic on a wider element size. */
93
+ vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
94
+ tcgv_vec_arg(h1), tcgv_vec_arg(h2));
95
+ vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
96
+ tcgv_vec_arg(l1), tcgv_vec_arg(l2));
97
+
98
+ /* Pack with saturation. */
99
+ vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
100
+ tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
101
+
102
+ tcg_temp_free_vec(h1);
103
+ tcg_temp_free_vec(h2);
104
+ tcg_temp_free_vec(l1);
105
+ tcg_temp_free_vec(l2);
106
+}
21
+}
107
+
22
+
108
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
109
TCGArg a0, ...)
110
{
24
{
111
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
112
tcg_temp_free_vec(t0);
26
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
27
28
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
29
{
30
+ uint64_t z_mask;
31
+
32
if (fold_const1(ctx, op)) {
33
return true;
34
}
35
36
switch (ctx->type) {
37
case TCG_TYPE_I32:
38
- ctx->z_mask = 32 | 31;
39
+ z_mask = 32 | 31;
113
break;
40
break;
114
41
case TCG_TYPE_I64:
115
+ case INDEX_op_ssadd_vec:
42
- ctx->z_mask = 64 | 63;
116
+ expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
43
+ z_mask = 64 | 63;
117
+ break;
44
break;
118
+ case INDEX_op_sssub_vec:
119
+ expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
120
+ break;
121
+
122
default:
45
default:
123
g_assert_not_reached();
46
g_assert_not_reached();
124
}
47
}
125
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
48
- return false;
126
case INDEX_op_sari_vec:
49
+ return fold_masks_z(ctx, op, z_mask);
127
case INDEX_op_shli_vec:
50
}
128
case INDEX_op_shri_vec:
51
129
+ case INDEX_op_s390_vuph_vec:
52
static bool fold_deposit(OptContext *ctx, TCGOp *op)
130
+ case INDEX_op_s390_vupl_vec:
131
return C_O1_I1(v, v);
132
case INDEX_op_add_vec:
133
case INDEX_op_sub_vec:
134
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
135
case INDEX_op_smin_vec:
136
case INDEX_op_umax_vec:
137
case INDEX_op_umin_vec:
138
+ case INDEX_op_s390_vpks_vec:
139
return C_O1_I2(v, v, v);
140
case INDEX_op_rotls_vec:
141
case INDEX_op_shls_vec:
142
--
53
--
143
2.25.1
54
2.43.0
144
145
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
When we fold to and, use fold_and.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 35 +++++++++++++++++------------------
8
1 file changed, 17 insertions(+), 18 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
15
16
static bool fold_deposit(OptContext *ctx, TCGOp *op)
17
{
18
+ TempOptInfo *t1 = arg_info(op->args[1]);
19
+ TempOptInfo *t2 = arg_info(op->args[2]);
20
+ int ofs = op->args[3];
21
+ int len = op->args[4];
22
TCGOpcode and_opc;
23
+ uint64_t z_mask;
24
25
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
26
- uint64_t t1 = arg_info(op->args[1])->val;
27
- uint64_t t2 = arg_info(op->args[2])->val;
28
-
29
- t1 = deposit64(t1, op->args[3], op->args[4], t2);
30
- return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
31
+ if (ti_is_const(t1) && ti_is_const(t2)) {
32
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
33
+ deposit64(ti_const_val(t1), ofs, len,
34
+ ti_const_val(t2)));
35
}
36
37
switch (ctx->type) {
38
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
39
}
40
41
/* Inserting a value into zero at offset 0. */
42
- if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
43
- uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
44
+ if (ti_is_const_val(t1, 0) && ofs == 0) {
45
+ uint64_t mask = MAKE_64BIT_MASK(0, len);
46
47
op->opc = and_opc;
48
op->args[1] = op->args[2];
49
op->args[2] = arg_new_constant(ctx, mask);
50
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
51
- return false;
52
+ return fold_and(ctx, op);
53
}
54
55
/* Inserting zero into a value. */
56
- if (arg_is_const_val(op->args[2], 0)) {
57
- uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
58
+ if (ti_is_const_val(t2, 0)) {
59
+ uint64_t mask = deposit64(-1, ofs, len, 0);
60
61
op->opc = and_opc;
62
op->args[2] = arg_new_constant(ctx, mask);
63
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
64
- return false;
65
+ return fold_and(ctx, op);
66
}
67
68
- ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
69
- op->args[3], op->args[4],
70
- arg_info(op->args[2])->z_mask);
71
- return false;
72
+ z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
73
+ return fold_masks_z(ctx, op, z_mask);
74
}
75
76
static bool fold_divide(OptContext *ctx, TCGOp *op)
77
--
78
2.43.0
New patch
1
The input which overlaps the sign bit of the output can
2
have its input s_mask propagated to the output s_mask.
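A concrete instance (illustration only; deposit64 is the existing QEMU helper): deposit64(a, 56, 8, b) on a 64-bit value places b in the top byte, so b's sign repetitions, shifted up by 56, are sign repetitions of the result; for deposit64(a, 0, 8, b) the sign end still comes from a.

    uint64_t r_hi = deposit64(a, 56, 8, b);   /* ofs+len == width: sign follows b */
    uint64_t r_lo = deposit64(a, 0, 8, b);    /* ofs+len <  width: sign follows a */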
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 14 ++++++++++++--
8
1 file changed, 12 insertions(+), 2 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
15
TempOptInfo *t2 = arg_info(op->args[2]);
16
int ofs = op->args[3];
17
int len = op->args[4];
18
+ int width;
19
TCGOpcode and_opc;
20
- uint64_t z_mask;
21
+ uint64_t z_mask, s_mask;
22
23
if (ti_is_const(t1) && ti_is_const(t2)) {
24
return tcg_opt_gen_movi(ctx, op, op->args[0],
25
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
26
switch (ctx->type) {
27
case TCG_TYPE_I32:
28
and_opc = INDEX_op_and_i32;
29
+ width = 32;
30
break;
31
case TCG_TYPE_I64:
32
and_opc = INDEX_op_and_i64;
33
+ width = 64;
34
break;
35
default:
36
g_assert_not_reached();
37
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
38
return fold_and(ctx, op);
39
}
40
41
+ /* The s_mask from the top portion of the deposit is still valid. */
42
+ if (ofs + len == width) {
43
+ s_mask = t2->s_mask << ofs;
44
+ } else {
45
+ s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
46
+ }
47
+
48
z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
49
- return fold_masks_z(ctx, op, z_mask);
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
51
}
52
53
static bool fold_divide(OptContext *ctx, TCGOp *op)
54
--
55
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
12
fold_xi_to_x(ctx, op, 1)) {
13
return true;
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_dup(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 4 ++--
5
1 file changed, 2 insertions(+), 2 deletions(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_dup(OptContext *ctx, TCGOp *op)
12
t = dup_const(TCGOP_VECE(op), t);
13
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_dup2(OptContext *ctx, TCGOp *op)
20
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
21
op->opc = INDEX_op_dup_vec;
22
TCGOP_VECE(op) = MO_32;
23
}
24
- return false;
25
+ return finish_folding(ctx, op);
26
}
27
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
29
--
30
2.43.0
1
This is via expansion; don't actually set TCG_TARGET_HAS_cmpsel_vec.
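A scalar model of that expansion (illustration only; the real code operates on whole vector registers, and '<' stands in for the requested condition): the compare yields an all-ones or all-zeros mask per element, and a bit-select then picks between the two data operands.

    uint64_t mask = (c1 < c2) ? ~(uint64_t)0 : 0;   /* cmp result     */
    uint64_t res  = (mask & v3) | (~mask & v4);     /* bitsel(v3, v4) */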
1
Add fold_masks_s as a trivial wrapper around fold_masks_zs.
2
Avoid the use of the OptContext slots.
2
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
tcg/s390x/tcg-target.c.inc | 24 +++++++++++++++++++++++-
7
tcg/optimize.c | 13 ++++++++++---
6
1 file changed, 23 insertions(+), 1 deletion(-)
8
1 file changed, 10 insertions(+), 3 deletions(-)
7
9
8
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/s390x/tcg-target.c.inc
12
--- a/tcg/optimize.c
11
+++ b/tcg/s390x/tcg-target.c.inc
13
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
13
case INDEX_op_xor_vec:
15
return fold_masks_zs(ctx, op, z_mask, 0);
14
return 1;
15
case INDEX_op_cmp_vec:
16
+ case INDEX_op_cmpsel_vec:
17
case INDEX_op_rotrv_vec:
18
return -1;
19
case INDEX_op_mul_vec:
20
@@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
21
}
22
}
16
}
23
17
24
+static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
18
+static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
25
+ TCGv_vec c1, TCGv_vec c2,
26
+ TCGv_vec v3, TCGv_vec v4, TCGCond cond)
27
+{
19
+{
28
+ TCGv_vec t = tcg_temp_new_vec(type);
20
+ return fold_masks_zs(ctx, op, -1, s_mask);
29
+
30
+ if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
31
+ /* Invert the sense of the compare by swapping arguments. */
32
+ tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
33
+ } else {
34
+ tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
35
+ }
36
+ tcg_temp_free_vec(t);
37
+}
21
+}
38
+
22
+
39
static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
40
TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
41
{
24
{
42
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
43
TCGArg a0, ...)
26
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
27
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
44
{
29
{
45
va_list va;
30
+ uint64_t s_mask;
46
- TCGv_vec v0, v1, v2, t0;
47
+ TCGv_vec v0, v1, v2, v3, v4, t0;
48
49
va_start(va, a0);
50
v0 = temp_tcgv_vec(arg_temp(a0));
51
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
52
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
53
break;
54
55
+ case INDEX_op_cmpsel_vec:
56
+ v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
57
+ v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
58
+ expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
59
+ break;
60
+
31
+
61
case INDEX_op_rotrv_vec:
32
if (fold_const2_commutative(ctx, op) ||
62
t0 = tcg_temp_new_vec(type);
33
fold_xi_to_x(ctx, op, -1) ||
63
tcg_gen_neg_vec(vece, t0, v2);
34
fold_xi_to_not(ctx, op, 0)) {
35
return true;
36
}
37
38
- ctx->s_mask = arg_info(op->args[1])->s_mask
39
- & arg_info(op->args[2])->s_mask;
40
- return false;
41
+ s_mask = arg_info(op->args[1])->s_mask
42
+ & arg_info(op->args[2])->s_mask;
43
+ return fold_masks_s(ctx, op, s_mask);
44
}
45
46
static bool fold_extract(OptContext *ctx, TCGOp *op)
64
--
47
--
65
2.25.1
48
2.43.0
66
67
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 15 ++++++---------
7
1 file changed, 6 insertions(+), 9 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
14
static bool fold_extract(OptContext *ctx, TCGOp *op)
15
{
16
uint64_t z_mask_old, z_mask;
17
+ TempOptInfo *t1 = arg_info(op->args[1]);
18
int pos = op->args[2];
19
int len = op->args[3];
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t;
23
-
24
- t = arg_info(op->args[1])->val;
25
- t = extract64(t, pos, len);
26
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
27
+ if (ti_is_const(t1)) {
28
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
29
+ extract64(ti_const_val(t1), pos, len));
30
}
31
32
- z_mask_old = arg_info(op->args[1])->z_mask;
33
+ z_mask_old = t1->z_mask;
34
z_mask = extract64(z_mask_old, pos, len);
35
if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
36
return true;
37
}
38
- ctx->z_mask = z_mask;
39
40
- return fold_masks(ctx, op);
41
+ return fold_masks_z(ctx, op, z_mask);
42
}
43
44
static bool fold_extract2(OptContext *ctx, TCGOp *op)
45
--
46
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
12
}
13
return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_exts(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Explicitly sign-extend z_mask instead of doing that manually.
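Illustration only, for the ext8s case: a single signed cast on the known-zeros mask replaces the old manual widening, because the cast sets every higher bit exactly when bit 7 may be set and clears them when it cannot be.

    z_mask = (int8_t)z_mask;    /* sign-extend the possible-ones mask */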
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 29 ++++++++++++-----------------
8
1 file changed, 12 insertions(+), 17 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
15
16
static bool fold_exts(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t s_mask_old, s_mask, z_mask, sign;
19
+ uint64_t s_mask_old, s_mask, z_mask;
20
bool type_change = false;
21
+ TempOptInfo *t1;
22
23
if (fold_const1(ctx, op)) {
24
return true;
25
}
26
27
- z_mask = arg_info(op->args[1])->z_mask;
28
- s_mask = arg_info(op->args[1])->s_mask;
29
+ t1 = arg_info(op->args[1]);
30
+ z_mask = t1->z_mask;
31
+ s_mask = t1->s_mask;
32
s_mask_old = s_mask;
33
34
switch (op->opc) {
35
CASE_OP_32_64(ext8s):
36
- sign = INT8_MIN;
37
- z_mask = (uint8_t)z_mask;
38
+ s_mask |= INT8_MIN;
39
+ z_mask = (int8_t)z_mask;
40
break;
41
CASE_OP_32_64(ext16s):
42
- sign = INT16_MIN;
43
- z_mask = (uint16_t)z_mask;
44
+ s_mask |= INT16_MIN;
45
+ z_mask = (int16_t)z_mask;
46
break;
47
case INDEX_op_ext_i32_i64:
48
type_change = true;
49
QEMU_FALLTHROUGH;
50
case INDEX_op_ext32s_i64:
51
- sign = INT32_MIN;
52
- z_mask = (uint32_t)z_mask;
53
+ s_mask |= INT32_MIN;
54
+ z_mask = (int32_t)z_mask;
55
break;
56
default:
57
g_assert_not_reached();
58
}
59
60
- if (z_mask & sign) {
61
- z_mask |= sign;
62
- }
63
- s_mask |= sign << 1;
64
-
65
- ctx->z_mask = z_mask;
66
- ctx->s_mask = s_mask;
67
if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
68
return true;
69
}
70
71
- return fold_masks(ctx, op);
72
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
73
}
74
75
static bool fold_extu(OptContext *ctx, TCGOp *op)
76
--
77
2.43.0
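(A standalone note on the "explicitly sign-extend" change above, not part of the patch: widening through the signed type does in one step what the old code did with a separate conditional or. The values below are made up, and the out-of-range conversion relies on the usual two's-complement wrap.)

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t z_mask = 0x96;              /* bit 7 may be set */
        uint64_t sign = (uint64_t)INT8_MIN;  /* 0xffffffffffffff80 */

        /* Old style: truncate to 8 bits, then propagate the sign by hand. */
        uint64_t old_mask = (uint8_t)z_mask;
        if (old_mask & sign) {
            old_mask |= sign;
        }

        /* New style: the signed cast performs the sign extension. */
        uint64_t new_mask = (uint64_t)(int8_t)z_mask;

        printf("old 0x%016" PRIx64 "\nnew 0x%016" PRIx64 "\n",
               old_mask, new_mask);
        return 0;
    }

Both print 0xffffffffffffff96.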
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 4 ++--
7
1 file changed, 2 insertions(+), 2 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
14
g_assert_not_reached();
15
}
16
17
- ctx->z_mask = z_mask;
18
if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
19
return true;
20
}
21
- return fold_masks(ctx, op);
22
+
23
+ return fold_masks_z(ctx, op, z_mask);
24
}
25
26
static bool fold_mb(OptContext *ctx, TCGOp *op)
27
--
28
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 19 +++++++++++--------
7
1 file changed, 11 insertions(+), 8 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
14
15
static bool fold_movcond(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t z_mask, s_mask;
18
+ TempOptInfo *tt, *ft;
19
int i;
20
21
/* If true and false values are the same, eliminate the cmp. */
22
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
23
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
24
}
25
26
- ctx->z_mask = arg_info(op->args[3])->z_mask
27
- | arg_info(op->args[4])->z_mask;
28
- ctx->s_mask = arg_info(op->args[3])->s_mask
29
- & arg_info(op->args[4])->s_mask;
30
+ tt = arg_info(op->args[3]);
31
+ ft = arg_info(op->args[4]);
32
+ z_mask = tt->z_mask | ft->z_mask;
33
+ s_mask = tt->s_mask & ft->s_mask;
34
35
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
36
- uint64_t tv = arg_info(op->args[3])->val;
37
- uint64_t fv = arg_info(op->args[4])->val;
38
+ if (ti_is_const(tt) && ti_is_const(ft)) {
39
+ uint64_t tv = ti_const_val(tt);
40
+ uint64_t fv = ti_const_val(ft);
41
TCGOpcode opc, negopc = 0;
42
TCGCond cond = op->args[5];
43
44
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
45
}
46
}
47
}
48
- return false;
49
+
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
51
}
52
53
static bool fold_mul(OptContext *ctx, TCGOp *op)
54
--
55
2.43.0
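(A short aside on the mask merge in fold_movcond above, not part of the patch: the result is always one of the two inputs, so a bit can be nonzero if it can be nonzero in either input, making z_mask the union, while a guaranteed sign-bit copy must hold in both, making s_mask the intersection. The mask values in this sketch are invented.)

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical masks for the "true" and "false" operands. */
        uint64_t tt_z = 0x00000000000000ffull, ft_z = 0x0000000000000f00ull;
        uint64_t tt_s = 0xffffffffffff0000ull, ft_s = 0xffffffffff000000ull;

        uint64_t z_mask = tt_z | ft_z;  /* 0x0fff: either side may set these */
        uint64_t s_mask = tt_s & ft_s;  /* only bits both sides guarantee */

        printf("z_mask 0x%016" PRIx64 "\ns_mask 0x%016" PRIx64 "\n",
               z_mask, s_mask);
        return 0;
    }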
1
We have lacked expressive support for memory sizes larger
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
than 64 bits for a while. Fixing that requires adjustment
3
to several points where we used this for array indexing,
4
and two places that develop -Wswitch warnings after the change.
5
6
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
3
---
10
include/exec/memop.h | 14 +++++++++-----
4
tcg/optimize.c | 6 +++---
11
target/arm/translate-a64.c | 2 +-
5
1 file changed, 3 insertions(+), 3 deletions(-)
12
tcg/tcg-op.c | 13 ++++++++-----
13
target/s390x/tcg/translate_vx.c.inc | 2 +-
14
tcg/aarch64/tcg-target.c.inc | 4 ++--
15
tcg/arm/tcg-target.c.inc | 4 ++--
16
tcg/i386/tcg-target.c.inc | 4 ++--
17
tcg/mips/tcg-target.c.inc | 4 ++--
18
tcg/ppc/tcg-target.c.inc | 8 ++++----
19
tcg/riscv/tcg-target.c.inc | 4 ++--
20
tcg/s390/tcg-target.c.inc | 4 ++--
21
tcg/sparc/tcg-target.c.inc | 16 ++++++++--------
22
12 files changed, 43 insertions(+), 36 deletions(-)
23
6
24
diff --git a/include/exec/memop.h b/include/exec/memop.h
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
25
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
26
--- a/include/exec/memop.h
9
--- a/tcg/optimize.c
27
+++ b/include/exec/memop.h
10
+++ b/tcg/optimize.c
28
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
11
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
29
MO_16 = 1,
12
fold_xi_to_x(ctx, op, 1)) {
30
MO_32 = 2,
13
return true;
31
MO_64 = 3,
32
- MO_SIZE = 3, /* Mask for the above. */
33
+ MO_128 = 4,
34
+ MO_256 = 5,
35
+ MO_512 = 6,
36
+ MO_1024 = 7,
37
+ MO_SIZE = 0x07, /* Mask for the above. */
38
39
- MO_SIGN = 4, /* Sign-extended, otherwise zero-extended. */
40
+ MO_SIGN = 0x08, /* Sign-extended, otherwise zero-extended. */
41
42
- MO_BSWAP = 8, /* Host reverse endian. */
43
+ MO_BSWAP = 0x10, /* Host reverse endian. */
44
#ifdef HOST_WORDS_BIGENDIAN
45
MO_LE = MO_BSWAP,
46
MO_BE = 0,
47
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
48
* - an alignment to a specified size, which may be more or less than
49
* the access size (MO_ALIGN_x where 'x' is a size in bytes);
50
*/
51
- MO_ASHIFT = 4,
52
- MO_AMASK = 7 << MO_ASHIFT,
53
+ MO_ASHIFT = 5,
54
+ MO_AMASK = 0x7 << MO_ASHIFT,
55
#ifdef NEED_CPU_H
56
#ifdef TARGET_ALIGNED_ONLY
57
MO_ALIGN = 0,
58
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/target/arm/translate-a64.c
61
+++ b/target/arm/translate-a64.c
62
@@ -XXX,XX +XXX,XX @@ static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
63
int element, MemOp memop)
64
{
65
int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
66
- switch (memop) {
67
+ switch ((unsigned)memop) {
68
case MO_8:
69
tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
70
break;
71
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/tcg/tcg-op.c
74
+++ b/tcg/tcg-op.c
75
@@ -XXX,XX +XXX,XX @@ static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
76
}
77
break;
78
case MO_64:
79
- if (!is64) {
80
- tcg_abort();
81
+ if (is64) {
82
+ op &= ~MO_SIGN;
83
+ break;
84
}
85
- break;
86
+ /* fall through */
87
+ default:
88
+ g_assert_not_reached();
89
}
14
}
90
if (st) {
15
- return false;
91
op &= ~MO_SIGN;
16
+ return finish_folding(ctx, op);
92
@@ -XXX,XX +XXX,XX @@ typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
93
# define WITH_ATOMIC64(X)
94
#endif
95
96
-static void * const table_cmpxchg[16] = {
97
+static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
98
[MO_8] = gen_helper_atomic_cmpxchgb,
99
[MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
100
[MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
101
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
102
}
17
}
103
18
104
#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
19
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
105
-static void * const table_##NAME[16] = { \
20
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
106
+static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
21
fold_xi_to_i(ctx, op, 0)) {
107
[MO_8] = gen_helper_atomic_##NAME##b, \
22
return true;
108
[MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
23
}
109
[MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
24
- return false;
110
diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
25
+ return finish_folding(ctx, op);
111
index XXXXXXX..XXXXXXX 100644
112
--- a/target/s390x/tcg/translate_vx.c.inc
113
+++ b/target/s390x/tcg/translate_vx.c.inc
114
@@ -XXX,XX +XXX,XX @@ static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
115
{
116
const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
117
118
- switch (memop) {
119
+ switch ((unsigned)memop) {
120
case ES_8:
121
tcg_gen_ld8u_i64(dst, cpu_env, offs);
122
break;
123
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
124
index XXXXXXX..XXXXXXX 100644
125
--- a/tcg/aarch64/tcg-target.c.inc
126
+++ b/tcg/aarch64/tcg-target.c.inc
127
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
128
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
129
* TCGMemOpIdx oi, uintptr_t ra)
130
*/
131
-static void * const qemu_ld_helpers[4] = {
132
+static void * const qemu_ld_helpers[MO_SIZE + 1] = {
133
[MO_8] = helper_ret_ldub_mmu,
134
#ifdef HOST_WORDS_BIGENDIAN
135
[MO_16] = helper_be_lduw_mmu,
136
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[4] = {
137
* uintxx_t val, TCGMemOpIdx oi,
138
* uintptr_t ra)
139
*/
140
-static void * const qemu_st_helpers[4] = {
141
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
142
[MO_8] = helper_ret_stb_mmu,
143
#ifdef HOST_WORDS_BIGENDIAN
144
[MO_16] = helper_be_stw_mmu,
145
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
146
index XXXXXXX..XXXXXXX 100644
147
--- a/tcg/arm/tcg-target.c.inc
148
+++ b/tcg/arm/tcg-target.c.inc
149
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
150
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
151
* int mmu_idx, uintptr_t ra)
152
*/
153
-static void * const qemu_ld_helpers[8] = {
154
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
155
[MO_UB] = helper_ret_ldub_mmu,
156
[MO_SB] = helper_ret_ldsb_mmu,
157
#ifdef HOST_WORDS_BIGENDIAN
158
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[8] = {
159
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
160
* uintxx_t val, int mmu_idx, uintptr_t ra)
161
*/
162
-static void * const qemu_st_helpers[4] = {
163
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
164
[MO_8] = helper_ret_stb_mmu,
165
#ifdef HOST_WORDS_BIGENDIAN
166
[MO_16] = helper_be_stw_mmu,
167
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
168
index XXXXXXX..XXXXXXX 100644
169
--- a/tcg/i386/tcg-target.c.inc
170
+++ b/tcg/i386/tcg-target.c.inc
171
@@ -XXX,XX +XXX,XX @@ static void tcg_out_nopn(TCGContext *s, int n)
172
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
173
* int mmu_idx, uintptr_t ra)
174
*/
175
-static void * const qemu_ld_helpers[16] = {
176
+static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
177
[MO_UB] = helper_ret_ldub_mmu,
178
[MO_LEUW] = helper_le_lduw_mmu,
179
[MO_LEUL] = helper_le_ldul_mmu,
180
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[16] = {
181
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
182
* uintxx_t val, int mmu_idx, uintptr_t ra)
183
*/
184
-static void * const qemu_st_helpers[16] = {
185
+static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
186
[MO_UB] = helper_ret_stb_mmu,
187
[MO_LEUW] = helper_le_stw_mmu,
188
[MO_LEUL] = helper_le_stl_mmu,
189
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
190
index XXXXXXX..XXXXXXX 100644
191
--- a/tcg/mips/tcg-target.c.inc
192
+++ b/tcg/mips/tcg-target.c.inc
193
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
194
#if defined(CONFIG_SOFTMMU)
195
#include "../tcg-ldst.c.inc"
196
197
-static void * const qemu_ld_helpers[16] = {
198
+static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
199
[MO_UB] = helper_ret_ldub_mmu,
200
[MO_SB] = helper_ret_ldsb_mmu,
201
[MO_LEUW] = helper_le_lduw_mmu,
202
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[16] = {
203
#endif
204
};
205
206
-static void * const qemu_st_helpers[16] = {
207
+static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
208
[MO_UB] = helper_ret_stb_mmu,
209
[MO_LEUW] = helper_le_stw_mmu,
210
[MO_LEUL] = helper_le_stl_mmu,
211
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
212
index XXXXXXX..XXXXXXX 100644
213
--- a/tcg/ppc/tcg-target.c.inc
214
+++ b/tcg/ppc/tcg-target.c.inc
215
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
216
#endif
217
}
26
}
218
27
219
-static const uint32_t qemu_ldx_opc[16] = {
28
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
220
+static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
29
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
221
[MO_UB] = LBZX,
30
tcg_opt_gen_movi(ctx, op2, rh, h);
222
[MO_UW] = LHZX,
31
return true;
223
[MO_UL] = LWZX,
32
}
224
@@ -XXX,XX +XXX,XX @@ static const uint32_t qemu_ldx_opc[16] = {
33
- return false;
225
[MO_BSWAP | MO_Q] = LDBRX,
34
+ return finish_folding(ctx, op);
226
};
227
228
-static const uint32_t qemu_stx_opc[16] = {
229
+static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
230
[MO_UB] = STBX,
231
[MO_UW] = STHX,
232
[MO_UL] = STWX,
233
@@ -XXX,XX +XXX,XX @@ static const uint32_t qemu_exts_opc[4] = {
234
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
235
* int mmu_idx, uintptr_t ra)
236
*/
237
-static void * const qemu_ld_helpers[16] = {
238
+static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
239
[MO_UB] = helper_ret_ldub_mmu,
240
[MO_LEUW] = helper_le_lduw_mmu,
241
[MO_LEUL] = helper_le_ldul_mmu,
242
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[16] = {
243
/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
244
* uintxx_t val, int mmu_idx, uintptr_t ra)
245
*/
246
-static void * const qemu_st_helpers[16] = {
247
+static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
248
[MO_UB] = helper_ret_stb_mmu,
249
[MO_LEUW] = helper_le_stw_mmu,
250
[MO_LEUL] = helper_le_stl_mmu,
251
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
252
index XXXXXXX..XXXXXXX 100644
253
--- a/tcg/riscv/tcg-target.c.inc
254
+++ b/tcg/riscv/tcg-target.c.inc
255
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
256
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
257
* TCGMemOpIdx oi, uintptr_t ra)
258
*/
259
-static void * const qemu_ld_helpers[8] = {
260
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
261
[MO_UB] = helper_ret_ldub_mmu,
262
[MO_SB] = helper_ret_ldsb_mmu,
263
#ifdef HOST_WORDS_BIGENDIAN
264
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[8] = {
265
* uintxx_t val, TCGMemOpIdx oi,
266
* uintptr_t ra)
267
*/
268
-static void * const qemu_st_helpers[4] = {
269
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
270
[MO_8] = helper_ret_stb_mmu,
271
#ifdef HOST_WORDS_BIGENDIAN
272
[MO_16] = helper_be_stw_mmu,
273
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
274
index XXXXXXX..XXXXXXX 100644
275
--- a/tcg/s390/tcg-target.c.inc
276
+++ b/tcg/s390/tcg-target.c.inc
277
@@ -XXX,XX +XXX,XX @@ static const uint8_t tcg_cond_to_ltr_cond[] = {
278
};
279
280
#ifdef CONFIG_SOFTMMU
281
-static void * const qemu_ld_helpers[16] = {
282
+static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
283
[MO_UB] = helper_ret_ldub_mmu,
284
[MO_SB] = helper_ret_ldsb_mmu,
285
[MO_LEUW] = helper_le_lduw_mmu,
286
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[16] = {
287
[MO_BEQ] = helper_be_ldq_mmu,
288
};
289
290
-static void * const qemu_st_helpers[16] = {
291
+static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
292
[MO_UB] = helper_ret_stb_mmu,
293
[MO_LEUW] = helper_le_stw_mmu,
294
[MO_LEUL] = helper_le_stl_mmu,
295
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
296
index XXXXXXX..XXXXXXX 100644
297
--- a/tcg/sparc/tcg-target.c.inc
298
+++ b/tcg/sparc/tcg-target.c.inc
299
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
300
}
35
}
301
36
302
#ifdef CONFIG_SOFTMMU
37
static bool fold_nand(OptContext *ctx, TCGOp *op)
303
-static const tcg_insn_unit *qemu_ld_trampoline[16];
304
-static const tcg_insn_unit *qemu_st_trampoline[16];
305
+static const tcg_insn_unit *qemu_ld_trampoline[(MO_SSIZE | MO_BSWAP) + 1];
306
+static const tcg_insn_unit *qemu_st_trampoline[(MO_SIZE | MO_BSWAP) + 1];
307
308
static void emit_extend(TCGContext *s, TCGReg r, int op)
309
{
310
@@ -XXX,XX +XXX,XX @@ static void emit_extend(TCGContext *s, TCGReg r, int op)
311
312
static void build_trampolines(TCGContext *s)
313
{
314
- static void * const qemu_ld_helpers[16] = {
315
+ static void * const qemu_ld_helpers[] = {
316
[MO_UB] = helper_ret_ldub_mmu,
317
[MO_SB] = helper_ret_ldsb_mmu,
318
[MO_LEUW] = helper_le_lduw_mmu,
319
@@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s)
320
[MO_BEUL] = helper_be_ldul_mmu,
321
[MO_BEQ] = helper_be_ldq_mmu,
322
};
323
- static void * const qemu_st_helpers[16] = {
324
+ static void * const qemu_st_helpers[] = {
325
[MO_UB] = helper_ret_stb_mmu,
326
[MO_LEUW] = helper_le_stw_mmu,
327
[MO_LEUL] = helper_le_stl_mmu,
328
@@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s)
329
int i;
330
TCGReg ra;
331
332
- for (i = 0; i < 16; ++i) {
333
+ for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
334
if (qemu_ld_helpers[i] == NULL) {
335
continue;
336
}
337
@@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s)
338
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
339
}
340
341
- for (i = 0; i < 16; ++i) {
342
+ for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) {
343
if (qemu_st_helpers[i] == NULL) {
344
continue;
345
}
346
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
347
}
348
#endif /* CONFIG_SOFTMMU */
349
350
-static const int qemu_ld_opc[16] = {
351
+static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
352
[MO_UB] = LDUB,
353
[MO_SB] = LDSB,
354
355
@@ -XXX,XX +XXX,XX @@ static const int qemu_ld_opc[16] = {
356
[MO_LEQ] = LDX_LE,
357
};
358
359
-static const int qemu_st_opc[16] = {
360
+static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
361
[MO_UB] = STB,
362
363
[MO_BEUW] = STH,
364
--
38
--
365
2.25.1
39
2.43.0
366
367
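(One arithmetic note on the memop.h change shown above, not part of the patch: the helper-table sizes in the backend hunks follow directly from the new constants, e.g. (MO_SIZE | MO_BSWAP) + 1 is 24 entries instead of the old hard-coded 16. Constants are copied from the patch; MO_SSIZE is assumed here to be MO_SIZE | MO_SIGN as elsewhere in memop.h.)

    #include <stdio.h>

    enum {
        MO_SIZE  = 0x07,
        MO_SIGN  = 0x08,
        MO_BSWAP = 0x10,
        MO_SSIZE = MO_SIZE | MO_SIGN,   /* assumption for this sketch */
    };

    int main(void)
    {
        printf("(MO_SIZE  | MO_BSWAP) + 1 = %d\n", (MO_SIZE | MO_BSWAP) + 1);
        printf("(MO_SSIZE | MO_BSWAP) + 1 = %d\n", (MO_SSIZE | MO_BSWAP) + 1);
        return 0;
    }

This prints 24 and 32, matching the new qemu_st_helpers[] and qemu_ld_helpers[] bounds.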
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 8 +++++---
7
1 file changed, 5 insertions(+), 3 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
14
15
static bool fold_nand(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t s_mask;
18
+
19
if (fold_const2_commutative(ctx, op) ||
20
fold_xi_to_not(ctx, op, -1)) {
21
return true;
22
}
23
24
- ctx->s_mask = arg_info(op->args[1])->s_mask
25
- & arg_info(op->args[2])->s_mask;
26
- return false;
27
+ s_mask = arg_info(op->args[1])->s_mask
28
+ & arg_info(op->args[2])->s_mask;
29
+ return fold_masks_s(ctx, op, s_mask);
30
}
31
32
static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
33
--
34
2.43.0
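(A brief aside on why only s_mask is tracked for nand above, not part of the patch: the low bits of ~(a & b) can be anything, but any top region in which both inputs merely repeat their sign bit is still a run of identical bits in the result, so the guaranteed-sign masks intersect. sign_reps is a throwaway helper written only for this example.)

    #include <stdint.h>
    #include <stdio.h>

    /* Count how many of the top bits are copies of the sign bit. */
    static int sign_reps(uint64_t v)
    {
        uint64_t sign = v >> 63;
        int n = 1;
        while (n < 64 && ((v >> (63 - n)) & 1) == sign) {
            n++;
        }
        return n;
    }

    int main(void)
    {
        uint64_t a = 0xfffffffffffffffaull;  /* -6: many leading sign copies */
        uint64_t b = 0x0000000000000123ull;  /* fewer leading sign copies */
        uint64_t nand = ~(a & b);

        printf("reps(a)=%d reps(b)=%d reps(nand)=%d\n",
               sign_reps(a), sign_reps(b), sign_reps(nand));
        return 0;
    }

The result keeps at least the smaller of the two counts (here 61, 55 and 55).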
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 9 ++-------
7
1 file changed, 2 insertions(+), 7 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
14
{
15
/* Set to 1 all bits to the left of the rightmost. */
16
uint64_t z_mask = arg_info(op->args[1])->z_mask;
17
- ctx->z_mask = -(z_mask & -z_mask);
18
+ z_mask = -(z_mask & -z_mask);
19
20
- /*
21
- * Because of fold_sub_to_neg, we want to always return true,
22
- * via finish_folding.
23
- */
24
- finish_folding(ctx, op);
25
- return true;
26
+ return fold_masks_z(ctx, op, z_mask);
27
}
28
29
static bool fold_neg(OptContext *ctx, TCGOp *op)
30
--
31
2.43.0
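(A worked example for the -(z_mask & -z_mask) idiom kept above, not part of the patch: z_mask & -z_mask isolates the lowest bit that may be nonzero, and negating that sets this bit and everything to its left, which is exactly the set of bits a negation can produce.)

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t z_mask = 0x30;              /* only bits 4 and 5 may be set */
        uint64_t lowest = z_mask & -z_mask;  /* 0x10, the lowest such bit */
        uint64_t neg_z  = -lowest;           /* 0xfffffffffffffff0 */

        /* -x = ~x + 1 cannot set bits below the lowest possible set bit. */
        printf("z_mask 0x%016" PRIx64 " -> neg z_mask 0x%016" PRIx64 "\n",
               z_mask, neg_z);
        return 0;
    }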
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 8 +++++---
7
1 file changed, 5 insertions(+), 3 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
14
15
static bool fold_nor(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t s_mask;
18
+
19
if (fold_const2_commutative(ctx, op) ||
20
fold_xi_to_not(ctx, op, 0)) {
21
return true;
22
}
23
24
- ctx->s_mask = arg_info(op->args[1])->s_mask
25
- & arg_info(op->args[2])->s_mask;
26
- return false;
27
+ s_mask = arg_info(op->args[1])->s_mask
28
+ & arg_info(op->args[2])->s_mask;
29
+ return fold_masks_s(ctx, op, s_mask);
30
}
31
32
static bool fold_not(OptContext *ctx, TCGOp *op)
33
--
34
2.43.0
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 7 +------
7
1 file changed, 1 insertion(+), 6 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
14
if (fold_const1(ctx, op)) {
15
return true;
16
}
17
-
18
- ctx->s_mask = arg_info(op->args[1])->s_mask;
19
-
20
- /* Because of fold_to_not, we want to always return true, via finish. */
21
- finish_folding(ctx, op);
22
- return true;
23
+ return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
24
}
25
26
static bool fold_or(OptContext *ctx, TCGOp *op)
27
--
28
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 13 ++++++++-----
7
1 file changed, 8 insertions(+), 5 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
14
15
static bool fold_or(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t z_mask, s_mask;
18
+ TempOptInfo *t1, *t2;
19
+
20
if (fold_const2_commutative(ctx, op) ||
21
fold_xi_to_x(ctx, op, 0) ||
22
fold_xx_to_x(ctx, op)) {
23
return true;
24
}
25
26
- ctx->z_mask = arg_info(op->args[1])->z_mask
27
- | arg_info(op->args[2])->z_mask;
28
- ctx->s_mask = arg_info(op->args[1])->s_mask
29
- & arg_info(op->args[2])->s_mask;
30
- return fold_masks(ctx, op);
31
+ t1 = arg_info(op->args[1]);
32
+ t2 = arg_info(op->args[2]);
33
+ z_mask = t1->z_mask | t2->z_mask;
34
+ s_mask = t1->s_mask & t2->s_mask;
35
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
36
}
37
38
static bool fold_orc(OptContext *ctx, TCGOp *op)
39
--
40
2.43.0
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 8 +++++---
7
1 file changed, 5 insertions(+), 3 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
14
15
static bool fold_orc(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t s_mask;
18
+
19
if (fold_const2(ctx, op) ||
20
fold_xx_to_i(ctx, op, -1) ||
21
fold_xi_to_x(ctx, op, -1) ||
22
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
23
return true;
24
}
25
26
- ctx->s_mask = arg_info(op->args[1])->s_mask
27
- & arg_info(op->args[2])->s_mask;
28
- return false;
29
+ s_mask = arg_info(op->args[1])->s_mask
30
+ & arg_info(op->args[2])->s_mask;
31
+ return fold_masks_s(ctx, op, s_mask);
32
}
33
34
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
35
--
36
2.43.0
1
Avoid the use of the OptContext slots.
2
3
Be careful not to call fold_masks_zs when the memory operation
4
is wide enough to require multiple outputs, so split into two
5
functions: fold_qemu_ld_1reg and fold_qemu_ld_2reg.
6
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
9
---
3
tcg/s390x/tcg-target-con-set.h | 1 +
10
tcg/optimize.c | 26 +++++++++++++++++++++-----
4
tcg/s390x/tcg-target.h | 2 +-
11
1 file changed, 21 insertions(+), 5 deletions(-)
5
tcg/s390x/tcg-target.c.inc | 20 ++++++++++++++++++++
6
3 files changed, 22 insertions(+), 1 deletion(-)
7
12
8
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/s390x/tcg-target-con-set.h
15
--- a/tcg/optimize.c
11
+++ b/tcg/s390x/tcg-target-con-set.h
16
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ C_O1_I2(r, r, ri)
17
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
13
C_O1_I2(r, rZ, r)
18
return fold_masks_s(ctx, op, s_mask);
14
C_O1_I2(v, v, r)
15
C_O1_I2(v, v, v)
16
+C_O1_I3(v, v, v, v)
17
C_O1_I4(r, r, ri, r, 0)
18
C_O1_I4(r, r, ri, rI, 0)
19
C_O2_I2(b, a, 0, r)
20
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/tcg/s390x/tcg-target.h
23
+++ b/tcg/s390x/tcg-target.h
24
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
25
#define TCG_TARGET_HAS_mul_vec 1
26
#define TCG_TARGET_HAS_sat_vec 0
27
#define TCG_TARGET_HAS_minmax_vec 1
28
-#define TCG_TARGET_HAS_bitsel_vec 0
29
+#define TCG_TARGET_HAS_bitsel_vec 1
30
#define TCG_TARGET_HAS_cmpsel_vec 0
31
32
/* used for function call generation */
33
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
34
index XXXXXXX..XXXXXXX 100644
35
--- a/tcg/s390x/tcg-target.c.inc
36
+++ b/tcg/s390x/tcg-target.c.inc
37
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
38
VRRa_VUPH = 0xe7d7,
39
VRRa_VUPL = 0xe7d6,
40
VRRc_VX = 0xe76d,
41
+ VRRe_VSEL = 0xe78d,
42
VRRf_VLVGP = 0xe762,
43
44
VRSa_VERLL = 0xe733,
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
46
tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
47
}
19
}
48
20
49
+static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
21
-static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
50
+ TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
22
+static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
51
+{
23
{
52
+ tcg_debug_assert(is_vector_reg(v1));
24
const TCGOpDef *def = &tcg_op_defs[op->opc];
53
+ tcg_debug_assert(is_vector_reg(v2));
25
MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
54
+ tcg_debug_assert(is_vector_reg(v3));
26
MemOp mop = get_memop(oi);
55
+ tcg_debug_assert(is_vector_reg(v4));
27
int width = 8 * memop_size(mop);
56
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
28
+ uint64_t z_mask = -1, s_mask = 0;
57
+ tcg_out16(s, v3 << 12);
29
58
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
30
if (width < 64) {
31
if (mop & MO_SIGN) {
32
- ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
33
+ s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
34
} else {
35
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
36
+ z_mask = MAKE_64BIT_MASK(0, width);
37
}
38
}
39
40
/* Opcodes that touch guest memory stop the mb optimization. */
41
ctx->prev_mb = NULL;
42
- return false;
43
+
44
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
59
+}
45
+}
60
+
46
+
61
static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
47
+static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
62
TCGReg v1, TCGReg r2, TCGReg r3)
48
+{
63
{
49
+ /* Opcodes that touch guest memory stop the mb optimization. */
64
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
50
+ ctx->prev_mb = NULL;
65
tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
51
+ return finish_folding(ctx, op);
66
break;
52
}
67
53
68
+ case INDEX_op_bitsel_vec:
54
static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
69
+ tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]);
55
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
70
+ break;
56
break;
71
+
57
case INDEX_op_qemu_ld_a32_i32:
72
case INDEX_op_cmp_vec:
58
case INDEX_op_qemu_ld_a64_i32:
73
switch ((TCGCond)args[3]) {
59
+ done = fold_qemu_ld_1reg(&ctx, op);
74
case TCG_COND_EQ:
60
+ break;
75
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
61
case INDEX_op_qemu_ld_a32_i64:
76
case INDEX_op_add_vec:
62
case INDEX_op_qemu_ld_a64_i64:
77
case INDEX_op_and_vec:
63
+ if (TCG_TARGET_REG_BITS == 64) {
78
case INDEX_op_andc_vec:
64
+ done = fold_qemu_ld_1reg(&ctx, op);
79
+ case INDEX_op_bitsel_vec:
65
+ break;
80
case INDEX_op_neg_vec:
66
+ }
81
case INDEX_op_not_vec:
67
+ QEMU_FALLTHROUGH;
82
case INDEX_op_or_vec:
68
case INDEX_op_qemu_ld_a32_i128:
83
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
69
case INDEX_op_qemu_ld_a64_i128:
84
case INDEX_op_shrs_vec:
70
- done = fold_qemu_ld(&ctx, op);
85
case INDEX_op_sars_vec:
71
+ done = fold_qemu_ld_2reg(&ctx, op);
86
return C_O1_I2(v, v, r);
72
break;
87
+ case INDEX_op_bitsel_vec:
73
case INDEX_op_qemu_st8_a32_i32:
88
+ return C_O1_I3(v, v, v, v);
74
case INDEX_op_qemu_st8_a64_i32:
89
90
default:
91
g_assert_not_reached();
92
--
75
--
93
2.25.1
76
2.43.0
94
95
New patch
1
Stores have no output operands, and so need no further work.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 11 +++++------
7
1 file changed, 5 insertions(+), 6 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
14
{
15
/* Opcodes that touch guest memory stop the mb optimization. */
16
ctx->prev_mb = NULL;
17
- return false;
18
+ return true;
19
}
20
21
static bool fold_remainder(OptContext *ctx, TCGOp *op)
22
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
23
24
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
25
remove_mem_copy_all(ctx);
26
- return false;
27
+ return true;
28
}
29
30
switch (op->opc) {
31
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
32
g_assert_not_reached();
33
}
34
remove_mem_copy_in(ctx, ofs, ofs + lm1);
35
- return false;
36
+ return true;
37
}
38
39
static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
40
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
41
TCGType type;
42
43
if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
44
- fold_tcg_st(ctx, op);
45
- return false;
46
+ return fold_tcg_st(ctx, op);
47
}
48
49
src = arg_temp(op->args[0]);
50
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
51
last = ofs + tcg_type_size(type) - 1;
52
remove_mem_copy_in(ctx, ofs, last);
53
record_mem_copy(ctx, type, src, ofs, last);
54
- return false;
55
+ return true;
56
}
57
58
static bool fold_xor(OptContext *ctx, TCGOp *op)
59
--
60
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
12
fold_xx_to_i(ctx, op, 0)) {
13
return true;
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
20
--
21
2.43.0
diff view generated by jsdifflib
New patch
1
Change return from bool to int; distinguish between
2
complete folding, simplification, and no change.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 22 ++++++++++++++--------
8
1 file changed, 14 insertions(+), 8 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
15
return finish_folding(ctx, op);
16
}
17
18
-static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
19
+/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
20
+static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
21
{
22
uint64_t a_zmask, b_val;
23
TCGCond cond;
24
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
25
op->opc = xor_opc;
26
op->args[2] = arg_new_constant(ctx, 1);
27
}
28
- return false;
29
+ return -1;
30
}
31
}
32
-
33
- return false;
34
+ return 0;
35
}
36
37
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
38
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
39
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
40
}
41
42
- if (fold_setcond_zmask(ctx, op, false)) {
43
+ i = fold_setcond_zmask(ctx, op, false);
44
+ if (i > 0) {
45
return true;
46
}
47
- fold_setcond_tst_pow2(ctx, op, false);
48
+ if (i == 0) {
49
+ fold_setcond_tst_pow2(ctx, op, false);
50
+ }
51
52
ctx->z_mask = 1;
53
return false;
54
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
55
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
56
}
57
58
- if (fold_setcond_zmask(ctx, op, true)) {
59
+ i = fold_setcond_zmask(ctx, op, true);
60
+ if (i > 0) {
61
return true;
62
}
63
- fold_setcond_tst_pow2(ctx, op, true);
64
+ if (i == 0) {
65
+ fold_setcond_tst_pow2(ctx, op, true);
66
+ }
67
68
/* Value is {0,-1} so all bits are repetitions of the sign. */
69
ctx->s_mask = -1;
70
--
71
2.43.0
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 3 +--
7
1 file changed, 1 insertion(+), 2 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
14
fold_setcond_tst_pow2(ctx, op, false);
15
}
16
17
- ctx->z_mask = 1;
18
- return false;
19
+ return fold_masks_z(ctx, op, 1);
20
}
21
22
static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
23
--
24
2.43.0
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 3 +--
7
1 file changed, 1 insertion(+), 2 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
14
}
15
16
/* Value is {0,-1} so all bits are repetitions of the sign. */
17
- ctx->s_mask = -1;
18
- return false;
19
+ return fold_masks_s(ctx, op, -1);
20
}
21
22
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
23
--
24
2.43.0
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 3 +--
7
1 file changed, 1 insertion(+), 2 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
14
return fold_setcond(ctx, op);
15
}
16
17
- ctx->z_mask = 1;
18
- return false;
19
+ return fold_masks_z(ctx, op, 1);
20
21
do_setcond_const:
22
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
--
24
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
12
if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
13
op->args[3] = tcg_swap_cond(op->args[3]);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
12
if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
13
op->args[5] = tcg_invert_cond(op->args[5]);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_sextract(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 24 +++++++++---------------
7
1 file changed, 9 insertions(+), 15 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
14
static bool fold_sextract(OptContext *ctx, TCGOp *op)
15
{
16
uint64_t z_mask, s_mask, s_mask_old;
17
+ TempOptInfo *t1 = arg_info(op->args[1]);
18
int pos = op->args[2];
19
int len = op->args[3];
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t;
23
-
24
- t = arg_info(op->args[1])->val;
25
- t = sextract64(t, pos, len);
26
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
27
+ if (ti_is_const(t1)) {
28
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
29
+ sextract64(ti_const_val(t1), pos, len));
30
}
31
32
- z_mask = arg_info(op->args[1])->z_mask;
33
- z_mask = sextract64(z_mask, pos, len);
34
- ctx->z_mask = z_mask;
35
-
36
- s_mask_old = arg_info(op->args[1])->s_mask;
37
- s_mask = sextract64(s_mask_old, pos, len);
38
- s_mask |= MAKE_64BIT_MASK(len, 64 - len);
39
- ctx->s_mask = s_mask;
40
+ s_mask_old = t1->s_mask;
41
+ s_mask = s_mask_old >> pos;
42
+ s_mask |= -1ull << (len - 1);
43
44
if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
45
return true;
46
}
47
48
- return fold_masks(ctx, op);
49
+ z_mask = sextract64(t1->z_mask, pos, len);
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
51
}
52
53
static bool fold_shift(OptContext *ctx, TCGOp *op)
54
--
55
2.43.0
1
We will shortly need to be able to check facilities beyond the
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
first 64. Instead of explicitly masking against s390_facilities,
3
create a HAVE_FACILITY macro that indexes an array.
4
2
5
Reviewed-by: David Hildenbrand <david@redhat.com>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
v2: Change name to HAVE_FACILITY (david)
6
tcg/optimize.c | 27 ++++++++++++++-------------
9
---
7
1 file changed, 14 insertions(+), 13 deletions(-)
10
tcg/s390x/tcg-target.h | 29 ++++++++-------
11
tcg/s390x/tcg-target.c.inc | 74 +++++++++++++++++++-------------------
12
2 files changed, 52 insertions(+), 51 deletions(-)
13
8
14
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/s390x/tcg-target.h
11
--- a/tcg/optimize.c
17
+++ b/tcg/s390x/tcg-target.h
12
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ typedef enum TCGReg {
13
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
19
/* A list of relevant facilities used by this translator. Some of these
14
static bool fold_shift(OptContext *ctx, TCGOp *op)
20
are required for proper operation, and these are checked at startup. */
15
{
21
16
uint64_t s_mask, z_mask, sign;
22
-#define FACILITY_ZARCH_ACTIVE (1ULL << (63 - 2))
17
+ TempOptInfo *t1, *t2;
23
-#define FACILITY_LONG_DISP (1ULL << (63 - 18))
18
24
-#define FACILITY_EXT_IMM (1ULL << (63 - 21))
19
if (fold_const2(ctx, op) ||
25
-#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34))
20
fold_ix_to_i(ctx, op, 0) ||
26
-#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45))
21
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
27
+#define FACILITY_ZARCH_ACTIVE 2
22
return true;
28
+#define FACILITY_LONG_DISP 18
29
+#define FACILITY_EXT_IMM 21
30
+#define FACILITY_GEN_INST_EXT 34
31
+#define FACILITY_LOAD_ON_COND 45
32
#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
33
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
34
-#define FACILITY_LOAD_ON_COND2 (1ULL << (63 - 53))
35
+#define FACILITY_LOAD_ON_COND2 53
36
37
-extern uint64_t s390_facilities;
38
+extern uint64_t s390_facilities[1];
39
+
40
+#define HAVE_FACILITY(X) \
41
+ ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
42
43
/* optional instructions */
44
#define TCG_TARGET_HAS_div2_i32 1
45
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities;
46
#define TCG_TARGET_HAS_clz_i32 0
47
#define TCG_TARGET_HAS_ctz_i32 0
48
#define TCG_TARGET_HAS_ctpop_i32 0
49
-#define TCG_TARGET_HAS_deposit_i32 (s390_facilities & FACILITY_GEN_INST_EXT)
50
-#define TCG_TARGET_HAS_extract_i32 (s390_facilities & FACILITY_GEN_INST_EXT)
51
+#define TCG_TARGET_HAS_deposit_i32 HAVE_FACILITY(GEN_INST_EXT)
52
+#define TCG_TARGET_HAS_extract_i32 HAVE_FACILITY(GEN_INST_EXT)
53
#define TCG_TARGET_HAS_sextract_i32 0
54
#define TCG_TARGET_HAS_extract2_i32 0
55
#define TCG_TARGET_HAS_movcond_i32 1
56
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities;
57
#define TCG_TARGET_HAS_mulsh_i32 0
58
#define TCG_TARGET_HAS_extrl_i64_i32 0
59
#define TCG_TARGET_HAS_extrh_i64_i32 0
60
-#define TCG_TARGET_HAS_direct_jump (s390_facilities & FACILITY_GEN_INST_EXT)
61
+#define TCG_TARGET_HAS_direct_jump HAVE_FACILITY(GEN_INST_EXT)
62
#define TCG_TARGET_HAS_qemu_st8_i32 0
63
64
#define TCG_TARGET_HAS_div2_i64 1
65
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities;
66
#define TCG_TARGET_HAS_eqv_i64 0
67
#define TCG_TARGET_HAS_nand_i64 0
68
#define TCG_TARGET_HAS_nor_i64 0
69
-#define TCG_TARGET_HAS_clz_i64 (s390_facilities & FACILITY_EXT_IMM)
70
+#define TCG_TARGET_HAS_clz_i64 HAVE_FACILITY(EXT_IMM)
71
#define TCG_TARGET_HAS_ctz_i64 0
72
#define TCG_TARGET_HAS_ctpop_i64 0
73
-#define TCG_TARGET_HAS_deposit_i64 (s390_facilities & FACILITY_GEN_INST_EXT)
74
-#define TCG_TARGET_HAS_extract_i64 (s390_facilities & FACILITY_GEN_INST_EXT)
75
+#define TCG_TARGET_HAS_deposit_i64 HAVE_FACILITY(GEN_INST_EXT)
76
+#define TCG_TARGET_HAS_extract_i64 HAVE_FACILITY(GEN_INST_EXT)
77
#define TCG_TARGET_HAS_sextract_i64 0
78
#define TCG_TARGET_HAS_extract2_i64 0
79
#define TCG_TARGET_HAS_movcond_i64 1
80
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/s390x/tcg-target.c.inc
83
+++ b/tcg/s390x/tcg-target.c.inc
84
@@ -XXX,XX +XXX,XX @@
85
We don't need this when we have pc-relative loads with the general
86
instructions extension facility. */
87
#define TCG_REG_TB TCG_REG_R12
88
-#define USE_REG_TB (!(s390_facilities & FACILITY_GEN_INST_EXT))
89
+#define USE_REG_TB (!HAVE_FACILITY(GEN_INST_EXT))
90
91
#ifndef CONFIG_SOFTMMU
92
#define TCG_GUEST_BASE_REG TCG_REG_R13
93
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
94
#endif
95
96
static const tcg_insn_unit *tb_ret_addr;
97
-uint64_t s390_facilities;
98
+uint64_t s390_facilities[1];
99
100
static bool patch_reloc(tcg_insn_unit *src_rw, int type,
101
intptr_t value, intptr_t addend)
102
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
103
}
23
}
104
24
105
/* Try all 48-bit insns that can load it in one go. */
25
- s_mask = arg_info(op->args[1])->s_mask;
106
- if (s390_facilities & FACILITY_EXT_IMM) {
26
- z_mask = arg_info(op->args[1])->z_mask;
107
+ if (HAVE_FACILITY(EXT_IMM)) {
27
+ t1 = arg_info(op->args[1]);
108
if (sval == (int32_t)sval) {
28
+ t2 = arg_info(op->args[2]);
109
tcg_out_insn(s, RIL, LGFI, ret, sval);
29
+ s_mask = t1->s_mask;
110
return;
30
+ z_mask = t1->z_mask;
111
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
31
32
- if (arg_is_const(op->args[2])) {
33
- int sh = arg_info(op->args[2])->val;
34
-
35
- ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
36
+ if (ti_is_const(t2)) {
37
+ int sh = ti_const_val(t2);
38
39
+ z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
40
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
41
42
- return fold_masks(ctx, op);
43
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
112
}
44
}
113
45
114
/* Otherwise, stuff it in the constant pool. */
46
switch (op->opc) {
115
- if (s390_facilities & FACILITY_GEN_INST_EXT) {
47
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
116
+ if (HAVE_FACILITY(GEN_INST_EXT)) {
48
* Arithmetic right shift will not reduce the number of
117
tcg_out_insn(s, RIL, LGRL, ret, 0);
49
* input sign repetitions.
118
new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
50
*/
119
} else if (USE_REG_TB && !in_prologue) {
51
- ctx->s_mask = s_mask;
120
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_abs(TCGContext *s, TCGType type,
52
- break;
121
{
53
+ return fold_masks_s(ctx, op, s_mask);
122
intptr_t addr = (intptr_t)abs;
54
CASE_OP_32_64(shr):
123
55
/*
124
- if ((s390_facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) {
56
* If the sign bit is known zero, then logical right shift
125
+ if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
57
- * will not reduced the number of input sign repetitions.
126
ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
58
+ * will not reduce the number of input sign repetitions.
127
if (disp == (int32_t)disp) {
59
*/
128
if (type == TCG_TYPE_I32) {
60
- sign = (s_mask & -s_mask) >> 1;
129
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
61
+ sign = -s_mask;
130
62
if (sign && !(z_mask & sign)) {
131
static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
63
- ctx->s_mask = s_mask;
132
{
64
+ return fold_masks_s(ctx, op, s_mask);
133
- if (s390_facilities & FACILITY_EXT_IMM) {
134
+ if (HAVE_FACILITY(EXT_IMM)) {
135
tcg_out_insn(s, RRE, LGBR, dest, src);
136
return;
137
}
138
@@ -XXX,XX +XXX,XX @@ static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
139
140
static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
141
{
142
- if (s390_facilities & FACILITY_EXT_IMM) {
143
+ if (HAVE_FACILITY(EXT_IMM)) {
144
tcg_out_insn(s, RRE, LLGCR, dest, src);
145
return;
146
}
147
@@ -XXX,XX +XXX,XX @@ static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
148
149
static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
150
{
151
- if (s390_facilities & FACILITY_EXT_IMM) {
152
+ if (HAVE_FACILITY(EXT_IMM)) {
153
tcg_out_insn(s, RRE, LGHR, dest, src);
154
return;
155
}
156
@@ -XXX,XX +XXX,XX @@ static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
157
158
static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
159
{
160
- if (s390_facilities & FACILITY_EXT_IMM) {
161
+ if (HAVE_FACILITY(EXT_IMM)) {
162
tcg_out_insn(s, RRE, LLGHR, dest, src);
163
return;
164
}
165
@@ -XXX,XX +XXX,XX @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
166
tgen_ext32u(s, dest, dest);
167
return;
168
}
169
- if (s390_facilities & FACILITY_EXT_IMM) {
170
+ if (HAVE_FACILITY(EXT_IMM)) {
171
if ((val & valid) == 0xff) {
172
tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
173
return;
174
@@ -XXX,XX +XXX,XX @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
175
}
176
177
/* Try all 48-bit insns that can perform it in one go. */
178
- if (s390_facilities & FACILITY_EXT_IMM) {
179
+ if (HAVE_FACILITY(EXT_IMM)) {
180
for (i = 0; i < 2; i++) {
181
tcg_target_ulong mask = ~(0xffffffffull << i*32);
182
if (((val | ~valid) & mask) == mask) {
183
@@ -XXX,XX +XXX,XX @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
184
}
185
}
186
}
187
- if ((s390_facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
188
+ if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) {
189
tgen_andi_risbg(s, dest, dest, val);
190
return;
191
}
192
@@ -XXX,XX +XXX,XX @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
193
}
194
195
/* Try all 48-bit insns that can perform it in one go. */
196
- if (s390_facilities & FACILITY_EXT_IMM) {
197
+ if (HAVE_FACILITY(EXT_IMM)) {
198
for (i = 0; i < 2; i++) {
199
tcg_target_ulong mask = (0xffffffffull << i*32);
200
if ((val & mask) != 0 && (val & ~mask) == 0) {
201
@@ -XXX,XX +XXX,XX @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
202
/* Perform the OR via sequential modifications to the high and
203
low parts. Do this via recursion to handle 16-bit vs 32-bit
204
masks in each half. */
205
- tcg_debug_assert(s390_facilities & FACILITY_EXT_IMM);
206
+ tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
207
tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
208
tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
209
}
210
@@ -XXX,XX +XXX,XX @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
211
static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
212
{
213
/* Try all 48-bit insns that can perform it in one go. */
214
- if (s390_facilities & FACILITY_EXT_IMM) {
215
+ if (HAVE_FACILITY(EXT_IMM)) {
216
if ((val & 0xffffffff00000000ull) == 0) {
217
tcg_out_insn(s, RIL, XILF, dest, val);
218
return;
219
@@ -XXX,XX +XXX,XX @@ static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
220
tcg_tbrel_diff(s, NULL));
221
} else {
222
/* Perform the xor by parts. */
223
- tcg_debug_assert(s390_facilities & FACILITY_EXT_IMM);
224
+ tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
225
if (val & 0xffffffff) {
226
tcg_out_insn(s, RIL, XILF, dest, val);
227
}
228
@@ -XXX,XX +XXX,XX @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
229
goto exit;
230
}
231
232
- if (s390_facilities & FACILITY_EXT_IMM) {
233
+ if (HAVE_FACILITY(EXT_IMM)) {
234
if (type == TCG_TYPE_I32) {
235
op = (is_unsigned ? RIL_CLFI : RIL_CFI);
236
tcg_out_insn_RIL(s, op, r1, c2);
237
@@ -XXX,XX +XXX,XX @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
238
bool have_loc;
239
240
/* With LOC2, we can always emit the minimum 3 insns. */
241
- if (s390_facilities & FACILITY_LOAD_ON_COND2) {
242
+ if (HAVE_FACILITY(LOAD_ON_COND2)) {
243
/* Emit: d = 0, d = (cc ? 1 : d). */
244
cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
245
tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
246
@@ -XXX,XX +XXX,XX @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
247
return;
248
}
249
250
- have_loc = (s390_facilities & FACILITY_LOAD_ON_COND) != 0;
251
+ have_loc = HAVE_FACILITY(LOAD_ON_COND);
252
253
/* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller. */
254
restart:
255
@@ -XXX,XX +XXX,XX @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
256
TCGArg v3, int v3const)
257
{
258
int cc;
259
- if (s390_facilities & FACILITY_LOAD_ON_COND) {
260
+ if (HAVE_FACILITY(LOAD_ON_COND)) {
261
cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
262
if (v3const) {
263
tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
264
@@ -XXX,XX +XXX,XX @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
265
} else {
266
tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
267
}
268
- if (s390_facilities & FACILITY_LOAD_ON_COND) {
269
+ if (HAVE_FACILITY(LOAD_ON_COND)) {
270
/* Emit: if (one bit found) dest = r0. */
271
tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
272
} else {
273
@@ -XXX,XX +XXX,XX @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
274
{
275
int cc;
276
277
- if (s390_facilities & FACILITY_GEN_INST_EXT) {
278
+ if (HAVE_FACILITY(GEN_INST_EXT)) {
279
bool is_unsigned = is_unsigned_cond(c);
280
bool in_range;
281
S390Opcode opc;
282
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
283
cross pages using the address of the last byte of the access. */
284
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
285
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
286
- if ((s390_facilities & FACILITY_GEN_INST_EXT) && a_off == 0) {
287
+ if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) {
288
tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
289
} else {
290
tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
291
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
292
tcg_out_insn(s, RI, AHI, a0, a2);
293
break;
294
}
295
- if (s390_facilities & FACILITY_EXT_IMM) {
296
+ if (HAVE_FACILITY(EXT_IMM)) {
297
tcg_out_insn(s, RIL, AFI, a0, a2);
298
break;
299
}
300
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
301
tcg_out_insn(s, RI, AGHI, a0, a2);
302
break;
303
}
304
- if (s390_facilities & FACILITY_EXT_IMM) {
305
+ if (HAVE_FACILITY(EXT_IMM)) {
306
if (a2 == (int32_t)a2) {
307
tcg_out_insn(s, RIL, AGFI, a0, a2);
308
break;
309
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
310
/* The host memory model is quite strong, we simply need to
311
serialize the instruction stream. */
312
if (args[0] & TCG_MO_ST_LD) {
313
- tcg_out_insn(s, RR, BCR,
314
- s390_facilities & FACILITY_FAST_BCR_SER ? 14 : 15, 0);
315
+ tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0);
316
}
65
}
317
break;
66
break;
318
67
default:
319
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
68
break;
320
case INDEX_op_or_i64:
321
case INDEX_op_xor_i32:
322
case INDEX_op_xor_i64:
323
- return (s390_facilities & FACILITY_DISTINCT_OPS
324
+ return (HAVE_FACILITY(DISTINCT_OPS)
325
? C_O1_I2(r, r, ri)
326
: C_O1_I2(r, 0, ri));
327
328
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
329
/* If we have the general-instruction-extensions, then we have
330
MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
331
have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */
332
- return (s390_facilities & FACILITY_GEN_INST_EXT
333
+ return (HAVE_FACILITY(GEN_INST_EXT)
334
? C_O1_I2(r, 0, ri)
335
: C_O1_I2(r, 0, rI));
336
337
case INDEX_op_mul_i64:
338
- return (s390_facilities & FACILITY_GEN_INST_EXT
339
+ return (HAVE_FACILITY(GEN_INST_EXT)
340
? C_O1_I2(r, 0, rJ)
341
: C_O1_I2(r, 0, rI));
342
343
case INDEX_op_shl_i32:
344
case INDEX_op_shr_i32:
345
case INDEX_op_sar_i32:
346
- return (s390_facilities & FACILITY_DISTINCT_OPS
347
+ return (HAVE_FACILITY(DISTINCT_OPS)
348
? C_O1_I2(r, r, ri)
349
: C_O1_I2(r, 0, ri));
350
351
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
352
353
case INDEX_op_movcond_i32:
354
case INDEX_op_movcond_i64:
355
- return (s390_facilities & FACILITY_LOAD_ON_COND2
356
+ return (HAVE_FACILITY(LOAD_ON_COND2)
357
? C_O1_I4(r, r, ri, rI, 0)
358
: C_O1_I4(r, r, ri, r, 0));
359
360
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
361
362
case INDEX_op_add2_i32:
363
case INDEX_op_sub2_i32:
364
- return (s390_facilities & FACILITY_EXT_IMM
365
+ return (HAVE_FACILITY(EXT_IMM)
366
? C_O2_I4(r, r, 0, 1, ri, r)
367
: C_O2_I4(r, r, 0, 1, r, r));
368
369
case INDEX_op_add2_i64:
370
case INDEX_op_sub2_i64:
371
- return (s390_facilities & FACILITY_EXT_IMM
372
+ return (HAVE_FACILITY(EXT_IMM)
373
? C_O2_I4(r, r, 0, 1, rA, r)
374
: C_O2_I4(r, r, 0, 1, r, r));
375
376
@@ -XXX,XX +XXX,XX @@ static void query_s390_facilities(void)
377
/* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this
378
is present on all 64-bit systems, but let's check for it anyway. */
379
if (hwcap & HWCAP_S390_STFLE) {
380
- register int r0 __asm__("0");
381
- register void *r1 __asm__("1");
382
+ register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
383
+ register void *r1 __asm__("1") = s390_facilities;
384
385
/* stfle 0(%r1) */
386
- r1 = &s390_facilities;
387
asm volatile(".word 0xb2b0,0x1000"
388
- : "=r"(r0) : "0"(0), "r"(r1) : "memory", "cc");
389
+ : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
390
}
69
}
70
71
- return false;
72
+ return finish_folding(ctx, op);
391
}
73
}
392
74
75
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
393
--
76
--
394
2.25.1
77
2.43.0
395
396
New patch

Merge the two conditions, sign != 0 && !(z_mask & sign),
by testing ~z_mask & sign. If sign == 0, the logical and
will produce false.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
 
 static bool fold_shift(OptContext *ctx, TCGOp *op)
 {
-    uint64_t s_mask, z_mask, sign;
+    uint64_t s_mask, z_mask;
     TempOptInfo *t1, *t2;
 
     if (fold_const2(ctx, op) ||
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
          * If the sign bit is known zero, then logical right shift
          * will not reduce the number of input sign repetitions.
          */
-        sign = -s_mask;
-        if (sign && !(z_mask & sign)) {
+        if (~z_mask & -s_mask) {
             return fold_masks_s(ctx, op, s_mask);
         }
         break;
--
2.43.0
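The rewrite above is easiest to see with concrete masks: -s_mask is the old
`sign` value, and in the cases exercised below (s_mask zero, or a
sign-extension mask whose negation is a single sign bit) the merged test
gives the same answer as the two-part test. A small standalone check, not
code from the series; the INT8_MIN-style mask is only a hypothetical example
of a sign-extended 8-bit value.

    #include <assert.h>
    #include <stdint.h>

    /* Old form: sign = -s_mask; fold when sign != 0 && !(z_mask & sign). */
    static int old_test(uint64_t z_mask, uint64_t s_mask)
    {
        uint64_t sign = -s_mask;
        return sign && !(z_mask & sign);
    }

    /* New form: one bitwise test; s_mask == 0 makes -s_mask == 0, so the
     * and with ~z_mask is zero and the fold is rejected. */
    static int new_test(uint64_t z_mask, uint64_t s_mask)
    {
        return (~z_mask & -s_mask) != 0;
    }

    int main(void)
    {
        uint64_t s_mask = (uint64_t)INT8_MIN;   /* hypothetical: ...ffffff80 */

        assert(old_test(0x7f, 0) == new_test(0x7f, 0));            /* s_mask == 0 */
        assert(old_test(0x7f, s_mask) == new_test(0x7f, s_mask));  /* sign bit known 0 */
        assert(old_test(0xff, s_mask) == new_test(0xff, s_mask));  /* sign bit unknown */
        return 0;
    }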
New patch

Duplicate fold_sub_vec into fold_sub instead of calling it,
now that fold_sub_vec always returns true.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
         fold_sub_to_neg(ctx, op)) {
         return true;
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_sub(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
+    if (fold_const2(ctx, op) ||
+        fold_xx_to_i(ctx, op, 0) ||
+        fold_xi_to_x(ctx, op, 0) ||
+        fold_sub_to_neg(ctx, op)) {
         return true;
     }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
                    ? INDEX_op_add_i32 : INDEX_op_add_i64);
         op->args[2] = arg_new_constant(ctx, -val);
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_sub2(OptContext *ctx, TCGOp *op)
--
2.43.0
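For context, the second hunk above leaves the existing constant
canonicalization alone: a subtraction of a constant becomes an addition of
its negation (arg_new_constant(ctx, -val) with the opcode switched to add).
A standalone check of that two's-complement identity, using plain 32-bit
integers rather than TCG temps:

    #include <assert.h>
    #include <stdint.h>

    /* x - c == x + (-c) in two's-complement (modular) arithmetic, which is
     * why sub-with-immediate can be canonicalized to add-with-immediate. */
    static uint32_t sub_direct(uint32_t x, uint32_t c) { return x - c; }
    static uint32_t sub_as_add(uint32_t x, uint32_t c) { return x + (0u - c); }

    int main(void)
    {
        const uint32_t v[] = { 0, 1, 0x7fffffffu, 0x80000000u, 0xffffffffu, 12345 };

        for (unsigned i = 0; i < 6; i++) {
            for (unsigned j = 0; j < 6; j++) {
                assert(sub_direct(v[i], v[j]) == sub_as_add(v[i], v[j]));
            }
        }
        return 0;
    }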
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 16 +++++++++-------
7
1 file changed, 9 insertions(+), 7 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2(OptContext *ctx, TCGOp *op)
14
15
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t z_mask = -1, s_mask = 0;
18
+
19
/* We can't do any folding with a load, but we can record bits. */
20
switch (op->opc) {
21
CASE_OP_32_64(ld8s):
22
- ctx->s_mask = MAKE_64BIT_MASK(8, 56);
23
+ s_mask = INT8_MIN;
24
break;
25
CASE_OP_32_64(ld8u):
26
- ctx->z_mask = MAKE_64BIT_MASK(0, 8);
27
+ z_mask = MAKE_64BIT_MASK(0, 8);
28
break;
29
CASE_OP_32_64(ld16s):
30
- ctx->s_mask = MAKE_64BIT_MASK(16, 48);
31
+ s_mask = INT16_MIN;
32
break;
33
CASE_OP_32_64(ld16u):
34
- ctx->z_mask = MAKE_64BIT_MASK(0, 16);
35
+ z_mask = MAKE_64BIT_MASK(0, 16);
36
break;
37
case INDEX_op_ld32s_i64:
38
- ctx->s_mask = MAKE_64BIT_MASK(32, 32);
39
+ s_mask = INT32_MIN;
40
break;
41
case INDEX_op_ld32u_i64:
42
- ctx->z_mask = MAKE_64BIT_MASK(0, 32);
43
+ z_mask = MAKE_64BIT_MASK(0, 32);
44
break;
45
default:
46
g_assert_not_reached();
47
}
48
- return false;
49
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
50
}
51
52
static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
53
--
54
2.43.0
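The ld8s/ld16s/ld32s cases above swap MAKE_64BIT_MASK(...) sign masks for
INT8_MIN/INT16_MIN/INT32_MIN constants which, judging by the constants,
widens each mask by one bit so that the sign-bit position itself is
included. A standalone comparison of the two encodings; MAKE_64BIT_MASK is
restated locally for illustration and is not the QEMU header definition:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Local restatement for illustration: a mask of LEN bits from SHIFT up. */
    #define MAKE_64BIT_MASK(shift, len) \
        (((~0ULL) >> (64 - (len))) << (shift))

    int main(void)
    {
        /* ld8s: old encoding covers bits 8..63, new covers bits 7..63. */
        printf("old ld8s  s_mask = 0x%016" PRIx64 ", new = 0x%016" PRIx64 "\n",
               (uint64_t)MAKE_64BIT_MASK(8, 56), (uint64_t)INT8_MIN);

        /* ld16s and ld32s follow the same pattern. */
        printf("old ld16s s_mask = 0x%016" PRIx64 ", new = 0x%016" PRIx64 "\n",
               (uint64_t)MAKE_64BIT_MASK(16, 48), (uint64_t)INT16_MIN);
        printf("old ld32s s_mask = 0x%016" PRIx64 ", new = 0x%016" PRIx64 "\n",
               (uint64_t)MAKE_64BIT_MASK(32, 32), (uint64_t)INT32_MIN);
        return 0;
    }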
New patch

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
     TCGType type;
 
     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
-        return false;
+        return finish_folding(ctx, op);
     }
 
     type = ctx->type;
--
2.43.0
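This patch, like several nearby, turns a bare `return false` into
`return finish_folding(ctx, op)`. The version of finish_folding() visible
later in this series resets the recorded mask data of every output temp, so
every early exit has to pass through it once the OptContext-wide masks are
gone. A rough stand-in sketch of that bookkeeping; the struct, the reset
values and the helper name are simplifications, not the real TCG definitions:

    #include <stdbool.h>
    #include <stdint.h>

    /* Stand-in for TempOptInfo's mask fields. */
    typedef struct {
        uint64_t z_mask;   /* bit is 0 iff the value bit is known to be 0 */
        uint64_t s_mask;   /* known sign-replicated bits                  */
    } TempInfoSketch;

    /* Stand-in for finish_folding(): once an op writes a temp, whatever was
     * known about that temp is stale, so reset to "nothing known" and tell
     * the caller the op has been fully handled. */
    static bool finish_folding_sketch(TempInfoSketch *outs, int nb_oargs)
    {
        for (int i = 0; i < nb_oargs; i++) {
            outs[i].z_mask = -1;   /* every bit may be 1        */
            outs[i].s_mask = 0;    /* no known sign repetitions */
        }
        return true;
    }

    int main(void)
    {
        TempInfoSketch out = { .z_mask = 0xff, .s_mask = 0 };
        return finish_folding_sketch(&out, 1) ? 0 : 1;
    }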
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Remove fold_masks as the function becomes unused.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 18 ++++++++----------
8
1 file changed, 8 insertions(+), 10 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
15
return fold_masks_zs(ctx, op, -1, s_mask);
16
}
17
18
-static bool fold_masks(OptContext *ctx, TCGOp *op)
19
-{
20
- return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
21
-}
22
-
23
/*
24
* An "affected" mask bit is 0 if and only if the result is identical
25
* to the first input. Thus if the entire mask is 0, the operation
26
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
27
28
static bool fold_xor(OptContext *ctx, TCGOp *op)
29
{
30
+ uint64_t z_mask, s_mask;
31
+ TempOptInfo *t1, *t2;
32
+
33
if (fold_const2_commutative(ctx, op) ||
34
fold_xx_to_i(ctx, op, 0) ||
35
fold_xi_to_x(ctx, op, 0) ||
36
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
37
return true;
38
}
39
40
- ctx->z_mask = arg_info(op->args[1])->z_mask
41
- | arg_info(op->args[2])->z_mask;
42
- ctx->s_mask = arg_info(op->args[1])->s_mask
43
- & arg_info(op->args[2])->s_mask;
44
- return fold_masks(ctx, op);
45
+ t1 = arg_info(op->args[1]);
46
+ t2 = arg_info(op->args[2]);
47
+ z_mask = t1->z_mask | t2->z_mask;
48
+ s_mask = t1->s_mask & t2->s_mask;
49
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
50
}
51
52
static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
53
--
54
2.43.0
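fold_xor() above now computes z_mask = t1->z_mask | t2->z_mask and
s_mask = t1->s_mask & t2->s_mask directly from the two TempOptInfo operands.
The z_mask rule can be checked exhaustively on small values; the sketch below
uses plain integers for the masks and is not tied to the TCG data structures:

    #include <assert.h>
    #include <stdint.h>

    /* z_mask semantics: a 0 bit means "this result bit is known to be 0". */
    static uint64_t xor_z_mask(uint64_t z1, uint64_t z2)
    {
        return z1 | z2;
    }

    int main(void)
    {
        uint64_t z1 = 0x9, z2 = 0x6;            /* arbitrary example masks */
        uint64_t zr = xor_z_mask(z1, z2);

        /* Exhaustive check over 4-bit values permitted by the two masks. */
        for (uint64_t x = 0; x < 16; x++) {
            for (uint64_t y = 0; y < 16; y++) {
                if ((x & ~z1) || (y & ~z2)) {
                    continue;                   /* value not allowed by its mask */
                }
                assert(((x ^ y) & ~zr) == 0);   /* result stays within z1 | z2 */
            }
        }
        return 0;
    }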
New patch

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
             return fold_orc(ctx, op);
         }
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 /* Propagate constants and copies, fold constant expressions. */
--
2.43.0
New patch

All non-default cases now finish folding within each function.
Do the same with the default case and assert it is done after.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
            done = true;
            break;
        default:
+           done = finish_folding(&ctx, op);
            break;
        }
-
-        if (!done) {
-            finish_folding(&ctx, op);
-        }
+        tcg_debug_assert(done);
     }
 }
--
2.43.0
New patch
1
All mask setting is now done with parameters via fold_masks_*.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 13 -------------
7
1 file changed, 13 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
14
QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
15
16
/* In flight values from optimization. */
17
- uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
18
- uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
19
TCGType type;
20
} OptContext;
21
22
@@ -XXX,XX +XXX,XX @@ static bool finish_folding(OptContext *ctx, TCGOp *op)
23
for (i = 0; i < nb_oargs; i++) {
24
TCGTemp *ts = arg_temp(op->args[i]);
25
reset_ts(ctx, ts);
26
- /*
27
- * Save the corresponding known-zero/sign bits mask for the
28
- * first output argument (only one supported so far).
29
- */
30
- if (i == 0) {
31
- ts_info(ts)->z_mask = ctx->z_mask;
32
- }
33
}
34
return true;
35
}
36
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
37
ctx.type = TCG_TYPE_I32;
38
}
39
40
- /* Assume all bits affected, no bits known zero, no sign reps. */
41
- ctx.z_mask = -1;
42
- ctx.s_mask = 0;
43
-
44
/*
45
* Process each opcode.
46
* Sorted alphabetically by opcode as much as possible.
47
--
48
2.43.0
New patch
1
All instances of s_mask have been converted to the new
2
representation. We can now re-enable usage.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 4 ++--
8
1 file changed, 2 insertions(+), 2 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
15
g_assert_not_reached();
16
}
17
18
- if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
19
+ if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
20
return true;
21
}
22
23
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
24
s_mask = s_mask_old >> pos;
25
s_mask |= -1ull << (len - 1);
26
27
- if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
28
+ if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
29
return true;
30
}
31
32
--
33
2.43.0
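The re-enabled tests pass s_mask & ~s_mask_old to fold_affected_mask(); per
the comment quoted further down in this series, an all-zero affected mask
means the result is identical to the first input, so the extension can be
dropped. A standalone illustration of when that mask product is zero (a
sketch of the reasoning only, using the INT8_MIN/INT16_MIN-style sign masks
as hypothetical values):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* What an 8-bit sign extension guarantees about its result. */
        uint64_t s_mask = (uint64_t)INT8_MIN;

        /* Input already known sign-extended from 8 bits: nothing new is
         * guaranteed, the affected mask is zero, the op folds away. */
        uint64_t s_mask_old = (uint64_t)INT8_MIN;
        assert((s_mask & ~s_mask_old) == 0);

        /* Input only known sign-extended from 16 bits: the 8-bit extension
         * can still change the value, so the affected mask is non-zero. */
        s_mask_old = (uint64_t)INT16_MIN;
        assert((s_mask & ~s_mask_old) != 0);
        return 0;
    }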
1
There is no point in encoding load/store within a bit of
1
The big comment just above says functions should be sorted.
2
the memory trace info operand. Represent atomic operations
2
Add forward declarations as needed.
3
as a single read-modify-write tracepoint. Use MemOpIdx
4
instead of inventing a form specifically for traces.
5
3
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
accel/tcg/atomic_template.h | 1 -
7
tcg/optimize.c | 114 +++++++++++++++++++++++++------------------------
10
trace/mem.h | 51 -----------------------------------
8
1 file changed, 59 insertions(+), 55 deletions(-)
11
accel/tcg/cputlb.c | 7 ++---
12
accel/tcg/user-exec.c | 44 +++++++++++-------------------
13
tcg/tcg-op.c | 17 +++---------
14
accel/tcg/atomic_common.c.inc | 12 +++------
15
trace-events | 18 +++----------
16
7 files changed, 28 insertions(+), 122 deletions(-)
17
delete mode 100644 trace/mem.h
18
9
19
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
20
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
21
--- a/accel/tcg/atomic_template.h
12
--- a/tcg/optimize.c
22
+++ b/accel/tcg/atomic_template.h
13
+++ b/tcg/optimize.c
23
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
15
* 3) those that produce information about the result value.
24
*/
16
*/
25
17
26
#include "qemu/plugin.h"
18
+static bool fold_or(OptContext *ctx, TCGOp *op);
27
-#include "trace/mem.h"
19
+static bool fold_orc(OptContext *ctx, TCGOp *op);
28
20
+static bool fold_xor(OptContext *ctx, TCGOp *op);
29
#if DATA_SIZE == 16
21
+
30
# define SUFFIX o
22
static bool fold_add(OptContext *ctx, TCGOp *op)
31
diff --git a/trace/mem.h b/trace/mem.h
23
{
32
deleted file mode 100644
24
if (fold_const2_commutative(ctx, op) ||
33
index XXXXXXX..XXXXXXX
25
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
34
--- a/trace/mem.h
26
return fold_masks_zs(ctx, op, z_mask, s_mask);
35
+++ /dev/null
27
}
36
@@ -XXX,XX +XXX,XX @@
28
37
-/*
29
+static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
38
- * Helper functions for guest memory tracing
30
+{
39
- *
31
+ /* If true and false values are the same, eliminate the cmp. */
40
- * Copyright (C) 2016 Lluís Vilanova <vilanova@ac.upc.edu>
32
+ if (args_are_copies(op->args[2], op->args[3])) {
41
- *
33
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
42
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
34
+ }
43
- * See the COPYING file in the top-level directory.
35
+
44
- */
36
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
37
+ uint64_t tv = arg_info(op->args[2])->val;
38
+ uint64_t fv = arg_info(op->args[3])->val;
39
+
40
+ if (tv == -1 && fv == 0) {
41
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
42
+ }
43
+ if (tv == 0 && fv == -1) {
44
+ if (TCG_TARGET_HAS_not_vec) {
45
+ op->opc = INDEX_op_not_vec;
46
+ return fold_not(ctx, op);
47
+ } else {
48
+ op->opc = INDEX_op_xor_vec;
49
+ op->args[2] = arg_new_constant(ctx, -1);
50
+ return fold_xor(ctx, op);
51
+ }
52
+ }
53
+ }
54
+ if (arg_is_const(op->args[2])) {
55
+ uint64_t tv = arg_info(op->args[2])->val;
56
+ if (tv == -1) {
57
+ op->opc = INDEX_op_or_vec;
58
+ op->args[2] = op->args[3];
59
+ return fold_or(ctx, op);
60
+ }
61
+ if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
62
+ op->opc = INDEX_op_andc_vec;
63
+ op->args[2] = op->args[1];
64
+ op->args[1] = op->args[3];
65
+ return fold_andc(ctx, op);
66
+ }
67
+ }
68
+ if (arg_is_const(op->args[3])) {
69
+ uint64_t fv = arg_info(op->args[3])->val;
70
+ if (fv == 0) {
71
+ op->opc = INDEX_op_and_vec;
72
+ return fold_and(ctx, op);
73
+ }
74
+ if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
75
+ op->opc = INDEX_op_orc_vec;
76
+ op->args[2] = op->args[1];
77
+ op->args[1] = op->args[3];
78
+ return fold_orc(ctx, op);
79
+ }
80
+ }
81
+ return finish_folding(ctx, op);
82
+}
83
+
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
85
{
86
int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
87
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
88
return fold_masks_zs(ctx, op, z_mask, s_mask);
89
}
90
91
-static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
92
-{
93
- /* If true and false values are the same, eliminate the cmp. */
94
- if (args_are_copies(op->args[2], op->args[3])) {
95
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
96
- }
45
-
97
-
46
-#ifndef TRACE__MEM_H
98
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
47
-#define TRACE__MEM_H
99
- uint64_t tv = arg_info(op->args[2])->val;
100
- uint64_t fv = arg_info(op->args[3])->val;
48
-
101
-
49
-#include "exec/memopidx.h"
102
- if (tv == -1 && fv == 0) {
50
-
103
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
51
-#define TRACE_MEM_SZ_SHIFT_MASK 0xf /* size shift mask */
104
- }
52
-#define TRACE_MEM_SE (1ULL << 4) /* sign extended (y/n) */
105
- if (tv == 0 && fv == -1) {
53
-#define TRACE_MEM_BE (1ULL << 5) /* big endian (y/n) */
106
- if (TCG_TARGET_HAS_not_vec) {
54
-#define TRACE_MEM_ST (1ULL << 6) /* store (y/n) */
107
- op->opc = INDEX_op_not_vec;
55
-#define TRACE_MEM_MMU_SHIFT 8 /* mmu idx */
108
- return fold_not(ctx, op);
56
-
109
- } else {
57
-/**
110
- op->opc = INDEX_op_xor_vec;
58
- * trace_mem_get_info:
111
- op->args[2] = arg_new_constant(ctx, -1);
59
- *
112
- return fold_xor(ctx, op);
60
- * Return a value for the 'info' argument in guest memory access traces.
113
- }
61
- */
114
- }
62
-static inline uint16_t trace_mem_get_info(MemOpIdx oi, bool store)
63
-{
64
- MemOp op = get_memop(oi);
65
- uint32_t size_shift = op & MO_SIZE;
66
- bool sign_extend = op & MO_SIGN;
67
- bool big_endian = (op & MO_BSWAP) == MO_BE;
68
- uint16_t res;
69
-
70
- res = size_shift & TRACE_MEM_SZ_SHIFT_MASK;
71
- if (sign_extend) {
72
- res |= TRACE_MEM_SE;
73
- }
115
- }
74
- if (big_endian) {
116
- if (arg_is_const(op->args[2])) {
75
- res |= TRACE_MEM_BE;
117
- uint64_t tv = arg_info(op->args[2])->val;
118
- if (tv == -1) {
119
- op->opc = INDEX_op_or_vec;
120
- op->args[2] = op->args[3];
121
- return fold_or(ctx, op);
122
- }
123
- if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
124
- op->opc = INDEX_op_andc_vec;
125
- op->args[2] = op->args[1];
126
- op->args[1] = op->args[3];
127
- return fold_andc(ctx, op);
128
- }
76
- }
129
- }
77
- if (store) {
130
- if (arg_is_const(op->args[3])) {
78
- res |= TRACE_MEM_ST;
131
- uint64_t fv = arg_info(op->args[3])->val;
132
- if (fv == 0) {
133
- op->opc = INDEX_op_and_vec;
134
- return fold_and(ctx, op);
135
- }
136
- if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
137
- op->opc = INDEX_op_orc_vec;
138
- op->args[2] = op->args[1];
139
- op->args[1] = op->args[3];
140
- return fold_orc(ctx, op);
141
- }
79
- }
142
- }
80
-#ifdef CONFIG_SOFTMMU
143
- return finish_folding(ctx, op);
81
- res |= get_mmuidx(oi) << TRACE_MEM_MMU_SHIFT;
82
-#endif
83
-
84
- return res;
85
-}
144
-}
86
-
145
-
87
-#endif /* TRACE__MEM_H */
146
/* Propagate constants and copies, fold constant expressions. */
88
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
147
void tcg_optimize(TCGContext *s)
89
index XXXXXXX..XXXXXXX 100644
90
--- a/accel/tcg/cputlb.c
91
+++ b/accel/tcg/cputlb.c
92
@@ -XXX,XX +XXX,XX @@
93
#include "qemu/atomic128.h"
94
#include "exec/translate-all.h"
95
#include "trace/trace-root.h"
96
-#include "trace/mem.h"
97
#include "tb-hash.h"
98
#include "internal.h"
99
#ifdef CONFIG_PLUGIN
100
@@ -XXX,XX +XXX,XX @@ static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
101
MemOp op, FullLoadHelper *full_load)
102
{
148
{
103
MemOpIdx oi = make_memop_idx(op, mmu_idx);
104
- uint16_t meminfo = trace_mem_get_info(oi, false);
105
uint64_t ret;
106
107
- trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
108
+ trace_guest_ld_before_exec(env_cpu(env), addr, oi);
109
110
ret = full_load(env, addr, oi, retaddr);
111
112
@@ -XXX,XX +XXX,XX @@ cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
113
int mmu_idx, uintptr_t retaddr, MemOp op)
114
{
115
MemOpIdx oi = make_memop_idx(op, mmu_idx);
116
- uint16_t meminfo = trace_mem_get_info(oi, true);
117
118
- trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
119
+ trace_guest_st_before_exec(env_cpu(env), addr, oi);
120
121
store_helper(env, addr, val, oi, retaddr, op);
122
123
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
124
index XXXXXXX..XXXXXXX 100644
125
--- a/accel/tcg/user-exec.c
126
+++ b/accel/tcg/user-exec.c
127
@@ -XXX,XX +XXX,XX @@
128
#include "exec/helper-proto.h"
129
#include "qemu/atomic128.h"
130
#include "trace/trace-root.h"
131
-#include "trace/mem.h"
132
+#include "internal.h"
133
134
#undef EAX
135
#undef ECX
136
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
137
uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr)
138
{
139
MemOpIdx oi = make_memop_idx(MO_UB, MMU_USER_IDX);
140
- uint16_t meminfo = trace_mem_get_info(oi, false);
141
uint32_t ret;
142
143
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
144
+ trace_guest_ld_before_exec(env_cpu(env), ptr, oi);
145
ret = ldub_p(g2h(env_cpu(env), ptr));
146
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_R);
147
return ret;
148
@@ -XXX,XX +XXX,XX @@ int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr)
149
uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr)
150
{
151
MemOpIdx oi = make_memop_idx(MO_BEUW, MMU_USER_IDX);
152
- uint16_t meminfo = trace_mem_get_info(oi, false);
153
uint32_t ret;
154
155
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
156
+ trace_guest_ld_before_exec(env_cpu(env), ptr, oi);
157
ret = lduw_be_p(g2h(env_cpu(env), ptr));
158
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_R);
159
return ret;
160
@@ -XXX,XX +XXX,XX @@ int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr)
161
uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr)
162
{
163
MemOpIdx oi = make_memop_idx(MO_BEUL, MMU_USER_IDX);
164
- uint16_t meminfo = trace_mem_get_info(oi, false);
165
uint32_t ret;
166
167
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
168
+ trace_guest_ld_before_exec(env_cpu(env), ptr, oi);
169
ret = ldl_be_p(g2h(env_cpu(env), ptr));
170
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_R);
171
return ret;
172
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr)
173
uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr)
174
{
175
MemOpIdx oi = make_memop_idx(MO_BEQ, MMU_USER_IDX);
176
- uint16_t meminfo = trace_mem_get_info(oi, false);
177
uint64_t ret;
178
179
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
180
+ trace_guest_ld_before_exec(env_cpu(env), ptr, oi);
181
ret = ldq_be_p(g2h(env_cpu(env), ptr));
182
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_R);
183
return ret;
184
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr)
185
uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr)
186
{
187
MemOpIdx oi = make_memop_idx(MO_LEUW, MMU_USER_IDX);
188
- uint16_t meminfo = trace_mem_get_info(oi, false);
189
uint32_t ret;
190
191
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
192
+ trace_guest_ld_before_exec(env_cpu(env), ptr, oi);
193
ret = lduw_le_p(g2h(env_cpu(env), ptr));
194
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_R);
195
return ret;
196
@@ -XXX,XX +XXX,XX @@ int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr)
197
uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr)
198
{
199
MemOpIdx oi = make_memop_idx(MO_LEUL, MMU_USER_IDX);
200
- uint16_t meminfo = trace_mem_get_info(oi, false);
201
uint32_t ret;
202
203
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
204
+ trace_guest_ld_before_exec(env_cpu(env), ptr, oi);
205
ret = ldl_le_p(g2h(env_cpu(env), ptr));
206
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_R);
207
return ret;
208
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr)
209
uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr)
210
{
211
MemOpIdx oi = make_memop_idx(MO_LEQ, MMU_USER_IDX);
212
- uint16_t meminfo = trace_mem_get_info(oi, false);
213
uint64_t ret;
214
215
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
216
+ trace_guest_ld_before_exec(env_cpu(env), ptr, oi);
217
ret = ldq_le_p(g2h(env_cpu(env), ptr));
218
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_R);
219
return ret;
220
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
221
void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
222
{
223
MemOpIdx oi = make_memop_idx(MO_UB, MMU_USER_IDX);
224
- uint16_t meminfo = trace_mem_get_info(oi, true);
225
226
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
227
+ trace_guest_st_before_exec(env_cpu(env), ptr, oi);
228
stb_p(g2h(env_cpu(env), ptr), val);
229
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_W);
230
}
231
@@ -XXX,XX +XXX,XX @@ void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
232
void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
233
{
234
MemOpIdx oi = make_memop_idx(MO_BEUW, MMU_USER_IDX);
235
- uint16_t meminfo = trace_mem_get_info(oi, true);
236
237
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
238
+ trace_guest_st_before_exec(env_cpu(env), ptr, oi);
239
stw_be_p(g2h(env_cpu(env), ptr), val);
240
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_W);
241
}
242
@@ -XXX,XX +XXX,XX @@ void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
243
void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
244
{
245
MemOpIdx oi = make_memop_idx(MO_BEUL, MMU_USER_IDX);
246
- uint16_t meminfo = trace_mem_get_info(oi, true);
247
248
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
249
+ trace_guest_st_before_exec(env_cpu(env), ptr, oi);
250
stl_be_p(g2h(env_cpu(env), ptr), val);
251
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_W);
252
}
253
@@ -XXX,XX +XXX,XX @@ void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
254
void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val)
255
{
256
MemOpIdx oi = make_memop_idx(MO_BEQ, MMU_USER_IDX);
257
- uint16_t meminfo = trace_mem_get_info(oi, true);
258
259
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
260
+ trace_guest_st_before_exec(env_cpu(env), ptr, oi);
261
stq_be_p(g2h(env_cpu(env), ptr), val);
262
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_W);
263
}
264
@@ -XXX,XX +XXX,XX @@ void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val)
265
void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
266
{
267
MemOpIdx oi = make_memop_idx(MO_LEUW, MMU_USER_IDX);
268
- uint16_t meminfo = trace_mem_get_info(oi, true);
269
270
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
271
+ trace_guest_st_before_exec(env_cpu(env), ptr, oi);
272
stw_le_p(g2h(env_cpu(env), ptr), val);
273
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_W);
274
}
275
@@ -XXX,XX +XXX,XX @@ void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
276
void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
277
{
278
MemOpIdx oi = make_memop_idx(MO_LEUL, MMU_USER_IDX);
279
- uint16_t meminfo = trace_mem_get_info(oi, true);
280
281
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
282
+ trace_guest_st_before_exec(env_cpu(env), ptr, oi);
283
stl_le_p(g2h(env_cpu(env), ptr), val);
284
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_W);
285
}
286
@@ -XXX,XX +XXX,XX @@ void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
287
void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val)
288
{
289
MemOpIdx oi = make_memop_idx(MO_LEQ, MMU_USER_IDX);
290
- uint16_t meminfo = trace_mem_get_info(oi, true);
291
292
- trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
293
+ trace_guest_st_before_exec(env_cpu(env), ptr, oi);
294
stq_le_p(g2h(env_cpu(env), ptr), val);
295
qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, oi, QEMU_PLUGIN_MEM_W);
296
}
297
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
298
index XXXXXXX..XXXXXXX 100644
299
--- a/tcg/tcg-op.c
300
+++ b/tcg/tcg-op.c
301
@@ -XXX,XX +XXX,XX @@
302
#include "tcg/tcg-op.h"
303
#include "tcg/tcg-mo.h"
304
#include "trace-tcg.h"
305
-#include "trace/mem.h"
306
#include "exec/plugin-gen.h"
307
308
/* Reduce the number of ifdefs below. This assumes that all uses of
309
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
310
{
311
MemOp orig_memop;
312
MemOpIdx oi;
313
- uint16_t info;
314
315
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
316
memop = tcg_canonicalize_memop(memop, 0, 0);
317
oi = make_memop_idx(memop, idx);
318
- info = trace_mem_get_info(oi, 0);
319
- trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, info);
320
+ trace_guest_ld_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
321
322
orig_memop = memop;
323
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
324
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
325
{
326
TCGv_i32 swap = NULL;
327
MemOpIdx oi;
328
- uint16_t info;
329
330
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
331
memop = tcg_canonicalize_memop(memop, 0, 1);
332
oi = make_memop_idx(memop, idx);
333
- info = trace_mem_get_info(oi, 1);
334
- trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, info);
335
+ trace_guest_st_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
336
337
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
338
swap = tcg_temp_new_i32();
339
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
340
{
341
MemOp orig_memop;
342
MemOpIdx oi;
343
- uint16_t info;
344
345
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
346
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
347
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
348
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
349
memop = tcg_canonicalize_memop(memop, 1, 0);
350
oi = make_memop_idx(memop, idx);
351
- info = trace_mem_get_info(oi, 0);
352
- trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, info);
353
+ trace_guest_ld_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
354
355
orig_memop = memop;
356
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
357
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
358
{
359
TCGv_i64 swap = NULL;
360
MemOpIdx oi;
361
- uint16_t info;
362
363
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
364
tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
365
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
366
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
367
memop = tcg_canonicalize_memop(memop, 1, 1);
368
oi = make_memop_idx(memop, idx);
369
- info = trace_mem_get_info(oi, 1);
370
- trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, info);
371
+ trace_guest_st_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
372
373
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
374
swap = tcg_temp_new_i64();
375
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
376
index XXXXXXX..XXXXXXX 100644
377
--- a/accel/tcg/atomic_common.c.inc
378
+++ b/accel/tcg/atomic_common.c.inc
379
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr,
380
MemOpIdx oi)
381
{
382
CPUState *cpu = env_cpu(env);
383
- uint16_t info = trace_mem_get_info(oi, false);
384
385
- trace_guest_mem_before_exec(cpu, addr, info);
386
- trace_guest_mem_before_exec(cpu, addr, info | TRACE_MEM_ST);
387
+ trace_guest_rmw_before_exec(cpu, addr, oi);
388
}
389
390
static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
391
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
392
static void atomic_trace_ld_pre(CPUArchState *env, target_ulong addr,
393
MemOpIdx oi)
394
{
395
- uint16_t info = trace_mem_get_info(oi, false);
396
-
397
- trace_guest_mem_before_exec(env_cpu(env), addr, info);
398
+ trace_guest_ld_before_exec(env_cpu(env), addr, oi);
399
}
400
401
static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
402
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
403
static void atomic_trace_st_pre(CPUArchState *env, target_ulong addr,
404
MemOpIdx oi)
405
{
406
- uint16_t info = trace_mem_get_info(oi, true);
407
-
408
- trace_guest_mem_before_exec(env_cpu(env), addr, info);
409
+ trace_guest_st_before_exec(env_cpu(env), addr, oi);
410
}
411
412
static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
413
diff --git a/trace-events b/trace-events
414
index XXXXXXX..XXXXXXX 100644
415
--- a/trace-events
416
+++ b/trace-events
417
@@ -XXX,XX +XXX,XX @@ vcpu guest_cpu_reset(void)
418
# tcg/tcg-op.c
419
420
# @vaddr: Access' virtual address.
421
-# @info : Access' information (see below).
422
+# @memopidx: Access' information (see below).
423
#
424
# Start virtual memory access (before any potential access violation).
425
-#
426
# Does not include memory accesses performed by devices.
427
#
428
-# Access information can be parsed as:
429
-#
430
-# struct mem_info {
431
-# uint8_t size_shift : 4; /* interpreted as "1 << size_shift" bytes */
432
-# bool sign_extend: 1; /* sign-extended */
433
-# uint8_t endianness : 1; /* 0: little, 1: big */
434
-# bool store : 1; /* whether it is a store operation */
435
-# pad : 1;
436
-# uint8_t mmuidx : 4; /* mmuidx (softmmu only) */
437
-# };
438
-#
439
# Mode: user, softmmu
440
# Targets: TCG(all)
441
-vcpu tcg guest_mem_before(TCGv vaddr, uint16_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d"
442
+vcpu tcg guest_ld_before(TCGv vaddr, uint32_t memopidx) "info=%d", "vaddr=0x%016"PRIx64" memopidx=0x%x"
443
+vcpu tcg guest_st_before(TCGv vaddr, uint32_t memopidx) "info=%d", "vaddr=0x%016"PRIx64" memopidx=0x%x"
444
+vcpu tcg guest_rmw_before(TCGv vaddr, uint32_t memopidx) "info=%d", "vaddr=0x%016"PRIx64" memopidx=0x%x"
445
446
# include/user/syscall-trace.h
447
448
--
149
--
449
2.25.1
150
2.43.0
450
451
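The constant cases in the relocated fold_bitsel_vec() above all follow from
the definition of a bitwise select, bitsel(c, t, f) = (c & t) | (~c & f).
A standalone check of the identities behind the mov/not/and/or/andc/orc
conversions; only the arithmetic is shown, not the TCGOp argument shuffling:

    #include <assert.h>
    #include <stdint.h>

    /* Bitwise select: pick t where c is 1 and f where c is 0, per bit. */
    static uint64_t bitsel(uint64_t c, uint64_t t, uint64_t f)
    {
        return (c & t) | (~c & f);
    }

    int main(void)
    {
        uint64_t c = 0x00ff00ff12345678ull, x = 0xdeadbeefcafef00dull;

        assert(bitsel(c, -1ull, 0) == c);        /* tv == -1, fv == 0: mov   */
        assert(bitsel(c, 0, -1ull) == ~c);       /* tv == 0,  fv == -1: not  */
        assert(bitsel(c, x, 0) == (c & x));      /* fv == 0: and             */
        assert(bitsel(c, -1ull, x) == (c | x));  /* tv == -1: or             */
        assert(bitsel(c, 0, x) == (~c & x));     /* tv == 0: andc            */
        assert(bitsel(c, x, -1ull) == (~c | x)); /* fv == -1: orc            */
        return 0;
    }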
1
Move this code from tcg/tcg.h to its own header.
1
The big comment just above says functions should be sorted.
2
2
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
include/exec/memopidx.h | 55 +++++++++++++++++++++++++++++++++++++++++
6
tcg/optimize.c | 60 +++++++++++++++++++++++++-------------------------
7
include/tcg/tcg.h | 39 +----------------------------
7
1 file changed, 30 insertions(+), 30 deletions(-)
8
2 files changed, 56 insertions(+), 38 deletions(-)
9
create mode 100644 include/exec/memopidx.h
10
8
11
diff --git a/include/exec/memopidx.h b/include/exec/memopidx.h
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
new file mode 100644
10
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX
11
--- a/tcg/optimize.c
14
--- /dev/null
12
+++ b/tcg/optimize.c
15
+++ b/include/exec/memopidx.h
13
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@
14
return true;
17
+/*
15
}
18
+ * Combine the MemOp and mmu_idx parameters into a single value.
16
19
+ *
17
+static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
20
+ * Authors:
21
+ * Richard Henderson <rth@twiddle.net>
22
+ *
23
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
24
+ * See the COPYING file in the top-level directory.
25
+ */
26
+
27
+#ifndef EXEC_MEMOPIDX_H
28
+#define EXEC_MEMOPIDX_H 1
29
+
30
+#include "exec/memop.h"
31
+
32
+typedef uint32_t MemOpIdx;
33
+
34
+/**
35
+ * make_memop_idx
36
+ * @op: memory operation
37
+ * @idx: mmu index
38
+ *
39
+ * Encode these values into a single parameter.
40
+ */
41
+static inline MemOpIdx make_memop_idx(MemOp op, unsigned idx)
42
+{
18
+{
43
+#ifdef CONFIG_DEBUG_TCG
19
+ /* Canonicalize the comparison to put immediate second. */
44
+ assert(idx <= 15);
20
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
45
+#endif
21
+ op->args[3] = tcg_swap_cond(op->args[3]);
46
+ return (op << 4) | idx;
22
+ }
23
+ return finish_folding(ctx, op);
47
+}
24
+}
48
+
25
+
49
+/**
26
+static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
50
+ * get_memop
51
+ * @oi: combined op/idx parameter
52
+ *
53
+ * Extract the memory operation from the combined value.
54
+ */
55
+static inline MemOp get_memop(MemOpIdx oi)
56
+{
27
+{
57
+ return oi >> 4;
28
+ /* If true and false values are the same, eliminate the cmp. */
29
+ if (args_are_copies(op->args[3], op->args[4])) {
30
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
31
+ }
32
+
33
+ /* Canonicalize the comparison to put immediate second. */
34
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
35
+ op->args[5] = tcg_swap_cond(op->args[5]);
36
+ }
37
+ /*
38
+ * Canonicalize the "false" input reg to match the destination,
39
+ * so that the tcg backend can implement "move if true".
40
+ */
41
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
42
+ op->args[5] = tcg_invert_cond(op->args[5]);
43
+ }
44
+ return finish_folding(ctx, op);
58
+}
45
+}
59
+
46
+
60
+/**
47
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
61
+ * get_mmuidx
48
{
62
+ * @oi: combined op/idx parameter
49
uint64_t z_mask, s_mask;
63
+ *
50
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
64
+ * Extract the mmu index from the combined value.
51
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
65
+ */
66
+static inline unsigned get_mmuidx(MemOpIdx oi)
67
+{
68
+ return oi & 15;
69
+}
70
+
71
+#endif
72
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
73
index XXXXXXX..XXXXXXX 100644
74
--- a/include/tcg/tcg.h
75
+++ b/include/tcg/tcg.h
76
@@ -XXX,XX +XXX,XX @@
77
78
#include "cpu.h"
79
#include "exec/memop.h"
80
+#include "exec/memopidx.h"
81
#include "qemu/bitops.h"
82
#include "qemu/plugin.h"
83
#include "qemu/queue.h"
84
@@ -XXX,XX +XXX,XX @@ static inline size_t tcg_current_code_size(TCGContext *s)
85
return tcg_ptr_byte_diff(s->code_ptr, s->code_buf);
86
}
52
}
87
53
88
-/* Combine the MemOp and mmu_idx parameters into a single value. */
54
-static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
89
-typedef uint32_t MemOpIdx;
90
-
91
-/**
92
- * make_memop_idx
93
- * @op: memory operation
94
- * @idx: mmu index
95
- *
96
- * Encode these values into a single parameter.
97
- */
98
-static inline MemOpIdx make_memop_idx(MemOp op, unsigned idx)
99
-{
55
-{
100
- tcg_debug_assert(idx <= 15);
56
- /* Canonicalize the comparison to put immediate second. */
101
- return (op << 4) | idx;
57
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
58
- op->args[3] = tcg_swap_cond(op->args[3]);
59
- }
60
- return finish_folding(ctx, op);
102
-}
61
-}
103
-
62
-
104
-/**
63
-static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
105
- * get_memop
106
- * @oi: combined op/idx parameter
107
- *
108
- * Extract the memory operation from the combined value.
109
- */
110
-static inline MemOp get_memop(MemOpIdx oi)
111
-{
64
-{
112
- return oi >> 4;
65
- /* If true and false values are the same, eliminate the cmp. */
66
- if (args_are_copies(op->args[3], op->args[4])) {
67
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
68
- }
69
-
70
- /* Canonicalize the comparison to put immediate second. */
71
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
72
- op->args[5] = tcg_swap_cond(op->args[5]);
73
- }
74
- /*
75
- * Canonicalize the "false" input reg to match the destination,
76
- * so that the tcg backend can implement "move if true".
77
- */
78
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
79
- op->args[5] = tcg_invert_cond(op->args[5]);
80
- }
81
- return finish_folding(ctx, op);
113
-}
82
-}
114
-
83
-
115
-/**
84
static bool fold_sextract(OptContext *ctx, TCGOp *op)
116
- * get_mmuidx
85
{
117
- * @oi: combined op/idx parameter
86
uint64_t z_mask, s_mask, s_mask_old;
118
- *
119
- * Extract the mmu index from the combined value.
120
- */
121
-static inline unsigned get_mmuidx(MemOpIdx oi)
122
-{
123
- return oi & 15;
124
-}
125
-
126
/**
127
* tcg_qemu_tb_exec:
128
* @env: pointer to CPUArchState for the CPU
129
--
87
--
130
2.25.1
88
2.43.0
131
132
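The memopidx.h helpers quoted above pack a MemOp and an mmu index into one
value as (op << 4) | idx, with only four bits reserved for the index. A
standalone round-trip sketch of that encoding; the numeric op value is a
stand-in, not a real MO_* constant, and plain assert() replaces the
CONFIG_DEBUG_TCG guard:

    #include <assert.h>
    #include <stdint.h>

    typedef uint32_t MemOpIdx;

    /* Pack: memory-op flags in the upper bits, mmu index in the low 4 bits. */
    static MemOpIdx make_memop_idx(uint32_t op, unsigned idx)
    {
        assert(idx <= 15);          /* only 4 bits are reserved for the index */
        return (op << 4) | idx;
    }

    static uint32_t get_memop(MemOpIdx oi)  { return oi >> 4; }
    static unsigned get_mmuidx(MemOpIdx oi) { return oi & 15; }

    int main(void)
    {
        uint32_t op = 0x2a;         /* stand-in for some MO_* combination */
        unsigned idx = 3;           /* stand-in mmu index */

        MemOpIdx oi = make_memop_idx(op, idx);
        assert(get_memop(oi) == op);
        assert(get_mmuidx(oi) == idx);
        return 0;
    }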
New patch
1
1
We currently have a flag, float_muladd_halve_result, to scale
2
the result by 2**-1. Extend this to handle arbitrary scaling.
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/fpu/softfloat.h | 6 ++++
8
fpu/softfloat.c | 58 ++++++++++++++++++++++-----------------
9
fpu/softfloat-parts.c.inc | 7 +++--
10
3 files changed, 44 insertions(+), 27 deletions(-)
11
12
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/fpu/softfloat.h
15
+++ b/include/fpu/softfloat.h
16
@@ -XXX,XX +XXX,XX @@ float16 float16_add(float16, float16, float_status *status);
17
float16 float16_sub(float16, float16, float_status *status);
18
float16 float16_mul(float16, float16, float_status *status);
19
float16 float16_muladd(float16, float16, float16, int, float_status *status);
20
+float16 float16_muladd_scalbn(float16, float16, float16,
21
+ int, int, float_status *status);
22
float16 float16_div(float16, float16, float_status *status);
23
float16 float16_scalbn(float16, int, float_status *status);
24
float16 float16_min(float16, float16, float_status *status);
25
@@ -XXX,XX +XXX,XX @@ float32 float32_mul(float32, float32, float_status *status);
26
float32 float32_div(float32, float32, float_status *status);
27
float32 float32_rem(float32, float32, float_status *status);
28
float32 float32_muladd(float32, float32, float32, int, float_status *status);
29
+float32 float32_muladd_scalbn(float32, float32, float32,
30
+ int, int, float_status *status);
31
float32 float32_sqrt(float32, float_status *status);
32
float32 float32_exp2(float32, float_status *status);
33
float32 float32_log2(float32, float_status *status);
34
@@ -XXX,XX +XXX,XX @@ float64 float64_mul(float64, float64, float_status *status);
35
float64 float64_div(float64, float64, float_status *status);
36
float64 float64_rem(float64, float64, float_status *status);
37
float64 float64_muladd(float64, float64, float64, int, float_status *status);
38
+float64 float64_muladd_scalbn(float64, float64, float64,
39
+ int, int, float_status *status);
40
float64 float64_sqrt(float64, float_status *status);
41
float64 float64_log2(float64, float_status *status);
42
FloatRelation float64_compare(float64, float64, float_status *status);
43
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/fpu/softfloat.c
46
+++ b/fpu/softfloat.c
47
@@ -XXX,XX +XXX,XX @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
48
#define parts_mul(A, B, S) \
49
PARTS_GENERIC_64_128(mul, A)(A, B, S)
50
51
-static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
52
- FloatParts64 *c, int flags,
53
- float_status *s);
54
-static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
55
- FloatParts128 *c, int flags,
56
- float_status *s);
57
+static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
58
+ FloatParts64 *c, int scale,
59
+ int flags, float_status *s);
60
+static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
61
+ FloatParts128 *c, int scale,
62
+ int flags, float_status *s);
63
64
-#define parts_muladd(A, B, C, Z, S) \
65
- PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
66
+#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
67
+ PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
68
69
static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
70
float_status *s);
71
@@ -XXX,XX +XXX,XX @@ floatx80_mul(floatx80 a, floatx80 b, float_status *status)
72
* Fused multiply-add
73
*/
74
75
-float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
76
- int flags, float_status *status)
77
+float16 QEMU_FLATTEN
78
+float16_muladd_scalbn(float16 a, float16 b, float16 c,
79
+ int scale, int flags, float_status *status)
80
{
81
FloatParts64 pa, pb, pc, *pr;
82
83
float16_unpack_canonical(&pa, a, status);
84
float16_unpack_canonical(&pb, b, status);
85
float16_unpack_canonical(&pc, c, status);
86
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
87
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
88
89
return float16_round_pack_canonical(pr, status);
90
}
91
92
-static float32 QEMU_SOFTFLOAT_ATTR
93
-soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
94
- float_status *status)
95
+float16 float16_muladd(float16 a, float16 b, float16 c,
96
+ int flags, float_status *status)
97
+{
98
+ return float16_muladd_scalbn(a, b, c, 0, flags, status);
99
+}
100
+
101
+float32 QEMU_SOFTFLOAT_ATTR
102
+float32_muladd_scalbn(float32 a, float32 b, float32 c,
103
+ int scale, int flags, float_status *status)
104
{
105
FloatParts64 pa, pb, pc, *pr;
106
107
float32_unpack_canonical(&pa, a, status);
108
float32_unpack_canonical(&pb, b, status);
109
float32_unpack_canonical(&pc, c, status);
110
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
111
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
112
113
return float32_round_pack_canonical(pr, status);
114
}
115
116
-static float64 QEMU_SOFTFLOAT_ATTR
117
-soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
118
- float_status *status)
119
+float64 QEMU_SOFTFLOAT_ATTR
120
+float64_muladd_scalbn(float64 a, float64 b, float64 c,
121
+ int scale, int flags, float_status *status)
122
{
123
FloatParts64 pa, pb, pc, *pr;
124
125
float64_unpack_canonical(&pa, a, status);
126
float64_unpack_canonical(&pb, b, status);
127
float64_unpack_canonical(&pc, c, status);
128
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
129
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
130
131
return float64_round_pack_canonical(pr, status);
132
}
133
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
134
return ur.s;
135
136
soft:
137
- return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
138
+ return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
139
}
140
141
float64 QEMU_FLATTEN
142
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
143
return ur.s;
144
145
soft:
146
- return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
147
+ return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
148
}
149
150
float64 float64r32_muladd(float64 a, float64 b, float64 c,
151
@@ -XXX,XX +XXX,XX @@ float64 float64r32_muladd(float64 a, float64 b, float64 c,
152
float64_unpack_canonical(&pa, a, status);
153
float64_unpack_canonical(&pb, b, status);
154
float64_unpack_canonical(&pc, c, status);
155
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
156
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
157
158
return float64r32_round_pack_canonical(pr, status);
159
}
160
@@ -XXX,XX +XXX,XX @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
161
bfloat16_unpack_canonical(&pa, a, status);
162
bfloat16_unpack_canonical(&pb, b, status);
163
bfloat16_unpack_canonical(&pc, c, status);
164
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
165
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
166
167
return bfloat16_round_pack_canonical(pr, status);
168
}
169
@@ -XXX,XX +XXX,XX @@ float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
170
float128_unpack_canonical(&pa, a, status);
171
float128_unpack_canonical(&pb, b, status);
172
float128_unpack_canonical(&pc, c, status);
173
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
174
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
175
176
return float128_round_pack_canonical(pr, status);
177
}
178
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)
179
180
float64_unpack_canonical(&rp, float64_one, status);
181
for (i = 0 ; i < 15 ; i++) {
182
+
183
float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
184
- rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
185
+ rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
186
xnp = *parts_mul(&xnp, &xp, status);
187
}
188
189
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
190
index XXXXXXX..XXXXXXX 100644
191
--- a/fpu/softfloat-parts.c.inc
192
+++ b/fpu/softfloat-parts.c.inc
193
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
194
* Requires A and C extracted into a double-sized structure to provide the
195
* extra space for the widening multiply.
196
*/
197
-static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
198
- FloatPartsN *c, int flags, float_status *s)
199
+static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
200
+ FloatPartsN *c, int scale,
201
+ int flags, float_status *s)
202
{
203
int ab_mask, abc_mask;
204
FloatPartsW p_widen, c_widen;
205
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
206
a->exp = p_widen.exp;
207
208
return_normal:
209
+ /* TODO: Replace all use of float_muladd_halve_result with scale. */
210
if (flags & float_muladd_halve_result) {
211
a->exp -= 1;
212
}
213
+ a->exp += scale;
214
finish_sign:
215
if (flags & float_muladd_negate_result) {
216
a->sign ^= 1;
217
--
218
2.43.0
219
220
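The new *_muladd_scalbn() routines adjust the result exponent by `scale`
before rounding (the a->exp += scale line in the parts code), so the old
float_muladd_halve_result flag is the scale == -1 case. A plain libm
analogue, shown only to illustrate the arithmetic; the sample values are
exact, so fma()+ldexp() cannot differ from the single-rounding softfloat
result here:

    /* link with -lm */
    #include <assert.h>
    #include <math.h>
    #include <stdio.h>

    /* Analogue of the new interface: (a * b + c) scaled by 2**scale. */
    static double muladd_scalbn(double a, double b, double c, int scale)
    {
        return ldexp(fma(a, b, c), scale);
    }

    int main(void)
    {
        double a = 1.5, b = 2.25, c = 0.125;

        /* scale == -1 reproduces the old halve_result behaviour. */
        assert(muladd_scalbn(a, b, c, -1) == (a * b + c) / 2);

        /* scale == 0 is the plain fused multiply-add. */
        assert(muladd_scalbn(a, b, c, 0) == a * b + c);

        printf("(a*b+c)/2 = %g\n", muladd_scalbn(a, b, c, -1));
        return 0;
    }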
1
Reviewed-by: David Hildenbrand <david@redhat.com>
1
Use the scalbn interface instead of float_muladd_halve_result.
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
5
---
4
tcg/s390x/tcg-target.c.inc | 132 +++++++++++++++++++++++++++++++++----
6
target/arm/tcg/helper-a64.c | 6 +++---
5
1 file changed, 120 insertions(+), 12 deletions(-)
7
1 file changed, 3 insertions(+), 3 deletions(-)
6
8
7
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
9
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
8
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/s390x/tcg-target.c.inc
11
--- a/target/arm/tcg/helper-a64.c
10
+++ b/tcg/s390x/tcg-target.c.inc
12
+++ b/target/arm/tcg/helper-a64.c
11
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
13
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
12
RX_STC = 0x42,
14
(float16_is_infinity(b) && float16_is_zero(a))) {
13
RX_STH = 0x40,
15
return float16_one_point_five;
14
15
+ VRX_VL = 0xe706,
16
+ VRX_VLLEZ = 0xe704,
17
+ VRX_VST = 0xe70e,
18
+ VRX_VSTEF = 0xe70b,
19
+ VRX_VSTEG = 0xe70a,
20
+
21
NOP = 0x0707,
22
} S390Opcode;
23
24
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
25
static const tcg_insn_unit *tb_ret_addr;
26
uint64_t s390_facilities[3];
27
28
+static inline bool is_general_reg(TCGReg r)
29
+{
30
+ return r <= TCG_REG_R15;
31
+}
32
+
33
+static inline bool is_vector_reg(TCGReg r)
34
+{
35
+ return r >= TCG_REG_V0 && r <= TCG_REG_V31;
36
+}
37
+
38
static bool patch_reloc(tcg_insn_unit *src_rw, int type,
39
intptr_t value, intptr_t addend)
40
{
41
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
42
#define tcg_out_insn_RX tcg_out_insn_RS
43
#define tcg_out_insn_RXY tcg_out_insn_RSY
44
45
+static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
46
+{
47
+ /*
48
+ * Shift bit 4 of each regno to its corresponding bit of RXB.
49
+ * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
50
+ * is the left-shift of the 4th operand.
51
+ */
52
+ return ((v1 & 0x10) << (4 + 3))
53
+ | ((v2 & 0x10) << (4 + 2))
54
+ | ((v3 & 0x10) << (4 + 1))
55
+ | ((v4 & 0x10) << (4 + 0));
56
+}
57
+
58
+static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
59
+ TCGReg b2, TCGReg x2, intptr_t d2, int m3)
60
+{
61
+ tcg_debug_assert(is_vector_reg(v1));
62
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
63
+ tcg_debug_assert(is_general_reg(x2));
64
+ tcg_debug_assert(is_general_reg(b2));
65
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
66
+ tcg_out16(s, (b2 << 12) | d2);
67
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
68
+}
69
+
70
/* Emit an opcode with "type-checking" of the format. */
71
#define tcg_out_insn(S, FMT, OP, ...) \
72
glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
74
}
16
}
17
- return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
18
+ return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
75
}
19
}
76
20
77
+static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
21
float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
78
+ TCGReg data, TCGReg base, TCGReg index,
22
@@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
79
+ tcg_target_long ofs, int m3)
23
(float32_is_infinity(b) && float32_is_zero(a))) {
80
+{
24
return float32_one_point_five;
81
+ if (ofs < 0 || ofs >= 0x1000) {
82
+ if (ofs >= -0x80000 && ofs < 0x80000) {
83
+ tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
84
+ base = TCG_TMP0;
85
+ index = TCG_REG_NONE;
86
+ ofs = 0;
87
+ } else {
88
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
89
+ if (index != TCG_REG_NONE) {
90
+ tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
91
+ }
92
+ index = TCG_TMP0;
93
+ ofs = 0;
94
+ }
95
+ }
96
+ tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
97
+}
98
99
/* load data without address translation or endianness conversion */
100
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
101
- TCGReg base, intptr_t ofs)
102
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
103
+ TCGReg base, intptr_t ofs)
104
{
105
- if (type == TCG_TYPE_I32) {
106
- tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
107
- } else {
108
- tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
109
+ switch (type) {
110
+ case TCG_TYPE_I32:
111
+ if (likely(is_general_reg(data))) {
112
+ tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
113
+ break;
114
+ }
115
+ tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
116
+ break;
117
+
118
+ case TCG_TYPE_I64:
119
+ if (likely(is_general_reg(data))) {
120
+ tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
121
+ break;
122
+ }
123
+ /* fallthru */
124
+
125
+ case TCG_TYPE_V64:
126
+ tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
127
+ break;
128
+
129
+ case TCG_TYPE_V128:
130
+ /* Hint quadword aligned. */
131
+ tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
132
+ break;
133
+
134
+ default:
135
+ g_assert_not_reached();
136
}
25
}
26
- return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
27
+ return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
137
}
28
}
138
29
139
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
30
float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
140
- TCGReg base, intptr_t ofs)
31
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
141
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
32
(float64_is_infinity(b) && float64_is_zero(a))) {
142
+ TCGReg base, intptr_t ofs)
33
return float64_one_point_five;
143
{
144
- if (type == TCG_TYPE_I32) {
145
- tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
146
- } else {
147
- tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
148
+ switch (type) {
149
+ case TCG_TYPE_I32:
150
+ if (likely(is_general_reg(data))) {
151
+ tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
152
+ } else {
153
+ tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
154
+ }
155
+ break;
156
+
157
+ case TCG_TYPE_I64:
158
+ if (likely(is_general_reg(data))) {
159
+ tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
160
+ break;
161
+ }
162
+ /* fallthru */
163
+
164
+ case TCG_TYPE_V64:
165
+ tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
166
+ break;
167
+
168
+ case TCG_TYPE_V128:
169
+ /* Hint quadword aligned. */
170
+ tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
171
+ break;
172
+
173
+ default:
174
+ g_assert_not_reached();
175
}
34
}
35
- return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
36
+ return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
176
}
37
}
177
38
39
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
178
--
40
--
179
2.25.1
41
2.43.0
180
42
181
43
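For a rough feel of the new interface used in the patch above: float16/32/64_muladd_scalbn(a, b, c, scale, flags, status) computes (a * b + c) * 2**scale with a single rounding, so a scale of -1 covers what float_muladd_halve_result provided for the ARM rsqrt-step helpers. The sketch below is only an illustration with host doubles; muladd_scalbn here is a made-up stand-in, not the QEMU helper, and ldexp by a power of two is exact for normal results, so the value still sees only the one rounding from fma.

#include <math.h>
#include <stdio.h>

/* Stand-in for the softfloat helper: (a * b + c) * 2**scale. */
static double muladd_scalbn(double a, double b, double c, int scale)
{
    return ldexp(fma(a, b, c), scale);
}

int main(void)
{
    /* The rsqrt-step shape used above: (a * b + 3) / 2. */
    printf("%g\n", muladd_scalbn(-0.5, 0.5, 3.0, -1));    /* prints 1.375 */
    return 0;
}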
1
We will shortly use the MemOpIdx directly, but in the meantime
1
Use the scalbn interface instead of float_muladd_halve_result.
2
re-compute the trace meminfo.
2
3
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
5
---
7
accel/tcg/atomic_template.h | 48 +++++++++++++++++------------------
6
target/sparc/helper.h | 4 +-
8
accel/tcg/atomic_common.c.inc | 30 +++++++++++-----------
7
target/sparc/fop_helper.c | 8 ++--
9
2 files changed, 39 insertions(+), 39 deletions(-)
8
target/sparc/translate.c | 80 +++++++++++++++++++++++----------------
10
9
3 files changed, 54 insertions(+), 38 deletions(-)
11
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
10
11
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/atomic_template.h
13
--- a/target/sparc/helper.h
14
+++ b/accel/tcg/atomic_template.h
14
+++ b/target/sparc/helper.h
15
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
15
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
16
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
16
DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
17
PAGE_READ | PAGE_WRITE, retaddr);
17
DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
18
DATA_TYPE ret;
18
DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
19
- uint16_t info = atomic_trace_rmw_pre(env, addr, oi);
19
-DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32)
20
20
+DEF_HELPER_FLAGS_6(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, s32, i32)
21
+ atomic_trace_rmw_pre(env, addr, oi);
21
DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64)
22
#if DATA_SIZE == 16
22
DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
23
ret = atomic16_cmpxchg(haddr, cmpv, newv);
23
24
#else
24
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32)
25
ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
25
DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
26
#endif
26
DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
27
ATOMIC_MMU_CLEANUP;
27
DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
28
- atomic_trace_rmw_post(env, addr, info);
28
-DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32)
29
+ atomic_trace_rmw_post(env, addr, oi);
29
+DEF_HELPER_FLAGS_6(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, s32, i32)
30
DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32)
31
DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
32
33
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/sparc/fop_helper.c
36
+++ b/target/sparc/fop_helper.c
37
@@ -XXX,XX +XXX,XX @@ Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src)
38
}
39
40
float32 helper_fmadds(CPUSPARCState *env, float32 s1,
41
- float32 s2, float32 s3, uint32_t op)
42
+ float32 s2, float32 s3, int32_t sc, uint32_t op)
43
{
44
- float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status);
45
+ float32 ret = float32_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
46
check_ieee_exceptions(env, GETPC());
30
return ret;
47
return ret;
31
}
48
}
32
49
33
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
50
float64 helper_fmaddd(CPUSPARCState *env, float64 s1,
34
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
51
- float64 s2, float64 s3, uint32_t op)
35
PAGE_READ, retaddr);
52
+ float64 s2, float64 s3, int32_t sc, uint32_t op)
36
DATA_TYPE val;
53
{
37
- uint16_t info = atomic_trace_ld_pre(env, addr, oi);
54
- float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status);
38
55
+ float64 ret = float64_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
39
+ atomic_trace_ld_pre(env, addr, oi);
56
check_ieee_exceptions(env, GETPC());
40
val = atomic16_read(haddr);
41
ATOMIC_MMU_CLEANUP;
42
- atomic_trace_ld_post(env, addr, info);
43
+ atomic_trace_ld_post(env, addr, oi);
44
return val;
45
}
46
47
@@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
48
{
49
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
50
PAGE_WRITE, retaddr);
51
- uint16_t info = atomic_trace_st_pre(env, addr, oi);
52
53
+ atomic_trace_st_pre(env, addr, oi);
54
atomic16_set(haddr, val);
55
ATOMIC_MMU_CLEANUP;
56
- atomic_trace_st_post(env, addr, info);
57
+ atomic_trace_st_post(env, addr, oi);
58
}
59
#endif
60
#else
61
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
62
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
63
PAGE_READ | PAGE_WRITE, retaddr);
64
DATA_TYPE ret;
65
- uint16_t info = atomic_trace_rmw_pre(env, addr, oi);
66
67
+ atomic_trace_rmw_pre(env, addr, oi);
68
ret = qatomic_xchg__nocheck(haddr, val);
69
ATOMIC_MMU_CLEANUP;
70
- atomic_trace_rmw_post(env, addr, info);
71
+ atomic_trace_rmw_post(env, addr, oi);
72
return ret;
57
return ret;
73
}
58
}
74
59
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
75
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
76
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
77
PAGE_READ | PAGE_WRITE, retaddr); \
78
DATA_TYPE ret; \
79
- uint16_t info = atomic_trace_rmw_pre(env, addr, oi); \
80
+ atomic_trace_rmw_pre(env, addr, oi); \
81
ret = qatomic_##X(haddr, val); \
82
ATOMIC_MMU_CLEANUP; \
83
- atomic_trace_rmw_post(env, addr, info); \
84
+ atomic_trace_rmw_post(env, addr, oi); \
85
return ret; \
86
}
87
88
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
89
XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
90
PAGE_READ | PAGE_WRITE, retaddr); \
91
XDATA_TYPE cmp, old, new, val = xval; \
92
- uint16_t info = atomic_trace_rmw_pre(env, addr, oi); \
93
+ atomic_trace_rmw_pre(env, addr, oi); \
94
smp_mb(); \
95
cmp = qatomic_read__nocheck(haddr); \
96
do { \
97
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
98
cmp = qatomic_cmpxchg__nocheck(haddr, old, new); \
99
} while (cmp != old); \
100
ATOMIC_MMU_CLEANUP; \
101
- atomic_trace_rmw_post(env, addr, info); \
102
+ atomic_trace_rmw_post(env, addr, oi); \
103
return RET; \
104
}
105
106
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
107
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
108
PAGE_READ | PAGE_WRITE, retaddr);
109
DATA_TYPE ret;
110
- uint16_t info = atomic_trace_rmw_pre(env, addr, oi);
111
112
+ atomic_trace_rmw_pre(env, addr, oi);
113
#if DATA_SIZE == 16
114
ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv));
115
#else
116
ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
117
#endif
118
ATOMIC_MMU_CLEANUP;
119
- atomic_trace_rmw_post(env, addr, info);
120
+ atomic_trace_rmw_post(env, addr, oi);
121
return BSWAP(ret);
122
}
123
124
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
125
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
126
PAGE_READ, retaddr);
127
DATA_TYPE val;
128
- uint16_t info = atomic_trace_ld_pre(env, addr, oi);
129
130
+ atomic_trace_ld_pre(env, addr, oi);
131
val = atomic16_read(haddr);
132
ATOMIC_MMU_CLEANUP;
133
- atomic_trace_ld_post(env, addr, info);
134
+ atomic_trace_ld_post(env, addr, oi);
135
return BSWAP(val);
136
}
137
138
@@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
139
{
140
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
141
PAGE_WRITE, retaddr);
142
- uint16_t info = atomic_trace_st_pre(env, addr, oi);
143
144
+ atomic_trace_st_pre(env, addr, oi);
145
val = BSWAP(val);
146
atomic16_set(haddr, val);
147
ATOMIC_MMU_CLEANUP;
148
- atomic_trace_st_post(env, addr, info);
149
+ atomic_trace_st_post(env, addr, oi);
150
}
151
#endif
152
#else
153
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
154
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
155
PAGE_READ | PAGE_WRITE, retaddr);
156
ABI_TYPE ret;
157
- uint16_t info = atomic_trace_rmw_pre(env, addr, oi);
158
159
+ atomic_trace_rmw_pre(env, addr, oi);
160
ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
161
ATOMIC_MMU_CLEANUP;
162
- atomic_trace_rmw_post(env, addr, info);
163
+ atomic_trace_rmw_post(env, addr, oi);
164
return BSWAP(ret);
165
}
166
167
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
168
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
169
PAGE_READ | PAGE_WRITE, retaddr); \
170
DATA_TYPE ret; \
171
- uint16_t info = atomic_trace_rmw_pre(env, addr, oi); \
172
+ atomic_trace_rmw_pre(env, addr, oi); \
173
ret = qatomic_##X(haddr, BSWAP(val)); \
174
ATOMIC_MMU_CLEANUP; \
175
- atomic_trace_rmw_post(env, addr, info); \
176
+ atomic_trace_rmw_post(env, addr, oi); \
177
return BSWAP(ret); \
178
}
179
180
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
181
XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
182
PAGE_READ | PAGE_WRITE, retaddr); \
183
XDATA_TYPE ldo, ldn, old, new, val = xval; \
184
- uint16_t info = atomic_trace_rmw_pre(env, addr, oi); \
185
+ atomic_trace_rmw_pre(env, addr, oi); \
186
smp_mb(); \
187
ldn = qatomic_read__nocheck(haddr); \
188
do { \
189
@@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
190
ldn = qatomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new)); \
191
} while (ldo != ldn); \
192
ATOMIC_MMU_CLEANUP; \
193
- atomic_trace_rmw_post(env, addr, info); \
194
+ atomic_trace_rmw_post(env, addr, oi); \
195
return RET; \
196
}
197
198
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
199
index XXXXXXX..XXXXXXX 100644
60
index XXXXXXX..XXXXXXX 100644
200
--- a/accel/tcg/atomic_common.c.inc
61
--- a/target/sparc/translate.c
201
+++ b/accel/tcg/atomic_common.c.inc
62
+++ b/target/sparc/translate.c
202
@@ -XXX,XX +XXX,XX @@
63
@@ -XXX,XX +XXX,XX @@ static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
203
* See the COPYING file in the top-level directory.
64
204
*/
65
static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
205
66
{
206
-static uint16_t atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr,
67
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
207
- MemOpIdx oi)
68
+ TCGv_i32 z = tcg_constant_i32(0);
208
+static void atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr,
69
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, z);
209
+ MemOpIdx oi)
70
}
210
{
71
211
CPUState *cpu = env_cpu(env);
72
static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
212
uint16_t info = trace_mem_get_info(oi, false);
73
{
213
74
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
214
trace_guest_mem_before_exec(cpu, addr, info);
75
+ TCGv_i32 z = tcg_constant_i32(0);
215
trace_guest_mem_before_exec(cpu, addr, info | TRACE_MEM_ST);
76
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, z);
216
-
77
}
217
- return info;
78
218
}
79
static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
219
80
{
220
static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
81
- int op = float_muladd_negate_c;
221
- uint16_t info)
82
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
222
+ MemOpIdx oi)
83
+ TCGv_i32 z = tcg_constant_i32(0);
223
{
84
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
224
+ uint16_t info = trace_mem_get_info(oi, false);
85
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
225
+
86
}
226
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info);
87
227
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info | TRACE_MEM_ST);
88
static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
228
}
89
{
229
90
- int op = float_muladd_negate_c;
230
#if HAVE_ATOMIC128
91
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
231
-static uint16_t atomic_trace_ld_pre(CPUArchState *env, target_ulong addr,
92
+ TCGv_i32 z = tcg_constant_i32(0);
232
- MemOpIdx oi)
93
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
233
+static void atomic_trace_ld_pre(CPUArchState *env, target_ulong addr,
94
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
234
+ MemOpIdx oi)
95
}
235
{
96
236
uint16_t info = trace_mem_get_info(oi, false);
97
static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
237
98
{
238
trace_guest_mem_before_exec(env_cpu(env), addr, info);
99
- int op = float_muladd_negate_c | float_muladd_negate_result;
239
-
100
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
240
- return info;
101
+ TCGv_i32 z = tcg_constant_i32(0);
241
}
102
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
242
103
+ float_muladd_negate_result);
243
static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
104
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
244
- uint16_t info)
105
}
245
+ MemOpIdx oi)
106
246
{
107
static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
247
+ uint16_t info = trace_mem_get_info(oi, false);
108
{
248
+
109
- int op = float_muladd_negate_c | float_muladd_negate_result;
249
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info);
110
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
250
}
111
+ TCGv_i32 z = tcg_constant_i32(0);
251
112
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
252
-static uint16_t atomic_trace_st_pre(CPUArchState *env, target_ulong addr,
113
+ float_muladd_negate_result);
253
- MemOpIdx oi)
114
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
254
+static void atomic_trace_st_pre(CPUArchState *env, target_ulong addr,
115
}
255
+ MemOpIdx oi)
116
256
{
117
static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
257
uint16_t info = trace_mem_get_info(oi, true);
118
{
258
119
- int op = float_muladd_negate_result;
259
trace_guest_mem_before_exec(env_cpu(env), addr, info);
120
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
260
-
121
+ TCGv_i32 z = tcg_constant_i32(0);
261
- return info;
122
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
262
}
123
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
263
124
}
264
static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
125
265
- uint16_t info)
126
static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
266
+ MemOpIdx oi)
127
{
267
{
128
- int op = float_muladd_negate_result;
268
+ uint16_t info = trace_mem_get_info(oi, false);
129
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
269
+
130
+ TCGv_i32 z = tcg_constant_i32(0);
270
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info);
131
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
271
}
132
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
272
#endif
133
}
134
135
/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. */
136
static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
137
{
138
- TCGv_i32 one = tcg_constant_i32(float32_one);
139
- int op = float_muladd_halve_result;
140
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
141
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
142
+ TCGv_i32 mone = tcg_constant_i32(-1);
143
+ TCGv_i32 op = tcg_constant_i32(0);
144
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
145
}
146
147
static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
148
{
149
- TCGv_i64 one = tcg_constant_i64(float64_one);
150
- int op = float_muladd_halve_result;
151
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
152
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
153
+ TCGv_i32 mone = tcg_constant_i32(-1);
154
+ TCGv_i32 op = tcg_constant_i32(0);
155
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
156
}
157
158
/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */
159
static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
160
{
161
- TCGv_i32 one = tcg_constant_i32(float32_one);
162
- int op = float_muladd_negate_c | float_muladd_halve_result;
163
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
164
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
165
+ TCGv_i32 mone = tcg_constant_i32(-1);
166
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
167
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
168
}
169
170
static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
171
{
172
- TCGv_i64 one = tcg_constant_i64(float64_one);
173
- int op = float_muladd_negate_c | float_muladd_halve_result;
174
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
175
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
176
+ TCGv_i32 mone = tcg_constant_i32(-1);
177
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
178
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
179
}
180
181
/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */
182
static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
183
{
184
- TCGv_i32 one = tcg_constant_i32(float32_one);
185
- int op = float_muladd_negate_result | float_muladd_halve_result;
186
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
187
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
188
+ TCGv_i32 mone = tcg_constant_i32(-1);
189
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
190
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
191
}
192
193
static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
194
{
195
- TCGv_i64 one = tcg_constant_i64(float64_one);
196
- int op = float_muladd_negate_result | float_muladd_halve_result;
197
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
198
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
199
+ TCGv_i32 mone = tcg_constant_i32(-1);
200
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
201
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
202
}
203
204
static void gen_op_fpexception_im(DisasContext *dc, int ftt)
273
--
205
--
274
2.25.1
206
2.43.0
275
207
276
208
1
Reviewed-by: David Hildenbrand <david@redhat.com>
1
All uses have been converted to float*_muladd_scalbn.
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
5
---
4
tcg/s390x/tcg-target.h | 2 +-
6
include/fpu/softfloat.h | 3 ---
5
tcg/s390x/tcg-target.c.inc | 25 +++++++++++++++++++++++++
7
fpu/softfloat.c | 6 ------
6
2 files changed, 26 insertions(+), 1 deletion(-)
8
fpu/softfloat-parts.c.inc | 4 ----
9
3 files changed, 13 deletions(-)
7
10
8
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
11
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/s390x/tcg-target.h
13
--- a/include/fpu/softfloat.h
11
+++ b/tcg/s390x/tcg-target.h
14
+++ b/include/fpu/softfloat.h
12
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
15
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
13
#define TCG_TARGET_HAS_shv_vec 1
16
| Using these differs from negating an input or output before calling
14
#define TCG_TARGET_HAS_mul_vec 1
17
| the muladd function in that this means that a NaN doesn't have its
15
#define TCG_TARGET_HAS_sat_vec 0
18
| sign bit inverted before it is propagated.
16
-#define TCG_TARGET_HAS_minmax_vec 0
19
-| We also support halving the result before rounding, as a special
17
+#define TCG_TARGET_HAS_minmax_vec 1
20
-| case to support the ARM fused-sqrt-step instruction FRSQRTS.
18
#define TCG_TARGET_HAS_bitsel_vec 0
21
*----------------------------------------------------------------------------*/
19
#define TCG_TARGET_HAS_cmpsel_vec 0
22
enum {
20
23
float_muladd_negate_c = 1,
21
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
24
float_muladd_negate_product = 2,
25
float_muladd_negate_result = 4,
26
- float_muladd_halve_result = 8,
27
};
28
29
/*----------------------------------------------------------------------------
30
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
22
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
23
--- a/tcg/s390x/tcg-target.c.inc
32
--- a/fpu/softfloat.c
24
+++ b/tcg/s390x/tcg-target.c.inc
33
+++ b/fpu/softfloat.c
25
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
34
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
26
VRRc_VESRAV = 0xe77a,
35
if (unlikely(!can_use_fpu(s))) {
27
VRRc_VESRLV = 0xe778,
36
goto soft;
28
VRRc_VML = 0xe7a2,
37
}
29
+ VRRc_VMN = 0xe7fe,
38
- if (unlikely(flags & float_muladd_halve_result)) {
30
+ VRRc_VMNL = 0xe7fc,
39
- goto soft;
31
+ VRRc_VMX = 0xe7ff,
40
- }
32
+ VRRc_VMXL = 0xe7fd,
41
33
VRRc_VN = 0xe768,
42
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
34
VRRc_VNC = 0xe769,
43
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
35
VRRc_VNO = 0xe76b,
44
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
36
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
45
if (unlikely(!can_use_fpu(s))) {
37
tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
46
goto soft;
38
break;
47
}
39
48
- if (unlikely(flags & float_muladd_halve_result)) {
40
+ case INDEX_op_smin_vec:
49
- goto soft;
41
+ tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
50
- }
42
+ break;
51
43
+ case INDEX_op_smax_vec:
52
float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
44
+ tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
53
if (unlikely(!f64_is_zon3(ua, ub, uc))) {
45
+ break;
54
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
46
+ case INDEX_op_umin_vec:
55
index XXXXXXX..XXXXXXX 100644
47
+ tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
56
--- a/fpu/softfloat-parts.c.inc
48
+ break;
57
+++ b/fpu/softfloat-parts.c.inc
49
+ case INDEX_op_umax_vec:
58
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
50
+ tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
59
a->exp = p_widen.exp;
51
+ break;
60
52
+
61
return_normal:
53
case INDEX_op_cmp_vec:
62
- /* TODO: Replace all use of float_muladd_halve_result with scale. */
54
switch ((TCGCond)args[3]) {
63
- if (flags & float_muladd_halve_result) {
55
case TCG_COND_EQ:
64
- a->exp -= 1;
56
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
65
- }
57
case INDEX_op_shri_vec:
66
a->exp += scale;
58
case INDEX_op_shrs_vec:
67
finish_sign:
59
case INDEX_op_shrv_vec:
68
if (flags & float_muladd_negate_result) {
60
+ case INDEX_op_smax_vec:
61
+ case INDEX_op_smin_vec:
62
case INDEX_op_sub_vec:
63
+ case INDEX_op_umax_vec:
64
+ case INDEX_op_umin_vec:
65
case INDEX_op_xor_vec:
66
return 1;
67
case INDEX_op_cmp_vec:
68
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
69
case INDEX_op_shlv_vec:
70
case INDEX_op_shrv_vec:
71
case INDEX_op_sarv_vec:
72
+ case INDEX_op_smax_vec:
73
+ case INDEX_op_smin_vec:
74
+ case INDEX_op_umax_vec:
75
+ case INDEX_op_umin_vec:
76
return C_O1_I2(v, v, v);
77
case INDEX_op_rotls_vec:
78
case INDEX_op_shls_vec:
79
--
69
--
80
2.25.1
70
2.43.0
81
71
82
72
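Side note on the s390x half of the patch above: the four newly enabled ops are plain element-wise minimum/maximum in signed and unsigned flavours (VMN/VMX and VMNL/VMXL). A scalar sketch of what one 32-bit lane computes, for illustration only:

#include <stdint.h>

static int32_t  lane_smin(int32_t a, int32_t b)   { return a < b ? a : b; }
static int32_t  lane_smax(int32_t a, int32_t b)   { return a > b ? a : b; }
static uint32_t lane_umin(uint32_t a, uint32_t b) { return a < b ? a : b; }
static uint32_t lane_umax(uint32_t a, uint32_t b) { return a > b ? a : b; }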
New patch
1
This rounding mode is used by Hexagon.
1
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
include/fpu/softfloat-types.h | 2 ++
6
fpu/softfloat-parts.c.inc | 3 +++
7
2 files changed, 5 insertions(+)
8
9
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
10
index XXXXXXX..XXXXXXX 100644
11
--- a/include/fpu/softfloat-types.h
12
+++ b/include/fpu/softfloat-types.h
13
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
14
float_round_to_odd = 5,
15
/* Not an IEEE rounding mode: round to closest odd, overflow to inf */
16
float_round_to_odd_inf = 6,
17
+ /* Not an IEEE rounding mode: round to nearest even, overflow to max */
18
+ float_round_nearest_even_max = 7,
19
} FloatRoundMode;
20
21
/*
22
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
23
index XXXXXXX..XXXXXXX 100644
24
--- a/fpu/softfloat-parts.c.inc
25
+++ b/fpu/softfloat-parts.c.inc
26
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
27
int exp, flags = 0;
28
29
switch (s->float_rounding_mode) {
30
+ case float_round_nearest_even_max:
31
+ overflow_norm = true;
32
+ /* fall through */
33
case float_round_nearest_even:
34
if (N > 64 && frac_lsb == 0) {
35
inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
36
--
37
2.43.0
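To make the new rounding mode concrete: it rounds to nearest-even as usual, but a result that would overflow is returned as the largest finite value instead of infinity, which is what the overflow_norm = true fall-through in the patch above arranges. The function below is only a host-float sketch of the idea, not QEMU code:

#include <float.h>
#include <math.h>

static float round_nearest_even_max(double x)
{
    float r = (float)x;                 /* nearest-even by default */
    if (isinf(r) && isfinite(x)) {
        r = copysignf(FLT_MAX, r);      /* saturate instead of overflowing */
    }
    return r;
}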
1
By using PKG_CONFIG_PATH instead of PKG_CONFIG_LIBDIR,
1
Certain Hexagon instructions suppress changes to the result
2
we were still including the 64-bit packages. Install
2
when the product of fma() is a true zero.
3
pcre-devel.i686 to fill a missing glib2 dependency.
4
5
By using --extra-cflags instead of --cpu, we inadvertently
6
use the wrong probing during meson.
7
3
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Richard W.M. Jones <rjones@redhat.com>
10
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
11
Message-Id: <20210930163636.721311-3-richard.henderson@linaro.org>
12
---
5
---
13
tests/docker/dockerfiles/fedora-i386-cross.docker | 5 +++--
6
include/fpu/softfloat.h | 5 +++++
14
1 file changed, 3 insertions(+), 2 deletions(-)
7
fpu/softfloat.c | 3 +++
8
fpu/softfloat-parts.c.inc | 4 +++-
9
3 files changed, 11 insertions(+), 1 deletion(-)
15
10
16
diff --git a/tests/docker/dockerfiles/fedora-i386-cross.docker b/tests/docker/dockerfiles/fedora-i386-cross.docker
11
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
17
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
18
--- a/tests/docker/dockerfiles/fedora-i386-cross.docker
13
--- a/include/fpu/softfloat.h
19
+++ b/tests/docker/dockerfiles/fedora-i386-cross.docker
14
+++ b/include/fpu/softfloat.h
20
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
15
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
21
glibc-static.i686 \
16
| Using these differs from negating an input or output before calling
22
gnutls-devel.i686 \
17
| the muladd function in that this means that a NaN doesn't have its
23
nettle-devel.i686 \
18
| sign bit inverted before it is propagated.
24
+ pcre-devel.i686 \
19
+|
25
perl-Test-Harness \
20
+| With float_muladd_suppress_add_product_zero, if A or B is zero
26
pixman-devel.i686 \
21
+| such that the product is a true zero, then return C without addition.
27
sysprof-capture-devel.i686 \
22
+| This preserves the sign of C when C is +/- 0. Used for Hexagon.
28
zlib-devel.i686
23
*----------------------------------------------------------------------------*/
29
24
enum {
30
-ENV QEMU_CONFIGURE_OPTS --extra-cflags=-m32 --disable-vhost-user
25
float_muladd_negate_c = 1,
31
-ENV PKG_CONFIG_PATH /usr/lib/pkgconfig
26
float_muladd_negate_product = 2,
32
+ENV QEMU_CONFIGURE_OPTS --cpu=i386 --disable-vhost-user
27
float_muladd_negate_result = 4,
33
+ENV PKG_CONFIG_LIBDIR /usr/lib/pkgconfig
28
+ float_muladd_suppress_add_product_zero = 8,
34
29
};
35
RUN dnf update -y && dnf install -y $PACKAGES
30
36
RUN rpm -q $PACKAGES | sort > /packages.txt
31
/*----------------------------------------------------------------------------
32
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/fpu/softfloat.c
35
+++ b/fpu/softfloat.c
36
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
37
if (unlikely(!can_use_fpu(s))) {
38
goto soft;
39
}
40
+ if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
41
+ goto soft;
42
+ }
43
44
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
45
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
46
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
47
index XXXXXXX..XXXXXXX 100644
48
--- a/fpu/softfloat-parts.c.inc
49
+++ b/fpu/softfloat-parts.c.inc
50
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
51
goto return_normal;
52
}
53
if (c->cls == float_class_zero) {
54
- if (a->sign != c->sign) {
55
+ if (flags & float_muladd_suppress_add_product_zero) {
56
+ a->sign = c->sign;
57
+ } else if (a->sign != c->sign) {
58
goto return_sub_zero;
59
}
60
goto return_zero;
37
--
61
--
38
2.25.1
62
2.43.0
39
40
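The effect of the new flag in the patch above, sketched on host floats (fma_suppress_zero_product is a made-up helper, not QEMU code): when a*b is a true zero, c is returned untouched, so 0 * x + (-0.0) keeps its minus sign instead of following the usual (+0) + (-0) = +0 rule.

#include <math.h>

static float fma_suppress_zero_product(float a, float b, float c)
{
    if ((a == 0.0f || b == 0.0f) &&
        !isnan(a) && !isnan(b) && !isinf(a) && !isinf(b)) {
        return c;                  /* true zero product: keep c, sign and all */
    }
    return fmaf(a, b, c);
}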
1
Reviewed-by: David Hildenbrand <david@redhat.com>
1
There are no special cases for this instruction.
2
Remove internal_mpyf as unused.
3
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
6
---
4
tcg/s390x/tcg-target.c.inc | 72 +++++++++++++++++++++++++++++++++++---
7
target/hexagon/fma_emu.h | 1 -
5
1 file changed, 68 insertions(+), 4 deletions(-)
8
target/hexagon/fma_emu.c | 8 --------
9
target/hexagon/op_helper.c | 2 +-
10
3 files changed, 1 insertion(+), 10 deletions(-)
6
11
7
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
12
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
8
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/s390x/tcg-target.c.inc
14
--- a/target/hexagon/fma_emu.h
10
+++ b/tcg/s390x/tcg-target.c.inc
15
+++ b/target/hexagon/fma_emu.h
11
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
16
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32);
12
RX_STC = 0x42,
17
float32 infinite_float32(uint8_t sign);
13
RX_STH = 0x40,
18
float32 internal_fmafx(float32 a, float32 b, float32 c,
14
19
int scale, float_status *fp_status);
15
+ VRRa_VLR = 0xe756,
20
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status);
16
+
21
float64 internal_mpyhh(float64 a, float64 b,
17
+ VRSb_VLVG = 0xe722,
22
unsigned long long int accumulated,
18
+ VRSc_VLGV = 0xe721,
23
float_status *fp_status);
19
+
24
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
20
VRX_VL = 0xe706,
25
index XXXXXXX..XXXXXXX 100644
21
VRX_VLLEZ = 0xe704,
26
--- a/target/hexagon/fma_emu.c
22
VRX_VST = 0xe70e,
27
+++ b/target/hexagon/fma_emu.c
23
@@ -XXX,XX +XXX,XX @@ static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
28
@@ -XXX,XX +XXX,XX @@ float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
24
| ((v4 & 0x10) << (4 + 0));
29
return accum_round_float32(result, fp_status);
25
}
30
}
26
31
27
+static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
32
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
28
+ TCGReg v1, TCGReg v2, int m3)
33
-{
29
+{
34
- if (float32_is_zero(a) || float32_is_zero(b)) {
30
+ tcg_debug_assert(is_vector_reg(v1));
35
- return float32_mul(a, b, fp_status);
31
+ tcg_debug_assert(is_vector_reg(v2));
36
- }
32
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
37
- return internal_fmafx(a, b, float32_zero, 0, fp_status);
33
+ tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
38
-}
34
+}
39
-
35
+
40
float64 internal_mpyhh(float64 a, float64 b,
36
+static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
41
unsigned long long int accumulated,
37
+ intptr_t d2, TCGReg b2, TCGReg r3, int m4)
42
float_status *fp_status)
38
+{
43
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
39
+ tcg_debug_assert(is_vector_reg(v1));
44
index XXXXXXX..XXXXXXX 100644
40
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
45
--- a/target/hexagon/op_helper.c
41
+ tcg_debug_assert(is_general_reg(b2));
46
+++ b/target/hexagon/op_helper.c
42
+ tcg_debug_assert(is_general_reg(r3));
47
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
43
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
44
+ tcg_out16(s, b2 << 12 | d2);
45
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
46
+}
47
+
48
+static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
49
+ intptr_t d2, TCGReg b2, TCGReg v3, int m4)
50
+{
51
+ tcg_debug_assert(is_general_reg(r1));
52
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
53
+ tcg_debug_assert(is_general_reg(b2));
54
+ tcg_debug_assert(is_vector_reg(v3));
55
+ tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
56
+ tcg_out16(s, b2 << 12 | d2);
57
+ tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
58
+}
59
+
60
static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
61
TCGReg b2, TCGReg x2, intptr_t d2, int m3)
62
{
48
{
63
@@ -XXX,XX +XXX,XX @@ static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
49
float32 RdV;
64
50
arch_fpop_start(env);
65
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
51
- RdV = internal_mpyf(RsV, RtV, &env->fp_status);
66
{
52
+ RdV = float32_mul(RsV, RtV, &env->fp_status);
67
- if (src != dst) {
53
arch_fpop_end(env);
68
- if (type == TCG_TYPE_I32) {
54
return RdV;
69
+ if (src == dst) {
70
+ return true;
71
+ }
72
+ switch (type) {
73
+ case TCG_TYPE_I32:
74
+ if (likely(is_general_reg(dst) && is_general_reg(src))) {
75
tcg_out_insn(s, RR, LR, dst, src);
76
- } else {
77
- tcg_out_insn(s, RRE, LGR, dst, src);
78
+ break;
79
}
80
+ /* fallthru */
81
+
82
+ case TCG_TYPE_I64:
83
+ if (likely(is_general_reg(dst))) {
84
+ if (likely(is_general_reg(src))) {
85
+ tcg_out_insn(s, RRE, LGR, dst, src);
86
+ } else {
87
+ tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
88
+ }
89
+ break;
90
+ } else if (is_general_reg(src)) {
91
+ tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
92
+ break;
93
+ }
94
+ /* fallthru */
95
+
96
+ case TCG_TYPE_V64:
97
+ case TCG_TYPE_V128:
98
+ tcg_out_insn(s, VRRa, VLR, dst, src, 0);
99
+ break;
100
+
101
+ default:
102
+ g_assert_not_reached();
103
}
104
return true;
105
}
55
}
106
--
56
--
107
2.25.1
57
2.43.0
108
109
New patch
1
There are no special cases for this instruction.
1
2
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/hexagon/op_helper.c | 2 +-
7
1 file changed, 1 insertion(+), 1 deletion(-)
8
9
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/hexagon/op_helper.c
12
+++ b/target/hexagon/op_helper.c
13
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
14
float32 RsV, float32 RtV)
15
{
16
arch_fpop_start(env);
17
- RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
18
+ RxV = float32_muladd(RsV, RtV, RxV, 0, &env->fp_status);
19
arch_fpop_end(env);
20
return RxV;
21
}
22
--
23
2.43.0
New patch
1
There are no special cases for this instruction. Since Hexagon
2
always uses default-nan mode, explicitly negating the first
3
input is unnecessary. Use float_muladd_negate_product instead.
1
4
5
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/hexagon/op_helper.c | 5 ++---
9
1 file changed, 2 insertions(+), 3 deletions(-)
10
11
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/target/hexagon/op_helper.c
14
+++ b/target/hexagon/op_helper.c
15
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
16
float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
17
float32 RsV, float32 RtV)
18
{
19
- float32 neg_RsV;
20
arch_fpop_start(env);
21
- neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
22
- RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
23
+ RxV = float32_muladd(RsV, RtV, RxV, float_muladd_negate_product,
24
+ &env->fp_status);
25
arch_fpop_end(env);
26
return RxV;
27
}
28
--
29
2.43.0
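Why the substitution in the patch above is safe, as a sketch on host floats: for any non-NaN inputs, the fused -(a*b) + c that float_muladd_negate_product requests is bit-identical to fusing with a negated operand, which is all the old code did by flipping RsV's sign; the two can differ only in the sign carried by a propagated NaN, which default-NaN mode discards anyway.

#include <math.h>

/* Multiply-subtract via a negated operand; float_muladd_negate_product
 * computes the same value for non-NaN inputs. */
static float fms(float a, float b, float c)
{
    return fmaf(-a, b, c);
}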
1
These logical and arithmetic operations are optional but trivial.
1
This instruction has a special case that 0 * x + c returns c
2
without the normal sign folding that comes with 0 + -0.
3
Use the new float_muladd_suppress_add_product_zero to
4
describe this.
2
5
3
Reviewed-by: David Hildenbrand <david@redhat.com>
6
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
8
---
6
tcg/s390x/tcg-target-con-set.h | 1 +
9
target/hexagon/op_helper.c | 11 +++--------
7
tcg/s390x/tcg-target.h | 11 ++++++-----
10
1 file changed, 3 insertions(+), 8 deletions(-)
8
tcg/s390x/tcg-target.c.inc | 32 ++++++++++++++++++++++++++++++++
9
3 files changed, 39 insertions(+), 5 deletions(-)
10
11
11
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
12
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/s390x/tcg-target-con-set.h
14
--- a/target/hexagon/op_helper.c
14
+++ b/tcg/s390x/tcg-target-con-set.h
15
+++ b/target/hexagon/op_helper.c
15
@@ -XXX,XX +XXX,XX @@ C_O0_I2(v, r)
16
@@ -XXX,XX +XXX,XX @@ static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
16
C_O1_I1(r, L)
17
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
17
C_O1_I1(r, r)
18
float32 RsV, float32 RtV, float32 PuV)
18
C_O1_I1(v, r)
19
+C_O1_I1(v, v)
20
C_O1_I1(v, vr)
21
C_O1_I2(r, 0, ri)
22
C_O1_I2(r, 0, rI)
23
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
24
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/s390x/tcg-target.h
26
+++ b/tcg/s390x/tcg-target.h
27
@@ -XXX,XX +XXX,XX @@ typedef enum TCGReg {
28
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
29
#define FACILITY_LOAD_ON_COND2 53
30
#define FACILITY_VECTOR 129
31
+#define FACILITY_VECTOR_ENH1 135
32
33
extern uint64_t s390_facilities[3];
34
35
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
36
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
37
#define TCG_TARGET_HAS_v256 0
38
39
-#define TCG_TARGET_HAS_andc_vec 0
40
-#define TCG_TARGET_HAS_orc_vec 0
41
-#define TCG_TARGET_HAS_not_vec 0
42
-#define TCG_TARGET_HAS_neg_vec 0
43
-#define TCG_TARGET_HAS_abs_vec 0
44
+#define TCG_TARGET_HAS_andc_vec 1
45
+#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
46
+#define TCG_TARGET_HAS_not_vec 1
47
+#define TCG_TARGET_HAS_neg_vec 1
48
+#define TCG_TARGET_HAS_abs_vec 1
49
#define TCG_TARGET_HAS_roti_vec 0
50
#define TCG_TARGET_HAS_rots_vec 0
51
#define TCG_TARGET_HAS_rotv_vec 0
52
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tcg/s390x/tcg-target.c.inc
55
+++ b/tcg/s390x/tcg-target.c.inc
56
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
57
VRIb_VGM = 0xe746,
58
VRIc_VREP = 0xe74d,
59
60
+ VRRa_VLC = 0xe7de,
61
+ VRRa_VLP = 0xe7df,
62
VRRa_VLR = 0xe756,
63
VRRc_VA = 0xe7f3,
64
VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
65
VRRc_VCH = 0xe7fb, /* " */
66
VRRc_VCHL = 0xe7f9, /* " */
67
VRRc_VN = 0xe768,
68
+ VRRc_VNC = 0xe769,
69
+ VRRc_VNO = 0xe76b,
70
VRRc_VO = 0xe76a,
71
+ VRRc_VOC = 0xe76f,
72
VRRc_VS = 0xe7f7,
73
VRRc_VX = 0xe76d,
74
VRRf_VLVGP = 0xe762,
75
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
76
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
77
break;
78
79
+ case INDEX_op_abs_vec:
80
+ tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
81
+ break;
82
+ case INDEX_op_neg_vec:
83
+ tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
84
+ break;
85
+ case INDEX_op_not_vec:
86
+ tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
87
+ break;
88
+
89
case INDEX_op_add_vec:
90
tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
91
break;
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
93
case INDEX_op_and_vec:
94
tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
95
break;
96
+ case INDEX_op_andc_vec:
97
+ tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
98
+ break;
99
case INDEX_op_or_vec:
100
tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
101
break;
102
+ case INDEX_op_orc_vec:
103
+ tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
104
+ break;
105
case INDEX_op_xor_vec:
106
tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
107
break;
108
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
109
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
110
{
19
{
111
switch (opc) {
20
- size4s_t tmp;
112
+ case INDEX_op_abs_vec:
21
arch_fpop_start(env);
113
case INDEX_op_add_vec:
22
- RxV = check_nan(RxV, RxV, &env->fp_status);
114
case INDEX_op_and_vec:
23
- RxV = check_nan(RxV, RsV, &env->fp_status);
115
+ case INDEX_op_andc_vec:
24
- RxV = check_nan(RxV, RtV, &env->fp_status);
116
+ case INDEX_op_neg_vec:
25
- tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
117
+ case INDEX_op_not_vec:
26
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
118
case INDEX_op_or_vec:
27
- RxV = tmp;
119
+ case INDEX_op_orc_vec:
28
- }
120
case INDEX_op_sub_vec:
29
+ RxV = float32_muladd_scalbn(RsV, RtV, RxV, fSXTN(8, 64, PuV),
121
case INDEX_op_xor_vec:
30
+ float_muladd_suppress_add_product_zero,
122
return 1;
31
+ &env->fp_status);
123
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
32
arch_fpop_end(env);
124
return C_O1_I1(v, r);
33
return RxV;
125
case INDEX_op_dup_vec:
34
}
126
return C_O1_I1(v, vr);
127
+ case INDEX_op_abs_vec:
128
+ case INDEX_op_neg_vec:
129
+ case INDEX_op_not_vec:
130
+ return C_O1_I1(v, v);
131
case INDEX_op_add_vec:
132
case INDEX_op_sub_vec:
133
case INDEX_op_and_vec:
134
+ case INDEX_op_andc_vec:
135
case INDEX_op_or_vec:
136
+ case INDEX_op_orc_vec:
137
case INDEX_op_xor_vec:
138
case INDEX_op_cmp_vec:
139
return C_O1_I2(v, v, v);
140
--
35
--
141
2.25.1
36
2.43.0
142
143
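On the s390x side of the patch above, the newly enabled logical ops are simple bitwise identities; a scalar sketch of what each lane computes, for illustration only:

#include <stdint.h>

static uint64_t lane_andc(uint64_t a, uint64_t b) { return a & ~b; }  /* VNC */
static uint64_t lane_orc (uint64_t a, uint64_t b) { return a | ~b; }  /* VOC */
static uint64_t lane_not (uint64_t a)             { return ~a; }      /* VNO a, a, a */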
1
Implementing add, sub, and, or, xor as the minimal set.
1
There are multiple special cases for this instruction.
2
This allows us to actually enable vectors in query_s390_facilities.
2
(1) The saturate to normal maximum instead of overflow to infinity is
3
handled by the new float_round_nearest_even_max rounding mode.
4
(2) The 0 * n + c special case is handled by the new
5
float_muladd_suppress_add_product_zero flag.
6
(3) The Inf - Inf -> 0 special case can be detected after the fact
7
by examining float_flag_invalid_isi.
3
8
4
Reviewed-by: David Hildenbrand <david@redhat.com>
9
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
11
---
7
tcg/s390x/tcg-target.c.inc | 154 ++++++++++++++++++++++++++++++++++++-
12
target/hexagon/op_helper.c | 105 +++++++++----------------------------
8
1 file changed, 150 insertions(+), 4 deletions(-)
13
1 file changed, 26 insertions(+), 79 deletions(-)
9
14
10
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
15
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
11
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/s390x/tcg-target.c.inc
17
--- a/target/hexagon/op_helper.c
13
+++ b/tcg/s390x/tcg-target.c.inc
18
+++ b/target/hexagon/op_helper.c
14
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
19
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
15
VRIc_VREP = 0xe74d,
20
return RxV;
16
17
VRRa_VLR = 0xe756,
18
+ VRRc_VA = 0xe7f3,
19
+ VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
20
+ VRRc_VCH = 0xe7fb, /* " */
21
+ VRRc_VCHL = 0xe7f9, /* " */
22
+ VRRc_VN = 0xe768,
23
+ VRRc_VO = 0xe76a,
24
+ VRRc_VS = 0xe7f7,
25
+ VRRc_VX = 0xe76d,
26
VRRf_VLVGP = 0xe762,
27
28
VRSb_VLVG = 0xe722,
29
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
30
tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
31
}
21
}
32
22
33
+static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
23
-static bool is_zero_prod(float32 a, float32 b)
34
+ TCGReg v1, TCGReg v2, TCGReg v3, int m4)
24
-{
35
+{
25
- return ((float32_is_zero(a) && is_finite(b)) ||
36
+ tcg_debug_assert(is_vector_reg(v1));
26
- (float32_is_zero(b) && is_finite(a)));
37
+ tcg_debug_assert(is_vector_reg(v2));
27
-}
38
+ tcg_debug_assert(is_vector_reg(v3));
28
-
39
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
29
-static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
40
+ tcg_out16(s, v3 << 12);
30
-{
41
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
31
- float32 ret = dst;
42
+}
32
- if (float32_is_any_nan(x)) {
33
- if (extract32(x, 22, 1) == 0) {
34
- float_raise(float_flag_invalid, fp_status);
35
- }
36
- ret = make_float32(0xffffffff); /* nan */
37
- }
38
- return ret;
39
-}
40
-
41
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
42
float32 RsV, float32 RtV, float32 PuV)
43
{
44
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
45
return RxV;
46
}
47
48
-static bool is_inf_prod(int32_t a, int32_t b)
49
+static float32 do_sffma_lib(CPUHexagonState *env, float32 RxV,
50
+ float32 RsV, float32 RtV, int negate)
51
{
52
- return (float32_is_infinity(a) && float32_is_infinity(b)) ||
53
- (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
54
- (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
55
+ int flags;
43
+
56
+
44
static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
57
+ arch_fpop_start(env);
45
TCGReg v1, TCGReg r2, TCGReg r3)
46
{
47
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
48
unsigned vecl, unsigned vece,
49
const TCGArg *args, const int *const_args)
50
{
51
- g_assert_not_reached();
52
+ TCGType type = vecl + TCG_TYPE_V64;
53
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
54
+
58
+
55
+ switch (opc) {
59
+ set_float_rounding_mode(float_round_nearest_even_max, &env->fp_status);
56
+ case INDEX_op_ld_vec:
60
+ RxV = float32_muladd(RsV, RtV, RxV,
57
+ tcg_out_ld(s, type, a0, a1, a2);
61
+ negate | float_muladd_suppress_add_product_zero,
58
+ break;
62
+ &env->fp_status);
59
+ case INDEX_op_st_vec:
60
+ tcg_out_st(s, type, a0, a1, a2);
61
+ break;
62
+ case INDEX_op_dupm_vec:
63
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
64
+ break;
65
+
63
+
66
+ case INDEX_op_add_vec:
64
+ flags = get_float_exception_flags(&env->fp_status);
67
+ tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
65
+ if (flags) {
68
+ break;
66
+ /* Flags are suppressed by this instruction. */
69
+ case INDEX_op_sub_vec:
67
+ set_float_exception_flags(0, &env->fp_status);
70
+ tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
71
+ break;
72
+ case INDEX_op_and_vec:
73
+ tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
74
+ break;
75
+ case INDEX_op_or_vec:
76
+ tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
77
+ break;
78
+ case INDEX_op_xor_vec:
79
+ tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
80
+ break;
81
+
68
+
82
+ case INDEX_op_cmp_vec:
69
+ /* Return 0 for Inf - Inf. */
83
+ switch ((TCGCond)args[3]) {
70
+ if (flags & float_flag_invalid_isi) {
84
+ case TCG_COND_EQ:
71
+ RxV = 0;
85
+ tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
86
+ break;
87
+ case TCG_COND_GT:
88
+ tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
89
+ break;
90
+ case TCG_COND_GTU:
91
+ tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
92
+ break;
93
+ default:
94
+ g_assert_not_reached();
95
+ }
72
+ }
96
+ break;
97
+
98
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
99
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
100
+ default:
101
+ g_assert_not_reached();
102
+ }
103
}
104
105
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
106
{
107
- return 0;
108
+ switch (opc) {
109
+ case INDEX_op_add_vec:
110
+ case INDEX_op_and_vec:
111
+ case INDEX_op_or_vec:
112
+ case INDEX_op_sub_vec:
113
+ case INDEX_op_xor_vec:
114
+ return 1;
115
+ case INDEX_op_cmp_vec:
116
+ return -1;
117
+ default:
118
+ return 0;
119
+ }
120
+}
121
+
122
+static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
123
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
124
+{
125
+ bool need_swap = false, need_inv = false;
126
+
127
+ switch (cond) {
128
+ case TCG_COND_EQ:
129
+ case TCG_COND_GT:
130
+ case TCG_COND_GTU:
131
+ break;
132
+ case TCG_COND_NE:
133
+ case TCG_COND_LE:
134
+ case TCG_COND_LEU:
135
+ need_inv = true;
136
+ break;
137
+ case TCG_COND_LT:
138
+ case TCG_COND_LTU:
139
+ need_swap = true;
140
+ break;
141
+ case TCG_COND_GE:
142
+ case TCG_COND_GEU:
143
+ need_swap = need_inv = true;
144
+ break;
145
+ default:
146
+ g_assert_not_reached();
147
+ }
73
+ }
148
+
74
+
149
+ if (need_inv) {
75
+ arch_fpop_end(env);
150
+ cond = tcg_invert_cond(cond);
76
+ return RxV;
151
+ }
152
+ if (need_swap) {
153
+ TCGv_vec t1;
154
+ t1 = v1, v1 = v2, v2 = t1;
155
+ cond = tcg_swap_cond(cond);
156
+ }
157
+
158
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
159
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
160
+
161
+ return need_inv;
162
+}
163
+
164
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
165
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
166
+{
167
+ if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
168
+ tcg_gen_not_vec(vece, v0, v0);
169
+ }
170
}
77
}
171
78
172
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
79
float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
173
TCGArg a0, ...)
80
float32 RsV, float32 RtV)
174
{
81
{
175
- g_assert_not_reached();
82
- bool infinp;
176
+ va_list va;
83
- bool infminusinf;
177
+ TCGv_vec v0, v1, v2;
84
- float32 tmp;
178
+
85
-
179
+ va_start(va, a0);
86
- arch_fpop_start(env);
180
+ v0 = temp_tcgv_vec(arg_temp(a0));
87
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
181
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
88
- infminusinf = float32_is_infinity(RxV) &&
182
+ v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
89
- is_inf_prod(RsV, RtV) &&
183
+
90
- (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
184
+ switch (opc) {
91
- infinp = float32_is_infinity(RxV) ||
185
+ case INDEX_op_cmp_vec:
92
- float32_is_infinity(RtV) ||
186
+ expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
93
- float32_is_infinity(RsV);
187
+ break;
94
- RxV = check_nan(RxV, RxV, &env->fp_status);
188
+
95
- RxV = check_nan(RxV, RsV, &env->fp_status);
189
+ default:
96
- RxV = check_nan(RxV, RtV, &env->fp_status);
190
+ g_assert_not_reached();
97
- tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
191
+ }
98
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
192
+ va_end(va);
99
- RxV = tmp;
100
- }
101
- set_float_exception_flags(0, &env->fp_status);
102
- if (float32_is_infinity(RxV) && !infinp) {
103
- RxV = RxV - 1;
104
- }
105
- if (infminusinf) {
106
- RxV = 0;
107
- }
108
- arch_fpop_end(env);
109
- return RxV;
110
+ return do_sffma_lib(env, RxV, RsV, RtV, 0);
193
}
111
}
194
112
195
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
113
float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
196
@@ -XXX,XX +XXX,XX @@ static void query_s390_facilities(void)
114
float32 RsV, float32 RtV)
197
* There is nothing else we currently care about in the 3rd word, so
115
{
198
* disable VECTOR with one store.
116
- bool infinp;
199
*/
117
- bool infminusinf;
200
- if (1 || !(hwcap & HWCAP_S390_VXRS)) {
118
- float32 tmp;
201
+ if (!(hwcap & HWCAP_S390_VXRS)) {
119
-
202
s390_facilities[2] = 0;
120
- arch_fpop_start(env);
203
}
121
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
122
- infminusinf = float32_is_infinity(RxV) &&
123
- is_inf_prod(RsV, RtV) &&
124
- (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
125
- infinp = float32_is_infinity(RxV) ||
126
- float32_is_infinity(RtV) ||
127
- float32_is_infinity(RsV);
128
- RxV = check_nan(RxV, RxV, &env->fp_status);
129
- RxV = check_nan(RxV, RsV, &env->fp_status);
130
- RxV = check_nan(RxV, RtV, &env->fp_status);
131
- float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
132
- tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
133
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
134
- RxV = tmp;
135
- }
136
- set_float_exception_flags(0, &env->fp_status);
137
- if (float32_is_infinity(RxV) && !infinp) {
138
- RxV = RxV - 1;
139
- }
140
- if (infminusinf) {
141
- RxV = 0;
142
- }
143
- arch_fpop_end(env);
144
- return RxV;
145
+ return do_sffma_lib(env, RxV, RsV, RtV, float_muladd_negate_product);
204
}
146
}
147
148
float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
205
--
149
--
206
2.25.1
150
2.43.0
207
208
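One more sketch for the comparison expansion in the s390x patch above: only EQ, GT and GTU are emitted directly, and expand_vec_cmp_noinv derives the remaining conditions by swapping operands and/or inverting the result. The same derivation on a single signed 64-bit lane, for illustration only:

#include <stdbool.h>
#include <stdint.h>

static bool lane_gt(int64_t a, int64_t b) { return a > b; }

static bool lane_lt(int64_t a, int64_t b) { return lane_gt(b, a); }    /* swap          */
static bool lane_ne(int64_t a, int64_t b) { return !(a == b); }        /* invert EQ     */
static bool lane_le(int64_t a, int64_t b) { return !lane_gt(a, b); }   /* invert        */
static bool lane_ge(int64_t a, int64_t b) { return !lane_gt(b, a); }   /* swap + invert */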
1
They are rightly values in the same enumeration.
1
The function is now unused.
2
2
3
Reviewed-by: David Hildenbrand <david@redhat.com>
3
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
tcg/s390x/tcg-target.h | 28 +++++++---------------------
6
target/hexagon/fma_emu.h | 2 -
7
1 file changed, 7 insertions(+), 21 deletions(-)
7
target/hexagon/fma_emu.c | 171 ---------------------------------------
8
2 files changed, 173 deletions(-)
8
9
9
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
10
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
10
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/s390x/tcg-target.h
12
--- a/target/hexagon/fma_emu.h
12
+++ b/tcg/s390x/tcg-target.h
13
+++ b/target/hexagon/fma_emu.h
13
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ static inline uint32_t float32_getexp_raw(float32 f32)
14
#define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
15
}
15
16
int32_t float32_getexp(float32 f32);
16
typedef enum TCGReg {
17
float32 infinite_float32(uint8_t sign);
17
- TCG_REG_R0 = 0,
18
-float32 internal_fmafx(float32 a, float32 b, float32 c,
18
- TCG_REG_R1,
19
- int scale, float_status *fp_status);
19
- TCG_REG_R2,
20
float64 internal_mpyhh(float64 a, float64 b,
20
- TCG_REG_R3,
21
unsigned long long int accumulated,
21
- TCG_REG_R4,
22
float_status *fp_status);
22
- TCG_REG_R5,
23
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
23
- TCG_REG_R6,
24
index XXXXXXX..XXXXXXX 100644
24
- TCG_REG_R7,
25
--- a/target/hexagon/fma_emu.c
25
- TCG_REG_R8,
26
+++ b/target/hexagon/fma_emu.c
26
- TCG_REG_R9,
27
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
27
- TCG_REG_R10,
28
return -1;
28
- TCG_REG_R11,
29
}
29
- TCG_REG_R12,
30
30
- TCG_REG_R13,
31
-static uint64_t float32_getmant(float32 f32)
31
- TCG_REG_R14,
32
-{
32
- TCG_REG_R15
33
- Float a = { .i = f32 };
33
+ TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
34
- if (float32_is_normal(f32)) {
34
+ TCG_REG_R4, TCG_REG_R5, TCG_REG_R6, TCG_REG_R7,
35
- return a.mant | 1ULL << 23;
35
+ TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
36
- }
36
+ TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15,
37
- if (float32_is_zero(f32)) {
37
+
38
- return 0;
38
+ TCG_AREG0 = TCG_REG_R10,
39
- }
39
+ TCG_REG_CALL_STACK = TCG_REG_R15
40
- if (float32_is_denormal(f32)) {
40
} TCGReg;
41
- return a.mant;
41
42
- }
42
#define TCG_TARGET_NB_REGS 16
43
- return ~0ULL;
43
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[1];
44
-}
44
#define TCG_TARGET_HAS_mulsh_i64 0
45
-
45
46
int32_t float32_getexp(float32 f32)
46
/* used for function call generation */
47
-#define TCG_REG_CALL_STACK        TCG_REG_R15
48
#define TCG_TARGET_STACK_ALIGN        8
49
#define TCG_TARGET_CALL_STACK_OFFSET    160
50
51
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[1];
52
53
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
54
55
-enum {
56
- TCG_AREG0 = TCG_REG_R10,
57
-};
58
-
59
static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
60
uintptr_t jmp_rw, uintptr_t addr)
61
{
47
{
48
Float a = { .i = f32 };
49
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
50
}
51
52
/* Return a maximum finite value with the requested sign */
53
-static float32 maxfinite_float32(uint8_t sign)
54
-{
55
- if (sign) {
56
- return make_float32(SF_MINUS_MAXF);
57
- } else {
58
- return make_float32(SF_MAXF);
59
- }
60
-}
61
-
62
-/* Return a zero value with requested sign */
63
-static float32 zero_float32(uint8_t sign)
64
-{
65
- if (sign) {
66
- return make_float32(0x80000000);
67
- } else {
68
- return float32_zero;
69
- }
70
-}
71
-
72
#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
73
static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
74
{ \
75
@@ -XXX,XX +XXX,XX @@ static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
76
}
77
78
GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
79
-GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
80
-
81
-static bool is_inf_prod(float64 a, float64 b)
82
-{
83
- return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
84
- (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
85
- (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
86
-}
87
-
88
-static float64 special_fma(float64 a, float64 b, float64 c,
89
- float_status *fp_status)
90
-{
91
- float64 ret = make_float64(0);
92
-
93
- /*
94
- * If A multiplied by B is an exact infinity and C is also an infinity
95
- * but with the opposite sign, FMA returns NaN and raises invalid.
96
- */
97
- uint8_t a_sign = float64_is_neg(a);
98
- uint8_t b_sign = float64_is_neg(b);
99
- uint8_t c_sign = float64_is_neg(c);
100
- if (is_inf_prod(a, b) && float64_is_infinity(c)) {
101
- if ((a_sign ^ b_sign) != c_sign) {
102
- ret = make_float64(DF_NAN);
103
- float_raise(float_flag_invalid, fp_status);
104
- return ret;
105
- }
106
- }
107
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
108
- (float64_is_zero(a) && float64_is_infinity(b))) {
109
- ret = make_float64(DF_NAN);
110
- float_raise(float_flag_invalid, fp_status);
111
- return ret;
112
- }
113
- /*
114
- * If none of the above checks are true and C is a NaN,
115
- * a NaN shall be returned
116
- * If A or B are NaN, a NAN shall be returned.
117
- */
118
- if (float64_is_any_nan(a) ||
119
- float64_is_any_nan(b) ||
120
- float64_is_any_nan(c)) {
121
- if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
122
- float_raise(float_flag_invalid, fp_status);
123
- }
124
- if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
125
- float_raise(float_flag_invalid, fp_status);
126
- }
127
- if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
128
- float_raise(float_flag_invalid, fp_status);
129
- }
130
- ret = make_float64(DF_NAN);
131
- return ret;
132
- }
133
- /*
134
- * We have checked for adding opposite-signed infinities.
135
- * Other infinities return infinity with the correct sign
136
- */
137
- if (float64_is_infinity(c)) {
138
- ret = infinite_float64(c_sign);
139
- return ret;
140
- }
141
- if (float64_is_infinity(a) || float64_is_infinity(b)) {
142
- ret = infinite_float64(a_sign ^ b_sign);
143
- return ret;
144
- }
145
- g_assert_not_reached();
146
-}
147
-
148
-static float32 special_fmaf(float32 a, float32 b, float32 c,
149
- float_status *fp_status)
150
-{
151
- float64 aa, bb, cc;
152
- aa = float32_to_float64(a, fp_status);
153
- bb = float32_to_float64(b, fp_status);
154
- cc = float32_to_float64(c, fp_status);
155
- return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
156
-}
157
-
158
-float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
159
- float_status *fp_status)
160
-{
161
- Accum prod;
162
- Accum acc;
163
- Accum result;
164
- accum_init(&prod);
165
- accum_init(&acc);
166
- accum_init(&result);
167
-
168
- uint8_t a_sign = float32_is_neg(a);
169
- uint8_t b_sign = float32_is_neg(b);
170
- uint8_t c_sign = float32_is_neg(c);
171
- if (float32_is_infinity(a) ||
172
- float32_is_infinity(b) ||
173
- float32_is_infinity(c)) {
174
- return special_fmaf(a, b, c, fp_status);
175
- }
176
- if (float32_is_any_nan(a) ||
177
- float32_is_any_nan(b) ||
178
- float32_is_any_nan(c)) {
179
- return special_fmaf(a, b, c, fp_status);
180
- }
181
- if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
182
- float32 tmp = float32_mul(a, b, fp_status);
183
- tmp = float32_add(tmp, c, fp_status);
184
- return tmp;
185
- }
186
-
187
- /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
188
- prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
189
-
190
- /*
191
- * Note: extracting the mantissa into an int is multiplying by
192
- * 2**23, so adjust here
193
- */
194
- prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
195
- prod.sign = a_sign ^ b_sign;
196
- if (float32_is_zero(a) || float32_is_zero(b)) {
197
- prod.exp = -2 * WAY_BIG_EXP;
198
- }
199
- if ((scale > 0) && float32_is_denormal(c)) {
200
- acc.mant = int128_mul_6464(0, 0);
201
- acc.exp = -WAY_BIG_EXP;
202
- acc.sign = c_sign;
203
- acc.sticky = 1;
204
- result = accum_add(prod, acc);
205
- } else if (!float32_is_zero(c)) {
206
- acc.mant = int128_mul_6464(float32_getmant(c), 1);
207
- acc.exp = float32_getexp(c);
208
- acc.sign = c_sign;
209
- result = accum_add(prod, acc);
210
- } else {
211
- result = prod;
212
- }
213
- result.exp += scale;
214
- return accum_round_float32(result, fp_status);
215
-}
216
217
float64 internal_mpyhh(float64 a, float64 b,
218
unsigned long long int accumulated,
62
--
219
--
63
2.25.1
220
2.43.0
64
65
diff view generated by jsdifflib
1
For usadd, we only have to consider overflow. Since ~B + B == -1,
1
This massive macro is now only used once.
2
the maximum value for A that saturates is ~B.
2
Expand it for use only by float64.
3
3
4
For ussub, we only have to consider underflow. The minimum value
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
that saturates to 0 from A - B is B.
6
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
tcg/tcg-op-vec.c | 37 +++++++++++++++++++++++++++++++++++--
7
target/hexagon/fma_emu.c | 255 +++++++++++++++++++--------------------
10
1 file changed, 35 insertions(+), 2 deletions(-)
8
1 file changed, 127 insertions(+), 128 deletions(-)
11
9
12
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/tcg-op-vec.c
12
--- a/target/hexagon/fma_emu.c
15
+++ b/tcg/tcg-op-vec.c
13
+++ b/target/hexagon/fma_emu.c
16
@@ -XXX,XX +XXX,XX @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list,
14
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
17
continue;
18
}
19
break;
20
+ case INDEX_op_usadd_vec:
21
+ if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
22
+ tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
23
+ continue;
24
+ }
25
+ break;
26
+ case INDEX_op_ussub_vec:
27
+ if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
28
+ tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
29
+ continue;
30
+ }
31
+ break;
32
case INDEX_op_cmpsel_vec:
33
case INDEX_op_smin_vec:
34
case INDEX_op_smax_vec:
35
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
36
37
void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
38
{
39
- do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec);
40
+ if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
41
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
42
+ TCGv_vec t = tcg_temp_new_vec_matching(r);
43
+
44
+ /* usadd(a, b) = min(a, ~b) + b */
45
+ tcg_gen_not_vec(vece, t, b);
46
+ tcg_gen_umin_vec(vece, t, t, a);
47
+ tcg_gen_add_vec(vece, r, t, b);
48
+
49
+ tcg_temp_free_vec(t);
50
+ tcg_swap_vecop_list(hold_list);
51
+ }
52
}
15
}
53
16
54
void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
17
/* Return a maximum finite value with the requested sign */
55
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
18
-#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
56
19
-static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
57
void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
20
-{ \
58
{
21
- if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
59
- do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec);
22
- && ((a.guard | a.round | a.sticky) == 0)) { \
60
+ if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
23
- /* result zero */ \
61
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
24
- switch (fp_status->float_rounding_mode) { \
62
+ TCGv_vec t = tcg_temp_new_vec_matching(r);
25
- case float_round_down: \
63
+
26
- return zero_##SUFFIX(1); \
64
+ /* ussub(a, b) = max(a, b) - b */
27
- default: \
65
+ tcg_gen_umax_vec(vece, t, a, b);
28
- return zero_##SUFFIX(0); \
66
+ tcg_gen_sub_vec(vece, r, t, b);
29
- } \
67
+
30
- } \
68
+ tcg_temp_free_vec(t);
31
- /* Normalize right */ \
69
+ tcg_swap_vecop_list(hold_list);
32
- /* We want MANTBITS bits of mantissa plus the leading one. */ \
70
+ }
33
- /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
34
- /* So we need to normalize right while the high word is non-zero and \
35
- * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
36
- while ((int128_gethi(a.mant) != 0) || \
37
- ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
38
- a = accum_norm_right(a, 1); \
39
- } \
40
- /* \
41
- * OK, now normalize left \
42
- * We want to normalize left until we have a leading one in bit 24 \
43
- * Theoretically, we only need to shift a maximum of one to the left if we \
44
- * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
45
- * should be 0 \
46
- */ \
47
- while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
48
- a = accum_norm_left(a); \
49
- } \
50
- /* \
51
- * OK, now we might need to denormalize because of potential underflow. \
52
- * We need to do this before rounding, and rounding might make us normal \
53
- * again \
54
- */ \
55
- while (a.exp <= 0) { \
56
- a = accum_norm_right(a, 1 - a.exp); \
57
- /* \
58
- * Do we have underflow? \
59
- * That's when we get an inexact answer because we ran out of bits \
60
- * in a denormal. \
61
- */ \
62
- if (a.guard || a.round || a.sticky) { \
63
- float_raise(float_flag_underflow, fp_status); \
64
- } \
65
- } \
66
- /* OK, we're relatively canonical... now we need to round */ \
67
- if (a.guard || a.round || a.sticky) { \
68
- float_raise(float_flag_inexact, fp_status); \
69
- switch (fp_status->float_rounding_mode) { \
70
- case float_round_to_zero: \
71
- /* Chop and we're done */ \
72
- break; \
73
- case float_round_up: \
74
- if (a.sign == 0) { \
75
- a.mant = int128_add(a.mant, int128_one()); \
76
- } \
77
- break; \
78
- case float_round_down: \
79
- if (a.sign != 0) { \
80
- a.mant = int128_add(a.mant, int128_one()); \
81
- } \
82
- break; \
83
- default: \
84
- if (a.round || a.sticky) { \
85
- /* round up if guard is 1, down if guard is zero */ \
86
- a.mant = int128_add(a.mant, int128_make64(a.guard)); \
87
- } else if (a.guard) { \
88
- /* exactly .5, round up if odd */ \
89
- a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
90
- } \
91
- break; \
92
- } \
93
- } \
94
- /* \
95
- * OK, now we might have carried all the way up. \
96
- * So we might need to shr once \
97
- * at least we know that the lsb should be zero if we rounded and \
98
- * got a carry out... \
99
- */ \
100
- if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
101
- a = accum_norm_right(a, 1); \
102
- } \
103
- /* Overflow? */ \
104
- if (a.exp >= INF_EXP) { \
105
- /* Yep, inf result */ \
106
- float_raise(float_flag_overflow, fp_status); \
107
- float_raise(float_flag_inexact, fp_status); \
108
- switch (fp_status->float_rounding_mode) { \
109
- case float_round_to_zero: \
110
- return maxfinite_##SUFFIX(a.sign); \
111
- case float_round_up: \
112
- if (a.sign == 0) { \
113
- return infinite_##SUFFIX(a.sign); \
114
- } else { \
115
- return maxfinite_##SUFFIX(a.sign); \
116
- } \
117
- case float_round_down: \
118
- if (a.sign != 0) { \
119
- return infinite_##SUFFIX(a.sign); \
120
- } else { \
121
- return maxfinite_##SUFFIX(a.sign); \
122
- } \
123
- default: \
124
- return infinite_##SUFFIX(a.sign); \
125
- } \
126
- } \
127
- /* Underflow? */ \
128
- if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
129
- /* Leading one means: No, we're normal. So, we should be done... */ \
130
- INTERNAL_TYPE ret; \
131
- ret.i = 0; \
132
- ret.sign = a.sign; \
133
- ret.exp = a.exp; \
134
- ret.mant = int128_getlo(a.mant); \
135
- return ret.i; \
136
- } \
137
- assert(a.exp == 1); \
138
- INTERNAL_TYPE ret; \
139
- ret.i = 0; \
140
- ret.sign = a.sign; \
141
- ret.exp = 0; \
142
- ret.mant = int128_getlo(a.mant); \
143
- return ret.i; \
144
+static float64 accum_round_float64(Accum a, float_status *fp_status)
145
+{
146
+ if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
147
+ && ((a.guard | a.round | a.sticky) == 0)) {
148
+ /* result zero */
149
+ switch (fp_status->float_rounding_mode) {
150
+ case float_round_down:
151
+ return zero_float64(1);
152
+ default:
153
+ return zero_float64(0);
154
+ }
155
+ }
156
+ /*
157
+ * Normalize right
158
+ * We want DF_MANTBITS bits of mantissa plus the leading one.
159
+ * That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF
160
+ * So we need to normalize right while the high word is non-zero and
161
+ * while the low word is nonzero when masked with 0xffe0_0000_0000_0000
162
+ */
163
+ while ((int128_gethi(a.mant) != 0) ||
164
+ ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) {
165
+ a = accum_norm_right(a, 1);
166
+ }
167
+ /*
168
+ * OK, now normalize left
169
+ * We want to normalize left until we have a leading one in bit 24
170
+ * Theoretically, we only need to shift a maximum of one to the left if we
171
+ * shifted out lots of bits from B, or if we had no shift / 1 shift sticky
172
+ * should be 0
173
+ */
174
+ while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) {
175
+ a = accum_norm_left(a);
176
+ }
177
+ /*
178
+ * OK, now we might need to denormalize because of potential underflow.
179
+ * We need to do this before rounding, and rounding might make us normal
180
+ * again
181
+ */
182
+ while (a.exp <= 0) {
183
+ a = accum_norm_right(a, 1 - a.exp);
184
+ /*
185
+ * Do we have underflow?
186
+ * That's when we get an inexact answer because we ran out of bits
187
+ * in a denormal.
188
+ */
189
+ if (a.guard || a.round || a.sticky) {
190
+ float_raise(float_flag_underflow, fp_status);
191
+ }
192
+ }
193
+ /* OK, we're relatively canonical... now we need to round */
194
+ if (a.guard || a.round || a.sticky) {
195
+ float_raise(float_flag_inexact, fp_status);
196
+ switch (fp_status->float_rounding_mode) {
197
+ case float_round_to_zero:
198
+ /* Chop and we're done */
199
+ break;
200
+ case float_round_up:
201
+ if (a.sign == 0) {
202
+ a.mant = int128_add(a.mant, int128_one());
203
+ }
204
+ break;
205
+ case float_round_down:
206
+ if (a.sign != 0) {
207
+ a.mant = int128_add(a.mant, int128_one());
208
+ }
209
+ break;
210
+ default:
211
+ if (a.round || a.sticky) {
212
+ /* round up if guard is 1, down if guard is zero */
213
+ a.mant = int128_add(a.mant, int128_make64(a.guard));
214
+ } else if (a.guard) {
215
+ /* exactly .5, round up if odd */
216
+ a.mant = int128_add(a.mant, int128_and(a.mant, int128_one()));
217
+ }
218
+ break;
219
+ }
220
+ }
221
+ /*
222
+ * OK, now we might have carried all the way up.
223
+ * So we might need to shr once
224
+ * at least we know that the lsb should be zero if we rounded and
225
+ * got a carry out...
226
+ */
227
+ if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) {
228
+ a = accum_norm_right(a, 1);
229
+ }
230
+ /* Overflow? */
231
+ if (a.exp >= DF_INF_EXP) {
232
+ /* Yep, inf result */
233
+ float_raise(float_flag_overflow, fp_status);
234
+ float_raise(float_flag_inexact, fp_status);
235
+ switch (fp_status->float_rounding_mode) {
236
+ case float_round_to_zero:
237
+ return maxfinite_float64(a.sign);
238
+ case float_round_up:
239
+ if (a.sign == 0) {
240
+ return infinite_float64(a.sign);
241
+ } else {
242
+ return maxfinite_float64(a.sign);
243
+ }
244
+ case float_round_down:
245
+ if (a.sign != 0) {
246
+ return infinite_float64(a.sign);
247
+ } else {
248
+ return maxfinite_float64(a.sign);
249
+ }
250
+ default:
251
+ return infinite_float64(a.sign);
252
+ }
253
+ }
254
+ /* Underflow? */
255
+ if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
256
+ /* Leading one means: No, we're normal. So, we should be done... */
257
+ Double ret;
258
+ ret.i = 0;
259
+ ret.sign = a.sign;
260
+ ret.exp = a.exp;
261
+ ret.mant = int128_getlo(a.mant);
262
+ return ret.i;
263
+ }
264
+ assert(a.exp == 1);
265
+ Double ret;
266
+ ret.i = 0;
267
+ ret.sign = a.sign;
268
+ ret.exp = 0;
269
+ ret.mant = int128_getlo(a.mant);
270
+ return ret.i;
71
}
271
}
72
272
73
static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
273
-GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
274
-
275
float64 internal_mpyhh(float64 a, float64 b,
276
unsigned long long int accumulated,
277
float_status *fp_status)
--
2.43.0
This emphasizes that we don't support s390, only 64-bit s390x hosts.

Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 meson.build                              | 2 --
 tcg/{s390 => s390x}/tcg-target-con-set.h | 0
 tcg/{s390 => s390x}/tcg-target-con-str.h | 0
 tcg/{s390 => s390x}/tcg-target.h         | 0
 tcg/{s390 => s390x}/tcg-target.c.inc     | 0
 5 files changed, 2 deletions(-)
 rename tcg/{s390 => s390x}/tcg-target-con-set.h (100%)
 rename tcg/{s390 => s390x}/tcg-target-con-str.h (100%)
 rename tcg/{s390 => s390x}/tcg-target.h (100%)
 rename tcg/{s390 => s390x}/tcg-target.c.inc (100%)

This structure, with bitfields, is incorrect for big-endian.
Use the existing float32_getexp_raw which uses extract32.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/fma_emu.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)
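The bitfield union removed by the hexagon patch assigns mant/exp/sign to
compiler- and endian-dependent bit positions, so overlaying it on a uint32_t
only works on little-endian hosts. Extracting by explicit bit position is
layout-independent; a self-contained sketch (the helper mimics QEMU's
extract32() from "qemu/bitops.h"):

    #include <stdint.h>

    /* Same (value, start, length) meaning as QEMU's extract32(). */
    static inline uint32_t extract32(uint32_t value, int start, int length)
    {
        return (value >> start) & (~0U >> (32 - length));
    }

    /* IEEE single-precision fields by bit position, valid on any host. */
    static inline uint32_t f32_mant(uint32_t f) { return extract32(f, 0, 23); }
    static inline uint32_t f32_exp(uint32_t f)  { return extract32(f, 23, 8); }
    static inline uint32_t f32_sign(uint32_t f) { return extract32(f, 31, 1); }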
9
19
diff --git a/meson.build b/meson.build
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
20
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
21
--- a/meson.build
12
--- a/target/hexagon/fma_emu.c
22
+++ b/meson.build
13
+++ b/target/hexagon/fma_emu.c
23
@@ -XXX,XX +XXX,XX @@ if not get_option('tcg').disabled()
14
@@ -XXX,XX +XXX,XX @@ typedef union {
24
tcg_arch = 'tci'
15
};
25
elif config_host['ARCH'] == 'sparc64'
16
} Double;
26
tcg_arch = 'sparc'
17
27
- elif config_host['ARCH'] == 's390x'
18
-typedef union {
28
- tcg_arch = 's390'
19
- float f;
29
elif config_host['ARCH'] in ['x86_64', 'x32']
20
- uint32_t i;
30
tcg_arch = 'i386'
21
- struct {
31
elif config_host['ARCH'] == 'ppc64'
22
- uint32_t mant:23;
32
diff --git a/tcg/s390/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
23
- uint32_t exp:8;
33
similarity index 100%
24
- uint32_t sign:1;
34
rename from tcg/s390/tcg-target-con-set.h
25
- };
35
rename to tcg/s390x/tcg-target-con-set.h
26
-} Float;
36
diff --git a/tcg/s390/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
27
-
37
similarity index 100%
28
static uint64_t float64_getmant(float64 f64)
38
rename from tcg/s390/tcg-target-con-str.h
29
{
39
rename to tcg/s390x/tcg-target-con-str.h
30
Double a = { .i = f64 };
40
diff --git a/tcg/s390/tcg-target.h b/tcg/s390x/tcg-target.h
31
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
41
similarity index 100%
32
42
rename from tcg/s390/tcg-target.h
33
int32_t float32_getexp(float32 f32)
43
rename to tcg/s390x/tcg-target.h
34
{
44
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
35
- Float a = { .i = f32 };
45
similarity index 100%
36
+ int exp = float32_getexp_raw(f32);
46
rename from tcg/s390/tcg-target.c.inc
37
if (float32_is_normal(f32)) {
47
rename to tcg/s390x/tcg-target.c.inc
38
- return a.exp;
39
+ return exp;
40
}
41
if (float32_is_denormal(f32)) {
42
- return a.exp + 1;
43
+ return exp + 1;
44
}
45
return -1;
46
}
--
2.43.0
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/s390x/tcg-target.h     | 2 +-
 tcg/s390x/tcg-target.c.inc | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

This structure, with bitfields, is incorrect for big-endian.
Use extract64 and deposit64 instead.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/fma_emu.c | 46 ++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 30 deletions(-)
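The float64 half of the same cleanup also has to write the fields back; the
fma_emu.c hunk further down does that with deposit64() instead of assigning
through the old Double bitfield. A sketch of that packing step (deposit64
here is a local stand-in with the same meaning as QEMU's helper):

    #include <stdint.h>

    /* (value, start, length, field), as in QEMU's deposit64(). */
    static inline uint64_t deposit64(uint64_t value, int start, int length,
                                     uint64_t fieldval)
    {
        uint64_t mask = (~0ULL >> (64 - length)) << start;
        return (value & ~mask) | ((fieldval << start) & mask);
    }

    /* Assemble an IEEE double image from its fields by bit position. */
    static inline uint64_t pack_float64(unsigned sign, unsigned exp,
                                        uint64_t mant)
    {
        uint64_t r = 0;
        r = deposit64(r, 0, 52, mant);   /* fraction */
        r = deposit64(r, 52, 11, exp);   /* biased exponent */
        r = deposit64(r, 63, 1, sign);   /* sign bit */
        return r;
    }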
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
9
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/s390x/tcg-target.h
12
--- a/target/hexagon/fma_emu.c
11
+++ b/tcg/s390x/tcg-target.h
13
+++ b/target/hexagon/fma_emu.c
12
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
14
@@ -XXX,XX +XXX,XX @@
13
#define TCG_TARGET_HAS_shi_vec 0
15
14
#define TCG_TARGET_HAS_shs_vec 0
16
#define WAY_BIG_EXP 4096
15
#define TCG_TARGET_HAS_shv_vec 0
17
16
-#define TCG_TARGET_HAS_mul_vec 0
18
-typedef union {
17
+#define TCG_TARGET_HAS_mul_vec 1
19
- double f;
18
#define TCG_TARGET_HAS_sat_vec 0
20
- uint64_t i;
19
#define TCG_TARGET_HAS_minmax_vec 0
21
- struct {
20
#define TCG_TARGET_HAS_bitsel_vec 0
22
- uint64_t mant:52;
21
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
23
- uint64_t exp:11;
22
index XXXXXXX..XXXXXXX 100644
24
- uint64_t sign:1;
23
--- a/tcg/s390x/tcg-target.c.inc
25
- };
24
+++ b/tcg/s390x/tcg-target.c.inc
26
-} Double;
25
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
27
-
26
VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
28
static uint64_t float64_getmant(float64 f64)
27
VRRc_VCH = 0xe7fb, /* " */
29
{
28
VRRc_VCHL = 0xe7f9, /* " */
30
- Double a = { .i = f64 };
29
+ VRRc_VML = 0xe7a2,
31
+ uint64_t mant = extract64(f64, 0, 52);
30
VRRc_VN = 0xe768,
32
if (float64_is_normal(f64)) {
31
VRRc_VNC = 0xe769,
33
- return a.mant | 1ULL << 52;
32
VRRc_VNO = 0xe76b,
34
+ return mant | 1ULL << 52;
33
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
35
}
34
case INDEX_op_andc_vec:
36
if (float64_is_zero(f64)) {
35
tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
36
break;
37
+ case INDEX_op_mul_vec:
38
+ tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
39
+ break;
40
case INDEX_op_or_vec:
41
tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
42
break;
43
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
44
return 1;
45
case INDEX_op_cmp_vec:
46
return -1;
47
+ case INDEX_op_mul_vec:
48
+ return vece < MO_64;
49
default:
50
return 0;
37
return 0;
51
}
38
}
52
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
39
if (float64_is_denormal(f64)) {
53
case INDEX_op_orc_vec:
40
- return a.mant;
54
case INDEX_op_xor_vec:
41
+ return mant;
55
case INDEX_op_cmp_vec:
42
}
56
+ case INDEX_op_mul_vec:
43
return ~0ULL;
57
return C_O1_I2(v, v, v);
44
}
58
45
59
default:
46
int32_t float64_getexp(float64 f64)
47
{
48
- Double a = { .i = f64 };
49
+ int exp = extract64(f64, 52, 11);
50
if (float64_is_normal(f64)) {
51
- return a.exp;
52
+ return exp;
53
}
54
if (float64_is_denormal(f64)) {
55
- return a.exp + 1;
56
+ return exp + 1;
57
}
58
return -1;
59
}
60
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
61
/* Return a maximum finite value with the requested sign */
62
static float64 accum_round_float64(Accum a, float_status *fp_status)
63
{
64
+ uint64_t ret;
65
+
66
if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
67
&& ((a.guard | a.round | a.sticky) == 0)) {
68
/* result zero */
69
@@ -XXX,XX +XXX,XX @@ static float64 accum_round_float64(Accum a, float_status *fp_status)
70
}
71
}
72
/* Underflow? */
73
- if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
74
+ ret = int128_getlo(a.mant);
75
+ if (ret & (1ULL << DF_MANTBITS)) {
76
/* Leading one means: No, we're normal. So, we should be done... */
77
- Double ret;
78
- ret.i = 0;
79
- ret.sign = a.sign;
80
- ret.exp = a.exp;
81
- ret.mant = int128_getlo(a.mant);
82
- return ret.i;
83
+ ret = deposit64(ret, 52, 11, a.exp);
84
+ } else {
85
+ assert(a.exp == 1);
86
+ ret = deposit64(ret, 52, 11, 0);
87
}
88
- assert(a.exp == 1);
89
- Double ret;
90
- ret.i = 0;
91
- ret.sign = a.sign;
92
- ret.exp = 0;
93
- ret.mant = int128_getlo(a.mant);
94
- return ret.i;
95
+ ret = deposit64(ret, 63, 1, a.sign);
96
+ return ret;
97
}
98
99
float64 internal_mpyhh(float64 a, float64 b,
--
2.43.0
We (will) often have the complete MemOpIdx handy, so use that.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 trace/mem.h                   | 32 +++++++-----------------
 accel/tcg/cputlb.c            | 12 ++++------
 accel/tcg/user-exec.c         | 42 +++++++++++++++++++++++------------
 tcg/tcg-op.c                  |  8 +++----
 accel/tcg/atomic_common.c.inc |  6 ++---
 5 files changed, 49 insertions(+), 51 deletions(-)

No need to open-code 64x64->128-bit multiplication.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/fma_emu.c | 32 +++-----------------------------
 1 file changed, 3 insertions(+), 29 deletions(-)
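On the hexagon side, the removed code built the 128-bit product out of four
32x32 partial products; QEMU already has a widening multiply, so the hunk
below reduces this to mulu64() plus int128_make128(). For reference, what
mulu64() computes, shown stand-alone via the compiler's __int128:

    #include <stdint.h>

    /* Full 128-bit product of two 64-bit operands as a low/high pair,
     * i.e. what the patch delegates to QEMU's mulu64(). */
    static void mul_u64_full(uint64_t *plow, uint64_t *phigh,
                             uint64_t a, uint64_t b)
    {
        unsigned __int128 p = (unsigned __int128)a * b;
        *plow  = (uint64_t)p;
        *phigh = (uint64_t)(p >> 64);
    }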
diff --git a/trace/mem.h b/trace/mem.h
9
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
15
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
16
--- a/trace/mem.h
11
--- a/target/hexagon/fma_emu.c
17
+++ b/trace/mem.h
12
+++ b/target/hexagon/fma_emu.c
18
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32)
19
#ifndef TRACE__MEM_H
14
return -1;
20
#define TRACE__MEM_H
21
22
-#include "tcg/tcg.h"
23
+#include "exec/memopidx.h"
24
25
#define TRACE_MEM_SZ_SHIFT_MASK 0xf /* size shift mask */
26
#define TRACE_MEM_SE (1ULL << 4) /* sign extended (y/n) */
27
@@ -XXX,XX +XXX,XX @@
28
#define TRACE_MEM_MMU_SHIFT 8 /* mmu idx */
29
30
/**
31
- * trace_mem_build_info:
32
+ * trace_mem_get_info:
33
*
34
* Return a value for the 'info' argument in guest memory access traces.
35
*/
36
-static inline uint16_t trace_mem_build_info(int size_shift, bool sign_extend,
37
- MemOp endianness, bool store,
38
- unsigned int mmu_idx)
39
+static inline uint16_t trace_mem_get_info(MemOpIdx oi, bool store)
40
{
41
+ MemOp op = get_memop(oi);
42
+ uint32_t size_shift = op & MO_SIZE;
43
+ bool sign_extend = op & MO_SIGN;
44
+ bool big_endian = (op & MO_BSWAP) == MO_BE;
45
uint16_t res;
46
47
res = size_shift & TRACE_MEM_SZ_SHIFT_MASK;
48
if (sign_extend) {
49
res |= TRACE_MEM_SE;
50
}
51
- if (endianness == MO_BE) {
52
+ if (big_endian) {
53
res |= TRACE_MEM_BE;
54
}
55
if (store) {
56
res |= TRACE_MEM_ST;
57
}
58
#ifdef CONFIG_SOFTMMU
59
- res |= mmu_idx << TRACE_MEM_MMU_SHIFT;
60
+ res |= get_mmuidx(oi) << TRACE_MEM_MMU_SHIFT;
61
#endif
62
+
63
return res;
64
}
15
}
65
16
66
-
17
-static uint32_t int128_getw0(Int128 x)
67
-/**
68
- * trace_mem_get_info:
69
- *
70
- * Return a value for the 'info' argument in guest memory access traces.
71
- */
72
-static inline uint16_t trace_mem_get_info(MemOp op,
73
- unsigned int mmu_idx,
74
- bool store)
75
-{
18
-{
76
- return trace_mem_build_info(op & MO_SIZE, !!(op & MO_SIGN),
19
- return int128_getlo(x);
77
- op & MO_BSWAP, store,
78
- mmu_idx);
79
-}
20
-}
80
-
21
-
81
#endif /* TRACE__MEM_H */
22
-static uint32_t int128_getw1(Int128 x)
82
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
23
-{
83
index XXXXXXX..XXXXXXX 100644
24
- return int128_getlo(x) >> 32;
84
--- a/accel/tcg/cputlb.c
25
-}
85
+++ b/accel/tcg/cputlb.c
26
-
86
@@ -XXX,XX +XXX,XX @@ static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
27
static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
87
int mmu_idx, uintptr_t retaddr,
88
MemOp op, FullLoadHelper *full_load)
89
{
28
{
90
- uint16_t meminfo;
29
- Int128 a, b;
91
- MemOpIdx oi;
30
- uint64_t pp0, pp1a, pp1b, pp1s, pp2;
92
+ MemOpIdx oi = make_memop_idx(op, mmu_idx);
31
+ uint64_t l, h;
93
+ uint16_t meminfo = trace_mem_get_info(oi, false);
32
94
uint64_t ret;
33
- a = int128_make64(ai);
95
34
- b = int128_make64(bi);
96
- meminfo = trace_mem_get_info(op, mmu_idx, false);
35
- pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
97
trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
36
- pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
98
37
- pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
99
- oi = make_memop_idx(op, mmu_idx);
38
- pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
100
ret = full_load(env, addr, oi, retaddr);
39
-
101
40
- pp1s = pp1a + pp1b;
102
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
41
- if ((pp1s < pp1a) || (pp1s < pp1b)) {
103
@@ -XXX,XX +XXX,XX @@ static inline void QEMU_ALWAYS_INLINE
42
- pp2 += (1ULL << 32);
104
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
43
- }
105
int mmu_idx, uintptr_t retaddr, MemOp op)
44
- uint64_t ret_low = pp0 + (pp1s << 32);
106
{
45
- if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
107
- MemOpIdx oi;
46
- pp2 += 1;
108
- uint16_t meminfo;
47
- }
109
+ MemOpIdx oi = make_memop_idx(op, mmu_idx);
48
-
110
+ uint16_t meminfo = trace_mem_get_info(oi, true);
49
- return int128_make128(ret_low, pp2 + (pp1s >> 32));
111
50
+ mulu64(&l, &h, ai, bi);
112
- meminfo = trace_mem_get_info(op, mmu_idx, true);
51
+ return int128_make128(l, h);
113
trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
52
}
114
53
115
- oi = make_memop_idx(op, mmu_idx);
54
static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
116
store_helper(env, addr, val, oi, retaddr, op);
117
118
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
119
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/accel/tcg/user-exec.c
122
+++ b/accel/tcg/user-exec.c
123
@@ -XXX,XX +XXX,XX @@ int cpu_signal_handler(int host_signum, void *pinfo,
124
125
uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr)
126
{
127
+ MemOpIdx oi = make_memop_idx(MO_UB, MMU_USER_IDX);
128
+ uint16_t meminfo = trace_mem_get_info(oi, false);
129
uint32_t ret;
130
- uint16_t meminfo = trace_mem_get_info(MO_UB, MMU_USER_IDX, false);
131
132
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
133
ret = ldub_p(g2h(env_cpu(env), ptr));
134
@@ -XXX,XX +XXX,XX @@ int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr)
135
136
uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr)
137
{
138
+ MemOpIdx oi = make_memop_idx(MO_BEUW, MMU_USER_IDX);
139
+ uint16_t meminfo = trace_mem_get_info(oi, false);
140
uint32_t ret;
141
- uint16_t meminfo = trace_mem_get_info(MO_BEUW, MMU_USER_IDX, false);
142
143
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
144
ret = lduw_be_p(g2h(env_cpu(env), ptr));
145
@@ -XXX,XX +XXX,XX @@ int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr)
146
147
uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr)
148
{
149
+ MemOpIdx oi = make_memop_idx(MO_BEUL, MMU_USER_IDX);
150
+ uint16_t meminfo = trace_mem_get_info(oi, false);
151
uint32_t ret;
152
- uint16_t meminfo = trace_mem_get_info(MO_BEUL, MMU_USER_IDX, false);
153
154
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
155
ret = ldl_be_p(g2h(env_cpu(env), ptr));
156
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr)
157
158
uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr)
159
{
160
+ MemOpIdx oi = make_memop_idx(MO_BEQ, MMU_USER_IDX);
161
+ uint16_t meminfo = trace_mem_get_info(oi, false);
162
uint64_t ret;
163
- uint16_t meminfo = trace_mem_get_info(MO_BEQ, MMU_USER_IDX, false);
164
165
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
166
ret = ldq_be_p(g2h(env_cpu(env), ptr));
167
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr)
168
169
uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr)
170
{
171
+ MemOpIdx oi = make_memop_idx(MO_LEUW, MMU_USER_IDX);
172
+ uint16_t meminfo = trace_mem_get_info(oi, false);
173
uint32_t ret;
174
- uint16_t meminfo = trace_mem_get_info(MO_LEUW, MMU_USER_IDX, false);
175
176
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
177
ret = lduw_le_p(g2h(env_cpu(env), ptr));
178
@@ -XXX,XX +XXX,XX @@ int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr)
179
180
uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr)
181
{
182
+ MemOpIdx oi = make_memop_idx(MO_LEUL, MMU_USER_IDX);
183
+ uint16_t meminfo = trace_mem_get_info(oi, false);
184
uint32_t ret;
185
- uint16_t meminfo = trace_mem_get_info(MO_LEUL, MMU_USER_IDX, false);
186
187
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
188
ret = ldl_le_p(g2h(env_cpu(env), ptr));
189
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr)
190
191
uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr)
192
{
193
+ MemOpIdx oi = make_memop_idx(MO_LEQ, MMU_USER_IDX);
194
+ uint16_t meminfo = trace_mem_get_info(oi, false);
195
uint64_t ret;
196
- uint16_t meminfo = trace_mem_get_info(MO_LEQ, MMU_USER_IDX, false);
197
198
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
199
ret = ldq_le_p(g2h(env_cpu(env), ptr));
200
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr)
201
202
void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
203
{
204
- uint16_t meminfo = trace_mem_get_info(MO_UB, MMU_USER_IDX, true);
205
+ MemOpIdx oi = make_memop_idx(MO_UB, MMU_USER_IDX);
206
+ uint16_t meminfo = trace_mem_get_info(oi, true);
207
208
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
209
stb_p(g2h(env_cpu(env), ptr), val);
210
@@ -XXX,XX +XXX,XX @@ void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
211
212
void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
213
{
214
- uint16_t meminfo = trace_mem_get_info(MO_BEUW, MMU_USER_IDX, true);
215
+ MemOpIdx oi = make_memop_idx(MO_BEUW, MMU_USER_IDX);
216
+ uint16_t meminfo = trace_mem_get_info(oi, true);
217
218
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
219
stw_be_p(g2h(env_cpu(env), ptr), val);
220
@@ -XXX,XX +XXX,XX @@ void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
221
222
void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
223
{
224
- uint16_t meminfo = trace_mem_get_info(MO_BEUL, MMU_USER_IDX, true);
225
+ MemOpIdx oi = make_memop_idx(MO_BEUL, MMU_USER_IDX);
226
+ uint16_t meminfo = trace_mem_get_info(oi, true);
227
228
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
229
stl_be_p(g2h(env_cpu(env), ptr), val);
230
@@ -XXX,XX +XXX,XX @@ void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
231
232
void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val)
233
{
234
- uint16_t meminfo = trace_mem_get_info(MO_BEQ, MMU_USER_IDX, true);
235
+ MemOpIdx oi = make_memop_idx(MO_BEQ, MMU_USER_IDX);
236
+ uint16_t meminfo = trace_mem_get_info(oi, true);
237
238
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
239
stq_be_p(g2h(env_cpu(env), ptr), val);
240
@@ -XXX,XX +XXX,XX @@ void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val)
241
242
void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
243
{
244
- uint16_t meminfo = trace_mem_get_info(MO_LEUW, MMU_USER_IDX, true);
245
+ MemOpIdx oi = make_memop_idx(MO_LEUW, MMU_USER_IDX);
246
+ uint16_t meminfo = trace_mem_get_info(oi, true);
247
248
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
249
stw_le_p(g2h(env_cpu(env), ptr), val);
250
@@ -XXX,XX +XXX,XX @@ void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
251
252
void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
253
{
254
- uint16_t meminfo = trace_mem_get_info(MO_LEUL, MMU_USER_IDX, true);
255
+ MemOpIdx oi = make_memop_idx(MO_LEUL, MMU_USER_IDX);
256
+ uint16_t meminfo = trace_mem_get_info(oi, true);
257
258
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
259
stl_le_p(g2h(env_cpu(env), ptr), val);
260
@@ -XXX,XX +XXX,XX @@ void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val)
261
262
void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val)
263
{
264
- uint16_t meminfo = trace_mem_get_info(MO_LEQ, MMU_USER_IDX, true);
265
+ MemOpIdx oi = make_memop_idx(MO_LEQ, MMU_USER_IDX);
266
+ uint16_t meminfo = trace_mem_get_info(oi, true);
267
268
trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo);
269
stq_le_p(g2h(env_cpu(env), ptr), val);
270
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
271
index XXXXXXX..XXXXXXX 100644
272
--- a/tcg/tcg-op.c
273
+++ b/tcg/tcg-op.c
274
@@ -XXX,XX +XXX,XX @@ static inline void plugin_gen_mem_callbacks(TCGv vaddr, uint16_t info)
275
void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
276
{
277
MemOp orig_memop;
278
- uint16_t info = trace_mem_get_info(memop, idx, 0);
279
+ uint16_t info = trace_mem_get_info(make_memop_idx(memop, idx), 0);
280
281
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
282
memop = tcg_canonicalize_memop(memop, 0, 0);
283
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
284
void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
285
{
286
TCGv_i32 swap = NULL;
287
- uint16_t info = trace_mem_get_info(memop, idx, 1);
288
+ uint16_t info = trace_mem_get_info(make_memop_idx(memop, idx), 1);
289
290
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
291
memop = tcg_canonicalize_memop(memop, 0, 1);
292
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
293
294
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
295
memop = tcg_canonicalize_memop(memop, 1, 0);
296
- info = trace_mem_get_info(memop, idx, 0);
297
+ info = trace_mem_get_info(make_memop_idx(memop, idx), 0);
298
trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, info);
299
300
orig_memop = memop;
301
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
302
303
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
304
memop = tcg_canonicalize_memop(memop, 1, 1);
305
- info = trace_mem_get_info(memop, idx, 1);
306
+ info = trace_mem_get_info(make_memop_idx(memop, idx), 1);
307
trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env, addr, info);
308
309
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
310
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
311
index XXXXXXX..XXXXXXX 100644
312
--- a/accel/tcg/atomic_common.c.inc
313
+++ b/accel/tcg/atomic_common.c.inc
314
@@ -XXX,XX +XXX,XX @@ static uint16_t atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr,
315
MemOpIdx oi)
316
{
317
CPUState *cpu = env_cpu(env);
318
- uint16_t info = trace_mem_get_info(get_memop(oi), get_mmuidx(oi), false);
319
+ uint16_t info = trace_mem_get_info(oi, false);
320
321
trace_guest_mem_before_exec(cpu, addr, info);
322
trace_guest_mem_before_exec(cpu, addr, info | TRACE_MEM_ST);
323
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
324
static uint16_t atomic_trace_ld_pre(CPUArchState *env, target_ulong addr,
325
MemOpIdx oi)
326
{
327
- uint16_t info = trace_mem_get_info(get_memop(oi), get_mmuidx(oi), false);
328
+ uint16_t info = trace_mem_get_info(oi, false);
329
330
trace_guest_mem_before_exec(env_cpu(env), addr, info);
331
332
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
333
static uint16_t atomic_trace_st_pre(CPUArchState *env, target_ulong addr,
334
MemOpIdx oi)
335
{
336
- uint16_t info = trace_mem_get_info(get_memop(oi), get_mmuidx(oi), true);
337
+ uint16_t info = trace_mem_get_info(oi, true);
338
339
trace_guest_mem_before_exec(env_cpu(env), addr, info);
340
341
--
55
--
342
2.25.1
56
2.43.0
343
344
diff view generated by jsdifflib
1
The image was upgraded to a full image in ee381b7fe146.
1
Initialize x with accumulated via direct assignment,
2
This makes it possible to use docker-test@image syntax
2
rather than multiplying by 1.
3
with this container.
4
3
4
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
7
Message-Id: <20210930163636.721311-2-richard.henderson@linaro.org>
8
---
6
---
9
tests/docker/Makefile.include | 2 +-
7
target/hexagon/fma_emu.c | 2 +-
10
1 file changed, 1 insertion(+), 1 deletion(-)
8
1 file changed, 1 insertion(+), 1 deletion(-)
11
9
12
diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
10
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/tests/docker/Makefile.include
12
--- a/target/hexagon/fma_emu.c
15
+++ b/tests/docker/Makefile.include
13
+++ b/target/hexagon/fma_emu.c
16
@@ -XXX,XX +XXX,XX @@ DOCKER_PARTIAL_IMAGES += debian-riscv64-cross
14
@@ -XXX,XX +XXX,XX @@ float64 internal_mpyhh(float64 a, float64 b,
17
DOCKER_PARTIAL_IMAGES += debian-sh4-cross debian-sparc64-cross
15
float64_is_infinity(b)) {
18
DOCKER_PARTIAL_IMAGES += debian-tricore-cross
16
return float64_mul(a, b, fp_status);
19
DOCKER_PARTIAL_IMAGES += debian-xtensa-cross
17
}
20
-DOCKER_PARTIAL_IMAGES += fedora-i386-cross fedora-cris-cross
18
- x.mant = int128_mul_6464(accumulated, 1);
21
+DOCKER_PARTIAL_IMAGES += fedora-cris-cross
19
+ x.mant = int128_make64(accumulated);
22
20
x.sticky = sticky;
23
# Rules for building linux-user powered images
21
prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
24
#
22
x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
25
--
23
--
26
2.25.1
24
2.43.0
27
28
diff view generated by jsdifflib
1
We are already inconsistent about whether or not
1
Convert all targets simultaneously, as the gen_intermediate_code
2
MO_SIGN is set in trace_mem_get_info. Dropping it
2
function disappears from the target. While there are possible
3
entirely allows some simplification.
3
workarounds, they're larger than simply performing the conversion.
4
4
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
accel/tcg/cputlb.c | 10 +++-------
8
include/exec/translator.h | 14 --------------
9
accel/tcg/user-exec.c | 45 ++++++-------------------------------------
9
include/hw/core/tcg-cpu-ops.h | 13 +++++++++++++
10
2 files changed, 9 insertions(+), 46 deletions(-)
10
target/alpha/cpu.h | 2 ++
11
target/arm/internals.h | 2 ++
12
target/avr/cpu.h | 2 ++
13
target/hexagon/cpu.h | 2 ++
14
target/hppa/cpu.h | 2 ++
15
target/i386/tcg/helper-tcg.h | 2 ++
16
target/loongarch/internals.h | 2 ++
17
target/m68k/cpu.h | 2 ++
18
target/microblaze/cpu.h | 2 ++
19
target/mips/tcg/tcg-internal.h | 2 ++
20
target/openrisc/cpu.h | 2 ++
21
target/ppc/cpu.h | 2 ++
22
target/riscv/cpu.h | 3 +++
23
target/rx/cpu.h | 2 ++
24
target/s390x/s390x-internal.h | 2 ++
25
target/sh4/cpu.h | 2 ++
26
target/sparc/cpu.h | 2 ++
27
target/tricore/cpu.h | 2 ++
28
target/xtensa/cpu.h | 2 ++
29
accel/tcg/cpu-exec.c | 8 +++++---
30
accel/tcg/translate-all.c | 8 +++++---
31
target/alpha/cpu.c | 1 +
32
target/alpha/translate.c | 4 ++--
33
target/arm/cpu.c | 1 +
34
target/arm/tcg/cpu-v7m.c | 1 +
35
target/arm/tcg/translate.c | 5 ++---
36
target/avr/cpu.c | 1 +
37
target/avr/translate.c | 6 +++---
38
target/hexagon/cpu.c | 1 +
39
target/hexagon/translate.c | 4 ++--
40
target/hppa/cpu.c | 1 +
41
target/hppa/translate.c | 4 ++--
42
target/i386/tcg/tcg-cpu.c | 1 +
43
target/i386/tcg/translate.c | 5 ++---
44
target/loongarch/cpu.c | 1 +
45
target/loongarch/tcg/translate.c | 4 ++--
46
target/m68k/cpu.c | 1 +
47
target/m68k/translate.c | 4 ++--
48
target/microblaze/cpu.c | 1 +
49
target/microblaze/translate.c | 4 ++--
50
target/mips/cpu.c | 1 +
51
target/mips/tcg/translate.c | 4 ++--
52
target/openrisc/cpu.c | 1 +
53
target/openrisc/translate.c | 4 ++--
54
target/ppc/cpu_init.c | 1 +
55
target/ppc/translate.c | 4 ++--
56
target/riscv/tcg/tcg-cpu.c | 1 +
57
target/riscv/translate.c | 4 ++--
58
target/rx/cpu.c | 1 +
59
target/rx/translate.c | 4 ++--
60
target/s390x/cpu.c | 1 +
61
target/s390x/tcg/translate.c | 4 ++--
62
target/sh4/cpu.c | 1 +
63
target/sh4/translate.c | 4 ++--
64
target/sparc/cpu.c | 1 +
65
target/sparc/translate.c | 4 ++--
66
target/tricore/cpu.c | 1 +
67
target/tricore/translate.c | 5 ++---
68
target/xtensa/cpu.c | 1 +
69
target/xtensa/translate.c | 4 ++--
70
62 files changed, 121 insertions(+), 62 deletions(-)
11
71
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
72
diff --git a/include/exec/translator.h b/include/exec/translator.h
13
index XXXXXXX..XXXXXXX 100644
73
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
74
--- a/include/exec/translator.h
15
+++ b/accel/tcg/cputlb.c
75
+++ b/include/exec/translator.h
16
@@ -XXX,XX +XXX,XX @@ static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
76
@@ -XXX,XX +XXX,XX @@
17
meminfo = trace_mem_get_info(op, mmu_idx, false);
77
#include "qemu/bswap.h"
18
trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
78
#include "exec/vaddr.h"
19
79
20
- op &= ~MO_SIGN;
80
-/**
21
oi = make_memop_idx(op, mmu_idx);
81
- * gen_intermediate_code
22
ret = full_load(env, addr, oi, retaddr);
82
- * @cpu: cpu context
23
83
- * @tb: translation block
24
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
84
- * @max_insns: max number of instructions to translate
25
int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
85
- * @pc: guest virtual program counter address
26
int mmu_idx, uintptr_t ra)
86
- * @host_pc: host physical program counter address
27
{
87
- *
28
- return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
88
- * This function must be provided by the target, which should create
29
- full_ldub_mmu);
89
- * the target-specific DisasContext, and then invoke translator_loop.
30
+ return (int8_t)cpu_ldub_mmuidx_ra(env, addr, mmu_idx, ra);
90
- */
91
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
92
- vaddr pc, void *host_pc);
93
-
94
/**
95
* DisasJumpType:
96
* @DISAS_NEXT: Next instruction in program order.
97
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
98
index XXXXXXX..XXXXXXX 100644
99
--- a/include/hw/core/tcg-cpu-ops.h
100
+++ b/include/hw/core/tcg-cpu-ops.h
101
@@ -XXX,XX +XXX,XX @@ struct TCGCPUOps {
102
* Called when the first CPU is realized.
103
*/
104
void (*initialize)(void);
105
+ /**
106
+ * @translate_code: Translate guest instructions to TCGOps
107
+ * @cpu: cpu context
108
+ * @tb: translation block
109
+ * @max_insns: max number of instructions to translate
110
+ * @pc: guest virtual program counter address
111
+ * @host_pc: host physical program counter address
112
+ *
113
+ * This function must be provided by the target, which should create
114
+ * the target-specific DisasContext, and then invoke translator_loop.
115
+ */
116
+ void (*translate_code)(CPUState *cpu, TranslationBlock *tb,
117
+ int *max_insns, vaddr pc, void *host_pc);
118
/**
119
* @synchronize_from_tb: Synchronize state from a TCG #TranslationBlock
120
*
121
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
122
index XXXXXXX..XXXXXXX 100644
123
--- a/target/alpha/cpu.h
124
+++ b/target/alpha/cpu.h
125
@@ -XXX,XX +XXX,XX @@ enum {
126
};
127
128
void alpha_translate_init(void);
129
+void alpha_translate_code(CPUState *cs, TranslationBlock *tb,
130
+ int *max_insns, vaddr pc, void *host_pc);
131
132
#define CPU_RESOLVING_TYPE TYPE_ALPHA_CPU
133
134
diff --git a/target/arm/internals.h b/target/arm/internals.h
135
index XXXXXXX..XXXXXXX 100644
136
--- a/target/arm/internals.h
137
+++ b/target/arm/internals.h
138
@@ -XXX,XX +XXX,XX @@ void init_cpreg_list(ARMCPU *cpu);
139
140
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
141
void arm_translate_init(void);
142
+void arm_translate_code(CPUState *cs, TranslationBlock *tb,
143
+ int *max_insns, vaddr pc, void *host_pc);
144
145
void arm_cpu_register_gdb_commands(ARMCPU *cpu);
146
void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *,
147
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
148
index XXXXXXX..XXXXXXX 100644
149
--- a/target/avr/cpu.h
150
+++ b/target/avr/cpu.h
151
@@ -XXX,XX +XXX,XX @@ static inline void set_avr_feature(CPUAVRState *env, int feature)
31
}
152
}
32
153
33
uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
154
void avr_cpu_tcg_init(void);
34
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
155
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
35
int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
156
+ int *max_insns, vaddr pc, void *host_pc);
36
int mmu_idx, uintptr_t ra)
157
37
{
158
int cpu_avr_exec(CPUState *cpu);
38
- return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
159
39
- full_be_lduw_mmu);
160
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
40
+ return (int16_t)cpu_lduw_be_mmuidx_ra(env, addr, mmu_idx, ra);
161
index XXXXXXX..XXXXXXX 100644
162
--- a/target/hexagon/cpu.h
163
+++ b/target/hexagon/cpu.h
164
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
165
typedef HexagonCPU ArchCPU;
166
167
void hexagon_translate_init(void);
168
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
169
+ int *max_insns, vaddr pc, void *host_pc);
170
171
#include "exec/cpu-all.h"
172
173
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
174
index XXXXXXX..XXXXXXX 100644
175
--- a/target/hppa/cpu.h
176
+++ b/target/hppa/cpu.h
177
@@ -XXX,XX +XXX,XX @@ static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
41
}
178
}
42
179
43
uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
180
void hppa_translate_init(void);
44
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
181
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
45
int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
182
+ int *max_insns, vaddr pc, void *host_pc);
46
int mmu_idx, uintptr_t ra)
183
47
{
184
#define CPU_RESOLVING_TYPE TYPE_HPPA_CPU
48
- return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
185
49
- full_le_lduw_mmu);
186
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
50
+ return (int16_t)cpu_lduw_le_mmuidx_ra(env, addr, mmu_idx, ra);
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/i386/tcg/helper-tcg.h
189
+++ b/target/i386/tcg/helper-tcg.h
190
@@ -XXX,XX +XXX,XX @@ static inline target_long lshift(target_long x, int n)
191
192
/* translate.c */
193
void tcg_x86_init(void);
194
+void x86_translate_code(CPUState *cs, TranslationBlock *tb,
195
+ int *max_insns, vaddr pc, void *host_pc);
196
197
/* excp_helper.c */
198
G_NORETURN void raise_exception(CPUX86State *env, int exception_index);
199
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
200
index XXXXXXX..XXXXXXX 100644
201
--- a/target/loongarch/internals.h
202
+++ b/target/loongarch/internals.h
203
@@ -XXX,XX +XXX,XX @@
204
#define TARGET_VIRT_MASK MAKE_64BIT_MASK(0, TARGET_VIRT_ADDR_SPACE_BITS)
205
206
void loongarch_translate_init(void);
207
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
208
+ int *max_insns, vaddr pc, void *host_pc);
209
210
void G_NORETURN do_raise_exception(CPULoongArchState *env,
211
uint32_t exception,
212
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
213
index XXXXXXX..XXXXXXX 100644
214
--- a/target/m68k/cpu.h
215
+++ b/target/m68k/cpu.h
216
@@ -XXX,XX +XXX,XX @@ int m68k_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
217
int m68k_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
218
219
void m68k_tcg_init(void);
220
+void m68k_translate_code(CPUState *cs, TranslationBlock *tb,
221
+ int *max_insns, vaddr pc, void *host_pc);
222
void m68k_cpu_init_gdb(M68kCPU *cpu);
223
uint32_t cpu_m68k_get_ccr(CPUM68KState *env);
224
void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t);
225
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
226
index XXXXXXX..XXXXXXX 100644
227
--- a/target/microblaze/cpu.h
228
+++ b/target/microblaze/cpu.h
229
@@ -XXX,XX +XXX,XX @@ static inline void mb_cpu_write_msr(CPUMBState *env, uint32_t val)
51
}
230
}
52
231
53
uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
232
void mb_tcg_init(void);
54
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
233
+void mb_translate_code(CPUState *cs, TranslationBlock *tb,
55
index XXXXXXX..XXXXXXX 100644
234
+ int *max_insns, vaddr pc, void *host_pc);
56
--- a/accel/tcg/user-exec.c
235
57
+++ b/accel/tcg/user-exec.c
236
#define CPU_RESOLVING_TYPE TYPE_MICROBLAZE_CPU
58
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr)
237
59
238
diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h
60
int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr)
239
index XXXXXXX..XXXXXXX 100644
61
{
240
--- a/target/mips/tcg/tcg-internal.h
62
- int ret;
241
+++ b/target/mips/tcg/tcg-internal.h
63
- uint16_t meminfo = trace_mem_get_info(MO_SB, MMU_USER_IDX, false);
242
@@ -XXX,XX +XXX,XX @@
243
#include "cpu.h"
244
245
void mips_tcg_init(void);
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
G_NORETURN void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -XXX,XX +XXX,XX @@ void openrisc_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
int openrisc_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int openrisc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void openrisc_translate_init(void);
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
int print_insn_or1k(bfd_vma addr, disassemble_info *info);

#ifndef CONFIG_USER_ONLY
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -XXX,XX +XXX,XX @@ extern const VMStateDescription vmstate_ppc_cpu;

/*****************************************************************************/
void ppc_translate_init(void);
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

#if !defined(CONFIG_USER_ONLY)
void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -XXX,XX +XXX,XX @@ RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit);
void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv, bool virt_en);

void riscv_translate_init(void);
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
+
G_NORETURN void riscv_raise_exception(CPURISCVState *env,
uint32_t exception, uintptr_t pc);

diff --git a/target/rx/cpu.h b/target/rx/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/cpu.h
+++ b/target/rx/cpu.h
@@ -XXX,XX +XXX,XX @@ int rx_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int rx_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);

void rx_translate_init(void);
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte);

#include "exec/cpu-all.h"
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/s390x-internal.h
+++ b/target/s390x/s390x-internal.h
@@ -XXX,XX +XXX,XX @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3,

/* translate.c */
void s390x_translate_init(void);
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void s390x_restore_state_to_opc(CPUState *cs,
const TranslationBlock *tb,
const uint64_t *data);
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -XXX,XX +XXX,XX @@ G_NORETURN void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
uintptr_t retaddr);

void sh4_translate_init(void);
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

#if !defined(CONFIG_USER_ONLY)
hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -XXX,XX +XXX,XX @@ int sparc_cpu_memory_rw_debug(CPUState *cpu, vaddr addr,

/* translate.c */
void sparc_tcg_init(void);
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

/* fop_helper.c */
target_ulong cpu_get_fsr(CPUSPARCState *);
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/cpu.h
+++ b/target/tricore/cpu.h
@@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, PRIV, 0, 2)

void cpu_state_reset(CPUTriCoreState *s);
void tricore_tcg_init(void);
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);

static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc,
uint64_t *cs_base, uint32_t *flags)
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -XXX,XX +XXX,XX @@ G_NORETURN void xtensa_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,

void xtensa_collect_sr_names(const XtensaConfig *config);
void xtensa_translate_init(void);
+void xtensa_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc);
void **xtensa_get_regfile_by_name(const char *name, int entries, int bits);
void xtensa_breakpoint_handler(CPUState *cs);
void xtensa_register_core(XtensaConfigList *node);
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)

if (!tcg_target_initialized) {
/* Check mandatory TCGCPUOps handlers */
+ const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
#ifndef CONFIG_USER_ONLY
- assert(cpu->cc->tcg_ops->cpu_exec_halt);
- assert(cpu->cc->tcg_ops->cpu_exec_interrupt);
+ assert(tcg_ops->cpu_exec_halt);
+ assert(tcg_ops->cpu_exec_interrupt);
#endif /* !CONFIG_USER_ONLY */
- cpu->cc->tcg_ops->initialize();
+ assert(tcg_ops->translate_code);
+ tcg_ops->initialize();
tcg_target_initialized = true;
}

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,

tcg_func_start(tcg_ctx);

- tcg_ctx->cpu = env_cpu(env);
- gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
+ CPUState *cs = env_cpu(env);
+ tcg_ctx->cpu = cs;
+ cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
+
assert(tb->size != 0);
tcg_ctx->cpu = NULL;
*max_insns = tb->icount;
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
/*
* Overflow of code_gen_buffer, or the current slice of it.
*
- * TODO: We don't need to re-do gen_intermediate_code, nor
+ * TODO: We don't need to re-do tcg_ops->translate_code, nor
* should we re-do the tcg optimization currently hidden
* inside tcg_gen_code. All that should be required is to
* flush the TBs, allocate a new TB, re-initialize it per
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps alpha_sysemu_ops = {

static const TCGCPUOps alpha_tcg_ops = {
.initialize = alpha_translate_init,
+ .translate_code = alpha_translate_code,
.synchronize_from_tb = alpha_cpu_synchronize_from_tb,
.restore_state_to_opc = alpha_restore_state_to_opc,

diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps alpha_tr_ops = {
.tb_stop = alpha_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void alpha_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps arm_sysemu_ops = {
#ifdef CONFIG_TCG
static const TCGCPUOps arm_tcg_ops = {
.initialize = arm_translate_init,
+ .translate_code = arm_translate_code,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
.debug_excp_handler = arm_debug_excp_handler,
.restore_state_to_opc = arm_restore_state_to_opc,
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/cpu-v7m.c
+++ b/target/arm/tcg/cpu-v7m.c
@@ -XXX,XX +XXX,XX @@ static void cortex_m55_initfn(Object *obj)

static const TCGCPUOps arm_v7m_tcg_ops = {
.initialize = arm_translate_init,
+ .translate_code = arm_translate_code,
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
.debug_excp_handler = arm_debug_excp_handler,
.restore_state_to_opc = arm_restore_state_to_opc,
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps thumb_translator_ops = {
.tb_stop = arm_tr_tb_stop,
};

-/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void arm_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = { };
const TranslatorOps *ops = &arm_translator_ops;
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps avr_sysemu_ops = {

static const TCGCPUOps avr_tcg_ops = {
.initialize = avr_cpu_tcg_init,
+ .translate_code = avr_cpu_translate_code,
.synchronize_from_tb = avr_cpu_synchronize_from_tb,
.restore_state_to_opc = avr_restore_state_to_opc,
.cpu_exec_interrupt = avr_cpu_exec_interrupt,
diff --git a/target/avr/translate.c b/target/avr/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/translate.c
+++ b/target/avr/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_WDR(DisasContext *ctx, arg_WDR *a)
*
* - translate()
* - canonicalize_skip()
- * - gen_intermediate_code()
+ * - translate_code()
* - restore_state_to_opc()
*
*/
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps avr_tr_ops = {
.tb_stop = avr_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = { };
translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_init(Object *obj)

static const TCGCPUOps hexagon_tcg_ops = {
.initialize = hexagon_translate_init,
+ .translate_code = hexagon_translate_code,
.synchronize_from_tb = hexagon_cpu_synchronize_from_tb,
.restore_state_to_opc = hexagon_restore_state_to_opc,
};
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hexagon_tr_ops = {
.tb_stop = hexagon_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;

diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps hppa_sysemu_ops = {

static const TCGCPUOps hppa_tcg_ops = {
.initialize = hppa_translate_init,
+ .translate_code = hppa_translate_code,
.synchronize_from_tb = hppa_cpu_synchronize_from_tb,
.restore_state_to_opc = hppa_restore_state_to_opc,

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hppa_tr_ops = {
#endif
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx = { };
translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -XXX,XX +XXX,XX @@ static bool x86_debug_check_breakpoint(CPUState *cs)

static const TCGCPUOps x86_tcg_ops = {
.initialize = tcg_x86_init,
+ .translate_code = x86_translate_code,
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
.restore_state_to_opc = x86_restore_state_to_opc,
.cpu_exec_enter = x86_cpu_exec_enter,
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps i386_tr_ops = {
.tb_stop = i386_tr_tb_stop,
};

-/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void x86_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)

static const TCGCPUOps loongarch_tcg_ops = {
.initialize = loongarch_translate_init,
+ .translate_code = loongarch_translate_code,
.synchronize_from_tb = loongarch_cpu_synchronize_from_tb,
.restore_state_to_opc = loongarch_restore_state_to_opc,

diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps loongarch_tr_ops = {
.tb_stop = loongarch_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;

diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps m68k_sysemu_ops = {

static const TCGCPUOps m68k_tcg_ops = {
.initialize = m68k_tcg_init,
+ .translate_code = m68k_translate_code,
.restore_state_to_opc = m68k_restore_state_to_opc,

#ifndef CONFIG_USER_ONLY
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps m68k_tr_ops = {
.tb_stop = m68k_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void m68k_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base);
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps mb_sysemu_ops = {

static const TCGCPUOps mb_tcg_ops = {
.initialize = mb_tcg_init,
+ .translate_code = mb_translate_code,
.synchronize_from_tb = mb_cpu_synchronize_from_tb,
.restore_state_to_opc = mb_restore_state_to_opc,

diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mb_tr_ops = {
.tb_stop = mb_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void mb_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;
translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base);
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -XXX,XX +XXX,XX @@ static const Property mips_cpu_properties[] = {
#include "hw/core/tcg-cpu-ops.h"
static const TCGCPUOps mips_tcg_ops = {
.initialize = mips_tcg_init,
+ .translate_code = mips_translate_code,
.synchronize_from_tb = mips_cpu_synchronize_from_tb,
.restore_state_to_opc = mips_restore_state_to_opc,

diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mips_tr_ops = {
.tb_stop = mips_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;

diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps openrisc_sysemu_ops = {

static const TCGCPUOps openrisc_tcg_ops = {
.initialize = openrisc_translate_init,
+ .translate_code = openrisc_translate_code,
.synchronize_from_tb = openrisc_cpu_synchronize_from_tb,
.restore_state_to_opc = openrisc_restore_state_to_opc,

diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps openrisc_tr_ops = {
.tb_stop = openrisc_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps ppc_sysemu_ops = {

static const TCGCPUOps ppc_tcg_ops = {
.initialize = ppc_translate_init,
+ .translate_code = ppc_translate_code,
.restore_state_to_opc = ppc_restore_state_to_opc,

#ifdef CONFIG_USER_ONLY
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps ppc_tr_ops = {
.tb_stop = ppc_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;

diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_restore_state_to_opc(CPUState *cs,

static const TCGCPUOps riscv_tcg_ops = {
.initialize = riscv_translate_init,
+ .translate_code = riscv_translate_code,
.synchronize_from_tb = riscv_cpu_synchronize_from_tb,
.restore_state_to_opc = riscv_restore_state_to_opc,

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps riscv_tr_ops = {
.tb_stop = riscv_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;

diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps rx_sysemu_ops = {

static const TCGCPUOps rx_tcg_ops = {
.initialize = rx_translate_init,
+ .translate_code = rx_translate_code,
.synchronize_from_tb = rx_cpu_synchronize_from_tb,
.restore_state_to_opc = rx_restore_state_to_opc,
.tlb_fill = rx_cpu_tlb_fill,
diff --git a/target/rx/translate.c b/target/rx/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps rx_tr_ops = {
.tb_stop = rx_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;

diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc,

static const TCGCPUOps s390_tcg_ops = {
.initialize = s390x_translate_init,
+ .translate_code = s390x_translate_code,
.restore_state_to_opc = s390x_restore_state_to_opc,

#ifdef CONFIG_USER_ONLY
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps s390x_tr_ops = {
.disas_log = s390x_tr_disas_log,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc;

diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sh4_sysemu_ops = {

static const TCGCPUOps superh_tcg_ops = {
.initialize = sh4_translate_init,
+ .translate_code = sh4_translate_code,
.synchronize_from_tb = superh_cpu_synchronize_from_tb,
.restore_state_to_opc = superh_restore_state_to_opc,

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sh4_tr_ops = {
.tb_stop = sh4_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;

diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sparc_sysemu_ops = {

static const TCGCPUOps sparc_tcg_ops = {
.initialize = sparc_tcg_init,
+ .translate_code = sparc_translate_code,
.synchronize_from_tb = sparc_cpu_synchronize_from_tb,
.restore_state_to_opc = sparc_restore_state_to_opc,

diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sparc_tr_ops = {
.tb_stop = sparc_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = {};

diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps tricore_sysemu_ops = {

static const TCGCPUOps tricore_tcg_ops = {
.initialize = tricore_tcg_init,
+ .translate_code = tricore_translate_code,
.synchronize_from_tb = tricore_cpu_synchronize_from_tb,
.restore_state_to_opc = tricore_restore_state_to_opc,
.tlb_fill = tricore_cpu_tlb_fill,
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps tricore_tr_ops = {
.tb_stop = tricore_tr_tb_stop,
};

-
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext ctx;
translator_loop(cs, tb, max_insns, pc, host_pc,
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps xtensa_sysemu_ops = {

static const TCGCPUOps xtensa_tcg_ops = {
.initialize = xtensa_translate_init,
+ .translate_code = xtensa_translate_code,
.debug_excp_handler = xtensa_breakpoint_handler,
.restore_state_to_opc = xtensa_restore_state_to_opc,

diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps xtensa_translator_ops = {
.tb_stop = xtensa_tr_tb_stop,
};

-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- vaddr pc, void *host_pc)
+void xtensa_translate_code(CPUState *cpu, TranslationBlock *tb,
+ int *max_insns, vaddr pc, void *host_pc)
{
DisasContext dc = {};
translator_loop(cpu, tb, max_insns, pc, host_pc,
--
2.43.0