The following changes since commit aa3a285b5bc56a4208b3b57d4a55291e9c260107:

  Merge tag 'mem-2024-12-21' of https://github.com/davidhildenbrand/qemu into staging (2024-12-22 14:33:27 -0500)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241224

for you to fetch changes up to e4a8e093dc74be049f4829831dce76e5edab0003:

  accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core (2024-12-24 08:32:15 -0800)

----------------------------------------------------------------
tcg/optimize: Remove in-flight mask data from OptContext
fpu: Add float*_muladd_scalbn
fpu: Remove float_muladd_halve_result
fpu: Add float_round_nearest_even_max
fpu: Add float_muladd_suppress_add_product_zero
target/hexagon: Use float32_muladd
accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

----------------------------------------------------------------
Ilya Leoshkevich (1):
      tests/tcg: Do not use inttypes.h in multiarch/system/memory.c

Pierrick Bouvier (1):
      plugins: optimize cpu_index code generation

Richard Henderson (70):
      tcg/optimize: Split out finish_bb, finish_ebb
      tcg/optimize: Split out fold_affected_mask
      tcg/optimize: Copy mask writeback to fold_masks
      tcg/optimize: Split out fold_masks_zs
      tcg/optimize: Augment s_mask from z_mask in fold_masks_zs
      tcg/optimize: Change representation of s_mask
      tcg/optimize: Use finish_folding in fold_add, fold_add_vec, fold_addsub2
      tcg/optimize: Introduce const value accessors for TempOptInfo
      tcg/optimize: Use fold_masks_zs in fold_and
      tcg/optimize: Use fold_masks_zs in fold_andc
      tcg/optimize: Use fold_masks_zs in fold_bswap
      tcg/optimize: Use fold_masks_zs in fold_count_zeros
      tcg/optimize: Use fold_masks_z in fold_ctpop
      tcg/optimize: Use fold_and and fold_masks_z in fold_deposit
      tcg/optimize: Compute sign mask in fold_deposit
      tcg/optimize: Use finish_folding in fold_divide
      tcg/optimize: Use finish_folding in fold_dup, fold_dup2
      tcg/optimize: Use fold_masks_s in fold_eqv
      tcg/optimize: Use fold_masks_z in fold_extract
      tcg/optimize: Use finish_folding in fold_extract2
      tcg/optimize: Use fold_masks_zs in fold_exts
      tcg/optimize: Use fold_masks_z in fold_extu
      tcg/optimize: Use fold_masks_zs in fold_movcond
      tcg/optimize: Use finish_folding in fold_mul*
      tcg/optimize: Use fold_masks_s in fold_nand
      tcg/optimize: Use fold_masks_z in fold_neg_no_const
      tcg/optimize: Use fold_masks_s in fold_nor
      tcg/optimize: Use fold_masks_s in fold_not
      tcg/optimize: Use fold_masks_zs in fold_or
      tcg/optimize: Use fold_masks_zs in fold_orc
      tcg/optimize: Use fold_masks_zs in fold_qemu_ld
      tcg/optimize: Return true from fold_qemu_st, fold_tcg_st
      tcg/optimize: Use finish_folding in fold_remainder
      tcg/optimize: Distinguish simplification in fold_setcond_zmask
      tcg/optimize: Use fold_masks_z in fold_setcond
      tcg/optimize: Use fold_masks_s in fold_negsetcond
      tcg/optimize: Use fold_masks_z in fold_setcond2
      tcg/optimize: Use finish_folding in fold_cmp_vec
      tcg/optimize: Use finish_folding in fold_cmpsel_vec
      tcg/optimize: Use fold_masks_zs in fold_sextract
      tcg/optimize: Use fold_masks_zs, fold_masks_s in fold_shift
      tcg/optimize: Simplify sign bit test in fold_shift
      tcg/optimize: Use finish_folding in fold_sub, fold_sub_vec
      tcg/optimize: Use fold_masks_zs in fold_tcg_ld
      tcg/optimize: Use finish_folding in fold_tcg_ld_memcopy
      tcg/optimize: Use fold_masks_zs in fold_xor
      tcg/optimize: Use finish_folding in fold_bitsel_vec
      tcg/optimize: Use finish_folding as default in tcg_optimize
      tcg/optimize: Remove z_mask, s_mask from OptContext
      tcg/optimize: Re-enable sign-mask optimizations
      tcg/optimize: Move fold_bitsel_vec into alphabetic sort
      tcg/optimize: Move fold_cmp_vec, fold_cmpsel_vec into alphabetic sort
      softfloat: Add float{16,32,64}_muladd_scalbn
      target/arm: Use float*_muladd_scalbn
      target/sparc: Use float*_muladd_scalbn
      softfloat: Remove float_muladd_halve_result
      softfloat: Add float_round_nearest_even_max
      softfloat: Add float_muladd_suppress_add_product_zero
      target/hexagon: Use float32_mul in helper_sfmpy
      target/hexagon: Use float32_muladd for helper_sffma
      target/hexagon: Use float32_muladd for helper_sffms
      target/hexagon: Use float32_muladd_scalbn for helper_sffma_sc
      target/hexagon: Use float32_muladd for helper_sffm[as]_lib
      target/hexagon: Remove internal_fmafx
      target/hexagon: Expand GEN_XF_ROUND
      target/hexagon: Remove Float
      target/hexagon: Remove Double
      target/hexagon: Use mulu64 for int128_mul_6464
      target/hexagon: Simplify internal_mpyhh setup
      accel/tcg: Move gen_intermediate_code to TCGCPUOps.translate_core

 include/exec/translator.h           |  14 -
 include/fpu/softfloat-types.h       |   2 +
 include/fpu/softfloat.h             |  14 +-
 include/hw/core/tcg-cpu-ops.h       |  13 +
 target/alpha/cpu.h                  |   2 +
 target/arm/internals.h              |   2 +
 target/avr/cpu.h                    |   2 +
 target/hexagon/cpu.h                |   2 +
 target/hexagon/fma_emu.h            |   3 -
 target/hppa/cpu.h                   |   2 +
 target/i386/tcg/helper-tcg.h        |   2 +
 target/loongarch/internals.h        |   2 +
 target/m68k/cpu.h                   |   2 +
 target/microblaze/cpu.h             |   2 +
 target/mips/tcg/tcg-internal.h      |   2 +
 target/openrisc/cpu.h               |   2 +
 target/ppc/cpu.h                    |   2 +
 target/riscv/cpu.h                  |   3 +
 target/rx/cpu.h                     |   2 +
 target/s390x/s390x-internal.h       |   2 +
 target/sh4/cpu.h                    |   2 +
 target/sparc/cpu.h                  |   2 +
 target/sparc/helper.h               |   4 +-
 target/tricore/cpu.h                |   2 +
 target/xtensa/cpu.h                 |   2 +
 accel/tcg/cpu-exec.c                |   8 +-
 accel/tcg/plugin-gen.c              |   9 +
 accel/tcg/translate-all.c           |   8 +-
 fpu/softfloat.c                     |  63 +--
 target/alpha/cpu.c                  |   1 +
 target/alpha/translate.c            |   4 +-
 target/arm/cpu.c                    |   1 +
 target/arm/tcg/cpu-v7m.c            |   1 +
 target/arm/tcg/helper-a64.c         |   6 +-
 target/arm/tcg/translate.c          |   5 +-
 target/avr/cpu.c                    |   1 +
 target/avr/translate.c              |   6 +-
 target/hexagon/cpu.c                |   1 +
 target/hexagon/fma_emu.c            | 496 ++++++---------------
 target/hexagon/op_helper.c          | 125 ++----
 target/hexagon/translate.c          |   4 +-
 target/hppa/cpu.c                   |   1 +
 target/hppa/translate.c             |   4 +-
 target/i386/tcg/tcg-cpu.c           |   1 +
 target/i386/tcg/translate.c         |   5 +-
 target/loongarch/cpu.c              |   1 +
 target/loongarch/tcg/translate.c    |   4 +-
 target/m68k/cpu.c                   |   1 +
 target/m68k/translate.c             |   4 +-
 target/microblaze/cpu.c             |   1 +
 target/microblaze/translate.c       |   4 +-
 target/mips/cpu.c                   |   1 +
 target/mips/tcg/translate.c         |   4 +-
 target/openrisc/cpu.c               |   1 +
 target/openrisc/translate.c         |   4 +-
 target/ppc/cpu_init.c               |   1 +
 target/ppc/translate.c              |   4 +-
 target/riscv/tcg/tcg-cpu.c          |   1 +
 target/riscv/translate.c            |   4 +-
 target/rx/cpu.c                     |   1 +
 target/rx/translate.c               |   4 +-
 target/s390x/cpu.c                  |   1 +
 target/s390x/tcg/translate.c        |   4 +-
 target/sh4/cpu.c                    |   1 +
 target/sh4/translate.c              |   4 +-
 target/sparc/cpu.c                  |   1 +
 target/sparc/fop_helper.c           |   8 +-
 target/sparc/translate.c            |  84 ++--
 target/tricore/cpu.c                |   1 +
 target/tricore/translate.c          |   5 +-
 target/xtensa/cpu.c                 |   1 +
 target/xtensa/translate.c           |   4 +-
 tcg/optimize.c                      | 857 +++++++++++++++++++-----------------
 tests/tcg/multiarch/system/memory.c |   9 +-
 fpu/softfloat-parts.c.inc           |  16 +-
 75 files changed, 866 insertions(+), 1009 deletions(-)
From: Ilya Leoshkevich <iii@linux.ibm.com>

make check-tcg fails on Fedora with the following error message:

  alpha-linux-gnu-gcc [...] qemu/tests/tcg/multiarch/system/memory.c -o memory [...]
  qemu/tests/tcg/multiarch/system/memory.c:17:10: fatal error: inttypes.h: No such file or directory
     17 | #include <inttypes.h>
        |          ^~~~~~~~~~~~
  compilation terminated.

The reason is that Fedora has cross-compilers, but no cross-glibc
headers. Fix by hardcoding the format specifiers and dropping the
include.

An alternative fix would be to introduce a configure check for
inttypes.h. But this would make it impossible to use Fedora
cross-compilers for softmmu tests, which used to work so far.

Fixes: ecbcc9ead2f8 ("tests/tcg: add a system test to check memory instrumentation")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010085906.226249-1-iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/tcg/multiarch/system/memory.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/tcg/multiarch/system/memory.c b/tests/tcg/multiarch/system/memory.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/tcg/multiarch/system/memory.c
+++ b/tests/tcg/multiarch/system/memory.c
@@ -XXX,XX +XXX,XX @@

 #include <stdint.h>
 #include <stdbool.h>
-#include <inttypes.h>
 #include <minilib.h>

 #ifndef CHECK_UNALIGNED
@@ -XXX,XX +XXX,XX @@ int main(void)
     int i;
     bool ok = true;

-    ml_printf("Test data start: 0x%"PRIxPTR"\n", &test_data[0]);
-    ml_printf("Test data end: 0x%"PRIxPTR"\n", &test_data[TEST_SIZE]);
+    ml_printf("Test data start: 0x%lx\n", (unsigned long)&test_data[0]);
+    ml_printf("Test data end: 0x%lx\n", (unsigned long)&test_data[TEST_SIZE]);

     /* Run through the unsigned tests first */
     for (i = 0; i < ARRAY_SIZE(init_ufns) && ok; i++) {
@@ -XXX,XX +XXX,XX @@ int main(void)
         ok = do_signed_reads(true);
     }

-    ml_printf("Test data read: %"PRId32"\n", test_read_count);
-    ml_printf("Test data write: %"PRId32"\n", test_write_count);
+    ml_printf("Test data read: %lu\n", (unsigned long)test_read_count);
+    ml_printf("Test data write: %lu\n", (unsigned long)test_write_count);
     ml_printf("Test complete: %s\n", ok ? "PASSED" : "FAILED");
     return ok ? 0 : -1;
 }
--
2.43.0
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>

When running with a single vcpu, we can return a constant instead of a
load when accessing cpu_index.
A side effect is that all tcg operations using it are optimized, most
notably scoreboard access.
When running a simple loop in user-mode, the speedup is around 20%.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20241128213843.1023080-1-pierrick.bouvier@linaro.org>
---
 accel/tcg/plugin-gen.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -XXX,XX +XXX,XX @@ static void gen_disable_mem_helper(void)

 static TCGv_i32 gen_cpu_index(void)
 {
+    /*
+     * Optimize when we run with a single vcpu. All values using cpu_index,
+     * including scoreboard index, will be optimized out.
+     * User-mode calls tb_flush when setting this flag. In system-mode, all
+     * vcpus are created before generating code.
+     */
+    if (!tcg_cflags_has(current_cpu, CF_PARALLEL)) {
+        return tcg_constant_i32(current_cpu->cpu_index);
+    }
     TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
     tcg_gen_ld_i32(cpu_index, tcg_env,
                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
--
2.43.0
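
For a sense of what changes in the generated opcode stream (a
hypothetical listing for illustration; temp names and the offset are
made up, not taken from the patch): with more than one vcpu,
gen_cpu_index() still emits a run-time load,

    ld_i32 tmp0,env,$<offset of cpu_index>

while with a single vcpu it now emits the equivalent of

    mov_i32 tmp0,$0x0

which the optimizer can constant-propagate into every use, e.g. folding
a scoreboard element address down to a translation-time constant.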
Call them directly from the opcode switch statement in tcg_optimize,
rather than in finish_folding based on opcode flags. Adjust folding
of conditional branches to match.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 47 +++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
     }
 }

+static void finish_bb(OptContext *ctx)
+{
+    /* We only optimize memory barriers across basic blocks. */
+    ctx->prev_mb = NULL;
+}
+
+static void finish_ebb(OptContext *ctx)
+{
+    finish_bb(ctx);
+    /* We only optimize across extended basic blocks. */
+    memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+    remove_mem_copy_all(ctx);
+}
+
 static void finish_folding(OptContext *ctx, TCGOp *op)
 {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     int i, nb_oargs;

-    /*
-     * We only optimize extended basic blocks.  If the opcode ends a BB
-     * and is not a conditional branch, reset all temp data.
-     */
-    if (def->flags & TCG_OPF_BB_END) {
-        ctx->prev_mb = NULL;
-        if (!(def->flags & TCG_OPF_COND_BRANCH)) {
-            memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
-            remove_mem_copy_all(ctx);
-        }
-        return;
-    }
-
     nb_oargs = def->nb_oargs;
     for (i = 0; i < nb_oargs; i++) {
         TCGTemp *ts = arg_temp(op->args[i]);
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
     if (i > 0) {
         op->opc = INDEX_op_br;
         op->args[0] = op->args[3];
+        finish_ebb(ctx);
+    } else {
+        finish_bb(ctx);
     }
-    return false;
+    return true;
 }

 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
         }
         op->opc = INDEX_op_br;
         op->args[0] = label;
-        break;
+        finish_ebb(ctx);
+        return true;
     }
-    return false;
+
+    finish_bb(ctx);
+    return true;
 }

 static bool fold_bswap(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64_VEC(xor):
             done = fold_xor(&ctx, op);
             break;
+        case INDEX_op_set_label:
+        case INDEX_op_br:
+        case INDEX_op_exit_tb:
+        case INDEX_op_goto_tb:
+        case INDEX_op_goto_ptr:
+            finish_ebb(&ctx);
+            done = true;
+            break;
         default:
             break;
         }
--
2.43.0
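
The split matters because a conditional branch ends a basic block but
not an extended basic block: the fall-through path is still dominated
by the preceding code, so the known-value and copy data in the context
remains valid and only the memory-barrier tracking must be reset.  A
minimal sketch (illustrative opcodes, not from the patch):

    brcond_i32 t0,t1,eq,$L1    -> finish_bb():  reset prev_mb only
    br $L9                     -> finish_ebb(): also clear temps_used
                                  and the mem-copy tracking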
There are only a few logical operations which can compute
an "affected" mask.  Split out handling of this optimization
to a separate function, only to be called when applicable.

Remove the a_mask field from OptContext, as the mask is
no longer stored anywhere.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
     QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;

     /* In flight values from optimization. */
-    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
     uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
     uint64_t s_mask;  /* mask of clrsb(value) bits */
     TCGType type;
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)

 static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
-    uint64_t a_mask = ctx->a_mask;
     uint64_t z_mask = ctx->z_mask;
     uint64_t s_mask = ctx->s_mask;

@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
      * type changing opcodes.
      */
     if (ctx->type == TCG_TYPE_I32) {
-        a_mask = (int32_t)a_mask;
         z_mask = (int32_t)z_mask;
         s_mask |= MAKE_64BIT_MASK(32, 32);
         ctx->z_mask = z_mask;
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     if (z_mask == 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
     }
+    return false;
+}
+
+/*
+ * An "affected" mask bit is 0 if and only if the result is identical
+ * to the first input.  Thus if the entire mask is 0, the operation
+ * is equivalent to a copy.
+ */
+static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
+{
+    if (ctx->type == TCG_TYPE_I32) {
+        a_mask = (uint32_t)a_mask;
+    }
     if (a_mask == 0) {
         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
     }
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
      * Known-zeros does not imply known-ones.  Therefore unless
      * arg2 is constant, we can't infer affected bits from it.
      */
-    if (arg_is_const(op->args[2])) {
-        ctx->a_mask = z1 & ~z2;
+    if (arg_is_const(op->args[2]) &&
+        fold_affected_mask(ctx, op, z1 & ~z2)) {
+        return true;
     }

     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
      */
     if (arg_is_const(op->args[2])) {
         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
-        ctx->a_mask = z1 & ~z2;
+        if (fold_affected_mask(ctx, op, z1 & ~z2)) {
+            return true;
+        }
         z1 &= z2;
     }
     ctx->z_mask = z1;
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)

     z_mask_old = arg_info(op->args[1])->z_mask;
     z_mask = extract64(z_mask_old, pos, len);
-    if (pos == 0) {
-        ctx->a_mask = z_mask_old ^ z_mask;
+    if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
+        return true;
     }
     ctx->z_mask = z_mask;
     ctx->s_mask = smask_from_zmask(z_mask);
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)

     ctx->z_mask = z_mask;
     ctx->s_mask = s_mask;
-    if (!type_change) {
-        ctx->a_mask = s_mask & ~s_mask_old;
+    if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+        return true;
     }

     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)

     ctx->z_mask = z_mask;
     ctx->s_mask = smask_from_zmask(z_mask);
-    if (!type_change) {
-        ctx->a_mask = z_mask_old ^ z_mask;
+    if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
+        return true;
     }
     return fold_masks(ctx, op);
 }
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
     ctx->s_mask = s_mask;

-    if (pos == 0) {
-        ctx->a_mask = s_mask & ~s_mask_old;
+    if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+        return true;
     }

     return fold_masks(ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
     }

     /* Assume all bits affected, no bits known zero, no sign reps. */
-    ctx.a_mask = -1;
     ctx.z_mask = -1;
     ctx.s_mask = 0;

--
2.43.0
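
A worked example of the affected mask (hypothetical values, not from
the patch): for an AND with constant 0xff whose first input already
has z_mask == 0x0f,

    a_mask = z1 & ~z2 = 0x0f & ~0xff = 0

so no bit that can possibly be nonzero in the input is cleared by the
constant, and fold_affected_mask() converts the AND into a copy of the
first input.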
Use of fold_masks should be restricted to those opcodes that
can reliably make use of it -- those with a single output,
and from higher-level folders that set up the masks.
Prepare for conversion of each folder in turn.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
     uint64_t z_mask = ctx->z_mask;
     uint64_t s_mask = ctx->s_mask;
+    const TCGOpDef *def = &tcg_op_defs[op->opc];
+    TCGTemp *ts;
+    TempOptInfo *ti;
+
+    /* Only single-output opcodes are supported here. */
+    tcg_debug_assert(def->nb_oargs == 1);

     /*
      * 32-bit ops generate 32-bit results, which for the purpose of
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     if (ctx->type == TCG_TYPE_I32) {
         z_mask = (int32_t)z_mask;
         s_mask |= MAKE_64BIT_MASK(32, 32);
-        ctx->z_mask = z_mask;
-        ctx->s_mask = s_mask;
     }

     if (z_mask == 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
     }
-    return false;
+
+    ts = arg_temp(op->args[0]);
+    reset_ts(ctx, ts);
+
+    ti = ts_info(ts);
+    ti->z_mask = z_mask;
+    ti->s_mask = s_mask;
+    return true;
 }

 /*
--
2.43.0
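
The intended calling pattern, sketched from the follow-up patches in
this series (simplified; not the code added here):

    static bool fold_and(OptContext *ctx, TCGOp *op)
    {
        ...
        z_mask = t1->z_mask & t2->z_mask;
        s_mask = t1->s_mask & t2->s_mask;
        return fold_masks_zs(ctx, op, z_mask, s_mask);
    }

Each folder computes its masks locally and passes them straight in,
rather than staging them in ctx->z_mask/ctx->s_mask.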
Add a routine to which masks can be passed directly, rather than
storing them into OptContext.  To be used in upcoming patches.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
     return fold_const2(ctx, op);
 }

-static bool fold_masks(OptContext *ctx, TCGOp *op)
+/*
+ * Record "zero" and "sign" masks for the single output of @op.
+ * See TempOptInfo definition of z_mask and s_mask.
+ * If z_mask allows, fold the output to constant zero.
+ */
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
+                          uint64_t z_mask, uint64_t s_mask)
 {
-    uint64_t z_mask = ctx->z_mask;
-    uint64_t s_mask = ctx->s_mask;
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     TCGTemp *ts;
     TempOptInfo *ti;
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     return true;
 }

+static bool fold_masks(OptContext *ctx, TCGOp *op)
+{
+    return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
+}
+
 /*
  * An "affected" mask bit is 0 if and only if the result is identical
  * to the first input.  Thus if the entire mask is 0, the operation
--
2.43.0
1
The value previously chosen overlaps GUSA_MASK.
1
Consider the passed s_mask to be a minimum deduced from
2
either existing s_mask or from a sign-extension operation.
3
We may be able to deduce more from the set of known zeros.
4
Remove identical logic from several opcode folders.
2
5
3
Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
6
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
that they are included in TB_FLAGs. Add aliases for the
5
FPSCR and SR bits that are included in TB_FLAGS, so that
6
we don't accidentally reassign those bits.
7
8
Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
9
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
10
Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
8
---
13
target/sh4/cpu.h | 56 +++++++++++++------------
9
tcg/optimize.c | 21 ++++++---------------
14
linux-user/sh4/signal.c | 6 +--
10
1 file changed, 6 insertions(+), 15 deletions(-)
15
target/sh4/cpu.c | 6 +--
16
target/sh4/helper.c | 6 +--
17
target/sh4/translate.c | 90 ++++++++++++++++++++++-------------------
18
5 files changed, 88 insertions(+), 76 deletions(-)
19
11
20
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
21
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
22
--- a/target/sh4/cpu.h
14
--- a/tcg/optimize.c
23
+++ b/target/sh4/cpu.h
15
+++ b/tcg/optimize.c
24
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
25
#define FPSCR_RM_NEAREST (0 << 0)
17
* Record "zero" and "sign" masks for the single output of @op.
26
#define FPSCR_RM_ZERO (1 << 0)
18
* See TempOptInfo definition of z_mask and s_mask.
27
19
* If z_mask allows, fold the output to constant zero.
28
-#define DELAY_SLOT_MASK 0x7
20
+ * The passed s_mask may be augmented by z_mask.
29
-#define DELAY_SLOT (1 << 0)
21
*/
30
-#define DELAY_SLOT_CONDITIONAL (1 << 1)
22
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
31
-#define DELAY_SLOT_RTE (1 << 2)
23
uint64_t z_mask, uint64_t s_mask)
32
+#define TB_FLAG_DELAY_SLOT (1 << 0)
24
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
33
+#define TB_FLAG_DELAY_SLOT_COND (1 << 1)
25
34
+#define TB_FLAG_DELAY_SLOT_RTE (1 << 2)
26
ti = ts_info(ts);
35
+#define TB_FLAG_PENDING_MOVCA (1 << 3)
27
ti->z_mask = z_mask;
36
+#define TB_FLAG_GUSA_SHIFT 4 /* [11:4] */
28
- ti->s_mask = s_mask;
37
+#define TB_FLAG_GUSA_EXCLUSIVE (1 << 12)
29
+ ti->s_mask = s_mask | smask_from_zmask(z_mask);
38
+#define TB_FLAG_UNALIGN (1 << 13)
30
return true;
39
+#define TB_FLAG_SR_FD (1 << SR_FD) /* 15 */
40
+#define TB_FLAG_FPSCR_PR FPSCR_PR /* 19 */
41
+#define TB_FLAG_FPSCR_SZ FPSCR_SZ /* 20 */
42
+#define TB_FLAG_FPSCR_FR FPSCR_FR /* 21 */
43
+#define TB_FLAG_SR_RB (1 << SR_RB) /* 29 */
44
+#define TB_FLAG_SR_MD (1 << SR_MD) /* 30 */
45
46
-#define TB_FLAG_PENDING_MOVCA (1 << 3)
47
-#define TB_FLAG_UNALIGN (1 << 4)
48
-
49
-#define GUSA_SHIFT 4
50
-#ifdef CONFIG_USER_ONLY
51
-#define GUSA_EXCLUSIVE (1 << 12)
52
-#define GUSA_MASK ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
53
-#else
54
-/* Provide dummy versions of the above to allow tests against tbflags
55
- to be elided while avoiding ifdefs. */
56
-#define GUSA_EXCLUSIVE 0
57
-#define GUSA_MASK 0
58
-#endif
59
-
60
-#define TB_FLAG_ENVFLAGS_MASK (DELAY_SLOT_MASK | GUSA_MASK)
61
+#define TB_FLAG_DELAY_SLOT_MASK (TB_FLAG_DELAY_SLOT | \
62
+ TB_FLAG_DELAY_SLOT_COND | \
63
+ TB_FLAG_DELAY_SLOT_RTE)
64
+#define TB_FLAG_GUSA_MASK ((0xff << TB_FLAG_GUSA_SHIFT) | \
65
+ TB_FLAG_GUSA_EXCLUSIVE)
66
+#define TB_FLAG_FPSCR_MASK (TB_FLAG_FPSCR_PR | \
67
+ TB_FLAG_FPSCR_SZ | \
68
+ TB_FLAG_FPSCR_FR)
69
+#define TB_FLAG_SR_MASK (TB_FLAG_SR_FD | \
70
+ TB_FLAG_SR_RB | \
71
+ TB_FLAG_SR_MD)
72
+#define TB_FLAG_ENVFLAGS_MASK (TB_FLAG_DELAY_SLOT_MASK | \
73
+ TB_FLAG_GUSA_MASK)
74
75
typedef struct tlb_t {
76
uint32_t vpn;        /* virtual page number */
77
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
78
{
79
/* The instruction in a RTE delay slot is fetched in privileged
80
mode, but executed in user mode. */
81
- if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
82
+ if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
83
return 0;
84
} else {
85
return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
86
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
87
{
88
*pc = env->pc;
89
/* For a gUSA region, notice the end of the region. */
90
- *cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
91
- *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
92
- | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */
93
- | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */
94
- | (env->sr & (1u << SR_FD)) /* Bit 15 */
95
+ *cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0;
96
+ *flags = env->flags
97
+ | (env->fpscr & TB_FLAG_FPSCR_MASK)
98
+ | (env->sr & TB_FLAG_SR_MASK)
99
| (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
100
#ifdef CONFIG_USER_ONLY
101
*flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
102
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
103
index XXXXXXX..XXXXXXX 100644
104
--- a/linux-user/sh4/signal.c
105
+++ b/linux-user/sh4/signal.c
106
@@ -XXX,XX +XXX,XX @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
107
__get_user(regs->fpul, &sc->sc_fpul);
108
109
regs->tra = -1; /* disable syscall checks */
110
- regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
111
+ regs->flags = 0;
112
}
31
}
113
32
114
void setup_frame(int sig, struct target_sigaction *ka,
33
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
115
@@ -XXX,XX +XXX,XX @@ void setup_frame(int sig, struct target_sigaction *ka,
34
default:
116
regs->gregs[5] = 0;
35
g_assert_not_reached();
117
regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc);
36
}
118
regs->pc = (unsigned long) ka->_sa_handler;
37
- s_mask = smask_from_zmask(z_mask);
119
- regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
38
120
+ regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
39
+ s_mask = 0;
121
40
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
122
unlock_user_struct(frame, frame_addr, 1);
41
case TCG_BSWAP_OZ:
123
return;
42
break;
124
@@ -XXX,XX +XXX,XX @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
43
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
125
regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info);
44
default:
126
regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc);
45
/* The high bits are undefined: force all bits above the sign to 1. */
127
regs->pc = (unsigned long) ka->_sa_handler;
46
z_mask |= sign << 1;
128
- regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
47
- s_mask = 0;
129
+ regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
48
break;
130
49
}
131
unlock_user_struct(frame, frame_addr, 1);
50
ctx->z_mask = z_mask;
132
return;
51
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
133
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
52
g_assert_not_reached();
134
index XXXXXXX..XXXXXXX 100644
53
}
135
--- a/target/sh4/cpu.c
54
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
136
+++ b/target/sh4/cpu.c
55
- ctx->s_mask = smask_from_zmask(ctx->z_mask);
137
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
56
return false;
138
SuperHCPU *cpu = SUPERH_CPU(cs);
139
140
cpu->env.pc = tb_pc(tb);
141
- cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
142
+ cpu->env.flags = tb->flags;
143
}
57
}
144
58
145
#ifndef CONFIG_USER_ONLY
59
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
146
@@ -XXX,XX +XXX,XX @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
60
default:
147
SuperHCPU *cpu = SUPERH_CPU(cs);
61
g_assert_not_reached();
148
CPUSH4State *env = &cpu->env;
62
}
149
63
- ctx->s_mask = smask_from_zmask(ctx->z_mask);
150
- if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
64
return false;
151
+ if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND))
65
}
152
&& env->pc != tb_pc(tb)) {
66
153
env->pc -= 2;
67
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
154
- env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
155
+ env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND);
156
return true;
68
return true;
157
}
69
}
158
return false;
70
ctx->z_mask = z_mask;
159
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
71
- ctx->s_mask = smask_from_zmask(z_mask);
160
index XXXXXXX..XXXXXXX 100644
72
161
--- a/target/sh4/helper.c
73
return fold_masks(ctx, op);
162
+++ b/target/sh4/helper.c
74
}
163
@@ -XXX,XX +XXX,XX @@ void superh_cpu_do_interrupt(CPUState *cs)
75
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
164
env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
165
env->lock_addr = -1;
166
167
- if (env->flags & DELAY_SLOT_MASK) {
168
+ if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
169
/* Branch instruction should be executed again before delay slot. */
170
    env->spc -= 2;
171
    /* Clear flags for exception/interrupt routine. */
172
- env->flags &= ~DELAY_SLOT_MASK;
173
+ env->flags &= ~TB_FLAG_DELAY_SLOT_MASK;
174
}
76
}
175
77
176
if (do_exp) {
78
ctx->z_mask = z_mask;
177
@@ -XXX,XX +XXX,XX @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
79
- ctx->s_mask = smask_from_zmask(z_mask);
178
CPUSH4State *env = &cpu->env;
80
if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
179
81
return true;
180
/* Delay slots are indivisible, ignore interrupts */
181
- if (env->flags & DELAY_SLOT_MASK) {
182
+ if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
183
return false;
184
} else {
185
superh_cpu_do_interrupt(cs);
186
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/sh4/translate.c
189
+++ b/target/sh4/translate.c
190
@@ -XXX,XX +XXX,XX @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
191
         i, env->gregs[i], i + 1, env->gregs[i + 1],
192
         i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
193
}
82
}
194
- if (env->flags & DELAY_SLOT) {
83
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
195
+ if (env->flags & TB_FLAG_DELAY_SLOT) {
84
int width = 8 * memop_size(mop);
196
qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
85
197
         env->delayed_pc);
86
if (width < 64) {
198
- } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
87
- ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
199
+ } else if (env->flags & TB_FLAG_DELAY_SLOT_COND) {
88
- if (!(mop & MO_SIGN)) {
200
qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
89
+ if (mop & MO_SIGN) {
201
         env->delayed_pc);
90
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
202
- } else if (env->flags & DELAY_SLOT_RTE) {
91
+ } else {
203
+ } else if (env->flags & TB_FLAG_DELAY_SLOT_RTE) {
92
ctx->z_mask = MAKE_64BIT_MASK(0, width);
204
qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
93
- ctx->s_mask <<= 1;
205
env->delayed_pc);
206
}
207
@@ -XXX,XX +XXX,XX @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
208
209
static inline bool use_exit_tb(DisasContext *ctx)
210
{
211
- return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
212
+ return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0;
213
}
214
215
static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
216
@@ -XXX,XX +XXX,XX @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
217
TCGLabel *l1 = gen_new_label();
218
TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;
219
220
- if (ctx->tbflags & GUSA_EXCLUSIVE) {
221
+ if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
222
/* When in an exclusive region, we must continue to the end.
223
Therefore, exit the region on a taken branch, but otherwise
224
fall through to the next instruction. */
225
tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
226
- tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
227
+ tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
228
/* Note that this won't actually use a goto_tb opcode because we
229
disallow it in use_goto_tb, but it handles exit + singlestep. */
230
gen_goto_tb(ctx, 0, dest);
231
@@ -XXX,XX +XXX,XX @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
232
tcg_gen_mov_i32(ds, cpu_delayed_cond);
233
tcg_gen_discard_i32(cpu_delayed_cond);
234
235
- if (ctx->tbflags & GUSA_EXCLUSIVE) {
236
+ if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
237
/* When in an exclusive region, we must continue to the end.
238
Therefore, exit the region on a taken branch, but otherwise
239
fall through to the next instruction. */
240
tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);
241
242
/* Leave the gUSA region. */
243
- tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
244
+ tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
245
gen_jump(ctx);
246
247
gen_set_label(l1);
248
@@ -XXX,XX +XXX,XX @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
249
#define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
250
251
#define CHECK_NOT_DELAY_SLOT \
252
- if (ctx->envflags & DELAY_SLOT_MASK) { \
253
- goto do_illegal_slot; \
254
+ if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) { \
255
+ goto do_illegal_slot; \
256
}
257
258
#define CHECK_PRIVILEGED \
259
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
260
case 0x000b:        /* rts */
261
    CHECK_NOT_DELAY_SLOT
262
    tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
263
- ctx->envflags |= DELAY_SLOT;
264
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
265
    ctx->delayed_pc = (uint32_t) - 1;
266
    return;
267
case 0x0028:        /* clrmac */
268
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
269
    CHECK_NOT_DELAY_SLOT
270
gen_write_sr(cpu_ssr);
271
    tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
272
- ctx->envflags |= DELAY_SLOT_RTE;
273
+ ctx->envflags |= TB_FLAG_DELAY_SLOT_RTE;
274
    ctx->delayed_pc = (uint32_t) - 1;
275
ctx->base.is_jmp = DISAS_STOP;
276
    return;
277
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
278
    return;
279
case 0xe000:        /* mov #imm,Rn */
280
#ifdef CONFIG_USER_ONLY
281
- /* Detect the start of a gUSA region. If so, update envflags
282
- and end the TB. This will allow us to see the end of the
283
- region (stored in R0) in the next TB. */
284
+ /*
285
+ * Detect the start of a gUSA region (mov #-n, r15).
286
+ * If so, update envflags and end the TB. This will allow us
287
+ * to see the end of the region (stored in R0) in the next TB.
288
+ */
289
if (B11_8 == 15 && B7_0s < 0 &&
290
(tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
291
- ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
292
+ ctx->envflags =
293
+ deposit32(ctx->envflags, TB_FLAG_GUSA_SHIFT, 8, B7_0s);
294
ctx->base.is_jmp = DISAS_STOP;
295
}
296
#endif
297
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
298
case 0xa000:        /* bra disp */
299
    CHECK_NOT_DELAY_SLOT
300
ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
301
- ctx->envflags |= DELAY_SLOT;
302
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
303
    return;
304
case 0xb000:        /* bsr disp */
305
    CHECK_NOT_DELAY_SLOT
306
tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
307
ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
308
- ctx->envflags |= DELAY_SLOT;
309
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
310
    return;
311
}
312
313
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
314
    CHECK_NOT_DELAY_SLOT
315
tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
316
ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
317
- ctx->envflags |= DELAY_SLOT_CONDITIONAL;
318
+ ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
319
    return;
320
case 0x8900:        /* bt label */
321
    CHECK_NOT_DELAY_SLOT
322
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
323
    CHECK_NOT_DELAY_SLOT
324
tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
325
ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
326
- ctx->envflags |= DELAY_SLOT_CONDITIONAL;
327
+ ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
328
    return;
329
case 0x8800:        /* cmp/eq #imm,R0 */
330
tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
331
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
332
case 0x0023:        /* braf Rn */
333
    CHECK_NOT_DELAY_SLOT
334
tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
335
- ctx->envflags |= DELAY_SLOT;
336
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
337
    ctx->delayed_pc = (uint32_t) - 1;
338
    return;
339
case 0x0003:        /* bsrf Rn */
340
    CHECK_NOT_DELAY_SLOT
341
tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
342
    tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
343
- ctx->envflags |= DELAY_SLOT;
344
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
345
    ctx->delayed_pc = (uint32_t) - 1;
346
    return;
347
case 0x4015:        /* cmp/pl Rn */
348
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
349
case 0x402b:        /* jmp @Rn */
350
    CHECK_NOT_DELAY_SLOT
351
    tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
352
- ctx->envflags |= DELAY_SLOT;
353
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
354
    ctx->delayed_pc = (uint32_t) - 1;
355
    return;
356
case 0x400b:        /* jsr @Rn */
357
    CHECK_NOT_DELAY_SLOT
358
tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
359
    tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
360
- ctx->envflags |= DELAY_SLOT;
361
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
362
    ctx->delayed_pc = (uint32_t) - 1;
363
    return;
364
case 0x400e:        /* ldc Rm,SR */
365
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
366
fflush(stderr);
367
#endif
368
do_illegal:
369
- if (ctx->envflags & DELAY_SLOT_MASK) {
370
+ if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
371
do_illegal_slot:
372
gen_save_cpu_state(ctx, true);
373
gen_helper_raise_slot_illegal_instruction(cpu_env);
374
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
375
376
do_fpu_disabled:
377
gen_save_cpu_state(ctx, true);
378
- if (ctx->envflags & DELAY_SLOT_MASK) {
379
+ if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
380
gen_helper_raise_slot_fpu_disable(cpu_env);
381
} else {
382
gen_helper_raise_fpu_disable(cpu_env);
383
@@ -XXX,XX +XXX,XX @@ static void decode_opc(DisasContext * ctx)
384
385
_decode_opc(ctx);
386
387
- if (old_flags & DELAY_SLOT_MASK) {
388
+ if (old_flags & TB_FLAG_DELAY_SLOT_MASK) {
389
/* go out of the delay slot */
390
- ctx->envflags &= ~DELAY_SLOT_MASK;
391
+ ctx->envflags &= ~TB_FLAG_DELAY_SLOT_MASK;
392
393
/* When in an exclusive region, we must continue to the end
394
for conditional branches. */
395
- if (ctx->tbflags & GUSA_EXCLUSIVE
396
- && old_flags & DELAY_SLOT_CONDITIONAL) {
397
+ if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE
398
+ && old_flags & TB_FLAG_DELAY_SLOT_COND) {
399
gen_delayed_conditional_jump(ctx);
400
return;
401
}
402
/* Otherwise this is probably an invalid gUSA region.
403
Drop the GUSA bits so the next TB doesn't see them. */
404
- ctx->envflags &= ~GUSA_MASK;
405
+ ctx->envflags &= ~TB_FLAG_GUSA_MASK;
406
407
tcg_gen_movi_i32(cpu_flags, ctx->envflags);
408
- if (old_flags & DELAY_SLOT_CONDITIONAL) {
409
+ if (old_flags & TB_FLAG_DELAY_SLOT_COND) {
410
     gen_delayed_conditional_jump(ctx);
411
} else {
412
gen_jump(ctx);
413
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
414
}
415
416
/* The entire region has been translated. */
417
- ctx->envflags &= ~GUSA_MASK;
418
+ ctx->envflags &= ~TB_FLAG_GUSA_MASK;
419
ctx->base.pc_next = pc_end;
420
ctx->base.num_insns += max_insns - 1;
421
return;
422
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
423
424
/* Restart with the EXCLUSIVE bit set, within a TB run via
425
   cpu_exec_step_atomic holding the exclusive lock.  */
-        ctx->envflags |= GUSA_EXCLUSIVE;
+        ctx->envflags |= TB_FLAG_GUSA_EXCLUSIVE;
         gen_save_cpu_state(ctx, false);
         gen_helper_exclusive(cpu_env);
         ctx->base.is_jmp = DISAS_NORETURN;
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
                   (tbflags & (1 << SR_RB))) * 0x10;
     ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;

-    if (tbflags & GUSA_MASK) {
+#ifdef CONFIG_USER_ONLY
+    if (tbflags & TB_FLAG_GUSA_MASK) {
+        /* In gUSA exclusive region. */
         uint32_t pc = ctx->base.pc_next;
         uint32_t pc_end = ctx->base.tb->cs_base;
-        int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
+        int backup = sextract32(ctx->tbflags, TB_FLAG_GUSA_SHIFT, 8);
         int max_insns = (pc_end - pc) / 2;

         if (pc != pc_end + backup || max_insns < 2) {
             /* This is a malformed gUSA region.  Don't do anything special,
                since the interpreter is likely to get confused.  */
-            ctx->envflags &= ~GUSA_MASK;
-        } else if (tbflags & GUSA_EXCLUSIVE) {
+            ctx->envflags &= ~TB_FLAG_GUSA_MASK;
+        } else if (tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
             /* Regardless of single-stepping or the end of the page,
                we must complete execution of the gUSA region while
                holding the exclusive lock.  */
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
             return;
         }
     }
+#endif

     /* Since the ISA is fixed-width, we can bound by the number
        of instructions remaining on the page.  */
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     DisasContext *ctx = container_of(dcbase, DisasContext, base);

 #ifdef CONFIG_USER_ONLY
-    if (unlikely(ctx->envflags & GUSA_MASK)
-        && !(ctx->envflags & GUSA_EXCLUSIVE)) {
+    if (unlikely(ctx->envflags & TB_FLAG_GUSA_MASK)
+        && !(ctx->envflags & TB_FLAG_GUSA_EXCLUSIVE)) {
         /* We're in an gUSA region, and we have not already fallen
            back on using an exclusive region.  Attempt to parse the
            region into a single supported atomic operation.  Failure
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);

-    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
         /* Ending the region of exclusivity.  Clear the bits.  */
-        ctx->envflags &= ~GUSA_MASK;
+        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
     }

     switch (ctx->base.is_jmp) {
--
2.34.1

     }
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
     fold_setcond_tst_pow2(ctx, op, false);

     ctx->z_mask = 1;
-    ctx->s_mask = smask_from_zmask(1);
     return false;
 }

@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
     }

     ctx->z_mask = 1;
-    ctx->s_mask = smask_from_zmask(1);
     return false;

 do_setcond_const:
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
         break;
     CASE_OP_32_64(ld8u):
         ctx->z_mask = MAKE_64BIT_MASK(0, 8);
-        ctx->s_mask = MAKE_64BIT_MASK(9, 55);
         break;
     CASE_OP_32_64(ld16s):
         ctx->s_mask = MAKE_64BIT_MASK(16, 48);
         break;
     CASE_OP_32_64(ld16u):
         ctx->z_mask = MAKE_64BIT_MASK(0, 16);
-        ctx->s_mask = MAKE_64BIT_MASK(17, 47);
         break;
     case INDEX_op_ld32s_i64:
         ctx->s_mask = MAKE_64BIT_MASK(32, 32);
         break;
     case INDEX_op_ld32u_i64:
         ctx->z_mask = MAKE_64BIT_MASK(0, 32);
-        ctx->s_mask = MAKE_64BIT_MASK(33, 31);
         break;
     default:
         g_assert_not_reached();
--
2.43.0
Wrap the bare TranslationBlock pointer into a structure.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tb-hash.h       |  1 +
 accel/tcg/tb-jmp-cache.h  | 24 ++++++++++++++++++++++++
 include/exec/cpu-common.h |  1 +
 include/hw/core/cpu.h     | 15 +--------------
 include/qemu/typedefs.h   |  1 +
 accel/stubs/tcg-stub.c    |  4 ++++
 accel/tcg/cpu-exec.c      | 10 +++++++---
 accel/tcg/cputlb.c        |  9 +++++----
 accel/tcg/translate-all.c | 28 +++++++++++++++++++++++++---
 hw/core/cpu-common.c      |  3 +--
 plugins/core.c            |  2 +-
 trace/control-target.c    |  2 +-
 12 files changed, 72 insertions(+), 28 deletions(-)
 create mode 100644 accel/tcg/tb-jmp-cache.h

Change the representation from sign bit repetitions to all bits equal
to the sign bit, including the sign bit itself.

The previous format has a problem in that it is difficult to recreate
a valid sign mask after a shift operation: the "repetitions" part of
the previous format meant that applying the same shift as for the value
led to an off-by-one value.

The new format, including the sign bit itself, means that the sign mask
can be manipulated in exactly the same way as the value, and
canonicalization is easier.

Canonicalize the s_mask in fold_masks_zs, rather than requiring callers
to do so.  Treat 0 as a non-canonical but typeless input for no sign
information, which will be reset as appropriate for the data type.
We can easily fold in the data from z_mask while canonicalizing.

Temporarily disable optimizations using s_mask while each operation is
converted to use fold_masks_zs and to the new form.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 64 ++++++++++++--------------------------------------
 1 file changed, 15 insertions(+), 49 deletions(-)
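To make the new convention concrete before the diffs below, here is a
stand-alone sketch of the canonicalization step, mirroring the
fold_masks_zs hunk in the patch; the helper names are ours, not the
series':

    #include <stdint.h>

    static int clz64(uint64_t v)
    {
        return v ? __builtin_clzll(v) : 64;
    }

    /*
     * s_mask bit N is 1 iff value bit N is known to equal the msb, so a
     * canonical mask is a run of 1s from the top.  Rebuild it from the
     * sign bits already in s_mask, augmented by known-zero high bits
     * from z_mask.  (Assumes arithmetic right shift of negative values,
     * as QEMU does.)
     */
    static uint64_t canonicalize_s_mask(uint64_t s_mask, uint64_t z_mask)
    {
        int rep = clz64(~s_mask);       /* high bits matching the msb */
        int rep_z = clz64(z_mask);      /* known-zero high bits */
        rep = rep > rep_z ? rep : rep_z;
        rep = rep > 1 ? rep - 1 : 0;    /* the msb trivially matches itself */
        return (uint64_t)(INT64_MIN >> rep);
    }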
22
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
27
diff --git a/tcg/optimize.c b/tcg/optimize.c
23
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
24
--- a/accel/tcg/tb-hash.h
29
--- a/tcg/optimize.c
25
+++ b/accel/tcg/tb-hash.h
30
+++ b/tcg/optimize.c
26
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
27
#include "exec/cpu-defs.h"
32
QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
28
#include "exec/exec-all.h"
33
uint64_t val;
29
#include "qemu/xxhash.h"
34
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
30
+#include "tb-jmp-cache.h"
35
- uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
31
36
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
32
#ifdef CONFIG_SOFTMMU
37
} TempOptInfo;
33
38
34
diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
39
typedef struct OptContext {
35
new file mode 100644
40
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
36
index XXXXXXX..XXXXXXX
41
37
--- /dev/null
42
/* In flight values from optimization. */
38
+++ b/accel/tcg/tb-jmp-cache.h
43
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
39
@@ -XXX,XX +XXX,XX @@
44
- uint64_t s_mask; /* mask of clrsb(value) bits */
40
+/*
45
+ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
41
+ * The per-CPU TranslationBlock jump cache.
46
TCGType type;
42
+ *
47
} OptContext;
43
+ * Copyright (c) 2003 Fabrice Bellard
48
44
+ *
49
-/* Calculate the smask for a specific value. */
45
+ * SPDX-License-Identifier: GPL-2.0-or-later
50
-static uint64_t smask_from_value(uint64_t value)
46
+ */
47
+
48
+#ifndef ACCEL_TCG_TB_JMP_CACHE_H
49
+#define ACCEL_TCG_TB_JMP_CACHE_H
50
+
51
+#define TB_JMP_CACHE_BITS 12
52
+#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
53
+
54
+/*
55
+ * Accessed in parallel; all accesses to 'tb' must be atomic.
56
+ */
57
+struct CPUJumpCache {
58
+ struct {
59
+ TranslationBlock *tb;
60
+ } array[TB_JMP_CACHE_SIZE];
61
+};
62
+
63
+#endif /* ACCEL_TCG_TB_JMP_CACHE_H */
64
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
65
index XXXXXXX..XXXXXXX 100644
66
--- a/include/exec/cpu-common.h
67
+++ b/include/exec/cpu-common.h
68
@@ -XXX,XX +XXX,XX @@ void cpu_list_unlock(void);
69
unsigned int cpu_list_generation_id_get(void);
70
71
void tcg_flush_softmmu_tlb(CPUState *cs);
72
+void tcg_flush_jmp_cache(CPUState *cs);
73
74
void tcg_iommu_init_notifier_list(CPUState *cpu);
75
void tcg_iommu_free_notifier_list(CPUState *cpu);
76
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
77
index XXXXXXX..XXXXXXX 100644
78
--- a/include/hw/core/cpu.h
79
+++ b/include/hw/core/cpu.h
80
@@ -XXX,XX +XXX,XX @@ struct kvm_run;
81
struct hax_vcpu_state;
82
struct hvf_vcpu_state;
83
84
-#define TB_JMP_CACHE_BITS 12
85
-#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
86
-
87
/* work queue */
88
89
/* The union type allows passing of 64 bit target pointers on 32 bit
90
@@ -XXX,XX +XXX,XX @@ struct CPUState {
91
CPUArchState *env_ptr;
92
IcountDecr *icount_decr_ptr;
93
94
- /* Accessed in parallel; all accesses must be atomic */
95
- TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
96
+ CPUJumpCache *tb_jmp_cache;
97
98
struct GDBRegisterState *gdb_regs;
99
int gdb_num_regs;
100
@@ -XXX,XX +XXX,XX @@ extern CPUTailQ cpus;
101
102
extern __thread CPUState *current_cpu;
103
104
-static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
105
-{
51
-{
106
- unsigned int i;
52
- int rep = clrsb64(value);
107
-
53
- return ~(~0ull >> rep);
108
- for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
109
- qatomic_set(&cpu->tb_jmp_cache[i], NULL);
110
- }
111
-}
54
-}
112
-
55
-
113
/**
56
-/*
114
* qemu_tcg_mttcg_enabled:
57
- * Calculate the smask for a given set of known-zeros.
115
* Check whether we are running MultiThread TCG or not.
58
- * If there are lots of zeros on the left, we can consider the remainder
116
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
59
- * an unsigned field, and thus the corresponding signed field is one bit
117
index XXXXXXX..XXXXXXX 100644
60
- * larger.
118
--- a/include/qemu/typedefs.h
61
- */
119
+++ b/include/qemu/typedefs.h
62
-static uint64_t smask_from_zmask(uint64_t zmask)
120
@@ -XXX,XX +XXX,XX @@ typedef struct CoMutex CoMutex;
63
-{
121
typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
64
- /*
122
typedef struct CPUAddressSpace CPUAddressSpace;
65
- * Only the 0 bits are significant for zmask, thus the msb itself
123
typedef struct CPUArchState CPUArchState;
66
- * must be zero, else we have no sign information.
124
+typedef struct CPUJumpCache CPUJumpCache;
67
- */
125
typedef struct CPUState CPUState;
68
- int rep = clz64(zmask);
126
typedef struct CPUTLBEntryFull CPUTLBEntryFull;
69
- if (rep == 0) {
127
typedef struct DeviceListener DeviceListener;
70
- return 0;
128
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
71
- }
129
index XXXXXXX..XXXXXXX 100644
72
- rep -= 1;
130
--- a/accel/stubs/tcg-stub.c
73
- return ~(~0ull >> rep);
131
+++ b/accel/stubs/tcg-stub.c
74
-}
132
@@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
75
-
76
-/*
77
- * Recreate a properly left-aligned smask after manipulation.
78
- * Some bit-shuffling, particularly shifts and rotates, may
79
- * retain sign bits on the left, but may scatter disconnected
80
- * sign bits on the right. Retain only what remains to the left.
81
- */
82
-static uint64_t smask_from_smask(int64_t smask)
83
-{
84
- /* Only the 1 bits are significant for smask */
85
- return smask_from_zmask(~smask);
86
-}
87
-
88
static inline TempOptInfo *ts_info(TCGTemp *ts)
133
{
89
{
134
}
90
return ts->state_ptr;
135
91
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
136
+void tcg_flush_jmp_cache(CPUState *cpu)
92
ti->is_const = true;
137
+{
93
ti->val = ts->val;
138
+}
94
ti->z_mask = ts->val;
139
+
95
- ti->s_mask = smask_from_value(ts->val);
140
int probe_access_flags(CPUArchState *env, target_ulong addr,
96
+ ti->s_mask = INT64_MIN >> clrsb64(ts->val);
141
MMUAccessType access_type, int mmu_idx,
97
} else {
142
bool nonfault, void **phost, uintptr_t retaddr)
98
ti->is_const = false;
143
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
99
ti->z_mask = -1;
144
index XXXXXXX..XXXXXXX 100644
100
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
145
--- a/accel/tcg/cpu-exec.c
101
*/
146
+++ b/accel/tcg/cpu-exec.c
102
if (i == 0) {
147
@@ -XXX,XX +XXX,XX @@
103
ts_info(ts)->z_mask = ctx->z_mask;
148
#include "sysemu/replay.h"
104
- ts_info(ts)->s_mask = ctx->s_mask;
149
#include "sysemu/tcg.h"
105
}
150
#include "exec/helper-proto.h"
151
+#include "tb-jmp-cache.h"
152
#include "tb-hash.h"
153
#include "tb-context.h"
154
#include "internal.h"
155
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
156
tcg_debug_assert(!(cflags & CF_INVALID));
157
158
hash = tb_jmp_cache_hash_func(pc);
159
- tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
160
+ tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);
161
162
if (likely(tb &&
163
tb->pc == pc &&
164
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
165
if (tb == NULL) {
166
return NULL;
167
}
168
- qatomic_set(&cpu->tb_jmp_cache[hash], tb);
169
+ qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb);
170
return tb;
171
}
172
173
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
174
175
tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
176
if (tb == NULL) {
177
+ uint32_t h;
178
+
179
mmap_lock();
180
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
181
mmap_unlock();
182
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
183
* We add the TB in the virtual pc hash table
184
* for the fast lookup
185
*/
186
- qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
187
+ h = tb_jmp_cache_hash_func(pc);
188
+ qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
189
}
190
191
#ifndef CONFIG_USER_ONLY
192
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
193
index XXXXXXX..XXXXXXX 100644
194
--- a/accel/tcg/cputlb.c
195
+++ b/accel/tcg/cputlb.c
196
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
197
198
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
199
{
200
- unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
201
+ int i, i0 = tb_jmp_cache_hash_page(page_addr);
202
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
203
204
for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
205
- qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
206
+ qatomic_set(&jc->array[i0 + i].tb, NULL);
207
}
106
}
208
}
107
}
209
108
@@ -XXX,XX +XXX,XX @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
210
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
109
* The passed s_mask may be augmented by z_mask.
211
110
*/
212
qemu_spin_unlock(&env_tlb(env)->c.lock);
111
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
213
112
- uint64_t z_mask, uint64_t s_mask)
214
- cpu_tb_jmp_cache_clear(cpu);
113
+ uint64_t z_mask, int64_t s_mask)
215
+ tcg_flush_jmp_cache(cpu);
114
{
216
115
const TCGOpDef *def = &tcg_op_defs[op->opc];
217
if (to_clean == ALL_MMUIDX_BITS) {
116
TCGTemp *ts;
218
qatomic_set(&env_tlb(env)->c.full_flush_count,
117
TempOptInfo *ti;
219
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
118
+ int rep;
220
* longer to clear each entry individually than it will to clear it all.
119
120
/* Only single-output opcodes are supported here. */
121
tcg_debug_assert(def->nb_oargs == 1);
122
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
221
*/
123
*/
222
if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) {
124
if (ctx->type == TCG_TYPE_I32) {
223
- cpu_tb_jmp_cache_clear(cpu);
125
z_mask = (int32_t)z_mask;
224
+ tcg_flush_jmp_cache(cpu);
126
- s_mask |= MAKE_64BIT_MASK(32, 32);
225
return;
127
+ s_mask |= INT32_MIN;
226
}
128
}
227
129
228
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
130
if (z_mask == 0) {
229
index XXXXXXX..XXXXXXX 100644
131
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
230
--- a/accel/tcg/translate-all.c
132
231
+++ b/accel/tcg/translate-all.c
133
ti = ts_info(ts);
232
@@ -XXX,XX +XXX,XX @@
134
ti->z_mask = z_mask;
233
#include "sysemu/tcg.h"
135
- ti->s_mask = s_mask | smask_from_zmask(z_mask);
234
#include "qapi/error.h"
136
+
235
#include "hw/core/tcg-cpu-ops.h"
137
+ /* Canonicalize s_mask and incorporate data from z_mask. */
236
+#include "tb-jmp-cache.h"
138
+ rep = clz64(~s_mask);
237
#include "tb-hash.h"
139
+ rep = MAX(rep, clz64(z_mask));
238
#include "tb-context.h"
140
+ rep = MAX(rep - 1, 0);
239
#include "internal.h"
141
+ ti->s_mask = INT64_MIN >> rep;
240
@@ -XXX,XX +XXX,XX @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
142
+
143
return true;
144
}
145
146
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
147
148
ctx->z_mask = z_mask;
149
ctx->s_mask = s_mask;
150
- if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
151
+ if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
152
return true;
241
}
153
}
242
154
243
CPU_FOREACH(cpu) {
155
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
244
- cpu_tb_jmp_cache_clear(cpu);
156
s_mask |= MAKE_64BIT_MASK(len, 64 - len);
245
+ tcg_flush_jmp_cache(cpu);
157
ctx->s_mask = s_mask;
158
159
- if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
160
+ if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
161
return true;
246
}
162
}
247
163
248
qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
164
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
249
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
165
ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
250
/* remove the TB from the hash list */
166
251
h = tb_jmp_cache_hash_func(tb->pc);
167
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
252
CPU_FOREACH(cpu) {
168
- ctx->s_mask = smask_from_smask(s_mask);
253
- if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
169
254
- qatomic_set(&cpu->tb_jmp_cache[h], NULL);
170
return fold_masks(ctx, op);
255
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
256
+ if (qatomic_read(&jc->array[h].tb) == tb) {
257
+ qatomic_set(&jc->array[h].tb, NULL);
258
}
259
}
171
}
260
261
@@ -XXX,XX +XXX,XX @@ int page_unprotect(target_ulong address, uintptr_t pc)
262
}
263
#endif /* CONFIG_USER_ONLY */
264
265
+/*
266
+ * Called by generic code at e.g. cpu reset after cpu creation,
267
+ * therefore we must be prepared to allocate the jump cache.
268
+ */
269
+void tcg_flush_jmp_cache(CPUState *cpu)
270
+{
271
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
272
+
273
+ if (likely(jc)) {
274
+ for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
275
+ qatomic_set(&jc->array[i].tb, NULL);
276
+ }
277
+ } else {
278
+ /* This should happen once during realize, and thus never race. */
279
+ jc = g_new0(CPUJumpCache, 1);
280
+ jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
281
+ assert(jc == NULL);
282
+ }
283
+}
284
+
285
/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
286
void tcg_flush_softmmu_tlb(CPUState *cs)
287
{
288
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
289
index XXXXXXX..XXXXXXX 100644
290
--- a/hw/core/cpu-common.c
291
+++ b/hw/core/cpu-common.c
292
@@ -XXX,XX +XXX,XX @@ static void cpu_common_reset(DeviceState *dev)
293
cpu->cflags_next_tb = -1;
294
295
if (tcg_enabled()) {
296
- cpu_tb_jmp_cache_clear(cpu);
297
-
298
+ tcg_flush_jmp_cache(cpu);
299
tcg_flush_softmmu_tlb(cpu);
300
}
301
}
302
diff --git a/plugins/core.c b/plugins/core.c
303
index XXXXXXX..XXXXXXX 100644
304
--- a/plugins/core.c
305
+++ b/plugins/core.c
306
@@ -XXX,XX +XXX,XX @@ struct qemu_plugin_ctx *plugin_id_to_ctx_locked(qemu_plugin_id_t id)
307
static void plugin_cpu_update__async(CPUState *cpu, run_on_cpu_data data)
308
{
309
bitmap_copy(cpu->plugin_mask, &data.host_ulong, QEMU_PLUGIN_EV_MAX);
310
- cpu_tb_jmp_cache_clear(cpu);
311
+ tcg_flush_jmp_cache(cpu);
312
}
313
314
static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
315
diff --git a/trace/control-target.c b/trace/control-target.c
316
index XXXXXXX..XXXXXXX 100644
317
--- a/trace/control-target.c
318
+++ b/trace/control-target.c
319
@@ -XXX,XX +XXX,XX @@ static void trace_event_synchronize_vcpu_state_dynamic(
320
{
321
bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
322
CPU_TRACE_DSTATE_MAX_EVENTS);
323
- cpu_tb_jmp_cache_clear(vcpu);
324
+ tcg_flush_jmp_cache(vcpu);
325
}
326
327
void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
328
--
172
--
329
2.34.1
173
2.43.0
330
331
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 9 +++++----
5
1 file changed, 5 insertions(+), 4 deletions(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static void finish_ebb(OptContext *ctx)
12
remove_mem_copy_all(ctx);
13
}
14
15
-static void finish_folding(OptContext *ctx, TCGOp *op)
16
+static bool finish_folding(OptContext *ctx, TCGOp *op)
17
{
18
const TCGOpDef *def = &tcg_op_defs[op->opc];
19
int i, nb_oargs;
20
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
21
ts_info(ts)->z_mask = ctx->z_mask;
22
}
23
}
24
+ return true;
25
}
26
27
/*
28
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
29
fold_xi_to_x(ctx, op, 0)) {
30
return true;
31
}
32
- return false;
33
+ return finish_folding(ctx, op);
34
}
35
36
/* We cannot as yet do_constant_folding with vectors. */
37
@@ -XXX,XX +XXX,XX @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op)
38
fold_xi_to_x(ctx, op, 0)) {
39
return true;
40
}
41
- return false;
42
+ return finish_folding(ctx, op);
43
}
44
45
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
46
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
47
op->args[4] = arg_new_constant(ctx, bl);
48
op->args[5] = arg_new_constant(ctx, bh);
49
}
50
- return false;
51
+ return finish_folding(ctx, op);
52
}
53
54
static bool fold_add2(OptContext *ctx, TCGOp *op)
55
--
56
2.43.0
Introduce ti_is_const, ti_const_val, ti_is_const_val.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

From: Leandro Lupori <leandro.lupori@eldorado.org.br>

PowerPC64 processors handle direct branches better than indirect
ones, resulting in fewer stalled cycles and branch misses.

However, PPC's tb_target_set_jmp_target() was only using direct
branches for 16-bit jumps, while PowerPC64's unconditional branch
instructions are able to handle displacements of up to 26 bits.
To take advantage of this, jumps whose displacements fit between
17 and 26 bits are now also converted to direct branches.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Leandro Lupori <leandro.lupori@eldorado.org.br>
[rth: Expanded some commentary.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 119 +++++++++++++++++++++++++++++----------
 1 file changed, 88 insertions(+), 31 deletions(-)
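As a quick illustration of the displacement test involved (our own
helper, not code from the patch): the PowerPC B instruction encodes a
26-bit signed, 4-byte-aligned displacement, so a jump can be made
direct exactly when the distance survives truncation to that field.

    #include <stdbool.h>
    #include <stdint.h>

    /* True if disp fits the B encoding: 26-bit signed, low 2 bits zero. */
    static bool fits_direct_branch(int64_t disp)
    {
        int64_t field = disp & 0x03fffffc;                /* bits 2..25 */
        int64_t sext = (field ^ 0x02000000) - 0x02000000; /* sign-extend bit 25 */
        return (disp & 3) == 0 && sext == disp;
    }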
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
21
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
22
--- a/tcg/ppc/tcg-target.c.inc
10
--- a/tcg/optimize.c
23
+++ b/tcg/ppc/tcg-target.c.inc
11
+++ b/tcg/optimize.c
24
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
12
@@ -XXX,XX +XXX,XX @@ static inline TempOptInfo *arg_info(TCGArg arg)
25
tcg_out32(s, insn);
13
return ts_info(arg_temp(arg));
26
}
14
}
27
15
28
+static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2)
16
+static inline bool ti_is_const(TempOptInfo *ti)
29
+{
17
+{
30
+ if (HOST_BIG_ENDIAN) {
18
+ return ti->is_const;
31
+ return (uint64_t)i1 << 32 | i2;
32
+ }
33
+ return (uint64_t)i2 << 32 | i1;
34
+}
19
+}
35
+
20
+
36
+static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw,
21
+static inline uint64_t ti_const_val(TempOptInfo *ti)
37
+ tcg_insn_unit i0, tcg_insn_unit i1)
38
+{
22
+{
39
+#if TCG_TARGET_REG_BITS == 64
23
+ return ti->val;
40
+ qatomic_set((uint64_t *)rw, make_pair(i0, i1));
41
+ flush_idcache_range(rx, rw, 8);
42
+#else
43
+ qemu_build_not_reached();
44
+#endif
45
+}
24
+}
46
+
25
+
47
+static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw,
26
+static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
48
+ tcg_insn_unit i0, tcg_insn_unit i1,
49
+ tcg_insn_unit i2, tcg_insn_unit i3)
50
+{
27
+{
51
+ uint64_t p[2];
28
+ return ti_is_const(ti) && ti_const_val(ti) == val;
52
+
53
+ p[!HOST_BIG_ENDIAN] = make_pair(i0, i1);
54
+ p[HOST_BIG_ENDIAN] = make_pair(i2, i3);
55
+
56
+ /*
57
+ * There's no convenient way to get the compiler to allocate a pair
58
+ * of registers at an even index, so copy into r6/r7 and clobber.
59
+ */
60
+ asm("mr %%r6, %1\n\t"
61
+ "mr %%r7, %2\n\t"
62
+ "stq %%r6, %0"
63
+ : "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7");
64
+ flush_idcache_range(rx, rw, 16);
65
+}
29
+}
66
+
30
+
67
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
31
static inline bool ts_is_const(TCGTemp *ts)
68
uintptr_t jmp_rw, uintptr_t addr)
69
{
32
{
70
- if (TCG_TARGET_REG_BITS == 64) {
33
- return ts_info(ts)->is_const;
71
- tcg_insn_unit i1, i2;
34
+ return ti_is_const(ts_info(ts));
72
- intptr_t tb_diff = addr - tc_ptr;
73
- intptr_t br_diff = addr - (jmp_rx + 4);
74
- uint64_t pair;
75
+ tcg_insn_unit i0, i1, i2, i3;
76
+ intptr_t tb_diff = addr - tc_ptr;
77
+ intptr_t br_diff = addr - (jmp_rx + 4);
78
+ intptr_t lo, hi;
79
80
- /* This does not exercise the range of the branch, but we do
81
- still need to be able to load the new value of TCG_REG_TB.
82
- But this does still happen quite often. */
83
- if (tb_diff == (int16_t)tb_diff) {
84
- i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
85
- i2 = B | (br_diff & 0x3fffffc);
86
- } else {
87
- intptr_t lo = (int16_t)tb_diff;
88
- intptr_t hi = (int32_t)(tb_diff - lo);
89
- assert(tb_diff == hi + lo);
90
- i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
91
- i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
92
- }
93
-#if HOST_BIG_ENDIAN
94
- pair = (uint64_t)i1 << 32 | i2;
95
-#else
96
- pair = (uint64_t)i2 << 32 | i1;
97
-#endif
98
-
99
- /* As per the enclosing if, this is ppc64. Avoid the _Static_assert
100
- within qatomic_set that would fail to build a ppc32 host. */
101
- qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
102
- flush_idcache_range(jmp_rx, jmp_rw, 8);
103
- } else {
104
+ if (TCG_TARGET_REG_BITS == 32) {
105
intptr_t diff = addr - jmp_rx;
106
tcg_debug_assert(in_range_b(diff));
107
qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
108
flush_idcache_range(jmp_rx, jmp_rw, 4);
109
+ return;
110
}
111
+
112
+ /*
113
+ * For 16-bit displacements, we can use a single add + branch.
114
+ * This happens quite often.
115
+ */
116
+ if (tb_diff == (int16_t)tb_diff) {
117
+ i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
118
+ i1 = B | (br_diff & 0x3fffffc);
119
+ ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
120
+ return;
121
+ }
122
+
123
+ lo = (int16_t)tb_diff;
124
+ hi = (int32_t)(tb_diff - lo);
125
+ assert(tb_diff == hi + lo);
126
+ i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
127
+ i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
128
+
129
+ /*
130
+ * Without stq from 2.07, we can only update two insns,
131
+ * and those must be the ones that load the target address.
132
+ */
133
+ if (!have_isa_2_07) {
134
+ ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
135
+ return;
136
+ }
137
+
138
+ /*
139
+ * For 26-bit displacements, we can use a direct branch.
140
+ * Otherwise we still need the indirect branch, which we
141
+ * must restore after a potential direct branch write.
142
+ */
143
+ br_diff -= 4;
144
+ if (in_range_b(br_diff)) {
145
+ i2 = B | (br_diff & 0x3fffffc);
146
+ i3 = NOP;
147
+ } else {
148
+ i2 = MTSPR | RS(TCG_REG_TB) | CTR;
149
+ i3 = BCCTR | BO_ALWAYS;
150
+ }
151
+ ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3);
152
}
35
}
153
36
154
static void tcg_out_call_int(TCGContext *s, int lk,
37
static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
155
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
38
{
156
if (s->tb_jmp_insn_offset) {
39
- TempOptInfo *ti = ts_info(ts);
157
/* Direct jump. */
40
- return ti->is_const && ti->val == val;
158
if (TCG_TARGET_REG_BITS == 64) {
41
+ return ti_is_const_val(ts_info(ts), val);
159
- /* Ensure the next insns are 8-byte aligned. */
42
}
160
- if ((uintptr_t)s->code_ptr & 7) {
43
161
+ /* Ensure the next insns are 8 or 16-byte aligned. */
44
static inline bool arg_is_const(TCGArg arg)
162
+ while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) {
163
tcg_out32(s, NOP);
164
}
165
s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
166
--
45
--
167
2.34.1
46
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Sink mask computation below fold_affected_mask early exit.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 30 ++++++++++++++++--------------
8
1 file changed, 16 insertions(+), 14 deletions(-)
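For reference, the mask algebra this patch routes through
fold_masks_zs, restated as a stand-alone helper (ours, not the
patch's): for res = a & b, a bit can be nonzero only if it can be
nonzero in both inputs, and it is a sign copy only if it is one in
both.

    #include <stdint.h>

    /* z_mask: bit is 0 iff the value bit is known 0.
       s_mask: bit is 1 iff the value bit is known to match the msb. */
    static void and_masks(uint64_t z1, uint64_t s1, uint64_t z2, uint64_t s2,
                          uint64_t *z_out, uint64_t *s_out)
    {
        *z_out = z1 & z2;   /* 1 possible only where possible in both */
        *s_out = s1 & s2;   /* sign copies must hold in both inputs */
    }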
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_add2(OptContext *ctx, TCGOp *op)
15
16
static bool fold_and(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t z1, z2;
19
+ uint64_t z1, z2, z_mask, s_mask;
20
+ TempOptInfo *t1, *t2;
21
22
if (fold_const2_commutative(ctx, op) ||
23
fold_xi_to_i(ctx, op, 0) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
25
return true;
26
}
27
28
- z1 = arg_info(op->args[1])->z_mask;
29
- z2 = arg_info(op->args[2])->z_mask;
30
- ctx->z_mask = z1 & z2;
31
-
32
- /*
33
- * Sign repetitions are perforce all identical, whether they are 1 or 0.
34
- * Bitwise operations preserve the relative quantity of the repetitions.
35
- */
36
- ctx->s_mask = arg_info(op->args[1])->s_mask
37
- & arg_info(op->args[2])->s_mask;
38
+ t1 = arg_info(op->args[1]);
39
+ t2 = arg_info(op->args[2]);
40
+ z1 = t1->z_mask;
41
+ z2 = t2->z_mask;
42
43
/*
44
* Known-zeros does not imply known-ones. Therefore unless
45
* arg2 is constant, we can't infer affected bits from it.
46
*/
47
- if (arg_is_const(op->args[2]) &&
48
- fold_affected_mask(ctx, op, z1 & ~z2)) {
49
+ if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
50
return true;
51
}
52
53
- return fold_masks(ctx, op);
54
+ z_mask = z1 & z2;
55
+
56
+ /*
57
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
58
+ * Bitwise operations preserve the relative quantity of the repetitions.
59
+ */
60
+ s_mask = t1->s_mask & t2->s_mask;
61
+
62
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
63
}
64
65
static bool fold_andc(OptContext *ctx, TCGOp *op)
66
--
67
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Avoid double inversion of the value of the second const operand.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 21 +++++++++++----------
8
1 file changed, 11 insertions(+), 10 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
15
16
static bool fold_andc(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t z1;
19
+ uint64_t z_mask, s_mask;
20
+ TempOptInfo *t1, *t2;
21
22
if (fold_const2(ctx, op) ||
23
fold_xx_to_i(ctx, op, 0) ||
24
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
25
return true;
26
}
27
28
- z1 = arg_info(op->args[1])->z_mask;
29
+ t1 = arg_info(op->args[1]);
30
+ t2 = arg_info(op->args[2]);
31
+ z_mask = t1->z_mask;
32
33
/*
34
* Known-zeros does not imply known-ones. Therefore unless
35
* arg2 is constant, we can't infer anything from it.
36
*/
37
- if (arg_is_const(op->args[2])) {
38
- uint64_t z2 = ~arg_info(op->args[2])->z_mask;
39
- if (fold_affected_mask(ctx, op, z1 & ~z2)) {
40
+ if (ti_is_const(t2)) {
41
+ uint64_t v2 = ti_const_val(t2);
42
+ if (fold_affected_mask(ctx, op, z_mask & v2)) {
43
return true;
44
}
45
- z1 &= z2;
46
+ z_mask &= ~v2;
47
}
48
- ctx->z_mask = z1;
49
50
- ctx->s_mask = arg_info(op->args[1])->s_mask
51
- & arg_info(op->args[2])->s_mask;
52
- return fold_masks(ctx, op);
53
+ s_mask = t1->s_mask & t2->s_mask;
54
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
55
}
56
57
static bool fold_brcond(OptContext *ctx, TCGOp *op)
58
--
59
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Always set s_mask along the BSWAP_OS path, since the result is
3
being explicitly sign-extended.
1
4
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 21 ++++++++++-----------
9
1 file changed, 10 insertions(+), 11 deletions(-)
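The BSWAP_OS reasoning in isolation (our sketch of the 16-bit case,
not code from the patch): because the swapped halfword is then
sign-extended, every bit above bit 15 of the result is a copy of
bit 15, whatever the input was.

    #include <stdint.h>

    /* bswap16 with output sign-extension (the "OS" flag): the swapped
       halfword is widened as int16_t, so s_mask may unconditionally
       cover bit 15 and everything above it on this path. */
    static int64_t bswap16_os(uint64_t x)
    {
        uint16_t swapped = (uint16_t)(((x & 0xff) << 8) | ((x >> 8) & 0xff));
        return (int16_t)swapped;
    }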
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
16
static bool fold_bswap(OptContext *ctx, TCGOp *op)
17
{
18
uint64_t z_mask, s_mask, sign;
19
+ TempOptInfo *t1 = arg_info(op->args[1]);
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t = arg_info(op->args[1])->val;
23
-
24
- t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
25
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
26
+ if (ti_is_const(t1)) {
27
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
28
+ do_constant_folding(op->opc, ctx->type,
29
+ ti_const_val(t1),
30
+ op->args[2]));
31
}
32
33
- z_mask = arg_info(op->args[1])->z_mask;
34
-
35
+ z_mask = t1->z_mask;
36
switch (op->opc) {
37
case INDEX_op_bswap16_i32:
38
case INDEX_op_bswap16_i64:
39
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
40
/* If the sign bit may be 1, force all the bits above to 1. */
41
if (z_mask & sign) {
42
z_mask |= sign;
43
- s_mask = sign << 1;
44
}
45
+ /* The value and therefore s_mask is explicitly sign-extended. */
46
+ s_mask = sign;
47
break;
48
default:
49
/* The high bits are undefined: force all bits above the sign to 1. */
50
z_mask |= sign << 1;
51
break;
52
}
53
- ctx->z_mask = z_mask;
54
- ctx->s_mask = s_mask;
55
56
- return fold_masks(ctx, op);
57
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
58
}
59
60
static bool fold_call(OptContext *ctx, TCGOp *op)
61
--
62
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Compute s_mask from the union of the maximum count and the
3
op2 fallback for op1 being zero.
1
4
5
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 15 ++++++++++-----
9
1 file changed, 10 insertions(+), 5 deletions(-)
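Spelled out as a stand-alone helper (names ours): a 64-bit count lies
in [0, 64], and when the first operand may be zero the op yields its
second operand instead, so both masks must also admit that fallback.

    #include <stdint.h>

    static void count_zeros_masks(uint64_t z2, uint64_t s2,
                                  uint64_t *z_out, uint64_t *s_out)
    {
        uint64_t z_mask = 64 | 63;  /* counts 0..64: only bits 6..0 */
        uint64_t s_mask = ~z_mask;  /* high bits known 0, hence sign copies */

        *z_out = z_mask | z2;       /* the zero-input fallback may add bits */
        *s_out = s_mask & s2;       /* keep sign info common to both outcomes */
    }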
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
16
17
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
18
{
19
- uint64_t z_mask;
20
+ uint64_t z_mask, s_mask;
21
+ TempOptInfo *t1 = arg_info(op->args[1]);
22
+ TempOptInfo *t2 = arg_info(op->args[2]);
23
24
- if (arg_is_const(op->args[1])) {
25
- uint64_t t = arg_info(op->args[1])->val;
26
+ if (ti_is_const(t1)) {
27
+ uint64_t t = ti_const_val(t1);
28
29
if (t != 0) {
30
t = do_constant_folding(op->opc, ctx->type, t, 0);
31
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
32
default:
33
g_assert_not_reached();
34
}
35
- ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
36
- return false;
37
+ s_mask = ~z_mask;
38
+ z_mask |= t2->z_mask;
39
+ s_mask &= t2->s_mask;
40
+
41
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
42
}
43
44
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
45
--
46
2.43.0
New patch
1
Add fold_masks_z as a trivial wrapper around fold_masks_zs.
2
Avoid the use of the OptContext slots.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 13 ++++++++++---
8
1 file changed, 10 insertions(+), 3 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
15
return true;
16
}
17
18
+static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
19
+{
20
+ return fold_masks_zs(ctx, op, z_mask, 0);
21
+}
22
+
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
24
{
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
26
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
27
28
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
29
{
30
+ uint64_t z_mask;
31
+
32
if (fold_const1(ctx, op)) {
33
return true;
34
}
35
36
switch (ctx->type) {
37
case TCG_TYPE_I32:
38
- ctx->z_mask = 32 | 31;
39
+ z_mask = 32 | 31;
40
break;
41
case TCG_TYPE_I64:
42
- ctx->z_mask = 64 | 63;
43
+ z_mask = 64 | 63;
44
break;
45
default:
46
g_assert_not_reached();
47
}
48
- return false;
49
+ return fold_masks_z(ctx, op, z_mask);
50
}
51
52
static bool fold_deposit(OptContext *ctx, TCGOp *op)
53
--
54
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
When we fold to and, use fold_and.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 35 +++++++++++++++++------------------
8
1 file changed, 17 insertions(+), 18 deletions(-)
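The rewrites rest on two simple deposit identities; a self-checking
sketch (our code, not the patch's):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t mask64(int ofs, int len)    /* len in [1, 64] */
    {
        return (~(uint64_t)0 >> (64 - len)) << ofs;
    }

    int main(void)
    {
        uint64_t a = 0x1122334455667788ull, b = 0xffeeddccbbaa9988ull;

        /* deposit(0, 0, len, b)   == b & mask(0, len):   an AND with b. */
        assert((b & mask64(0, 16)) == 0x9988ull);
        /* deposit(a, ofs, len, 0) == a & ~mask(ofs, len): an AND with a. */
        assert((a & ~mask64(16, 16)) == 0x1122334400007788ull);
        return 0;
    }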
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
15
16
static bool fold_deposit(OptContext *ctx, TCGOp *op)
17
{
18
+ TempOptInfo *t1 = arg_info(op->args[1]);
19
+ TempOptInfo *t2 = arg_info(op->args[2]);
20
+ int ofs = op->args[3];
21
+ int len = op->args[4];
22
TCGOpcode and_opc;
23
+ uint64_t z_mask;
24
25
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
26
- uint64_t t1 = arg_info(op->args[1])->val;
27
- uint64_t t2 = arg_info(op->args[2])->val;
28
-
29
- t1 = deposit64(t1, op->args[3], op->args[4], t2);
30
- return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
31
+ if (ti_is_const(t1) && ti_is_const(t2)) {
32
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
33
+ deposit64(ti_const_val(t1), ofs, len,
34
+ ti_const_val(t2)));
35
}
36
37
switch (ctx->type) {
38
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
39
}
40
41
/* Inserting a value into zero at offset 0. */
42
- if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
43
- uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
44
+ if (ti_is_const_val(t1, 0) && ofs == 0) {
45
+ uint64_t mask = MAKE_64BIT_MASK(0, len);
46
47
op->opc = and_opc;
48
op->args[1] = op->args[2];
49
op->args[2] = arg_new_constant(ctx, mask);
50
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
51
- return false;
52
+ return fold_and(ctx, op);
53
}
54
55
/* Inserting zero into a value. */
56
- if (arg_is_const_val(op->args[2], 0)) {
57
- uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
58
+ if (ti_is_const_val(t2, 0)) {
59
+ uint64_t mask = deposit64(-1, ofs, len, 0);
60
61
op->opc = and_opc;
62
op->args[2] = arg_new_constant(ctx, mask);
63
- ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
64
- return false;
65
+ return fold_and(ctx, op);
66
}
67
68
- ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
69
- op->args[3], op->args[4],
70
- arg_info(op->args[2])->z_mask);
71
- return false;
72
+ z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
73
+ return fold_masks_z(ctx, op, z_mask);
74
}
75
76
static bool fold_divide(OptContext *ctx, TCGOp *op)
77
--
78
2.43.0
New patch
1
The input which overlaps the sign bit of the output can
2
have its input s_mask propagated to the output s_mask.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 14 ++++++++++++--
8
1 file changed, 12 insertions(+), 2 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
15
TempOptInfo *t2 = arg_info(op->args[2]);
16
int ofs = op->args[3];
17
int len = op->args[4];
18
+ int width;
19
TCGOpcode and_opc;
20
- uint64_t z_mask;
21
+ uint64_t z_mask, s_mask;
22
23
if (ti_is_const(t1) && ti_is_const(t2)) {
24
return tcg_opt_gen_movi(ctx, op, op->args[0],
25
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
26
switch (ctx->type) {
27
case TCG_TYPE_I32:
28
and_opc = INDEX_op_and_i32;
29
+ width = 32;
30
break;
31
case TCG_TYPE_I64:
32
and_opc = INDEX_op_and_i64;
33
+ width = 64;
34
break;
35
default:
36
g_assert_not_reached();
37
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
38
return fold_and(ctx, op);
39
}
40
41
+ /* The s_mask from the top portion of the deposit is still valid. */
42
+ if (ofs + len == width) {
43
+ s_mask = t2->s_mask << ofs;
44
+ } else {
45
+ s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
46
+ }
47
+
48
z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
49
- return fold_masks_z(ctx, op, z_mask);
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
51
}
52
53
static bool fold_divide(OptContext *ctx, TCGOp *op)
54
--
55
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
12
fold_xi_to_x(ctx, op, 1)) {
13
return true;
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_dup(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 4 ++--
5
1 file changed, 2 insertions(+), 2 deletions(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_dup(OptContext *ctx, TCGOp *op)
12
t = dup_const(TCGOP_VECE(op), t);
13
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_dup2(OptContext *ctx, TCGOp *op)
20
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
21
op->opc = INDEX_op_dup_vec;
22
TCGOP_VECE(op) = MO_32;
23
}
24
- return false;
25
+ return finish_folding(ctx, op);
26
}
27
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
29
--
30
2.43.0
Add fold_masks_s as a trivial wrapper around fold_masks_zs.
Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

Add an interface to return the CPUTLBEntryFull struct
that goes with the lookup.  The result is not intended
to be valid across multiple lookups, so the user must
use the results immediately.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/exec-all.h | 15 +++++++++++++
 include/qemu/typedefs.h |  1 +
 accel/tcg/cputlb.c      | 47 +++++++++++++++++++++++++----------------
 3 files changed, 45 insertions(+), 18 deletions(-)
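To show the intended usage pattern, a hypothetical caller (not code
from the series): probe once, then copy what is needed out of the
transient CPUTLBEntryFull before any further TLB activity can move it.

    /* Hypothetical target helper built on the new interface. */
    static bool probe_phys(CPUArchState *env, target_ulong addr,
                           int mmu_idx, uintptr_t ra, hwaddr *phys)
    {
        void *host;
        CPUTLBEntryFull *full;
        int flags = probe_access_full(env, addr, MMU_DATA_LOAD, mmu_idx,
                                      true, &host, &full, ra);

        if (flags & TLB_INVALID_MASK) {
            return false;
        }
        /* 'full' is transient: consume it immediately. */
        *phys = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
        return true;
    }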
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
18
--- a/include/exec/exec-all.h
12
--- a/tcg/optimize.c
19
+++ b/include/exec/exec-all.h
13
+++ b/tcg/optimize.c
20
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
14
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
21
MMUAccessType access_type, int mmu_idx,
15
return fold_masks_zs(ctx, op, z_mask, 0);
22
bool nonfault, void **phost, uintptr_t retaddr);
23
24
+#ifndef CONFIG_USER_ONLY
25
+/**
26
+ * probe_access_full:
27
+ * Like probe_access_flags, except also return into @pfull.
28
+ *
29
+ * The CPUTLBEntryFull structure returned via @pfull is transient
30
+ * and must be consumed or copied immediately, before any further
31
+ * access or changes to TLB @mmu_idx.
32
+ */
33
+int probe_access_full(CPUArchState *env, target_ulong addr,
34
+ MMUAccessType access_type, int mmu_idx,
35
+ bool nonfault, void **phost,
36
+ CPUTLBEntryFull **pfull, uintptr_t retaddr);
37
+#endif
38
+
39
#define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */
40
41
/* Estimated block size for TB allocation. */
42
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/include/qemu/typedefs.h
45
+++ b/include/qemu/typedefs.h
46
@@ -XXX,XX +XXX,XX @@ typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
47
typedef struct CPUAddressSpace CPUAddressSpace;
48
typedef struct CPUArchState CPUArchState;
49
typedef struct CPUState CPUState;
50
+typedef struct CPUTLBEntryFull CPUTLBEntryFull;
51
typedef struct DeviceListener DeviceListener;
52
typedef struct DeviceState DeviceState;
53
typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot;
54
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/accel/tcg/cputlb.c
57
+++ b/accel/tcg/cputlb.c
58
@@ -XXX,XX +XXX,XX @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
59
static int probe_access_internal(CPUArchState *env, target_ulong addr,
60
int fault_size, MMUAccessType access_type,
61
int mmu_idx, bool nonfault,
62
- void **phost, uintptr_t retaddr)
63
+ void **phost, CPUTLBEntryFull **pfull,
64
+ uintptr_t retaddr)
65
{
66
uintptr_t index = tlb_index(env, mmu_idx, addr);
67
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
68
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
69
mmu_idx, nonfault, retaddr)) {
70
/* Non-faulting page table read failed. */
71
*phost = NULL;
72
+ *pfull = NULL;
73
return TLB_INVALID_MASK;
74
}
75
76
/* TLB resize via tlb_fill may have moved the entry. */
77
+ index = tlb_index(env, mmu_idx, addr);
78
entry = tlb_entry(env, mmu_idx, addr);
79
80
/*
81
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
82
}
83
flags &= tlb_addr;
84
85
+ *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index];
86
+
87
/* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
88
if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
89
*phost = NULL;
90
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
91
return flags;
92
}
16
}
93
17
94
-int probe_access_flags(CPUArchState *env, target_ulong addr,
18
+static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
95
- MMUAccessType access_type, int mmu_idx,
96
- bool nonfault, void **phost, uintptr_t retaddr)
97
+int probe_access_full(CPUArchState *env, target_ulong addr,
98
+ MMUAccessType access_type, int mmu_idx,
99
+ bool nonfault, void **phost, CPUTLBEntryFull **pfull,
100
+ uintptr_t retaddr)
101
{
102
- int flags;
103
-
104
- flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
105
- nonfault, phost, retaddr);
106
+ int flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
107
+ nonfault, phost, pfull, retaddr);
108
109
/* Handle clean RAM pages. */
110
if (unlikely(flags & TLB_NOTDIRTY)) {
111
- uintptr_t index = tlb_index(env, mmu_idx, addr);
112
- CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
113
-
114
- notdirty_write(env_cpu(env), addr, 1, full, retaddr);
115
+ notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr);
116
flags &= ~TLB_NOTDIRTY;
117
}
118
119
return flags;
120
}
121
122
+int probe_access_flags(CPUArchState *env, target_ulong addr,
123
+ MMUAccessType access_type, int mmu_idx,
124
+ bool nonfault, void **phost, uintptr_t retaddr)
125
+{
19
+{
126
+ CPUTLBEntryFull *full;
20
+ return fold_masks_zs(ctx, op, -1, s_mask);
127
+
128
+ return probe_access_full(env, addr, access_type, mmu_idx,
129
+ nonfault, phost, &full, retaddr);
130
+}
21
+}
131
+
22
+
132
void *probe_access(CPUArchState *env, target_ulong addr, int size,
23
static bool fold_masks(OptContext *ctx, TCGOp *op)
133
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
134
{
24
{
135
+ CPUTLBEntryFull *full;
25
return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
136
void *host;
26
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
137
int flags;
27
138
28
static bool fold_eqv(OptContext *ctx, TCGOp *op)
139
g_assert(-(addr | TARGET_PAGE_MASK) >= size);
29
{
140
30
+ uint64_t s_mask;
141
flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
31
+
142
- false, &host, retaddr);
32
if (fold_const2_commutative(ctx, op) ||
143
+ false, &host, &full, retaddr);
33
fold_xi_to_x(ctx, op, -1) ||
144
34
fold_xi_to_not(ctx, op, 0)) {
145
/* Per the interface, size == 0 merely faults the access. */
35
return true;
146
if (size == 0) {
147
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
148
}
36
}
149
37
150
if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
38
- ctx->s_mask = arg_info(op->args[1])->s_mask
151
- uintptr_t index = tlb_index(env, mmu_idx, addr);
39
- & arg_info(op->args[2])->s_mask;
152
- CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
40
- return false;
153
-
41
+ s_mask = arg_info(op->args[1])->s_mask
154
/* Handle watchpoints. */
42
+ & arg_info(op->args[2])->s_mask;
155
if (flags & TLB_WATCHPOINT) {
43
+ return fold_masks_s(ctx, op, s_mask);
156
int wp_access = (access_type == MMU_DATA_STORE
44
}
157
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
45
158
void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
46
static bool fold_extract(OptContext *ctx, TCGOp *op)
159
MMUAccessType access_type, int mmu_idx)
160
{
161
+ CPUTLBEntryFull *full;
162
void *host;
163
int flags;
164
165
flags = probe_access_internal(env, addr, 0, access_type,
166
- mmu_idx, true, &host, 0);
167
+ mmu_idx, true, &host, &full, 0);
168
169
/* No combination of flags are expected by the caller. */
170
return flags ? NULL : host;
171
@@ -XXX,XX +XXX,XX @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
172
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
173
void **hostp)
174
{
175
+ CPUTLBEntryFull *full;
176
void *p;
177
178
(void)probe_access_internal(env, addr, 1, MMU_INST_FETCH,
179
- cpu_mmu_index(env, true), false, &p, 0);
180
+ cpu_mmu_index(env, true), false, &p, &full, 0);
181
if (p == NULL) {
182
return -1;
183
}
184
--
47
--
185
2.34.1
48
2.43.0
186
187
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 15 ++++++---------
7
1 file changed, 6 insertions(+), 9 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
14
static bool fold_extract(OptContext *ctx, TCGOp *op)
15
{
16
uint64_t z_mask_old, z_mask;
17
+ TempOptInfo *t1 = arg_info(op->args[1]);
18
int pos = op->args[2];
19
int len = op->args[3];
20
21
- if (arg_is_const(op->args[1])) {
22
- uint64_t t;
23
-
24
- t = arg_info(op->args[1])->val;
25
- t = extract64(t, pos, len);
26
- return tcg_opt_gen_movi(ctx, op, op->args[0], t);
27
+ if (ti_is_const(t1)) {
28
+ return tcg_opt_gen_movi(ctx, op, op->args[0],
29
+ extract64(ti_const_val(t1), pos, len));
30
}
31
32
- z_mask_old = arg_info(op->args[1])->z_mask;
33
+ z_mask_old = t1->z_mask;
34
z_mask = extract64(z_mask_old, pos, len);
35
if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
36
return true;
37
}
38
- ctx->z_mask = z_mask;
39
40
- return fold_masks(ctx, op);
41
+ return fold_masks_z(ctx, op, z_mask);
42
}
43
44
static bool fold_extract2(OptContext *ctx, TCGOp *op)
45
--
46
2.43.0
New patch
1
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/optimize.c | 2 +-
5
1 file changed, 1 insertion(+), 1 deletion(-)
1
6
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/optimize.c
10
+++ b/tcg/optimize.c
11
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
12
}
13
return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
14
}
15
- return false;
16
+ return finish_folding(ctx, op);
17
}
18
19
static bool fold_exts(OptContext *ctx, TCGOp *op)
20
--
21
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
2
Explicitly sign-extend z_mask instead of doing that manually.
1
3
4
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 29 ++++++++++++-----------------
8
1 file changed, 12 insertions(+), 17 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
15
16
static bool fold_exts(OptContext *ctx, TCGOp *op)
17
{
18
- uint64_t s_mask_old, s_mask, z_mask, sign;
19
+ uint64_t s_mask_old, s_mask, z_mask;
20
bool type_change = false;
21
+ TempOptInfo *t1;
22
23
if (fold_const1(ctx, op)) {
24
return true;
25
}
26
27
- z_mask = arg_info(op->args[1])->z_mask;
28
- s_mask = arg_info(op->args[1])->s_mask;
29
+ t1 = arg_info(op->args[1]);
30
+ z_mask = t1->z_mask;
31
+ s_mask = t1->s_mask;
32
s_mask_old = s_mask;
33
34
switch (op->opc) {
35
CASE_OP_32_64(ext8s):
36
- sign = INT8_MIN;
37
- z_mask = (uint8_t)z_mask;
38
+ s_mask |= INT8_MIN;
39
+ z_mask = (int8_t)z_mask;
40
break;
41
CASE_OP_32_64(ext16s):
42
- sign = INT16_MIN;
43
- z_mask = (uint16_t)z_mask;
44
+ s_mask |= INT16_MIN;
45
+ z_mask = (int16_t)z_mask;
46
break;
47
case INDEX_op_ext_i32_i64:
48
type_change = true;
49
QEMU_FALLTHROUGH;
50
case INDEX_op_ext32s_i64:
51
- sign = INT32_MIN;
52
- z_mask = (uint32_t)z_mask;
53
+ s_mask |= INT32_MIN;
54
+ z_mask = (int32_t)z_mask;
55
break;
56
default:
57
g_assert_not_reached();
58
}
59
60
- if (z_mask & sign) {
61
- z_mask |= sign;
62
- }
63
- s_mask |= sign << 1;
64
-
65
- ctx->z_mask = z_mask;
66
- ctx->s_mask = s_mask;
67
if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
68
return true;
69
}
70
71
- return fold_masks(ctx, op);
72
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
73
}
74
75
static bool fold_extu(OptContext *ctx, TCGOp *op)
76
--
77
2.43.0
New patch
1
Avoid the use of the OptContext slots.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 4 ++--
7
1 file changed, 2 insertions(+), 2 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
14
g_assert_not_reached();
15
}
16
17
- ctx->z_mask = z_mask;
18
if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
19
return true;
20
}
21
- return fold_masks(ctx, op);
22
+
23
+ return fold_masks_z(ctx, op, z_mask);
24
}
25
26
static bool fold_mb(OptContext *ctx, TCGOp *op)
27
--
28
2.43.0
New patch
1
Avoid the use of the OptContext slots. Find TempOptInfo once.
1
2
3
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 19 +++++++++++--------
7
1 file changed, 11 insertions(+), 8 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
14
15
static bool fold_movcond(OptContext *ctx, TCGOp *op)
16
{
17
+ uint64_t z_mask, s_mask;
18
+ TempOptInfo *tt, *ft;
19
int i;
20
21
/* If true and false values are the same, eliminate the cmp. */
22
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
23
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
24
}
25
26
- ctx->z_mask = arg_info(op->args[3])->z_mask
27
- | arg_info(op->args[4])->z_mask;
28
- ctx->s_mask = arg_info(op->args[3])->s_mask
29
- & arg_info(op->args[4])->s_mask;
30
+ tt = arg_info(op->args[3]);
31
+ ft = arg_info(op->args[4]);
32
+ z_mask = tt->z_mask | ft->z_mask;
33
+ s_mask = tt->s_mask & ft->s_mask;
34
35
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
36
- uint64_t tv = arg_info(op->args[3])->val;
37
- uint64_t fv = arg_info(op->args[4])->val;
38
+ if (ti_is_const(tt) && ti_is_const(ft)) {
39
+ uint64_t tv = ti_const_val(tt);
40
+ uint64_t fv = ti_const_val(ft);
41
TCGOpcode opc, negopc = 0;
42
TCGCond cond = op->args[5];
43
44
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
45
}
46
}
47
}
48
- return false;
49
+
50
+ return fold_masks_zs(ctx, op, z_mask, s_mask);
51
}
52
53
static bool fold_mul(OptContext *ctx, TCGOp *op)
54
--
55
2.43.0
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

From: Alex Bennée <alex.bennee@linaro.org>

Before: 35.912 s ± 0.168 s
After: 35.565 s ± 0.087 s

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220811151413.3350684-5-alex.bennee@linaro.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20220923084803.498337-5-clg@kaod.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)
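In outline, the pattern applied here (a sketch assuming cpu->cc was
populated once at realize time, as the series arranges): dereference
the cached class pointer on the hot path instead of re-deriving it
through the QOM cast macro.

    /* Before: resolve the class on every call. */
    CPUClass *cc = CPU_GET_CLASS(cpu);
    cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);

    /* After: one pointer load from CPUState. */
    cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type,
                                          mmu_idx, retaddr);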
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
7
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
18
--- a/accel/tcg/cputlb.c
9
--- a/tcg/optimize.c
19
+++ b/accel/tcg/cputlb.c
10
+++ b/tcg/optimize.c
20
@@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
11
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
21
static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
12
fold_xi_to_x(ctx, op, 1)) {
22
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
13
return true;
23
{
14
}
24
- CPUClass *cc = CPU_GET_CLASS(cpu);
15
- return false;
25
bool ok;
16
+ return finish_folding(ctx, op);
26
27
/*
28
* This is not a probe, so only valid return is success; failure
29
* should result in exception + longjmp to the cpu loop.
30
*/
31
- ok = cc->tcg_ops->tlb_fill(cpu, addr, size,
32
- access_type, mmu_idx, false, retaddr);
33
+ ok = cpu->cc->tcg_ops->tlb_fill(cpu, addr, size,
34
+ access_type, mmu_idx, false, retaddr);
35
assert(ok);
36
}
17
}
37
18
38
@@ -XXX,XX +XXX,XX @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
19
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
39
MMUAccessType access_type,
20
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
40
int mmu_idx, uintptr_t retaddr)
21
fold_xi_to_i(ctx, op, 0)) {
41
{
22
return true;
42
- CPUClass *cc = CPU_GET_CLASS(cpu);
23
}
43
-
24
- return false;
44
- cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
25
+ return finish_folding(ctx, op);
45
+ cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type,
46
+ mmu_idx, retaddr);
47
}
26
}
48
27
49
static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
28
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
50
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
29
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
51
if (!tlb_hit_page(tlb_addr, page_addr)) {
30
tcg_opt_gen_movi(ctx, op2, rh, h);
52
if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
31
return true;
53
CPUState *cs = env_cpu(env);
32
}
54
- CPUClass *cc = CPU_GET_CLASS(cs);
33
- return false;
55
34
+ return finish_folding(ctx, op);
56
- if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
35
}
57
- mmu_idx, nonfault, retaddr)) {
36
58
+ if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
37
static bool fold_nand(OptContext *ctx, TCGOp *op)
59
+ mmu_idx, nonfault, retaddr)) {
60
/* Non-faulting page table read failed. */
61
*phost = NULL;
62
return TLB_INVALID_MASK;
63
--
38
--
64
2.34.1
39
2.43.0
65
66
diff view generated by jsdifflib
Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
 
 static bool fold_nand(OptContext *ctx, TCGOp *op)
 {
+    uint64_t s_mask;
+
     if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_not(ctx, op, -1)) {
         return true;
     }
 
-    ctx->s_mask = arg_info(op->args[1])->s_mask
-                & arg_info(op->args[2])->s_mask;
-    return false;
+    s_mask = arg_info(op->args[1])->s_mask
+           & arg_info(op->args[2])->s_mask;
+    return fold_masks_s(ctx, op, s_mask);
 }
 
 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
--
2.43.0

Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
 {
     /* Set to 1 all bits to the left of the rightmost. */
     uint64_t z_mask = arg_info(op->args[1])->z_mask;
-    ctx->z_mask = -(z_mask & -z_mask);
+    z_mask = -(z_mask & -z_mask);
 
-    /*
-     * Because of fold_sub_to_neg, we want to always return true,
-     * via finish_folding.
-     */
-    finish_folding(ctx, op);
-    return true;
+    return fold_masks_z(ctx, op, z_mask);
 }
 
 static bool fold_neg(OptContext *ctx, TCGOp *op)
--
2.43.0

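For reference, -(z_mask & -z_mask) keeps the rightmost possibly-nonzero
bit and sets every bit above it, which is exactly the set of bits that
negation can make nonzero. A throwaway check of the identity
(illustrative only, not part of the series):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* input may be nonzero only in bits 4..7 */
        uint64_t z_mask = 0x00f0;
        /* rightmost possible bit and everything to its left */
        uint64_t neg_z = -(z_mask & -z_mask);

        assert((z_mask & -z_mask) == 0x10);
        assert(neg_z == 0xfffffffffffffff0ull);
        return 0;
    }
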
Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
 
 static bool fold_nor(OptContext *ctx, TCGOp *op)
 {
+    uint64_t s_mask;
+
     if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_not(ctx, op, 0)) {
         return true;
     }
 
-    ctx->s_mask = arg_info(op->args[1])->s_mask
-                & arg_info(op->args[2])->s_mask;
-    return false;
+    s_mask = arg_info(op->args[1])->s_mask
+           & arg_info(op->args[2])->s_mask;
+    return fold_masks_s(ctx, op, s_mask);
 }
 
 static bool fold_not(OptContext *ctx, TCGOp *op)
--
2.43.0

Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
     if (fold_const1(ctx, op)) {
         return true;
     }
-
-    ctx->s_mask = arg_info(op->args[1])->s_mask;
-
-    /* Because of fold_to_not, we want to always return true, via finish. */
-    finish_folding(ctx, op);
-    return true;
+    return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
 }
 
 static bool fold_or(OptContext *ctx, TCGOp *op)
--
2.43.0

Avoid the use of the OptContext slots. Find TempOptInfo once.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
 
 static bool fold_or(OptContext *ctx, TCGOp *op)
 {
+    uint64_t z_mask, s_mask;
+    TempOptInfo *t1, *t2;
+
     if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_x(ctx, op, 0) ||
         fold_xx_to_x(ctx, op)) {
         return true;
     }
 
-    ctx->z_mask = arg_info(op->args[1])->z_mask
-                | arg_info(op->args[2])->z_mask;
-    ctx->s_mask = arg_info(op->args[1])->s_mask
-                & arg_info(op->args[2])->s_mask;
-    return fold_masks(ctx, op);
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+    z_mask = t1->z_mask | t2->z_mask;
+    s_mask = t1->s_mask & t2->s_mask;
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_orc(OptContext *ctx, TCGOp *op)
--
2.43.0

Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
 
 static bool fold_orc(OptContext *ctx, TCGOp *op)
 {
+    uint64_t s_mask;
+
     if (fold_const2(ctx, op) ||
         fold_xx_to_i(ctx, op, -1) ||
         fold_xi_to_x(ctx, op, -1) ||
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
         return true;
     }
 
-    ctx->s_mask = arg_info(op->args[1])->s_mask
-                & arg_info(op->args[2])->s_mask;
-    return false;
+    s_mask = arg_info(op->args[1])->s_mask
+           & arg_info(op->args[2])->s_mask;
+    return fold_masks_s(ctx, op, s_mask);
 }
 
 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
--
2.43.0

Avoid the use of the OptContext slots.

Be careful not to call fold_masks_zs when the memory operation
is wide enough to require multiple outputs, so split into two
functions: fold_qemu_ld_1reg and fold_qemu_ld_2reg.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
     return fold_masks_s(ctx, op, s_mask);
 }
 
-static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
+static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
 {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
     MemOp mop = get_memop(oi);
     int width = 8 * memop_size(mop);
+    uint64_t z_mask = -1, s_mask = 0;
 
     if (width < 64) {
         if (mop & MO_SIGN) {
-            ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
+            s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
         } else {
-            ctx->z_mask = MAKE_64BIT_MASK(0, width);
+            z_mask = MAKE_64BIT_MASK(0, width);
         }
     }
 
     /* Opcodes that touch guest memory stop the mb optimization. */
     ctx->prev_mb = NULL;
-    return false;
+
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
+}
+
+static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
+{
+    /* Opcodes that touch guest memory stop the mb optimization. */
+    ctx->prev_mb = NULL;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         break;
     case INDEX_op_qemu_ld_a32_i32:
     case INDEX_op_qemu_ld_a64_i32:
+        done = fold_qemu_ld_1reg(&ctx, op);
+        break;
     case INDEX_op_qemu_ld_a32_i64:
     case INDEX_op_qemu_ld_a64_i64:
+        if (TCG_TARGET_REG_BITS == 64) {
+            done = fold_qemu_ld_1reg(&ctx, op);
+            break;
+        }
+        QEMU_FALLTHROUGH;
     case INDEX_op_qemu_ld_a32_i128:
     case INDEX_op_qemu_ld_a64_i128:
-        done = fold_qemu_ld(&ctx, op);
+        done = fold_qemu_ld_2reg(&ctx, op);
         break;
     case INDEX_op_qemu_st8_a32_i32:
     case INDEX_op_qemu_st8_a64_i32:
--
2.43.0

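The s_mask change from MAKE_64BIT_MASK(width, 64 - width) to
MAKE_64BIT_MASK(width - 1, 64 - (width - 1)) widens the mask by one bit
so that the sign bit itself is included, matching the representation
that fold_masks_zs expects. A standalone sketch (the macro is restated
locally for the check; illustrative only):

    #include <assert.h>
    #include <stdint.h>

    #define MAKE_64BIT_MASK(shift, length) \
        (((~0ULL) >> (64 - (length))) << (shift))

    int main(void)
    {
        int width = 8;  /* e.g. an ld8s load */
        uint64_t old_s = MAKE_64BIT_MASK(width, 64 - width);
        uint64_t new_s = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));

        /* the new mask adds exactly the sign bit of the loaded value */
        assert(new_s == (old_s | (1ULL << (width - 1))));
        return 0;
    }
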
Stores have no output operands, and so need no further work.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
 {
     /* Opcodes that touch guest memory stop the mb optimization. */
     ctx->prev_mb = NULL;
-    return false;
+    return true;
 }
 
 static bool fold_remainder(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
 
     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
         remove_mem_copy_all(ctx);
-        return false;
+        return true;
     }
 
     switch (op->opc) {
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
         g_assert_not_reached();
     }
     remove_mem_copy_in(ctx, ofs, ofs + lm1);
-    return false;
+    return true;
 }
 
 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
     TCGType type;
 
     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
-        fold_tcg_st(ctx, op);
-        return false;
+        return fold_tcg_st(ctx, op);
     }
 
     src = arg_temp(op->args[0]);
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
     last = ofs + tcg_type_size(type) - 1;
     remove_mem_copy_in(ctx, ofs, last);
     record_mem_copy(ctx, type, src, ofs, last);
-    return false;
+    return true;
 }
 
 static bool fold_xor(OptContext *ctx, TCGOp *op)
--
2.43.0

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
         fold_xx_to_i(ctx, op, 0)) {
         return true;
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
--
2.43.0

Change return from bool to int; distinguish between
complete folding, simplification, and no change.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
     return finish_folding(ctx, op);
 }
 
-static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
+/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
+static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
 {
     uint64_t a_zmask, b_val;
     TCGCond cond;
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
             op->opc = xor_opc;
             op->args[2] = arg_new_constant(ctx, 1);
         }
-        return false;
+        return -1;
     }
-
-    return false;
+    return 0;
 }
 
 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
     }
 
-    if (fold_setcond_zmask(ctx, op, false)) {
+    i = fold_setcond_zmask(ctx, op, false);
+    if (i > 0) {
         return true;
     }
-    fold_setcond_tst_pow2(ctx, op, false);
+    if (i == 0) {
+        fold_setcond_tst_pow2(ctx, op, false);
+    }
 
     ctx->z_mask = 1;
     return false;
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
         return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
     }
 
-    if (fold_setcond_zmask(ctx, op, true)) {
+    i = fold_setcond_zmask(ctx, op, true);
+    if (i > 0) {
         return true;
     }
-    fold_setcond_tst_pow2(ctx, op, true);
+    if (i == 0) {
+        fold_setcond_tst_pow2(ctx, op, true);
+    }
 
     /* Value is {0,-1} so all bits are repetitions of the sign. */
     ctx->s_mask = -1;
--
2.43.0

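The tri-state return lets the callers in fold_setcond and
fold_negsetcond skip fold_setcond_tst_pow2 once the zmask helper has
already rewritten the op, while still falling through to the mask
bookkeeping. A minimal sketch of the calling convention (hypothetical
helper, not QEMU code):

    #include <stdio.h>

    /* 1 if finished, -1 if simplified in place, 0 if unchanged */
    static int try_simplify(int x)
    {
        if (x == 0) {
            return 1;   /* folded away entirely */
        }
        if (x < 0) {
            return -1;  /* rewritten to a cheaper form */
        }
        return 0;       /* nothing done */
    }

    int main(void)
    {
        for (int x = -1; x <= 1; x++) {
            int i = try_simplify(x);
            if (i > 0) {
                printf("%d: finished\n", x);
            } else if (i == 0) {
                printf("%d: unchanged, try further rules\n", x);
            } else {
                printf("%d: simplified, skip further rules\n", x);
            }
        }
        return 0;
    }
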
Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
         fold_setcond_tst_pow2(ctx, op, false);
     }
 
-    ctx->z_mask = 1;
-    return false;
+    return fold_masks_z(ctx, op, 1);
 }
 
 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
--
2.43.0

Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
     }
 
     /* Value is {0,-1} so all bits are repetitions of the sign. */
-    ctx->s_mask = -1;
-    return false;
+    return fold_masks_s(ctx, op, -1);
 }
 
 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
--
2.43.0

Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
         return fold_setcond(ctx, op);
     }
 
-    ctx->z_mask = 1;
-    return false;
+    return fold_masks_z(ctx, op, 1);
 
 do_setcond_const:
     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
--
2.43.0

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
         op->args[3] = tcg_swap_cond(op->args[3]);
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
--
2.43.0

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
         op->args[5] = tcg_invert_cond(op->args[5]);
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_sextract(OptContext *ctx, TCGOp *op)
--
2.43.0

Avoid the use of the OptContext slots. Find TempOptInfo once.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
 static bool fold_sextract(OptContext *ctx, TCGOp *op)
 {
     uint64_t z_mask, s_mask, s_mask_old;
+    TempOptInfo *t1 = arg_info(op->args[1]);
     int pos = op->args[2];
     int len = op->args[3];
 
-    if (arg_is_const(op->args[1])) {
-        uint64_t t;
-
-        t = arg_info(op->args[1])->val;
-        t = sextract64(t, pos, len);
-        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+    if (ti_is_const(t1)) {
+        return tcg_opt_gen_movi(ctx, op, op->args[0],
+                                sextract64(ti_const_val(t1), pos, len));
     }
 
-    z_mask = arg_info(op->args[1])->z_mask;
-    z_mask = sextract64(z_mask, pos, len);
-    ctx->z_mask = z_mask;
-
-    s_mask_old = arg_info(op->args[1])->s_mask;
-    s_mask = sextract64(s_mask_old, pos, len);
-    s_mask |= MAKE_64BIT_MASK(len, 64 - len);
-    ctx->s_mask = s_mask;
+    s_mask_old = t1->s_mask;
+    s_mask = s_mask_old >> pos;
+    s_mask |= -1ull << (len - 1);
 
     if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
         return true;
     }
 
-    return fold_masks(ctx, op);
+    z_mask = sextract64(t1->z_mask, pos, len);
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_shift(OptContext *ctx, TCGOp *op)
--
2.43.0

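With s_mask holding repetitions of the sign bit, shifting the old mask
right by pos keeps whatever repetitions survive the extract, and the
-1ull << (len - 1) term adds the repetitions guaranteed by the sign
extension itself. An illustrative check (values invented, not part of
the series):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* bits 32..63 of the input repeat its sign bit */
        uint64_t s_mask_old = -1ull << 32;
        int pos = 8, len = 16;

        uint64_t s_mask = (s_mask_old >> pos) | (-1ull << (len - 1));
        /* at minimum, bits len-1..63 of the result repeat the sign */
        assert((s_mask & (-1ull << (len - 1))) == (-1ull << (len - 1)));
        return 0;
    }
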
Avoid the use of the OptContext slots. Find TempOptInfo once.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
 static bool fold_shift(OptContext *ctx, TCGOp *op)
 {
     uint64_t s_mask, z_mask, sign;
+    TempOptInfo *t1, *t2;
 
     if (fold_const2(ctx, op) ||
         fold_ix_to_i(ctx, op, 0) ||
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
         return true;
     }
 
-    s_mask = arg_info(op->args[1])->s_mask;
-    z_mask = arg_info(op->args[1])->z_mask;
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+    s_mask = t1->s_mask;
+    z_mask = t1->z_mask;
 
-    if (arg_is_const(op->args[2])) {
-        int sh = arg_info(op->args[2])->val;
-
-        ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
+    if (ti_is_const(t2)) {
+        int sh = ti_const_val(t2);
 
+        z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
 
-        return fold_masks(ctx, op);
+        return fold_masks_zs(ctx, op, z_mask, s_mask);
     }
 
     switch (op->opc) {
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
          * Arithmetic right shift will not reduce the number of
          * input sign repetitions.
          */
-        ctx->s_mask = s_mask;
-        break;
+        return fold_masks_s(ctx, op, s_mask);
     CASE_OP_32_64(shr):
         /*
          * If the sign bit is known zero, then logical right shift
-         * will not reduced the number of input sign repetitions.
+         * will not reduce the number of input sign repetitions.
          */
-        sign = (s_mask & -s_mask) >> 1;
+        sign = -s_mask;
         if (sign && !(z_mask & sign)) {
-            ctx->s_mask = s_mask;
+            return fold_masks_s(ctx, op, s_mask);
         }
         break;
     default:
         break;
     }
 
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
--
2.43.0

Merge the two conditions, sign != 0 && !(z_mask & sign),
by testing ~z_mask & sign. If sign == 0, the logical and
will produce false.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
 
 static bool fold_shift(OptContext *ctx, TCGOp *op)
 {
-    uint64_t s_mask, z_mask, sign;
+    uint64_t s_mask, z_mask;
     TempOptInfo *t1, *t2;
 
     if (fold_const2(ctx, op) ||
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
          * If the sign bit is known zero, then logical right shift
          * will not reduce the number of input sign repetitions.
          */
-        sign = -s_mask;
-        if (sign && !(z_mask & sign)) {
+        if (~z_mask & -s_mask) {
             return fold_masks_s(ctx, op, s_mask);
         }
         break;
--
2.43.0

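The equivalence relies on s_mask having the form -1 << k, so that
-s_mask isolates the lowest guaranteed sign-copy bit and is 0 when
s_mask is 0. A standalone check of the merged condition (illustrative
only, not part of the series):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* bits 8..63 repeat the sign bit */
        uint64_t s_mask = -1ull << 8;
        /* bit 8, a sign copy, is known zero */
        uint64_t z_mask = 0x00ff;

        uint64_t sign = -s_mask;        /* lowest sign-copy bit */
        assert(sign == 0x100);

        int old_cond = sign && !(z_mask & sign);
        int new_cond = (~z_mask & -s_mask) != 0;
        assert(old_cond == new_cond);   /* both false when s_mask == 0 */
        return 0;
    }
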
Duplicate fold_sub_vec into fold_sub instead of calling it,
now that fold_sub_vec always returns true.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
         fold_sub_to_neg(ctx, op)) {
         return true;
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_sub(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
+    if (fold_const2(ctx, op) ||
+        fold_xx_to_i(ctx, op, 0) ||
+        fold_xi_to_x(ctx, op, 0) ||
+        fold_sub_to_neg(ctx, op)) {
         return true;
     }
 
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
                     ? INDEX_op_add_i32 : INDEX_op_add_i64);
         op->args[2] = arg_new_constant(ctx, -val);
     }
-    return false;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_sub2(OptContext *ctx, TCGOp *op)
--
2.43.0

Avoid the use of the OptContext slots.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2(OptContext *ctx, TCGOp *op)
 
 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
 {
+    uint64_t z_mask = -1, s_mask = 0;
+
     /* We can't do any folding with a load, but we can record bits. */
     switch (op->opc) {
     CASE_OP_32_64(ld8s):
-        ctx->s_mask = MAKE_64BIT_MASK(8, 56);
+        s_mask = INT8_MIN;
         break;
     CASE_OP_32_64(ld8u):
-        ctx->z_mask = MAKE_64BIT_MASK(0, 8);
+        z_mask = MAKE_64BIT_MASK(0, 8);
         break;
     CASE_OP_32_64(ld16s):
-        ctx->s_mask = MAKE_64BIT_MASK(16, 48);
+        s_mask = INT16_MIN;
         break;
     CASE_OP_32_64(ld16u):
-        ctx->z_mask = MAKE_64BIT_MASK(0, 16);
+        z_mask = MAKE_64BIT_MASK(0, 16);
         break;
     case INDEX_op_ld32s_i64:
-        ctx->s_mask = MAKE_64BIT_MASK(32, 32);
+        s_mask = INT32_MIN;
         break;
     case INDEX_op_ld32u_i64:
-        ctx->z_mask = MAKE_64BIT_MASK(0, 32);
+        z_mask = MAKE_64BIT_MASK(0, 32);
         break;
     default:
         g_assert_not_reached();
     }
-    return false;
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
--
2.43.0

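In the new representation INT8_MIN, viewed as a uint64_t, is
-1ull << 7: the sign bit and everything above it are all copies of the
loaded sign, one bit more than the old MAKE_64BIT_MASK(8, 56) form
recorded. An illustrative check (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t s_mask = (uint64_t)INT8_MIN;   /* 0xffffffffffffff80 */
        assert(s_mask == (-1ull << 7));

        int8_t v = -5;
        uint64_t x = (uint64_t)(int64_t)v;      /* a sign-extended load */
        /* every s_mask bit matches the sign of the loaded byte */
        assert((x & s_mask) == s_mask || (x & s_mask) == 0);
        return 0;
    }
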
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
     TCGType type;
 
     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
-        return false;
+        return finish_folding(ctx, op);
     }
 
     type = ctx->type;
--
2.43.0

Avoid the use of the OptContext slots.  Find TempOptInfo once.
Remove fold_masks as the function becomes unused.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
    return fold_masks_zs(ctx, op, -1, s_mask);
}

-static bool fold_masks(OptContext *ctx, TCGOp *op)
-{
-    return fold_masks_zs(ctx, op, ctx->z_mask, ctx->s_mask);
-}
-
/*
 * An "affected" mask bit is 0 if and only if the result is identical
 * to the first input.  Thus if the entire mask is 0, the operation
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)

static bool fold_xor(OptContext *ctx, TCGOp *op)
{
+    uint64_t z_mask, s_mask;
+    TempOptInfo *t1, *t2;
+
    if (fold_const2_commutative(ctx, op) ||
        fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
        return true;
    }

-    ctx->z_mask = arg_info(op->args[1])->z_mask
-                | arg_info(op->args[2])->z_mask;
-    ctx->s_mask = arg_info(op->args[1])->s_mask
-                & arg_info(op->args[2])->s_mask;
-    return fold_masks(ctx, op);
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+    z_mask = t1->z_mask | t2->z_mask;
+    s_mask = t1->s_mask & t2->s_mask;
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
}

static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
--
2.43.0

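The mask algebra for xor can be sanity-checked on its own: a bit of a ^ b can be nonzero only where it can be nonzero in a or in b, hence z_mask = t1->z_mask | t2->z_mask; repeated sign bits survive only where both inputs repeat their sign, hence s_mask = t1->s_mask & t2->s_mask. A minimal sketch with hypothetical input masks:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t z1 = 0xffff;               /* a: at most the low 16 bits set */
    uint64_t z2 = 0x00ff;               /* b: at most the low 8 bits set */
    uint64_t a = 0x1234 & z1, b = 0x56 & z2;

    /* Any values consistent with z1 and z2 yield a result within z1 | z2. */
    assert(((a ^ b) & ~(z1 | z2)) == 0);
    return 0;
}
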
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
            return fold_orc(ctx, op);
        }
    }
-    return false;
+    return finish_folding(ctx, op);
}

/* Propagate constants and copies, fold constant expressions. */
--
2.43.0

All non-default cases now finish folding within each function.
Do the same with the default case and assert it is done after.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
            done = true;
            break;
        default:
+            done = finish_folding(&ctx, op);
            break;
        }
-
-        if (!done) {
-            finish_folding(&ctx, op);
-        }
+        tcg_debug_assert(done);
    }
}
--
2.43.0

All mask setting is now done with parameters via fold_masks_*.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 13 -------------
1 file changed, 13 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;

    /* In flight values from optimization. */
-    uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
-    uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
    TCGType type;
} OptContext;

@@ -XXX,XX +XXX,XX @@ static bool finish_folding(OptContext *ctx, TCGOp *op)
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        reset_ts(ctx, ts);
-        /*
-         * Save the corresponding known-zero/sign bits mask for the
-         * first output argument (only one supported so far).
-         */
-        if (i == 0) {
-            ts_info(ts)->z_mask = ctx->z_mask;
-        }
    }
    return true;
}
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
            ctx.type = TCG_TYPE_I32;
        }

-        /* Assume all bits affected, no bits known zero, no sign reps. */
-        ctx.z_mask = -1;
-        ctx.s_mask = 0;
-
        /*
         * Process each opcode.
         * Sorted alphabetically by opcode as much as possible.
--
2.43.0

All instances of s_mask have been converted to the new
representation.  We can now re-enable usage.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
        g_assert_not_reached();
    }

-    if (0 && !type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+    if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
        return true;
    }

@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
    s_mask = s_mask_old >> pos;
    s_mask |= -1ull << (len - 1);

-    if (0 && pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
+    if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
        return true;
    }

--
2.43.0

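What fold_affected_mask detects in these two callers: when s_mask & ~s_mask_old is zero, the op produces no sign repetitions that the input did not already have, so it can fold to a move. An illustrative sketch with hypothetical values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Input already sign-extended from bit 7, e.g. from an earlier ld8s. */
    uint64_t s_mask_old = (uint64_t)INT8_MIN;
    /* An ext8s would guarantee exactly the same repetitions. */
    uint64_t s_mask = (uint64_t)INT8_MIN;

    assert((s_mask & ~s_mask_old) == 0);   /* no affected bits: fold to mov */

    int64_t v = (int8_t)0x90;              /* any sign-extended 8-bit value */
    assert((int64_t)(int8_t)v == v);       /* the ext8s is indeed a no-op */
    return 0;
}
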
The big comment just above says functions should be sorted.
Add forward declarations as needed.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 114 +++++++++++++++++++++++++------------------
1 file changed, 59 insertions(+), 55 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
 * 3) those that produce information about the result value.
 */

+static bool fold_or(OptContext *ctx, TCGOp *op);
+static bool fold_orc(OptContext *ctx, TCGOp *op);
+static bool fold_xor(OptContext *ctx, TCGOp *op);
+
static bool fold_add(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
    return fold_masks_zs(ctx, op, z_mask, s_mask);
}

+static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
+{
+    /* If true and false values are the same, eliminate the cmp. */
+    if (args_are_copies(op->args[2], op->args[3])) {
+        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
+    }
+
+    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
+        uint64_t tv = arg_info(op->args[2])->val;
+        uint64_t fv = arg_info(op->args[3])->val;
+
+        if (tv == -1 && fv == 0) {
+            return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+        }
+        if (tv == 0 && fv == -1) {
+            if (TCG_TARGET_HAS_not_vec) {
+                op->opc = INDEX_op_not_vec;
+                return fold_not(ctx, op);
+            } else {
+                op->opc = INDEX_op_xor_vec;
+                op->args[2] = arg_new_constant(ctx, -1);
+                return fold_xor(ctx, op);
+            }
+        }
+    }
+    if (arg_is_const(op->args[2])) {
+        uint64_t tv = arg_info(op->args[2])->val;
+        if (tv == -1) {
+            op->opc = INDEX_op_or_vec;
+            op->args[2] = op->args[3];
+            return fold_or(ctx, op);
+        }
+        if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
+            op->opc = INDEX_op_andc_vec;
+            op->args[2] = op->args[1];
+            op->args[1] = op->args[3];
+            return fold_andc(ctx, op);
+        }
+    }
+    if (arg_is_const(op->args[3])) {
+        uint64_t fv = arg_info(op->args[3])->val;
+        if (fv == 0) {
+            op->opc = INDEX_op_and_vec;
+            return fold_and(ctx, op);
+        }
+        if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
+            op->opc = INDEX_op_orc_vec;
+            op->args[2] = op->args[1];
+            op->args[1] = op->args[3];
+            return fold_orc(ctx, op);
+        }
+    }
+    return finish_folding(ctx, op);
+}
+
static bool fold_brcond(OptContext *ctx, TCGOp *op)
{
    int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
    return fold_masks_zs(ctx, op, z_mask, s_mask);
}

-static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
-{
-    /* If true and false values are the same, eliminate the cmp. */
-    if (args_are_copies(op->args[2], op->args[3])) {
-        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
-    }
-
-    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
-        uint64_t tv = arg_info(op->args[2])->val;
-        uint64_t fv = arg_info(op->args[3])->val;
-
-        if (tv == -1 && fv == 0) {
-            return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
-        }
-        if (tv == 0 && fv == -1) {
-            if (TCG_TARGET_HAS_not_vec) {
-                op->opc = INDEX_op_not_vec;
-                return fold_not(ctx, op);
-            } else {
-                op->opc = INDEX_op_xor_vec;
-                op->args[2] = arg_new_constant(ctx, -1);
-                return fold_xor(ctx, op);
-            }
-        }
-    }
-    if (arg_is_const(op->args[2])) {
-        uint64_t tv = arg_info(op->args[2])->val;
-        if (tv == -1) {
-            op->opc = INDEX_op_or_vec;
-            op->args[2] = op->args[3];
-            return fold_or(ctx, op);
-        }
-        if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
-            op->opc = INDEX_op_andc_vec;
-            op->args[2] = op->args[1];
-            op->args[1] = op->args[3];
-            return fold_andc(ctx, op);
-        }
-    }
-    if (arg_is_const(op->args[3])) {
-        uint64_t fv = arg_info(op->args[3])->val;
-        if (fv == 0) {
-            op->opc = INDEX_op_and_vec;
-            return fold_and(ctx, op);
-        }
-        if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
-            op->opc = INDEX_op_orc_vec;
-            op->args[2] = op->args[1];
-            op->args[1] = op->args[3];
-            return fold_orc(ctx, op);
-        }
-    }
-    return finish_folding(ctx, op);
-}
-
/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
--
2.43.0

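For reference, the identities behind the constant cases of fold_bitsel_vec, with bitsel(a, b, c) = (a & b) | (~a & c) selecting from b where a is set and from c where it is clear. A standalone check, illustration only:

#include <assert.h>
#include <stdint.h>

static uint64_t bitsel(uint64_t a, uint64_t b, uint64_t c)
{
    return (a & b) | (~a & c);
}

int main(void)
{
    uint64_t a = 0x00ff00ff00ff00ffull, b = 0x123456789abcdef0ull;

    assert(bitsel(a, -1, 0) == a);         /* tv == -1, fv == 0: mov */
    assert(bitsel(a, 0, -1) == ~a);        /* tv == 0, fv == -1: not (or xor -1) */
    assert(bitsel(a, -1, b) == (a | b));   /* tv == -1: or */
    assert(bitsel(a, 0, b) == (b & ~a));   /* tv == 0: andc */
    assert(bitsel(a, b, 0) == (a & b));    /* fv == 0: and */
    assert(bitsel(a, b, -1) == (b | ~a));  /* fv == -1: orc */
    return 0;
}
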
The big comment just above says functions should be sorted.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 60 +++++++++++++++++++++++++-------------------------
1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
    return true;
}

+static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
+{
+    /* Canonicalize the comparison to put immediate second. */
+    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+        op->args[3] = tcg_swap_cond(op->args[3]);
+    }
+    return finish_folding(ctx, op);
+}
+
+static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
+{
+    /* If true and false values are the same, eliminate the cmp. */
+    if (args_are_copies(op->args[3], op->args[4])) {
+        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
+    }
+
+    /* Canonicalize the comparison to put immediate second. */
+    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+        op->args[5] = tcg_swap_cond(op->args[5]);
+    }
+    /*
+     * Canonicalize the "false" input reg to match the destination,
+     * so that the tcg backend can implement "move if true".
+     */
+    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
+        op->args[5] = tcg_invert_cond(op->args[5]);
+    }
+    return finish_folding(ctx, op);
+}
+
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask;
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
    return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}

-static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
-{
-    /* Canonicalize the comparison to put immediate second. */
-    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
-        op->args[3] = tcg_swap_cond(op->args[3]);
-    }
-    return finish_folding(ctx, op);
-}
-
-static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
-{
-    /* If true and false values are the same, eliminate the cmp. */
-    if (args_are_copies(op->args[3], op->args[4])) {
-        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
-    }
-
-    /* Canonicalize the comparison to put immediate second. */
-    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
-        op->args[5] = tcg_swap_cond(op->args[5]);
-    }
-    /*
-     * Canonicalize the "false" input reg to match the destination,
-     * so that the tcg backend can implement "move if true".
-     */
-    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
-        op->args[5] = tcg_invert_cond(op->args[5]);
-    }
-    return finish_folding(ctx, op);
-}
-
static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask, s_mask_old;
--
2.43.0

We currently have a flag, float_muladd_halve_result, to scale
the result by 2**-1.  Extend this to handle arbitrary scaling.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat.h | 6 ++++
fpu/softfloat.c | 58 ++++++++++++++++++++++-----------------
fpu/softfloat-parts.c.inc | 7 +++--
3 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index XXXXXXX..XXXXXXX 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -XXX,XX +XXX,XX @@ float16 float16_add(float16, float16, float_status *status);
float16 float16_sub(float16, float16, float_status *status);
float16 float16_mul(float16, float16, float_status *status);
float16 float16_muladd(float16, float16, float16, int, float_status *status);
+float16 float16_muladd_scalbn(float16, float16, float16,
+                              int, int, float_status *status);
float16 float16_div(float16, float16, float_status *status);
float16 float16_scalbn(float16, int, float_status *status);
float16 float16_min(float16, float16, float_status *status);
@@ -XXX,XX +XXX,XX @@ float32 float32_mul(float32, float32, float_status *status);
float32 float32_div(float32, float32, float_status *status);
float32 float32_rem(float32, float32, float_status *status);
float32 float32_muladd(float32, float32, float32, int, float_status *status);
+float32 float32_muladd_scalbn(float32, float32, float32,
+                              int, int, float_status *status);
float32 float32_sqrt(float32, float_status *status);
float32 float32_exp2(float32, float_status *status);
float32 float32_log2(float32, float_status *status);
@@ -XXX,XX +XXX,XX @@ float64 float64_mul(float64, float64, float_status *status);
float64 float64_div(float64, float64, float_status *status);
float64 float64_rem(float64, float64, float_status *status);
float64 float64_muladd(float64, float64, float64, int, float_status *status);
+float64 float64_muladd_scalbn(float64, float64, float64,
+                              int, int, float_status *status);
float64 float64_sqrt(float64, float_status *status);
float64 float64_log2(float64, float_status *status);
FloatRelation float64_compare(float64, float64, float_status *status);
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index XXXXXXX..XXXXXXX 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -XXX,XX +XXX,XX @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

-static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
-                                    FloatParts64 *c, int flags,
-                                    float_status *s);
-static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
-                                      FloatParts128 *c, int flags,
-                                      float_status *s);
+static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
+                                           FloatParts64 *c, int scale,
+                                           int flags, float_status *s);
+static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
+                                             FloatParts128 *c, int scale,
+                                             int flags, float_status *s);

-#define parts_muladd(A, B, C, Z, S) \
-    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
+#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
+    PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
@@ -XXX,XX +XXX,XX @@ floatx80_mul(floatx80 a, floatx80 b, float_status *status)
 * Fused multiply-add
 */

-float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
-                                    int flags, float_status *status)
+float16 QEMU_FLATTEN
+float16_muladd_scalbn(float16 a, float16 b, float16 c,
+                      int scale, int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
-    pr = parts_muladd(&pa, &pb, &pc, flags, status);
+    pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);

    return float16_round_pack_canonical(pr, status);
}

-static float32 QEMU_SOFTFLOAT_ATTR
-soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
-                float_status *status)
+float16 float16_muladd(float16 a, float16 b, float16 c,
+                       int flags, float_status *status)
+{
+    return float16_muladd_scalbn(a, b, c, 0, flags, status);
+}
+
+float32 QEMU_SOFTFLOAT_ATTR
+float32_muladd_scalbn(float32 a, float32 b, float32 c,
+                      int scale, int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
-    pr = parts_muladd(&pa, &pb, &pc, flags, status);
+    pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);

    return float32_round_pack_canonical(pr, status);
}

-static float64 QEMU_SOFTFLOAT_ATTR
-soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
-                float_status *status)
+float64 QEMU_SOFTFLOAT_ATTR
+float64_muladd_scalbn(float64 a, float64 b, float64 c,
+                      int scale, int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
-    pr = parts_muladd(&pa, &pb, &pc, flags, status);
+    pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);

    return float64_round_pack_canonical(pr, status);
}
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
    return ur.s;

 soft:
-    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
+    return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}

float64 QEMU_FLATTEN
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
    return ur.s;

 soft:
-    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
+    return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}

float64 float64r32_muladd(float64 a, float64 b, float64 c,
@@ -XXX,XX +XXX,XX @@ float64 float64r32_muladd(float64 a, float64 b, float64 c,
    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
-    pr = parts_muladd(&pa, &pb, &pc, flags, status);
+    pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);

    return float64r32_round_pack_canonical(pr, status);
}
@@ -XXX,XX +XXX,XX @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    bfloat16_unpack_canonical(&pc, c, status);
-    pr = parts_muladd(&pa, &pb, &pc, flags, status);
+    pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);

    return bfloat16_round_pack_canonical(pr, status);
}
@@ -XXX,XX +XXX,XX @@ float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    float128_unpack_canonical(&pc, c, status);
-    pr = parts_muladd(&pa, &pb, &pc, flags, status);
+    pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);

    return float128_round_pack_canonical(pr, status);
}
@@ -XXX,XX +XXX,XX @@ float32 float32_exp2(float32 a, float_status *status)

    float64_unpack_canonical(&rp, float64_one, status);
    for (i = 0 ; i < 15 ; i++) {
+
        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
-        rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
+        rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
        xnp = *parts_mul(&xnp, &xp, status);
    }

diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
-static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
-                                   FloatPartsN *c, int flags, float_status *s)
+static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
+                                          FloatPartsN *c, int scale,
+                                          int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
    a->exp = p_widen.exp;

 return_normal:
+    /* TODO: Replace all use of float_muladd_halve_result with scale. */
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
+    a->exp += scale;
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
--
2.43.0

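A hypothetical caller of the new interface, assuming the QEMU softfloat headers as modified above. The scale is applied before the single rounding, so this is not the same as applying scalbn to an already-rounded muladd result:

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

/* Compute (a * b + c) * 2**-1 with one rounding: the operation that
   float_muladd_halve_result used to request, now spelled scale = -1. */
static float32 muladd_halved(float32 a, float32 b, float32 c,
                             float_status *st)
{
    return float32_muladd_scalbn(a, b, c, -1, 0, st);
}
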
Use the scalbn interface instead of float_muladd_halve_result.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/helper-a64.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_one_point_five;
    }
-    return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
+    return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
}

float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
@@ -XXX,XX +XXX,XX @@ float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_one_point_five;
    }
-    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
+    return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
}

float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
@@ -XXX,XX +XXX,XX @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_one_point_five;
    }
-    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
+    return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
}

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
--
2.43.0

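Context for the -1 scale: FRSQRTS computes (3 - a * b) / 2, the core of one Newton-Raphson step toward 1/sqrt(n); the negation of a happens earlier in the helper via float*_chs(). A plain-C model for illustration only, since the real code rounds once inside softfloat:

#include <assert.h>
#include <math.h>

int main(void)
{
    /* One step x' = x * (3 - n*x*x) / 2 refines an estimate of 1/sqrt(n). */
    double n = 2.0, x = 0.7;                  /* rough guess for 1/sqrt(2) */
    double step = (3.0 - (n * x) * x) / 2.0;  /* what rsqrtsf_* computes */
    double x2 = x * step;

    assert(fabs(x2 - 1.0 / sqrt(n)) < fabs(x - 1.0 / sqrt(n)));
    return 0;
}
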
Use the scalbn interface instead of float_muladd_halve_result.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/sparc/helper.h | 4 +-
target/sparc/fop_helper.c | 8 ++--
target/sparc/translate.c | 80 +++++++++++++++++++++++----------------
3 files changed, 54 insertions(+), 38 deletions(-)

diff --git a/target/sparc/helper.h b/target/sparc/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/helper.h
+++ b/target/sparc/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
-DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32)
+DEF_HELPER_FLAGS_6(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, s32, i32)
DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64)

@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
-DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32)
+DEF_HELPER_FLAGS_6(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, s32, i32)
DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32)

diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -XXX,XX +XXX,XX @@ Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src)
}

float32 helper_fmadds(CPUSPARCState *env, float32 s1,
-                      float32 s2, float32 s3, uint32_t op)
+                      float32 s2, float32 s3, int32_t sc, uint32_t op)
{
-    float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status);
+    float32 ret = float32_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
    check_ieee_exceptions(env, GETPC());
    return ret;
}

float64 helper_fmaddd(CPUSPARCState *env, float64 s1,
-                      float64 s2, float64 s3, uint32_t op)
+                      float64 s2, float64 s3, int32_t sc, uint32_t op)
{
-    float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status);
+    float64 ret = float64_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
    check_ieee_exceptions(env, GETPC());
    return ret;
}
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -XXX,XX +XXX,XX @@ static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)

static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
-    gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
+    TCGv_i32 z = tcg_constant_i32(0);
+    gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, z);
}

static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
-    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
+    TCGv_i32 z = tcg_constant_i32(0);
+    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, z);
}

static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
-    int op = float_muladd_negate_c;
-    gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+    TCGv_i32 z = tcg_constant_i32(0);
+    TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+    gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
}

static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
-    int op = float_muladd_negate_c;
-    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+    TCGv_i32 z = tcg_constant_i32(0);
+    TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
}

static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
-    int op = float_muladd_negate_c | float_muladd_negate_result;
-    gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+    TCGv_i32 z = tcg_constant_i32(0);
+    TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
+                                   float_muladd_negate_result);
+    gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
}

static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
-    int op = float_muladd_negate_c | float_muladd_negate_result;
-    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+    TCGv_i32 z = tcg_constant_i32(0);
+    TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
+                                   float_muladd_negate_result);
+    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
}

static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
-    int op = float_muladd_negate_result;
-    gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+    TCGv_i32 z = tcg_constant_i32(0);
+    TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+    gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
}

static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
-    int op = float_muladd_negate_result;
-    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+    TCGv_i32 z = tcg_constant_i32(0);
+    TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
}

/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. */
static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
{
-    TCGv_i32 one = tcg_constant_i32(float32_one);
-    int op = float_muladd_halve_result;
-    gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+    TCGv_i32 fone = tcg_constant_i32(float32_one);
+    TCGv_i32 mone = tcg_constant_i32(-1);
+    TCGv_i32 op = tcg_constant_i32(0);
+    gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
}

static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
{
-    TCGv_i64 one = tcg_constant_i64(float64_one);
-    int op = float_muladd_halve_result;
-    gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+    TCGv_i64 fone = tcg_constant_i64(float64_one);
+    TCGv_i32 mone = tcg_constant_i32(-1);
+    TCGv_i32 op = tcg_constant_i32(0);
+    gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
}

/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */
static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
{
-    TCGv_i32 one = tcg_constant_i32(float32_one);
-    int op = float_muladd_negate_c | float_muladd_halve_result;
-    gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+    TCGv_i32 fone = tcg_constant_i32(float32_one);
+    TCGv_i32 mone = tcg_constant_i32(-1);
+    TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+    gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
}

static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
{
-    TCGv_i64 one = tcg_constant_i64(float64_one);
-    int op = float_muladd_negate_c | float_muladd_halve_result;
-    gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+    TCGv_i64 fone = tcg_constant_i64(float64_one);
+    TCGv_i32 mone = tcg_constant_i32(-1);
+    TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+    gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
}

/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */
static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
{
-    TCGv_i32 one = tcg_constant_i32(float32_one);
-    int op = float_muladd_negate_result | float_muladd_halve_result;
-    gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+    TCGv_i32 fone = tcg_constant_i32(float32_one);
+    TCGv_i32 mone = tcg_constant_i32(-1);
+    TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+    gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
}

static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
{
-    TCGv_i64 one = tcg_constant_i64(float64_one);
-    int op = float_muladd_negate_result | float_muladd_halve_result;
-    gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+    TCGv_i64 fone = tcg_constant_i64(float64_one);
+    TCGv_i32 mone = tcg_constant_i32(-1);
+    TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+    gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
}

static void gen_op_fpexception_im(DisasContext *dc, int ftt)
--
2.43.0

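The mapping from the removed flag to the new arguments, modeled in plain C for illustration: gen_op_fhadds now requests (1.0 * s1 + s2) * 2**-1, i.e. the scale constant mone = -1 and an empty flags word instead of float_muladd_halve_result:

#include <assert.h>

int main(void)
{
    double s1 = 3.0, s2 = 4.0;
    /* helper_fmadds(env, 1.0, s1, s2, sc = -1, op = 0) computes this
       with a single rounding; the values here are exact, so plain C
       arithmetic suffices for the demonstration. */
    double fhadd = (1.0 * s1 + s2) * 0.5;

    assert(fhadd == 3.5);
    return 0;
}
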
1
All uses have been converted to float*_muladd_scalbn.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/fpu/softfloat.h   | 3 ---
 fpu/softfloat.c           | 6 ------
 fpu/softfloat-parts.c.inc | 4 ----
 3 files changed, 13 deletions(-)

diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index XXXXXXX..XXXXXXX 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
 | Using these differs from negating an input or output before calling
 | the muladd function in that this means that a NaN doesn't have its
 | sign bit inverted before it is propagated.
-| We also support halving the result before rounding, as a special
-| case to support the ARM fused-sqrt-step instruction FRSQRTS.
 *----------------------------------------------------------------------------*/
 enum {
     float_muladd_negate_c = 1,
     float_muladd_negate_product = 2,
     float_muladd_negate_result = 4,
-    float_muladd_halve_result = 8,
 };
 
 /*----------------------------------------------------------------------------
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index XXXXXXX..XXXXXXX 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
     if (unlikely(!can_use_fpu(s))) {
         goto soft;
     }
-    if (unlikely(flags & float_muladd_halve_result)) {
-        goto soft;
-    }
 
     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
@@ -XXX,XX +XXX,XX @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
     if (unlikely(!can_use_fpu(s))) {
         goto soft;
     }
-    if (unlikely(flags & float_muladd_halve_result)) {
-        goto soft;
-    }
 
     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
     a->exp = p_widen.exp;
 
 return_normal:
-    /* TODO: Replace all use of float_muladd_halve_result with scale. */
-    if (flags & float_muladd_halve_result) {
-        a->exp -= 1;
-    }
     a->exp += scale;
 finish_sign:
     if (flags & float_muladd_negate_result) {
--
2.43.0
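For reference, the conversion pattern: the old flag's halving becomes a scale
argument of -1 to the *_muladd_scalbn form, which applies 2^scale before the
single final rounding. A minimal sketch, assuming only the interfaces shown
in this series; half_sum is an invented name:

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

/* (a + b) / 2 with one rounding: (1.0 * a + b) * 2^-1. */
static float32 half_sum(float32 a, float32 b, float_status *st)
{
    return float32_muladd_scalbn(float32_one, a, b, -1, 0, st);
}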
This rounding mode is used by Hexagon.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/fpu/softfloat-types.h | 2 ++
 fpu/softfloat-parts.c.inc     | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index XXXXXXX..XXXXXXX 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) {
     float_round_to_odd = 5,
     /* Not an IEEE rounding mode: round to closest odd, overflow to inf */
     float_round_to_odd_inf = 6,
+    /* Not an IEEE rounding mode: round to nearest even, overflow to max */
+    float_round_nearest_even_max = 7,
 } FloatRoundMode;
 
 /*
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -XXX,XX +XXX,XX @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
     int exp, flags = 0;
 
     switch (s->float_rounding_mode) {
+    case float_round_nearest_even_max:
+        overflow_norm = true;
+        /* fall through */
     case float_round_nearest_even:
         if (N > 64 && frac_lsb == 0) {
             inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
--
2.43.0
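The new mode behaves as round-to-nearest-even except on overflow, where it
returns the maximum finite value instead of infinity. A hedged usage sketch;
saturating_mul and st are invented names:

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

static float32 saturating_mul(float32 a, float32 b, float_status *st)
{
    /* Overflow saturates to the largest finite float32, not +/-Inf. */
    set_float_rounding_mode(float_round_nearest_even_max, st);
    return float32_mul(a, b, st);
}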
Certain Hexagon instructions suppress changes to the result
when the product of fma() is a true zero.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/fpu/softfloat.h   | 5 +++++
 fpu/softfloat.c           | 3 +++
 fpu/softfloat-parts.c.inc | 4 +++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index XXXXXXX..XXXXXXX 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -XXX,XX +XXX,XX @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
 | Using these differs from negating an input or output before calling
 | the muladd function in that this means that a NaN doesn't have its
 | sign bit inverted before it is propagated.
+|
+| With float_muladd_suppress_add_product_zero, if A or B is zero
+| such that the product is a true zero, then return C without addition.
+| This preserves the sign of C when C is +/- 0. Used for Hexagon.
 *----------------------------------------------------------------------------*/
 enum {
     float_muladd_negate_c = 1,
     float_muladd_negate_product = 2,
     float_muladd_negate_result = 4,
+    float_muladd_suppress_add_product_zero = 8,
 };
 
 /*----------------------------------------------------------------------------
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index XXXXXXX..XXXXXXX 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -XXX,XX +XXX,XX @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
     if (unlikely(!can_use_fpu(s))) {
         goto soft;
     }
+    if (unlikely(flags & float_muladd_suppress_add_product_zero)) {
+        goto soft;
+    }
 
     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -XXX,XX +XXX,XX @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
         goto return_normal;
     }
     if (c->cls == float_class_zero) {
-        if (a->sign != c->sign) {
+        if (flags & float_muladd_suppress_add_product_zero) {
+            a->sign = c->sign;
+        } else if (a->sign != c->sign) {
             goto return_sub_zero;
         }
         goto return_zero;
--
2.43.0
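The effect is visible in the sign of zero results: a true-zero product
normally participates in the 0 + -0 sign folding, while the new flag returns
the addend unchanged. A small sketch, with invented names:

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

/*
 * 0 * 1 + (-0): IEEE muladd yields +0 under round-to-nearest,
 * since (+0) + (-0) folds to +0.  With the suppress flag, C is
 * returned without the addition, so the -0 sign survives.
 */
static float32 zero_product_demo(float_status *st)
{
    float32 minus_zero = make_float32(0x80000000);
    return float32_muladd(float32_zero, float32_one, minus_zero,
                          float_muladd_suppress_add_product_zero, st);
}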
There are no special cases for this instruction.
Remove internal_mpyf as unused.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/fma_emu.h   | 1 -
 target/hexagon/fma_emu.c   | 8 --------
 target/hexagon/op_helper.c | 2 +-
 3 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/fma_emu.h
+++ b/target/hexagon/fma_emu.h
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32);
 float32 infinite_float32(uint8_t sign);
 float32 internal_fmafx(float32 a, float32 b, float32 c,
                        int scale, float_status *fp_status);
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status);
 float64 internal_mpyhh(float64 a, float64 b,
                        unsigned long long int accumulated,
                        float_status *fp_status);
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -XXX,XX +XXX,XX @@ float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
     return accum_round_float32(result, fp_status);
 }
 
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
-{
-    if (float32_is_zero(a) || float32_is_zero(b)) {
-        return float32_mul(a, b, fp_status);
-    }
-    return internal_fmafx(a, b, float32_zero, 0, fp_status);
-}
-
 float64 internal_mpyhh(float64 a, float64 b,
                        unsigned long long int accumulated,
                        float_status *fp_status)
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
 {
     float32 RdV;
     arch_fpop_start(env);
-    RdV = internal_mpyf(RsV, RtV, &env->fp_status);
+    RdV = float32_mul(RsV, RtV, &env->fp_status);
     arch_fpop_end(env);
     return RdV;
 }
--
2.43.0
There are no special cases for this instruction.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/op_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
                       float32 RsV, float32 RtV)
 {
     arch_fpop_start(env);
-    RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
+    RxV = float32_muladd(RsV, RtV, RxV, 0, &env->fp_status);
     arch_fpop_end(env);
     return RxV;
 }
--
2.43.0
There are no special cases for this instruction. Since hexagon
always uses default-nan mode, explicitly negating the first
input is unnecessary. Use float_muladd_negate_product instead.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/op_helper.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
 float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
                       float32 RsV, float32 RtV)
 {
-    float32 neg_RsV;
     arch_fpop_start(env);
-    neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
-    RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
+    RxV = float32_muladd(RsV, RtV, RxV, float_muladd_negate_product,
+                         &env->fp_status);
     arch_fpop_end(env);
     return RxV;
 }
--
2.43.0
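float_muladd_negate_product negates a*b inside the fused operation, so
c - a*b takes a single rounding and, unlike negating an input, leaves NaN
sign bits untouched. An illustrative wrapper; fms32 is an invented name:

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

/* c - (a * b), fused, with a single rounding. */
static float32 fms32(float32 a, float32 b, float32 c, float_status *st)
{
    return float32_muladd(a, b, c, float_muladd_negate_product, st);
}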
This instruction has a special case that 0 * x + c returns c
without the normal sign folding that comes with 0 + -0.
Use the new float_muladd_suppress_add_product_zero to
describe this.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/op_helper.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -XXX,XX +XXX,XX @@ static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
 float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
                          float32 RsV, float32 RtV, float32 PuV)
 {
-    size4s_t tmp;
     arch_fpop_start(env);
-    RxV = check_nan(RxV, RxV, &env->fp_status);
-    RxV = check_nan(RxV, RsV, &env->fp_status);
-    RxV = check_nan(RxV, RtV, &env->fp_status);
-    tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
-    if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
-        RxV = tmp;
-    }
+    RxV = float32_muladd_scalbn(RsV, RtV, RxV, fSXTN(8, 64, PuV),
+                                float_muladd_suppress_add_product_zero,
+                                &env->fp_status);
     arch_fpop_end(env);
     return RxV;
 }
--
2.43.0
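The scalbn variant computes (a * b + c) * 2^scale with one rounding at the
end, which is not always the same as scaling an already-rounded muladd; the
two can differ when the scaled result becomes subnormal or overflows. A
sketch of the distinction, with invented names:

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

static float32 scaled_fma(float32 a, float32 b, float32 c,
                          int scale, float_status *st)
{
    /* One rounding: the scale is folded into the fused operation. */
    return float32_muladd_scalbn(a, b, c, scale, 0, st);
    /*
     * Contrast with two roundings:
     *   float32_scalbn(float32_muladd(a, b, c, 0, st), scale, st)
     * where the sum is rounded before the scale is applied.
     */
}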
There are multiple special cases for this instruction.
(1) The saturate to normal maximum instead of overflow to infinity is
    handled by the new float_round_nearest_even_max rounding mode.
(2) The 0 * n + c special case is handled by the new
    float_muladd_suppress_add_product_zero flag.
(3) The Inf - Inf -> 0 special case can be detected after the fact
    by examining float_flag_invalid_isi.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/op_helper.c | 105 +++++++++----------------------
 1 file changed, 26 insertions(+), 79 deletions(-)

diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
     return RxV;
 }
 
-static bool is_zero_prod(float32 a, float32 b)
-{
-    return ((float32_is_zero(a) && is_finite(b)) ||
-            (float32_is_zero(b) && is_finite(a)));
-}
-
-static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
-{
-    float32 ret = dst;
-    if (float32_is_any_nan(x)) {
-        if (extract32(x, 22, 1) == 0) {
-            float_raise(float_flag_invalid, fp_status);
-        }
-        ret = make_float32(0xffffffff);    /* nan */
-    }
-    return ret;
-}
-
 float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
                          float32 RsV, float32 RtV, float32 PuV)
 {
@@ -XXX,XX +XXX,XX @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
     return RxV;
 }
 
-static bool is_inf_prod(int32_t a, int32_t b)
+static float32 do_sffma_lib(CPUHexagonState *env, float32 RxV,
+                            float32 RsV, float32 RtV, int negate)
 {
-    return (float32_is_infinity(a) && float32_is_infinity(b)) ||
-           (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
-           (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
+    int flags;
+
+    arch_fpop_start(env);
+
+    set_float_rounding_mode(float_round_nearest_even_max, &env->fp_status);
+    RxV = float32_muladd(RsV, RtV, RxV,
+                         negate | float_muladd_suppress_add_product_zero,
+                         &env->fp_status);
+
+    flags = get_float_exception_flags(&env->fp_status);
+    if (flags) {
+        /* Flags are suppressed by this instruction. */
+        set_float_exception_flags(0, &env->fp_status);
+
+        /* Return 0 for Inf - Inf. */
+        if (flags & float_flag_invalid_isi) {
+            RxV = 0;
+        }
+    }
+
+    arch_fpop_end(env);
+    return RxV;
 }
 
 float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
                           float32 RsV, float32 RtV)
 {
-    bool infinp;
-    bool infminusinf;
-    float32 tmp;
-
-    arch_fpop_start(env);
-    set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
-    infminusinf = float32_is_infinity(RxV) &&
-                  is_inf_prod(RsV, RtV) &&
-                  (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
-    infinp = float32_is_infinity(RxV) ||
-             float32_is_infinity(RtV) ||
-             float32_is_infinity(RsV);
-    RxV = check_nan(RxV, RxV, &env->fp_status);
-    RxV = check_nan(RxV, RsV, &env->fp_status);
-    RxV = check_nan(RxV, RtV, &env->fp_status);
-    tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
-    if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
-        RxV = tmp;
-    }
-    set_float_exception_flags(0, &env->fp_status);
-    if (float32_is_infinity(RxV) && !infinp) {
-        RxV = RxV - 1;
-    }
-    if (infminusinf) {
-        RxV = 0;
-    }
-    arch_fpop_end(env);
-    return RxV;
+    return do_sffma_lib(env, RxV, RsV, RtV, 0);
 }
 
 float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
                           float32 RsV, float32 RtV)
 {
-    bool infinp;
-    bool infminusinf;
-    float32 tmp;
-
-    arch_fpop_start(env);
-    set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
-    infminusinf = float32_is_infinity(RxV) &&
-                  is_inf_prod(RsV, RtV) &&
-                  (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
-    infinp = float32_is_infinity(RxV) ||
-             float32_is_infinity(RtV) ||
-             float32_is_infinity(RsV);
-    RxV = check_nan(RxV, RxV, &env->fp_status);
-    RxV = check_nan(RxV, RsV, &env->fp_status);
-    RxV = check_nan(RxV, RtV, &env->fp_status);
-    float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
-    tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
-    if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
-        RxV = tmp;
-    }
-    set_float_exception_flags(0, &env->fp_status);
-    if (float32_is_infinity(RxV) && !infinp) {
-        RxV = RxV - 1;
-    }
-    if (infminusinf) {
-        RxV = 0;
-    }
-    arch_fpop_end(env);
-    return RxV;
+    return do_sffma_lib(env, RxV, RsV, RtV, float_muladd_negate_product);
 }
 
 float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
--
2.43.0
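The post-hoc Inf - Inf detection works because softfloat reports the
infinity-subtraction cause separately as float_flag_invalid_isi. A standalone
sketch of the pattern used by do_sffma_lib above, with invented names:

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

static float32 fma_inf_minus_inf_to_zero(float32 a, float32 b, float32 c,
                                         float_status *st)
{
    float32 r;

    set_float_exception_flags(0, st);
    r = float32_muladd(a, b, c, 0, st);
    if (get_float_exception_flags(st) & float_flag_invalid_isi) {
        /* a * b and c were opposite infinities; force the result to 0. */
        r = float32_zero;
    }
    set_float_exception_flags(0, st);
    return r;
}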
The function is now unused.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/fma_emu.h |   2 -
 target/hexagon/fma_emu.c | 171 ---------------------------------------
 2 files changed, 173 deletions(-)

diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/fma_emu.h
+++ b/target/hexagon/fma_emu.h
@@ -XXX,XX +XXX,XX @@ static inline uint32_t float32_getexp_raw(float32 f32)
 }
 int32_t float32_getexp(float32 f32);
 float32 infinite_float32(uint8_t sign);
-float32 internal_fmafx(float32 a, float32 b, float32 c,
-                       int scale, float_status *fp_status);
 float64 internal_mpyhh(float64 a, float64 b,
                        unsigned long long int accumulated,
                        float_status *fp_status);
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
     return -1;
 }
 
-static uint64_t float32_getmant(float32 f32)
-{
-    Float a = { .i = f32 };
-    if (float32_is_normal(f32)) {
-        return a.mant | 1ULL << 23;
-    }
-    if (float32_is_zero(f32)) {
-        return 0;
-    }
-    if (float32_is_denormal(f32)) {
-        return a.mant;
-    }
-    return ~0ULL;
-}
-
 int32_t float32_getexp(float32 f32)
 {
     Float a = { .i = f32 };
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
 }
 
 /* Return a maximum finite value with the requested sign */
-static float32 maxfinite_float32(uint8_t sign)
-{
-    if (sign) {
-        return make_float32(SF_MINUS_MAXF);
-    } else {
-        return make_float32(SF_MAXF);
-    }
-}
-
-/* Return a zero value with requested sign */
-static float32 zero_float32(uint8_t sign)
-{
-    if (sign) {
-        return make_float32(0x80000000);
-    } else {
-        return float32_zero;
-    }
-}
-
 #define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
 static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
 { \
@@ -XXX,XX +XXX,XX @@ static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
 }
 
 GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
-GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
-
-static bool is_inf_prod(float64 a, float64 b)
-{
-    return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
-            (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
-            (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
-}
-
-static float64 special_fma(float64 a, float64 b, float64 c,
-                           float_status *fp_status)
-{
-    float64 ret = make_float64(0);
-
-    /*
-     * If A multiplied by B is an exact infinity and C is also an infinity
-     * but with the opposite sign, FMA returns NaN and raises invalid.
-     */
-    uint8_t a_sign = float64_is_neg(a);
-    uint8_t b_sign = float64_is_neg(b);
-    uint8_t c_sign = float64_is_neg(c);
-    if (is_inf_prod(a, b) && float64_is_infinity(c)) {
-        if ((a_sign ^ b_sign) != c_sign) {
-            ret = make_float64(DF_NAN);
-            float_raise(float_flag_invalid, fp_status);
-            return ret;
-        }
-    }
-    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
-        (float64_is_zero(a) && float64_is_infinity(b))) {
-        ret = make_float64(DF_NAN);
-        float_raise(float_flag_invalid, fp_status);
-        return ret;
-    }
-    /*
-     * If none of the above checks are true and C is a NaN,
-     * a NaN shall be returned
-     * If A or B are NaN, a NAN shall be returned.
-     */
-    if (float64_is_any_nan(a) ||
-        float64_is_any_nan(b) ||
-        float64_is_any_nan(c)) {
-        if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
-            float_raise(float_flag_invalid, fp_status);
-        }
-        if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
-            float_raise(float_flag_invalid, fp_status);
-        }
-        if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
-            float_raise(float_flag_invalid, fp_status);
-        }
-        ret = make_float64(DF_NAN);
-        return ret;
-    }
-    /*
-     * We have checked for adding opposite-signed infinities.
-     * Other infinities return infinity with the correct sign
-     */
-    if (float64_is_infinity(c)) {
-        ret = infinite_float64(c_sign);
-        return ret;
-    }
-    if (float64_is_infinity(a) || float64_is_infinity(b)) {
-        ret = infinite_float64(a_sign ^ b_sign);
-        return ret;
-    }
-    g_assert_not_reached();
-}
-
-static float32 special_fmaf(float32 a, float32 b, float32 c,
-                            float_status *fp_status)
-{
-    float64 aa, bb, cc;
-    aa = float32_to_float64(a, fp_status);
-    bb = float32_to_float64(b, fp_status);
-    cc = float32_to_float64(c, fp_status);
-    return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
-}
-
-float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
-                       float_status *fp_status)
-{
-    Accum prod;
-    Accum acc;
-    Accum result;
-    accum_init(&prod);
-    accum_init(&acc);
-    accum_init(&result);
-
-    uint8_t a_sign = float32_is_neg(a);
-    uint8_t b_sign = float32_is_neg(b);
-    uint8_t c_sign = float32_is_neg(c);
-    if (float32_is_infinity(a) ||
-        float32_is_infinity(b) ||
-        float32_is_infinity(c)) {
-        return special_fmaf(a, b, c, fp_status);
-    }
-    if (float32_is_any_nan(a) ||
-        float32_is_any_nan(b) ||
-        float32_is_any_nan(c)) {
-        return special_fmaf(a, b, c, fp_status);
-    }
-    if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
-        float32 tmp = float32_mul(a, b, fp_status);
-        tmp = float32_add(tmp, c, fp_status);
-        return tmp;
-    }
-
-    /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
-    prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
-
-    /*
-     * Note: extracting the mantissa into an int is multiplying by
-     * 2**23, so adjust here
-     */
-    prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
-    prod.sign = a_sign ^ b_sign;
-    if (float32_is_zero(a) || float32_is_zero(b)) {
-        prod.exp = -2 * WAY_BIG_EXP;
-    }
-    if ((scale > 0) && float32_is_denormal(c)) {
-        acc.mant = int128_mul_6464(0, 0);
-        acc.exp = -WAY_BIG_EXP;
-        acc.sign = c_sign;
-        acc.sticky = 1;
-        result = accum_add(prod, acc);
-    } else if (!float32_is_zero(c)) {
-        acc.mant = int128_mul_6464(float32_getmant(c), 1);
-        acc.exp = float32_getexp(c);
-        acc.sign = c_sign;
-        result = accum_add(prod, acc);
-    } else {
-        result = prod;
-    }
-    result.exp += scale;
-    return accum_round_float32(result, fp_status);
-}
 
 float64 internal_mpyhh(float64 a, float64 b,
                        unsigned long long int accumulated,
--
2.43.0
This massive macro is now only used once.
Expand it for use only by float64.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/fma_emu.c | 255 +++++++++++++++++++--------------------
 1 file changed, 127 insertions(+), 128 deletions(-)

diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
 }
 
 /* Return a maximum finite value with the requested sign */
-#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
-static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
-{ \
-    if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
-        && ((a.guard | a.round | a.sticky) == 0)) { \
-        /* result zero */ \
-        switch (fp_status->float_rounding_mode) { \
-        case float_round_down: \
-            return zero_##SUFFIX(1); \
-        default: \
-            return zero_##SUFFIX(0); \
-        } \
-    } \
-    /* Normalize right */ \
-    /* We want MANTBITS bits of mantissa plus the leading one. */ \
-    /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
-    /* So we need to normalize right while the high word is non-zero and \
-     * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
-    while ((int128_gethi(a.mant) != 0) || \
-           ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
-        a = accum_norm_right(a, 1); \
-    } \
-    /* \
-     * OK, now normalize left \
-     * We want to normalize left until we have a leading one in bit 24 \
-     * Theoretically, we only need to shift a maximum of one to the left if we \
-     * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
-     * should be 0 \
-     */ \
-    while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
-        a = accum_norm_left(a); \
-    } \
-    /* \
-     * OK, now we might need to denormalize because of potential underflow. \
-     * We need to do this before rounding, and rounding might make us normal \
-     * again \
-     */ \
-    while (a.exp <= 0) { \
-        a = accum_norm_right(a, 1 - a.exp); \
-        /* \
-         * Do we have underflow? \
-         * That's when we get an inexact answer because we ran out of bits \
-         * in a denormal. \
-         */ \
-        if (a.guard || a.round || a.sticky) { \
-            float_raise(float_flag_underflow, fp_status); \
-        } \
-    } \
-    /* OK, we're relatively canonical... now we need to round */ \
-    if (a.guard || a.round || a.sticky) { \
-        float_raise(float_flag_inexact, fp_status); \
-        switch (fp_status->float_rounding_mode) { \
-        case float_round_to_zero: \
-            /* Chop and we're done */ \
-            break; \
-        case float_round_up: \
-            if (a.sign == 0) { \
-                a.mant = int128_add(a.mant, int128_one()); \
-            } \
-            break; \
-        case float_round_down: \
-            if (a.sign != 0) { \
-                a.mant = int128_add(a.mant, int128_one()); \
-            } \
-            break; \
-        default: \
-            if (a.round || a.sticky) { \
-                /* round up if guard is 1, down if guard is zero */ \
-                a.mant = int128_add(a.mant, int128_make64(a.guard)); \
-            } else if (a.guard) { \
-                /* exactly .5, round up if odd */ \
-                a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
-            } \
-            break; \
-        } \
-    } \
-    /* \
-     * OK, now we might have carried all the way up. \
-     * So we might need to shr once \
-     * at least we know that the lsb should be zero if we rounded and \
-     * got a carry out... \
-     */ \
-    if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
-        a = accum_norm_right(a, 1); \
-    } \
-    /* Overflow? */ \
-    if (a.exp >= INF_EXP) { \
-        /* Yep, inf result */ \
-        float_raise(float_flag_overflow, fp_status); \
-        float_raise(float_flag_inexact, fp_status); \
-        switch (fp_status->float_rounding_mode) { \
-        case float_round_to_zero: \
-            return maxfinite_##SUFFIX(a.sign); \
-        case float_round_up: \
-            if (a.sign == 0) { \
-                return infinite_##SUFFIX(a.sign); \
-            } else { \
-                return maxfinite_##SUFFIX(a.sign); \
-            } \
-        case float_round_down: \
-            if (a.sign != 0) { \
-                return infinite_##SUFFIX(a.sign); \
-            } else { \
-                return maxfinite_##SUFFIX(a.sign); \
-            } \
-        default: \
-            return infinite_##SUFFIX(a.sign); \
-        } \
-    } \
-    /* Underflow? */ \
-    if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
-        /* Leading one means: No, we're normal. So, we should be done... */ \
-        INTERNAL_TYPE ret; \
-        ret.i = 0; \
-        ret.sign = a.sign; \
-        ret.exp = a.exp; \
-        ret.mant = int128_getlo(a.mant); \
-        return ret.i; \
-    } \
-    assert(a.exp == 1); \
-    INTERNAL_TYPE ret; \
-    ret.i = 0; \
-    ret.sign = a.sign; \
-    ret.exp = 0; \
-    ret.mant = int128_getlo(a.mant); \
-    return ret.i; \
+static float64 accum_round_float64(Accum a, float_status *fp_status)
+{
+    if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
+        && ((a.guard | a.round | a.sticky) == 0)) {
+        /* result zero */
+        switch (fp_status->float_rounding_mode) {
+        case float_round_down:
+            return zero_float64(1);
+        default:
+            return zero_float64(0);
+        }
+    }
+    /*
+     * Normalize right
+     * We want DF_MANTBITS bits of mantissa plus the leading one.
+     * That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF
+     * So we need to normalize right while the high word is non-zero and
+     * while the low word is nonzero when masked with 0xffe0_0000_0000_0000
+     */
+    while ((int128_gethi(a.mant) != 0) ||
+           ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) {
+        a = accum_norm_right(a, 1);
+    }
+    /*
+     * OK, now normalize left
+     * We want to normalize left until we have a leading one in bit 24
+     * Theoretically, we only need to shift a maximum of one to the left if we
+     * shifted out lots of bits from B, or if we had no shift / 1 shift sticky
+     * should be 0
+     */
+    while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) {
+        a = accum_norm_left(a);
+    }
+    /*
+     * OK, now we might need to denormalize because of potential underflow.
+     * We need to do this before rounding, and rounding might make us normal
+     * again
+     */
+    while (a.exp <= 0) {
+        a = accum_norm_right(a, 1 - a.exp);
+        /*
+         * Do we have underflow?
+         * That's when we get an inexact answer because we ran out of bits
+         * in a denormal.
+         */
+        if (a.guard || a.round || a.sticky) {
+            float_raise(float_flag_underflow, fp_status);
+        }
+    }
+    /* OK, we're relatively canonical... now we need to round */
+    if (a.guard || a.round || a.sticky) {
+        float_raise(float_flag_inexact, fp_status);
+        switch (fp_status->float_rounding_mode) {
+        case float_round_to_zero:
+            /* Chop and we're done */
+            break;
+        case float_round_up:
+            if (a.sign == 0) {
+                a.mant = int128_add(a.mant, int128_one());
+            }
+            break;
+        case float_round_down:
+            if (a.sign != 0) {
+                a.mant = int128_add(a.mant, int128_one());
+            }
+            break;
+        default:
+            if (a.round || a.sticky) {
+                /* round up if guard is 1, down if guard is zero */
+                a.mant = int128_add(a.mant, int128_make64(a.guard));
+            } else if (a.guard) {
+                /* exactly .5, round up if odd */
+                a.mant = int128_add(a.mant, int128_and(a.mant, int128_one()));
+            }
+            break;
+        }
+    }
+    /*
+     * OK, now we might have carried all the way up.
+     * So we might need to shr once
+     * at least we know that the lsb should be zero if we rounded and
+     * got a carry out...
+     */
+    if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) {
+        a = accum_norm_right(a, 1);
+    }
+    /* Overflow? */
+    if (a.exp >= DF_INF_EXP) {
+        /* Yep, inf result */
+        float_raise(float_flag_overflow, fp_status);
+        float_raise(float_flag_inexact, fp_status);
+        switch (fp_status->float_rounding_mode) {
+        case float_round_to_zero:
+            return maxfinite_float64(a.sign);
+        case float_round_up:
+            if (a.sign == 0) {
+                return infinite_float64(a.sign);
+            } else {
+                return maxfinite_float64(a.sign);
+            }
+        case float_round_down:
+            if (a.sign != 0) {
+                return infinite_float64(a.sign);
+            } else {
+                return maxfinite_float64(a.sign);
+            }
+        default:
+            return infinite_float64(a.sign);
+        }
+    }
+    /* Underflow? */
+    if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
+        /* Leading one means: No, we're normal. So, we should be done... */
+        Double ret;
+        ret.i = 0;
+        ret.sign = a.sign;
+        ret.exp = a.exp;
+        ret.mant = int128_getlo(a.mant);
+        return ret.i;
+    }
+    assert(a.exp == 1);
+    Double ret;
+    ret.i = 0;
+    ret.sign = a.sign;
+    ret.exp = 0;
+    ret.mant = int128_getlo(a.mant);
+    return ret.i;
 }
 
-GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
-
 float64 internal_mpyhh(float64 a, float64 b,
                        unsigned long long int accumulated,
                        float_status *fp_status)
--
2.43.0
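The default case above is the classic guard/round/sticky formulation of
round-to-nearest-even. An integer-only restatement of that decision,
standalone and not QEMU code:

#include <stdint.h>

/*
 * 'mant' holds the kept mantissa bits; g, r, s are the discarded
 * guard, round and sticky bits, mirroring a.guard/a.round/a.sticky.
 */
static uint64_t round_nearest_even(uint64_t mant, int g, int r, int s)
{
    if (r | s) {
        return mant + g;          /* above/below half: guard decides */
    }
    if (g) {
        return mant + (mant & 1); /* exact half: round to even */
    }
    return mant;                  /* exact: no increment */
}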
This structure, with bitfields, is incorrect for big-endian.
Use the existing float32_getexp_raw which uses extract32.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/fma_emu.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -XXX,XX +XXX,XX @@ typedef union {
     };
 } Double;
 
-typedef union {
-    float f;
-    uint32_t i;
-    struct {
-        uint32_t mant:23;
-        uint32_t exp:8;
-        uint32_t sign:1;
-    };
-} Float;
-
 static uint64_t float64_getmant(float64 f64)
 {
     Double a = { .i = f64 };
@@ -XXX,XX +XXX,XX @@ int32_t float64_getexp(float64 f64)
 
 int32_t float32_getexp(float32 f32)
 {
-    Float a = { .i = f32 };
+    int exp = float32_getexp_raw(f32);
     if (float32_is_normal(f32)) {
-        return a.exp;
+        return exp;
     }
     if (float32_is_denormal(f32)) {
-        return a.exp + 1;
+        return exp + 1;
     }
     return -1;
 }
--
2.43.0
diff view generated by jsdifflib
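(Aside: the endianness problem the patch describes is easy to see in a few
lines of C. The sketch below is illustrative only -- it is not part of the
patch -- and assumes QEMU's extract32() from "qemu/bitops.h":

    #include "qemu/bitops.h"    /* extract32() */

    /*
     * Bit-field allocation order is implementation-defined: on a
     * little-endian ABI 'exp' occupies bits [30:23] of the word, but on
     * a big-endian ABI the fields are laid out from the other end, so
     * 'a.exp' no longer names the IEEE exponent field.
     */
    typedef union {
        float f;
        uint32_t i;
        struct {
            uint32_t mant:23;
            uint32_t exp:8;
            uint32_t sign:1;
        };
    } Float;

    /* Layout-independent: always bits [30:23] of a binary32 value. */
    static inline int float32_exp_bits(uint32_t bits)
    {
        return extract32(bits, 23, 8);
    }

The union form reads back the intended field only on little-endian hosts;
the extract32() form is correct everywhere, which is why the patch switches
to float32_getexp_raw().)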
This structure, with bitfields, is incorrect for big-endian.
Use extract64 and deposit64 instead.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/fma_emu.c | 46 ++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 30 deletions(-)

diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -XXX,XX +XXX,XX @@
 
 #define WAY_BIG_EXP 4096
 
-typedef union {
-    double f;
-    uint64_t i;
-    struct {
-        uint64_t mant:52;
-        uint64_t exp:11;
-        uint64_t sign:1;
-    };
-} Double;
-
 static uint64_t float64_getmant(float64 f64)
 {
-    Double a = { .i = f64 };
+    uint64_t mant = extract64(f64, 0, 52);
     if (float64_is_normal(f64)) {
-        return a.mant | 1ULL << 52;
+        return mant | 1ULL << 52;
     }
     if (float64_is_zero(f64)) {
         return 0;
     }
     if (float64_is_denormal(f64)) {
-        return a.mant;
+        return mant;
     }
     return ~0ULL;
 }
 
 int32_t float64_getexp(float64 f64)
 {
-    Double a = { .i = f64 };
+    int exp = extract64(f64, 52, 11);
     if (float64_is_normal(f64)) {
-        return a.exp;
+        return exp;
     }
     if (float64_is_denormal(f64)) {
-        return a.exp + 1;
+        return exp + 1;
     }
     return -1;
 }
@@ -XXX,XX +XXX,XX @@ float32 infinite_float32(uint8_t sign)
 /* Return a maximum finite value with the requested sign */
 static float64 accum_round_float64(Accum a, float_status *fp_status)
 {
+    uint64_t ret;
+
     if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
         && ((a.guard | a.round | a.sticky) == 0)) {
         /* result zero */
@@ -XXX,XX +XXX,XX @@ static float64 accum_round_float64(Accum a, float_status *fp_status)
         }
     }
     /* Underflow? */
-    if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
+    ret = int128_getlo(a.mant);
+    if (ret & (1ULL << DF_MANTBITS)) {
         /* Leading one means: No, we're normal. So, we should be done... */
-        Double ret;
-        ret.i = 0;
-        ret.sign = a.sign;
-        ret.exp = a.exp;
-        ret.mant = int128_getlo(a.mant);
-        return ret.i;
+        ret = deposit64(ret, 52, 11, a.exp);
+    } else {
+        assert(a.exp == 1);
+        ret = deposit64(ret, 52, 11, 0);
     }
-    assert(a.exp == 1);
-    Double ret;
-    ret.i = 0;
-    ret.sign = a.sign;
-    ret.exp = 0;
-    ret.mant = int128_getlo(a.mant);
-    return ret.i;
+    ret = deposit64(ret, 63, 1, a.sign);
+    return ret;
 }
 
 float64 internal_mpyhh(float64 a, float64 b,
--
2.43.0
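(The same idea in the packing direction: deposit64() writes a field at an
explicit bit position. A minimal sketch -- illustrative, not from the
patch; it assumes QEMU's "qemu/bitops.h" -- of assembling a binary64 bit
pattern from its parts:

    #include "qemu/bitops.h"    /* deposit64() */

    /*
     * deposit64(x, start, len, val) replaces bits [start+len-1:start]
     * of x with val, independent of host byte order.
     */
    static uint64_t make_float64_bits(int sign, int exp, uint64_t mant)
    {
        uint64_t ret = 0;
        ret = deposit64(ret, 0, 52, mant);  /* mantissa: bits [51:0]  */
        ret = deposit64(ret, 52, 11, exp);  /* exponent: bits [62:52] */
        ret = deposit64(ret, 63, 1, sign);  /* sign:     bit  63      */
        return ret;
    }

This is exactly the shape the patch gives accum_round_float64(), minus the
rounding logic around it.)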
Let tb->page_addr[0] contain the address of the first byte of the
translated block, rather than the address of the page containing the
start of the translated block. We need to recover this value anyway
at various points, and it is easier to discard a page offset when it
is not needed, which happens naturally via the existing find_page shift.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cpu-exec.c | 16 ++++++++--------
 accel/tcg/cputlb.c | 3 ++-
 accel/tcg/translate-all.c | 9 +++++----
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ struct tb_desc {
     target_ulong pc;
     target_ulong cs_base;
     CPUArchState *env;
-    tb_page_addr_t phys_page1;
+    tb_page_addr_t page_addr0;
     uint32_t flags;
     uint32_t cflags;
     uint32_t trace_vcpu_dstate;
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
     const struct tb_desc *desc = d;
 
     if (tb->pc == desc->pc &&
-        tb->page_addr[0] == desc->phys_page1 &&
+        tb->page_addr[0] == desc->page_addr0 &&
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
         if (tb->page_addr[1] == -1) {
             return true;
         } else {
-            tb_page_addr_t phys_page2;
-            target_ulong virt_page2;
+            tb_page_addr_t phys_page1;
+            target_ulong virt_page1;
 
             /*
              * We know that the first page matched, and an otherwise valid TB
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
              * is different for the new TB. Therefore any exception raised
              * here by the faulting lookup is not premature.
              */
-            virt_page2 = TARGET_PAGE_ALIGN(desc->pc);
-            phys_page2 = get_page_addr_code(desc->env, virt_page2);
-            if (tb->page_addr[1] == phys_page2) {
+            virt_page1 = TARGET_PAGE_ALIGN(desc->pc);
+            phys_page1 = get_page_addr_code(desc->env, virt_page1);
+            if (tb->page_addr[1] == phys_page1) {
                 return true;
             }
         }
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
     if (phys_pc == -1) {
         return NULL;
     }
-    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
+    desc.page_addr0 = phys_pc;
     h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
 }
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
    can be detected */
 void tlb_protect_code(ram_addr_t ram_addr)
 {
-    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
+    cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
+                                             TARGET_PAGE_SIZE,
                                              DIRTY_MEMORY_CODE);
 }
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     qemu_spin_unlock(&tb->jmp_lock);
 
     /* remove the TB from the hash list */
-    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+    phys_pc = tb->page_addr[0];
     h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
                      tb->trace_vcpu_dstate);
     if (!qht_remove(&tb_ctx.htable, tb, h)) {
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
      * we can only insert TBs that are fully initialized.
      */
     page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
-    tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
+    tb_page_add(p, tb, 0, phys_pc);
     if (p2) {
         tb_page_add(p2, tb, 1, phys_page2);
     } else {
@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
         if (n == 0) {
             /* NOTE: tb_end may be after the end of the page, but
                it is not a problem */
-            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+            tb_start = tb->page_addr[0];
             tb_end = tb_start + tb->size;
         } else {
             tb_start = tb->page_addr[1];
-            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+            tb_end = tb_start + ((tb->page_addr[0] + tb->size)
+                                 & ~TARGET_PAGE_MASK);
         }
         if (!(tb_end <= start || tb_start >= end)) {
 #ifdef TARGET_HAS_PRECISE_SMC
--
2.34.1

No need to open-code 64x64->128-bit multiplication.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/fma_emu.c | 32 +++-----------------------------
 1 file changed, 3 insertions(+), 29 deletions(-)

diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -XXX,XX +XXX,XX @@ int32_t float32_getexp(float32 f32)
     return -1;
 }
 
-static uint32_t int128_getw0(Int128 x)
-{
-    return int128_getlo(x);
-}
-
-static uint32_t int128_getw1(Int128 x)
-{
-    return int128_getlo(x) >> 32;
-}
-
 static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
 {
-    Int128 a, b;
-    uint64_t pp0, pp1a, pp1b, pp1s, pp2;
+    uint64_t l, h;
 
-    a = int128_make64(ai);
-    b = int128_make64(bi);
-    pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
-    pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
-    pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
-    pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
-
-    pp1s = pp1a + pp1b;
-    if ((pp1s < pp1a) || (pp1s < pp1b)) {
-        pp2 += (1ULL << 32);
-    }
-    uint64_t ret_low = pp0 + (pp1s << 32);
-    if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
-        pp2 += 1;
-    }
-
-    return int128_make128(ret_low, pp2 + (pp1s >> 32));
+    mulu64(&l, &h, ai, bi);
+    return int128_make128(l, h);
 }
 
 static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
--
2.43.0
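(For reference, mulu64() from QEMU's "qemu/host-utils.h" computes the full
128-bit product directly; the removed code was the schoolbook expansion of
the same thing using 32-bit halves. A self-contained cross-check --
illustrative only, not part of the patch -- that the two agree:

    #include <assert.h>
    #include <stdint.h>
    #include "qemu/host-utils.h"    /* mulu64() */

    static void check_mul(uint64_t a, uint64_t b)
    {
        uint64_t lo, hi;
        mulu64(&lo, &hi, a, b);

        /* Schoolbook 64x64->128 with 32-bit halves, as the old code did. */
        uint64_t a0 = (uint32_t)a, a1 = a >> 32;
        uint64_t b0 = (uint32_t)b, b1 = b >> 32;
        uint64_t p0 = a0 * b0;
        uint64_t p1 = a1 * b0 + a0 * b1;    /* may wrap; see 'carry' */
        uint64_t p2 = a1 * b1;

        /* A wrap of p1 loses 2^64 in units of 2^32, i.e. 2^32 in hi. */
        uint64_t carry = (p1 < a1 * b0) ? 1ULL << 32 : 0;
        uint64_t ref_lo = p0 + (p1 << 32);
        uint64_t ref_hi = p2 + (p1 >> 32) + carry + (ref_lo < p0);

        assert(lo == ref_lo && hi == ref_hi);
    }

The patch simply replaces the hand-written expansion with the helper.)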
Initialize x with accumulated via direct assignment,
rather than multiplying by 1.

Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/hexagon/fma_emu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -XXX,XX +XXX,XX @@ float64 internal_mpyhh(float64 a, float64 b,
         float64_is_infinity(b)) {
         return float64_mul(a, b, fp_status);
     }
-    x.mant = int128_mul_6464(accumulated, 1);
+    x.mant = int128_make64(accumulated);
     x.sticky = sticky;
     prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
     x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
--
2.43.0
1
Populate this new method for all targets. Always match
1
Convert all targets simultaneously, as the gen_intermediate_code
2
the result that would be given by cpu_get_tb_cpu_state,
2
function disappears from the target. While there are possible
3
as we will want these values to correspond in the logs.
3
workarounds, they're larger than simply performing the conversion.
4
4
5
Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (target/sparc)
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
7
---
10
Cc: Eduardo Habkost <eduardo@habkost.net> (supporter:Machine core)
8
include/exec/translator.h | 14 --------------
11
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> (supporter:Machine core)
9
include/hw/core/tcg-cpu-ops.h | 13 +++++++++++++
12
Cc: "Philippe Mathieu-Daudé" <f4bug@amsat.org> (reviewer:Machine core)
10
target/alpha/cpu.h | 2 ++
13
Cc: Yanan Wang <wangyanan55@huawei.com> (reviewer:Machine core)
11
target/arm/internals.h | 2 ++
14
Cc: Michael Rolnik <mrolnik@gmail.com> (maintainer:AVR TCG CPUs)
12
target/avr/cpu.h | 2 ++
15
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com> (maintainer:CRIS TCG CPUs)
13
target/hexagon/cpu.h | 2 ++
16
Cc: Taylor Simpson <tsimpson@quicinc.com> (supporter:Hexagon TCG CPUs)
14
target/hppa/cpu.h | 2 ++
17
Cc: Song Gao <gaosong@loongson.cn> (maintainer:LoongArch TCG CPUs)
15
target/i386/tcg/helper-tcg.h | 2 ++
18
Cc: Xiaojuan Yang <yangxiaojuan@loongson.cn> (maintainer:LoongArch TCG CPUs)
16
target/loongarch/internals.h | 2 ++
19
Cc: Laurent Vivier <laurent@vivier.eu> (maintainer:M68K TCG CPUs)
17
target/m68k/cpu.h | 2 ++
20
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com> (reviewer:MIPS TCG CPUs)
18
target/microblaze/cpu.h | 2 ++
21
Cc: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> (reviewer:MIPS TCG CPUs)
19
target/mips/tcg/tcg-internal.h | 2 ++
22
Cc: Chris Wulff <crwulff@gmail.com> (maintainer:NiosII TCG CPUs)
20
target/openrisc/cpu.h | 2 ++
23
Cc: Marek Vasut <marex@denx.de> (maintainer:NiosII TCG CPUs)
21
target/ppc/cpu.h | 2 ++
24
Cc: Stafford Horne <shorne@gmail.com> (odd fixer:OpenRISC TCG CPUs)
22
target/riscv/cpu.h | 3 +++
25
Cc: Yoshinori Sato <ysato@users.sourceforge.jp> (reviewer:RENESAS RX CPUs)
23
target/rx/cpu.h | 2 ++
26
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (maintainer:SPARC TCG CPUs)
24
target/s390x/s390x-internal.h | 2 ++
27
Cc: Bastian Koppelmann <kbastian@mail.uni-paderborn.de> (maintainer:TriCore TCG CPUs)
25
target/sh4/cpu.h | 2 ++
28
Cc: Max Filippov <jcmvbkbc@gmail.com> (maintainer:Xtensa TCG CPUs)
26
target/sparc/cpu.h | 2 ++
29
Cc: qemu-arm@nongnu.org (open list:ARM TCG CPUs)
27
target/tricore/cpu.h | 2 ++
30
Cc: qemu-ppc@nongnu.org (open list:PowerPC TCG CPUs)
28
target/xtensa/cpu.h | 2 ++
31
Cc: qemu-riscv@nongnu.org (open list:RISC-V TCG CPUs)
29
accel/tcg/cpu-exec.c | 8 +++++---
32
Cc: qemu-s390x@nongnu.org (open list:S390 TCG CPUs)
30
accel/tcg/translate-all.c | 8 +++++---
33
---
31
target/alpha/cpu.c | 1 +
34
include/hw/core/cpu.h | 3 +++
32
target/alpha/translate.c | 4 ++--
35
target/alpha/cpu.c | 9 +++++++++
33
target/arm/cpu.c | 1 +
36
target/arm/cpu.c | 13 +++++++++++++
34
target/arm/tcg/cpu-v7m.c | 1 +
37
target/avr/cpu.c | 8 ++++++++
35
target/arm/tcg/translate.c | 5 ++---
38
target/cris/cpu.c | 8 ++++++++
36
target/avr/cpu.c | 1 +
39
target/hexagon/cpu.c | 8 ++++++++
37
target/avr/translate.c | 6 +++---
40
target/hppa/cpu.c | 8 ++++++++
38
target/hexagon/cpu.c | 1 +
41
target/i386/cpu.c | 9 +++++++++
39
target/hexagon/translate.c | 4 ++--
42
target/loongarch/cpu.c | 9 +++++++++
40
target/hppa/cpu.c | 1 +
43
target/m68k/cpu.c | 8 ++++++++
41
target/hppa/translate.c | 4 ++--
44
target/microblaze/cpu.c | 8 ++++++++
42
target/i386/tcg/tcg-cpu.c | 1 +
45
target/mips/cpu.c | 8 ++++++++
43
target/i386/tcg/translate.c | 5 ++---
46
target/nios2/cpu.c | 9 +++++++++
44
target/loongarch/cpu.c | 1 +
47
target/openrisc/cpu.c | 8 ++++++++
45
target/loongarch/tcg/translate.c | 4 ++--
48
target/ppc/cpu_init.c | 8 ++++++++
46
target/m68k/cpu.c | 1 +
49
target/riscv/cpu.c | 13 +++++++++++++
47
target/m68k/translate.c | 4 ++--
50
target/rx/cpu.c | 8 ++++++++
48
target/microblaze/cpu.c | 1 +
51
target/s390x/cpu.c | 8 ++++++++
49
target/microblaze/translate.c | 4 ++--
52
target/sh4/cpu.c | 8 ++++++++
50
target/mips/cpu.c | 1 +
53
target/sparc/cpu.c | 8 ++++++++
51
target/mips/tcg/translate.c | 4 ++--
54
target/tricore/cpu.c | 9 +++++++++
52
target/openrisc/cpu.c | 1 +
55
target/xtensa/cpu.c | 8 ++++++++
53
target/openrisc/translate.c | 4 ++--
56
22 files changed, 186 insertions(+)
54
target/ppc/cpu_init.c | 1 +
55
target/ppc/translate.c | 4 ++--
56
target/riscv/tcg/tcg-cpu.c | 1 +
57
target/riscv/translate.c | 4 ++--
58
target/rx/cpu.c | 1 +
59
target/rx/translate.c | 4 ++--
60
target/s390x/cpu.c | 1 +
61
target/s390x/tcg/translate.c | 4 ++--
62
target/sh4/cpu.c | 1 +
63
target/sh4/translate.c | 4 ++--
64
target/sparc/cpu.c | 1 +
65
target/sparc/translate.c | 4 ++--
66
target/tricore/cpu.c | 1 +
67
target/tricore/translate.c | 5 ++---
68
target/xtensa/cpu.c | 1 +
69
target/xtensa/translate.c | 4 ++--
70
62 files changed, 121 insertions(+), 62 deletions(-)
57
71
58
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
72
diff --git a/include/exec/translator.h b/include/exec/translator.h
59
index XXXXXXX..XXXXXXX 100644
73
index XXXXXXX..XXXXXXX 100644
60
--- a/include/hw/core/cpu.h
74
--- a/include/exec/translator.h
61
+++ b/include/hw/core/cpu.h
75
+++ b/include/exec/translator.h
62
@@ -XXX,XX +XXX,XX @@ struct SysemuCPUOps;
76
@@ -XXX,XX +XXX,XX @@
63
* If the target behaviour here is anything other than "set
77
#include "qemu/bswap.h"
64
* the PC register to the value passed in" then the target must
78
#include "exec/vaddr.h"
65
* also implement the synchronize_from_tb hook.
79
66
+ * @get_pc: Callback for getting the Program Counter register.
80
-/**
67
+ * As above, with the semantics of the target architecture.
81
- * gen_intermediate_code
68
* @gdb_read_register: Callback for letting GDB read a register.
82
- * @cpu: cpu context
69
* @gdb_write_register: Callback for letting GDB write a register.
83
- * @tb: translation block
70
* @gdb_adjust_breakpoint: Callback for adjusting the address of a
84
- * @max_insns: max number of instructions to translate
71
@@ -XXX,XX +XXX,XX @@ struct CPUClass {
85
- * @pc: guest virtual program counter address
72
void (*dump_state)(CPUState *cpu, FILE *, int flags);
86
- * @host_pc: host physical program counter address
73
int64_t (*get_arch_id)(CPUState *cpu);
87
- *
74
void (*set_pc)(CPUState *cpu, vaddr value);
88
- * This function must be provided by the target, which should create
75
+ vaddr (*get_pc)(CPUState *cpu);
89
- * the target-specific DisasContext, and then invoke translator_loop.
76
int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg);
90
- */
77
int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
91
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
78
vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
92
- vaddr pc, void *host_pc);
93
-
94
/**
95
* DisasJumpType:
96
* @DISAS_NEXT: Next instruction in program order.
97
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
98
index XXXXXXX..XXXXXXX 100644
99
--- a/include/hw/core/tcg-cpu-ops.h
100
+++ b/include/hw/core/tcg-cpu-ops.h
101
@@ -XXX,XX +XXX,XX @@ struct TCGCPUOps {
102
* Called when the first CPU is realized.
103
*/
104
void (*initialize)(void);
105
+ /**
106
+ * @translate_code: Translate guest instructions to TCGOps
107
+ * @cpu: cpu context
108
+ * @tb: translation block
109
+ * @max_insns: max number of instructions to translate
110
+ * @pc: guest virtual program counter address
111
+ * @host_pc: host physical program counter address
112
+ *
113
+ * This function must be provided by the target, which should create
114
+ * the target-specific DisasContext, and then invoke translator_loop.
115
+ */
116
+ void (*translate_code)(CPUState *cpu, TranslationBlock *tb,
117
+ int *max_insns, vaddr pc, void *host_pc);
118
/**
119
* @synchronize_from_tb: Synchronize state from a TCG #TranslationBlock
120
*
121
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
122
index XXXXXXX..XXXXXXX 100644
123
--- a/target/alpha/cpu.h
124
+++ b/target/alpha/cpu.h
125
@@ -XXX,XX +XXX,XX @@ enum {
126
};
127
128
void alpha_translate_init(void);
129
+void alpha_translate_code(CPUState *cs, TranslationBlock *tb,
130
+ int *max_insns, vaddr pc, void *host_pc);
131
132
#define CPU_RESOLVING_TYPE TYPE_ALPHA_CPU
133
134
diff --git a/target/arm/internals.h b/target/arm/internals.h
135
index XXXXXXX..XXXXXXX 100644
136
--- a/target/arm/internals.h
137
+++ b/target/arm/internals.h
138
@@ -XXX,XX +XXX,XX @@ void init_cpreg_list(ARMCPU *cpu);
139
140
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
141
void arm_translate_init(void);
142
+void arm_translate_code(CPUState *cs, TranslationBlock *tb,
143
+ int *max_insns, vaddr pc, void *host_pc);
144
145
void arm_cpu_register_gdb_commands(ARMCPU *cpu);
146
void aarch64_cpu_register_gdb_commands(ARMCPU *cpu, GString *,
147
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
148
index XXXXXXX..XXXXXXX 100644
149
--- a/target/avr/cpu.h
150
+++ b/target/avr/cpu.h
151
@@ -XXX,XX +XXX,XX @@ static inline void set_avr_feature(CPUAVRState *env, int feature)
152
}
153
154
void avr_cpu_tcg_init(void);
155
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
156
+ int *max_insns, vaddr pc, void *host_pc);
157
158
int cpu_avr_exec(CPUState *cpu);
159
160
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
161
index XXXXXXX..XXXXXXX 100644
162
--- a/target/hexagon/cpu.h
163
+++ b/target/hexagon/cpu.h
164
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc,
165
typedef HexagonCPU ArchCPU;
166
167
void hexagon_translate_init(void);
168
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
169
+ int *max_insns, vaddr pc, void *host_pc);
170
171
#include "exec/cpu-all.h"
172
173
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
174
index XXXXXXX..XXXXXXX 100644
175
--- a/target/hppa/cpu.h
176
+++ b/target/hppa/cpu.h
177
@@ -XXX,XX +XXX,XX @@ static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
178
}
179
180
void hppa_translate_init(void);
181
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
182
+ int *max_insns, vaddr pc, void *host_pc);
183
184
#define CPU_RESOLVING_TYPE TYPE_HPPA_CPU
185
186
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/i386/tcg/helper-tcg.h
189
+++ b/target/i386/tcg/helper-tcg.h
190
@@ -XXX,XX +XXX,XX @@ static inline target_long lshift(target_long x, int n)
191
192
/* translate.c */
193
void tcg_x86_init(void);
194
+void x86_translate_code(CPUState *cs, TranslationBlock *tb,
195
+ int *max_insns, vaddr pc, void *host_pc);
196
197
/* excp_helper.c */
198
G_NORETURN void raise_exception(CPUX86State *env, int exception_index);
199
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
200
index XXXXXXX..XXXXXXX 100644
201
--- a/target/loongarch/internals.h
202
+++ b/target/loongarch/internals.h
203
@@ -XXX,XX +XXX,XX @@
204
#define TARGET_VIRT_MASK MAKE_64BIT_MASK(0, TARGET_VIRT_ADDR_SPACE_BITS)
205
206
void loongarch_translate_init(void);
207
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
208
+ int *max_insns, vaddr pc, void *host_pc);
209
210
void G_NORETURN do_raise_exception(CPULoongArchState *env,
211
uint32_t exception,
212
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
213
index XXXXXXX..XXXXXXX 100644
214
--- a/target/m68k/cpu.h
215
+++ b/target/m68k/cpu.h
216
@@ -XXX,XX +XXX,XX @@ int m68k_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
217
int m68k_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
218
219
void m68k_tcg_init(void);
220
+void m68k_translate_code(CPUState *cs, TranslationBlock *tb,
221
+ int *max_insns, vaddr pc, void *host_pc);
222
void m68k_cpu_init_gdb(M68kCPU *cpu);
223
uint32_t cpu_m68k_get_ccr(CPUM68KState *env);
224
void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t);
225
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
226
index XXXXXXX..XXXXXXX 100644
227
--- a/target/microblaze/cpu.h
228
+++ b/target/microblaze/cpu.h
229
@@ -XXX,XX +XXX,XX @@ static inline void mb_cpu_write_msr(CPUMBState *env, uint32_t val)
230
}
231
232
void mb_tcg_init(void);
233
+void mb_translate_code(CPUState *cs, TranslationBlock *tb,
234
+ int *max_insns, vaddr pc, void *host_pc);
235
236
#define CPU_RESOLVING_TYPE TYPE_MICROBLAZE_CPU
237
238
diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h
239
index XXXXXXX..XXXXXXX 100644
240
--- a/target/mips/tcg/tcg-internal.h
241
+++ b/target/mips/tcg/tcg-internal.h
242
@@ -XXX,XX +XXX,XX @@
243
#include "cpu.h"
244
245
void mips_tcg_init(void);
246
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
247
+ int *max_insns, vaddr pc, void *host_pc);
248
249
void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
250
G_NORETURN void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
251
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
252
index XXXXXXX..XXXXXXX 100644
253
--- a/target/openrisc/cpu.h
254
+++ b/target/openrisc/cpu.h
255
@@ -XXX,XX +XXX,XX @@ void openrisc_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
256
int openrisc_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
257
int openrisc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
258
void openrisc_translate_init(void);
259
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
260
+ int *max_insns, vaddr pc, void *host_pc);
261
int print_insn_or1k(bfd_vma addr, disassemble_info *info);
262
263
#ifndef CONFIG_USER_ONLY
264
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
265
index XXXXXXX..XXXXXXX 100644
266
--- a/target/ppc/cpu.h
267
+++ b/target/ppc/cpu.h
268
@@ -XXX,XX +XXX,XX @@ extern const VMStateDescription vmstate_ppc_cpu;
269
270
/*****************************************************************************/
271
void ppc_translate_init(void);
272
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
273
+ int *max_insns, vaddr pc, void *host_pc);
274
275
#if !defined(CONFIG_USER_ONLY)
276
void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
277
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
278
index XXXXXXX..XXXXXXX 100644
279
--- a/target/riscv/cpu.h
280
+++ b/target/riscv/cpu.h
281
@@ -XXX,XX +XXX,XX @@ RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit);
282
void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv, bool virt_en);
283
284
void riscv_translate_init(void);
285
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
286
+ int *max_insns, vaddr pc, void *host_pc);
287
+
288
G_NORETURN void riscv_raise_exception(CPURISCVState *env,
289
uint32_t exception, uintptr_t pc);
290
291
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
292
index XXXXXXX..XXXXXXX 100644
293
--- a/target/rx/cpu.h
294
+++ b/target/rx/cpu.h
295
@@ -XXX,XX +XXX,XX @@ int rx_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
296
int rx_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
297
298
void rx_translate_init(void);
299
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
300
+ int *max_insns, vaddr pc, void *host_pc);
301
void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte);
302
303
#include "exec/cpu-all.h"
304
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
305
index XXXXXXX..XXXXXXX 100644
306
--- a/target/s390x/s390x-internal.h
307
+++ b/target/s390x/s390x-internal.h
308
@@ -XXX,XX +XXX,XX @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3,
309
310
/* translate.c */
311
void s390x_translate_init(void);
312
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
313
+ int *max_insns, vaddr pc, void *host_pc);
314
void s390x_restore_state_to_opc(CPUState *cs,
315
const TranslationBlock *tb,
316
const uint64_t *data);
317
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
318
index XXXXXXX..XXXXXXX 100644
319
--- a/target/sh4/cpu.h
320
+++ b/target/sh4/cpu.h
321
@@ -XXX,XX +XXX,XX @@ G_NORETURN void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
322
uintptr_t retaddr);
323
324
void sh4_translate_init(void);
325
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
326
+ int *max_insns, vaddr pc, void *host_pc);
327
328
#if !defined(CONFIG_USER_ONLY)
329
hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
330
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
331
index XXXXXXX..XXXXXXX 100644
332
--- a/target/sparc/cpu.h
333
+++ b/target/sparc/cpu.h
334
@@ -XXX,XX +XXX,XX @@ int sparc_cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
335
336
/* translate.c */
337
void sparc_tcg_init(void);
338
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
339
+ int *max_insns, vaddr pc, void *host_pc);
340
341
/* fop_helper.c */
342
target_ulong cpu_get_fsr(CPUSPARCState *);
343
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
344
index XXXXXXX..XXXXXXX 100644
345
--- a/target/tricore/cpu.h
346
+++ b/target/tricore/cpu.h
347
@@ -XXX,XX +XXX,XX @@ FIELD(TB_FLAGS, PRIV, 0, 2)
348
349
void cpu_state_reset(CPUTriCoreState *s);
350
void tricore_tcg_init(void);
351
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
352
+ int *max_insns, vaddr pc, void *host_pc);
353
354
static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc,
355
uint64_t *cs_base, uint32_t *flags)
356
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
357
index XXXXXXX..XXXXXXX 100644
358
--- a/target/xtensa/cpu.h
359
+++ b/target/xtensa/cpu.h
360
@@ -XXX,XX +XXX,XX @@ G_NORETURN void xtensa_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
361
362
void xtensa_collect_sr_names(const XtensaConfig *config);
363
void xtensa_translate_init(void);
364
+void xtensa_translate_code(CPUState *cs, TranslationBlock *tb,
365
+ int *max_insns, vaddr pc, void *host_pc);
366
void **xtensa_get_regfile_by_name(const char *name, int entries, int bits);
367
void xtensa_breakpoint_handler(CPUState *cs);
368
void xtensa_register_core(XtensaConfigList *node);
369
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
370
index XXXXXXX..XXXXXXX 100644
371
--- a/accel/tcg/cpu-exec.c
372
+++ b/accel/tcg/cpu-exec.c
373
@@ -XXX,XX +XXX,XX @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
374
375
if (!tcg_target_initialized) {
376
/* Check mandatory TCGCPUOps handlers */
377
+ const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
378
#ifndef CONFIG_USER_ONLY
379
- assert(cpu->cc->tcg_ops->cpu_exec_halt);
380
- assert(cpu->cc->tcg_ops->cpu_exec_interrupt);
381
+ assert(tcg_ops->cpu_exec_halt);
382
+ assert(tcg_ops->cpu_exec_interrupt);
383
#endif /* !CONFIG_USER_ONLY */
384
- cpu->cc->tcg_ops->initialize();
385
+ assert(tcg_ops->translate_code);
386
+ tcg_ops->initialize();
387
tcg_target_initialized = true;
388
}
389
390
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
391
index XXXXXXX..XXXXXXX 100644
392
--- a/accel/tcg/translate-all.c
393
+++ b/accel/tcg/translate-all.c
394
@@ -XXX,XX +XXX,XX @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
395
396
tcg_func_start(tcg_ctx);
397
398
- tcg_ctx->cpu = env_cpu(env);
399
- gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
400
+ CPUState *cs = env_cpu(env);
401
+ tcg_ctx->cpu = cs;
402
+ cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
403
+
404
assert(tb->size != 0);
405
tcg_ctx->cpu = NULL;
406
*max_insns = tb->icount;
407
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
408
/*
409
* Overflow of code_gen_buffer, or the current slice of it.
410
*
411
- * TODO: We don't need to re-do gen_intermediate_code, nor
412
+ * TODO: We don't need to re-do tcg_ops->translate_code, nor
413
* should we re-do the tcg optimization currently hidden
414
* inside tcg_gen_code. All that should be required is to
415
* flush the TBs, allocate a new TB, re-initialize it per
79
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
416
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
80
index XXXXXXX..XXXXXXX 100644
417
index XXXXXXX..XXXXXXX 100644
81
--- a/target/alpha/cpu.c
418
--- a/target/alpha/cpu.c
82
+++ b/target/alpha/cpu.c
419
+++ b/target/alpha/cpu.c
83
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_set_pc(CPUState *cs, vaddr value)
420
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps alpha_sysemu_ops = {
84
cpu->env.pc = value;
421
85
}
422
static const TCGCPUOps alpha_tcg_ops = {
86
423
.initialize = alpha_translate_init,
87
+static vaddr alpha_cpu_get_pc(CPUState *cs)
424
+ .translate_code = alpha_translate_code,
88
+{
425
.synchronize_from_tb = alpha_cpu_synchronize_from_tb,
89
+ AlphaCPU *cpu = ALPHA_CPU(cs);
426
.restore_state_to_opc = alpha_restore_state_to_opc,
90
+
427
91
+ return cpu->env.pc;
428
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
92
+}
429
index XXXXXXX..XXXXXXX 100644
93
+
430
--- a/target/alpha/translate.c
94
+
431
+++ b/target/alpha/translate.c
95
static bool alpha_cpu_has_work(CPUState *cs)
432
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps alpha_tr_ops = {
96
{
433
.tb_stop = alpha_tr_tb_stop,
97
/* Here we are checking to see if the CPU should wake up from HALT.
434
};
98
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_class_init(ObjectClass *oc, void *data)
435
99
cc->has_work = alpha_cpu_has_work;
436
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
100
cc->dump_state = alpha_cpu_dump_state;
437
- vaddr pc, void *host_pc)
101
cc->set_pc = alpha_cpu_set_pc;
438
+void alpha_translate_code(CPUState *cpu, TranslationBlock *tb,
102
+ cc->get_pc = alpha_cpu_get_pc;
439
+ int *max_insns, vaddr pc, void *host_pc)
103
cc->gdb_read_register = alpha_cpu_gdb_read_register;
440
{
104
cc->gdb_write_register = alpha_cpu_gdb_write_register;
441
DisasContext dc;
105
#ifndef CONFIG_USER_ONLY
442
translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
106
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
443
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
107
index XXXXXXX..XXXXXXX 100644
444
index XXXXXXX..XXXXXXX 100644
108
--- a/target/arm/cpu.c
445
--- a/target/arm/cpu.c
109
+++ b/target/arm/cpu.c
446
+++ b/target/arm/cpu.c
110
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_set_pc(CPUState *cs, vaddr value)
447
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps arm_sysemu_ops = {
111
}
112
}
113
114
+static vaddr arm_cpu_get_pc(CPUState *cs)
115
+{
116
+ ARMCPU *cpu = ARM_CPU(cs);
117
+ CPUARMState *env = &cpu->env;
118
+
119
+ if (is_a64(env)) {
120
+ return env->pc;
121
+ } else {
122
+ return env->regs[15];
123
+ }
124
+}
125
+
126
#ifdef CONFIG_TCG
448
#ifdef CONFIG_TCG
127
void arm_cpu_synchronize_from_tb(CPUState *cs,
449
static const TCGCPUOps arm_tcg_ops = {
128
const TranslationBlock *tb)
450
.initialize = arm_translate_init,
129
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
451
+ .translate_code = arm_translate_code,
130
cc->has_work = arm_cpu_has_work;
452
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
131
cc->dump_state = arm_cpu_dump_state;
453
.debug_excp_handler = arm_debug_excp_handler,
132
cc->set_pc = arm_cpu_set_pc;
454
.restore_state_to_opc = arm_restore_state_to_opc,
133
+ cc->get_pc = arm_cpu_get_pc;
455
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
134
cc->gdb_read_register = arm_cpu_gdb_read_register;
456
index XXXXXXX..XXXXXXX 100644
135
cc->gdb_write_register = arm_cpu_gdb_write_register;
457
--- a/target/arm/tcg/cpu-v7m.c
136
#ifndef CONFIG_USER_ONLY
458
+++ b/target/arm/tcg/cpu-v7m.c
459
@@ -XXX,XX +XXX,XX @@ static void cortex_m55_initfn(Object *obj)
460
461
static const TCGCPUOps arm_v7m_tcg_ops = {
462
.initialize = arm_translate_init,
463
+ .translate_code = arm_translate_code,
464
.synchronize_from_tb = arm_cpu_synchronize_from_tb,
465
.debug_excp_handler = arm_debug_excp_handler,
466
.restore_state_to_opc = arm_restore_state_to_opc,
467
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
468
index XXXXXXX..XXXXXXX 100644
469
--- a/target/arm/tcg/translate.c
470
+++ b/target/arm/tcg/translate.c
471
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps thumb_translator_ops = {
472
.tb_stop = arm_tr_tb_stop,
473
};
474
475
-/* generate intermediate code for basic block 'tb'. */
476
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
477
- vaddr pc, void *host_pc)
478
+void arm_translate_code(CPUState *cpu, TranslationBlock *tb,
479
+ int *max_insns, vaddr pc, void *host_pc)
480
{
481
DisasContext dc = { };
482
const TranslatorOps *ops = &arm_translator_ops;
137
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
483
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
138
index XXXXXXX..XXXXXXX 100644
484
index XXXXXXX..XXXXXXX 100644
139
--- a/target/avr/cpu.c
485
--- a/target/avr/cpu.c
140
+++ b/target/avr/cpu.c
486
+++ b/target/avr/cpu.c
141
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_set_pc(CPUState *cs, vaddr value)
487
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps avr_sysemu_ops = {
142
cpu->env.pc_w = value / 2; /* internally PC points to words */
488
143
}
489
static const TCGCPUOps avr_tcg_ops = {
144
490
.initialize = avr_cpu_tcg_init,
145
+static vaddr avr_cpu_get_pc(CPUState *cs)
491
+ .translate_code = avr_cpu_translate_code,
146
+{
492
.synchronize_from_tb = avr_cpu_synchronize_from_tb,
147
+ AVRCPU *cpu = AVR_CPU(cs);
493
.restore_state_to_opc = avr_restore_state_to_opc,
148
+
494
.cpu_exec_interrupt = avr_cpu_exec_interrupt,
149
+ return cpu->env.pc_w * 2;
495
diff --git a/target/avr/translate.c b/target/avr/translate.c
150
+}
496
index XXXXXXX..XXXXXXX 100644
151
+
497
--- a/target/avr/translate.c
152
static bool avr_cpu_has_work(CPUState *cs)
498
+++ b/target/avr/translate.c
153
{
499
@@ -XXX,XX +XXX,XX @@ static bool trans_WDR(DisasContext *ctx, arg_WDR *a)
154
AVRCPU *cpu = AVR_CPU(cs);
500
*
155
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_class_init(ObjectClass *oc, void *data)
501
* - translate()
156
cc->has_work = avr_cpu_has_work;
502
* - canonicalize_skip()
157
cc->dump_state = avr_cpu_dump_state;
503
- * - gen_intermediate_code()
158
cc->set_pc = avr_cpu_set_pc;
504
+ * - translate_code()
159
+ cc->get_pc = avr_cpu_get_pc;
505
* - restore_state_to_opc()
160
dc->vmsd = &vms_avr_cpu;
506
*
161
cc->sysemu_ops = &avr_sysemu_ops;
507
*/
162
cc->disas_set_info = avr_cpu_disas_set_info;
508
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps avr_tr_ops = {
163
diff --git a/target/cris/cpu.c b/target/cris/cpu.c
509
.tb_stop = avr_tr_tb_stop,
164
index XXXXXXX..XXXXXXX 100644
510
};
165
--- a/target/cris/cpu.c
511
166
+++ b/target/cris/cpu.c
512
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
167
@@ -XXX,XX +XXX,XX @@ static void cris_cpu_set_pc(CPUState *cs, vaddr value)
513
- vaddr pc, void *host_pc)
168
cpu->env.pc = value;
514
+void avr_cpu_translate_code(CPUState *cs, TranslationBlock *tb,
169
}
515
+ int *max_insns, vaddr pc, void *host_pc)
170
516
{
171
+static vaddr cris_cpu_get_pc(CPUState *cs)
517
DisasContext dc = { };
172
+{
518
translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
173
+ CRISCPU *cpu = CRIS_CPU(cs);
174
+
175
+ return cpu->env.pc;
176
+}
177
+
178
static bool cris_cpu_has_work(CPUState *cs)
179
{
180
return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
181
@@ -XXX,XX +XXX,XX @@ static void cris_cpu_class_init(ObjectClass *oc, void *data)
182
cc->has_work = cris_cpu_has_work;
183
cc->dump_state = cris_cpu_dump_state;
184
cc->set_pc = cris_cpu_set_pc;
185
+ cc->get_pc = cris_cpu_get_pc;
186
cc->gdb_read_register = cris_cpu_gdb_read_register;
187
cc->gdb_write_register = cris_cpu_gdb_write_register;
188
#ifndef CONFIG_USER_ONLY
189
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
519
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
190
index XXXXXXX..XXXXXXX 100644
520
index XXXXXXX..XXXXXXX 100644
191
--- a/target/hexagon/cpu.c
521
--- a/target/hexagon/cpu.c
192
+++ b/target/hexagon/cpu.c
522
+++ b/target/hexagon/cpu.c
193
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_set_pc(CPUState *cs, vaddr value)
523
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_init(Object *obj)
194
env->gpr[HEX_REG_PC] = value;
524
195
}
525
static const TCGCPUOps hexagon_tcg_ops = {
196
526
.initialize = hexagon_translate_init,
197
+static vaddr hexagon_cpu_get_pc(CPUState *cs)
527
+ .translate_code = hexagon_translate_code,
198
+{
528
.synchronize_from_tb = hexagon_cpu_synchronize_from_tb,
199
+ HexagonCPU *cpu = HEXAGON_CPU(cs);
529
.restore_state_to_opc = hexagon_restore_state_to_opc,
200
+ CPUHexagonState *env = &cpu->env;
530
};
201
+ return env->gpr[HEX_REG_PC];
531
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
202
+}
532
index XXXXXXX..XXXXXXX 100644
203
+
533
--- a/target/hexagon/translate.c
204
static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
534
+++ b/target/hexagon/translate.c
205
const TranslationBlock *tb)
535
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hexagon_tr_ops = {
206
{
536
.tb_stop = hexagon_tr_tb_stop,
207
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data)
537
};
208
cc->has_work = hexagon_cpu_has_work;
538
209
cc->dump_state = hexagon_dump_state;
539
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
210
cc->set_pc = hexagon_cpu_set_pc;
540
- vaddr pc, void *host_pc)
211
+ cc->get_pc = hexagon_cpu_get_pc;
541
+void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
212
cc->gdb_read_register = hexagon_gdb_read_register;
542
+ int *max_insns, vaddr pc, void *host_pc)
213
cc->gdb_write_register = hexagon_gdb_write_register;
543
{
214
cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS + NUM_QREGS;
544
DisasContext ctx;
545
215
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
546
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
216
index XXXXXXX..XXXXXXX 100644
547
index XXXXXXX..XXXXXXX 100644
217
--- a/target/hppa/cpu.c
548
--- a/target/hppa/cpu.c
218
+++ b/target/hppa/cpu.c
549
+++ b/target/hppa/cpu.c
219
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_set_pc(CPUState *cs, vaddr value)
550
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps hppa_sysemu_ops = {
220
cpu->env.iaoq_b = value + 4;
551
221
}
552
static const TCGCPUOps hppa_tcg_ops = {
222
553
.initialize = hppa_translate_init,
223
+static vaddr hppa_cpu_get_pc(CPUState *cs)
554
+ .translate_code = hppa_translate_code,
224
+{
555
.synchronize_from_tb = hppa_cpu_synchronize_from_tb,
225
+ HPPACPU *cpu = HPPA_CPU(cs);
556
.restore_state_to_opc = hppa_restore_state_to_opc,
226
+
557
227
+ return cpu->env.iaoq_f;
558
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
228
+}
559
index XXXXXXX..XXXXXXX 100644
229
+
560
--- a/target/hppa/translate.c
230
static void hppa_cpu_synchronize_from_tb(CPUState *cs,
561
+++ b/target/hppa/translate.c
231
const TranslationBlock *tb)
562
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hppa_tr_ops = {
232
{
563
#endif
233
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_class_init(ObjectClass *oc, void *data)
564
};
234
cc->has_work = hppa_cpu_has_work;
565
235
cc->dump_state = hppa_cpu_dump_state;
566
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
236
cc->set_pc = hppa_cpu_set_pc;
567
- vaddr pc, void *host_pc)
237
+ cc->get_pc = hppa_cpu_get_pc;
568
+void hppa_translate_code(CPUState *cs, TranslationBlock *tb,
238
cc->gdb_read_register = hppa_cpu_gdb_read_register;
569
+ int *max_insns, vaddr pc, void *host_pc)
239
cc->gdb_write_register = hppa_cpu_gdb_write_register;
570
{
240
#ifndef CONFIG_USER_ONLY
571
DisasContext ctx = { };
241
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
572
translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
242
index XXXXXXX..XXXXXXX 100644
573
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
243
--- a/target/i386/cpu.c
574
index XXXXXXX..XXXXXXX 100644
244
+++ b/target/i386/cpu.c
575
--- a/target/i386/tcg/tcg-cpu.c
245
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_set_pc(CPUState *cs, vaddr value)
576
+++ b/target/i386/tcg/tcg-cpu.c
246
cpu->env.eip = value;
577
@@ -XXX,XX +XXX,XX @@ static bool x86_debug_check_breakpoint(CPUState *cs)
247
}
578
248
579
static const TCGCPUOps x86_tcg_ops = {
249
+static vaddr x86_cpu_get_pc(CPUState *cs)
580
.initialize = tcg_x86_init,
250
+{
581
+ .translate_code = x86_translate_code,
251
+ X86CPU *cpu = X86_CPU(cs);
582
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
252
+
583
.restore_state_to_opc = x86_restore_state_to_opc,
253
+ /* Match cpu_get_tb_cpu_state. */
584
.cpu_exec_enter = x86_cpu_exec_enter,
254
+ return cpu->env.eip + cpu->env.segs[R_CS].base;
585
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
255
+}
586
index XXXXXXX..XXXXXXX 100644
256
+
587
--- a/target/i386/tcg/translate.c
257
int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request)
588
+++ b/target/i386/tcg/translate.c
258
{
589
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps i386_tr_ops = {
259
X86CPU *cpu = X86_CPU(cs);
590
.tb_stop = i386_tr_tb_stop,
260
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
591
};
261
cc->has_work = x86_cpu_has_work;
592
262
cc->dump_state = x86_cpu_dump_state;
593
-/* generate intermediate code for basic block 'tb'. */
263
cc->set_pc = x86_cpu_set_pc;
594
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
264
+ cc->get_pc = x86_cpu_get_pc;
595
- vaddr pc, void *host_pc)
265
cc->gdb_read_register = x86_cpu_gdb_read_register;
596
+void x86_translate_code(CPUState *cpu, TranslationBlock *tb,
266
cc->gdb_write_register = x86_cpu_gdb_write_register;
597
+ int *max_insns, vaddr pc, void *host_pc)
267
cc->get_arch_id = x86_cpu_get_arch_id;
598
{
599
DisasContext dc;
600
268
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
601
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
269
index XXXXXXX..XXXXXXX 100644
602
index XXXXXXX..XXXXXXX 100644
270
--- a/target/loongarch/cpu.c
603
--- a/target/loongarch/cpu.c
271
+++ b/target/loongarch/cpu.c
604
+++ b/target/loongarch/cpu.c
272
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_set_pc(CPUState *cs, vaddr value)
605
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags)
273
env->pc = value;
606
274
}
607
static const TCGCPUOps loongarch_tcg_ops = {
275
608
.initialize = loongarch_translate_init,
276
+static vaddr loongarch_cpu_get_pc(CPUState *cs)
609
+ .translate_code = loongarch_translate_code,
277
+{
610
.synchronize_from_tb = loongarch_cpu_synchronize_from_tb,
278
+ LoongArchCPU *cpu = LOONGARCH_CPU(cs);
611
.restore_state_to_opc = loongarch_restore_state_to_opc,
279
+ CPULoongArchState *env = &cpu->env;
612
280
+
613
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
281
+ return env->pc;
614
index XXXXXXX..XXXXXXX 100644
282
+}
615
--- a/target/loongarch/tcg/translate.c
283
+
616
+++ b/target/loongarch/tcg/translate.c
284
#ifndef CONFIG_USER_ONLY
617
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps loongarch_tr_ops = {
285
#include "hw/loongarch/virt.h"
618
.tb_stop = loongarch_tr_tb_stop,
286
619
};
287
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data)
620
288
cc->has_work = loongarch_cpu_has_work;
621
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
289
cc->dump_state = loongarch_cpu_dump_state;
622
- vaddr pc, void *host_pc)
290
cc->set_pc = loongarch_cpu_set_pc;
623
+void loongarch_translate_code(CPUState *cs, TranslationBlock *tb,
291
+ cc->get_pc = loongarch_cpu_get_pc;
624
+ int *max_insns, vaddr pc, void *host_pc)
292
#ifndef CONFIG_USER_ONLY
625
{
293
dc->vmsd = &vmstate_loongarch_cpu;
626
DisasContext ctx;
294
cc->sysemu_ops = &loongarch_sysemu_ops;
627
295
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
628
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
296
index XXXXXXX..XXXXXXX 100644
629
index XXXXXXX..XXXXXXX 100644
297
--- a/target/m68k/cpu.c
630
--- a/target/m68k/cpu.c
298
+++ b/target/m68k/cpu.c
631
+++ b/target/m68k/cpu.c
299
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_set_pc(CPUState *cs, vaddr value)
632
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps m68k_sysemu_ops = {
300
cpu->env.pc = value;
633
301
}
634
static const TCGCPUOps m68k_tcg_ops = {
302
635
.initialize = m68k_tcg_init,
303
+static vaddr m68k_cpu_get_pc(CPUState *cs)
636
+ .translate_code = m68k_translate_code,
304
+{
637
.restore_state_to_opc = m68k_restore_state_to_opc,
305
+ M68kCPU *cpu = M68K_CPU(cs);
638
306
+
639
#ifndef CONFIG_USER_ONLY
307
+ return cpu->env.pc;
640
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
308
+}
641
index XXXXXXX..XXXXXXX 100644
309
+
642
--- a/target/m68k/translate.c
310
static bool m68k_cpu_has_work(CPUState *cs)
643
+++ b/target/m68k/translate.c
311
{
644
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps m68k_tr_ops = {
312
return cs->interrupt_request & CPU_INTERRUPT_HARD;
645
.tb_stop = m68k_tr_tb_stop,
313
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_class_init(ObjectClass *c, void *data)
646
};
314
cc->has_work = m68k_cpu_has_work;
647
315
cc->dump_state = m68k_cpu_dump_state;
648
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
316
cc->set_pc = m68k_cpu_set_pc;
649
- vaddr pc, void *host_pc)
317
+ cc->get_pc = m68k_cpu_get_pc;
650
+void m68k_translate_code(CPUState *cpu, TranslationBlock *tb,
318
cc->gdb_read_register = m68k_cpu_gdb_read_register;
651
+ int *max_insns, vaddr pc, void *host_pc)
319
cc->gdb_write_register = m68k_cpu_gdb_write_register;
652
{
320
#if defined(CONFIG_SOFTMMU)
653
DisasContext dc;
654
translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base);
321
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
655
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
322
index XXXXXXX..XXXXXXX 100644
656
index XXXXXXX..XXXXXXX 100644
323
--- a/target/microblaze/cpu.c
657
--- a/target/microblaze/cpu.c
324
+++ b/target/microblaze/cpu.c
658
+++ b/target/microblaze/cpu.c
325
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_set_pc(CPUState *cs, vaddr value)
659
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps mb_sysemu_ops = {
326
cpu->env.iflags = 0;
660
327
}
661
static const TCGCPUOps mb_tcg_ops = {
328
662
.initialize = mb_tcg_init,
329
+static vaddr mb_cpu_get_pc(CPUState *cs)
663
+ .translate_code = mb_translate_code,
330
+{
664
.synchronize_from_tb = mb_cpu_synchronize_from_tb,
331
+ MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
665
.restore_state_to_opc = mb_restore_state_to_opc,
332
+
666
333
+ return cpu->env.pc;
667
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
334
+}
668
index XXXXXXX..XXXXXXX 100644
335
+
669
--- a/target/microblaze/translate.c
336
static void mb_cpu_synchronize_from_tb(CPUState *cs,
670
+++ b/target/microblaze/translate.c
337
const TranslationBlock *tb)
671
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mb_tr_ops = {
338
{
672
.tb_stop = mb_tr_tb_stop,
339
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_class_init(ObjectClass *oc, void *data)
673
};
340
674
341
cc->dump_state = mb_cpu_dump_state;
675
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
342
cc->set_pc = mb_cpu_set_pc;
676
- vaddr pc, void *host_pc)
343
+ cc->get_pc = mb_cpu_get_pc;
677
+void mb_translate_code(CPUState *cpu, TranslationBlock *tb,
344
cc->gdb_read_register = mb_cpu_gdb_read_register;
678
+ int *max_insns, vaddr pc, void *host_pc)
345
cc->gdb_write_register = mb_cpu_gdb_write_register;
679
{
346
680
DisasContext dc;
681
translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base);
347
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
682
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
348
index XXXXXXX..XXXXXXX 100644
683
index XXXXXXX..XXXXXXX 100644
349
--- a/target/mips/cpu.c
684
--- a/target/mips/cpu.c
350
+++ b/target/mips/cpu.c
685
+++ b/target/mips/cpu.c
351
@@ -XXX,XX +XXX,XX @@ static void mips_cpu_set_pc(CPUState *cs, vaddr value)
686
@@ -XXX,XX +XXX,XX @@ static const Property mips_cpu_properties[] = {
352
mips_env_set_pc(&cpu->env, value);
687
#include "hw/core/tcg-cpu-ops.h"
353
}
688
static const TCGCPUOps mips_tcg_ops = {
354
689
.initialize = mips_tcg_init,
355
+static vaddr mips_cpu_get_pc(CPUState *cs)
690
+ .translate_code = mips_translate_code,
356
+{
691
.synchronize_from_tb = mips_cpu_synchronize_from_tb,
357
+ MIPSCPU *cpu = MIPS_CPU(cs);
692
.restore_state_to_opc = mips_restore_state_to_opc,
358
+
693
359
+ return cpu->env.active_tc.PC;
694
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
360
+}
695
index XXXXXXX..XXXXXXX 100644
361
+
696
--- a/target/mips/tcg/translate.c
362
static bool mips_cpu_has_work(CPUState *cs)
697
+++ b/target/mips/tcg/translate.c
363
{
698
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mips_tr_ops = {
364
MIPSCPU *cpu = MIPS_CPU(cs);
699
.tb_stop = mips_tr_tb_stop,
365
@@ -XXX,XX +XXX,XX @@ static void mips_cpu_class_init(ObjectClass *c, void *data)
700
};
366
cc->has_work = mips_cpu_has_work;
701
367
cc->dump_state = mips_cpu_dump_state;
702
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
368
cc->set_pc = mips_cpu_set_pc;
703
- vaddr pc, void *host_pc)
369
+ cc->get_pc = mips_cpu_get_pc;
704
+void mips_translate_code(CPUState *cs, TranslationBlock *tb,
370
cc->gdb_read_register = mips_cpu_gdb_read_register;
705
+ int *max_insns, vaddr pc, void *host_pc)
371
cc->gdb_write_register = mips_cpu_gdb_write_register;
706
{
372
#ifndef CONFIG_USER_ONLY
707
DisasContext ctx;
373
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
708
374
index XXXXXXX..XXXXXXX 100644
375
--- a/target/nios2/cpu.c
376
+++ b/target/nios2/cpu.c
377
@@ -XXX,XX +XXX,XX @@ static void nios2_cpu_set_pc(CPUState *cs, vaddr value)
378
env->pc = value;
379
}
380
381
+static vaddr nios2_cpu_get_pc(CPUState *cs)
382
+{
383
+ Nios2CPU *cpu = NIOS2_CPU(cs);
384
+ CPUNios2State *env = &cpu->env;
385
+
386
+ return env->pc;
387
+}
388
+
389
static bool nios2_cpu_has_work(CPUState *cs)
390
{
391
return cs->interrupt_request & CPU_INTERRUPT_HARD;
392
@@ -XXX,XX +XXX,XX @@ static void nios2_cpu_class_init(ObjectClass *oc, void *data)
393
cc->has_work = nios2_cpu_has_work;
394
cc->dump_state = nios2_cpu_dump_state;
395
cc->set_pc = nios2_cpu_set_pc;
396
+ cc->get_pc = nios2_cpu_get_pc;
397
cc->disas_set_info = nios2_cpu_disas_set_info;
398
#ifndef CONFIG_USER_ONLY
399
cc->sysemu_ops = &nios2_sysemu_ops;
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.dflag = 0;
 }
 
+static vaddr openrisc_cpu_get_pc(CPUState *cs)
+{
+    OpenRISCCPU *cpu = OPENRISC_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
                                              const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = openrisc_cpu_has_work;
     cc->dump_state = openrisc_cpu_dump_state;
     cc->set_pc = openrisc_cpu_set_pc;
+    cc->get_pc = openrisc_cpu_get_pc;
     cc->gdb_read_register = openrisc_cpu_gdb_read_register;
     cc->gdb_write_register = openrisc_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps openrisc_sysemu_ops = {
 
 static const TCGCPUOps openrisc_tcg_ops = {
     .initialize = openrisc_translate_init,
+    .translate_code = openrisc_translate_code,
     .synchronize_from_tb = openrisc_cpu_synchronize_from_tb,
     .restore_state_to_opc = openrisc_restore_state_to_opc,
 
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps openrisc_tr_ops = {
     .tb_stop = openrisc_tr_tb_stop,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
-                           vaddr pc, void *host_pc)
+void openrisc_translate_code(CPUState *cs, TranslationBlock *tb,
+                             int *max_insns, vaddr pc, void *host_pc)
 {
     DisasContext ctx;
 
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.nip = value;
 }
 
+static vaddr ppc_cpu_get_pc(CPUState *cs)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+    return cpu->env.nip;
+}
+
 static bool ppc_cpu_has_work(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = ppc_cpu_has_work;
     cc->dump_state = ppc_cpu_dump_state;
     cc->set_pc = ppc_cpu_set_pc;
+    cc->get_pc = ppc_cpu_get_pc;
     cc->gdb_read_register = ppc_cpu_gdb_read_register;
     cc->gdb_write_register = ppc_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps ppc_sysemu_ops = {
 
 static const TCGCPUOps ppc_tcg_ops = {
     .initialize = ppc_translate_init,
+    .translate_code = ppc_translate_code,
     .restore_state_to_opc = ppc_restore_state_to_opc,
 
 #ifdef CONFIG_USER_ONLY
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps ppc_tr_ops = {
     .tb_stop = ppc_tr_tb_stop,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
-                           vaddr pc, void *host_pc)
+void ppc_translate_code(CPUState *cs, TranslationBlock *tb,
+                        int *max_insns, vaddr pc, void *host_pc)
 {
     DisasContext ctx;
 
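The translate_code hunks are equally mechanical: each target renames its
gen_intermediate_code() to <target>_translate_code() and registers that
function in its TCGCPUOps. A standalone sketch of the resulting dispatch,
again with hypothetical stand-in types rather than QEMU's real structures:

/*
 * Standalone model of the TCGCPUOps.translate_code dispatch that replaces
 * the single global gen_intermediate_code().  Simplified stand-in types.
 */
#include <stdio.h>

typedef struct TBModel { int dummy; } TBModel;
typedef struct CPUModel CPUModel;

typedef struct {
    /* Per-target entry point, formerly the global gen_intermediate_code(). */
    void (*translate_code)(CPUModel *cs, TBModel *tb, int *max_insns);
} TCGCPUOpsModel;

struct CPUModel {
    const TCGCPUOpsModel *tcg_ops;
};

static void demo_translate_code(CPUModel *cs, TBModel *tb, int *max_insns)
{
    (void)cs;
    (void)tb;
    printf("translating a block of at most %d insns\n", *max_insns);
}

static const TCGCPUOpsModel demo_ops = {
    .translate_code = demo_translate_code,
};

int main(void)
{
    CPUModel cpu = { .tcg_ops = &demo_ops };
    TBModel tb = { 0 };
    int max_insns = 512;

    /* Generic code calls through the hook instead of a fixed symbol. */
    cpu.tcg_ops->translate_code(&cpu, &tb, &max_insns);
    return 0;
}

Replacing one global symbol with a per-class function pointer means the
generic translator no longer requires every target to export the same
function name, which is what the remaining per-target hunks below carry out.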
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_set_pc(CPUState *cs, vaddr value)
     }
 }
 
+static vaddr riscv_cpu_get_pc(CPUState *cs)
+{
+    RISCVCPU *cpu = RISCV_CPU(cs);
+    CPURISCVState *env = &cpu->env;
+
+    /* Match cpu_get_tb_cpu_state. */
+    if (env->xl == MXL_RV32) {
+        return env->pc & UINT32_MAX;
+    }
+    return env->pc;
+}
+
 static void riscv_cpu_synchronize_from_tb(CPUState *cs,
                                           const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_class_init(ObjectClass *c, void *data)
     cc->has_work = riscv_cpu_has_work;
     cc->dump_state = riscv_cpu_dump_state;
     cc->set_pc = riscv_cpu_set_pc;
+    cc->get_pc = riscv_cpu_get_pc;
     cc->gdb_read_register = riscv_cpu_gdb_read_register;
     cc->gdb_write_register = riscv_cpu_gdb_write_register;
     cc->gdb_num_core_regs = 33;
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_restore_state_to_opc(CPUState *cs,
 
 static const TCGCPUOps riscv_tcg_ops = {
     .initialize = riscv_translate_init,
+    .translate_code = riscv_translate_code,
     .synchronize_from_tb = riscv_cpu_synchronize_from_tb,
     .restore_state_to_opc = riscv_restore_state_to_opc,
 
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps riscv_tr_ops = {
     .tb_stop = riscv_tr_tb_stop,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
-                           vaddr pc, void *host_pc)
+void riscv_translate_code(CPUState *cs, TranslationBlock *tb,
+                          int *max_insns, vaddr pc, void *host_pc)
 {
     DisasContext ctx;
 
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.pc = value;
 }
 
+static vaddr rx_cpu_get_pc(CPUState *cs)
+{
+    RXCPU *cpu = RX_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static void rx_cpu_synchronize_from_tb(CPUState *cs,
                                        const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_class_init(ObjectClass *klass, void *data)
     cc->has_work = rx_cpu_has_work;
     cc->dump_state = rx_cpu_dump_state;
     cc->set_pc = rx_cpu_set_pc;
+    cc->get_pc = rx_cpu_get_pc;
 
 #ifndef CONFIG_USER_ONLY
     cc->sysemu_ops = &rx_sysemu_ops;
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps rx_sysemu_ops = {
 
 static const TCGCPUOps rx_tcg_ops = {
     .initialize = rx_translate_init,
+    .translate_code = rx_translate_code,
     .synchronize_from_tb = rx_cpu_synchronize_from_tb,
     .restore_state_to_opc = rx_restore_state_to_opc,
     .tlb_fill = rx_cpu_tlb_fill,
diff --git a/target/rx/translate.c b/target/rx/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps rx_tr_ops = {
     .tb_stop = rx_tr_tb_stop,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
-                           vaddr pc, void *host_pc)
+void rx_translate_code(CPUState *cs, TranslationBlock *tb,
+                       int *max_insns, vaddr pc, void *host_pc)
 {
     DisasContext dc;
 
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.psw.addr = value;
 }
 
+static vaddr s390_cpu_get_pc(CPUState *cs)
+{
+    S390CPU *cpu = S390_CPU(cs);
+
+    return cpu->env.psw.addr;
+}
+
 static bool s390_cpu_has_work(CPUState *cs)
 {
     S390CPU *cpu = S390_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = s390_cpu_has_work;
     cc->dump_state = s390_cpu_dump_state;
     cc->set_pc = s390_cpu_set_pc;
+    cc->get_pc = s390_cpu_get_pc;
     cc->gdb_read_register = s390_cpu_gdb_read_register;
     cc->gdb_write_register = s390_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -XXX,XX +XXX,XX @@ void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc,
 
 static const TCGCPUOps s390_tcg_ops = {
     .initialize = s390x_translate_init,
+    .translate_code = s390x_translate_code,
     .restore_state_to_opc = s390x_restore_state_to_opc,
 
 #ifdef CONFIG_USER_ONLY
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps s390x_tr_ops = {
     .disas_log = s390x_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
-                           vaddr pc, void *host_pc)
+void s390x_translate_code(CPUState *cs, TranslationBlock *tb,
+                          int *max_insns, vaddr pc, void *host_pc)
 {
     DisasContext dc;
 
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.pc = value;
 }
 
+static vaddr superh_cpu_get_pc(CPUState *cs)
+{
+    SuperHCPU *cpu = SUPERH_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static void superh_cpu_synchronize_from_tb(CPUState *cs,
                                            const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = superh_cpu_has_work;
     cc->dump_state = superh_cpu_dump_state;
     cc->set_pc = superh_cpu_set_pc;
+    cc->get_pc = superh_cpu_get_pc;
     cc->gdb_read_register = superh_cpu_gdb_read_register;
     cc->gdb_write_register = superh_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sh4_sysemu_ops = {
 
 static const TCGCPUOps superh_tcg_ops = {
     .initialize = sh4_translate_init,
+    .translate_code = sh4_translate_code,
     .synchronize_from_tb = superh_cpu_synchronize_from_tb,
     .restore_state_to_opc = superh_restore_state_to_opc,
 
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sh4_tr_ops = {
     .tb_stop = sh4_tr_tb_stop,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
-                           vaddr pc, void *host_pc)
+void sh4_translate_code(CPUState *cs, TranslationBlock *tb,
+                        int *max_insns, vaddr pc, void *host_pc)
 {
     DisasContext ctx;
 
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.npc = value + 4;
 }
 
+static vaddr sparc_cpu_get_pc(CPUState *cs)
+{
+    SPARCCPU *cpu = SPARC_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static void sparc_cpu_synchronize_from_tb(CPUState *cs,
                                           const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data)
     cc->memory_rw_debug = sparc_cpu_memory_rw_debug;
 #endif
     cc->set_pc = sparc_cpu_set_pc;
+    cc->get_pc = sparc_cpu_get_pc;
     cc->gdb_read_register = sparc_cpu_gdb_read_register;
     cc->gdb_write_register = sparc_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps sparc_sysemu_ops = {
 
 static const TCGCPUOps sparc_tcg_ops = {
     .initialize = sparc_tcg_init,
+    .translate_code = sparc_translate_code,
     .synchronize_from_tb = sparc_cpu_synchronize_from_tb,
     .restore_state_to_opc = sparc_restore_state_to_opc,
 
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sparc_tr_ops = {
     .tb_stop = sparc_tr_tb_stop,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
-                           vaddr pc, void *host_pc)
+void sparc_translate_code(CPUState *cs, TranslationBlock *tb,
+                          int *max_insns, vaddr pc, void *host_pc)
 {
     DisasContext dc = {};
 
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_set_pc(CPUState *cs, vaddr value)
     env->PC = value & ~(target_ulong)1;
 }
 
+static vaddr tricore_cpu_get_pc(CPUState *cs)
+{
+    TriCoreCPU *cpu = TRICORE_CPU(cs);
+    CPUTriCoreState *env = &cpu->env;
+
+    return env->PC;
+}
+
 static void tricore_cpu_synchronize_from_tb(CPUState *cs,
                                             const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_class_init(ObjectClass *c, void *data)
 
     cc->dump_state = tricore_cpu_dump_state;
     cc->set_pc = tricore_cpu_set_pc;
+    cc->get_pc = tricore_cpu_get_pc;
     cc->sysemu_ops = &tricore_sysemu_ops;
     cc->tcg_ops = &tricore_tcg_ops;
 }
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps tricore_sysemu_ops = {
 
 static const TCGCPUOps tricore_tcg_ops = {
     .initialize = tricore_tcg_init,
+    .translate_code = tricore_translate_code,
     .synchronize_from_tb = tricore_cpu_synchronize_from_tb,
     .restore_state_to_opc = tricore_restore_state_to_opc,
     .tlb_fill = tricore_cpu_tlb_fill,
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps tricore_tr_ops = {
     .tb_stop = tricore_tr_tb_stop,
 };
 
-
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
-                           vaddr pc, void *host_pc)
+void tricore_translate_code(CPUState *cs, TranslationBlock *tb,
+                            int *max_insns, vaddr pc, void *host_pc)
 {
     DisasContext ctx;
     translator_loop(cs, tb, max_insns, pc, host_pc,
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.pc = value;
 }
 
+static vaddr xtensa_cpu_get_pc(CPUState *cs)
+{
+    XtensaCPU *cpu = XTENSA_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static bool xtensa_cpu_has_work(CPUState *cs)
 {
 #ifndef CONFIG_USER_ONLY
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = xtensa_cpu_has_work;
     cc->dump_state = xtensa_cpu_dump_state;
     cc->set_pc = xtensa_cpu_set_pc;
+    cc->get_pc = xtensa_cpu_get_pc;
     cc->gdb_read_register = xtensa_cpu_gdb_read_register;
     cc->gdb_write_register = xtensa_cpu_gdb_write_register;
     cc->gdb_stop_before_watchpoint = true;
--
2.34.1

diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -XXX,XX +XXX,XX @@ static const struct SysemuCPUOps xtensa_sysemu_ops = {
 
 static const TCGCPUOps xtensa_tcg_ops = {
     .initialize = xtensa_translate_init,
+    .translate_code = xtensa_translate_code,
     .debug_excp_handler = xtensa_breakpoint_handler,
     .restore_state_to_opc = xtensa_restore_state_to_opc,
 
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps xtensa_translator_ops = {
     .tb_stop = xtensa_tr_tb_stop,
 };
 
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
-                           vaddr pc, void *host_pc)
+void xtensa_translate_code(CPUState *cpu, TranslationBlock *tb,
+                           int *max_insns, vaddr pc, void *host_pc)
 {
     DisasContext dc = {};
     translator_loop(cpu, tb, max_insns, pc, host_pc,
--
2.43.0

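One target-specific detail worth calling out from the riscv hunk above:
riscv_cpu_get_pc() masks the PC to 32 bits when env->xl is MXL_RV32, to
match what cpu_get_tb_cpu_state reports. A standalone illustration of that
masking; the enum values below are stand-ins, not QEMU's definitions:

#include <assert.h>
#include <stdint.h>

enum { MXL_RV32 = 1, MXL_RV64 = 2 };   /* stand-ins for QEMU's enum */

static uint64_t rv_get_pc(uint64_t pc, int xl)
{
    /* RV32 reports only the low 32 bits, as in riscv_cpu_get_pc above. */
    if (xl == MXL_RV32) {
        return pc & UINT32_MAX;
    }
    return pc;
}

int main(void)
{
    assert(rv_get_pc(0xffffffff80000000ull, MXL_RV32) == 0x80000000ull);
    assert(rv_get_pc(0xffffffff80000000ull, MXL_RV64)
           == 0xffffffff80000000ull);
    return 0;
}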