1
The following changes since commit 45240eed4f064576d589ea60ebadf3c11d7ab891:
1
TCG patch queue, plus one target/sh4 patch that
2
Yoshinori Sato asked me to process.
2
3
3
Merge remote-tracking branch 'remotes/armbru/tags/pull-yank-2021-01-13' into staging (2021-01-13 14:19:24 +0000)
4
5
r~
6
7
8
The following changes since commit efbf38d73e5dcc4d5f8b98c6e7a12be1f3b91745:
9
10
Merge tag 'for-upstream' of git://repo.or.cz/qemu/kevin into staging (2022-10-03 15:06:07 -0400)
4
11
5
are available in the Git repository at:
12
are available in the Git repository at:
6
13
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210113
14
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20221004
8
15
9
for you to fetch changes up to 4cacecaaa2bbf8af0967bd3eee43297fada475a9:
16
for you to fetch changes up to ab419fd8a035a65942de4e63effcd55ccbf1a9fe:
10
17
11
decodetree: Open files with encoding='utf-8' (2021-01-13 08:39:08 -1000)
18
target/sh4: Fix TB_FLAG_UNALIGN (2022-10-04 12:33:05 -0700)
12
19
13
----------------------------------------------------------------
20
----------------------------------------------------------------
14
Improvements to tcg constant handling.
21
Cache CPUClass for use in hot code paths.
15
Force utf8 for decodetree.
22
Add CPUTLBEntryFull, probe_access_full, tlb_set_page_full.
23
Add generic support for TARGET_TB_PCREL.
24
tcg/ppc: Optimize 26-bit jumps using STQ for POWER 2.07
25
target/sh4: Fix TB_FLAG_UNALIGN
16
26
17
----------------------------------------------------------------
27
----------------------------------------------------------------
18
Philippe Mathieu-Daudé (1):
28
Alex Bennée (3):
19
decodetree: Open files with encoding='utf-8'
29
cpu: cache CPUClass in CPUState for hot code paths
30
hw/core/cpu-sysemu: used cached class in cpu_asidx_from_attrs
31
cputlb: used cached CPUClass in our hot-paths
20
32
21
Richard Henderson (23):
33
Leandro Lupori (1):
22
tcg: Use tcg_out_dupi_vec from temp_load
34
tcg/ppc: Optimize 26-bit jumps
23
tcg: Increase tcg_out_dupi_vec immediate to int64_t
24
tcg: Consolidate 3 bits into enum TCGTempKind
25
tcg: Add temp_readonly
26
tcg: Expand TCGTemp.val to 64-bits
27
tcg: Rename struct tcg_temp_info to TempOptInfo
28
tcg: Expand TempOptInfo to 64-bits
29
tcg: Introduce TYPE_CONST temporaries
30
tcg/optimize: Improve find_better_copy
31
tcg/optimize: Adjust TempOptInfo allocation
32
tcg/optimize: Use tcg_constant_internal with constant folding
33
tcg: Convert tcg_gen_dupi_vec to TCG_CONST
34
tcg: Use tcg_constant_i32 with icount expander
35
tcg: Use tcg_constant_{i32,i64} with tcg int expanders
36
tcg: Use tcg_constant_{i32,i64} with tcg plugins
37
tcg: Use tcg_constant_{i32,i64,vec} with gvec expanders
38
tcg/tci: Add special tci_movi_{i32,i64} opcodes
39
tcg: Remove movi and dupi opcodes
40
tcg: Add tcg_reg_alloc_dup2
41
tcg/i386: Use tcg_constant_vec with tcg vec expanders
42
tcg: Remove tcg_gen_dup{8,16,32,64}i_vec
43
tcg/ppc: Use tcg_constant_vec with tcg vec expanders
44
tcg/aarch64: Use tcg_constant_vec with tcg vec expanders
45
35
46
include/exec/gen-icount.h | 25 +--
36
Richard Henderson (16):
47
include/tcg/tcg-op.h | 17 +-
37
accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull
48
include/tcg/tcg-opc.h | 11 +-
38
accel/tcg: Drop addr member from SavedIOTLB
49
include/tcg/tcg.h | 50 ++++-
39
accel/tcg: Suppress auto-invalidate in probe_access_internal
50
accel/tcg/plugin-gen.c | 49 ++---
40
accel/tcg: Introduce probe_access_full
51
tcg/optimize.c | 249 +++++++++++-----------
41
accel/tcg: Introduce tlb_set_page_full
52
tcg/tcg-op-gvec.c | 129 +++++-------
42
include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA
53
tcg/tcg-op-vec.c | 52 +----
43
accel/tcg: Remove PageDesc code_bitmap
54
tcg/tcg-op.c | 227 ++++++++++----------
44
accel/tcg: Use bool for page_find_alloc
55
tcg/tcg.c | 488 +++++++++++++++++++++++++++++++++----------
45
accel/tcg: Use DisasContextBase in plugin_gen_tb_start
56
tcg/tci.c | 4 +-
46
accel/tcg: Do not align tb->page_addr[0]
57
tcg/aarch64/tcg-target.c.inc | 32 +--
47
accel/tcg: Inline tb_flush_jmp_cache
58
tcg/arm/tcg-target.c.inc | 1 -
48
include/hw/core: Create struct CPUJumpCache
59
tcg/i386/tcg-target.c.inc | 112 ++++++----
49
hw/core: Add CPUClass.get_pc
60
tcg/mips/tcg-target.c.inc | 2 -
50
accel/tcg: Introduce tb_pc and log_pc
61
tcg/ppc/tcg-target.c.inc | 90 ++++----
51
accel/tcg: Introduce TARGET_TB_PCREL
62
tcg/riscv/tcg-target.c.inc | 2 -
52
target/sh4: Fix TB_FLAG_UNALIGN
63
tcg/s390/tcg-target.c.inc | 2 -
64
tcg/sparc/tcg-target.c.inc | 2 -
65
tcg/tci/tcg-target.c.inc | 6 +-
66
scripts/decodetree.py | 9 +-
67
21 files changed, 890 insertions(+), 669 deletions(-)
68
53
54
accel/tcg/internal.h | 10 ++
55
accel/tcg/tb-hash.h | 1 +
56
accel/tcg/tb-jmp-cache.h | 65 ++++++++
57
include/exec/cpu-common.h | 1 +
58
include/exec/cpu-defs.h | 48 ++++--
59
include/exec/exec-all.h | 75 ++++++++-
60
include/exec/plugin-gen.h | 7 +-
61
include/hw/core/cpu.h | 28 ++--
62
include/qemu/typedefs.h | 2 +
63
include/tcg/tcg.h | 2 +-
64
target/sh4/cpu.h | 56 ++++---
65
accel/stubs/tcg-stub.c | 4 +
66
accel/tcg/cpu-exec.c | 80 +++++-----
67
accel/tcg/cputlb.c | 259 ++++++++++++++++++--------------
68
accel/tcg/plugin-gen.c | 22 +--
69
accel/tcg/translate-all.c | 214 ++++++++++++--------------
70
accel/tcg/translator.c | 2 +-
71
cpu.c | 9 +-
72
hw/core/cpu-common.c | 3 +-
73
hw/core/cpu-sysemu.c | 5 +-
74
linux-user/sh4/signal.c | 6 +-
75
plugins/core.c | 2 +-
76
target/alpha/cpu.c | 9 ++
77
target/arm/cpu.c | 17 ++-
78
target/arm/mte_helper.c | 14 +-
79
target/arm/sve_helper.c | 4 +-
80
target/arm/translate-a64.c | 2 +-
81
target/avr/cpu.c | 10 +-
82
target/cris/cpu.c | 8 +
83
target/hexagon/cpu.c | 10 +-
84
target/hppa/cpu.c | 12 +-
85
target/i386/cpu.c | 9 ++
86
target/i386/tcg/tcg-cpu.c | 2 +-
87
target/loongarch/cpu.c | 11 +-
88
target/m68k/cpu.c | 8 +
89
target/microblaze/cpu.c | 10 +-
90
target/mips/cpu.c | 8 +
91
target/mips/tcg/exception.c | 2 +-
92
target/mips/tcg/sysemu/special_helper.c | 2 +-
93
target/nios2/cpu.c | 9 ++
94
target/openrisc/cpu.c | 10 +-
95
target/ppc/cpu_init.c | 8 +
96
target/riscv/cpu.c | 17 ++-
97
target/rx/cpu.c | 10 +-
98
target/s390x/cpu.c | 8 +
99
target/s390x/tcg/mem_helper.c | 4 -
100
target/sh4/cpu.c | 18 ++-
101
target/sh4/helper.c | 6 +-
102
target/sh4/translate.c | 90 +++++------
103
target/sparc/cpu.c | 10 +-
104
target/tricore/cpu.c | 11 +-
105
target/xtensa/cpu.c | 8 +
106
tcg/tcg.c | 8 +-
107
trace/control-target.c | 2 +-
108
tcg/ppc/tcg-target.c.inc | 119 +++++++++++----
109
55 files changed, 915 insertions(+), 462 deletions(-)
110
create mode 100644 accel/tcg/tb-jmp-cache.h
111
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
From: Alex Bennée <alex.bennee@linaro.org>
2
2
3
When decodetree.py was added in commit 568ae7efae7, QEMU was
3
The class cast checkers are quite expensive and always on (unlike the
4
using Python 2 which happily reads UTF-8 files in text mode.
4
dynamic case whose checks are gated by CONFIG_QOM_CAST_DEBUG). To
5
Python 3 requires either UTF-8 locale or an explicit encoding
5
avoid the overhead of repeatedly checking something which should never
6
passed to open(). Now that Python 3 is required, explicitly use
6
change, we cache the CPUClass reference for use in the hot code paths.
7
UTF-8 encoding for decodetree source files.
8
7
9
To avoid further problems with the user locale, also explicitly use
8
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
10
UTF-8 encoding for the generated C files.
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
10
Message-Id: <20220811151413.3350684-3-alex.bennee@linaro.org>
12
Make explicit that both input and output are plain text by using the 't' mode.
11
Signed-off-by: Cédric Le Goater <clg@kaod.org>
13
12
Message-Id: <20220923084803.498337-3-clg@kaod.org>
14
This fixes:
15
16
$ /usr/bin/python3 scripts/decodetree.py test.decode
17
Traceback (most recent call last):
18
File "scripts/decodetree.py", line 1397, in <module>
19
main()
20
File "scripts/decodetree.py", line 1308, in main
21
parse_file(f, toppat)
22
File "scripts/decodetree.py", line 994, in parse_file
23
for line in f:
24
File "/usr/lib/python3.6/encodings/ascii.py", line 26, in decode
25
return codecs.ascii_decode(input, self.errors)[0]
26
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 80:
27
ordinal not in range(128)
28
29
Reported-by: Peter Maydell <peter.maydell@linaro.org>
30
Suggested-by: Yonggang Luo <luoyonggang@gmail.com>
31
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
32
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
33
Message-Id: <20210110000240.761122-1-f4bug@amsat.org>
34
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
35
---
14
---
36
scripts/decodetree.py | 9 ++++++---
15
include/hw/core/cpu.h | 9 +++++++++
37
1 file changed, 6 insertions(+), 3 deletions(-)
16
cpu.c | 9 ++++-----
17
2 files changed, 13 insertions(+), 5 deletions(-)
38
18
39
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
19
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
40
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
41
--- a/scripts/decodetree.py
21
--- a/include/hw/core/cpu.h
42
+++ b/scripts/decodetree.py
22
+++ b/include/hw/core/cpu.h
43
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@ typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
44
# See the syntax and semantics in docs/devel/decodetree.rst.
24
*/
45
#
25
#define CPU(obj) ((CPUState *)(obj))
46
26
47
+import io
27
+/*
48
import os
28
+ * The class checkers bring in CPU_GET_CLASS() which is potentially
49
import re
29
+ * expensive given the eventual call to
50
import sys
30
+ * object_class_dynamic_cast_assert(). Because of this the CPUState
51
@@ -XXX,XX +XXX,XX @@ def main():
31
+ * has a cached value for the class in cs->cc which is set up in
52
32
+ * cpu_exec_realizefn() for use in hot code paths.
53
for filename in args:
33
+ */
54
input_file = filename
34
typedef struct CPUClass CPUClass;
55
- f = open(filename, 'r')
35
DECLARE_CLASS_CHECKERS(CPUClass, CPU,
56
+ f = open(filename, 'rt', encoding='utf-8')
36
TYPE_CPU)
57
parse_file(f, toppat)
37
@@ -XXX,XX +XXX,XX @@ struct qemu_work_item;
58
f.close()
38
struct CPUState {
59
39
/*< private >*/
60
@@ -XXX,XX +XXX,XX @@ def main():
40
DeviceState parent_obj;
61
prop_size(stree)
41
+ /* cache to avoid expensive CPU_GET_CLASS */
62
42
+ CPUClass *cc;
63
if output_file:
43
/*< public >*/
64
- output_fd = open(output_file, 'w')
44
65
+ output_fd = open(output_file, 'wt', encoding='utf-8')
45
int nr_cores;
66
else:
46
diff --git a/cpu.c b/cpu.c
67
- output_fd = sys.stdout
47
index XXXXXXX..XXXXXXX 100644
68
+ output_fd = io.TextIOWrapper(sys.stdout.buffer,
48
--- a/cpu.c
69
+ encoding=sys.stdout.encoding,
49
+++ b/cpu.c
70
+ errors="ignore")
50
@@ -XXX,XX +XXX,XX @@ const VMStateDescription vmstate_cpu_common = {
71
51
72
output_autogen()
52
void cpu_exec_realizefn(CPUState *cpu, Error **errp)
73
for n in sorted(arguments.keys()):
53
{
54
-#ifndef CONFIG_USER_ONLY
55
- CPUClass *cc = CPU_GET_CLASS(cpu);
56
-#endif
57
+ /* cache the cpu class for the hotpath */
58
+ cpu->cc = CPU_GET_CLASS(cpu);
59
60
cpu_list_add(cpu);
61
if (!accel_cpu_realizefn(cpu, errp)) {
62
@@ -XXX,XX +XXX,XX @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
63
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
64
vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
65
}
66
- if (cc->sysemu_ops->legacy_vmsd != NULL) {
67
- vmstate_register(NULL, cpu->cpu_index, cc->sysemu_ops->legacy_vmsd, cpu);
68
+ if (cpu->cc->sysemu_ops->legacy_vmsd != NULL) {
69
+ vmstate_register(NULL, cpu->cpu_index, cpu->cc->sysemu_ops->legacy_vmsd, cpu);
70
}
71
#endif /* CONFIG_USER_ONLY */
72
}
74
--
73
--
75
2.25.1
74
2.34.1
76
75
77
76
diff view generated by jsdifflib
1
Improve rotrv_vec to reduce "t1 = -v2, t2 = t1 + c" to
1
From: Alex Bennée <alex.bennee@linaro.org>
2
"t1 = -v2, t2 = c - v2". This avoids a serial dependency
3
between t1 and t2.
4
2
3
This is a heavily used function so let's avoid the cost of
4
CPU_GET_CLASS. On the romulus-bmc run it has a modest effect:
5
6
Before: 36.812 s ± 0.506 s
7
After: 35.912 s ± 0.168 s
8
9
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-Id: <20220811151413.3350684-4-alex.bennee@linaro.org>
12
Signed-off-by: Cédric Le Goater <clg@kaod.org>
13
Message-Id: <20220923084803.498337-4-clg@kaod.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
15
---
7
tcg/aarch64/tcg-target.c.inc | 10 +++++-----
16
hw/core/cpu-sysemu.c | 5 ++---
8
1 file changed, 5 insertions(+), 5 deletions(-)
17
1 file changed, 2 insertions(+), 3 deletions(-)
9
18
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
19
diff --git a/hw/core/cpu-sysemu.c b/hw/core/cpu-sysemu.c
11
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/aarch64/tcg-target.c.inc
21
--- a/hw/core/cpu-sysemu.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
22
+++ b/hw/core/cpu-sysemu.c
14
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
23
@@ -XXX,XX +XXX,XX @@ hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr)
15
TCGArg a0, ...)
24
25
int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs)
16
{
26
{
17
va_list va;
27
- CPUClass *cc = CPU_GET_CLASS(cpu);
18
- TCGv_vec v0, v1, v2, t1, t2;
28
int ret = 0;
19
+ TCGv_vec v0, v1, v2, t1, t2, c1;
29
20
TCGArg a2;
30
- if (cc->sysemu_ops->asidx_from_attrs) {
21
31
- ret = cc->sysemu_ops->asidx_from_attrs(cpu, attrs);
22
va_start(va, a0);
32
+ if (cpu->cc->sysemu_ops->asidx_from_attrs) {
23
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
33
+ ret = cpu->cc->sysemu_ops->asidx_from_attrs(cpu, attrs);
24
34
assert(ret < cpu->num_ases && ret >= 0);
25
case INDEX_op_rotlv_vec:
35
}
26
t1 = tcg_temp_new_vec(type);
36
return ret;
27
- tcg_gen_dupi_vec(vece, t1, 8 << vece);
28
- tcg_gen_sub_vec(vece, t1, v2, t1);
29
+ c1 = tcg_constant_vec(type, vece, 8 << vece);
30
+ tcg_gen_sub_vec(vece, t1, v2, c1);
31
/* Right shifts are negative left shifts for AArch64. */
32
vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
33
tcgv_vec_arg(v1), tcgv_vec_arg(t1));
34
@@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
35
case INDEX_op_rotrv_vec:
36
t1 = tcg_temp_new_vec(type);
37
t2 = tcg_temp_new_vec(type);
38
+ c1 = tcg_constant_vec(type, vece, 8 << vece);
39
tcg_gen_neg_vec(vece, t1, v2);
40
- tcg_gen_dupi_vec(vece, t2, 8 << vece);
41
- tcg_gen_add_vec(vece, t2, t1, t2);
42
+ tcg_gen_sub_vec(vece, t2, c1, v2);
43
/* Right shifts are negative left shifts for AArch64. */
44
vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
45
tcgv_vec_arg(v1), tcgv_vec_arg(t1));
46
--
37
--
47
2.25.1
38
2.34.1
48
39
49
40
diff view generated by jsdifflib
1
From: Alex Bennée <alex.bennee@linaro.org>
2
3
Before: 35.912 s ± 0.168 s
4
After: 35.565 s ± 0.087 s
5
6
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-Id: <20220811151413.3350684-5-alex.bennee@linaro.org>
9
Signed-off-by: Cédric Le Goater <clg@kaod.org>
10
Message-Id: <20220923084803.498337-5-clg@kaod.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
12
---
3
include/tcg/tcg.h | 1 +
13
accel/tcg/cputlb.c | 15 ++++++---------
4
tcg/tcg-op-gvec.c | 129 ++++++++++++++++++----------------------------
14
1 file changed, 6 insertions(+), 9 deletions(-)
5
tcg/tcg.c | 8 +++
6
3 files changed, 60 insertions(+), 78 deletions(-)
7
15
8
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
16
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
9
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
10
--- a/include/tcg/tcg.h
18
--- a/accel/tcg/cputlb.c
11
+++ b/include/tcg/tcg.h
19
+++ b/accel/tcg/cputlb.c
12
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 tcg_constant_i64(int64_t val)
20
@@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
21
static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
22
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
23
{
24
- CPUClass *cc = CPU_GET_CLASS(cpu);
25
bool ok;
26
27
/*
28
* This is not a probe, so only valid return is success; failure
29
* should result in exception + longjmp to the cpu loop.
30
*/
31
- ok = cc->tcg_ops->tlb_fill(cpu, addr, size,
32
- access_type, mmu_idx, false, retaddr);
33
+ ok = cpu->cc->tcg_ops->tlb_fill(cpu, addr, size,
34
+ access_type, mmu_idx, false, retaddr);
35
assert(ok);
13
}
36
}
14
37
15
TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val);
38
@@ -XXX,XX +XXX,XX @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
16
+TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val);
39
MMUAccessType access_type,
17
40
int mmu_idx, uintptr_t retaddr)
18
#if UINTPTR_MAX == UINT32_MAX
19
# define tcg_const_ptr(x) ((TCGv_ptr)tcg_const_i32((intptr_t)(x)))
20
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/tcg/tcg-op-gvec.c
23
+++ b/tcg/tcg-op-gvec.c
24
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
25
gen_helper_gvec_2 *fn)
26
{
41
{
27
TCGv_ptr a0, a1;
42
- CPUClass *cc = CPU_GET_CLASS(cpu);
28
- TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
43
-
29
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
44
- cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
30
45
+ cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type,
31
a0 = tcg_temp_new_ptr();
46
+ mmu_idx, retaddr);
32
a1 = tcg_temp_new_ptr();
33
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
34
35
tcg_temp_free_ptr(a0);
36
tcg_temp_free_ptr(a1);
37
- tcg_temp_free_i32(desc);
38
}
47
}
39
48
40
/* Generate a call to a gvec-style helper with two vector operands
49
static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
41
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
50
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
42
gen_helper_gvec_2i *fn)
51
if (!tlb_hit_page(tlb_addr, page_addr)) {
43
{
52
if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
44
TCGv_ptr a0, a1;
53
CPUState *cs = env_cpu(env);
45
- TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
54
- CPUClass *cc = CPU_GET_CLASS(cs);
46
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
55
47
56
- if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
48
a0 = tcg_temp_new_ptr();
57
- mmu_idx, nonfault, retaddr)) {
49
a1 = tcg_temp_new_ptr();
58
+ if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
50
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
59
+ mmu_idx, nonfault, retaddr)) {
51
60
/* Non-faulting page table read failed. */
52
tcg_temp_free_ptr(a0);
61
*phost = NULL;
53
tcg_temp_free_ptr(a1);
62
return TLB_INVALID_MASK;
54
- tcg_temp_free_i32(desc);
55
}
56
57
/* Generate a call to a gvec-style helper with three vector operands. */
58
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
59
gen_helper_gvec_3 *fn)
60
{
61
TCGv_ptr a0, a1, a2;
62
- TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
63
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
64
65
a0 = tcg_temp_new_ptr();
66
a1 = tcg_temp_new_ptr();
67
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
68
tcg_temp_free_ptr(a0);
69
tcg_temp_free_ptr(a1);
70
tcg_temp_free_ptr(a2);
71
- tcg_temp_free_i32(desc);
72
}
73
74
/* Generate a call to a gvec-style helper with four vector operands. */
75
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
76
int32_t data, gen_helper_gvec_4 *fn)
77
{
78
TCGv_ptr a0, a1, a2, a3;
79
- TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
80
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
81
82
a0 = tcg_temp_new_ptr();
83
a1 = tcg_temp_new_ptr();
84
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
85
tcg_temp_free_ptr(a1);
86
tcg_temp_free_ptr(a2);
87
tcg_temp_free_ptr(a3);
88
- tcg_temp_free_i32(desc);
89
}
90
91
/* Generate a call to a gvec-style helper with five vector operands. */
92
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
93
uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn)
94
{
95
TCGv_ptr a0, a1, a2, a3, a4;
96
- TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
97
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
98
99
a0 = tcg_temp_new_ptr();
100
a1 = tcg_temp_new_ptr();
101
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
102
tcg_temp_free_ptr(a2);
103
tcg_temp_free_ptr(a3);
104
tcg_temp_free_ptr(a4);
105
- tcg_temp_free_i32(desc);
106
}
107
108
/* Generate a call to a gvec-style helper with three vector operands
109
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
110
int32_t data, gen_helper_gvec_2_ptr *fn)
111
{
112
TCGv_ptr a0, a1;
113
- TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
114
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
115
116
a0 = tcg_temp_new_ptr();
117
a1 = tcg_temp_new_ptr();
118
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
119
120
tcg_temp_free_ptr(a0);
121
tcg_temp_free_ptr(a1);
122
- tcg_temp_free_i32(desc);
123
}
124
125
/* Generate a call to a gvec-style helper with three vector operands
126
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
127
int32_t data, gen_helper_gvec_3_ptr *fn)
128
{
129
TCGv_ptr a0, a1, a2;
130
- TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
131
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
132
133
a0 = tcg_temp_new_ptr();
134
a1 = tcg_temp_new_ptr();
135
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
136
tcg_temp_free_ptr(a0);
137
tcg_temp_free_ptr(a1);
138
tcg_temp_free_ptr(a2);
139
- tcg_temp_free_i32(desc);
140
}
141
142
/* Generate a call to a gvec-style helper with four vector operands
143
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
144
gen_helper_gvec_4_ptr *fn)
145
{
146
TCGv_ptr a0, a1, a2, a3;
147
- TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
148
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
149
150
a0 = tcg_temp_new_ptr();
151
a1 = tcg_temp_new_ptr();
152
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
153
tcg_temp_free_ptr(a1);
154
tcg_temp_free_ptr(a2);
155
tcg_temp_free_ptr(a3);
156
- tcg_temp_free_i32(desc);
157
}
158
159
/* Generate a call to a gvec-style helper with five vector operands
160
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
161
gen_helper_gvec_5_ptr *fn)
162
{
163
TCGv_ptr a0, a1, a2, a3, a4;
164
- TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
165
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
166
167
a0 = tcg_temp_new_ptr();
168
a1 = tcg_temp_new_ptr();
169
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
170
tcg_temp_free_ptr(a2);
171
tcg_temp_free_ptr(a3);
172
tcg_temp_free_ptr(a4);
173
- tcg_temp_free_i32(desc);
174
}
175
176
/* Return true if we want to implement something of OPRSZ bytes
177
@@ -XXX,XX +XXX,XX @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
178
|| (TCG_TARGET_REG_BITS == 64
179
&& (in_c == 0 || in_c == -1
180
|| !check_size_impl(oprsz, 4)))) {
181
- t_64 = tcg_const_i64(in_c);
182
+ t_64 = tcg_constant_i64(in_c);
183
} else {
184
- t_32 = tcg_const_i32(in_c);
185
+ t_32 = tcg_constant_i32(in_c);
186
}
187
}
188
189
@@ -XXX,XX +XXX,XX @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
190
t_val = tcg_temp_new_i32();
191
tcg_gen_extrl_i64_i32(t_val, in_64);
192
} else {
193
- t_val = tcg_const_i32(in_c);
194
+ t_val = tcg_constant_i32(in_c);
195
}
196
gen_helper_memset(t_ptr, t_ptr, t_val, t_size);
197
198
- if (!in_32) {
199
+ if (in_64) {
200
tcg_temp_free_i32(t_val);
201
}
202
tcg_temp_free_ptr(t_size);
203
@@ -XXX,XX +XXX,XX @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
204
return;
205
}
206
207
- t_desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
208
+ t_desc = tcg_constant_i32(simd_desc(oprsz, maxsz, 0));
209
210
if (vece == MO_64) {
211
if (in_64) {
212
gen_helper_gvec_dup64(t_ptr, t_desc, in_64);
213
} else {
214
- t_64 = tcg_const_i64(in_c);
215
+ t_64 = tcg_constant_i64(in_c);
216
gen_helper_gvec_dup64(t_ptr, t_desc, t_64);
217
- tcg_temp_free_i64(t_64);
218
}
219
} else {
220
typedef void dup_fn(TCGv_ptr, TCGv_i32, TCGv_i32);
221
@@ -XXX,XX +XXX,XX @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
222
223
if (in_32) {
224
fns[vece](t_ptr, t_desc, in_32);
225
- } else {
226
+ } else if (in_64) {
227
t_32 = tcg_temp_new_i32();
228
- if (in_64) {
229
- tcg_gen_extrl_i64_i32(t_32, in_64);
230
- } else if (vece == MO_8) {
231
- tcg_gen_movi_i32(t_32, in_c & 0xff);
232
- } else if (vece == MO_16) {
233
- tcg_gen_movi_i32(t_32, in_c & 0xffff);
234
- } else {
235
- tcg_gen_movi_i32(t_32, in_c);
236
- }
237
+ tcg_gen_extrl_i64_i32(t_32, in_64);
238
fns[vece](t_ptr, t_desc, t_32);
239
tcg_temp_free_i32(t_32);
240
+ } else {
241
+ if (vece == MO_8) {
242
+ in_c &= 0xff;
243
+ } else if (vece == MO_16) {
244
+ in_c &= 0xffff;
245
+ }
246
+ t_32 = tcg_constant_i32(in_c);
247
+ fns[vece](t_ptr, t_desc, t_32);
248
}
249
}
250
251
tcg_temp_free_ptr(t_ptr);
252
- tcg_temp_free_i32(t_desc);
253
return;
254
255
done:
256
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
257
if (g->fno) {
258
tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno);
259
} else {
260
- TCGv_i64 tcg_c = tcg_const_i64(c);
261
+ TCGv_i64 tcg_c = tcg_constant_i64(c);
262
tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz,
263
maxsz, c, g->fnoi);
264
- tcg_temp_free_i64(tcg_c);
265
}
266
oprsz = maxsz;
267
}
268
@@ -XXX,XX +XXX,XX @@ static void gen_addv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
269
270
void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
271
{
272
- TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
273
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
274
gen_addv_mask(d, a, b, m);
275
- tcg_temp_free_i64(m);
276
}
277
278
void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
279
{
280
- TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
281
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
282
gen_addv_mask(d, a, b, m);
283
- tcg_temp_free_i64(m);
284
}
285
286
void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
287
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
288
void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs,
289
int64_t c, uint32_t oprsz, uint32_t maxsz)
290
{
291
- TCGv_i64 tmp = tcg_const_i64(c);
292
+ TCGv_i64 tmp = tcg_constant_i64(c);
293
tcg_gen_gvec_adds(vece, dofs, aofs, tmp, oprsz, maxsz);
294
- tcg_temp_free_i64(tmp);
295
}
296
297
static const TCGOpcode vecop_list_sub[] = { INDEX_op_sub_vec, 0 };
298
@@ -XXX,XX +XXX,XX @@ static void gen_subv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
299
300
void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
301
{
302
- TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
303
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
304
gen_subv_mask(d, a, b, m);
305
- tcg_temp_free_i64(m);
306
}
307
308
void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
309
{
310
- TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
311
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
312
gen_subv_mask(d, a, b, m);
313
- tcg_temp_free_i64(m);
314
}
315
316
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
317
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs,
318
void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
319
int64_t c, uint32_t oprsz, uint32_t maxsz)
320
{
321
- TCGv_i64 tmp = tcg_const_i64(c);
322
+ TCGv_i64 tmp = tcg_constant_i64(c);
323
tcg_gen_gvec_muls(vece, dofs, aofs, tmp, oprsz, maxsz);
324
- tcg_temp_free_i64(tmp);
325
}
326
327
void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
328
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs,
329
330
static void tcg_gen_usadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
331
{
332
- TCGv_i32 max = tcg_const_i32(-1);
333
+ TCGv_i32 max = tcg_constant_i32(-1);
334
tcg_gen_add_i32(d, a, b);
335
tcg_gen_movcond_i32(TCG_COND_LTU, d, d, a, max, d);
336
- tcg_temp_free_i32(max);
337
}
338
339
static void tcg_gen_usadd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
340
{
341
- TCGv_i64 max = tcg_const_i64(-1);
342
+ TCGv_i64 max = tcg_constant_i64(-1);
343
tcg_gen_add_i64(d, a, b);
344
tcg_gen_movcond_i64(TCG_COND_LTU, d, d, a, max, d);
345
- tcg_temp_free_i64(max);
346
}
347
348
void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
349
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
350
351
static void tcg_gen_ussub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
352
{
353
- TCGv_i32 min = tcg_const_i32(0);
354
+ TCGv_i32 min = tcg_constant_i32(0);
355
tcg_gen_sub_i32(d, a, b);
356
tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, min, d);
357
- tcg_temp_free_i32(min);
358
}
359
360
static void tcg_gen_ussub_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
361
{
362
- TCGv_i64 min = tcg_const_i64(0);
363
+ TCGv_i64 min = tcg_constant_i64(0);
364
tcg_gen_sub_i64(d, a, b);
365
tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, min, d);
366
- tcg_temp_free_i64(min);
367
}
368
369
void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs,
370
@@ -XXX,XX +XXX,XX @@ static void gen_negv_mask(TCGv_i64 d, TCGv_i64 b, TCGv_i64 m)
371
372
void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 b)
373
{
374
- TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
375
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
376
gen_negv_mask(d, b, m);
377
- tcg_temp_free_i64(m);
378
}
379
380
void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 b)
381
{
382
- TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
383
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
384
gen_negv_mask(d, b, m);
385
- tcg_temp_free_i64(m);
386
}
387
388
void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 b)
389
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
390
void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
391
int64_t c, uint32_t oprsz, uint32_t maxsz)
392
{
393
- TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
394
+ TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
395
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
396
- tcg_temp_free_i64(tmp);
397
}
398
399
static const GVecGen2s gop_xors = {
400
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
401
void tcg_gen_gvec_xori(unsigned vece, uint32_t dofs, uint32_t aofs,
402
int64_t c, uint32_t oprsz, uint32_t maxsz)
403
{
404
- TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
405
+ TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
406
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
407
- tcg_temp_free_i64(tmp);
408
}
409
410
static const GVecGen2s gop_ors = {
411
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
412
void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
413
int64_t c, uint32_t oprsz, uint32_t maxsz)
414
{
415
- TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
416
+ TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
417
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
418
- tcg_temp_free_i64(tmp);
419
}
420
421
void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
422
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_shlv_mod_vec(unsigned vece, TCGv_vec d,
423
TCGv_vec a, TCGv_vec b)
424
{
425
TCGv_vec t = tcg_temp_new_vec_matching(d);
426
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
427
428
- tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
429
- tcg_gen_and_vec(vece, t, t, b);
430
+ tcg_gen_and_vec(vece, t, b, m);
431
tcg_gen_shlv_vec(vece, d, a, t);
432
tcg_temp_free_vec(t);
433
}
434
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_shrv_mod_vec(unsigned vece, TCGv_vec d,
435
TCGv_vec a, TCGv_vec b)
436
{
437
TCGv_vec t = tcg_temp_new_vec_matching(d);
438
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
439
440
- tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
441
- tcg_gen_and_vec(vece, t, t, b);
442
+ tcg_gen_and_vec(vece, t, b, m);
443
tcg_gen_shrv_vec(vece, d, a, t);
444
tcg_temp_free_vec(t);
445
}
446
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_sarv_mod_vec(unsigned vece, TCGv_vec d,
447
TCGv_vec a, TCGv_vec b)
448
{
449
TCGv_vec t = tcg_temp_new_vec_matching(d);
450
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
451
452
- tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
453
- tcg_gen_and_vec(vece, t, t, b);
454
+ tcg_gen_and_vec(vece, t, b, m);
455
tcg_gen_sarv_vec(vece, d, a, t);
456
tcg_temp_free_vec(t);
457
}
458
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
459
TCGv_vec a, TCGv_vec b)
460
{
461
TCGv_vec t = tcg_temp_new_vec_matching(d);
462
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
463
464
- tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
465
- tcg_gen_and_vec(vece, t, t, b);
466
+ tcg_gen_and_vec(vece, t, b, m);
467
tcg_gen_rotlv_vec(vece, d, a, t);
468
tcg_temp_free_vec(t);
469
}
470
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_rotrv_mod_vec(unsigned vece, TCGv_vec d,
471
TCGv_vec a, TCGv_vec b)
472
{
473
TCGv_vec t = tcg_temp_new_vec_matching(d);
474
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
475
476
- tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
477
- tcg_gen_and_vec(vece, t, t, b);
478
+ tcg_gen_and_vec(vece, t, b, m);
479
tcg_gen_rotrv_vec(vece, d, a, t);
480
tcg_temp_free_vec(t);
481
}
482
diff --git a/tcg/tcg.c b/tcg/tcg.c
483
index XXXXXXX..XXXXXXX 100644
484
--- a/tcg/tcg.c
485
+++ b/tcg/tcg.c
486
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
487
return temp_tcgv_vec(tcg_constant_internal(type, val));
488
}
489
490
+TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
491
+{
492
+ TCGTemp *t = tcgv_vec_temp(match);
493
+
494
+ tcg_debug_assert(t->temp_allocated != 0);
495
+ return tcg_constant_vec(t->base_type, vece, val);
496
+}
497
+
498
TCGv_i32 tcg_const_i32(int32_t val)
499
{
500
TCGv_i32 t0;
501
--
63
--
502
2.25.1
64
2.34.1
503
65
504
66
diff view generated by jsdifflib
1
These will hold a single constant for the duration of the TB.
1
This structure will shortly contain more than just
2
They are hashed, so that each value has one temp across the TB.
2
data for accessing MMIO. Rename the 'addr' member
3
to 'xlat_section' to more clearly indicate its purpose.
3
4
4
Not used yet, this is all infrastructure.
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
9
---
8
include/tcg/tcg.h | 24 ++++-
10
include/exec/cpu-defs.h | 22 ++++----
9
tcg/optimize.c | 13 ++-
11
accel/tcg/cputlb.c | 102 +++++++++++++++++++------------------
10
tcg/tcg.c | 224 ++++++++++++++++++++++++++++++++++++----------
12
target/arm/mte_helper.c | 14 ++---
11
3 files changed, 211 insertions(+), 50 deletions(-)
13
target/arm/sve_helper.c | 4 +-
14
target/arm/translate-a64.c | 2 +-
15
5 files changed, 73 insertions(+), 71 deletions(-)
12
16
13
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
17
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
14
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg.h
19
--- a/include/exec/cpu-defs.h
16
+++ b/include/tcg/tcg.h
20
+++ b/include/exec/cpu-defs.h
17
@@ -XXX,XX +XXX,XX @@ typedef enum TCGTempKind {
21
@@ -XXX,XX +XXX,XX @@ typedef uint64_t target_ulong;
18
TEMP_GLOBAL,
22
# endif
19
/* Temp is in a fixed register. */
23
# endif
20
TEMP_FIXED,
24
21
+ /* Temp is a fixed constant. */
25
+/* Minimalized TLB entry for use by TCG fast path. */
22
+ TEMP_CONST,
26
typedef struct CPUTLBEntry {
23
} TCGTempKind;
27
/* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
24
28
bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not
25
typedef struct TCGTemp {
29
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntry {
26
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
30
27
QSIMPLEQ_HEAD(, TCGOp) plugin_ops;
31
QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
32
33
-/* The IOTLB is not accessed directly inline by generated TCG code,
34
- * so the CPUIOTLBEntry layout is not as critical as that of the
35
- * CPUTLBEntry. (This is also why we don't want to combine the two
36
- * structs into one.)
37
+/*
38
+ * The full TLB entry, which is not accessed by generated TCG code,
39
+ * so the layout is not as critical as that of CPUTLBEntry. This is
40
+ * also why we don't want to combine the two structs.
41
*/
42
-typedef struct CPUIOTLBEntry {
43
+typedef struct CPUTLBEntryFull {
44
/*
45
- * @addr contains:
46
+ * @xlat_section contains:
47
* - in the lower TARGET_PAGE_BITS, a physical section number
48
* - with the lower TARGET_PAGE_BITS masked off, an offset which
49
* must be added to the virtual address to obtain:
50
@@ -XXX,XX +XXX,XX @@ typedef struct CPUIOTLBEntry {
51
* number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
52
* + the offset within the target MemoryRegion (otherwise)
53
*/
54
- hwaddr addr;
55
+ hwaddr xlat_section;
56
MemTxAttrs attrs;
57
-} CPUIOTLBEntry;
58
+} CPUTLBEntryFull;
59
60
/*
61
* Data elements that are per MMU mode, minus the bits accessed by
62
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBDesc {
63
size_t vindex;
64
/* The tlb victim table, in two parts. */
65
CPUTLBEntry vtable[CPU_VTLB_SIZE];
66
- CPUIOTLBEntry viotlb[CPU_VTLB_SIZE];
67
- /* The iotlb. */
68
- CPUIOTLBEntry *iotlb;
69
+ CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE];
70
+ CPUTLBEntryFull *fulltlb;
71
} CPUTLBDesc;
72
73
/*
74
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/accel/tcg/cputlb.c
77
+++ b/accel/tcg/cputlb.c
78
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
79
}
80
81
g_free(fast->table);
82
- g_free(desc->iotlb);
83
+ g_free(desc->fulltlb);
84
85
tlb_window_reset(desc, now, 0);
86
/* desc->n_used_entries is cleared by the caller */
87
fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
88
fast->table = g_try_new(CPUTLBEntry, new_size);
89
- desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
90
+ desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
91
92
/*
93
* If the allocations fail, try smaller sizes. We just freed some
94
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
95
* allocations to fail though, so we progressively reduce the allocation
96
* size, aborting if we cannot even allocate the smallest TLB we support.
97
*/
98
- while (fast->table == NULL || desc->iotlb == NULL) {
99
+ while (fast->table == NULL || desc->fulltlb == NULL) {
100
if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
101
error_report("%s: %s", __func__, strerror(errno));
102
abort();
103
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
104
fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
105
106
g_free(fast->table);
107
- g_free(desc->iotlb);
108
+ g_free(desc->fulltlb);
109
fast->table = g_try_new(CPUTLBEntry, new_size);
110
- desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
111
+ desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
112
}
113
}
114
115
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
116
desc->n_used_entries = 0;
117
fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
118
fast->table = g_new(CPUTLBEntry, n_entries);
119
- desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
120
+ desc->fulltlb = g_new(CPUTLBEntryFull, n_entries);
121
tlb_mmu_flush_locked(desc, fast);
122
}
123
124
@@ -XXX,XX +XXX,XX @@ void tlb_destroy(CPUState *cpu)
125
CPUTLBDescFast *fast = &env_tlb(env)->f[i];
126
127
g_free(fast->table);
128
- g_free(desc->iotlb);
129
+ g_free(desc->fulltlb);
130
}
131
}
132
133
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
134
135
/* Evict the old entry into the victim tlb. */
136
copy_tlb_helper_locked(tv, te);
137
- desc->viotlb[vidx] = desc->iotlb[index];
138
+ desc->vfulltlb[vidx] = desc->fulltlb[index];
139
tlb_n_used_entries_dec(env, mmu_idx);
140
}
141
142
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
143
* subtract here is that of the page base, and not the same as the
144
* vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
145
*/
146
- desc->iotlb[index].addr = iotlb - vaddr_page;
147
- desc->iotlb[index].attrs = attrs;
148
+ desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
149
+ desc->fulltlb[index].attrs = attrs;
150
151
/* Now calculate the new entry */
152
tn.addend = addend - vaddr_page;
153
@@ -XXX,XX +XXX,XX @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
154
}
155
}
156
157
-static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
158
+static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
159
int mmu_idx, target_ulong addr, uintptr_t retaddr,
160
MMUAccessType access_type, MemOp op)
161
{
162
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
163
bool locked = false;
164
MemTxResult r;
165
166
- section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
167
+ section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
168
mr = section->mr;
169
- mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
170
+ mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
171
cpu->mem_io_pc = retaddr;
172
if (!cpu->can_do_io) {
173
cpu_io_recompile(cpu, retaddr);
174
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
175
qemu_mutex_lock_iothread();
176
locked = true;
177
}
178
- r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
179
+ r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
180
if (r != MEMTX_OK) {
181
hwaddr physaddr = mr_offset +
182
section->offset_within_address_space -
183
section->offset_within_region;
184
185
cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
186
- mmu_idx, iotlbentry->attrs, r, retaddr);
187
+ mmu_idx, full->attrs, r, retaddr);
188
}
189
if (locked) {
190
qemu_mutex_unlock_iothread();
191
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
192
}
193
194
/*
195
- * Save a potentially trashed IOTLB entry for later lookup by plugin.
196
- * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
197
+ * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin.
198
+ * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
199
* because of the side effect of io_writex changing memory layout.
200
*/
201
static void save_iotlb_data(CPUState *cs, hwaddr addr,
202
@@ -XXX,XX +XXX,XX @@ static void save_iotlb_data(CPUState *cs, hwaddr addr,
28
#endif
203
#endif
29
204
}
30
+ GHashTable *const_table[TCG_TYPE_COUNT];
205
31
TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
206
-static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
32
TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
207
+static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
33
208
int mmu_idx, uint64_t val, target_ulong addr,
34
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
209
uintptr_t retaddr, MemOp op)
35
36
static inline bool temp_readonly(TCGTemp *ts)
37
{
210
{
38
- return ts->kind == TEMP_FIXED;
211
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
39
+ return ts->kind >= TEMP_FIXED;
212
bool locked = false;
40
}
213
MemTxResult r;
41
214
42
extern TCGContext tcg_init_ctx;
215
- section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
43
@@ -XXX,XX +XXX,XX @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc);
216
+ section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
44
217
mr = section->mr;
45
void tcg_optimize(TCGContext *s);
218
- mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
46
219
+ mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
47
+/* Allocate a new temporary and initialize it with a constant. */
220
if (!cpu->can_do_io) {
48
TCGv_i32 tcg_const_i32(int32_t val);
221
cpu_io_recompile(cpu, retaddr);
49
TCGv_i64 tcg_const_i64(int64_t val);
222
}
50
TCGv_i32 tcg_const_local_i32(int32_t val);
223
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
51
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec(TCGType);
224
* The memory_region_dispatch may trigger a flush/resize
52
TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec);
225
* so for plugins we save the iotlb_data just in case.
53
TCGv_vec tcg_const_ones_vec_matching(TCGv_vec);
226
*/
54
227
- save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
55
+/*
228
+ save_iotlb_data(cpu, full->xlat_section, section, mr_offset);
56
+ * Locate or create a read-only temporary that is a constant.
229
57
+ * This kind of temporary need not and should not be freed.
230
if (!qemu_mutex_iothread_locked()) {
58
+ */
231
qemu_mutex_lock_iothread();
59
+TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
232
locked = true;
60
+
233
}
61
+static inline TCGv_i32 tcg_constant_i32(int32_t val)
234
- r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
62
+{
235
+ r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
63
+ return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
236
if (r != MEMTX_OK) {
64
+}
237
hwaddr physaddr = mr_offset +
65
+
238
section->offset_within_address_space -
66
+static inline TCGv_i64 tcg_constant_i64(int64_t val)
239
section->offset_within_region;
67
+{
240
68
+ return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
241
cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
69
+}
242
- MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
70
+
243
+ MMU_DATA_STORE, mmu_idx, full->attrs, r,
71
+TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val);
244
retaddr);
72
+
245
}
73
#if UINTPTR_MAX == UINT32_MAX
246
if (locked) {
74
# define tcg_const_ptr(x) ((TCGv_ptr)tcg_const_i32((intptr_t)(x)))
247
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
75
# define tcg_const_local_ptr(x) ((TCGv_ptr)tcg_const_local_i32((intptr_t)(x)))
248
copy_tlb_helper_locked(vtlb, &tmptlb);
76
diff --git a/tcg/optimize.c b/tcg/optimize.c
249
qemu_spin_unlock(&env_tlb(env)->c.lock);
250
251
- CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
252
- CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
253
- tmpio = *io; *io = *vio; *vio = tmpio;
254
+ CPUTLBEntryFull *f1 = &env_tlb(env)->d[mmu_idx].fulltlb[index];
255
+ CPUTLBEntryFull *f2 = &env_tlb(env)->d[mmu_idx].vfulltlb[vidx];
256
+ CPUTLBEntryFull tmpf;
257
+ tmpf = *f1; *f1 = *f2; *f2 = tmpf;
258
return true;
259
}
260
}
261
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
262
(ADDR) & TARGET_PAGE_MASK)
263
264
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
265
- CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
266
+ CPUTLBEntryFull *full, uintptr_t retaddr)
267
{
268
- ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
269
+ ram_addr_t ram_addr = mem_vaddr + full->xlat_section;
270
271
trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
272
273
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
274
/* Handle clean RAM pages. */
275
if (unlikely(flags & TLB_NOTDIRTY)) {
276
uintptr_t index = tlb_index(env, mmu_idx, addr);
277
- CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
278
+ CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
279
280
- notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
281
+ notdirty_write(env_cpu(env), addr, 1, full, retaddr);
282
flags &= ~TLB_NOTDIRTY;
283
}
284
285
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
286
287
if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
288
uintptr_t index = tlb_index(env, mmu_idx, addr);
289
- CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
290
+ CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
291
292
/* Handle watchpoints. */
293
if (flags & TLB_WATCHPOINT) {
294
int wp_access = (access_type == MMU_DATA_STORE
295
? BP_MEM_WRITE : BP_MEM_READ);
296
cpu_check_watchpoint(env_cpu(env), addr, size,
297
- iotlbentry->attrs, wp_access, retaddr);
298
+ full->attrs, wp_access, retaddr);
299
}
300
301
/* Handle clean RAM pages. */
302
if (flags & TLB_NOTDIRTY) {
303
- notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
304
+ notdirty_write(env_cpu(env), addr, 1, full, retaddr);
305
}
306
}
307
308
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
309
* should have just filled the TLB. The one corner case is io_writex
310
* which can cause TLB flushes and potential resizing of the TLBs
311
* losing the information we need. In those cases we need to recover
312
- * data from a copy of the iotlbentry. As long as this always occurs
313
+ * data from a copy of the CPUTLBEntryFull. As long as this always occurs
314
* from the same thread (which a mem callback will be) this is safe.
315
*/
316
317
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
318
if (likely(tlb_hit(tlb_addr, addr))) {
319
/* We must have an iotlb entry for MMIO */
320
if (tlb_addr & TLB_MMIO) {
321
- CPUIOTLBEntry *iotlbentry;
322
- iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
323
+ CPUTLBEntryFull *full;
324
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
325
data->is_io = true;
326
- data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
327
- data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
328
+ data->v.io.section =
329
+ iotlb_to_section(cpu, full->xlat_section, full->attrs);
330
+ data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
331
} else {
332
data->is_io = false;
333
data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
334
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
335
336
if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
337
notdirty_write(env_cpu(env), addr, size,
338
- &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
339
+ &env_tlb(env)->d[mmu_idx].fulltlb[index], retaddr);
340
}
341
342
return hostaddr;
343
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
344
345
/* Handle anything that isn't just a straight memory access. */
346
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
347
- CPUIOTLBEntry *iotlbentry;
348
+ CPUTLBEntryFull *full;
349
bool need_swap;
350
351
/* For anything that is unaligned, recurse through full_load. */
352
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
353
goto do_unaligned_access;
354
}
355
356
- iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
357
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
358
359
/* Handle watchpoints. */
360
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
361
/* On watchpoint hit, this will longjmp out. */
362
cpu_check_watchpoint(env_cpu(env), addr, size,
363
- iotlbentry->attrs, BP_MEM_READ, retaddr);
364
+ full->attrs, BP_MEM_READ, retaddr);
365
}
366
367
need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
368
369
/* Handle I/O access. */
370
if (likely(tlb_addr & TLB_MMIO)) {
371
- return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
372
+ return io_readx(env, full, mmu_idx, addr, retaddr,
373
access_type, op ^ (need_swap * MO_BSWAP));
374
}
375
376
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
377
*/
378
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
379
cpu_check_watchpoint(env_cpu(env), addr, size - size2,
380
- env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
381
+ env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
382
BP_MEM_WRITE, retaddr);
383
}
384
if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
385
cpu_check_watchpoint(env_cpu(env), page2, size2,
386
- env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
387
+ env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
388
BP_MEM_WRITE, retaddr);
389
}
390
391
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
392
393
/* Handle anything that isn't just a straight memory access. */
394
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
395
- CPUIOTLBEntry *iotlbentry;
396
+ CPUTLBEntryFull *full;
397
bool need_swap;
398
399
/* For anything that is unaligned, recurse through byte stores. */
400
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
401
goto do_unaligned_access;
402
}
403
404
- iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
405
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
406
407
/* Handle watchpoints. */
408
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
409
/* On watchpoint hit, this will longjmp out. */
410
cpu_check_watchpoint(env_cpu(env), addr, size,
411
- iotlbentry->attrs, BP_MEM_WRITE, retaddr);
412
+ full->attrs, BP_MEM_WRITE, retaddr);
413
}
414
415
need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
416
417
/* Handle I/O access. */
418
if (tlb_addr & TLB_MMIO) {
419
- io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
420
+ io_writex(env, full, mmu_idx, val, addr, retaddr,
421
op ^ (need_swap * MO_BSWAP));
422
return;
423
}
424
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
425
426
/* Handle clean RAM pages. */
427
if (tlb_addr & TLB_NOTDIRTY) {
428
- notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
429
+ notdirty_write(env_cpu(env), addr, size, full, retaddr);
430
}
431
432
haddr = (void *)((uintptr_t)addr + entry->addend);
433
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
77
index XXXXXXX..XXXXXXX 100644
434
index XXXXXXX..XXXXXXX 100644
78
--- a/tcg/optimize.c
435
--- a/target/arm/mte_helper.c
79
+++ b/tcg/optimize.c
436
+++ b/target/arm/mte_helper.c
80
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TempOptInfo *infos,
437
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
81
ts->state_ptr = ti;
438
return tags + index;
82
ti->next_copy = ts;
439
#else
83
ti->prev_copy = ts;
440
uintptr_t index;
84
- ti->is_const = false;
441
- CPUIOTLBEntry *iotlbentry;
85
- ti->mask = -1;
442
+ CPUTLBEntryFull *full;
86
+ if (ts->kind == TEMP_CONST) {
443
int in_page, flags;
87
+ ti->is_const = true;
444
ram_addr_t ptr_ra;
88
+ ti->val = ti->mask = ts->val;
445
hwaddr ptr_paddr, tag_paddr, xlat;
89
+ if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
446
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
90
+ /* High bits of a 32-bit quantity are garbage. */
447
assert(!(flags & TLB_INVALID_MASK));
91
+ ti->mask |= ~0xffffffffull;
448
92
+ }
449
/*
93
+ } else {
450
- * Find the iotlbentry for ptr. This *must* be present in the TLB
94
+ ti->is_const = false;
451
+ * Find the CPUTLBEntryFull for ptr. This *must* be present in the TLB
95
+ ti->mask = -1;
452
* because we just found the mapping.
96
+ }
453
* TODO: Perhaps there should be a cputlb helper that returns a
97
set_bit(idx, temps_used->l);
454
* matching tlb entry + iotlb entry.
98
}
455
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
99
}
456
g_assert(tlb_hit(comparator, ptr));
100
diff --git a/tcg/tcg.c b/tcg/tcg.c
457
}
458
# endif
459
- iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index];
460
+ full = &env_tlb(env)->d[ptr_mmu_idx].fulltlb[index];
461
462
/* If the virtual page MemAttr != Tagged, access unchecked. */
463
- if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) {
464
+ if (!arm_tlb_mte_tagged(&full->attrs)) {
465
return NULL;
466
}
467
468
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
469
int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
470
assert(ra != 0);
471
cpu_check_watchpoint(env_cpu(env), ptr, ptr_size,
472
- iotlbentry->attrs, wp, ra);
473
+ full->attrs, wp, ra);
474
}
475
476
/*
477
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
478
tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);
479
480
/* Look up the address in tag space. */
481
- tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
482
+ tag_asi = full->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
483
tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
484
mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
485
tag_access == MMU_DATA_STORE,
486
- iotlbentry->attrs);
487
+ full->attrs);
488
489
/*
490
* Note that @mr will never be NULL. If there is nothing in the address
491
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
101
index XXXXXXX..XXXXXXX 100644
492
index XXXXXXX..XXXXXXX 100644
102
--- a/tcg/tcg.c
493
--- a/target/arm/sve_helper.c
103
+++ b/tcg/tcg.c
494
+++ b/target/arm/sve_helper.c
104
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
495
@@ -XXX,XX +XXX,XX @@ bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
105
/* No temps have been previously allocated for size or locality. */
496
g_assert(tlb_hit(comparator, addr));
106
memset(s->free_temps, 0, sizeof(s->free_temps));
497
# endif
107
498
108
+ /* No constant temps have been previously allocated. */
499
- CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
109
+ for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
500
- info->attrs = iotlbentry->attrs;
110
+ if (s->const_table[i]) {
501
+ CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
111
+ g_hash_table_remove_all(s->const_table[i]);
502
+ info->attrs = full->attrs;
112
+ }
503
}
113
+ }
114
+
115
s->nb_ops = 0;
116
s->nb_labels = 0;
117
s->current_frame_offset = s->frame_start;
118
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
119
bigendian = 1;
120
#endif
504
#endif
121
505
122
- if (base_ts->kind != TEMP_FIXED) {
506
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
123
+ switch (base_ts->kind) {
507
index XXXXXXX..XXXXXXX 100644
124
+ case TEMP_FIXED:
508
--- a/target/arm/translate-a64.c
125
+ break;
509
+++ b/target/arm/translate-a64.c
126
+ case TEMP_GLOBAL:
510
@@ -XXX,XX +XXX,XX @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s)
127
/* We do not support double-indirect registers. */
511
* table entry even for that case.
128
tcg_debug_assert(!base_ts->indirect_reg);
512
*/
129
base_ts->indirect_base = 1;
513
return (tlb_hit(entry->addr_code, addr) &&
130
s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
514
- arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs));
131
? 2 : 1);
515
+ arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].fulltlb[index].attrs));
132
indirect_reg = 1;
516
#endif
133
+ break;
517
}
134
+ default:
518
135
+ g_assert_not_reached();
136
}
137
138
if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
139
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
140
TCGContext *s = tcg_ctx;
141
int k, idx;
142
143
+ /* In order to simplify users of tcg_constant_*, silently ignore free. */
144
+ if (ts->kind == TEMP_CONST) {
145
+ return;
146
+ }
147
+
148
#if defined(CONFIG_DEBUG_TCG)
149
s->temps_in_use--;
150
if (s->temps_in_use < 0) {
151
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
152
set_bit(idx, s->free_temps[k].l);
153
}
154
155
+TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
156
+{
157
+ TCGContext *s = tcg_ctx;
158
+ GHashTable *h = s->const_table[type];
159
+ TCGTemp *ts;
160
+
161
+ if (h == NULL) {
162
+ h = g_hash_table_new(g_int64_hash, g_int64_equal);
163
+ s->const_table[type] = h;
164
+ }
165
+
166
+ ts = g_hash_table_lookup(h, &val);
167
+ if (ts == NULL) {
168
+ ts = tcg_temp_alloc(s);
169
+
170
+ if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
171
+ TCGTemp *ts2 = tcg_temp_alloc(s);
172
+
173
+ ts->base_type = TCG_TYPE_I64;
174
+ ts->type = TCG_TYPE_I32;
175
+ ts->kind = TEMP_CONST;
176
+ ts->temp_allocated = 1;
177
+ /*
178
+ * Retain the full value of the 64-bit constant in the low
179
+ * part, so that the hash table works. Actual uses will
180
+ * truncate the value to the low part.
181
+ */
182
+ ts->val = val;
183
+
184
+ tcg_debug_assert(ts2 == ts + 1);
185
+ ts2->base_type = TCG_TYPE_I64;
186
+ ts2->type = TCG_TYPE_I32;
187
+ ts2->kind = TEMP_CONST;
188
+ ts2->temp_allocated = 1;
189
+ ts2->val = val >> 32;
190
+ } else {
191
+ ts->base_type = type;
192
+ ts->type = type;
193
+ ts->kind = TEMP_CONST;
194
+ ts->temp_allocated = 1;
195
+ ts->val = val;
196
+ }
197
+ g_hash_table_insert(h, &ts->val, ts);
198
+ }
199
+
200
+ return ts;
201
+}
202
+
203
+TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
204
+{
205
+ val = dup_const(vece, val);
206
+ return temp_tcgv_vec(tcg_constant_internal(type, val));
207
+}
208
+
209
TCGv_i32 tcg_const_i32(int32_t val)
210
{
211
TCGv_i32 t0;
212
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_start(TCGContext *s)
213
TCGTempVal val = TEMP_VAL_MEM;
214
215
switch (ts->kind) {
216
+ case TEMP_CONST:
217
+ val = TEMP_VAL_CONST;
218
+ break;
219
case TEMP_FIXED:
220
val = TEMP_VAL_REG;
221
break;
222
@@ -XXX,XX +XXX,XX @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
223
case TEMP_NORMAL:
224
snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
225
break;
226
+ case TEMP_CONST:
227
+ switch (ts->type) {
228
+ case TCG_TYPE_I32:
229
+ snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
230
+ break;
231
+#if TCG_TARGET_REG_BITS > 32
232
+ case TCG_TYPE_I64:
233
+ snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
234
+ break;
235
+#endif
236
+ case TCG_TYPE_V64:
237
+ case TCG_TYPE_V128:
238
+ case TCG_TYPE_V256:
239
+ snprintf(buf, buf_size, "v%d$0x%" PRIx64,
240
+ 64 << (ts->type - TCG_TYPE_V64), ts->val);
241
+ break;
242
+ default:
243
+ g_assert_not_reached();
244
+ }
245
+ break;
246
}
247
return buf;
248
}
249
@@ -XXX,XX +XXX,XX @@ static void la_bb_end(TCGContext *s, int ng, int nt)
250
state = TS_DEAD | TS_MEM;
251
break;
252
case TEMP_NORMAL:
253
+ case TEMP_CONST:
254
state = TS_DEAD;
255
break;
256
default:
257
@@ -XXX,XX +XXX,XX @@ static void la_bb_sync(TCGContext *s, int ng, int nt)
258
la_global_sync(s, ng);
259
260
for (int i = ng; i < nt; ++i) {
261
- if (s->temps[i].kind == TEMP_LOCAL) {
262
- int state = s->temps[i].state;
263
- s->temps[i].state = state | TS_MEM;
264
+ TCGTemp *ts = &s->temps[i];
265
+ int state;
266
+
267
+ switch (ts->kind) {
268
+ case TEMP_LOCAL:
269
+ state = ts->state;
270
+ ts->state = state | TS_MEM;
271
if (state != TS_DEAD) {
272
continue;
273
}
274
- } else {
275
+ break;
276
+ case TEMP_NORMAL:
277
s->temps[i].state = TS_DEAD;
278
+ break;
279
+ case TEMP_CONST:
280
+ continue;
281
+ default:
282
+ g_assert_not_reached();
283
}
284
la_reset_pref(&s->temps[i]);
285
}
286
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
287
mark it free; otherwise mark it dead. */
288
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
289
{
290
- if (temp_readonly(ts)) {
291
+ TCGTempVal new_type;
292
+
293
+ switch (ts->kind) {
294
+ case TEMP_FIXED:
295
return;
296
+ case TEMP_GLOBAL:
297
+ case TEMP_LOCAL:
298
+ new_type = TEMP_VAL_MEM;
299
+ break;
300
+ case TEMP_NORMAL:
301
+ new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
302
+ break;
303
+ case TEMP_CONST:
304
+ new_type = TEMP_VAL_CONST;
305
+ break;
306
+ default:
307
+ g_assert_not_reached();
308
}
309
if (ts->val_type == TEMP_VAL_REG) {
310
s->reg_to_temp[ts->reg] = NULL;
311
}
312
- ts->val_type = (free_or_dead < 0
313
- || ts->kind != TEMP_NORMAL
314
- ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
315
+ ts->val_type = new_type;
316
}
317
318
/* Mark a temporary as dead. */
319
@@ -XXX,XX +XXX,XX @@ static inline void temp_dead(TCGContext *s, TCGTemp *ts)
320
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
321
TCGRegSet preferred_regs, int free_or_dead)
322
{
323
- if (temp_readonly(ts)) {
324
- return;
325
- }
326
- if (!ts->mem_coherent) {
327
+ if (!temp_readonly(ts) && !ts->mem_coherent) {
328
if (!ts->mem_allocated) {
329
temp_allocate_frame(s, ts);
330
}
331
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
332
333
for (i = s->nb_globals; i < s->nb_temps; i++) {
334
TCGTemp *ts = &s->temps[i];
335
- if (ts->kind == TEMP_LOCAL) {
336
+
337
+ switch (ts->kind) {
338
+ case TEMP_LOCAL:
339
temp_save(s, ts, allocated_regs);
340
- } else {
341
+ break;
342
+ case TEMP_NORMAL:
343
/* The liveness analysis already ensures that temps are dead.
344
Keep an tcg_debug_assert for safety. */
345
tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
346
+ break;
347
+ case TEMP_CONST:
348
+ /* Similarly, we should have freed any allocated register. */
349
+ tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
350
+ break;
351
+ default:
352
+ g_assert_not_reached();
353
}
354
}
355
356
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
357
* The liveness analysis already ensures that temps are dead.
358
* Keep tcg_debug_asserts for safety.
359
*/
360
- if (ts->kind == TEMP_LOCAL) {
361
+ switch (ts->kind) {
362
+ case TEMP_LOCAL:
363
tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
364
- } else {
365
+ break;
366
+ case TEMP_NORMAL:
367
tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
368
+ break;
369
+ case TEMP_CONST:
370
+ break;
371
+ default:
372
+ g_assert_not_reached();
373
}
374
}
375
}
376
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
377
i_preferred_regs = o_preferred_regs = 0;
378
if (arg_ct->ialias) {
379
o_preferred_regs = op->output_pref[arg_ct->alias_index];
380
- if (ts->kind == TEMP_FIXED) {
381
- /* if fixed register, we must allocate a new register
382
- if the alias is not the same register */
383
- if (arg != op->args[arg_ct->alias_index]) {
384
- goto allocate_in_reg;
385
- }
386
- } else {
387
- /* if the input is aliased to an output and if it is
388
- not dead after the instruction, we must allocate
389
- a new register and move it */
390
- if (!IS_DEAD_ARG(i)) {
391
- goto allocate_in_reg;
392
- }
393
394
- /* check if the current register has already been allocated
395
- for another input aliased to an output */
396
- if (ts->val_type == TEMP_VAL_REG) {
397
- int k2, i2;
398
- reg = ts->reg;
399
- for (k2 = 0 ; k2 < k ; k2++) {
400
- i2 = def->args_ct[nb_oargs + k2].sort_index;
401
- if (def->args_ct[i2].ialias && reg == new_args[i2]) {
402
- goto allocate_in_reg;
403
- }
404
+ /*
405
+ * If the input is readonly, then it cannot also be an
406
+ * output and aliased to itself. If the input is not
407
+ * dead after the instruction, we must allocate a new
408
+ * register and move it.
409
+ */
410
+ if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
411
+ goto allocate_in_reg;
412
+ }
413
+
414
+ /*
415
+ * Check if the current register has already been allocated
416
+ * for another input aliased to an output.
417
+ */
418
+ if (ts->val_type == TEMP_VAL_REG) {
419
+ reg = ts->reg;
420
+ for (int k2 = 0; k2 < k; k2++) {
421
+ int i2 = def->args_ct[nb_oargs + k2].sort_index;
422
+ if (def->args_ct[i2].ialias && reg == new_args[i2]) {
423
+ goto allocate_in_reg;
424
}
425
}
426
- i_preferred_regs = o_preferred_regs;
427
}
428
+ i_preferred_regs = o_preferred_regs;
429
}
430
431
temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
432
reg = ts->reg;
433
434
- if (tcg_regset_test_reg(arg_ct->regs, reg)) {
435
- /* nothing to do : the constraint is satisfied */
436
- } else {
437
- allocate_in_reg:
438
- /* allocate a new register matching the constraint
439
- and move the temporary register into it */
440
+ if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
441
+ allocate_in_reg:
442
+ /*
443
+ * Allocate a new register matching the constraint
444
+ * and move the temporary register into it.
445
+ */
446
temp_load(s, ts, tcg_target_available_regs[ts->type],
447
i_allocated_regs, 0);
448
reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
449
--
519
--
450
2.25.1
520
2.34.1
451
521
452
522
diff view generated by jsdifflib
1
The normal movi opcodes are going away. We need something
1
This field is only written, not read; remove it.
2
for TCI to use internally.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
include/tcg/tcg-opc.h | 8 ++++++++
8
include/hw/core/cpu.h | 1 -
8
tcg/tci.c | 4 ++--
9
accel/tcg/cputlb.c | 7 +++----
9
tcg/tci/tcg-target.c.inc | 4 ++--
10
2 files changed, 3 insertions(+), 5 deletions(-)
10
3 files changed, 12 insertions(+), 4 deletions(-)
11
11
12
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
12
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg-opc.h
14
--- a/include/hw/core/cpu.h
15
+++ b/include/tcg/tcg-opc.h
15
+++ b/include/hw/core/cpu.h
16
@@ -XXX,XX +XXX,XX @@ DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
16
@@ -XXX,XX +XXX,XX @@ struct CPUWatchpoint {
17
#include "tcg-target.opc.h"
17
* the memory regions get moved around by io_writex.
18
*/
19
typedef struct SavedIOTLB {
20
- hwaddr addr;
21
MemoryRegionSection *section;
22
hwaddr mr_offset;
23
} SavedIOTLB;
24
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/accel/tcg/cputlb.c
27
+++ b/accel/tcg/cputlb.c
28
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
29
* This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
30
* because of the side effect of io_writex changing memory layout.
31
*/
32
-static void save_iotlb_data(CPUState *cs, hwaddr addr,
33
- MemoryRegionSection *section, hwaddr mr_offset)
34
+static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
35
+ hwaddr mr_offset)
36
{
37
#ifdef CONFIG_PLUGIN
38
SavedIOTLB *saved = &cs->saved_iotlb;
39
- saved->addr = addr;
40
saved->section = section;
41
saved->mr_offset = mr_offset;
18
#endif
42
#endif
19
43
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
20
+#ifdef TCG_TARGET_INTERPRETER
44
* The memory_region_dispatch may trigger a flush/resize
21
+/* These opcodes are only for use between the tci generator and interpreter. */
45
* so for plugins we save the iotlb_data just in case.
22
+DEF(tci_movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT)
46
*/
23
+#if TCG_TARGET_REG_BITS == 64
47
- save_iotlb_data(cpu, full->xlat_section, section, mr_offset);
24
+DEF(tci_movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
48
+ save_iotlb_data(cpu, section, mr_offset);
25
+#endif
49
26
+#endif
50
if (!qemu_mutex_iothread_locked()) {
27
+
51
qemu_mutex_lock_iothread();
28
#undef TLADDR_ARGS
29
#undef DATA64_ARGS
30
#undef IMPL
31
diff --git a/tcg/tci.c b/tcg/tci.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/tci.c
34
+++ b/tcg/tci.c
35
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
36
t1 = tci_read_r32(regs, &tb_ptr);
37
tci_write_reg32(regs, t0, t1);
38
break;
39
- case INDEX_op_movi_i32:
40
+ case INDEX_op_tci_movi_i32:
41
t0 = *tb_ptr++;
42
t1 = tci_read_i32(&tb_ptr);
43
tci_write_reg32(regs, t0, t1);
44
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
45
t1 = tci_read_r64(regs, &tb_ptr);
46
tci_write_reg64(regs, t0, t1);
47
break;
48
- case INDEX_op_movi_i64:
49
+ case INDEX_op_tci_movi_i64:
50
t0 = *tb_ptr++;
51
t1 = tci_read_i64(&tb_ptr);
52
tci_write_reg64(regs, t0, t1);
53
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/tci/tcg-target.c.inc
56
+++ b/tcg/tci/tcg-target.c.inc
57
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
58
uint8_t *old_code_ptr = s->code_ptr;
59
uint32_t arg32 = arg;
60
if (type == TCG_TYPE_I32 || arg == arg32) {
61
- tcg_out_op_t(s, INDEX_op_movi_i32);
62
+ tcg_out_op_t(s, INDEX_op_tci_movi_i32);
63
tcg_out_r(s, t0);
64
tcg_out32(s, arg32);
65
} else {
66
tcg_debug_assert(type == TCG_TYPE_I64);
67
#if TCG_TARGET_REG_BITS == 64
68
- tcg_out_op_t(s, INDEX_op_movi_i64);
69
+ tcg_out_op_t(s, INDEX_op_tci_movi_i64);
70
tcg_out_r(s, t0);
71
tcg_out64(s, arg);
72
#else
73
--
52
--
74
2.25.1
53
2.34.1
75
54
76
55
diff view generated by jsdifflib
1
While we don't store more than tcg_target_long in TCGTemp,
1
When PAGE_WRITE_INV is set when calling tlb_set_page,
2
we shouldn't be limited to that for code generation. We will
2
we immediately set TLB_INVALID_MASK in order to force
3
be able to use this for INDEX_op_dup2_vec with 2 constants.
3
tlb_fill to be called on the next lookup. Here in
4
probe_access_internal, we have just called tlb_fill
5
and eliminated true misses, thus the lookup must be valid.
4
6
5
Also pass along the minimal vece that may be said to apply
7
This allows us to remove a warning comment from s390x.
6
to the constant. This allows some simplification in the
8
There doesn't seem to be a reason to change the code though.
7
various backends.
8
9
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Reviewed-by: David Hildenbrand <david@redhat.com>
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
14
---
11
tcg/tcg.c | 31 +++++++++++++++++++++++++-----
15
accel/tcg/cputlb.c | 10 +++++++++-
12
tcg/aarch64/tcg-target.c.inc | 12 ++++++------
16
target/s390x/tcg/mem_helper.c | 4 ----
13
tcg/i386/tcg-target.c.inc | 22 ++++++++++++---------
17
2 files changed, 9 insertions(+), 5 deletions(-)
14
tcg/ppc/tcg-target.c.inc | 37 +++++++++++++++++++++++-------------
15
4 files changed, 69 insertions(+), 33 deletions(-)
16
18
17
diff --git a/tcg/tcg.c b/tcg/tcg.c
19
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
18
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
19
--- a/tcg/tcg.c
21
--- a/accel/tcg/cputlb.c
20
+++ b/tcg/tcg.c
22
+++ b/accel/tcg/cputlb.c
21
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
23
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
22
TCGReg dst, TCGReg src);
24
}
23
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
25
tlb_addr = tlb_read_ofs(entry, elt_ofs);
24
TCGReg dst, TCGReg base, intptr_t offset);
26
25
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
27
+ flags = TLB_FLAGS_MASK;
26
- TCGReg dst, tcg_target_long arg);
28
page_addr = addr & TARGET_PAGE_MASK;
27
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
29
if (!tlb_hit_page(tlb_addr, page_addr)) {
28
+ TCGReg dst, int64_t arg);
30
if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
29
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
31
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
30
unsigned vece, const TCGArg *args,
32
31
const int *const_args);
33
/* TLB resize via tlb_fill may have moved the entry. */
32
@@ -XXX,XX +XXX,XX @@ static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
34
entry = tlb_entry(env, mmu_idx, addr);
33
{
34
g_assert_not_reached();
35
}
36
-static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
37
- TCGReg dst, tcg_target_long arg)
38
+static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
39
+ TCGReg dst, int64_t arg)
40
{
41
g_assert_not_reached();
42
}
43
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
44
if (ts->type <= TCG_TYPE_I64) {
45
tcg_out_movi(s, ts->type, reg, ts->val);
46
} else {
47
- tcg_out_dupi_vec(s, ts->type, reg, ts->val);
48
+ uint64_t val = ts->val;
49
+ MemOp vece = MO_64;
50
+
35
+
51
+ /*
36
+ /*
52
+ * Find the minimal vector element that matches the constant.
37
+ * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately,
53
+ * The targets will, in general, have to do this search anyway,
38
+ * to force the next access through tlb_fill. We've just
54
+ * do this generically.
39
+ * called tlb_fill, so we know that this entry *is* valid.
55
+ */
40
+ */
56
+ if (TCG_TARGET_REG_BITS == 32) {
41
+ flags &= ~TLB_INVALID_MASK;
57
+ val = dup_const(MO_32, val);
58
+ vece = MO_32;
59
+ }
60
+ if (val == dup_const(MO_8, val)) {
61
+ vece = MO_8;
62
+ } else if (val == dup_const(MO_16, val)) {
63
+ vece = MO_16;
64
+ } else if (TCG_TARGET_REG_BITS == 64 &&
65
+ val == dup_const(MO_32, val)) {
66
+ vece = MO_32;
67
+ }
68
+
69
+ tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
70
}
42
}
71
ts->mem_coherent = 0;
43
tlb_addr = tlb_read_ofs(entry, elt_ofs);
72
break;
44
}
73
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
45
- flags = tlb_addr & TLB_FLAGS_MASK;
46
+ flags &= tlb_addr;
47
48
/* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
49
if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
50
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
74
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
75
--- a/tcg/aarch64/tcg-target.c.inc
52
--- a/target/s390x/tcg/mem_helper.c
76
+++ b/tcg/aarch64/tcg-target.c.inc
53
+++ b/target/s390x/tcg/mem_helper.c
77
@@ -XXX,XX +XXX,XX @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
54
@@ -XXX,XX +XXX,XX @@ static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
78
tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
55
#else
79
}
56
int flags;
80
57
81
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
58
- /*
82
- TCGReg rd, tcg_target_long v64)
59
- * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
83
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
60
- * to detect if there was an exception during tlb_fill().
84
+ TCGReg rd, int64_t v64)
61
- */
85
{
62
env->tlb_fill_exc = 0;
86
bool q = type == TCG_TYPE_V128;
63
flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
87
int cmode, imm8, i;
64
ra);
88
89
/* Test all bytes equal first. */
90
- if (v64 == dup_const(MO_8, v64)) {
91
+ if (vece == MO_8) {
92
imm8 = (uint8_t)v64;
93
tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
94
return;
95
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
96
* cannot find an expansion there's no point checking a larger
97
* width because we already know by replication it cannot match.
98
*/
99
- if (v64 == dup_const(MO_16, v64)) {
100
+ if (vece == MO_16) {
101
uint16_t v16 = v64;
102
103
if (is_shimm16(v16, &cmode, &imm8)) {
104
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
105
tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
106
tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
107
return;
108
- } else if (v64 == dup_const(MO_32, v64)) {
109
+ } else if (vece == MO_32) {
110
uint32_t v32 = v64;
111
uint32_t n32 = ~v32;
112
113
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
114
tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
115
break;
116
}
117
- tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
118
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
119
a2 = TCG_VEC_TMP;
120
}
121
insn = cmp_insn[cond];
122
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
123
index XXXXXXX..XXXXXXX 100644
124
--- a/tcg/i386/tcg-target.c.inc
125
+++ b/tcg/i386/tcg-target.c.inc
126
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
127
return true;
128
}
129
130
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
131
- TCGReg ret, tcg_target_long arg)
132
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
133
+ TCGReg ret, int64_t arg)
134
{
135
int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
136
137
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
138
return;
139
}
140
141
- if (TCG_TARGET_REG_BITS == 64) {
142
+ if (TCG_TARGET_REG_BITS == 32 && vece < MO_64) {
143
+ if (have_avx2) {
144
+ tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
145
+ } else {
146
+ tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
147
+ }
148
+ new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
149
+ } else {
150
if (type == TCG_TYPE_V64) {
151
tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret);
152
} else if (have_avx2) {
153
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
154
} else {
155
tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
156
}
157
- new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
158
- } else {
159
- if (have_avx2) {
160
- tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
161
+ if (TCG_TARGET_REG_BITS == 64) {
162
+ new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
163
} else {
164
- tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
165
+ new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32);
166
}
167
- new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
168
}
169
}
170
171
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
172
index XXXXXXX..XXXXXXX 100644
173
--- a/tcg/ppc/tcg-target.c.inc
174
+++ b/tcg/ppc/tcg-target.c.inc
175
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
176
}
177
}
178
179
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
180
- tcg_target_long val)
181
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
182
+ TCGReg ret, int64_t val)
183
{
184
uint32_t load_insn;
185
int rel, low;
186
intptr_t add;
187
188
- low = (int8_t)val;
189
- if (low >= -16 && low < 16) {
190
- if (val == (tcg_target_long)dup_const(MO_8, low)) {
191
+ switch (vece) {
192
+ case MO_8:
193
+ low = (int8_t)val;
194
+ if (low >= -16 && low < 16) {
195
tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
196
return;
197
}
198
- if (val == (tcg_target_long)dup_const(MO_16, low)) {
199
+ if (have_isa_3_00) {
200
+ tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
201
+ return;
202
+ }
203
+ break;
204
+
205
+ case MO_16:
206
+ low = (int16_t)val;
207
+ if (low >= -16 && low < 16) {
208
tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
209
return;
210
}
211
- if (val == (tcg_target_long)dup_const(MO_32, low)) {
212
+ break;
213
+
214
+ case MO_32:
215
+ low = (int32_t)val;
216
+ if (low >= -16 && low < 16) {
217
tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
218
return;
219
}
220
- }
221
- if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
222
- tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
223
- return;
224
+ break;
225
}
226
227
/*
228
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
229
if (TCG_TARGET_REG_BITS == 64) {
230
new_pool_label(s, val, rel, s->code_ptr, add);
231
} else {
232
- new_pool_l2(s, rel, s->code_ptr, add, val, val);
233
+ new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
234
}
235
} else {
236
load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
237
if (TCG_TARGET_REG_BITS == 64) {
238
new_pool_l2(s, rel, s->code_ptr, add, val, val);
239
} else {
240
- new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
241
+ new_pool_l4(s, rel, s->code_ptr, add,
242
+ val >> 32, val, val >> 32, val);
243
}
244
}
245
246
--
65
--
247
2.25.1
66
2.34.1
248
67
249
68
diff view generated by jsdifflib
1
We must do this before we adjust tcg_out_movi_i32, lest the
1
Add an interface to return the CPUTLBEntryFull struct
2
under-the-hood poking that we do for icount be broken.
2
that goes with the lookup. The result is not intended
3
to be valid across multiple lookups, so the user must
4
use the results immediately.
3
5
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
10
---
7
include/exec/gen-icount.h | 25 +++++++++++++------------
11
include/exec/exec-all.h | 15 +++++++++++++
8
1 file changed, 13 insertions(+), 12 deletions(-)
12
include/qemu/typedefs.h | 1 +
13
accel/tcg/cputlb.c | 47 +++++++++++++++++++++++++----------------
14
3 files changed, 45 insertions(+), 18 deletions(-)
9
15
10
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
16
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
11
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
12
--- a/include/exec/gen-icount.h
18
--- a/include/exec/exec-all.h
13
+++ b/include/exec/gen-icount.h
19
+++ b/include/exec/exec-all.h
14
@@ -XXX,XX +XXX,XX @@ static inline void gen_io_end(void)
20
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
15
21
MMUAccessType access_type, int mmu_idx,
16
static inline void gen_tb_start(const TranslationBlock *tb)
22
bool nonfault, void **phost, uintptr_t retaddr);
23
24
+#ifndef CONFIG_USER_ONLY
25
+/**
26
+ * probe_access_full:
27
+ * Like probe_access_flags, except also return into @pfull.
28
+ *
29
+ * The CPUTLBEntryFull structure returned via @pfull is transient
30
+ * and must be consumed or copied immediately, before any further
31
+ * access or changes to TLB @mmu_idx.
32
+ */
33
+int probe_access_full(CPUArchState *env, target_ulong addr,
34
+ MMUAccessType access_type, int mmu_idx,
35
+ bool nonfault, void **phost,
36
+ CPUTLBEntryFull **pfull, uintptr_t retaddr);
37
+#endif
38
+
39
#define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */
40
41
/* Estimated block size for TB allocation. */
42
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/include/qemu/typedefs.h
45
+++ b/include/qemu/typedefs.h
46
@@ -XXX,XX +XXX,XX @@ typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
47
typedef struct CPUAddressSpace CPUAddressSpace;
48
typedef struct CPUArchState CPUArchState;
49
typedef struct CPUState CPUState;
50
+typedef struct CPUTLBEntryFull CPUTLBEntryFull;
51
typedef struct DeviceListener DeviceListener;
52
typedef struct DeviceState DeviceState;
53
typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot;
54
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/accel/tcg/cputlb.c
57
+++ b/accel/tcg/cputlb.c
58
@@ -XXX,XX +XXX,XX @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
59
static int probe_access_internal(CPUArchState *env, target_ulong addr,
60
int fault_size, MMUAccessType access_type,
61
int mmu_idx, bool nonfault,
62
- void **phost, uintptr_t retaddr)
63
+ void **phost, CPUTLBEntryFull **pfull,
64
+ uintptr_t retaddr)
17
{
65
{
18
- TCGv_i32 count, imm;
66
uintptr_t index = tlb_index(env, mmu_idx, addr);
19
+ TCGv_i32 count;
67
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
20
68
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
21
tcg_ctx->exitreq_label = gen_new_label();
69
mmu_idx, nonfault, retaddr)) {
22
if (tb_cflags(tb) & CF_USE_ICOUNT) {
70
/* Non-faulting page table read failed. */
23
@@ -XXX,XX +XXX,XX @@ static inline void gen_tb_start(const TranslationBlock *tb)
71
*phost = NULL;
24
offsetof(ArchCPU, env));
72
+ *pfull = NULL;
25
73
return TLB_INVALID_MASK;
26
if (tb_cflags(tb) & CF_USE_ICOUNT) {
74
}
27
- imm = tcg_temp_new_i32();
75
28
- /* We emit a movi with a dummy immediate argument. Keep the insn index
76
/* TLB resize via tlb_fill may have moved the entry. */
29
- * of the movi so that we later (when we know the actual insn count)
77
+ index = tlb_index(env, mmu_idx, addr);
30
- * can update the immediate argument with the actual insn count. */
78
entry = tlb_entry(env, mmu_idx, addr);
31
- tcg_gen_movi_i32(imm, 0xdeadbeef);
79
32
+ /*
80
/*
33
+ * We emit a sub with a dummy immediate argument. Keep the insn index
81
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
34
+ * of the sub so that we later (when we know the actual insn count)
82
}
35
+ * can update the argument with the actual insn count.
83
flags &= tlb_addr;
36
+ */
84
37
+ tcg_gen_sub_i32(count, count, tcg_constant_i32(0));
85
+ *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index];
38
icount_start_insn = tcg_last_op();
86
+
87
/* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
88
if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
89
*phost = NULL;
90
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
91
return flags;
92
}
93
94
-int probe_access_flags(CPUArchState *env, target_ulong addr,
95
- MMUAccessType access_type, int mmu_idx,
96
- bool nonfault, void **phost, uintptr_t retaddr)
97
+int probe_access_full(CPUArchState *env, target_ulong addr,
98
+ MMUAccessType access_type, int mmu_idx,
99
+ bool nonfault, void **phost, CPUTLBEntryFull **pfull,
100
+ uintptr_t retaddr)
101
{
102
- int flags;
39
-
103
-
40
- tcg_gen_sub_i32(count, count, imm);
104
- flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
41
- tcg_temp_free_i32(imm);
105
- nonfault, phost, retaddr);
106
+ int flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
107
+ nonfault, phost, pfull, retaddr);
108
109
/* Handle clean RAM pages. */
110
if (unlikely(flags & TLB_NOTDIRTY)) {
111
- uintptr_t index = tlb_index(env, mmu_idx, addr);
112
- CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
113
-
114
- notdirty_write(env_cpu(env), addr, 1, full, retaddr);
115
+ notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr);
116
flags &= ~TLB_NOTDIRTY;
42
}
117
}
43
118
44
tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label);
119
return flags;
45
@@ -XXX,XX +XXX,XX @@ static inline void gen_tb_start(const TranslationBlock *tb)
120
}
46
static inline void gen_tb_end(const TranslationBlock *tb, int num_insns)
121
122
+int probe_access_flags(CPUArchState *env, target_ulong addr,
123
+ MMUAccessType access_type, int mmu_idx,
124
+ bool nonfault, void **phost, uintptr_t retaddr)
125
+{
126
+ CPUTLBEntryFull *full;
127
+
128
+ return probe_access_full(env, addr, access_type, mmu_idx,
129
+ nonfault, phost, &full, retaddr);
130
+}
131
+
132
void *probe_access(CPUArchState *env, target_ulong addr, int size,
133
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
47
{
134
{
48
if (tb_cflags(tb) & CF_USE_ICOUNT) {
135
+ CPUTLBEntryFull *full;
49
- /* Update the num_insn immediate parameter now that we know
136
void *host;
50
- * the actual insn count. */
137
int flags;
51
- tcg_set_insn_param(icount_start_insn, 1, num_insns);
138
52
+ /*
139
g_assert(-(addr | TARGET_PAGE_MASK) >= size);
53
+ * Update the num_insn immediate parameter now that we know
140
54
+ * the actual insn count.
141
flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
55
+ */
142
- false, &host, retaddr);
56
+ tcg_set_insn_param(icount_start_insn, 2,
143
+ false, &host, &full, retaddr);
57
+ tcgv_i32_arg(tcg_constant_i32(num_insns)));
144
145
/* Per the interface, size == 0 merely faults the access. */
146
if (size == 0) {
147
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
58
}
148
}
59
149
60
gen_set_label(tcg_ctx->exitreq_label);
150
if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
151
- uintptr_t index = tlb_index(env, mmu_idx, addr);
152
- CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
153
-
154
/* Handle watchpoints. */
155
if (flags & TLB_WATCHPOINT) {
156
int wp_access = (access_type == MMU_DATA_STORE
157
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
158
void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
159
MMUAccessType access_type, int mmu_idx)
160
{
161
+ CPUTLBEntryFull *full;
162
void *host;
163
int flags;
164
165
flags = probe_access_internal(env, addr, 0, access_type,
166
- mmu_idx, true, &host, 0);
167
+ mmu_idx, true, &host, &full, 0);
168
169
/* No combination of flags are expected by the caller. */
170
return flags ? NULL : host;
171
@@ -XXX,XX +XXX,XX @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
172
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
173
void **hostp)
174
{
175
+ CPUTLBEntryFull *full;
176
void *p;
177
178
(void)probe_access_internal(env, addr, 1, MMU_INST_FETCH,
179
- cpu_mmu_index(env, true), false, &p, 0);
180
+ cpu_mmu_index(env, true), false, &p, &full, 0);
181
if (p == NULL) {
182
return -1;
183
}
61
--
184
--
62
2.25.1
185
2.34.1
63
186
64
187
diff view generated by jsdifflib
1
Now that we have collected all of the page data into
2
CPUTLBEntryFull, provide an interface to record that
3
all in one go, instead of using 4 arguments. This interface
4
allows CPUTLBEntryFull to be extended without having to
5
change the number of arguments.
6
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
11
---
3
tcg/optimize.c | 108 ++++++++++++++++++++++---------------------------
12
include/exec/cpu-defs.h | 14 +++++++++++
4
1 file changed, 49 insertions(+), 59 deletions(-)
13
include/exec/exec-all.h | 22 ++++++++++++++++++
14
accel/tcg/cputlb.c | 51 ++++++++++++++++++++++++++---------------
15
3 files changed, 69 insertions(+), 18 deletions(-)
5
16
6
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
7
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
8
--- a/tcg/optimize.c
19
--- a/include/exec/cpu-defs.h
9
+++ b/tcg/optimize.c
20
+++ b/include/exec/cpu-defs.h
10
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
21
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntryFull {
11
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
22
* + the offset within the target MemoryRegion (otherwise)
23
*/
24
hwaddr xlat_section;
25
+
26
+ /*
27
+ * @phys_addr contains the physical address in the address space
28
+ * given by cpu_asidx_from_attrs(cpu, @attrs).
29
+ */
30
+ hwaddr phys_addr;
31
+
32
+ /* @attrs contains the memory transaction attributes for the page. */
33
MemTxAttrs attrs;
34
+
35
+ /* @prot contains the complete protections for the page. */
36
+ uint8_t prot;
37
+
38
+ /* @lg_page_size contains the log2 of the page size. */
39
+ uint8_t lg_page_size;
40
} CPUTLBEntryFull;
41
42
/*
43
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
44
index XXXXXXX..XXXXXXX 100644
45
--- a/include/exec/exec-all.h
46
+++ b/include/exec/exec-all.h
47
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
48
uint16_t idxmap,
49
unsigned bits);
50
51
+/**
52
+ * tlb_set_page_full:
53
+ * @cpu: CPU context
54
+ * @mmu_idx: mmu index of the tlb to modify
55
+ * @vaddr: virtual address of the entry to add
56
+ * @full: the details of the tlb entry
57
+ *
58
+ * Add an entry to @cpu tlb index @mmu_idx. All of the fields of
59
+ * @full must be filled, except for xlat_section, and constitute
60
+ * the complete description of the translated page.
61
+ *
62
+ * This is generally called by the target tlb_fill function after
63
+ * having performed a successful page table walk to find the physical
64
+ * address and attributes for the translation.
65
+ *
66
+ * At most one entry for a given virtual address is permitted. Only a
67
+ * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
68
+ * used by tlb_flush_page.
69
+ */
70
+void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
71
+ CPUTLBEntryFull *full);
72
+
73
/**
74
* tlb_set_page_with_attrs:
75
* @cpu: CPU to add this TLB entry for
76
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/accel/tcg/cputlb.c
79
+++ b/accel/tcg/cputlb.c
80
@@ -XXX,XX +XXX,XX @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
81
env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
12
}
82
}
13
83
14
-static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, uint64_t val)
84
-/* Add a new TLB entry. At most one entry for a given virtual address
15
-{
85
+/*
16
- const TCGOpDef *def;
86
+ * Add a new TLB entry. At most one entry for a given virtual address
17
- TCGOpcode new_op;
87
* is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
18
- uint64_t mask;
88
* supplied size is only used by tlb_flush_page.
19
- TempOptInfo *di = arg_info(dst);
89
*
20
-
90
* Called from TCG-generated code, which is under an RCU read-side
21
- def = &tcg_op_defs[op->opc];
91
* critical section.
22
- if (def->flags & TCG_OPF_VECTOR) {
92
*/
23
- new_op = INDEX_op_dupi_vec;
93
-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
24
- } else if (def->flags & TCG_OPF_64BIT) {
94
- hwaddr paddr, MemTxAttrs attrs, int prot,
25
- new_op = INDEX_op_movi_i64;
95
- int mmu_idx, target_ulong size)
26
- } else {
96
+void tlb_set_page_full(CPUState *cpu, int mmu_idx,
27
- new_op = INDEX_op_movi_i32;
97
+ target_ulong vaddr, CPUTLBEntryFull *full)
28
- }
29
- op->opc = new_op;
30
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
31
- op->args[0] = dst;
32
- op->args[1] = val;
33
-
34
- reset_temp(dst);
35
- di->is_const = true;
36
- di->val = val;
37
- mask = val;
38
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
39
- /* High bits of the destination are now garbage. */
40
- mask |= ~0xffffffffull;
41
- }
42
- di->mask = mask;
43
-}
44
-
45
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
46
{
98
{
47
TCGTemp *dst_ts = arg_temp(dst);
99
CPUArchState *env = cpu->env_ptr;
48
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
100
CPUTLB *tlb = env_tlb(env);
101
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
102
CPUTLBEntry *te, tn;
103
hwaddr iotlb, xlat, sz, paddr_page;
104
target_ulong vaddr_page;
105
- int asidx = cpu_asidx_from_attrs(cpu, attrs);
106
- int wp_flags;
107
+ int asidx, wp_flags, prot;
108
bool is_ram, is_romd;
109
110
assert_cpu_is_self(cpu);
111
112
- if (size <= TARGET_PAGE_SIZE) {
113
+ if (full->lg_page_size <= TARGET_PAGE_BITS) {
114
sz = TARGET_PAGE_SIZE;
115
} else {
116
- tlb_add_large_page(env, mmu_idx, vaddr, size);
117
- sz = size;
118
+ sz = (hwaddr)1 << full->lg_page_size;
119
+ tlb_add_large_page(env, mmu_idx, vaddr, sz);
49
}
120
}
121
vaddr_page = vaddr & TARGET_PAGE_MASK;
122
- paddr_page = paddr & TARGET_PAGE_MASK;
123
+ paddr_page = full->phys_addr & TARGET_PAGE_MASK;
124
125
+ prot = full->prot;
126
+ asidx = cpu_asidx_from_attrs(cpu, full->attrs);
127
section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
128
- &xlat, &sz, attrs, &prot);
129
+ &xlat, &sz, full->attrs, &prot);
130
assert(sz >= TARGET_PAGE_SIZE);
131
132
tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
133
" prot=%x idx=%d\n",
134
- vaddr, paddr, prot, mmu_idx);
135
+ vaddr, full->phys_addr, prot, mmu_idx);
136
137
address = vaddr_page;
138
- if (size < TARGET_PAGE_SIZE) {
139
+ if (full->lg_page_size < TARGET_PAGE_BITS) {
140
/* Repeat the MMU check and TLB fill on every access. */
141
address |= TLB_INVALID_MASK;
142
}
143
- if (attrs.byte_swap) {
144
+ if (full->attrs.byte_swap) {
145
address |= TLB_BSWAP;
146
}
147
148
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
149
* subtract here is that of the page base, and not the same as the
150
* vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
151
*/
152
+ desc->fulltlb[index] = *full;
153
desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
154
- desc->fulltlb[index].attrs = attrs;
155
+ desc->fulltlb[index].phys_addr = paddr_page;
156
+ desc->fulltlb[index].prot = prot;
157
158
/* Now calculate the new entry */
159
tn.addend = addend - vaddr_page;
160
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
161
qemu_spin_unlock(&tlb->c.lock);
50
}
162
}
51
163
52
+static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
164
-/* Add a new TLB entry, but without specifying the memory
53
+ TCGOp *op, TCGArg dst, uint64_t val)
165
- * transaction attributes to be used.
166
- */
167
+void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
168
+ hwaddr paddr, MemTxAttrs attrs, int prot,
169
+ int mmu_idx, target_ulong size)
54
+{
170
+{
55
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
171
+ CPUTLBEntryFull full = {
56
+ TCGType type;
172
+ .phys_addr = paddr,
57
+ TCGTemp *tv;
173
+ .attrs = attrs,
174
+ .prot = prot,
175
+ .lg_page_size = ctz64(size)
176
+ };
58
+
177
+
59
+ if (def->flags & TCG_OPF_VECTOR) {
178
+ assert(is_power_of_2(size));
60
+ type = TCGOP_VECL(op) + TCG_TYPE_V64;
179
+ tlb_set_page_full(cpu, mmu_idx, vaddr, &full);
61
+ } else if (def->flags & TCG_OPF_64BIT) {
62
+ type = TCG_TYPE_I64;
63
+ } else {
64
+ type = TCG_TYPE_I32;
65
+ }
66
+
67
+ /* Convert movi to mov with constant temp. */
68
+ tv = tcg_constant_internal(type, val);
69
+ init_ts_info(temps_used, tv);
70
+ tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
71
+}
180
+}
72
+
181
+
73
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
182
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
74
{
183
hwaddr paddr, int prot,
75
uint64_t l64, h64;
184
int mmu_idx, target_ulong size)
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
nb_temps = s->nb_temps;
78
nb_globals = s->nb_globals;
79
80
- bitmap_zero(temps_used.l, nb_temps);
81
+ memset(&temps_used, 0, sizeof(temps_used));
82
for (i = 0; i < nb_temps; ++i) {
83
s->temps[i].state_ptr = NULL;
84
}
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
CASE_OP_32_64(rotr):
87
if (arg_is_const(op->args[1])
88
&& arg_info(op->args[1])->val == 0) {
89
- tcg_opt_gen_movi(s, op, op->args[0], 0);
90
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
91
continue;
92
}
93
break;
94
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
95
96
if (partmask == 0) {
97
tcg_debug_assert(nb_oargs == 1);
98
- tcg_opt_gen_movi(s, op, op->args[0], 0);
99
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
100
continue;
101
}
102
if (affected == 0) {
103
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
104
CASE_OP_32_64(mulsh):
105
if (arg_is_const(op->args[2])
106
&& arg_info(op->args[2])->val == 0) {
107
- tcg_opt_gen_movi(s, op, op->args[0], 0);
108
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
109
continue;
110
}
111
break;
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
CASE_OP_32_64_VEC(sub):
114
CASE_OP_32_64_VEC(xor):
115
if (args_are_copies(op->args[1], op->args[2])) {
116
- tcg_opt_gen_movi(s, op, op->args[0], 0);
117
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
118
continue;
119
}
120
break;
121
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
122
break;
123
CASE_OP_32_64(movi):
124
case INDEX_op_dupi_vec:
125
- tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
126
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], op->args[1]);
127
break;
128
129
case INDEX_op_dup_vec:
130
if (arg_is_const(op->args[1])) {
131
tmp = arg_info(op->args[1])->val;
132
tmp = dup_const(TCGOP_VECE(op), tmp);
133
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
134
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
135
break;
136
}
137
goto do_default;
138
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
139
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
140
tmp = arg_info(op->args[1])->val;
141
if (tmp == arg_info(op->args[2])->val) {
142
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
143
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
144
break;
145
}
146
} else if (args_are_copies(op->args[1], op->args[2])) {
147
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
148
case INDEX_op_extrh_i64_i32:
149
if (arg_is_const(op->args[1])) {
150
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
151
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
152
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
153
break;
154
}
155
goto do_default;
156
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
157
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
158
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
159
arg_info(op->args[2])->val);
160
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
161
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
162
break;
163
}
164
goto do_default;
165
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
166
TCGArg v = arg_info(op->args[1])->val;
167
if (v != 0) {
168
tmp = do_constant_folding(opc, v, 0);
169
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
170
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
171
} else {
172
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
173
}
174
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
175
tmp = deposit64(arg_info(op->args[1])->val,
176
op->args[3], op->args[4],
177
arg_info(op->args[2])->val);
178
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
179
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
180
break;
181
}
182
goto do_default;
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
if (arg_is_const(op->args[1])) {
185
tmp = extract64(arg_info(op->args[1])->val,
186
op->args[2], op->args[3]);
187
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
188
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
189
break;
190
}
191
goto do_default;
192
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
193
if (arg_is_const(op->args[1])) {
194
tmp = sextract64(arg_info(op->args[1])->val,
195
op->args[2], op->args[3]);
196
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
197
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
198
break;
199
}
200
goto do_default;
201
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
202
tmp = (int32_t)(((uint32_t)v1 >> shr) |
203
((uint32_t)v2 << (32 - shr)));
204
}
205
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
206
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
207
break;
208
}
209
goto do_default;
210
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
211
tmp = do_constant_folding_cond(opc, op->args[1],
212
op->args[2], op->args[3]);
213
if (tmp != 2) {
214
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
215
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
216
break;
217
}
218
goto do_default;
219
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
220
op->args[1], op->args[2]);
221
if (tmp != 2) {
222
if (tmp) {
223
- bitmap_zero(temps_used.l, nb_temps);
224
+ memset(&temps_used, 0, sizeof(temps_used));
225
op->opc = INDEX_op_br;
226
op->args[0] = op->args[3];
227
} else {
228
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
229
uint64_t a = ((uint64_t)ah << 32) | al;
230
uint64_t b = ((uint64_t)bh << 32) | bl;
231
TCGArg rl, rh;
232
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
233
+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
234
235
if (opc == INDEX_op_add2_i32) {
236
a += b;
237
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
238
239
rl = op->args[0];
240
rh = op->args[1];
241
- tcg_opt_gen_movi(s, op, rl, (int32_t)a);
242
- tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
243
+ tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
244
+ tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
245
break;
246
}
247
goto do_default;
248
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
249
uint32_t b = arg_info(op->args[3])->val;
250
uint64_t r = (uint64_t)a * b;
251
TCGArg rl, rh;
252
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
253
+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
254
255
rl = op->args[0];
256
rh = op->args[1];
257
- tcg_opt_gen_movi(s, op, rl, (int32_t)r);
258
- tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
259
+ tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
260
+ tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
261
break;
262
}
263
goto do_default;
264
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
265
if (tmp != 2) {
266
if (tmp) {
267
do_brcond_true:
268
- bitmap_zero(temps_used.l, nb_temps);
269
+ memset(&temps_used, 0, sizeof(temps_used));
270
op->opc = INDEX_op_br;
271
op->args[0] = op->args[5];
272
} else {
273
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
274
/* Simplify LT/GE comparisons vs zero to a single compare
275
vs the high word of the input. */
276
do_brcond_high:
277
- bitmap_zero(temps_used.l, nb_temps);
278
+ memset(&temps_used, 0, sizeof(temps_used));
279
op->opc = INDEX_op_brcond_i32;
280
op->args[0] = op->args[1];
281
op->args[1] = op->args[3];
282
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
283
goto do_default;
284
}
285
do_brcond_low:
286
- bitmap_zero(temps_used.l, nb_temps);
287
+ memset(&temps_used, 0, sizeof(temps_used));
288
op->opc = INDEX_op_brcond_i32;
289
op->args[1] = op->args[2];
290
op->args[2] = op->args[4];
291
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
292
op->args[5]);
293
if (tmp != 2) {
294
do_setcond_const:
295
- tcg_opt_gen_movi(s, op, op->args[0], tmp);
296
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
297
} else if ((op->args[5] == TCG_COND_LT
298
|| op->args[5] == TCG_COND_GE)
299
&& arg_is_const(op->args[3])
300
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
301
block, otherwise we only trash the output args. "mask" is
302
the non-zero bits mask for the first output arg. */
303
if (def->flags & TCG_OPF_BB_END) {
304
- bitmap_zero(temps_used.l, nb_temps);
305
+ memset(&temps_used, 0, sizeof(temps_used));
306
} else {
307
do_reset_output:
308
for (i = 0; i < nb_oargs; i++) {
309
--
185
--
310
2.25.1
186
2.34.1
311
187
312
188
diff view generated by jsdifflib
1
These are now completely covered by mov from a
1
Allow the target to cache items from the guest page tables.
2
TYPE_CONST temporary.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Aleksandar Markovic <aleksandar.qemu.devel@gmail.com>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
include/tcg/tcg-opc.h | 3 ---
8
include/exec/cpu-defs.h | 9 +++++++++
9
tcg/optimize.c | 4 ----
9
1 file changed, 9 insertions(+)
10
tcg/tcg-op-vec.c | 1 -
11
tcg/tcg.c | 18 +-----------------
12
tcg/aarch64/tcg-target.c.inc | 3 ---
13
tcg/arm/tcg-target.c.inc | 1 -
14
tcg/i386/tcg-target.c.inc | 3 ---
15
tcg/mips/tcg-target.c.inc | 2 --
16
tcg/ppc/tcg-target.c.inc | 3 ---
17
tcg/riscv/tcg-target.c.inc | 2 --
18
tcg/s390/tcg-target.c.inc | 2 --
19
tcg/sparc/tcg-target.c.inc | 2 --
20
tcg/tci/tcg-target.c.inc | 2 --
21
13 files changed, 1 insertion(+), 45 deletions(-)
22
10
23
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
11
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
24
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
25
--- a/include/tcg/tcg-opc.h
13
--- a/include/exec/cpu-defs.h
26
+++ b/include/tcg/tcg-opc.h
14
+++ b/include/exec/cpu-defs.h
27
@@ -XXX,XX +XXX,XX @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END)
15
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntryFull {
28
DEF(mb, 0, 0, 1, 0)
16
29
17
/* @lg_page_size contains the log2 of the page size. */
30
DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
18
uint8_t lg_page_size;
31
-DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT)
19
+
32
DEF(setcond_i32, 1, 2, 1, 0)
20
+ /*
33
DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
21
+ * Allow target-specific additions to this structure.
34
/* load/store */
22
+ * This may be used to cache items from the guest cpu
35
@@ -XXX,XX +XXX,XX @@ DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
23
+ * page tables for later use by the implementation.
36
DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
24
+ */
37
25
+#ifdef TARGET_PAGE_ENTRY_EXTRA
38
DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
26
+ TARGET_PAGE_ENTRY_EXTRA
39
-DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
27
+#endif
40
DEF(setcond_i64, 1, 2, 1, IMPL64)
28
} CPUTLBEntryFull;
41
DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
42
/* load/store */
43
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
44
#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
45
46
DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
47
-DEF(dupi_vec, 1, 0, 1, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
48
49
DEF(dup_vec, 1, 1, 0, IMPLVEC)
50
DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
51
diff --git a/tcg/optimize.c b/tcg/optimize.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/tcg/optimize.c
54
+++ b/tcg/optimize.c
55
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
56
CASE_OP_32_64_VEC(mov):
57
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
58
break;
59
- CASE_OP_32_64(movi):
60
- case INDEX_op_dupi_vec:
61
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], op->args[1]);
62
- break;
63
64
case INDEX_op_dup_vec:
65
if (arg_is_const(op->args[1])) {
66
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/tcg/tcg-op-vec.c
69
+++ b/tcg/tcg-op-vec.c
70
@@ -XXX,XX +XXX,XX @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list,
71
case INDEX_op_xor_vec:
72
case INDEX_op_mov_vec:
73
case INDEX_op_dup_vec:
74
- case INDEX_op_dupi_vec:
75
case INDEX_op_dup2_vec:
76
case INDEX_op_ld_vec:
77
case INDEX_op_st_vec:
78
diff --git a/tcg/tcg.c b/tcg/tcg.c
79
index XXXXXXX..XXXXXXX 100644
80
--- a/tcg/tcg.c
81
+++ b/tcg/tcg.c
82
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
83
return TCG_TARGET_HAS_goto_ptr;
84
85
case INDEX_op_mov_i32:
86
- case INDEX_op_movi_i32:
87
case INDEX_op_setcond_i32:
88
case INDEX_op_brcond_i32:
89
case INDEX_op_ld8u_i32:
90
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
91
return TCG_TARGET_REG_BITS == 32;
92
93
case INDEX_op_mov_i64:
94
- case INDEX_op_movi_i64:
95
case INDEX_op_setcond_i64:
96
case INDEX_op_brcond_i64:
97
case INDEX_op_ld8u_i64:
98
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
99
100
case INDEX_op_mov_vec:
101
case INDEX_op_dup_vec:
102
- case INDEX_op_dupi_vec:
103
case INDEX_op_dupm_vec:
104
case INDEX_op_ld_vec:
105
case INDEX_op_st_vec:
106
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
107
}
108
29
109
/*
30
/*
110
- * Specialized code generation for INDEX_op_movi_*.
111
+ * Specialized code generation for INDEX_op_mov_* with a constant.
112
*/
113
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
114
tcg_target_ulong val, TCGLifeData arg_life,
115
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
116
}
117
}
118
119
-static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
120
-{
121
- TCGTemp *ots = arg_temp(op->args[0]);
122
- tcg_target_ulong val = op->args[1];
123
-
124
- tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
125
-}
126
-
127
/*
128
* Specialized code generation for INDEX_op_mov_*.
129
*/
130
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
131
case INDEX_op_mov_vec:
132
tcg_reg_alloc_mov(s, op);
133
break;
134
- case INDEX_op_movi_i32:
135
- case INDEX_op_movi_i64:
136
- case INDEX_op_dupi_vec:
137
- tcg_reg_alloc_movi(s, op);
138
- break;
139
case INDEX_op_dup_vec:
140
tcg_reg_alloc_dup(s, op);
141
break;
142
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
143
index XXXXXXX..XXXXXXX 100644
144
--- a/tcg/aarch64/tcg-target.c.inc
145
+++ b/tcg/aarch64/tcg-target.c.inc
146
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
147
148
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
149
case INDEX_op_mov_i64:
150
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
151
- case INDEX_op_movi_i64:
152
case INDEX_op_call: /* Always emitted via tcg_out_call. */
153
default:
154
g_assert_not_reached();
155
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
156
break;
157
158
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
159
- case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
160
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
161
default:
162
g_assert_not_reached();
163
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
164
index XXXXXXX..XXXXXXX 100644
165
--- a/tcg/arm/tcg-target.c.inc
166
+++ b/tcg/arm/tcg-target.c.inc
167
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
168
break;
169
170
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
171
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
172
case INDEX_op_call: /* Always emitted via tcg_out_call. */
173
default:
174
tcg_abort();
175
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
176
index XXXXXXX..XXXXXXX 100644
177
--- a/tcg/i386/tcg-target.c.inc
178
+++ b/tcg/i386/tcg-target.c.inc
179
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
180
break;
181
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
182
case INDEX_op_mov_i64:
183
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
184
- case INDEX_op_movi_i64:
185
case INDEX_op_call: /* Always emitted via tcg_out_call. */
186
default:
187
tcg_abort();
188
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
189
break;
190
191
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
192
- case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
193
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
194
default:
195
g_assert_not_reached();
196
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
197
index XXXXXXX..XXXXXXX 100644
198
--- a/tcg/mips/tcg-target.c.inc
199
+++ b/tcg/mips/tcg-target.c.inc
200
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
201
break;
202
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
203
case INDEX_op_mov_i64:
204
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
205
- case INDEX_op_movi_i64:
206
case INDEX_op_call: /* Always emitted via tcg_out_call. */
207
default:
208
tcg_abort();
209
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
210
index XXXXXXX..XXXXXXX 100644
211
--- a/tcg/ppc/tcg-target.c.inc
212
+++ b/tcg/ppc/tcg-target.c.inc
213
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
214
215
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
216
case INDEX_op_mov_i64:
217
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
218
- case INDEX_op_movi_i64:
219
case INDEX_op_call: /* Always emitted via tcg_out_call. */
220
default:
221
tcg_abort();
222
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
223
return;
224
225
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
226
- case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
227
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
228
default:
229
g_assert_not_reached();
230
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
231
index XXXXXXX..XXXXXXX 100644
232
--- a/tcg/riscv/tcg-target.c.inc
233
+++ b/tcg/riscv/tcg-target.c.inc
234
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
235
236
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
237
case INDEX_op_mov_i64:
238
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
239
- case INDEX_op_movi_i64:
240
case INDEX_op_call: /* Always emitted via tcg_out_call. */
241
default:
242
g_assert_not_reached();
243
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
244
index XXXXXXX..XXXXXXX 100644
245
--- a/tcg/s390/tcg-target.c.inc
246
+++ b/tcg/s390/tcg-target.c.inc
247
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
248
249
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
250
case INDEX_op_mov_i64:
251
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
252
- case INDEX_op_movi_i64:
253
case INDEX_op_call: /* Always emitted via tcg_out_call. */
254
default:
255
tcg_abort();
256
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
257
index XXXXXXX..XXXXXXX 100644
258
--- a/tcg/sparc/tcg-target.c.inc
259
+++ b/tcg/sparc/tcg-target.c.inc
260
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
261
262
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
263
case INDEX_op_mov_i64:
264
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
265
- case INDEX_op_movi_i64:
266
case INDEX_op_call: /* Always emitted via tcg_out_call. */
267
default:
268
tcg_abort();
269
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
270
index XXXXXXX..XXXXXXX 100644
271
--- a/tcg/tci/tcg-target.c.inc
272
+++ b/tcg/tci/tcg-target.c.inc
273
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
274
break;
275
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
276
case INDEX_op_mov_i64:
277
- case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
278
- case INDEX_op_movi_i64:
279
case INDEX_op_call: /* Always emitted via tcg_out_call. */
280
default:
281
tcg_abort();
282
--
31
--
283
2.25.1
32
2.34.1
284
33
285
34
diff view generated by jsdifflib
1
Do not allocate a large block for indexing. Instead, allocate
1
This bitmap is created and discarded immediately.
2
for each temporary as they are seen.
2
We gain nothing by its existence.
3
4
In general, this will use less memory, if we consider that most
5
TBs do not touch every target register. This also allows us to
6
allocate TempOptInfo for new temps created during optimization.
7
3
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Message-Id: <20220822232338.1727934-2-richard.henderson@linaro.org>
10
---
7
---
11
tcg/optimize.c | 60 ++++++++++++++++++++++++++++----------------------
8
accel/tcg/translate-all.c | 78 ++-------------------------------------
12
1 file changed, 34 insertions(+), 26 deletions(-)
9
1 file changed, 4 insertions(+), 74 deletions(-)
13
10
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
15
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
13
--- a/accel/tcg/translate-all.c
17
+++ b/tcg/optimize.c
14
+++ b/accel/tcg/translate-all.c
18
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
15
@@ -XXX,XX +XXX,XX @@
16
#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
17
#endif
18
19
-#define SMC_BITMAP_USE_THRESHOLD 10
20
-
21
typedef struct PageDesc {
22
/* list of TBs intersecting this ram page */
23
uintptr_t first_tb;
24
-#ifdef CONFIG_SOFTMMU
25
- /* in order to optimize self modifying code, we count the number
26
- of lookups we do to a given page to use a bitmap */
27
- unsigned long *code_bitmap;
28
- unsigned int code_write_count;
29
-#else
30
+#ifdef CONFIG_USER_ONLY
31
unsigned long flags;
32
void *target_data;
33
#endif
34
-#ifndef CONFIG_USER_ONLY
35
+#ifdef CONFIG_SOFTMMU
36
QemuSpin lock;
37
#endif
38
} PageDesc;
39
@@ -XXX,XX +XXX,XX @@ void tb_htable_init(void)
40
qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
19
}
41
}
20
42
21
/* Initialize and activate a temporary. */
43
-/* call with @p->lock held */
22
-static void init_ts_info(TempOptInfo *infos,
44
-static inline void invalidate_page_bitmap(PageDesc *p)
23
- TCGTempSet *temps_used, TCGTemp *ts)
45
-{
24
+static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
46
- assert_page_locked(p);
47
-#ifdef CONFIG_SOFTMMU
48
- g_free(p->code_bitmap);
49
- p->code_bitmap = NULL;
50
- p->code_write_count = 0;
51
-#endif
52
-}
53
-
54
/* Set to NULL all the 'first_tb' fields in all PageDescs. */
55
static void page_flush_tb_1(int level, void **lp)
25
{
56
{
26
size_t idx = temp_idx(ts);
57
@@ -XXX,XX +XXX,XX @@ static void page_flush_tb_1(int level, void **lp)
27
- if (!test_bit(idx, temps_used->l)) {
58
for (i = 0; i < V_L2_SIZE; ++i) {
28
- TempOptInfo *ti = &infos[idx];
59
page_lock(&pd[i]);
29
+ TempOptInfo *ti;
60
pd[i].first_tb = (uintptr_t)NULL;
30
61
- invalidate_page_bitmap(pd + i);
31
+ if (test_bit(idx, temps_used->l)) {
62
page_unlock(&pd[i]);
32
+ return;
33
+ }
34
+ set_bit(idx, temps_used->l);
35
+
36
+ ti = ts->state_ptr;
37
+ if (ti == NULL) {
38
+ ti = tcg_malloc(sizeof(TempOptInfo));
39
ts->state_ptr = ti;
40
- ti->next_copy = ts;
41
- ti->prev_copy = ts;
42
- if (ts->kind == TEMP_CONST) {
43
- ti->is_const = true;
44
- ti->val = ti->mask = ts->val;
45
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
46
- /* High bits of a 32-bit quantity are garbage. */
47
- ti->mask |= ~0xffffffffull;
48
- }
49
- } else {
50
- ti->is_const = false;
51
- ti->mask = -1;
52
+ }
53
+
54
+ ti->next_copy = ts;
55
+ ti->prev_copy = ts;
56
+ if (ts->kind == TEMP_CONST) {
57
+ ti->is_const = true;
58
+ ti->val = ts->val;
59
+ ti->mask = ts->val;
60
+ if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
61
+ /* High bits of a 32-bit quantity are garbage. */
62
+ ti->mask |= ~0xffffffffull;
63
}
63
}
64
- set_bit(idx, temps_used->l);
64
} else {
65
+ } else {
65
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
66
+ ti->is_const = false;
66
if (rm_from_page_list) {
67
+ ti->mask = -1;
67
p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
68
tb_page_remove(p, tb);
69
- invalidate_page_bitmap(p);
70
if (tb->page_addr[1] != -1) {
71
p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
72
tb_page_remove(p, tb);
73
- invalidate_page_bitmap(p);
74
}
75
}
76
77
@@ -XXX,XX +XXX,XX @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
68
}
78
}
69
}
79
}
70
80
71
-static void init_arg_info(TempOptInfo *infos,
81
-#ifdef CONFIG_SOFTMMU
72
- TCGTempSet *temps_used, TCGArg arg)
82
-/* call with @p->lock held */
73
+static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
83
-static void build_page_bitmap(PageDesc *p)
74
{
84
-{
75
- init_ts_info(infos, temps_used, arg_temp(arg));
85
- int n, tb_start, tb_end;
76
+ init_ts_info(temps_used, arg_temp(arg));
86
- TranslationBlock *tb;
87
-
88
- assert_page_locked(p);
89
- p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
90
-
91
- PAGE_FOR_EACH_TB(p, tb, n) {
92
- /* NOTE: this is subtle as a TB may span two physical pages */
93
- if (n == 0) {
94
- /* NOTE: tb_end may be after the end of the page, but
95
- it is not a problem */
96
- tb_start = tb->pc & ~TARGET_PAGE_MASK;
97
- tb_end = tb_start + tb->size;
98
- if (tb_end > TARGET_PAGE_SIZE) {
99
- tb_end = TARGET_PAGE_SIZE;
100
- }
101
- } else {
102
- tb_start = 0;
103
- tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
104
- }
105
- bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
106
- }
107
-}
108
-#endif
109
-
110
/* add the tb in the target page and protect it if necessary
111
*
112
* Called with mmap_lock held for user-mode emulation.
113
@@ -XXX,XX +XXX,XX @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
114
page_already_protected = p->first_tb != (uintptr_t)NULL;
115
#endif
116
p->first_tb = (uintptr_t)tb | n;
117
- invalidate_page_bitmap(p);
118
119
#if defined(CONFIG_USER_ONLY)
120
/* translator_loop() must have made all TB pages non-writable */
121
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
122
/* remove TB from the page(s) if we couldn't insert it */
123
if (unlikely(existing_tb)) {
124
tb_page_remove(p, tb);
125
- invalidate_page_bitmap(p);
126
if (p2) {
127
tb_page_remove(p2, tb);
128
- invalidate_page_bitmap(p2);
129
}
130
tb = existing_tb;
131
}
132
@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
133
#if !defined(CONFIG_USER_ONLY)
134
/* if no code remaining, no need to continue to use slow writes */
135
if (!p->first_tb) {
136
- invalidate_page_bitmap(p);
137
tlb_unprotect_code(start);
138
}
139
#endif
140
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_page_fast(struct page_collection *pages,
141
}
142
143
assert_page_locked(p);
144
- if (!p->code_bitmap &&
145
- ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
146
- build_page_bitmap(p);
147
- }
148
- if (p->code_bitmap) {
149
- unsigned int nr;
150
- unsigned long b;
151
-
152
- nr = start & ~TARGET_PAGE_MASK;
153
- b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
154
- if (b & ((1 << len) - 1)) {
155
- goto do_invalidate;
156
- }
157
- } else {
158
- do_invalidate:
159
- tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
160
- retaddr);
161
- }
162
+ tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
163
+ retaddr);
77
}
164
}
78
165
#else
79
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
166
/* Called with mmap_lock held. If pc is not 0 then it indicates the
80
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
81
/* Propagate constants and copies, fold constant expressions. */
82
void tcg_optimize(TCGContext *s)
83
{
84
- int nb_temps, nb_globals;
85
+ int nb_temps, nb_globals, i;
86
TCGOp *op, *op_next, *prev_mb = NULL;
87
- TempOptInfo *infos;
88
TCGTempSet temps_used;
89
90
/* Array VALS has an element for each temp.
91
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
92
93
nb_temps = s->nb_temps;
94
nb_globals = s->nb_globals;
95
+
96
bitmap_zero(temps_used.l, nb_temps);
97
- infos = tcg_malloc(sizeof(TempOptInfo) * nb_temps);
98
+ for (i = 0; i < nb_temps; ++i) {
99
+ s->temps[i].state_ptr = NULL;
100
+ }
101
102
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
103
uint64_t mask, partmask, affected, tmp;
104
- int nb_oargs, nb_iargs, i;
105
+ int nb_oargs, nb_iargs;
106
TCGOpcode opc = op->opc;
107
const TCGOpDef *def = &tcg_op_defs[opc];
108
109
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
110
for (i = 0; i < nb_oargs + nb_iargs; i++) {
111
TCGTemp *ts = arg_temp(op->args[i]);
112
if (ts) {
113
- init_ts_info(infos, &temps_used, ts);
114
+ init_ts_info(&temps_used, ts);
115
}
116
}
117
} else {
118
nb_oargs = def->nb_oargs;
119
nb_iargs = def->nb_iargs;
120
for (i = 0; i < nb_oargs + nb_iargs; i++) {
121
- init_arg_info(infos, &temps_used, op->args[i]);
122
+ init_arg_info(&temps_used, op->args[i]);
123
}
124
}
125
126
--
167
--
127
2.25.1
168
2.34.1
128
169
129
170
diff view generated by jsdifflib
1
Fix this name vs our coding style.
1
Bool is a more appropriate type for the alloc parameter.
2
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/optimize.c | 32 ++++++++++++++++----------------
7
accel/tcg/translate-all.c | 14 +++++++-------
8
1 file changed, 16 insertions(+), 16 deletions(-)
8
1 file changed, 7 insertions(+), 7 deletions(-)
9
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
12
--- a/accel/tcg/translate-all.c
13
+++ b/tcg/optimize.c
13
+++ b/accel/tcg/translate-all.c
14
@@ -XXX,XX +XXX,XX @@
14
@@ -XXX,XX +XXX,XX @@ void page_init(void)
15
glue(glue(case INDEX_op_, x), _i64): \
15
#endif
16
glue(glue(case INDEX_op_, x), _vec)
16
}
17
17
18
-struct tcg_temp_info {
18
-static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
19
+typedef struct TempOptInfo {
19
+static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
20
bool is_const;
21
TCGTemp *prev_copy;
22
TCGTemp *next_copy;
23
tcg_target_ulong val;
24
tcg_target_ulong mask;
25
-};
26
+} TempOptInfo;
27
28
-static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
29
+static inline TempOptInfo *ts_info(TCGTemp *ts)
30
{
20
{
31
return ts->state_ptr;
21
PageDesc *pd;
22
void **lp;
23
@@ -XXX,XX +XXX,XX @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
24
25
static inline PageDesc *page_find(tb_page_addr_t index)
26
{
27
- return page_find_alloc(index, 0);
28
+ return page_find_alloc(index, false);
32
}
29
}
33
30
34
-static inline struct tcg_temp_info *arg_info(TCGArg arg)
31
static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
35
+static inline TempOptInfo *arg_info(TCGArg arg)
32
- PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
33
+ PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc);
34
35
/* In user-mode page locks aren't used; mmap_lock is enough */
36
#ifdef CONFIG_USER_ONLY
37
@@ -XXX,XX +XXX,XX @@ static inline void page_unlock(PageDesc *pd)
38
/* lock the page(s) of a TB in the correct acquisition order */
39
static inline void page_lock_tb(const TranslationBlock *tb)
36
{
40
{
37
return ts_info(arg_temp(arg));
41
- page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
42
+ page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], false);
38
}
43
}
39
@@ -XXX,XX +XXX,XX @@ static inline bool ts_is_copy(TCGTemp *ts)
44
40
/* Reset TEMP's state, possibly removing the temp for the list of copies. */
45
static inline void page_unlock_tb(const TranslationBlock *tb)
41
static void reset_ts(TCGTemp *ts)
46
@@ -XXX,XX +XXX,XX @@ void page_collection_unlock(struct page_collection *set)
47
#endif /* !CONFIG_USER_ONLY */
48
49
static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
50
- PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
51
+ PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
42
{
52
{
43
- struct tcg_temp_info *ti = ts_info(ts);
53
PageDesc *p1, *p2;
44
- struct tcg_temp_info *pi = ts_info(ti->prev_copy);
54
tb_page_addr_t page1;
45
- struct tcg_temp_info *ni = ts_info(ti->next_copy);
55
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
46
+ TempOptInfo *ti = ts_info(ts);
56
* Note that inserting into the hash table first isn't an option, since
47
+ TempOptInfo *pi = ts_info(ti->prev_copy);
57
* we can only insert TBs that are fully initialized.
48
+ TempOptInfo *ni = ts_info(ti->next_copy);
58
*/
49
59
- page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
50
ni->prev_copy = ti->prev_copy;
60
+ page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
51
pi->next_copy = ti->next_copy;
61
tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
52
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
62
if (p2) {
53
}
63
tb_page_add(p2, tb, 1, phys_page2);
54
64
@@ -XXX,XX +XXX,XX @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
55
/* Initialize and activate a temporary. */
65
for (addr = start, len = end - start;
56
-static void init_ts_info(struct tcg_temp_info *infos,
66
len != 0;
57
+static void init_ts_info(TempOptInfo *infos,
67
len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
58
TCGTempSet *temps_used, TCGTemp *ts)
68
- PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
59
{
69
+ PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
60
size_t idx = temp_idx(ts);
70
61
if (!test_bit(idx, temps_used->l)) {
71
/* If the write protection bit is set, then we invalidate
62
- struct tcg_temp_info *ti = &infos[idx];
72
the code inside. */
63
+ TempOptInfo *ti = &infos[idx];
64
65
ts->state_ptr = ti;
66
ti->next_copy = ts;
67
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(struct tcg_temp_info *infos,
68
}
69
}
70
71
-static void init_arg_info(struct tcg_temp_info *infos,
72
+static void init_arg_info(TempOptInfo *infos,
73
TCGTempSet *temps_used, TCGArg arg)
74
{
75
init_ts_info(infos, temps_used, arg_temp(arg));
76
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
77
const TCGOpDef *def;
78
TCGOpcode new_op;
79
tcg_target_ulong mask;
80
- struct tcg_temp_info *di = arg_info(dst);
81
+ TempOptInfo *di = arg_info(dst);
82
83
def = &tcg_op_defs[op->opc];
84
if (def->flags & TCG_OPF_VECTOR) {
85
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
86
TCGTemp *dst_ts = arg_temp(dst);
87
TCGTemp *src_ts = arg_temp(src);
88
const TCGOpDef *def;
89
- struct tcg_temp_info *di;
90
- struct tcg_temp_info *si;
91
+ TempOptInfo *di;
92
+ TempOptInfo *si;
93
tcg_target_ulong mask;
94
TCGOpcode new_op;
95
96
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
97
di->mask = mask;
98
99
if (src_ts->type == dst_ts->type) {
100
- struct tcg_temp_info *ni = ts_info(si->next_copy);
101
+ TempOptInfo *ni = ts_info(si->next_copy);
102
103
di->next_copy = si->next_copy;
104
di->prev_copy = src_ts;
105
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
106
{
107
int nb_temps, nb_globals;
108
TCGOp *op, *op_next, *prev_mb = NULL;
109
- struct tcg_temp_info *infos;
110
+ TempOptInfo *infos;
111
TCGTempSet temps_used;
112
113
/* Array VALS has an element for each temp.
114
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
115
nb_temps = s->nb_temps;
116
nb_globals = s->nb_globals;
117
bitmap_zero(temps_used.l, nb_temps);
118
- infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
119
+ infos = tcg_malloc(sizeof(TempOptInfo) * nb_temps);
120
121
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
122
tcg_target_ulong mask, partmask, affected;
123
--
73
--
124
2.25.1
74
2.34.1
125
75
126
76
diff view generated by jsdifflib
1
Use the pc coming from db->pc_first rather than the TB.
2
3
Use the cached host_addr rather than re-computing for the
4
first page. We still need a separate lookup for the second
5
page because it won't be computed for DisasContextBase until
6
the translator actually performs a read from the page.
7
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
10
---
4
accel/tcg/plugin-gen.c | 49 +++++++++++++++++++-----------------------
11
include/exec/plugin-gen.h | 7 ++++---
5
1 file changed, 22 insertions(+), 27 deletions(-)
12
accel/tcg/plugin-gen.c | 22 +++++++++++-----------
13
accel/tcg/translator.c | 2 +-
14
3 files changed, 16 insertions(+), 15 deletions(-)
6
15
16
diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/include/exec/plugin-gen.h
19
+++ b/include/exec/plugin-gen.h
20
@@ -XXX,XX +XXX,XX @@ struct DisasContextBase;
21
22
#ifdef CONFIG_PLUGIN
23
24
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress);
25
+bool plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db,
26
+ bool supress);
27
void plugin_gen_tb_end(CPUState *cpu);
28
void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
29
void plugin_gen_insn_end(void);
30
@@ -XXX,XX +XXX,XX @@ static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
31
32
#else /* !CONFIG_PLUGIN */
33
34
-static inline
35
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress)
36
+static inline bool
37
+plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db, bool sup)
38
{
39
return false;
40
}
7
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
41
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
8
index XXXXXXX..XXXXXXX 100644
42
index XXXXXXX..XXXXXXX 100644
9
--- a/accel/tcg/plugin-gen.c
43
--- a/accel/tcg/plugin-gen.c
10
+++ b/accel/tcg/plugin-gen.c
44
+++ b/accel/tcg/plugin-gen.c
11
@@ -XXX,XX +XXX,XX @@ static TCGOp *copy_extu_i32_i64(TCGOp **begin_op, TCGOp *op)
45
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb)
12
if (TCG_TARGET_REG_BITS == 32) {
46
pr_ops();
13
/* mov_i32 */
47
}
14
op = copy_op(begin_op, op, INDEX_op_mov_i32);
48
15
- /* movi_i32 */
49
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_only)
16
- op = copy_op(begin_op, op, INDEX_op_movi_i32);
50
+bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
17
+ /* mov_i32 w/ $0 */
51
+ bool mem_only)
18
+ op = copy_op(begin_op, op, INDEX_op_mov_i32);
52
{
53
bool ret = false;
54
55
@@ -XXX,XX +XXX,XX @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_onl
56
57
ret = true;
58
59
- ptb->vaddr = tb->pc;
60
+ ptb->vaddr = db->pc_first;
61
ptb->vaddr2 = -1;
62
- get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1);
63
+ ptb->haddr1 = db->host_addr[0];
64
ptb->haddr2 = NULL;
65
ptb->mem_only = mem_only;
66
67
@@ -XXX,XX +XXX,XX @@ void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db)
68
* Note that we skip this when haddr1 == NULL, e.g. when we're
69
* fetching instructions from a region not backed by RAM.
70
*/
71
- if (likely(ptb->haddr1 != NULL && ptb->vaddr2 == -1) &&
72
- unlikely((db->pc_next & TARGET_PAGE_MASK) !=
73
- (db->pc_first & TARGET_PAGE_MASK))) {
74
- get_page_addr_code_hostp(cpu->env_ptr, db->pc_next,
75
- &ptb->haddr2);
76
- ptb->vaddr2 = db->pc_next;
77
- }
78
- if (likely(ptb->vaddr2 == -1)) {
79
+ if (ptb->haddr1 == NULL) {
80
+ pinsn->haddr = NULL;
81
+ } else if (is_same_page(db, db->pc_next)) {
82
pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr;
19
} else {
83
} else {
20
/* extu_i32_i64 */
84
+ if (ptb->vaddr2 == -1) {
21
op = copy_op(begin_op, op, INDEX_op_extu_i32_i64);
85
+ ptb->vaddr2 = TARGET_PAGE_ALIGN(db->pc_first);
22
@@ -XXX,XX +XXX,XX @@ static TCGOp *copy_mov_i64(TCGOp **begin_op, TCGOp *op)
86
+ get_page_addr_code_hostp(cpu->env_ptr, ptb->vaddr2, &ptb->haddr2);
23
return op;
87
+ }
88
pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2;
89
}
24
}
90
}
25
91
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
26
-static TCGOp *copy_movi_i64(TCGOp **begin_op, TCGOp *op, uint64_t v)
92
index XXXXXXX..XXXXXXX 100644
27
-{
93
--- a/accel/tcg/translator.c
28
- if (TCG_TARGET_REG_BITS == 32) {
94
+++ b/accel/tcg/translator.c
29
- /* 2x movi_i32 */
95
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
30
- op = copy_op(begin_op, op, INDEX_op_movi_i32);
96
ops->tb_start(db, cpu);
31
- op->args[1] = v;
97
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
32
-
98
33
- op = copy_op(begin_op, op, INDEX_op_movi_i32);
99
- plugin_enabled = plugin_gen_tb_start(cpu, tb, cflags & CF_MEMI_ONLY);
34
- op->args[1] = v >> 32;
100
+ plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY);
35
- } else {
101
36
- /* movi_i64 */
102
while (true) {
37
- op = copy_op(begin_op, op, INDEX_op_movi_i64);
103
db->num_insns++;
38
- op->args[1] = v;
39
- }
40
- return op;
41
-}
42
-
43
static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr)
44
{
45
if (UINTPTR_MAX == UINT32_MAX) {
46
- /* movi_i32 */
47
- op = copy_op(begin_op, op, INDEX_op_movi_i32);
48
- op->args[1] = (uintptr_t)ptr;
49
+ /* mov_i32 */
50
+ op = copy_op(begin_op, op, INDEX_op_mov_i32);
51
+ op->args[1] = tcgv_i32_arg(tcg_constant_i32((uintptr_t)ptr));
52
} else {
53
- /* movi_i64 */
54
- op = copy_movi_i64(begin_op, op, (uint64_t)(uintptr_t)ptr);
55
+ /* mov_i64 */
56
+ op = copy_op(begin_op, op, INDEX_op_mov_i64);
57
+ op->args[1] = tcgv_i64_arg(tcg_constant_i64((uintptr_t)ptr));
58
}
59
return op;
60
}
61
62
static TCGOp *copy_const_i64(TCGOp **begin_op, TCGOp *op, uint64_t v)
63
{
64
- return copy_movi_i64(begin_op, op, v);
65
+ if (TCG_TARGET_REG_BITS == 32) {
66
+ /* 2x mov_i32 */
67
+ op = copy_op(begin_op, op, INDEX_op_mov_i32);
68
+ op->args[1] = tcgv_i32_arg(tcg_constant_i32(v));
69
+ op = copy_op(begin_op, op, INDEX_op_mov_i32);
70
+ op->args[1] = tcgv_i32_arg(tcg_constant_i32(v >> 32));
71
+ } else {
72
+ /* mov_i64 */
73
+ op = copy_op(begin_op, op, INDEX_op_mov_i64);
74
+ op->args[1] = tcgv_i64_arg(tcg_constant_i64(v));
75
+ }
76
+ return op;
77
}
78
79
static TCGOp *copy_extu_tl_i64(TCGOp **begin_op, TCGOp *op)
80
@@ -XXX,XX +XXX,XX @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb,
81
82
tcg_debug_assert(type == PLUGIN_GEN_CB_MEM);
83
84
- /* const_i32 == movi_i32 ("info", so it remains as is) */
85
- op = copy_op(&begin_op, op, INDEX_op_movi_i32);
86
+ /* const_i32 == mov_i32 ("info", so it remains as is) */
87
+ op = copy_op(&begin_op, op, INDEX_op_mov_i32);
88
89
/* const_ptr */
90
op = copy_const_ptr(&begin_op, op, cb->userp);
91
--
104
--
92
2.25.1
105
2.34.1
93
106
94
107
diff view generated by jsdifflib
1
These interfaces have been replaced by tcg_gen_dupi_vec
1
Let tb->page_addr[0] contain the address of the first byte of the
2
and tcg_constant_vec.
2
translated block, rather than the address of the page containing the
3
start of the translated block. We need to recover this value anyway
4
at various points, and it is easier to discard a page offset when it
5
is not needed, which happens naturally via the existing page_find shift.
3
6
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
9
---
7
include/tcg/tcg-op.h | 4 ----
10
accel/tcg/cpu-exec.c | 16 ++++++++--------
8
tcg/tcg-op-vec.c | 20 --------------------
11
accel/tcg/cputlb.c | 3 ++-
9
2 files changed, 24 deletions(-)
12
accel/tcg/translate-all.c | 9 +++++----
13
3 files changed, 15 insertions(+), 13 deletions(-)
10
14
11
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
15
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
12
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
13
--- a/include/tcg/tcg-op.h
17
--- a/accel/tcg/cpu-exec.c
14
+++ b/include/tcg/tcg-op.h
18
+++ b/accel/tcg/cpu-exec.c
15
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
19
@@ -XXX,XX +XXX,XX @@ struct tb_desc {
16
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
20
target_ulong pc;
17
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec, TCGv_i64);
21
target_ulong cs_base;
18
void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec, TCGv_ptr, tcg_target_long);
22
CPUArchState *env;
19
-void tcg_gen_dup8i_vec(TCGv_vec, uint32_t);
23
- tb_page_addr_t phys_page1;
20
-void tcg_gen_dup16i_vec(TCGv_vec, uint32_t);
24
+ tb_page_addr_t page_addr0;
21
-void tcg_gen_dup32i_vec(TCGv_vec, uint32_t);
25
uint32_t flags;
22
-void tcg_gen_dup64i_vec(TCGv_vec, uint64_t);
26
uint32_t cflags;
23
void tcg_gen_dupi_vec(unsigned vece, TCGv_vec, uint64_t);
27
uint32_t trace_vcpu_dstate;
24
void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
28
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
25
void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
29
const struct tb_desc *desc = d;
26
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
30
31
if (tb->pc == desc->pc &&
32
- tb->page_addr[0] == desc->phys_page1 &&
33
+ tb->page_addr[0] == desc->page_addr0 &&
34
tb->cs_base == desc->cs_base &&
35
tb->flags == desc->flags &&
36
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
37
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
38
if (tb->page_addr[1] == -1) {
39
return true;
40
} else {
41
- tb_page_addr_t phys_page2;
42
- target_ulong virt_page2;
43
+ tb_page_addr_t phys_page1;
44
+ target_ulong virt_page1;
45
46
/*
47
* We know that the first page matched, and an otherwise valid TB
48
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
49
* is different for the new TB. Therefore any exception raised
50
* here by the faulting lookup is not premature.
51
*/
52
- virt_page2 = TARGET_PAGE_ALIGN(desc->pc);
53
- phys_page2 = get_page_addr_code(desc->env, virt_page2);
54
- if (tb->page_addr[1] == phys_page2) {
55
+ virt_page1 = TARGET_PAGE_ALIGN(desc->pc);
56
+ phys_page1 = get_page_addr_code(desc->env, virt_page1);
57
+ if (tb->page_addr[1] == phys_page1) {
58
return true;
59
}
60
}
61
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
62
if (phys_pc == -1) {
63
return NULL;
64
}
65
- desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
66
+ desc.page_addr0 = phys_pc;
67
h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
68
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
69
}
70
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
27
index XXXXXXX..XXXXXXX 100644
71
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/tcg-op-vec.c
72
--- a/accel/tcg/cputlb.c
29
+++ b/tcg/tcg-op-vec.c
73
+++ b/accel/tcg/cputlb.c
30
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
74
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
31
return tcg_const_ones_vec(t->base_type);
75
can be detected */
76
void tlb_protect_code(ram_addr_t ram_addr)
77
{
78
- cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
79
+ cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
80
+ TARGET_PAGE_SIZE,
81
DIRTY_MEMORY_CODE);
32
}
82
}
33
83
34
-void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
84
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
35
-{
85
index XXXXXXX..XXXXXXX 100644
36
- tcg_gen_dupi_vec(MO_64, r, a);
86
--- a/accel/tcg/translate-all.c
37
-}
87
+++ b/accel/tcg/translate-all.c
38
-
88
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
39
-void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
89
qemu_spin_unlock(&tb->jmp_lock);
40
-{
90
41
- tcg_gen_dupi_vec(MO_32, r, a);
91
/* remove the TB from the hash list */
42
-}
92
- phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
43
-
93
+ phys_pc = tb->page_addr[0];
44
-void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
94
h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
45
-{
95
tb->trace_vcpu_dstate);
46
- tcg_gen_dupi_vec(MO_16, r, a);
96
if (!qht_remove(&tb_ctx.htable, tb, h)) {
47
-}
97
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
48
-
98
* we can only insert TBs that are fully initialized.
49
-void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
99
*/
50
-{
100
page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
51
- tcg_gen_dupi_vec(MO_8, r, a);
101
- tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
52
-}
102
+ tb_page_add(p, tb, 0, phys_pc);
53
-
103
if (p2) {
54
void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
104
tb_page_add(p2, tb, 1, phys_page2);
55
{
105
} else {
56
TCGTemp *rt = tcgv_vec_temp(r);
106
@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
107
if (n == 0) {
108
/* NOTE: tb_end may be after the end of the page, but
109
it is not a problem */
110
- tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
111
+ tb_start = tb->page_addr[0];
112
tb_end = tb_start + tb->size;
113
} else {
114
tb_start = tb->page_addr[1];
115
- tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
116
+ tb_end = tb_start + ((tb->page_addr[0] + tb->size)
117
+ & ~TARGET_PAGE_MASK);
118
}
119
if (!(tb_end <= start || tb_start >= end)) {
120
#ifdef TARGET_HAS_PRECISE_SMC
57
--
121
--
58
2.25.1
122
2.34.1
59
123
60
124
diff view generated by jsdifflib
1
This function has two users, who use it incompatibly.
2
In tlb_flush_page_by_mmuidx_async_0, when flushing a
3
single page, we need to flush exactly two pages.
4
In tlb_flush_range_by_mmuidx_async_0, when flushing a
5
range of pages, we need to flush N+1 pages.
6
7
This avoids double-flushing of jmp cache pages in a range.
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
11
---
3
include/tcg/tcg-op.h | 13 +--
12
accel/tcg/cputlb.c | 25 ++++++++++++++-----------
4
tcg/tcg-op.c | 227 ++++++++++++++++++++-----------------------
13
1 file changed, 14 insertions(+), 11 deletions(-)
5
2 files changed, 109 insertions(+), 131 deletions(-)
6
14
7
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
15
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
8
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
9
--- a/include/tcg/tcg-op.h
17
--- a/accel/tcg/cputlb.c
10
+++ b/include/tcg/tcg-op.h
18
+++ b/accel/tcg/cputlb.c
11
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mb(TCGBar);
19
@@ -XXX,XX +XXX,XX @@ static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
12
13
/* 32 bit ops */
14
15
+void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg);
16
void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
17
void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2);
18
void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
19
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg)
20
}
20
}
21
}
21
}
22
22
23
-static inline void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
23
-static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
24
-{
24
-{
25
- tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg);
25
- /* Discard jump cache entries for any tb which might potentially
26
- overlap the flushed page. */
27
- tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
28
- tb_jmp_cache_clear_page(cpu, addr);
26
-}
29
-}
27
-
30
-
28
static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2,
31
/**
29
tcg_target_long offset)
32
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
30
{
33
* @desc: The CPUTLBDesc portion of the TLB
31
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg)
34
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
32
35
}
33
/* 64 bit ops */
36
qemu_spin_unlock(&env_tlb(env)->c.lock);
34
37
35
+void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg);
38
- tb_flush_jmp_cache(cpu, addr);
36
void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
39
+ /*
37
void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2);
40
+ * Discard jump cache entries for any tb which might potentially
38
void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
41
+ * overlap the flushed page, which includes the previous.
39
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
42
+ */
43
+ tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
44
+ tb_jmp_cache_clear_page(cpu, addr);
45
}
46
47
/**
48
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
49
return;
50
}
51
52
- for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) {
53
- tb_flush_jmp_cache(cpu, d.addr + i);
54
+ /*
55
+ * Discard jump cache entries for any tb which might potentially
56
+ * overlap the flushed pages, which includes the previous.
57
+ */
58
+ d.addr -= TARGET_PAGE_SIZE;
59
+ for (target_ulong i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
60
+ tb_jmp_cache_clear_page(cpu, d.addr);
61
+ d.addr += TARGET_PAGE_SIZE;
40
}
62
}
41
}
63
}
42
64
43
-static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
44
-{
45
- tcg_gen_op2i_i64(INDEX_op_movi_i64, ret, arg);
46
-}
47
-
48
static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2,
49
tcg_target_long offset)
50
{
51
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
52
53
void tcg_gen_discard_i64(TCGv_i64 arg);
54
void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg);
55
-void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg);
56
void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
57
void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
58
void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
59
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/tcg/tcg-op.c
62
+++ b/tcg/tcg-op.c
63
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mb(TCGBar mb_type)
64
65
/* 32 bit ops */
66
67
+void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
68
+{
69
+ tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
70
+}
71
+
72
void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
73
{
74
/* some cases can be optimized here */
75
if (arg2 == 0) {
76
tcg_gen_mov_i32(ret, arg1);
77
} else {
78
- TCGv_i32 t0 = tcg_const_i32(arg2);
79
- tcg_gen_add_i32(ret, arg1, t0);
80
- tcg_temp_free_i32(t0);
81
+ tcg_gen_add_i32(ret, arg1, tcg_constant_i32(arg2));
82
}
83
}
84
85
@@ -XXX,XX +XXX,XX @@ void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
86
/* Don't recurse with tcg_gen_neg_i32. */
87
tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
88
} else {
89
- TCGv_i32 t0 = tcg_const_i32(arg1);
90
- tcg_gen_sub_i32(ret, t0, arg2);
91
- tcg_temp_free_i32(t0);
92
+ tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2);
93
}
94
}
95
96
@@ -XXX,XX +XXX,XX @@ void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
97
if (arg2 == 0) {
98
tcg_gen_mov_i32(ret, arg1);
99
} else {
100
- TCGv_i32 t0 = tcg_const_i32(arg2);
101
- tcg_gen_sub_i32(ret, arg1, t0);
102
- tcg_temp_free_i32(t0);
103
+ tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
104
}
105
}
106
107
void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
108
{
109
- TCGv_i32 t0;
110
/* Some cases can be optimized here. */
111
switch (arg2) {
112
case 0:
113
@@ -XXX,XX +XXX,XX @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
114
}
115
break;
116
}
117
- t0 = tcg_const_i32(arg2);
118
- tcg_gen_and_i32(ret, arg1, t0);
119
- tcg_temp_free_i32(t0);
120
+
121
+ tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2));
122
}
123
124
void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
125
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
126
} else if (arg2 == 0) {
127
tcg_gen_mov_i32(ret, arg1);
128
} else {
129
- TCGv_i32 t0 = tcg_const_i32(arg2);
130
- tcg_gen_or_i32(ret, arg1, t0);
131
- tcg_temp_free_i32(t0);
132
+ tcg_gen_or_i32(ret, arg1, tcg_constant_i32(arg2));
133
}
134
}
135
136
@@ -XXX,XX +XXX,XX @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
137
/* Don't recurse with tcg_gen_not_i32. */
138
tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
139
} else {
140
- TCGv_i32 t0 = tcg_const_i32(arg2);
141
- tcg_gen_xor_i32(ret, arg1, t0);
142
- tcg_temp_free_i32(t0);
143
+ tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2));
144
}
145
}
146
147
@@ -XXX,XX +XXX,XX @@ void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
148
if (arg2 == 0) {
149
tcg_gen_mov_i32(ret, arg1);
150
} else {
151
- TCGv_i32 t0 = tcg_const_i32(arg2);
152
- tcg_gen_shl_i32(ret, arg1, t0);
153
- tcg_temp_free_i32(t0);
154
+ tcg_gen_shl_i32(ret, arg1, tcg_constant_i32(arg2));
155
}
156
}
157
158
@@ -XXX,XX +XXX,XX @@ void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
159
if (arg2 == 0) {
160
tcg_gen_mov_i32(ret, arg1);
161
} else {
162
- TCGv_i32 t0 = tcg_const_i32(arg2);
163
- tcg_gen_shr_i32(ret, arg1, t0);
164
- tcg_temp_free_i32(t0);
165
+ tcg_gen_shr_i32(ret, arg1, tcg_constant_i32(arg2));
166
}
167
}
168
169
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
170
if (arg2 == 0) {
171
tcg_gen_mov_i32(ret, arg1);
172
} else {
173
- TCGv_i32 t0 = tcg_const_i32(arg2);
174
- tcg_gen_sar_i32(ret, arg1, t0);
175
- tcg_temp_free_i32(t0);
176
+ tcg_gen_sar_i32(ret, arg1, tcg_constant_i32(arg2));
177
}
178
}
179
180
@@ -XXX,XX +XXX,XX @@ void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
181
if (cond == TCG_COND_ALWAYS) {
182
tcg_gen_br(l);
183
} else if (cond != TCG_COND_NEVER) {
184
- TCGv_i32 t0 = tcg_const_i32(arg2);
185
- tcg_gen_brcond_i32(cond, arg1, t0, l);
186
- tcg_temp_free_i32(t0);
187
+ tcg_gen_brcond_i32(cond, arg1, tcg_constant_i32(arg2), l);
188
}
189
}
190
191
@@ -XXX,XX +XXX,XX @@ void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
192
void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
193
TCGv_i32 arg1, int32_t arg2)
194
{
195
- TCGv_i32 t0 = tcg_const_i32(arg2);
196
- tcg_gen_setcond_i32(cond, ret, arg1, t0);
197
- tcg_temp_free_i32(t0);
198
+ tcg_gen_setcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
199
}
200
201
void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
202
@@ -XXX,XX +XXX,XX @@ void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
203
} else if (is_power_of_2(arg2)) {
204
tcg_gen_shli_i32(ret, arg1, ctz32(arg2));
205
} else {
206
- TCGv_i32 t0 = tcg_const_i32(arg2);
207
- tcg_gen_mul_i32(ret, arg1, t0);
208
- tcg_temp_free_i32(t0);
209
+ tcg_gen_mul_i32(ret, arg1, tcg_constant_i32(arg2));
210
}
211
}
212
213
@@ -XXX,XX +XXX,XX @@ void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
214
215
void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
216
{
217
- TCGv_i32 t = tcg_const_i32(arg2);
218
- tcg_gen_clz_i32(ret, arg1, t);
219
- tcg_temp_free_i32(t);
220
+ tcg_gen_clz_i32(ret, arg1, tcg_constant_i32(arg2));
221
}
222
223
void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
224
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
225
tcg_gen_clzi_i32(t, t, 32);
226
tcg_gen_xori_i32(t, t, 31);
227
}
228
- z = tcg_const_i32(0);
229
+ z = tcg_constant_i32(0);
230
tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
231
tcg_temp_free_i32(t);
232
- tcg_temp_free_i32(z);
233
} else {
234
gen_helper_ctz_i32(ret, arg1, arg2);
235
}
236
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
237
tcg_gen_ctpop_i32(ret, t);
238
tcg_temp_free_i32(t);
239
} else {
240
- TCGv_i32 t = tcg_const_i32(arg2);
241
- tcg_gen_ctz_i32(ret, arg1, t);
242
- tcg_temp_free_i32(t);
243
+ tcg_gen_ctz_i32(ret, arg1, tcg_constant_i32(arg2));
244
}
245
}
246
247
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
248
if (arg2 == 0) {
249
tcg_gen_mov_i32(ret, arg1);
250
} else if (TCG_TARGET_HAS_rot_i32) {
251
- TCGv_i32 t0 = tcg_const_i32(arg2);
252
- tcg_gen_rotl_i32(ret, arg1, t0);
253
- tcg_temp_free_i32(t0);
254
+ tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2));
255
} else {
256
TCGv_i32 t0, t1;
257
t0 = tcg_temp_new_i32();
258
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
259
tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
260
} else if (TCG_TARGET_HAS_deposit_i32
261
&& TCG_TARGET_deposit_i32_valid(ofs, len)) {
262
- TCGv_i32 zero = tcg_const_i32(0);
263
+ TCGv_i32 zero = tcg_constant_i32(0);
264
tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
265
- tcg_temp_free_i32(zero);
266
} else {
267
/* To help two-operand hosts we prefer to zero-extend first,
268
which allows ARG to stay live. */
269
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
270
} else {
271
TCGv_i32 t0 = tcg_temp_new_i32();
272
TCGv_i32 t1 = tcg_temp_new_i32();
273
- TCGv_i32 t2 = tcg_const_i32(0x00ff00ff);
274
+ TCGv_i32 t2 = tcg_constant_i32(0x00ff00ff);
275
276
/* arg = abcd */
277
tcg_gen_shri_i32(t0, arg, 8); /* t0 = .abc */
278
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
279
280
tcg_temp_free_i32(t0);
281
tcg_temp_free_i32(t1);
282
- tcg_temp_free_i32(t2);
283
}
284
}
285
286
@@ -XXX,XX +XXX,XX @@ void tcg_gen_discard_i64(TCGv_i64 arg)
287
288
void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
289
{
290
- tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
291
- tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
292
+ TCGTemp *ts = tcgv_i64_temp(arg);
293
+
294
+ /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */
295
+ if (ts->kind == TEMP_CONST) {
296
+ tcg_gen_movi_i64(ret, ts->val);
297
+ } else {
298
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
299
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
300
+ }
301
}
302
303
void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
304
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
305
tcg_temp_free_i64(t0);
306
tcg_temp_free_i32(t1);
307
}
308
+
309
+#else
310
+
311
+void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
312
+{
313
+ tcg_gen_mov_i64(ret, tcg_constant_i64(arg));
314
+}
315
+
316
#endif /* TCG_TARGET_REG_SIZE == 32 */
317
318
void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
319
@@ -XXX,XX +XXX,XX @@ void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
320
/* some cases can be optimized here */
321
if (arg2 == 0) {
322
tcg_gen_mov_i64(ret, arg1);
323
+ } else if (TCG_TARGET_REG_BITS == 64) {
324
+ tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
325
} else {
326
- TCGv_i64 t0 = tcg_const_i64(arg2);
327
- tcg_gen_add_i64(ret, arg1, t0);
328
- tcg_temp_free_i64(t0);
329
+ tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
330
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
331
+ tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
332
}
333
}
334
335
@@ -XXX,XX +XXX,XX @@ void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
336
if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
337
/* Don't recurse with tcg_gen_neg_i64. */
338
tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
339
+ } else if (TCG_TARGET_REG_BITS == 64) {
340
+ tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2);
341
} else {
342
- TCGv_i64 t0 = tcg_const_i64(arg1);
343
- tcg_gen_sub_i64(ret, t0, arg2);
344
- tcg_temp_free_i64(t0);
345
+ tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
346
+ tcg_constant_i32(arg1), tcg_constant_i32(arg1 >> 32),
347
+ TCGV_LOW(arg2), TCGV_HIGH(arg2));
348
}
349
}
350
351
@@ -XXX,XX +XXX,XX @@ void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
352
/* some cases can be optimized here */
353
if (arg2 == 0) {
354
tcg_gen_mov_i64(ret, arg1);
355
+ } else if (TCG_TARGET_REG_BITS == 64) {
356
+ tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
357
} else {
358
- TCGv_i64 t0 = tcg_const_i64(arg2);
359
- tcg_gen_sub_i64(ret, arg1, t0);
360
- tcg_temp_free_i64(t0);
361
+ tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
362
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
363
+ tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
364
}
365
}
366
367
void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
368
{
369
- TCGv_i64 t0;
370
-
371
if (TCG_TARGET_REG_BITS == 32) {
372
tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
373
tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
374
@@ -XXX,XX +XXX,XX @@ void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
375
}
376
break;
377
}
378
- t0 = tcg_const_i64(arg2);
379
- tcg_gen_and_i64(ret, arg1, t0);
380
- tcg_temp_free_i64(t0);
381
+
382
+ tcg_gen_and_i64(ret, arg1, tcg_constant_i64(arg2));
383
}
384
385
void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
386
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
387
} else if (arg2 == 0) {
388
tcg_gen_mov_i64(ret, arg1);
389
} else {
390
- TCGv_i64 t0 = tcg_const_i64(arg2);
391
- tcg_gen_or_i64(ret, arg1, t0);
392
- tcg_temp_free_i64(t0);
393
+ tcg_gen_or_i64(ret, arg1, tcg_constant_i64(arg2));
394
}
395
}
396
397
@@ -XXX,XX +XXX,XX @@ void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
398
/* Don't recurse with tcg_gen_not_i64. */
399
tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
400
} else {
401
- TCGv_i64 t0 = tcg_const_i64(arg2);
402
- tcg_gen_xor_i64(ret, arg1, t0);
403
- tcg_temp_free_i64(t0);
404
+ tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2));
405
}
406
}
407
408
@@ -XXX,XX +XXX,XX @@ void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
409
} else if (arg2 == 0) {
410
tcg_gen_mov_i64(ret, arg1);
411
} else {
412
- TCGv_i64 t0 = tcg_const_i64(arg2);
413
- tcg_gen_shl_i64(ret, arg1, t0);
414
- tcg_temp_free_i64(t0);
415
+ tcg_gen_shl_i64(ret, arg1, tcg_constant_i64(arg2));
416
}
417
}
418
419
@@ -XXX,XX +XXX,XX @@ void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
420
} else if (arg2 == 0) {
421
tcg_gen_mov_i64(ret, arg1);
422
} else {
423
- TCGv_i64 t0 = tcg_const_i64(arg2);
424
- tcg_gen_shr_i64(ret, arg1, t0);
425
- tcg_temp_free_i64(t0);
426
+ tcg_gen_shr_i64(ret, arg1, tcg_constant_i64(arg2));
427
}
428
}
429
430
@@ -XXX,XX +XXX,XX @@ void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
431
} else if (arg2 == 0) {
432
tcg_gen_mov_i64(ret, arg1);
433
} else {
434
- TCGv_i64 t0 = tcg_const_i64(arg2);
435
- tcg_gen_sar_i64(ret, arg1, t0);
436
- tcg_temp_free_i64(t0);
437
+ tcg_gen_sar_i64(ret, arg1, tcg_constant_i64(arg2));
438
}
439
}
440
441
@@ -XXX,XX +XXX,XX @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
442
443
void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
444
{
445
- if (cond == TCG_COND_ALWAYS) {
446
+ if (TCG_TARGET_REG_BITS == 64) {
447
+ tcg_gen_brcond_i64(cond, arg1, tcg_constant_i64(arg2), l);
448
+ } else if (cond == TCG_COND_ALWAYS) {
449
tcg_gen_br(l);
450
} else if (cond != TCG_COND_NEVER) {
451
- TCGv_i64 t0 = tcg_const_i64(arg2);
452
- tcg_gen_brcond_i64(cond, arg1, t0, l);
453
- tcg_temp_free_i64(t0);
454
+ l->refs++;
455
+ tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
456
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
457
+ tcg_constant_i32(arg2),
458
+ tcg_constant_i32(arg2 >> 32),
459
+ cond, label_arg(l));
460
}
461
}
462
463
@@ -XXX,XX +XXX,XX @@ void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
464
void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
465
TCGv_i64 arg1, int64_t arg2)
466
{
467
- TCGv_i64 t0 = tcg_const_i64(arg2);
468
- tcg_gen_setcond_i64(cond, ret, arg1, t0);
469
- tcg_temp_free_i64(t0);
470
+ if (TCG_TARGET_REG_BITS == 64) {
471
+ tcg_gen_setcond_i64(cond, ret, arg1, tcg_constant_i64(arg2));
472
+ } else if (cond == TCG_COND_ALWAYS) {
473
+ tcg_gen_movi_i64(ret, 1);
474
+ } else if (cond == TCG_COND_NEVER) {
475
+ tcg_gen_movi_i64(ret, 0);
476
+ } else {
477
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
478
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
479
+ tcg_constant_i32(arg2),
480
+ tcg_constant_i32(arg2 >> 32), cond);
481
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
482
+ }
483
}
484
485
void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
486
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
487
} else {
488
TCGv_i64 t0 = tcg_temp_new_i64();
489
TCGv_i64 t1 = tcg_temp_new_i64();
490
- TCGv_i64 t2 = tcg_const_i64(0x00ff00ff);
491
+ TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
492
493
/* arg = ....abcd */
494
tcg_gen_shri_i64(t0, arg, 8); /* t0 = .....abc */
495
@@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
496
497
tcg_temp_free_i64(t0);
498
tcg_temp_free_i64(t1);
499
- tcg_temp_free_i64(t2);
500
}
501
}
502
503
@@ -XXX,XX +XXX,XX @@ void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
504
if (TCG_TARGET_REG_BITS == 32
505
&& TCG_TARGET_HAS_clz_i32
506
&& arg2 <= 0xffffffffu) {
507
- TCGv_i32 t = tcg_const_i32((uint32_t)arg2 - 32);
508
- tcg_gen_clz_i32(t, TCGV_LOW(arg1), t);
509
+ TCGv_i32 t = tcg_temp_new_i32();
510
+ tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32);
511
tcg_gen_addi_i32(t, t, 32);
512
tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
513
tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
514
tcg_temp_free_i32(t);
515
} else {
516
- TCGv_i64 t = tcg_const_i64(arg2);
517
- tcg_gen_clz_i64(ret, arg1, t);
518
- tcg_temp_free_i64(t);
519
+ TCGv_i64 t0 = tcg_const_i64(arg2);
520
+ tcg_gen_clz_i64(ret, arg1, t0);
521
+ tcg_temp_free_i64(t0);
522
}
523
}
524
525
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
526
tcg_gen_clzi_i64(t, t, 64);
527
tcg_gen_xori_i64(t, t, 63);
528
}
529
- z = tcg_const_i64(0);
530
+ z = tcg_constant_i64(0);
531
tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
532
tcg_temp_free_i64(t);
533
tcg_temp_free_i64(z);
534
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
535
if (TCG_TARGET_REG_BITS == 32
536
&& TCG_TARGET_HAS_ctz_i32
537
&& arg2 <= 0xffffffffu) {
538
- TCGv_i32 t32 = tcg_const_i32((uint32_t)arg2 - 32);
539
- tcg_gen_ctz_i32(t32, TCGV_HIGH(arg1), t32);
540
+ TCGv_i32 t32 = tcg_temp_new_i32();
541
+ tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32);
542
tcg_gen_addi_i32(t32, t32, 32);
543
tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
544
tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
545
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
546
tcg_gen_ctpop_i64(ret, t);
547
tcg_temp_free_i64(t);
548
} else {
549
- TCGv_i64 t64 = tcg_const_i64(arg2);
550
- tcg_gen_ctz_i64(ret, arg1, t64);
551
- tcg_temp_free_i64(t64);
552
+ TCGv_i64 t0 = tcg_const_i64(arg2);
553
+ tcg_gen_ctz_i64(ret, arg1, t0);
554
+ tcg_temp_free_i64(t0);
555
}
556
}
557
558
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
559
if (arg2 == 0) {
560
tcg_gen_mov_i64(ret, arg1);
561
} else if (TCG_TARGET_HAS_rot_i64) {
562
- TCGv_i64 t0 = tcg_const_i64(arg2);
563
- tcg_gen_rotl_i64(ret, arg1, t0);
564
- tcg_temp_free_i64(t0);
565
+ tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2));
566
} else {
567
TCGv_i64 t0, t1;
568
t0 = tcg_temp_new_i64();
569
@@ -XXX,XX +XXX,XX @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
570
tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
571
} else if (TCG_TARGET_HAS_deposit_i64
572
&& TCG_TARGET_deposit_i64_valid(ofs, len)) {
573
- TCGv_i64 zero = tcg_const_i64(0);
574
+ TCGv_i64 zero = tcg_constant_i64(0);
575
tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
576
- tcg_temp_free_i64(zero);
577
} else {
578
if (TCG_TARGET_REG_BITS == 32) {
579
if (ofs >= 32) {
580
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
581
582
#ifdef CONFIG_SOFTMMU
583
{
584
- TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
585
- gen(retv, cpu_env, addr, cmpv, newv, oi);
586
- tcg_temp_free_i32(oi);
587
+ TCGMemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
588
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
589
}
590
#else
591
gen(retv, cpu_env, addr, cmpv, newv);
592
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
593
594
#ifdef CONFIG_SOFTMMU
595
{
596
- TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop, idx));
597
- gen(retv, cpu_env, addr, cmpv, newv, oi);
598
- tcg_temp_free_i32(oi);
599
+ TCGMemOpIdx oi = make_memop_idx(memop, idx);
600
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
601
}
602
#else
603
gen(retv, cpu_env, addr, cmpv, newv);
604
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
605
606
#ifdef CONFIG_SOFTMMU
607
{
608
- TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
609
- gen(ret, cpu_env, addr, val, oi);
610
- tcg_temp_free_i32(oi);
611
+ TCGMemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
612
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
613
}
614
#else
615
gen(ret, cpu_env, addr, val);
616
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
617
618
#ifdef CONFIG_SOFTMMU
619
{
620
- TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
621
- gen(ret, cpu_env, addr, val, oi);
622
- tcg_temp_free_i32(oi);
623
+ TCGMemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
624
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
625
}
626
#else
627
gen(ret, cpu_env, addr, val);
628
--
65
--
629
2.25.1
66
2.34.1
630
67
631
68
diff view generated by jsdifflib
1
In most, but not all, places that we check for TEMP_FIXED,
1
Wrap the bare TranslationBlock pointer into a structure.
2
we are really testing that we do not modify the temporary.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
include/tcg/tcg.h | 5 +++++
7
accel/tcg/tb-hash.h | 1 +
9
tcg/tcg.c | 21 ++++++++++-----------
8
accel/tcg/tb-jmp-cache.h | 24 ++++++++++++++++++++++++
10
2 files changed, 15 insertions(+), 11 deletions(-)
9
include/exec/cpu-common.h | 1 +
10
include/hw/core/cpu.h | 15 +--------------
11
include/qemu/typedefs.h | 1 +
12
accel/stubs/tcg-stub.c | 4 ++++
13
accel/tcg/cpu-exec.c | 10 +++++++---
14
accel/tcg/cputlb.c | 9 +++++----
15
accel/tcg/translate-all.c | 28 +++++++++++++++++++++++++---
16
hw/core/cpu-common.c | 3 +--
17
plugins/core.c | 2 +-
18
trace/control-target.c | 2 +-
19
12 files changed, 72 insertions(+), 28 deletions(-)
20
create mode 100644 accel/tcg/tb-jmp-cache.h
11
21
12
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
22
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
13
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg.h
24
--- a/accel/tcg/tb-hash.h
15
+++ b/include/tcg/tcg.h
25
+++ b/accel/tcg/tb-hash.h
16
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
26
@@ -XXX,XX +XXX,XX @@
17
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
27
#include "exec/cpu-defs.h"
18
};
28
#include "exec/exec-all.h"
19
29
#include "qemu/xxhash.h"
20
+static inline bool temp_readonly(TCGTemp *ts)
30
+#include "tb-jmp-cache.h"
31
32
#ifdef CONFIG_SOFTMMU
33
34
diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
35
new file mode 100644
36
index XXXXXXX..XXXXXXX
37
--- /dev/null
38
+++ b/accel/tcg/tb-jmp-cache.h
39
@@ -XXX,XX +XXX,XX @@
40
+/*
41
+ * The per-CPU TranslationBlock jump cache.
42
+ *
43
+ * Copyright (c) 2003 Fabrice Bellard
44
+ *
45
+ * SPDX-License-Identifier: GPL-2.0-or-later
46
+ */
47
+
48
+#ifndef ACCEL_TCG_TB_JMP_CACHE_H
49
+#define ACCEL_TCG_TB_JMP_CACHE_H
50
+
51
+#define TB_JMP_CACHE_BITS 12
52
+#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
53
+
54
+/*
55
+ * Accessed in parallel; all accesses to 'tb' must be atomic.
56
+ */
57
+struct CPUJumpCache {
58
+ struct {
59
+ TranslationBlock *tb;
60
+ } array[TB_JMP_CACHE_SIZE];
61
+};
62
+
63
+#endif /* ACCEL_TCG_TB_JMP_CACHE_H */
64
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
65
index XXXXXXX..XXXXXXX 100644
66
--- a/include/exec/cpu-common.h
67
+++ b/include/exec/cpu-common.h
68
@@ -XXX,XX +XXX,XX @@ void cpu_list_unlock(void);
69
unsigned int cpu_list_generation_id_get(void);
70
71
void tcg_flush_softmmu_tlb(CPUState *cs);
72
+void tcg_flush_jmp_cache(CPUState *cs);
73
74
void tcg_iommu_init_notifier_list(CPUState *cpu);
75
void tcg_iommu_free_notifier_list(CPUState *cpu);
76
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
77
index XXXXXXX..XXXXXXX 100644
78
--- a/include/hw/core/cpu.h
79
+++ b/include/hw/core/cpu.h
80
@@ -XXX,XX +XXX,XX @@ struct kvm_run;
81
struct hax_vcpu_state;
82
struct hvf_vcpu_state;
83
84
-#define TB_JMP_CACHE_BITS 12
85
-#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
86
-
87
/* work queue */
88
89
/* The union type allows passing of 64 bit target pointers on 32 bit
90
@@ -XXX,XX +XXX,XX @@ struct CPUState {
91
CPUArchState *env_ptr;
92
IcountDecr *icount_decr_ptr;
93
94
- /* Accessed in parallel; all accesses must be atomic */
95
- TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
96
+ CPUJumpCache *tb_jmp_cache;
97
98
struct GDBRegisterState *gdb_regs;
99
int gdb_num_regs;
100
@@ -XXX,XX +XXX,XX @@ extern CPUTailQ cpus;
101
102
extern __thread CPUState *current_cpu;
103
104
-static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
105
-{
106
- unsigned int i;
107
-
108
- for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
109
- qatomic_set(&cpu->tb_jmp_cache[i], NULL);
110
- }
111
-}
112
-
113
/**
114
* qemu_tcg_mttcg_enabled:
115
* Check whether we are running MultiThread TCG or not.
116
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
117
index XXXXXXX..XXXXXXX 100644
118
--- a/include/qemu/typedefs.h
119
+++ b/include/qemu/typedefs.h
120
@@ -XXX,XX +XXX,XX @@ typedef struct CoMutex CoMutex;
121
typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
122
typedef struct CPUAddressSpace CPUAddressSpace;
123
typedef struct CPUArchState CPUArchState;
124
+typedef struct CPUJumpCache CPUJumpCache;
125
typedef struct CPUState CPUState;
126
typedef struct CPUTLBEntryFull CPUTLBEntryFull;
127
typedef struct DeviceListener DeviceListener;
128
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
129
index XXXXXXX..XXXXXXX 100644
130
--- a/accel/stubs/tcg-stub.c
131
+++ b/accel/stubs/tcg-stub.c
132
@@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
133
{
134
}
135
136
+void tcg_flush_jmp_cache(CPUState *cpu)
21
+{
137
+{
22
+ return ts->kind == TEMP_FIXED;
23
+}
138
+}
24
+
139
+
25
extern TCGContext tcg_init_ctx;
140
int probe_access_flags(CPUArchState *env, target_ulong addr,
26
extern __thread TCGContext *tcg_ctx;
141
MMUAccessType access_type, int mmu_idx,
27
extern const void *tcg_code_gen_epilogue;
142
bool nonfault, void **phost, uintptr_t retaddr)
28
diff --git a/tcg/tcg.c b/tcg/tcg.c
143
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
29
index XXXXXXX..XXXXXXX 100644
144
index XXXXXXX..XXXXXXX 100644
30
--- a/tcg/tcg.c
145
--- a/accel/tcg/cpu-exec.c
31
+++ b/tcg/tcg.c
146
+++ b/accel/tcg/cpu-exec.c
32
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
147
@@ -XXX,XX +XXX,XX @@
33
mark it free; otherwise mark it dead. */
148
#include "sysemu/replay.h"
34
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
149
#include "sysemu/tcg.h"
35
{
150
#include "exec/helper-proto.h"
36
- if (ts->kind == TEMP_FIXED) {
151
+#include "tb-jmp-cache.h"
37
+ if (temp_readonly(ts)) {
152
#include "tb-hash.h"
153
#include "tb-context.h"
154
#include "internal.h"
155
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
156
tcg_debug_assert(!(cflags & CF_INVALID));
157
158
hash = tb_jmp_cache_hash_func(pc);
159
- tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
160
+ tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);
161
162
if (likely(tb &&
163
tb->pc == pc &&
164
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
165
if (tb == NULL) {
166
return NULL;
167
}
168
- qatomic_set(&cpu->tb_jmp_cache[hash], tb);
169
+ qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb);
170
return tb;
171
}
172
173
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
174
175
tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
176
if (tb == NULL) {
177
+ uint32_t h;
178
+
179
mmap_lock();
180
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
181
mmap_unlock();
182
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
183
* We add the TB in the virtual pc hash table
184
* for the fast lookup
185
*/
186
- qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
187
+ h = tb_jmp_cache_hash_func(pc);
188
+ qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
189
}
190
191
#ifndef CONFIG_USER_ONLY
192
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
193
index XXXXXXX..XXXXXXX 100644
194
--- a/accel/tcg/cputlb.c
195
+++ b/accel/tcg/cputlb.c
196
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
197
198
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
199
{
200
- unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
201
+ int i, i0 = tb_jmp_cache_hash_page(page_addr);
202
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
203
204
for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
205
- qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
206
+ qatomic_set(&jc->array[i0 + i].tb, NULL);
207
}
208
}
209
210
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
211
212
qemu_spin_unlock(&env_tlb(env)->c.lock);
213
214
- cpu_tb_jmp_cache_clear(cpu);
215
+ tcg_flush_jmp_cache(cpu);
216
217
if (to_clean == ALL_MMUIDX_BITS) {
218
qatomic_set(&env_tlb(env)->c.full_flush_count,
219
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
220
* longer to clear each entry individually than it will to clear it all.
221
*/
222
if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) {
223
- cpu_tb_jmp_cache_clear(cpu);
224
+ tcg_flush_jmp_cache(cpu);
38
return;
225
return;
39
}
226
}
40
if (ts->val_type == TEMP_VAL_REG) {
227
41
@@ -XXX,XX +XXX,XX @@ static inline void temp_dead(TCGContext *s, TCGTemp *ts)
228
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
42
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
229
index XXXXXXX..XXXXXXX 100644
43
TCGRegSet preferred_regs, int free_or_dead)
230
--- a/accel/tcg/translate-all.c
44
{
231
+++ b/accel/tcg/translate-all.c
45
- if (ts->kind == TEMP_FIXED) {
232
@@ -XXX,XX +XXX,XX @@
46
+ if (temp_readonly(ts)) {
233
#include "sysemu/tcg.h"
47
return;
234
#include "qapi/error.h"
48
}
235
#include "hw/core/tcg-cpu-ops.h"
49
if (!ts->mem_coherent) {
236
+#include "tb-jmp-cache.h"
50
@@ -XXX,XX +XXX,XX @@ static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
237
#include "tb-hash.h"
51
{
238
#include "tb-context.h"
52
/* The liveness analysis already ensures that globals are back
239
#include "internal.h"
53
in memory. Keep an tcg_debug_assert for safety. */
240
@@ -XXX,XX +XXX,XX @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
54
- tcg_debug_assert(ts->val_type == TEMP_VAL_MEM
241
}
55
- || ts->kind == TEMP_FIXED);
242
56
+ tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
243
CPU_FOREACH(cpu) {
57
}
244
- cpu_tb_jmp_cache_clear(cpu);
58
245
+ tcg_flush_jmp_cache(cpu);
59
/* save globals to their canonical location and assume they can be
246
}
60
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
247
61
TCGRegSet preferred_regs)
248
qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
62
{
249
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
63
/* ENV should not be modified. */
250
/* remove the TB from the hash list */
64
- tcg_debug_assert(ots->kind != TEMP_FIXED);
251
h = tb_jmp_cache_hash_func(tb->pc);
65
+ tcg_debug_assert(!temp_readonly(ots));
252
CPU_FOREACH(cpu) {
66
253
- if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
67
/* The movi is not explicitly generated here. */
254
- qatomic_set(&cpu->tb_jmp_cache[h], NULL);
68
if (ots->val_type == TEMP_VAL_REG) {
255
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
69
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
256
+ if (qatomic_read(&jc->array[h].tb) == tb) {
70
ts = arg_temp(op->args[1]);
257
+ qatomic_set(&jc->array[h].tb, NULL);
71
258
}
72
/* ENV should not be modified. */
259
}
73
- tcg_debug_assert(ots->kind != TEMP_FIXED);
260
74
+ tcg_debug_assert(!temp_readonly(ots));
261
@@ -XXX,XX +XXX,XX @@ int page_unprotect(target_ulong address, uintptr_t pc)
75
262
}
76
/* Note that otype != itype for no-op truncation. */
263
#endif /* CONFIG_USER_ONLY */
77
otype = ots->type;
264
78
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
265
+/*
79
* Store the source register into the destination slot
266
+ * Called by generic code at e.g. cpu reset after cpu creation,
80
* and leave the destination temp as TEMP_VAL_MEM.
267
+ * therefore we must be prepared to allocate the jump cache.
81
*/
268
+ */
82
- assert(ots->kind != TEMP_FIXED);
269
+void tcg_flush_jmp_cache(CPUState *cpu)
83
+ assert(!temp_readonly(ots));
270
+{
84
if (!ts->mem_allocated) {
271
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
85
temp_allocate_frame(s, ots);
272
+
86
}
273
+ if (likely(jc)) {
87
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
274
+ for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
88
its = arg_temp(op->args[1]);
275
+ qatomic_set(&jc->array[i].tb, NULL);
89
276
+ }
90
/* ENV should not be modified. */
277
+ } else {
91
- tcg_debug_assert(ots->kind != TEMP_FIXED);
278
+ /* This should happen once during realize, and thus never race. */
92
+ tcg_debug_assert(!temp_readonly(ots));
279
+ jc = g_new0(CPUJumpCache, 1);
93
280
+ jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
94
itype = its->type;
281
+ assert(jc == NULL);
95
vece = TCGOP_VECE(op);
282
+ }
96
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
283
+}
97
ts = arg_temp(arg);
284
+
98
285
/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
99
/* ENV should not be modified. */
286
void tcg_flush_softmmu_tlb(CPUState *cs)
100
- tcg_debug_assert(ts->kind != TEMP_FIXED);
287
{
101
+ tcg_debug_assert(!temp_readonly(ts));
288
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
102
289
index XXXXXXX..XXXXXXX 100644
103
if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
290
--- a/hw/core/cpu-common.c
104
reg = new_args[arg_ct->alias_index];
291
+++ b/hw/core/cpu-common.c
105
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
292
@@ -XXX,XX +XXX,XX @@ static void cpu_common_reset(DeviceState *dev)
106
ts = arg_temp(op->args[i]);
293
cpu->cflags_next_tb = -1;
107
294
108
/* ENV should not be modified. */
295
if (tcg_enabled()) {
109
- tcg_debug_assert(ts->kind != TEMP_FIXED);
296
- cpu_tb_jmp_cache_clear(cpu);
110
+ tcg_debug_assert(!temp_readonly(ts));
297
-
111
298
+ tcg_flush_jmp_cache(cpu);
112
if (NEED_SYNC_ARG(i)) {
299
tcg_flush_softmmu_tlb(cpu);
113
temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
300
}
114
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
301
}
115
ts = arg_temp(arg);
302
diff --git a/plugins/core.c b/plugins/core.c
116
303
index XXXXXXX..XXXXXXX 100644
117
/* ENV should not be modified. */
304
--- a/plugins/core.c
118
- tcg_debug_assert(ts->kind != TEMP_FIXED);
305
+++ b/plugins/core.c
119
+ tcg_debug_assert(!temp_readonly(ts));
306
@@ -XXX,XX +XXX,XX @@ struct qemu_plugin_ctx *plugin_id_to_ctx_locked(qemu_plugin_id_t id)
120
307
static void plugin_cpu_update__async(CPUState *cpu, run_on_cpu_data data)
121
reg = tcg_target_call_oarg_regs[i];
308
{
122
tcg_debug_assert(s->reg_to_temp[reg] == NULL);
309
bitmap_copy(cpu->plugin_mask, &data.host_ulong, QEMU_PLUGIN_EV_MAX);
310
- cpu_tb_jmp_cache_clear(cpu);
311
+ tcg_flush_jmp_cache(cpu);
312
}
313
314
static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
315
diff --git a/trace/control-target.c b/trace/control-target.c
316
index XXXXXXX..XXXXXXX 100644
317
--- a/trace/control-target.c
318
+++ b/trace/control-target.c
319
@@ -XXX,XX +XXX,XX @@ static void trace_event_synchronize_vcpu_state_dynamic(
320
{
321
bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
322
CPU_TRACE_DSTATE_MAX_EVENTS);
323
- cpu_tb_jmp_cache_clear(vcpu);
324
+ tcg_flush_jmp_cache(vcpu);
325
}
326
327
void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
123
--
328
--
124
2.25.1
329
2.34.1
125
330
126
331
diff view generated by jsdifflib
1
This propagates the extended value of TCGTemp.val that we did before.
1
Populate this new method for all targets. Always match
2
In addition, it will be required for vector constants.
2
the result that would be given by cpu_get_tb_cpu_state,
3
as we will want these values to correspond in the logs.
3
4
5
Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (target/sparc)
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
9
---
6
tcg/optimize.c | 40 +++++++++++++++++++++-------------------
10
Cc: Eduardo Habkost <eduardo@habkost.net> (supporter:Machine core)
7
1 file changed, 21 insertions(+), 19 deletions(-)
11
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> (supporter:Machine core)
12
Cc: "Philippe Mathieu-Daudé" <f4bug@amsat.org> (reviewer:Machine core)
13
Cc: Yanan Wang <wangyanan55@huawei.com> (reviewer:Machine core)
14
Cc: Michael Rolnik <mrolnik@gmail.com> (maintainer:AVR TCG CPUs)
15
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com> (maintainer:CRIS TCG CPUs)
16
Cc: Taylor Simpson <tsimpson@quicinc.com> (supporter:Hexagon TCG CPUs)
17
Cc: Song Gao <gaosong@loongson.cn> (maintainer:LoongArch TCG CPUs)
18
Cc: Xiaojuan Yang <yangxiaojuan@loongson.cn> (maintainer:LoongArch TCG CPUs)
19
Cc: Laurent Vivier <laurent@vivier.eu> (maintainer:M68K TCG CPUs)
20
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com> (reviewer:MIPS TCG CPUs)
21
Cc: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> (reviewer:MIPS TCG CPUs)
22
Cc: Chris Wulff <crwulff@gmail.com> (maintainer:NiosII TCG CPUs)
23
Cc: Marek Vasut <marex@denx.de> (maintainer:NiosII TCG CPUs)
24
Cc: Stafford Horne <shorne@gmail.com> (odd fixer:OpenRISC TCG CPUs)
25
Cc: Yoshinori Sato <ysato@users.sourceforge.jp> (reviewer:RENESAS RX CPUs)
26
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (maintainer:SPARC TCG CPUs)
27
Cc: Bastian Koppelmann <kbastian@mail.uni-paderborn.de> (maintainer:TriCore TCG CPUs)
28
Cc: Max Filippov <jcmvbkbc@gmail.com> (maintainer:Xtensa TCG CPUs)
29
Cc: qemu-arm@nongnu.org (open list:ARM TCG CPUs)
30
Cc: qemu-ppc@nongnu.org (open list:PowerPC TCG CPUs)
31
Cc: qemu-riscv@nongnu.org (open list:RISC-V TCG CPUs)
32
Cc: qemu-s390x@nongnu.org (open list:S390 TCG CPUs)
33
---
34
include/hw/core/cpu.h | 3 +++
35
target/alpha/cpu.c | 9 +++++++++
36
target/arm/cpu.c | 13 +++++++++++++
37
target/avr/cpu.c | 8 ++++++++
38
target/cris/cpu.c | 8 ++++++++
39
target/hexagon/cpu.c | 8 ++++++++
40
target/hppa/cpu.c | 8 ++++++++
41
target/i386/cpu.c | 9 +++++++++
42
target/loongarch/cpu.c | 9 +++++++++
43
target/m68k/cpu.c | 8 ++++++++
44
target/microblaze/cpu.c | 8 ++++++++
45
target/mips/cpu.c | 8 ++++++++
46
target/nios2/cpu.c | 9 +++++++++
47
target/openrisc/cpu.c | 8 ++++++++
48
target/ppc/cpu_init.c | 8 ++++++++
49
target/riscv/cpu.c | 13 +++++++++++++
50
target/rx/cpu.c | 8 ++++++++
51
target/s390x/cpu.c | 8 ++++++++
52
target/sh4/cpu.c | 8 ++++++++
53
target/sparc/cpu.c | 8 ++++++++
54
target/tricore/cpu.c | 9 +++++++++
55
target/xtensa/cpu.c | 8 ++++++++
56
22 files changed, 186 insertions(+)
8
57
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
58
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
10
index XXXXXXX..XXXXXXX 100644
59
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
60
--- a/include/hw/core/cpu.h
12
+++ b/tcg/optimize.c
61
+++ b/include/hw/core/cpu.h
13
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
62
@@ -XXX,XX +XXX,XX @@ struct SysemuCPUOps;
14
bool is_const;
63
* If the target behaviour here is anything other than "set
15
TCGTemp *prev_copy;
64
* the PC register to the value passed in" then the target must
16
TCGTemp *next_copy;
65
* also implement the synchronize_from_tb hook.
17
- tcg_target_ulong val;
66
+ * @get_pc: Callback for getting the Program Counter register.
18
- tcg_target_ulong mask;
67
+ * As above, with the semantics of the target architecture.
19
+ uint64_t val;
68
* @gdb_read_register: Callback for letting GDB read a register.
20
+ uint64_t mask;
69
* @gdb_write_register: Callback for letting GDB write a register.
21
} TempOptInfo;
70
* @gdb_adjust_breakpoint: Callback for adjusting the address of a
22
71
@@ -XXX,XX +XXX,XX @@ struct CPUClass {
23
static inline TempOptInfo *ts_info(TCGTemp *ts)
72
void (*dump_state)(CPUState *cpu, FILE *, int flags);
24
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
73
int64_t (*get_arch_id)(CPUState *cpu);
25
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
74
void (*set_pc)(CPUState *cpu, vaddr value);
26
}
75
+ vaddr (*get_pc)(CPUState *cpu);
27
76
int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg);
28
-static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
77
int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
29
+static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, uint64_t val)
78
vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
30
{
79
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
31
const TCGOpDef *def;
80
index XXXXXXX..XXXXXXX 100644
32
TCGOpcode new_op;
81
--- a/target/alpha/cpu.c
33
- tcg_target_ulong mask;
82
+++ b/target/alpha/cpu.c
34
+ uint64_t mask;
83
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_set_pc(CPUState *cs, vaddr value)
35
TempOptInfo *di = arg_info(dst);
84
cpu->env.pc = value;
36
85
}
37
def = &tcg_op_defs[op->opc];
86
38
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
87
+static vaddr alpha_cpu_get_pc(CPUState *cs)
39
const TCGOpDef *def;
88
+{
40
TempOptInfo *di;
89
+ AlphaCPU *cpu = ALPHA_CPU(cs);
41
TempOptInfo *si;
90
+
42
- tcg_target_ulong mask;
91
+ return cpu->env.pc;
43
+ uint64_t mask;
92
+}
44
TCGOpcode new_op;
93
+
45
94
+
46
if (ts_are_copies(dst_ts, src_ts)) {
95
static bool alpha_cpu_has_work(CPUState *cs)
47
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
96
{
97
/* Here we are checking to see if the CPU should wake up from HALT.
98
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_class_init(ObjectClass *oc, void *data)
99
cc->has_work = alpha_cpu_has_work;
100
cc->dump_state = alpha_cpu_dump_state;
101
cc->set_pc = alpha_cpu_set_pc;
102
+ cc->get_pc = alpha_cpu_get_pc;
103
cc->gdb_read_register = alpha_cpu_gdb_read_register;
104
cc->gdb_write_register = alpha_cpu_gdb_write_register;
105
#ifndef CONFIG_USER_ONLY
106
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/arm/cpu.c
109
+++ b/target/arm/cpu.c
110
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_set_pc(CPUState *cs, vaddr value)
48
}
111
}
49
}
112
}
50
113
51
-static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
114
+static vaddr arm_cpu_get_pc(CPUState *cs)
52
+static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
115
+{
53
{
116
+ ARMCPU *cpu = ARM_CPU(cs);
54
uint64_t l64, h64;
117
+ CPUARMState *env = &cpu->env;
55
118
+
56
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
119
+ if (is_a64(env)) {
120
+ return env->pc;
121
+ } else {
122
+ return env->regs[15];
123
+ }
124
+}
125
+
126
#ifdef CONFIG_TCG
127
void arm_cpu_synchronize_from_tb(CPUState *cs,
128
const TranslationBlock *tb)
129
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
130
cc->has_work = arm_cpu_has_work;
131
cc->dump_state = arm_cpu_dump_state;
132
cc->set_pc = arm_cpu_set_pc;
133
+ cc->get_pc = arm_cpu_get_pc;
134
cc->gdb_read_register = arm_cpu_gdb_read_register;
135
cc->gdb_write_register = arm_cpu_gdb_write_register;
136
#ifndef CONFIG_USER_ONLY
137
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
138
index XXXXXXX..XXXXXXX 100644
139
--- a/target/avr/cpu.c
140
+++ b/target/avr/cpu.c
141
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_set_pc(CPUState *cs, vaddr value)
142
cpu->env.pc_w = value / 2; /* internally PC points to words */
143
}
144
145
+static vaddr avr_cpu_get_pc(CPUState *cs)
146
+{
147
+ AVRCPU *cpu = AVR_CPU(cs);
148
+
149
+ return cpu->env.pc_w * 2;
150
+}
151
+
152
static bool avr_cpu_has_work(CPUState *cs)
153
{
154
AVRCPU *cpu = AVR_CPU(cs);
155
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_class_init(ObjectClass *oc, void *data)
156
cc->has_work = avr_cpu_has_work;
157
cc->dump_state = avr_cpu_dump_state;
158
cc->set_pc = avr_cpu_set_pc;
159
+ cc->get_pc = avr_cpu_get_pc;
160
dc->vmsd = &vms_avr_cpu;
161
cc->sysemu_ops = &avr_sysemu_ops;
162
cc->disas_set_info = avr_cpu_disas_set_info;
163
diff --git a/target/cris/cpu.c b/target/cris/cpu.c
164
index XXXXXXX..XXXXXXX 100644
165
--- a/target/cris/cpu.c
166
+++ b/target/cris/cpu.c
167
@@ -XXX,XX +XXX,XX @@ static void cris_cpu_set_pc(CPUState *cs, vaddr value)
168
cpu->env.pc = value;
169
}
170
171
+static vaddr cris_cpu_get_pc(CPUState *cs)
172
+{
173
+ CRISCPU *cpu = CRIS_CPU(cs);
174
+
175
+ return cpu->env.pc;
176
+}
177
+
178
static bool cris_cpu_has_work(CPUState *cs)
179
{
180
return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
181
@@ -XXX,XX +XXX,XX @@ static void cris_cpu_class_init(ObjectClass *oc, void *data)
182
cc->has_work = cris_cpu_has_work;
183
cc->dump_state = cris_cpu_dump_state;
184
cc->set_pc = cris_cpu_set_pc;
185
+ cc->get_pc = cris_cpu_get_pc;
186
cc->gdb_read_register = cris_cpu_gdb_read_register;
187
cc->gdb_write_register = cris_cpu_gdb_write_register;
188
#ifndef CONFIG_USER_ONLY
189
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
190
index XXXXXXX..XXXXXXX 100644
191
--- a/target/hexagon/cpu.c
192
+++ b/target/hexagon/cpu.c
193
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_set_pc(CPUState *cs, vaddr value)
194
env->gpr[HEX_REG_PC] = value;
195
}
196
197
+static vaddr hexagon_cpu_get_pc(CPUState *cs)
198
+{
199
+ HexagonCPU *cpu = HEXAGON_CPU(cs);
200
+ CPUHexagonState *env = &cpu->env;
201
+ return env->gpr[HEX_REG_PC];
202
+}
203
+
204
static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
205
const TranslationBlock *tb)
206
{
207
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data)
208
cc->has_work = hexagon_cpu_has_work;
209
cc->dump_state = hexagon_dump_state;
210
cc->set_pc = hexagon_cpu_set_pc;
211
+ cc->get_pc = hexagon_cpu_get_pc;
212
cc->gdb_read_register = hexagon_gdb_read_register;
213
cc->gdb_write_register = hexagon_gdb_write_register;
214
cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS + NUM_QREGS;
215
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
216
index XXXXXXX..XXXXXXX 100644
217
--- a/target/hppa/cpu.c
218
+++ b/target/hppa/cpu.c
219
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_set_pc(CPUState *cs, vaddr value)
220
cpu->env.iaoq_b = value + 4;
221
}
222
223
+static vaddr hppa_cpu_get_pc(CPUState *cs)
224
+{
225
+ HPPACPU *cpu = HPPA_CPU(cs);
226
+
227
+ return cpu->env.iaoq_f;
228
+}
229
+
230
static void hppa_cpu_synchronize_from_tb(CPUState *cs,
231
const TranslationBlock *tb)
232
{
233
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_class_init(ObjectClass *oc, void *data)
234
cc->has_work = hppa_cpu_has_work;
235
cc->dump_state = hppa_cpu_dump_state;
236
cc->set_pc = hppa_cpu_set_pc;
237
+ cc->get_pc = hppa_cpu_get_pc;
238
cc->gdb_read_register = hppa_cpu_gdb_read_register;
239
cc->gdb_write_register = hppa_cpu_gdb_write_register;
240
#ifndef CONFIG_USER_ONLY
241
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
242
index XXXXXXX..XXXXXXX 100644
243
--- a/target/i386/cpu.c
244
+++ b/target/i386/cpu.c
245
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_set_pc(CPUState *cs, vaddr value)
246
cpu->env.eip = value;
247
}
248
249
+static vaddr x86_cpu_get_pc(CPUState *cs)
250
+{
251
+ X86CPU *cpu = X86_CPU(cs);
252
+
253
+ /* Match cpu_get_tb_cpu_state. */
254
+ return cpu->env.eip + cpu->env.segs[R_CS].base;
255
+}
256
+
257
int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request)
258
{
259
X86CPU *cpu = X86_CPU(cs);
260
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
261
cc->has_work = x86_cpu_has_work;
262
cc->dump_state = x86_cpu_dump_state;
263
cc->set_pc = x86_cpu_set_pc;
264
+ cc->get_pc = x86_cpu_get_pc;
265
cc->gdb_read_register = x86_cpu_gdb_read_register;
266
cc->gdb_write_register = x86_cpu_gdb_write_register;
267
cc->get_arch_id = x86_cpu_get_arch_id;
268
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
269
index XXXXXXX..XXXXXXX 100644
270
--- a/target/loongarch/cpu.c
271
+++ b/target/loongarch/cpu.c
272
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_set_pc(CPUState *cs, vaddr value)
273
env->pc = value;
274
}
275
276
+static vaddr loongarch_cpu_get_pc(CPUState *cs)
277
+{
278
+ LoongArchCPU *cpu = LOONGARCH_CPU(cs);
279
+ CPULoongArchState *env = &cpu->env;
280
+
281
+ return env->pc;
282
+}
283
+
284
#ifndef CONFIG_USER_ONLY
285
#include "hw/loongarch/virt.h"
286
287
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data)
288
cc->has_work = loongarch_cpu_has_work;
289
cc->dump_state = loongarch_cpu_dump_state;
290
cc->set_pc = loongarch_cpu_set_pc;
291
+ cc->get_pc = loongarch_cpu_get_pc;
292
#ifndef CONFIG_USER_ONLY
293
dc->vmsd = &vmstate_loongarch_cpu;
294
cc->sysemu_ops = &loongarch_sysemu_ops;
295
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
296
index XXXXXXX..XXXXXXX 100644
297
--- a/target/m68k/cpu.c
298
+++ b/target/m68k/cpu.c
299
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_set_pc(CPUState *cs, vaddr value)
300
cpu->env.pc = value;
301
}
302
303
+static vaddr m68k_cpu_get_pc(CPUState *cs)
304
+{
305
+ M68kCPU *cpu = M68K_CPU(cs);
306
+
307
+ return cpu->env.pc;
308
+}
309
+
310
static bool m68k_cpu_has_work(CPUState *cs)
311
{
312
return cs->interrupt_request & CPU_INTERRUPT_HARD;
313
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_class_init(ObjectClass *c, void *data)
314
cc->has_work = m68k_cpu_has_work;
315
cc->dump_state = m68k_cpu_dump_state;
316
cc->set_pc = m68k_cpu_set_pc;
317
+ cc->get_pc = m68k_cpu_get_pc;
318
cc->gdb_read_register = m68k_cpu_gdb_read_register;
319
cc->gdb_write_register = m68k_cpu_gdb_write_register;
320
#if defined(CONFIG_SOFTMMU)
321
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
322
index XXXXXXX..XXXXXXX 100644
323
--- a/target/microblaze/cpu.c
324
+++ b/target/microblaze/cpu.c
325
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_set_pc(CPUState *cs, vaddr value)
326
cpu->env.iflags = 0;
327
}
328
329
+static vaddr mb_cpu_get_pc(CPUState *cs)
330
+{
331
+ MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
332
+
333
+ return cpu->env.pc;
334
+}
335
+
336
static void mb_cpu_synchronize_from_tb(CPUState *cs,
337
const TranslationBlock *tb)
338
{
339
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_class_init(ObjectClass *oc, void *data)
340
341
cc->dump_state = mb_cpu_dump_state;
342
cc->set_pc = mb_cpu_set_pc;
343
+ cc->get_pc = mb_cpu_get_pc;
344
cc->gdb_read_register = mb_cpu_gdb_read_register;
345
cc->gdb_write_register = mb_cpu_gdb_write_register;
346
347
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
348
index XXXXXXX..XXXXXXX 100644
349
--- a/target/mips/cpu.c
350
+++ b/target/mips/cpu.c
351
@@ -XXX,XX +XXX,XX @@ static void mips_cpu_set_pc(CPUState *cs, vaddr value)
352
mips_env_set_pc(&cpu->env, value);
353
}
354
355
+static vaddr mips_cpu_get_pc(CPUState *cs)
356
+{
357
+ MIPSCPU *cpu = MIPS_CPU(cs);
358
+
359
+ return cpu->env.active_tc.PC;
360
+}
361
+
362
static bool mips_cpu_has_work(CPUState *cs)
363
{
364
MIPSCPU *cpu = MIPS_CPU(cs);
365
@@ -XXX,XX +XXX,XX @@ static void mips_cpu_class_init(ObjectClass *c, void *data)
366
cc->has_work = mips_cpu_has_work;
367
cc->dump_state = mips_cpu_dump_state;
368
cc->set_pc = mips_cpu_set_pc;
369
+ cc->get_pc = mips_cpu_get_pc;
370
cc->gdb_read_register = mips_cpu_gdb_read_register;
371
cc->gdb_write_register = mips_cpu_gdb_write_register;
372
#ifndef CONFIG_USER_ONLY
373
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
374
index XXXXXXX..XXXXXXX 100644
375
--- a/target/nios2/cpu.c
376
+++ b/target/nios2/cpu.c
377
@@ -XXX,XX +XXX,XX @@ static void nios2_cpu_set_pc(CPUState *cs, vaddr value)
378
env->pc = value;
379
}
380
381
+static vaddr nios2_cpu_get_pc(CPUState *cs)
382
+{
383
+ Nios2CPU *cpu = NIOS2_CPU(cs);
384
+ CPUNios2State *env = &cpu->env;
385
+
386
+ return env->pc;
387
+}
388
+
389
static bool nios2_cpu_has_work(CPUState *cs)
390
{
391
return cs->interrupt_request & CPU_INTERRUPT_HARD;
392
@@ -XXX,XX +XXX,XX @@ static void nios2_cpu_class_init(ObjectClass *oc, void *data)
393
cc->has_work = nios2_cpu_has_work;
394
cc->dump_state = nios2_cpu_dump_state;
395
cc->set_pc = nios2_cpu_set_pc;
396
+ cc->get_pc = nios2_cpu_get_pc;
397
cc->disas_set_info = nios2_cpu_disas_set_info;
398
#ifndef CONFIG_USER_ONLY
399
cc->sysemu_ops = &nios2_sysemu_ops;
400
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
401
index XXXXXXX..XXXXXXX 100644
402
--- a/target/openrisc/cpu.c
403
+++ b/target/openrisc/cpu.c
404
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_set_pc(CPUState *cs, vaddr value)
405
cpu->env.dflag = 0;
406
}
407
408
+static vaddr openrisc_cpu_get_pc(CPUState *cs)
409
+{
410
+ OpenRISCCPU *cpu = OPENRISC_CPU(cs);
411
+
412
+ return cpu->env.pc;
413
+}
414
+
415
static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
416
const TranslationBlock *tb)
417
{
418
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data)
419
cc->has_work = openrisc_cpu_has_work;
420
cc->dump_state = openrisc_cpu_dump_state;
421
cc->set_pc = openrisc_cpu_set_pc;
422
+ cc->get_pc = openrisc_cpu_get_pc;
423
cc->gdb_read_register = openrisc_cpu_gdb_read_register;
424
cc->gdb_write_register = openrisc_cpu_gdb_write_register;
425
#ifndef CONFIG_USER_ONLY
426
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
427
index XXXXXXX..XXXXXXX 100644
428
--- a/target/ppc/cpu_init.c
429
+++ b/target/ppc/cpu_init.c
430
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_set_pc(CPUState *cs, vaddr value)
431
cpu->env.nip = value;
432
}
433
434
+static vaddr ppc_cpu_get_pc(CPUState *cs)
435
+{
436
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
437
+
438
+ return cpu->env.nip;
439
+}
440
+
441
static bool ppc_cpu_has_work(CPUState *cs)
442
{
443
PowerPCCPU *cpu = POWERPC_CPU(cs);
444
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
445
cc->has_work = ppc_cpu_has_work;
446
cc->dump_state = ppc_cpu_dump_state;
447
cc->set_pc = ppc_cpu_set_pc;
448
+ cc->get_pc = ppc_cpu_get_pc;
449
cc->gdb_read_register = ppc_cpu_gdb_read_register;
450
cc->gdb_write_register = ppc_cpu_gdb_write_register;
451
#ifndef CONFIG_USER_ONLY
452
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
453
index XXXXXXX..XXXXXXX 100644
454
--- a/target/riscv/cpu.c
455
+++ b/target/riscv/cpu.c
456
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_set_pc(CPUState *cs, vaddr value)
57
}
457
}
58
}
458
}
59
459
60
-static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
460
+static vaddr riscv_cpu_get_pc(CPUState *cs)
61
+static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
461
+{
62
{
462
+ RISCVCPU *cpu = RISCV_CPU(cs);
63
const TCGOpDef *def = &tcg_op_defs[op];
463
+ CPURISCVState *env = &cpu->env;
64
- TCGArg res = do_constant_folding_2(op, x, y);
464
+
65
+ uint64_t res = do_constant_folding_2(op, x, y);
465
+ /* Match cpu_get_tb_cpu_state. */
66
if (!(def->flags & TCG_OPF_64BIT)) {
466
+ if (env->xl == MXL_RV32) {
67
res = (int32_t)res;
467
+ return env->pc & UINT32_MAX;
68
}
468
+ }
69
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
469
+ return env->pc;
70
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
470
+}
71
TCGArg y, TCGCond c)
471
+
72
{
472
static void riscv_cpu_synchronize_from_tb(CPUState *cs,
73
- tcg_target_ulong xv = arg_info(x)->val;
473
const TranslationBlock *tb)
74
- tcg_target_ulong yv = arg_info(y)->val;
474
{
75
+ uint64_t xv = arg_info(x)->val;
475
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_class_init(ObjectClass *c, void *data)
76
+ uint64_t yv = arg_info(y)->val;
476
cc->has_work = riscv_cpu_has_work;
77
+
477
cc->dump_state = riscv_cpu_dump_state;
78
if (arg_is_const(x) && arg_is_const(y)) {
478
cc->set_pc = riscv_cpu_set_pc;
79
const TCGOpDef *def = &tcg_op_defs[op];
479
+ cc->get_pc = riscv_cpu_get_pc;
80
tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
480
cc->gdb_read_register = riscv_cpu_gdb_read_register;
81
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
481
cc->gdb_write_register = riscv_cpu_gdb_write_register;
82
infos = tcg_malloc(sizeof(TempOptInfo) * nb_temps);
482
cc->gdb_num_core_regs = 33;
83
483
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
84
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
484
index XXXXXXX..XXXXXXX 100644
85
- tcg_target_ulong mask, partmask, affected;
485
--- a/target/rx/cpu.c
86
+ uint64_t mask, partmask, affected, tmp;
486
+++ b/target/rx/cpu.c
87
int nb_oargs, nb_iargs, i;
487
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_set_pc(CPUState *cs, vaddr value)
88
- TCGArg tmp;
488
cpu->env.pc = value;
89
TCGOpcode opc = op->opc;
489
}
90
const TCGOpDef *def = &tcg_op_defs[opc];
490
91
491
+static vaddr rx_cpu_get_pc(CPUState *cs)
92
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
492
+{
93
493
+ RXCPU *cpu = RX_CPU(cs);
94
CASE_OP_32_64(extract2):
494
+
95
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
495
+ return cpu->env.pc;
96
- TCGArg v1 = arg_info(op->args[1])->val;
496
+}
97
- TCGArg v2 = arg_info(op->args[2])->val;
497
+
98
+ uint64_t v1 = arg_info(op->args[1])->val;
498
static void rx_cpu_synchronize_from_tb(CPUState *cs,
99
+ uint64_t v2 = arg_info(op->args[2])->val;
499
const TranslationBlock *tb)
100
+ int shr = op->args[3];
500
{
101
501
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_class_init(ObjectClass *klass, void *data)
102
if (opc == INDEX_op_extract2_i64) {
502
cc->has_work = rx_cpu_has_work;
103
- tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
503
cc->dump_state = rx_cpu_dump_state;
104
+ tmp = (v1 >> shr) | (v2 << (64 - shr));
504
cc->set_pc = rx_cpu_set_pc;
105
} else {
505
+ cc->get_pc = rx_cpu_get_pc;
106
- tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) |
506
107
- ((uint32_t)v2 << (32 - op->args[3])));
507
#ifndef CONFIG_USER_ONLY
108
+ tmp = (int32_t)(((uint32_t)v1 >> shr) |
508
cc->sysemu_ops = &rx_sysemu_ops;
109
+ ((uint32_t)v2 << (32 - shr)));
509
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
110
}
510
index XXXXXXX..XXXXXXX 100644
111
tcg_opt_gen_movi(s, op, op->args[0], tmp);
511
--- a/target/s390x/cpu.c
112
break;
512
+++ b/target/s390x/cpu.c
113
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
513
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_set_pc(CPUState *cs, vaddr value)
114
break;
514
cpu->env.psw.addr = value;
115
}
515
}
116
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
516
117
- tcg_target_ulong tv = arg_info(op->args[3])->val;
517
+static vaddr s390_cpu_get_pc(CPUState *cs)
118
- tcg_target_ulong fv = arg_info(op->args[4])->val;
518
+{
119
+ uint64_t tv = arg_info(op->args[3])->val;
519
+ S390CPU *cpu = S390_CPU(cs);
120
+ uint64_t fv = arg_info(op->args[4])->val;
520
+
121
TCGCond cond = op->args[5];
521
+ return cpu->env.psw.addr;
122
+
522
+}
123
if (fv == 1 && tv == 0) {
523
+
124
cond = tcg_invert_cond(cond);
524
static bool s390_cpu_has_work(CPUState *cs)
125
} else if (!(tv == 1 && fv == 0)) {
525
{
526
S390CPU *cpu = S390_CPU(cs);
527
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_class_init(ObjectClass *oc, void *data)
528
cc->has_work = s390_cpu_has_work;
529
cc->dump_state = s390_cpu_dump_state;
530
cc->set_pc = s390_cpu_set_pc;
531
+ cc->get_pc = s390_cpu_get_pc;
532
cc->gdb_read_register = s390_cpu_gdb_read_register;
533
cc->gdb_write_register = s390_cpu_gdb_write_register;
534
#ifndef CONFIG_USER_ONLY
535
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
536
index XXXXXXX..XXXXXXX 100644
537
--- a/target/sh4/cpu.c
538
+++ b/target/sh4/cpu.c
539
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_set_pc(CPUState *cs, vaddr value)
540
cpu->env.pc = value;
541
}
542
543
+static vaddr superh_cpu_get_pc(CPUState *cs)
544
+{
545
+ SuperHCPU *cpu = SUPERH_CPU(cs);
546
+
547
+ return cpu->env.pc;
548
+}
549
+
550
static void superh_cpu_synchronize_from_tb(CPUState *cs,
551
const TranslationBlock *tb)
552
{
553
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_class_init(ObjectClass *oc, void *data)
554
cc->has_work = superh_cpu_has_work;
555
cc->dump_state = superh_cpu_dump_state;
556
cc->set_pc = superh_cpu_set_pc;
557
+ cc->get_pc = superh_cpu_get_pc;
558
cc->gdb_read_register = superh_cpu_gdb_read_register;
559
cc->gdb_write_register = superh_cpu_gdb_write_register;
560
#ifndef CONFIG_USER_ONLY
561
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
562
index XXXXXXX..XXXXXXX 100644
563
--- a/target/sparc/cpu.c
564
+++ b/target/sparc/cpu.c
565
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_set_pc(CPUState *cs, vaddr value)
566
cpu->env.npc = value + 4;
567
}
568
569
+static vaddr sparc_cpu_get_pc(CPUState *cs)
570
+{
571
+ SPARCCPU *cpu = SPARC_CPU(cs);
572
+
573
+ return cpu->env.pc;
574
+}
575
+
576
static void sparc_cpu_synchronize_from_tb(CPUState *cs,
577
const TranslationBlock *tb)
578
{
579
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data)
580
cc->memory_rw_debug = sparc_cpu_memory_rw_debug;
581
#endif
582
cc->set_pc = sparc_cpu_set_pc;
583
+ cc->get_pc = sparc_cpu_get_pc;
584
cc->gdb_read_register = sparc_cpu_gdb_read_register;
585
cc->gdb_write_register = sparc_cpu_gdb_write_register;
586
#ifndef CONFIG_USER_ONLY
587
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
588
index XXXXXXX..XXXXXXX 100644
589
--- a/target/tricore/cpu.c
590
+++ b/target/tricore/cpu.c
591
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_set_pc(CPUState *cs, vaddr value)
592
env->PC = value & ~(target_ulong)1;
593
}
594
595
+static vaddr tricore_cpu_get_pc(CPUState *cs)
596
+{
597
+ TriCoreCPU *cpu = TRICORE_CPU(cs);
598
+ CPUTriCoreState *env = &cpu->env;
599
+
600
+ return env->PC;
601
+}
602
+
603
static void tricore_cpu_synchronize_from_tb(CPUState *cs,
604
const TranslationBlock *tb)
605
{
606
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_class_init(ObjectClass *c, void *data)
607
608
cc->dump_state = tricore_cpu_dump_state;
609
cc->set_pc = tricore_cpu_set_pc;
610
+ cc->get_pc = tricore_cpu_get_pc;
611
cc->sysemu_ops = &tricore_sysemu_ops;
612
cc->tcg_ops = &tricore_tcg_ops;
613
}
614
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
615
index XXXXXXX..XXXXXXX 100644
616
--- a/target/xtensa/cpu.c
617
+++ b/target/xtensa/cpu.c
618
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_set_pc(CPUState *cs, vaddr value)
619
cpu->env.pc = value;
620
}
621
622
+static vaddr xtensa_cpu_get_pc(CPUState *cs)
623
+{
624
+ XtensaCPU *cpu = XTENSA_CPU(cs);
625
+
626
+ return cpu->env.pc;
627
+}
628
+
629
static bool xtensa_cpu_has_work(CPUState *cs)
630
{
631
#ifndef CONFIG_USER_ONLY
632
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data)
633
cc->has_work = xtensa_cpu_has_work;
634
cc->dump_state = xtensa_cpu_dump_state;
635
cc->set_pc = xtensa_cpu_set_pc;
636
+ cc->get_pc = xtensa_cpu_get_pc;
637
cc->gdb_read_register = xtensa_cpu_gdb_read_register;
638
cc->gdb_write_register = xtensa_cpu_gdb_write_register;
639
cc->gdb_stop_before_watchpoint = true;
126
--
640
--
127
2.25.1
641
2.34.1
128
642
129
643
diff view generated by jsdifflib
1
The temp_fixed, temp_global, temp_local bits are all related.
1
The availability of tb->pc will shortly be conditional.
2
Combine them into a single enumeration.
2
Introduce accessor functions to minimize ifdefs.
3
4
Pass around a known pc to places like tcg_gen_code,
5
where the caller must already have the value.
3
6
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
9
---
8
include/tcg/tcg.h | 20 +++++---
10
accel/tcg/internal.h | 6 ++++
9
tcg/optimize.c | 8 +--
11
include/exec/exec-all.h | 6 ++++
10
tcg/tcg.c | 126 ++++++++++++++++++++++++++++------------------
12
include/tcg/tcg.h | 2 +-
11
3 files changed, 92 insertions(+), 62 deletions(-)
13
accel/tcg/cpu-exec.c | 46 ++++++++++++++-----------
14
accel/tcg/translate-all.c | 37 +++++++++++---------
15
target/arm/cpu.c | 4 +--
16
target/avr/cpu.c | 2 +-
17
target/hexagon/cpu.c | 2 +-
18
target/hppa/cpu.c | 4 +--
19
target/i386/tcg/tcg-cpu.c | 2 +-
20
target/loongarch/cpu.c | 2 +-
21
target/microblaze/cpu.c | 2 +-
22
target/mips/tcg/exception.c | 2 +-
23
target/mips/tcg/sysemu/special_helper.c | 2 +-
24
target/openrisc/cpu.c | 2 +-
25
target/riscv/cpu.c | 4 +--
26
target/rx/cpu.c | 2 +-
27
target/sh4/cpu.c | 4 +--
28
target/sparc/cpu.c | 2 +-
29
target/tricore/cpu.c | 2 +-
30
tcg/tcg.c | 8 ++---
31
21 files changed, 82 insertions(+), 61 deletions(-)
12
32
33
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/accel/tcg/internal.h
36
+++ b/accel/tcg/internal.h
37
@@ -XXX,XX +XXX,XX @@ G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
38
void page_init(void);
39
void tb_htable_init(void);
40
41
+/* Return the current PC from CPU, which may be cached in TB. */
42
+static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
43
+{
44
+ return tb_pc(tb);
45
+}
46
+
47
#endif /* ACCEL_TCG_INTERNAL_H */
48
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
49
index XXXXXXX..XXXXXXX 100644
50
--- a/include/exec/exec-all.h
51
+++ b/include/exec/exec-all.h
52
@@ -XXX,XX +XXX,XX @@ struct TranslationBlock {
53
uintptr_t jmp_dest[2];
54
};
55
56
+/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
57
+static inline target_ulong tb_pc(const TranslationBlock *tb)
58
+{
59
+ return tb->pc;
60
+}
61
+
62
/* Hide the qatomic_read to make code a little easier on the eyes */
63
static inline uint32_t tb_cflags(const TranslationBlock *tb)
64
{
13
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
65
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
14
index XXXXXXX..XXXXXXX 100644
66
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg.h
67
--- a/include/tcg/tcg.h
16
+++ b/include/tcg/tcg.h
68
+++ b/include/tcg/tcg.h
17
@@ -XXX,XX +XXX,XX @@ typedef enum TCGTempVal {
69
@@ -XXX,XX +XXX,XX @@ void tcg_register_thread(void);
18
TEMP_VAL_CONST,
70
void tcg_prologue_init(TCGContext *s);
19
} TCGTempVal;
71
void tcg_func_start(TCGContext *s);
20
72
21
+typedef enum TCGTempKind {
73
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb);
22
+ /* Temp is dead at the end of all basic blocks. */
74
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
23
+ TEMP_NORMAL,
75
24
+ /* Temp is saved across basic blocks but dead at the end of TBs. */
76
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size);
25
+ TEMP_LOCAL,
77
26
+ /* Temp is saved across both basic blocks and translation blocks. */
78
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
27
+ TEMP_GLOBAL,
79
index XXXXXXX..XXXXXXX 100644
28
+ /* Temp is in a fixed register. */
80
--- a/accel/tcg/cpu-exec.c
29
+ TEMP_FIXED,
81
+++ b/accel/tcg/cpu-exec.c
30
+} TCGTempKind;
82
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
83
const TranslationBlock *tb = p;
84
const struct tb_desc *desc = d;
85
86
- if (tb->pc == desc->pc &&
87
+ if (tb_pc(tb) == desc->pc &&
88
tb->page_addr[0] == desc->page_addr0 &&
89
tb->cs_base == desc->cs_base &&
90
tb->flags == desc->flags &&
91
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
92
return tb;
93
}
94
95
-static inline void log_cpu_exec(target_ulong pc, CPUState *cpu,
96
- const TranslationBlock *tb)
97
+static void log_cpu_exec(target_ulong pc, CPUState *cpu,
98
+ const TranslationBlock *tb)
99
{
100
- if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC))
101
- && qemu_log_in_addr_range(pc)) {
102
-
103
+ if (qemu_log_in_addr_range(pc)) {
104
qemu_log_mask(CPU_LOG_EXEC,
105
"Trace %d: %p [" TARGET_FMT_lx
106
"/" TARGET_FMT_lx "/%08x/%08x] %s\n",
107
@@ -XXX,XX +XXX,XX @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
108
return tcg_code_gen_epilogue;
109
}
110
111
- log_cpu_exec(pc, cpu, tb);
112
+ if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
113
+ log_cpu_exec(pc, cpu, tb);
114
+ }
115
116
return tb->tc.ptr;
117
}
118
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
119
TranslationBlock *last_tb;
120
const void *tb_ptr = itb->tc.ptr;
121
122
- log_cpu_exec(itb->pc, cpu, itb);
123
+ if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
124
+ log_cpu_exec(log_pc(cpu, itb), cpu, itb);
125
+ }
126
127
qemu_thread_jit_execute();
128
ret = tcg_qemu_tb_exec(env, tb_ptr);
129
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
130
* of the start of the TB.
131
*/
132
CPUClass *cc = CPU_GET_CLASS(cpu);
133
- qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
134
- "Stopped execution of TB chain before %p ["
135
- TARGET_FMT_lx "] %s\n",
136
- last_tb->tc.ptr, last_tb->pc,
137
- lookup_symbol(last_tb->pc));
31
+
138
+
32
typedef struct TCGTemp {
139
if (cc->tcg_ops->synchronize_from_tb) {
33
TCGReg reg:8;
140
cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
34
TCGTempVal val_type:8;
141
} else {
35
TCGType base_type:8;
142
assert(cc->set_pc);
36
TCGType type:8;
143
- cc->set_pc(cpu, last_tb->pc);
37
- unsigned int fixed_reg:1;
144
+ cc->set_pc(cpu, tb_pc(last_tb));
38
+ TCGTempKind kind:3;
145
+ }
39
unsigned int indirect_reg:1;
146
+ if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
40
unsigned int indirect_base:1;
147
+ target_ulong pc = log_pc(cpu, last_tb);
41
unsigned int mem_coherent:1;
148
+ if (qemu_log_in_addr_range(pc)) {
42
unsigned int mem_allocated:1;
149
+ qemu_log("Stopped execution of TB chain before %p ["
43
- /* If true, the temp is saved across both basic blocks and
150
+ TARGET_FMT_lx "] %s\n",
44
- translation blocks. */
151
+ last_tb->tc.ptr, pc, lookup_symbol(pc));
45
- unsigned int temp_global:1;
152
+ }
46
- /* If true, the temp is saved across basic blocks but dead
47
- at the end of translation blocks. If false, the temp is
48
- dead at the end of basic blocks. */
49
- unsigned int temp_local:1;
50
unsigned int temp_allocated:1;
51
52
tcg_target_long val;
53
diff --git a/tcg/optimize.c b/tcg/optimize.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/optimize.c
56
+++ b/tcg/optimize.c
57
@@ -XXX,XX +XXX,XX @@ static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
58
TCGTemp *i;
59
60
/* If this is already a global, we can't do better. */
61
- if (ts->temp_global) {
62
+ if (ts->kind >= TEMP_GLOBAL) {
63
return ts;
64
}
65
66
/* Search for a global first. */
67
for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
68
- if (i->temp_global) {
69
+ if (i->kind >= TEMP_GLOBAL) {
70
return i;
71
}
153
}
72
}
154
}
73
155
74
/* If it is a temp, search for a temp local. */
156
@@ -XXX,XX +XXX,XX @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
75
- if (!ts->temp_local) {
157
76
+ if (ts->kind == TEMP_NORMAL) {
158
qemu_spin_unlock(&tb_next->jmp_lock);
77
for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
159
78
- if (ts->temp_local) {
160
- qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
79
+ if (i->kind >= TEMP_LOCAL) {
161
- "Linking TBs %p [" TARGET_FMT_lx
80
return i;
162
- "] index %d -> %p [" TARGET_FMT_lx "]\n",
163
- tb->tc.ptr, tb->pc, n,
164
- tb_next->tc.ptr, tb_next->pc);
165
+ qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n",
166
+ tb->tc.ptr, n, tb_next->tc.ptr);
167
return;
168
169
out_unlock_next:
170
@@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
171
}
172
173
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
174
+ target_ulong pc,
175
TranslationBlock **last_tb, int *tb_exit)
176
{
177
int32_t insns_left;
178
179
- trace_exec_tb(tb, tb->pc);
180
+ trace_exec_tb(tb, pc);
181
tb = cpu_tb_exec(cpu, tb, tb_exit);
182
if (*tb_exit != TB_EXIT_REQUESTED) {
183
*last_tb = tb;
184
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
185
tb_add_jump(last_tb, tb_exit, tb);
81
}
186
}
82
}
187
188
- cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);
189
+ cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit);
190
191
/* Try to align the host and virtual clocks
192
if the guest is in advance */
193
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
194
index XXXXXXX..XXXXXXX 100644
195
--- a/accel/tcg/translate-all.c
196
+++ b/accel/tcg/translate-all.c
197
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
198
199
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
200
if (i == 0) {
201
- prev = (j == 0 ? tb->pc : 0);
202
+ prev = (j == 0 ? tb_pc(tb) : 0);
203
} else {
204
prev = tcg_ctx->gen_insn_data[i - 1][j];
205
}
206
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
207
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
208
uintptr_t searched_pc, bool reset_icount)
209
{
210
- target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
211
+ target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) };
212
uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
213
CPUArchState *env = cpu->env_ptr;
214
const uint8_t *p = tb->tc.ptr + tb->tc.size;
215
@@ -XXX,XX +XXX,XX @@ static bool tb_cmp(const void *ap, const void *bp)
216
const TranslationBlock *a = ap;
217
const TranslationBlock *b = bp;
218
219
- return a->pc == b->pc &&
220
+ return tb_pc(a) == tb_pc(b) &&
221
a->cs_base == b->cs_base &&
222
a->flags == b->flags &&
223
(tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
224
@@ -XXX,XX +XXX,XX @@ static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
225
TranslationBlock *tb = p;
226
target_ulong addr = *(target_ulong *)userp;
227
228
- if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
229
+ if (!(addr + TARGET_PAGE_SIZE <= tb_pc(tb) ||
230
+ addr >= tb_pc(tb) + tb->size)) {
231
printf("ERROR invalidate: address=" TARGET_FMT_lx
232
- " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
233
+ " PC=%08lx size=%04x\n", addr, (long)tb_pc(tb), tb->size);
234
}
235
}
236
237
@@ -XXX,XX +XXX,XX @@ static void do_tb_page_check(void *p, uint32_t hash, void *userp)
238
TranslationBlock *tb = p;
239
int flags1, flags2;
240
241
- flags1 = page_get_flags(tb->pc);
242
- flags2 = page_get_flags(tb->pc + tb->size - 1);
243
+ flags1 = page_get_flags(tb_pc(tb));
244
+ flags2 = page_get_flags(tb_pc(tb) + tb->size - 1);
245
if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
246
printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
247
- (long)tb->pc, tb->size, flags1, flags2);
248
+ (long)tb_pc(tb), tb->size, flags1, flags2);
249
}
250
}
251
252
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
253
254
/* remove the TB from the hash list */
255
phys_pc = tb->page_addr[0];
256
- h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
257
+ h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags,
258
tb->trace_vcpu_dstate);
259
if (!qht_remove(&tb_ctx.htable, tb, h)) {
260
return;
261
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
262
}
263
264
/* add in the hash table */
265
- h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
266
+ h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags,
267
tb->trace_vcpu_dstate);
268
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
269
270
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
271
tcg_ctx->cpu = NULL;
272
max_insns = tb->icount;
273
274
- trace_translate_block(tb, tb->pc, tb->tc.ptr);
275
+ trace_translate_block(tb, pc, tb->tc.ptr);
276
277
/* generate machine code */
278
tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
279
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
280
ti = profile_getclock();
281
#endif
282
283
- gen_code_size = tcg_gen_code(tcg_ctx, tb);
284
+ gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
285
if (unlikely(gen_code_size < 0)) {
286
error_return:
287
switch (gen_code_size) {
288
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
289
290
#ifdef DEBUG_DISAS
291
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
292
- qemu_log_in_addr_range(tb->pc)) {
293
+ qemu_log_in_addr_range(pc)) {
294
FILE *logfile = qemu_log_trylock();
295
if (logfile) {
296
int code_size, data_size;
297
@@ -XXX,XX +XXX,XX @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
298
*/
299
cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
300
301
- qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
302
- "cpu_io_recompile: rewound execution of TB to "
303
- TARGET_FMT_lx "\n", tb->pc);
304
+ if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
305
+ target_ulong pc = log_pc(cpu, tb);
306
+ if (qemu_log_in_addr_range(pc)) {
307
+ qemu_log("cpu_io_recompile: rewound execution of TB to "
308
+ TARGET_FMT_lx "\n", pc);
309
+ }
310
+ }
311
312
cpu_loop_exit_noexc(cpu);
313
}
314
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
315
index XXXXXXX..XXXXXXX 100644
316
--- a/target/arm/cpu.c
317
+++ b/target/arm/cpu.c
318
@@ -XXX,XX +XXX,XX @@ void arm_cpu_synchronize_from_tb(CPUState *cs,
319
* never possible for an AArch64 TB to chain to an AArch32 TB.
320
*/
321
if (is_a64(env)) {
322
- env->pc = tb->pc;
323
+ env->pc = tb_pc(tb);
324
} else {
325
- env->regs[15] = tb->pc;
326
+ env->regs[15] = tb_pc(tb);
327
}
328
}
329
#endif /* CONFIG_TCG */
330
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
331
index XXXXXXX..XXXXXXX 100644
332
--- a/target/avr/cpu.c
333
+++ b/target/avr/cpu.c
334
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_synchronize_from_tb(CPUState *cs,
335
AVRCPU *cpu = AVR_CPU(cs);
336
CPUAVRState *env = &cpu->env;
337
338
- env->pc_w = tb->pc / 2; /* internally PC points to words */
339
+ env->pc_w = tb_pc(tb) / 2; /* internally PC points to words */
340
}
341
342
static void avr_cpu_reset(DeviceState *ds)
343
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
344
index XXXXXXX..XXXXXXX 100644
345
--- a/target/hexagon/cpu.c
346
+++ b/target/hexagon/cpu.c
347
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
348
{
349
HexagonCPU *cpu = HEXAGON_CPU(cs);
350
CPUHexagonState *env = &cpu->env;
351
- env->gpr[HEX_REG_PC] = tb->pc;
352
+ env->gpr[HEX_REG_PC] = tb_pc(tb);
353
}
354
355
static bool hexagon_cpu_has_work(CPUState *cs)
356
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
357
index XXXXXXX..XXXXXXX 100644
358
--- a/target/hppa/cpu.c
359
+++ b/target/hppa/cpu.c
360
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
361
HPPACPU *cpu = HPPA_CPU(cs);
362
363
#ifdef CONFIG_USER_ONLY
364
- cpu->env.iaoq_f = tb->pc;
365
+ cpu->env.iaoq_f = tb_pc(tb);
366
cpu->env.iaoq_b = tb->cs_base;
367
#else
368
/* Recover the IAOQ values from the GVA + PRIV. */
369
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
370
int32_t diff = cs_base;
371
372
cpu->env.iasq_f = iasq_f;
373
- cpu->env.iaoq_f = (tb->pc & ~iasq_f) + priv;
374
+ cpu->env.iaoq_f = (tb_pc(tb) & ~iasq_f) + priv;
375
if (diff) {
376
cpu->env.iaoq_b = cpu->env.iaoq_f + diff;
377
}
378
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
379
index XXXXXXX..XXXXXXX 100644
380
--- a/target/i386/tcg/tcg-cpu.c
381
+++ b/target/i386/tcg/tcg-cpu.c
382
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_synchronize_from_tb(CPUState *cs,
383
{
384
X86CPU *cpu = X86_CPU(cs);
385
386
- cpu->env.eip = tb->pc - tb->cs_base;
387
+ cpu->env.eip = tb_pc(tb) - tb->cs_base;
388
}
389
390
#ifndef CONFIG_USER_ONLY
391
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
392
index XXXXXXX..XXXXXXX 100644
393
--- a/target/loongarch/cpu.c
394
+++ b/target/loongarch/cpu.c
395
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
396
LoongArchCPU *cpu = LOONGARCH_CPU(cs);
397
CPULoongArchState *env = &cpu->env;
398
399
- env->pc = tb->pc;
400
+ env->pc = tb_pc(tb);
401
}
402
#endif /* CONFIG_TCG */
403
404
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
405
index XXXXXXX..XXXXXXX 100644
406
--- a/target/microblaze/cpu.c
407
+++ b/target/microblaze/cpu.c
408
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_synchronize_from_tb(CPUState *cs,
409
{
410
MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
411
412
- cpu->env.pc = tb->pc;
413
+ cpu->env.pc = tb_pc(tb);
414
cpu->env.iflags = tb->flags & IFLAGS_TB_MASK;
415
}
416
417
diff --git a/target/mips/tcg/exception.c b/target/mips/tcg/exception.c
418
index XXXXXXX..XXXXXXX 100644
419
--- a/target/mips/tcg/exception.c
420
+++ b/target/mips/tcg/exception.c
421
@@ -XXX,XX +XXX,XX @@ void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb)
422
MIPSCPU *cpu = MIPS_CPU(cs);
423
CPUMIPSState *env = &cpu->env;
424
425
- env->active_tc.PC = tb->pc;
426
+ env->active_tc.PC = tb_pc(tb);
427
env->hflags &= ~MIPS_HFLAG_BMASK;
428
env->hflags |= tb->flags & MIPS_HFLAG_BMASK;
429
}
430
diff --git a/target/mips/tcg/sysemu/special_helper.c b/target/mips/tcg/sysemu/special_helper.c
431
index XXXXXXX..XXXXXXX 100644
432
--- a/target/mips/tcg/sysemu/special_helper.c
433
+++ b/target/mips/tcg/sysemu/special_helper.c
434
@@ -XXX,XX +XXX,XX @@ bool mips_io_recompile_replay_branch(CPUState *cs, const TranslationBlock *tb)
435
CPUMIPSState *env = &cpu->env;
436
437
if ((env->hflags & MIPS_HFLAG_BMASK) != 0
438
- && env->active_tc.PC != tb->pc) {
439
+ && env->active_tc.PC != tb_pc(tb)) {
440
env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
441
env->hflags &= ~MIPS_HFLAG_BMASK;
442
return true;
443
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
444
index XXXXXXX..XXXXXXX 100644
445
--- a/target/openrisc/cpu.c
446
+++ b/target/openrisc/cpu.c
447
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
448
{
449
OpenRISCCPU *cpu = OPENRISC_CPU(cs);
450
451
- cpu->env.pc = tb->pc;
452
+ cpu->env.pc = tb_pc(tb);
453
}
454
455
456
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
457
index XXXXXXX..XXXXXXX 100644
458
--- a/target/riscv/cpu.c
459
+++ b/target/riscv/cpu.c
460
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_synchronize_from_tb(CPUState *cs,
461
RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
462
463
if (xl == MXL_RV32) {
464
- env->pc = (int32_t)tb->pc;
465
+ env->pc = (int32_t)tb_pc(tb);
466
} else {
467
- env->pc = tb->pc;
468
+ env->pc = tb_pc(tb);
469
}
470
}
471
472
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
473
index XXXXXXX..XXXXXXX 100644
474
--- a/target/rx/cpu.c
475
+++ b/target/rx/cpu.c
476
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_synchronize_from_tb(CPUState *cs,
477
{
478
RXCPU *cpu = RX_CPU(cs);
479
480
- cpu->env.pc = tb->pc;
481
+ cpu->env.pc = tb_pc(tb);
482
}
483
484
static bool rx_cpu_has_work(CPUState *cs)
485
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
486
index XXXXXXX..XXXXXXX 100644
487
--- a/target/sh4/cpu.c
488
+++ b/target/sh4/cpu.c
489
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
490
{
491
SuperHCPU *cpu = SUPERH_CPU(cs);
492
493
- cpu->env.pc = tb->pc;
494
+ cpu->env.pc = tb_pc(tb);
495
cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
496
}
497
498
@@ -XXX,XX +XXX,XX @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
499
CPUSH4State *env = &cpu->env;
500
501
if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
502
- && env->pc != tb->pc) {
503
+ && env->pc != tb_pc(tb)) {
504
env->pc -= 2;
505
env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
506
return true;
507
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
508
index XXXXXXX..XXXXXXX 100644
509
--- a/target/sparc/cpu.c
510
+++ b/target/sparc/cpu.c
511
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_synchronize_from_tb(CPUState *cs,
512
{
513
SPARCCPU *cpu = SPARC_CPU(cs);
514
515
- cpu->env.pc = tb->pc;
516
+ cpu->env.pc = tb_pc(tb);
517
cpu->env.npc = tb->cs_base;
518
}
519
520
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
521
index XXXXXXX..XXXXXXX 100644
522
--- a/target/tricore/cpu.c
523
+++ b/target/tricore/cpu.c
524
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_synchronize_from_tb(CPUState *cs,
525
TriCoreCPU *cpu = TRICORE_CPU(cs);
526
CPUTriCoreState *env = &cpu->env;
527
528
- env->PC = tb->pc;
529
+ env->PC = tb_pc(tb);
530
}
531
532
static void tricore_cpu_reset(DeviceState *dev)
83
diff --git a/tcg/tcg.c b/tcg/tcg.c
533
diff --git a/tcg/tcg.c b/tcg/tcg.c
84
index XXXXXXX..XXXXXXX 100644
534
index XXXXXXX..XXXXXXX 100644
85
--- a/tcg/tcg.c
535
--- a/tcg/tcg.c
86
+++ b/tcg/tcg.c
536
+++ b/tcg/tcg.c
87
@@ -XXX,XX +XXX,XX @@ static inline TCGTemp *tcg_global_alloc(TCGContext *s)
537
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void)
88
tcg_debug_assert(s->nb_globals == s->nb_temps);
89
s->nb_globals++;
90
ts = tcg_temp_alloc(s);
91
- ts->temp_global = 1;
92
+ ts->kind = TEMP_GLOBAL;
93
94
return ts;
95
}
96
@@ -XXX,XX +XXX,XX @@ static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
97
ts = tcg_global_alloc(s);
98
ts->base_type = type;
99
ts->type = type;
100
- ts->fixed_reg = 1;
101
+ ts->kind = TEMP_FIXED;
102
ts->reg = reg;
103
ts->name = name;
104
tcg_regset_set_reg(s->reserved_regs, reg);
105
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
106
bigendian = 1;
107
#endif
538
#endif
108
539
109
- if (!base_ts->fixed_reg) {
540
110
+ if (base_ts->kind != TEMP_FIXED) {
541
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
111
/* We do not support double-indirect registers. */
542
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
112
tcg_debug_assert(!base_ts->indirect_reg);
543
{
113
base_ts->indirect_base = 1;
544
#ifdef CONFIG_PROFILER
114
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
545
TCGProfile *prof = &s->prof;
115
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
546
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
116
{
547
117
TCGContext *s = tcg_ctx;
548
#ifdef DEBUG_DISAS
118
+ TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
549
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
119
TCGTemp *ts;
550
- && qemu_log_in_addr_range(tb->pc))) {
120
int idx, k;
551
+ && qemu_log_in_addr_range(pc_start))) {
121
552
FILE *logfile = qemu_log_trylock();
122
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
553
if (logfile) {
123
ts = &s->temps[idx];
554
fprintf(logfile, "OP:\n");
124
ts->temp_allocated = 1;
555
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
125
tcg_debug_assert(ts->base_type == type);
556
if (s->nb_indirects > 0) {
126
- tcg_debug_assert(ts->temp_local == temp_local);
557
#ifdef DEBUG_DISAS
127
+ tcg_debug_assert(ts->kind == kind);
558
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
128
} else {
559
- && qemu_log_in_addr_range(tb->pc))) {
129
ts = tcg_temp_alloc(s);
560
+ && qemu_log_in_addr_range(pc_start))) {
130
if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
561
FILE *logfile = qemu_log_trylock();
131
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
562
if (logfile) {
132
ts->base_type = type;
563
fprintf(logfile, "OP before indirect lowering:\n");
133
ts->type = TCG_TYPE_I32;
564
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
134
ts->temp_allocated = 1;
565
135
- ts->temp_local = temp_local;
566
#ifdef DEBUG_DISAS
136
+ ts->kind = kind;
567
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
137
568
- && qemu_log_in_addr_range(tb->pc))) {
138
tcg_debug_assert(ts2 == ts + 1);
569
+ && qemu_log_in_addr_range(pc_start))) {
139
ts2->base_type = TCG_TYPE_I64;
570
FILE *logfile = qemu_log_trylock();
140
ts2->type = TCG_TYPE_I32;
571
if (logfile) {
141
ts2->temp_allocated = 1;
572
fprintf(logfile, "OP after optimization and liveness analysis:\n");
142
- ts2->temp_local = temp_local;
143
+ ts2->kind = kind;
144
} else {
145
ts->base_type = type;
146
ts->type = type;
147
ts->temp_allocated = 1;
148
- ts->temp_local = temp_local;
149
+ ts->kind = kind;
150
}
151
}
152
153
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
154
}
155
#endif
156
157
- tcg_debug_assert(ts->temp_global == 0);
158
+ tcg_debug_assert(ts->kind < TEMP_GLOBAL);
159
tcg_debug_assert(ts->temp_allocated != 0);
160
ts->temp_allocated = 0;
161
162
idx = temp_idx(ts);
163
- k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
164
+ k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
165
set_bit(idx, s->free_temps[k].l);
166
}
167
168
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
169
static void tcg_reg_alloc_start(TCGContext *s)
170
{
171
int i, n;
172
- TCGTemp *ts;
173
174
- for (i = 0, n = s->nb_globals; i < n; i++) {
175
- ts = &s->temps[i];
176
- ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
177
- }
178
- for (n = s->nb_temps; i < n; i++) {
179
- ts = &s->temps[i];
180
- ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
181
- ts->mem_allocated = 0;
182
- ts->fixed_reg = 0;
183
+ for (i = 0, n = s->nb_temps; i < n; i++) {
184
+ TCGTemp *ts = &s->temps[i];
185
+ TCGTempVal val = TEMP_VAL_MEM;
186
+
187
+ switch (ts->kind) {
188
+ case TEMP_FIXED:
189
+ val = TEMP_VAL_REG;
190
+ break;
191
+ case TEMP_GLOBAL:
192
+ break;
193
+ case TEMP_NORMAL:
194
+ val = TEMP_VAL_DEAD;
195
+ /* fall through */
196
+ case TEMP_LOCAL:
197
+ ts->mem_allocated = 0;
198
+ break;
199
+ default:
200
+ g_assert_not_reached();
201
+ }
202
+ ts->val_type = val;
203
}
204
205
memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
206
@@ -XXX,XX +XXX,XX @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
207
{
208
int idx = temp_idx(ts);
209
210
- if (ts->temp_global) {
211
+ switch (ts->kind) {
212
+ case TEMP_FIXED:
213
+ case TEMP_GLOBAL:
214
pstrcpy(buf, buf_size, ts->name);
215
- } else if (ts->temp_local) {
216
+ break;
217
+ case TEMP_LOCAL:
218
snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
219
- } else {
220
+ break;
221
+ case TEMP_NORMAL:
222
snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
223
+ break;
224
}
225
return buf;
226
}
227
@@ -XXX,XX +XXX,XX @@ static void la_bb_end(TCGContext *s, int ng, int nt)
228
{
229
int i;
230
231
- for (i = 0; i < ng; ++i) {
232
- s->temps[i].state = TS_DEAD | TS_MEM;
233
- la_reset_pref(&s->temps[i]);
234
- }
235
- for (i = ng; i < nt; ++i) {
236
- s->temps[i].state = (s->temps[i].temp_local
237
- ? TS_DEAD | TS_MEM
238
- : TS_DEAD);
239
- la_reset_pref(&s->temps[i]);
240
+ for (i = 0; i < nt; ++i) {
241
+ TCGTemp *ts = &s->temps[i];
242
+ int state;
243
+
244
+ switch (ts->kind) {
245
+ case TEMP_FIXED:
246
+ case TEMP_GLOBAL:
247
+ case TEMP_LOCAL:
248
+ state = TS_DEAD | TS_MEM;
249
+ break;
250
+ case TEMP_NORMAL:
251
+ state = TS_DEAD;
252
+ break;
253
+ default:
254
+ g_assert_not_reached();
255
+ }
256
+ ts->state = state;
257
+ la_reset_pref(ts);
258
}
259
}
260
261
@@ -XXX,XX +XXX,XX @@ static void la_bb_sync(TCGContext *s, int ng, int nt)
262
la_global_sync(s, ng);
263
264
for (int i = ng; i < nt; ++i) {
265
- if (s->temps[i].temp_local) {
266
+ if (s->temps[i].kind == TEMP_LOCAL) {
267
int state = s->temps[i].state;
268
s->temps[i].state = state | TS_MEM;
269
if (state != TS_DEAD) {
270
@@ -XXX,XX +XXX,XX @@ static void check_regs(TCGContext *s)
271
}
272
for (k = 0; k < s->nb_temps; k++) {
273
ts = &s->temps[k];
274
- if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
275
+ if (ts->val_type == TEMP_VAL_REG
276
+ && ts->kind != TEMP_FIXED
277
&& s->reg_to_temp[ts->reg] != ts) {
278
printf("Inconsistency for temp %s:\n",
279
tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
280
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
281
mark it free; otherwise mark it dead. */
282
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
283
{
284
- if (ts->fixed_reg) {
285
+ if (ts->kind == TEMP_FIXED) {
286
return;
287
}
288
if (ts->val_type == TEMP_VAL_REG) {
289
s->reg_to_temp[ts->reg] = NULL;
290
}
291
ts->val_type = (free_or_dead < 0
292
- || ts->temp_local
293
- || ts->temp_global
294
+ || ts->kind != TEMP_NORMAL
295
? TEMP_VAL_MEM : TEMP_VAL_DEAD);
296
}
297
298
@@ -XXX,XX +XXX,XX @@ static inline void temp_dead(TCGContext *s, TCGTemp *ts)
299
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
300
TCGRegSet preferred_regs, int free_or_dead)
301
{
302
- if (ts->fixed_reg) {
303
+ if (ts->kind == TEMP_FIXED) {
304
return;
305
}
306
if (!ts->mem_coherent) {
307
@@ -XXX,XX +XXX,XX @@ static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
308
{
309
/* The liveness analysis already ensures that globals are back
310
in memory. Keep an tcg_debug_assert for safety. */
311
- tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
312
+ tcg_debug_assert(ts->val_type == TEMP_VAL_MEM
313
+ || ts->kind == TEMP_FIXED);
314
}
315
316
/* save globals to their canonical location and assume they can be
317
@@ -XXX,XX +XXX,XX @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
318
for (i = 0, n = s->nb_globals; i < n; i++) {
319
TCGTemp *ts = &s->temps[i];
320
tcg_debug_assert(ts->val_type != TEMP_VAL_REG
321
- || ts->fixed_reg
322
+ || ts->kind == TEMP_FIXED
323
|| ts->mem_coherent);
324
}
325
}
326
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
327
328
for (i = s->nb_globals; i < s->nb_temps; i++) {
329
TCGTemp *ts = &s->temps[i];
330
- if (ts->temp_local) {
331
+ if (ts->kind == TEMP_LOCAL) {
332
temp_save(s, ts, allocated_regs);
333
} else {
334
/* The liveness analysis already ensures that temps are dead.
335
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
336
* The liveness analysis already ensures that temps are dead.
337
* Keep tcg_debug_asserts for safety.
338
*/
339
- if (ts->temp_local) {
340
+ if (ts->kind == TEMP_LOCAL) {
341
tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
342
} else {
343
tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
344
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
345
TCGRegSet preferred_regs)
346
{
347
/* ENV should not be modified. */
348
- tcg_debug_assert(!ots->fixed_reg);
349
+ tcg_debug_assert(ots->kind != TEMP_FIXED);
350
351
/* The movi is not explicitly generated here. */
352
if (ots->val_type == TEMP_VAL_REG) {
353
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
354
ts = arg_temp(op->args[1]);
355
356
/* ENV should not be modified. */
357
- tcg_debug_assert(!ots->fixed_reg);
358
+ tcg_debug_assert(ots->kind != TEMP_FIXED);
359
360
/* Note that otype != itype for no-op truncation. */
361
otype = ots->type;
362
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
363
}
364
temp_dead(s, ots);
365
} else {
366
- if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
367
+ if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
368
/* the mov can be suppressed */
369
if (ots->val_type == TEMP_VAL_REG) {
370
s->reg_to_temp[ots->reg] = NULL;
371
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
372
* Store the source register into the destination slot
373
* and leave the destination temp as TEMP_VAL_MEM.
374
*/
375
- assert(!ots->fixed_reg);
376
+ assert(ots->kind != TEMP_FIXED);
377
if (!ts->mem_allocated) {
378
temp_allocate_frame(s, ots);
379
}
380
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
381
its = arg_temp(op->args[1]);
382
383
/* ENV should not be modified. */
384
- tcg_debug_assert(!ots->fixed_reg);
385
+ tcg_debug_assert(ots->kind != TEMP_FIXED);
386
387
itype = its->type;
388
vece = TCGOP_VECE(op);
389
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
390
i_preferred_regs = o_preferred_regs = 0;
391
if (arg_ct->ialias) {
392
o_preferred_regs = op->output_pref[arg_ct->alias_index];
393
- if (ts->fixed_reg) {
394
+ if (ts->kind == TEMP_FIXED) {
395
/* if fixed register, we must allocate a new register
396
if the alias is not the same register */
397
if (arg != op->args[arg_ct->alias_index]) {
398
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
399
ts = arg_temp(arg);
400
401
/* ENV should not be modified. */
402
- tcg_debug_assert(!ts->fixed_reg);
403
+ tcg_debug_assert(ts->kind != TEMP_FIXED);
404
405
if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
406
reg = new_args[arg_ct->alias_index];
407
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
408
ts = arg_temp(op->args[i]);
409
410
/* ENV should not be modified. */
411
- tcg_debug_assert(!ts->fixed_reg);
412
+ tcg_debug_assert(ts->kind != TEMP_FIXED);
413
414
if (NEED_SYNC_ARG(i)) {
415
temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
416
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
417
ts = arg_temp(arg);
418
419
/* ENV should not be modified. */
420
- tcg_debug_assert(!ts->fixed_reg);
421
+ tcg_debug_assert(ts->kind != TEMP_FIXED);
422
423
reg = tcg_target_call_oarg_regs[i];
424
tcg_debug_assert(s->reg_to_temp[reg] == NULL);
425
--
573
--
426
2.25.1
574
2.34.1
427
575
428
576
diff view generated by jsdifflib
1
Having dupi pass through movi is confusing and arguably wrong.
1
Prepare for targets to be able to produce TBs that can
2
run in more than one virtual context.
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
tcg/tcg.c | 6 +++-
7
accel/tcg/internal.h | 4 +++
7
tcg/aarch64/tcg-target.c.inc | 7 ----
8
accel/tcg/tb-jmp-cache.h | 41 +++++++++++++++++++++++++
8
tcg/i386/tcg-target.c.inc | 63 ++++++++++++++++++++++++------------
9
include/exec/cpu-defs.h | 3 ++
9
tcg/ppc/tcg-target.c.inc | 6 ----
10
include/exec/exec-all.h | 32 ++++++++++++++++++--
10
4 files changed, 47 insertions(+), 35 deletions(-)
11
accel/tcg/cpu-exec.c | 16 ++++++----
12
accel/tcg/translate-all.c | 64 ++++++++++++++++++++++++++-------------
13
6 files changed, 131 insertions(+), 29 deletions(-)
11
14
12
diff --git a/tcg/tcg.c b/tcg/tcg.c
15
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/tcg.c
17
--- a/accel/tcg/internal.h
15
+++ b/tcg/tcg.c
18
+++ b/accel/tcg/internal.h
16
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
19
@@ -XXX,XX +XXX,XX @@ void tb_htable_init(void);
17
case TEMP_VAL_CONST:
20
/* Return the current PC from CPU, which may be cached in TB. */
18
reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
21
static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
19
preferred_regs, ts->indirect_base);
22
{
20
- tcg_out_movi(s, ts->type, reg, ts->val);
23
+#if TARGET_TB_PCREL
21
+ if (ts->type <= TCG_TYPE_I64) {
24
+ return cpu->cc->get_pc(cpu);
22
+ tcg_out_movi(s, ts->type, reg, ts->val);
25
+#else
23
+ } else {
26
return tb_pc(tb);
24
+ tcg_out_dupi_vec(s, ts->type, reg, ts->val);
27
+#endif
28
}
29
30
#endif /* ACCEL_TCG_INTERNAL_H */
31
diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/accel/tcg/tb-jmp-cache.h
34
+++ b/accel/tcg/tb-jmp-cache.h
35
@@ -XXX,XX +XXX,XX @@
36
37
/*
38
* Accessed in parallel; all accesses to 'tb' must be atomic.
39
+ * For TARGET_TB_PCREL, accesses to 'pc' must be protected by
40
+ * a load_acquire/store_release to 'tb'.
41
*/
42
struct CPUJumpCache {
43
struct {
44
TranslationBlock *tb;
45
+#if TARGET_TB_PCREL
46
+ target_ulong pc;
47
+#endif
48
} array[TB_JMP_CACHE_SIZE];
49
};
50
51
+static inline TranslationBlock *
52
+tb_jmp_cache_get_tb(CPUJumpCache *jc, uint32_t hash)
53
+{
54
+#if TARGET_TB_PCREL
55
+ /* Use acquire to ensure current load of pc from jc. */
56
+ return qatomic_load_acquire(&jc->array[hash].tb);
57
+#else
58
+ /* Use rcu_read to ensure current load of pc from *tb. */
59
+ return qatomic_rcu_read(&jc->array[hash].tb);
60
+#endif
61
+}
62
+
63
+static inline target_ulong
64
+tb_jmp_cache_get_pc(CPUJumpCache *jc, uint32_t hash, TranslationBlock *tb)
65
+{
66
+#if TARGET_TB_PCREL
67
+ return jc->array[hash].pc;
68
+#else
69
+ return tb_pc(tb);
70
+#endif
71
+}
72
+
73
+static inline void
74
+tb_jmp_cache_set(CPUJumpCache *jc, uint32_t hash,
75
+ TranslationBlock *tb, target_ulong pc)
76
+{
77
+#if TARGET_TB_PCREL
78
+ jc->array[hash].pc = pc;
79
+ /* Use store_release on tb to ensure pc is written first. */
80
+ qatomic_store_release(&jc->array[hash].tb, tb);
81
+#else
82
+ /* Use the pc value already stored in tb->pc. */
83
+ qatomic_set(&jc->array[hash].tb, tb);
84
+#endif
85
+}
86
+
87
#endif /* ACCEL_TCG_TB_JMP_CACHE_H */
88
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/include/exec/cpu-defs.h
91
+++ b/include/exec/cpu-defs.h
92
@@ -XXX,XX +XXX,XX @@
93
# error TARGET_PAGE_BITS must be defined in cpu-param.h
94
# endif
95
#endif
96
+#ifndef TARGET_TB_PCREL
97
+# define TARGET_TB_PCREL 0
98
+#endif
99
100
#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
101
102
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
103
index XXXXXXX..XXXXXXX 100644
104
--- a/include/exec/exec-all.h
105
+++ b/include/exec/exec-all.h
106
@@ -XXX,XX +XXX,XX @@ struct tb_tc {
107
};
108
109
struct TranslationBlock {
110
- target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */
111
- target_ulong cs_base; /* CS base for this block */
112
+#if !TARGET_TB_PCREL
113
+ /*
114
+ * Guest PC corresponding to this block. This must be the true
115
+ * virtual address. Therefore e.g. x86 stores EIP + CS_BASE, and
116
+ * targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or
117
+ * privilege, must store those bits elsewhere.
118
+ *
119
+ * If TARGET_TB_PCREL, the opcodes for the TranslationBlock are
120
+ * written such that the TB is associated only with the physical
121
+ * page and may be run in any virtual address context. In this case,
122
+ * PC must always be taken from ENV in a target-specific manner.
123
+ * Unwind information is taken as offsets from the page, to be
124
+ * deposited into the "current" PC.
125
+ */
126
+ target_ulong pc;
127
+#endif
128
+
129
+ /*
130
+ * Target-specific data associated with the TranslationBlock, e.g.:
131
+ * x86: the original user, the Code Segment virtual base,
132
+ * arm: an extension of tb->flags,
133
+ * s390x: instruction data for EXECUTE,
134
+ * sparc: the next pc of the instruction queue (for delay slots).
135
+ */
136
+ target_ulong cs_base;
137
+
138
uint32_t flags; /* flags defining in which context the code was generated */
139
uint32_t cflags; /* compile flags */
140
141
@@ -XXX,XX +XXX,XX @@ struct TranslationBlock {
142
/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
143
static inline target_ulong tb_pc(const TranslationBlock *tb)
144
{
145
+#if TARGET_TB_PCREL
146
+ qemu_build_not_reached();
147
+#else
148
return tb->pc;
149
+#endif
150
}
151
152
/* Hide the qatomic_read to make code a little easier on the eyes */
153
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
154
index XXXXXXX..XXXXXXX 100644
155
--- a/accel/tcg/cpu-exec.c
156
+++ b/accel/tcg/cpu-exec.c
157
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
158
const TranslationBlock *tb = p;
159
const struct tb_desc *desc = d;
160
161
- if (tb_pc(tb) == desc->pc &&
162
+ if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) &&
163
tb->page_addr[0] == desc->page_addr0 &&
164
tb->cs_base == desc->cs_base &&
165
tb->flags == desc->flags &&
166
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
167
return NULL;
168
}
169
desc.page_addr0 = phys_pc;
170
- h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
171
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : pc),
172
+ flags, cflags, *cpu->trace_dstate);
173
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
174
}
175
176
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
177
uint32_t flags, uint32_t cflags)
178
{
179
TranslationBlock *tb;
180
+ CPUJumpCache *jc;
181
uint32_t hash;
182
183
/* we should never be trying to look up an INVALID tb */
184
tcg_debug_assert(!(cflags & CF_INVALID));
185
186
hash = tb_jmp_cache_hash_func(pc);
187
- tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);
188
+ jc = cpu->tb_jmp_cache;
189
+ tb = tb_jmp_cache_get_tb(jc, hash);
190
191
if (likely(tb &&
192
- tb->pc == pc &&
193
+ tb_jmp_cache_get_pc(jc, hash, tb) == pc &&
194
tb->cs_base == cs_base &&
195
tb->flags == flags &&
196
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
197
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
198
if (tb == NULL) {
199
return NULL;
200
}
201
- qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb);
202
+ tb_jmp_cache_set(jc, hash, tb, pc);
203
return tb;
204
}
205
206
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
207
if (cc->tcg_ops->synchronize_from_tb) {
208
cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
209
} else {
210
+ assert(!TARGET_TB_PCREL);
211
assert(cc->set_pc);
212
cc->set_pc(cpu, tb_pc(last_tb));
213
}
214
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
215
* for the fast lookup
216
*/
217
h = tb_jmp_cache_hash_func(pc);
218
- qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
219
+ tb_jmp_cache_set(cpu->tb_jmp_cache, h, tb, pc);
220
}
221
222
#ifndef CONFIG_USER_ONLY
223
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
224
index XXXXXXX..XXXXXXX 100644
225
--- a/accel/tcg/translate-all.c
226
+++ b/accel/tcg/translate-all.c
227
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
228
229
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
230
if (i == 0) {
231
- prev = (j == 0 ? tb_pc(tb) : 0);
232
+ prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
233
} else {
234
prev = tcg_ctx->gen_insn_data[i - 1][j];
235
}
236
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
237
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
238
uintptr_t searched_pc, bool reset_icount)
239
{
240
- target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) };
241
+ target_ulong data[TARGET_INSN_START_WORDS];
242
uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
243
CPUArchState *env = cpu->env_ptr;
244
const uint8_t *p = tb->tc.ptr + tb->tc.size;
245
@@ -XXX,XX +XXX,XX @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
246
return -1;
247
}
248
249
+ memset(data, 0, sizeof(data));
250
+ if (!TARGET_TB_PCREL) {
251
+ data[0] = tb_pc(tb);
252
+ }
253
+
254
/* Reconstruct the stored insn data while looking for the point at
255
which the end of the insn exceeds the searched_pc. */
256
for (i = 0; i < num_insns; ++i) {
257
@@ -XXX,XX +XXX,XX @@ static bool tb_cmp(const void *ap, const void *bp)
258
const TranslationBlock *a = ap;
259
const TranslationBlock *b = bp;
260
261
- return tb_pc(a) == tb_pc(b) &&
262
- a->cs_base == b->cs_base &&
263
- a->flags == b->flags &&
264
- (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
265
- a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
266
- a->page_addr[0] == b->page_addr[0] &&
267
- a->page_addr[1] == b->page_addr[1];
268
+ return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
269
+ a->cs_base == b->cs_base &&
270
+ a->flags == b->flags &&
271
+ (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
272
+ a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
273
+ a->page_addr[0] == b->page_addr[0] &&
274
+ a->page_addr[1] == b->page_addr[1]);
275
}
276
277
void tb_htable_init(void)
278
@@ -XXX,XX +XXX,XX @@ static inline void tb_jmp_unlink(TranslationBlock *dest)
279
qemu_spin_unlock(&dest->jmp_lock);
280
}
281
282
+static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
283
+{
284
+ CPUState *cpu;
285
+
286
+ if (TARGET_TB_PCREL) {
287
+ /* A TB may be at any virtual address */
288
+ CPU_FOREACH(cpu) {
289
+ tcg_flush_jmp_cache(cpu);
25
+ }
290
+ }
26
ts->mem_coherent = 0;
27
break;
28
case TEMP_VAL_MEM:
29
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
30
index XXXXXXX..XXXXXXX 100644
31
--- a/tcg/aarch64/tcg-target.c.inc
32
+++ b/tcg/aarch64/tcg-target.c.inc
33
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
34
case TCG_TYPE_I64:
35
tcg_debug_assert(rd < 32);
36
break;
37
-
38
- case TCG_TYPE_V64:
39
- case TCG_TYPE_V128:
40
- tcg_debug_assert(rd >= 32);
41
- tcg_out_dupi_vec(s, type, rd, value);
42
- return;
43
-
44
default:
45
g_assert_not_reached();
46
}
47
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
48
index XXXXXXX..XXXXXXX 100644
49
--- a/tcg/i386/tcg-target.c.inc
50
+++ b/tcg/i386/tcg-target.c.inc
51
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
52
}
53
}
54
55
-static void tcg_out_movi(TCGContext *s, TCGType type,
56
- TCGReg ret, tcg_target_long arg)
57
+static void tcg_out_movi_vec(TCGContext *s, TCGType type,
58
+ TCGReg ret, tcg_target_long arg)
59
+{
60
+ if (arg == 0) {
61
+ tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret);
62
+ return;
63
+ }
64
+ if (arg == -1) {
65
+ tcg_out_vex_modrm(s, OPC_PCMPEQB, ret, ret, ret);
66
+ return;
67
+ }
68
+
69
+ int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW);
70
+ tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy + rexw, ret);
71
+ if (TCG_TARGET_REG_BITS == 64) {
72
+ new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
73
+ } else {
291
+ } else {
74
+ new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
292
+ uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb));
293
+
294
+ CPU_FOREACH(cpu) {
295
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
296
+
297
+ if (qatomic_read(&jc->array[h].tb) == tb) {
298
+ qatomic_set(&jc->array[h].tb, NULL);
299
+ }
300
+ }
75
+ }
301
+ }
76
+}
302
+}
77
+
303
+
78
+static void tcg_out_movi_int(TCGContext *s, TCGType type,
304
/*
79
+ TCGReg ret, tcg_target_long arg)
305
* In user-mode, call with mmap_lock held.
80
{
306
* In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
81
tcg_target_long diff;
307
@@ -XXX,XX +XXX,XX @@ static inline void tb_jmp_unlink(TranslationBlock *dest)
82
308
*/
83
- switch (type) {
309
static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
84
- case TCG_TYPE_I32:
310
{
85
-#if TCG_TARGET_REG_BITS == 64
311
- CPUState *cpu;
86
- case TCG_TYPE_I64:
312
PageDesc *p;
87
-#endif
313
uint32_t h;
88
- if (ret < 16) {
314
tb_page_addr_t phys_pc;
89
- break;
315
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
316
317
/* remove the TB from the hash list */
318
phys_pc = tb->page_addr[0];
319
- h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags,
320
- tb->trace_vcpu_dstate);
321
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
322
+ tb->flags, orig_cflags, tb->trace_vcpu_dstate);
323
if (!qht_remove(&tb_ctx.htable, tb, h)) {
324
return;
325
}
326
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
327
}
328
329
/* remove the TB from the hash list */
330
- h = tb_jmp_cache_hash_func(tb->pc);
331
- CPU_FOREACH(cpu) {
332
- CPUJumpCache *jc = cpu->tb_jmp_cache;
333
- if (qatomic_read(&jc->array[h].tb) == tb) {
334
- qatomic_set(&jc->array[h].tb, NULL);
90
- }
335
- }
91
- /* fallthru */
92
- case TCG_TYPE_V64:
93
- case TCG_TYPE_V128:
94
- case TCG_TYPE_V256:
95
- tcg_debug_assert(ret >= 16);
96
- tcg_out_dupi_vec(s, type, ret, arg);
97
- return;
98
- default:
99
- g_assert_not_reached();
100
- }
336
- }
101
-
337
+ tb_jmp_cache_inval_tb(tb);
102
if (arg == 0) {
338
103
tgen_arithr(s, ARITH_XOR, ret, ret);
339
/* suppress this TB from the two jump lists */
104
return;
340
tb_remove_from_jmp_list(tb, 0);
105
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
341
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
106
tcg_out64(s, arg);
342
}
107
}
343
108
344
/* add in the hash table */
109
+static void tcg_out_movi(TCGContext *s, TCGType type,
345
- h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags,
110
+ TCGReg ret, tcg_target_long arg)
346
- tb->trace_vcpu_dstate);
111
+{
347
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
112
+ switch (type) {
348
+ tb->flags, tb->cflags, tb->trace_vcpu_dstate);
113
+ case TCG_TYPE_I32:
349
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
114
+#if TCG_TARGET_REG_BITS == 64
350
115
+ case TCG_TYPE_I64:
351
/* remove TB from the page(s) if we couldn't insert it */
116
+#endif
352
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
117
+ if (ret < 16) {
353
118
+ tcg_out_movi_int(s, type, ret, arg);
354
gen_code_buf = tcg_ctx->code_gen_ptr;
119
+ } else {
355
tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
120
+ tcg_out_movi_vec(s, type, ret, arg);
356
+#if !TARGET_TB_PCREL
121
+ }
357
tb->pc = pc;
122
+ break;
358
+#endif
123
+ default:
359
tb->cs_base = cs_base;
124
+ g_assert_not_reached();
360
tb->flags = flags;
125
+ }
361
tb->cflags = cflags;
126
+}
127
+
128
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
129
{
130
if (val == (int8_t)val) {
131
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
132
index XXXXXXX..XXXXXXX 100644
133
--- a/tcg/ppc/tcg-target.c.inc
134
+++ b/tcg/ppc/tcg-target.c.inc
135
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
136
tcg_out_movi_int(s, type, ret, arg, false);
137
break;
138
139
- case TCG_TYPE_V64:
140
- case TCG_TYPE_V128:
141
- tcg_debug_assert(ret >= TCG_REG_V0);
142
- tcg_out_dupi_vec(s, type, ret, arg);
143
- break;
144
-
145
default:
146
g_assert_not_reached();
147
}
148
--
362
--
149
2.25.1
363
2.34.1
150
364
151
365
diff view generated by jsdifflib
Deleted patch
1
This will reduce the differences between 32-bit and 64-bit hosts,
2
allowing full 64-bit constants to be created with the same interface.
3
1
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
include/tcg/tcg.h | 2 +-
7
tcg/tcg.c | 2 +-
8
2 files changed, 2 insertions(+), 2 deletions(-)
9
10
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg.h
13
+++ b/include/tcg/tcg.h
14
@@ -XXX,XX +XXX,XX @@ typedef struct TCGTemp {
15
unsigned int mem_allocated:1;
16
unsigned int temp_allocated:1;
17
18
- tcg_target_long val;
19
+ int64_t val;
20
struct TCGTemp *mem_base;
21
intptr_t mem_offset;
22
const char *name;
23
diff --git a/tcg/tcg.c b/tcg/tcg.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/tcg.c
26
+++ b/tcg/tcg.c
27
@@ -XXX,XX +XXX,XX @@ static void dump_regs(TCGContext *s)
28
tcg_target_reg_names[ts->mem_base->reg]);
29
break;
30
case TEMP_VAL_CONST:
31
- printf("$0x%" TCG_PRIlx, ts->val);
32
+ printf("$0x%" PRIx64, ts->val);
33
break;
34
case TEMP_VAL_DEAD:
35
printf("D");
36
--
37
2.25.1
38
39
diff view generated by jsdifflib
Deleted patch
1
Prefer TEMP_CONST over anything else.
2
1
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 27 ++++++++++++---------------
6
1 file changed, 12 insertions(+), 15 deletions(-)
7
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static void init_arg_info(TempOptInfo *infos,
13
14
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
15
{
16
- TCGTemp *i;
17
+ TCGTemp *i, *g, *l;
18
19
- /* If this is already a global, we can't do better. */
20
- if (ts->kind >= TEMP_GLOBAL) {
21
+ /* If this is already readonly, we can't do better. */
22
+ if (temp_readonly(ts)) {
23
return ts;
24
}
25
26
- /* Search for a global first. */
27
+ g = l = NULL;
28
for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
29
- if (i->kind >= TEMP_GLOBAL) {
30
+ if (temp_readonly(i)) {
31
return i;
32
- }
33
- }
34
-
35
- /* If it is a temp, search for a temp local. */
36
- if (ts->kind == TEMP_NORMAL) {
37
- for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
38
- if (i->kind >= TEMP_LOCAL) {
39
- return i;
40
+ } else if (i->kind > ts->kind) {
41
+ if (i->kind == TEMP_GLOBAL) {
42
+ g = i;
43
+ } else if (i->kind == TEMP_LOCAL) {
44
+ l = i;
45
}
46
}
47
}
48
49
- /* Failure to find a better representation, return the same temp. */
50
- return ts;
51
+ /* If we didn't find a better representation, return the same temp. */
52
+ return g ? g : l ? l : ts;
53
}
54
55
static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
56
--
57
2.25.1
58
59
diff view generated by jsdifflib
1
Improve expand_vec_shi to use sign-extraction for MO_32.
1
From: Leandro Lupori <leandro.lupori@eldorado.org.br>
2
This allows a single VSPLTISB instruction to load all of
3
the valid shift constants.
4
2
3
PowerPC64 processors handle direct branches better than indirect
4
ones, resulting in fewer stalled cycles and branch misses.
5
6
However, PPC's tb_target_set_jmp_target() was only using direct
7
branches for 16-bit jumps, while PowerPC64's unconditional branch
8
instructions are able to handle displacements of up to 26 bits.
9
To take advantage of this, now jumps whose displacements fit in
10
between 17 and 26 bits are also converted to direct branches.
11
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Leandro Lupori <leandro.lupori@eldorado.org.br>
14
[rth: Expanded some commentary.]
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
16
---
7
tcg/ppc/tcg-target.c.inc | 44 ++++++++++++++++++++++++----------------
17
tcg/ppc/tcg-target.c.inc | 119 +++++++++++++++++++++++++++++----------
8
1 file changed, 27 insertions(+), 17 deletions(-)
18
1 file changed, 88 insertions(+), 31 deletions(-)
9
19
10
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
20
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.c.inc
22
--- a/tcg/ppc/tcg-target.c.inc
13
+++ b/tcg/ppc/tcg-target.c.inc
23
+++ b/tcg/ppc/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
24
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
15
static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
25
tcg_out32(s, insn);
16
TCGv_vec v1, TCGArg imm, TCGOpcode opci)
26
}
27
28
+static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2)
29
+{
30
+ if (HOST_BIG_ENDIAN) {
31
+ return (uint64_t)i1 << 32 | i2;
32
+ }
33
+ return (uint64_t)i2 << 32 | i1;
34
+}
35
+
36
+static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw,
37
+ tcg_insn_unit i0, tcg_insn_unit i1)
38
+{
39
+#if TCG_TARGET_REG_BITS == 64
40
+ qatomic_set((uint64_t *)rw, make_pair(i0, i1));
41
+ flush_idcache_range(rx, rw, 8);
42
+#else
43
+ qemu_build_not_reached();
44
+#endif
45
+}
46
+
47
+static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw,
48
+ tcg_insn_unit i0, tcg_insn_unit i1,
49
+ tcg_insn_unit i2, tcg_insn_unit i3)
50
+{
51
+ uint64_t p[2];
52
+
53
+ p[!HOST_BIG_ENDIAN] = make_pair(i0, i1);
54
+ p[HOST_BIG_ENDIAN] = make_pair(i2, i3);
55
+
56
+ /*
57
+ * There's no convenient way to get the compiler to allocate a pair
58
+ * of registers at an even index, so copy into r6/r7 and clobber.
59
+ */
60
+ asm("mr %%r6, %1\n\t"
61
+ "mr %%r7, %2\n\t"
62
+ "stq %%r6, %0"
63
+ : "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7");
64
+ flush_idcache_range(rx, rw, 16);
65
+}
66
+
67
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
68
uintptr_t jmp_rw, uintptr_t addr)
17
{
69
{
18
- TCGv_vec t1 = tcg_temp_new_vec(type);
70
- if (TCG_TARGET_REG_BITS == 64) {
19
+ TCGv_vec t1;
71
- tcg_insn_unit i1, i2;
20
72
- intptr_t tb_diff = addr - tc_ptr;
21
- /* Splat w/bytes for xxspltib. */
73
- intptr_t br_diff = addr - (jmp_rx + 4);
22
- tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1));
74
- uint64_t pair;
23
+ if (vece == MO_32) {
75
+ tcg_insn_unit i0, i1, i2, i3;
24
+ /*
76
+ intptr_t tb_diff = addr - tc_ptr;
25
+ * Only 5 bits are significant, and VSPLTISB can represent -16..15.
77
+ intptr_t br_diff = addr - (jmp_rx + 4);
26
+ * So using negative numbers gets us the 4th bit easily.
78
+ intptr_t lo, hi;
27
+ */
79
28
+ imm = sextract32(imm, 0, 5);
80
- /* This does not exercise the range of the branch, but we do
29
+ } else {
81
- still need to be able to load the new value of TCG_REG_TB.
30
+ imm &= (8 << vece) - 1;
82
- But this does still happen quite often. */
83
- if (tb_diff == (int16_t)tb_diff) {
84
- i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
85
- i2 = B | (br_diff & 0x3fffffc);
86
- } else {
87
- intptr_t lo = (int16_t)tb_diff;
88
- intptr_t hi = (int32_t)(tb_diff - lo);
89
- assert(tb_diff == hi + lo);
90
- i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
91
- i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
92
- }
93
-#if HOST_BIG_ENDIAN
94
- pair = (uint64_t)i1 << 32 | i2;
95
-#else
96
- pair = (uint64_t)i2 << 32 | i1;
97
-#endif
98
-
99
- /* As per the enclosing if, this is ppc64. Avoid the _Static_assert
100
- within qatomic_set that would fail to build a ppc32 host. */
101
- qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
102
- flush_idcache_range(jmp_rx, jmp_rw, 8);
103
- } else {
104
+ if (TCG_TARGET_REG_BITS == 32) {
105
intptr_t diff = addr - jmp_rx;
106
tcg_debug_assert(in_range_b(diff));
107
qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
108
flush_idcache_range(jmp_rx, jmp_rw, 4);
109
+ return;
110
}
111
+
112
+ /*
113
+ * For 16-bit displacements, we can use a single add + branch.
114
+ * This happens quite often.
115
+ */
116
+ if (tb_diff == (int16_t)tb_diff) {
117
+ i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
118
+ i1 = B | (br_diff & 0x3fffffc);
119
+ ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
120
+ return;
31
+ }
121
+ }
32
+
122
+
33
+ /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
123
+ lo = (int16_t)tb_diff;
34
+ t1 = tcg_constant_vec(type, MO_8, imm);
124
+ hi = (int32_t)(tb_diff - lo);
35
vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
125
+ assert(tb_diff == hi + lo);
36
tcgv_vec_arg(v1), tcgv_vec_arg(t1));
126
+ i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
37
- tcg_temp_free_vec(t1);
127
+ i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
128
+
129
+ /*
130
+ * Without stq from 2.07, we can only update two insns,
131
+ * and those must be the ones that load the target address.
132
+ */
133
+ if (!have_isa_2_07) {
134
+ ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
135
+ return;
136
+ }
137
+
138
+ /*
139
+ * For 26-bit displacements, we can use a direct branch.
140
+ * Otherwise we still need the indirect branch, which we
141
+ * must restore after a potential direct branch write.
142
+ */
143
+ br_diff -= 4;
144
+ if (in_range_b(br_diff)) {
145
+ i2 = B | (br_diff & 0x3fffffc);
146
+ i3 = NOP;
147
+ } else {
148
+ i2 = MTSPR | RS(TCG_REG_TB) | CTR;
149
+ i3 = BCCTR | BO_ALWAYS;
150
+ }
151
+ ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3);
38
}
152
}
39
153
40
static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
154
static void tcg_out_call_int(TCGContext *s, int lk,
41
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
155
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
42
{
156
if (s->tb_jmp_insn_offset) {
43
TCGv_vec t1 = tcg_temp_new_vec(type);
157
/* Direct jump. */
44
TCGv_vec t2 = tcg_temp_new_vec(type);
158
if (TCG_TARGET_REG_BITS == 64) {
45
- TCGv_vec t3, t4;
159
- /* Ensure the next insns are 8-byte aligned. */
46
+ TCGv_vec c0, c16;
160
- if ((uintptr_t)s->code_ptr & 7) {
47
161
+ /* Ensure the next insns are 8 or 16-byte aligned. */
48
switch (vece) {
162
+ while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) {
49
case MO_8:
163
tcg_out32(s, NOP);
50
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
164
}
51
165
s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
52
case MO_32:
53
tcg_debug_assert(!have_isa_2_07);
54
- t3 = tcg_temp_new_vec(type);
55
- t4 = tcg_temp_new_vec(type);
56
- tcg_gen_dupi_vec(MO_8, t4, -16);
57
+ /*
58
+ * Only 5 bits are significant, and VSPLTISB can represent -16..15.
59
+ * So using -16 is a quick way to represent 16.
60
+ */
61
+ c16 = tcg_constant_vec(type, MO_8, -16);
62
+ c0 = tcg_constant_vec(type, MO_8, 0);
63
+
64
vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
65
- tcgv_vec_arg(v2), tcgv_vec_arg(t4));
66
+ tcgv_vec_arg(v2), tcgv_vec_arg(c16));
67
vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
68
tcgv_vec_arg(v1), tcgv_vec_arg(v2));
69
- tcg_gen_dupi_vec(MO_8, t3, 0);
70
- vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3),
71
- tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
72
- vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3),
73
- tcgv_vec_arg(t3), tcgv_vec_arg(t4));
74
- tcg_gen_add_vec(MO_32, v0, t2, t3);
75
- tcg_temp_free_vec(t3);
76
- tcg_temp_free_vec(t4);
77
+ vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
78
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
79
+ vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
80
+ tcgv_vec_arg(t1), tcgv_vec_arg(c16));
81
+ tcg_gen_add_vec(MO_32, v0, t1, t2);
82
break;
83
84
default:
85
--
166
--
86
2.25.1
167
2.34.1
87
88
diff view generated by jsdifflib
1
Because we now store int64_t in TCGTemp, we can always
1
The value previously chosen overlaps GUSA_MASK.
2
store the full 64-bit duplicate immediate. So remove the
3
difference between 32- and 64-bit hosts.
4
2
3
Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
4
that they are included in TB_FLAGs. Add aliases for the
5
FPSCR and SR bits that are included in TB_FLAGS, so that
6
we don't accidentally reassign those bits.
7
8
Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
9
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
10
Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
12
---
7
tcg/optimize.c | 9 ++++-----
13
target/sh4/cpu.h | 56 +++++++++++++------------
8
tcg/tcg-op-vec.c | 39 ++++++++++-----------------------------
14
linux-user/sh4/signal.c | 6 +--
9
tcg/tcg.c | 7 +------
15
target/sh4/cpu.c | 6 +--
10
3 files changed, 15 insertions(+), 40 deletions(-)
16
target/sh4/helper.c | 6 +--
17
target/sh4/translate.c | 90 ++++++++++++++++++++++-------------------
18
5 files changed, 88 insertions(+), 76 deletions(-)
11
19
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
20
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
13
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
22
--- a/target/sh4/cpu.h
15
+++ b/tcg/optimize.c
23
+++ b/target/sh4/cpu.h
16
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
24
@@ -XXX,XX +XXX,XX @@
17
case INDEX_op_dup2_vec:
25
#define FPSCR_RM_NEAREST (0 << 0)
18
assert(TCG_TARGET_REG_BITS == 32);
26
#define FPSCR_RM_ZERO (1 << 0)
19
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
27
20
- tmp = arg_info(op->args[1])->val;
28
-#define DELAY_SLOT_MASK 0x7
21
- if (tmp == arg_info(op->args[2])->val) {
29
-#define DELAY_SLOT (1 << 0)
22
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
30
-#define DELAY_SLOT_CONDITIONAL (1 << 1)
23
- break;
31
-#define DELAY_SLOT_RTE (1 << 2)
24
- }
32
+#define TB_FLAG_DELAY_SLOT (1 << 0)
25
+ tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
33
+#define TB_FLAG_DELAY_SLOT_COND (1 << 1)
26
+ deposit64(arg_info(op->args[1])->val, 32, 32,
34
+#define TB_FLAG_DELAY_SLOT_RTE (1 << 2)
27
+ arg_info(op->args[2])->val));
35
+#define TB_FLAG_PENDING_MOVCA (1 << 3)
28
+ break;
36
+#define TB_FLAG_GUSA_SHIFT 4 /* [11:4] */
29
} else if (args_are_copies(op->args[1], op->args[2])) {
37
+#define TB_FLAG_GUSA_EXCLUSIVE (1 << 12)
30
op->opc = INDEX_op_dup_vec;
38
+#define TB_FLAG_UNALIGN (1 << 13)
31
TCGOP_VECE(op) = MO_32;
39
+#define TB_FLAG_SR_FD (1 << SR_FD) /* 15 */
32
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
40
+#define TB_FLAG_FPSCR_PR FPSCR_PR /* 19 */
41
+#define TB_FLAG_FPSCR_SZ FPSCR_SZ /* 20 */
42
+#define TB_FLAG_FPSCR_FR FPSCR_FR /* 21 */
43
+#define TB_FLAG_SR_RB (1 << SR_RB) /* 29 */
44
+#define TB_FLAG_SR_MD (1 << SR_MD) /* 30 */
45
46
-#define TB_FLAG_PENDING_MOVCA (1 << 3)
47
-#define TB_FLAG_UNALIGN (1 << 4)
48
-
49
-#define GUSA_SHIFT 4
50
-#ifdef CONFIG_USER_ONLY
51
-#define GUSA_EXCLUSIVE (1 << 12)
52
-#define GUSA_MASK ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
53
-#else
54
-/* Provide dummy versions of the above to allow tests against tbflags
55
- to be elided while avoiding ifdefs. */
56
-#define GUSA_EXCLUSIVE 0
57
-#define GUSA_MASK 0
58
-#endif
59
-
60
-#define TB_FLAG_ENVFLAGS_MASK (DELAY_SLOT_MASK | GUSA_MASK)
61
+#define TB_FLAG_DELAY_SLOT_MASK (TB_FLAG_DELAY_SLOT | \
62
+ TB_FLAG_DELAY_SLOT_COND | \
63
+ TB_FLAG_DELAY_SLOT_RTE)
64
+#define TB_FLAG_GUSA_MASK ((0xff << TB_FLAG_GUSA_SHIFT) | \
65
+ TB_FLAG_GUSA_EXCLUSIVE)
66
+#define TB_FLAG_FPSCR_MASK (TB_FLAG_FPSCR_PR | \
67
+ TB_FLAG_FPSCR_SZ | \
68
+ TB_FLAG_FPSCR_FR)
69
+#define TB_FLAG_SR_MASK (TB_FLAG_SR_FD | \
70
+ TB_FLAG_SR_RB | \
71
+ TB_FLAG_SR_MD)
72
+#define TB_FLAG_ENVFLAGS_MASK (TB_FLAG_DELAY_SLOT_MASK | \
73
+ TB_FLAG_GUSA_MASK)
74
75
typedef struct tlb_t {
76
uint32_t vpn;        /* virtual page number */
77
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
78
{
79
/* The instruction in a RTE delay slot is fetched in privileged
80
mode, but executed in user mode. */
81
- if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
82
+ if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
83
return 0;
84
} else {
85
return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
86
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
87
{
88
*pc = env->pc;
89
/* For a gUSA region, notice the end of the region. */
90
- *cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
91
- *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
92
- | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */
93
- | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */
94
- | (env->sr & (1u << SR_FD)) /* Bit 15 */
95
+ *cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0;
96
+ *flags = env->flags
97
+ | (env->fpscr & TB_FLAG_FPSCR_MASK)
98
+ | (env->sr & TB_FLAG_SR_MASK)
99
| (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
100
#ifdef CONFIG_USER_ONLY
101
*flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
102
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
33
index XXXXXXX..XXXXXXX 100644
103
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/tcg-op-vec.c
104
--- a/linux-user/sh4/signal.c
35
+++ b/tcg/tcg-op-vec.c
105
+++ b/linux-user/sh4/signal.c
36
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
106
@@ -XXX,XX +XXX,XX @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
37
}
107
__get_user(regs->fpul, &sc->sc_fpul);
108
109
regs->tra = -1; /* disable syscall checks */
110
- regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
111
+ regs->flags = 0;
38
}
112
}
39
113
40
-#define MO_REG (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)
114
void setup_frame(int sig, struct target_sigaction *ka,
41
-
115
@@ -XXX,XX +XXX,XX @@ void setup_frame(int sig, struct target_sigaction *ka,
42
-static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
116
regs->gregs[5] = 0;
43
-{
117
regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc);
44
- TCGTemp *rt = tcgv_vec_temp(r);
118
regs->pc = (unsigned long) ka->_sa_handler;
45
- vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
119
- regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
46
-}
120
+ regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
47
-
121
48
TCGv_vec tcg_const_zeros_vec(TCGType type)
122
unlock_user_struct(frame, frame_addr, 1);
123
return;
124
@@ -XXX,XX +XXX,XX @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
125
regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info);
126
regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc);
127
regs->pc = (unsigned long) ka->_sa_handler;
128
- regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
129
+ regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
130
131
unlock_user_struct(frame, frame_addr, 1);
132
return;
133
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/target/sh4/cpu.c
136
+++ b/target/sh4/cpu.c
137
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
138
SuperHCPU *cpu = SUPERH_CPU(cs);
139
140
cpu->env.pc = tb_pc(tb);
141
- cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
142
+ cpu->env.flags = tb->flags;
143
}
144
145
#ifndef CONFIG_USER_ONLY
146
@@ -XXX,XX +XXX,XX @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
147
SuperHCPU *cpu = SUPERH_CPU(cs);
148
CPUSH4State *env = &cpu->env;
149
150
- if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
151
+ if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND))
152
&& env->pc != tb_pc(tb)) {
153
env->pc -= 2;
154
- env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
155
+ env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND);
156
return true;
157
}
158
return false;
159
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
160
index XXXXXXX..XXXXXXX 100644
161
--- a/target/sh4/helper.c
162
+++ b/target/sh4/helper.c
163
@@ -XXX,XX +XXX,XX @@ void superh_cpu_do_interrupt(CPUState *cs)
164
env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
165
env->lock_addr = -1;
166
167
- if (env->flags & DELAY_SLOT_MASK) {
168
+ if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
169
/* Branch instruction should be executed again before delay slot. */
170
    env->spc -= 2;
171
    /* Clear flags for exception/interrupt routine. */
172
- env->flags &= ~DELAY_SLOT_MASK;
173
+ env->flags &= ~TB_FLAG_DELAY_SLOT_MASK;
174
}
175
176
if (do_exp) {
177
@@ -XXX,XX +XXX,XX @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
178
CPUSH4State *env = &cpu->env;
179
180
/* Delay slots are indivisible, ignore interrupts */
181
- if (env->flags & DELAY_SLOT_MASK) {
182
+ if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
183
return false;
184
} else {
185
superh_cpu_do_interrupt(cs);
186
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/target/sh4/translate.c
189
+++ b/target/sh4/translate.c
190
@@ -XXX,XX +XXX,XX @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
191
         i, env->gregs[i], i + 1, env->gregs[i + 1],
192
         i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
193
}
194
- if (env->flags & DELAY_SLOT) {
195
+ if (env->flags & TB_FLAG_DELAY_SLOT) {
196
qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
197
         env->delayed_pc);
198
- } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
199
+ } else if (env->flags & TB_FLAG_DELAY_SLOT_COND) {
200
qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
201
         env->delayed_pc);
202
- } else if (env->flags & DELAY_SLOT_RTE) {
203
+ } else if (env->flags & TB_FLAG_DELAY_SLOT_RTE) {
204
qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
205
env->delayed_pc);
206
}
207
@@ -XXX,XX +XXX,XX @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
208
209
static inline bool use_exit_tb(DisasContext *ctx)
49
{
210
{
50
TCGv_vec ret = tcg_temp_new_vec(type);
211
- return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
51
- do_dupi_vec(ret, MO_REG, 0);
212
+ return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0;
52
+ tcg_gen_dupi_vec(MO_64, ret, 0);
53
return ret;
54
}
213
}
55
214
56
TCGv_vec tcg_const_ones_vec(TCGType type)
215
static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
216
@@ -XXX,XX +XXX,XX @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
217
TCGLabel *l1 = gen_new_label();
218
TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;
219
220
- if (ctx->tbflags & GUSA_EXCLUSIVE) {
221
+ if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
222
/* When in an exclusive region, we must continue to the end.
223
Therefore, exit the region on a taken branch, but otherwise
224
fall through to the next instruction. */
225
tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
226
- tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
227
+ tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
228
/* Note that this won't actually use a goto_tb opcode because we
229
disallow it in use_goto_tb, but it handles exit + singlestep. */
230
gen_goto_tb(ctx, 0, dest);
231
@@ -XXX,XX +XXX,XX @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
232
tcg_gen_mov_i32(ds, cpu_delayed_cond);
233
tcg_gen_discard_i32(cpu_delayed_cond);
234
235
- if (ctx->tbflags & GUSA_EXCLUSIVE) {
236
+ if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
237
/* When in an exclusive region, we must continue to the end.
238
Therefore, exit the region on a taken branch, but otherwise
239
fall through to the next instruction. */
240
tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);
241
242
/* Leave the gUSA region. */
243
- tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
244
+ tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
245
gen_jump(ctx);
246
247
gen_set_label(l1);
248
@@ -XXX,XX +XXX,XX @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
249
#define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
250
251
#define CHECK_NOT_DELAY_SLOT \
252
- if (ctx->envflags & DELAY_SLOT_MASK) { \
253
- goto do_illegal_slot; \
254
+ if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) { \
255
+ goto do_illegal_slot; \
256
}
257
258
#define CHECK_PRIVILEGED \
259
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
260
case 0x000b:        /* rts */
261
    CHECK_NOT_DELAY_SLOT
262
    tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
263
- ctx->envflags |= DELAY_SLOT;
264
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
265
    ctx->delayed_pc = (uint32_t) - 1;
266
    return;
267
case 0x0028:        /* clrmac */
268
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
269
    CHECK_NOT_DELAY_SLOT
270
gen_write_sr(cpu_ssr);
271
    tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
272
- ctx->envflags |= DELAY_SLOT_RTE;
273
+ ctx->envflags |= TB_FLAG_DELAY_SLOT_RTE;
274
    ctx->delayed_pc = (uint32_t) - 1;
275
ctx->base.is_jmp = DISAS_STOP;
276
    return;
277
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
278
    return;
279
case 0xe000:        /* mov #imm,Rn */
280
#ifdef CONFIG_USER_ONLY
281
- /* Detect the start of a gUSA region. If so, update envflags
282
- and end the TB. This will allow us to see the end of the
283
- region (stored in R0) in the next TB. */
284
+ /*
285
+ * Detect the start of a gUSA region (mov #-n, r15).
286
+ * If so, update envflags and end the TB. This will allow us
287
+ * to see the end of the region (stored in R0) in the next TB.
288
+ */
289
if (B11_8 == 15 && B7_0s < 0 &&
290
(tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
291
- ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
292
+ ctx->envflags =
293
+ deposit32(ctx->envflags, TB_FLAG_GUSA_SHIFT, 8, B7_0s);
294
ctx->base.is_jmp = DISAS_STOP;
295
}
296
#endif
297
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
298
case 0xa000:        /* bra disp */
299
    CHECK_NOT_DELAY_SLOT
300
ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
301
- ctx->envflags |= DELAY_SLOT;
302
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
303
    return;
304
case 0xb000:        /* bsr disp */
305
    CHECK_NOT_DELAY_SLOT
306
tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
307
ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
308
- ctx->envflags |= DELAY_SLOT;
309
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
310
    return;
311
}
312
313
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
314
    CHECK_NOT_DELAY_SLOT
315
tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
316
ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
317
- ctx->envflags |= DELAY_SLOT_CONDITIONAL;
318
+ ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
319
    return;
320
case 0x8900:        /* bt label */
321
    CHECK_NOT_DELAY_SLOT
322
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
323
    CHECK_NOT_DELAY_SLOT
324
tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
325
ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
326
- ctx->envflags |= DELAY_SLOT_CONDITIONAL;
327
+ ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
328
    return;
329
case 0x8800:        /* cmp/eq #imm,R0 */
330
tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
331
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
332
case 0x0023:        /* braf Rn */
333
    CHECK_NOT_DELAY_SLOT
334
tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
335
- ctx->envflags |= DELAY_SLOT;
336
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
337
    ctx->delayed_pc = (uint32_t) - 1;
338
    return;
339
case 0x0003:        /* bsrf Rn */
340
    CHECK_NOT_DELAY_SLOT
341
tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
342
    tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
343
- ctx->envflags |= DELAY_SLOT;
344
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
345
    ctx->delayed_pc = (uint32_t) - 1;
346
    return;
347
case 0x4015:        /* cmp/pl Rn */
348
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
349
case 0x402b:        /* jmp @Rn */
350
    CHECK_NOT_DELAY_SLOT
351
    tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
352
- ctx->envflags |= DELAY_SLOT;
353
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
354
    ctx->delayed_pc = (uint32_t) - 1;
355
    return;
356
case 0x400b:        /* jsr @Rn */
357
    CHECK_NOT_DELAY_SLOT
358
tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
359
    tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
360
- ctx->envflags |= DELAY_SLOT;
361
+ ctx->envflags |= TB_FLAG_DELAY_SLOT;
362
    ctx->delayed_pc = (uint32_t) - 1;
363
    return;
364
case 0x400e:        /* ldc Rm,SR */
365
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
366
fflush(stderr);
367
#endif
368
do_illegal:
369
- if (ctx->envflags & DELAY_SLOT_MASK) {
370
+ if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
371
do_illegal_slot:
372
gen_save_cpu_state(ctx, true);
373
gen_helper_raise_slot_illegal_instruction(cpu_env);
374
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
375
376
do_fpu_disabled:
377
gen_save_cpu_state(ctx, true);
378
- if (ctx->envflags & DELAY_SLOT_MASK) {
379
+ if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
380
gen_helper_raise_slot_fpu_disable(cpu_env);
381
} else {
382
gen_helper_raise_fpu_disable(cpu_env);
383
@@ -XXX,XX +XXX,XX @@ static void decode_opc(DisasContext * ctx)
384
385
_decode_opc(ctx);
386
387
- if (old_flags & DELAY_SLOT_MASK) {
388
+ if (old_flags & TB_FLAG_DELAY_SLOT_MASK) {
389
/* go out of the delay slot */
390
- ctx->envflags &= ~DELAY_SLOT_MASK;
391
+ ctx->envflags &= ~TB_FLAG_DELAY_SLOT_MASK;
392
393
/* When in an exclusive region, we must continue to the end
394
for conditional branches. */
395
- if (ctx->tbflags & GUSA_EXCLUSIVE
396
- && old_flags & DELAY_SLOT_CONDITIONAL) {
397
+ if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE
398
+ && old_flags & TB_FLAG_DELAY_SLOT_COND) {
399
gen_delayed_conditional_jump(ctx);
400
return;
401
}
402
/* Otherwise this is probably an invalid gUSA region.
403
Drop the GUSA bits so the next TB doesn't see them. */
404
- ctx->envflags &= ~GUSA_MASK;
405
+ ctx->envflags &= ~TB_FLAG_GUSA_MASK;
406
407
tcg_gen_movi_i32(cpu_flags, ctx->envflags);
408
- if (old_flags & DELAY_SLOT_CONDITIONAL) {
409
+ if (old_flags & TB_FLAG_DELAY_SLOT_COND) {
410
     gen_delayed_conditional_jump(ctx);
411
} else {
412
gen_jump(ctx);
413
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
414
}
415
416
/* The entire region has been translated. */
417
- ctx->envflags &= ~GUSA_MASK;
418
+ ctx->envflags &= ~TB_FLAG_GUSA_MASK;
419
ctx->base.pc_next = pc_end;
420
ctx->base.num_insns += max_insns - 1;
421
return;
422
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
423
424
/* Restart with the EXCLUSIVE bit set, within a TB run via
425
cpu_exec_step_atomic holding the exclusive lock. */
426
- ctx->envflags |= GUSA_EXCLUSIVE;
427
+ ctx->envflags |= TB_FLAG_GUSA_EXCLUSIVE;
428
gen_save_cpu_state(ctx, false);
429
gen_helper_exclusive(cpu_env);
430
ctx->base.is_jmp = DISAS_NORETURN;
431
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
432
(tbflags & (1 << SR_RB))) * 0x10;
433
ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
434
435
- if (tbflags & GUSA_MASK) {
436
+#ifdef CONFIG_USER_ONLY
437
+ if (tbflags & TB_FLAG_GUSA_MASK) {
438
+ /* In gUSA exclusive region. */
439
uint32_t pc = ctx->base.pc_next;
440
uint32_t pc_end = ctx->base.tb->cs_base;
441
- int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
442
+ int backup = sextract32(ctx->tbflags, TB_FLAG_GUSA_SHIFT, 8);
443
int max_insns = (pc_end - pc) / 2;
444
445
if (pc != pc_end + backup || max_insns < 2) {
446
/* This is a malformed gUSA region. Don't do anything special,
447
since the interpreter is likely to get confused. */
448
- ctx->envflags &= ~GUSA_MASK;
449
- } else if (tbflags & GUSA_EXCLUSIVE) {
450
+ ctx->envflags &= ~TB_FLAG_GUSA_MASK;
451
+ } else if (tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
452
/* Regardless of single-stepping or the end of the page,
453
we must complete execution of the gUSA region while
454
holding the exclusive lock. */
455
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
456
return;
457
}
458
}
459
+#endif
460
461
/* Since the ISA is fixed-width, we can bound by the number
462
of instructions remaining on the page. */
463
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
464
DisasContext *ctx = container_of(dcbase, DisasContext, base);
465
466
#ifdef CONFIG_USER_ONLY
467
- if (unlikely(ctx->envflags & GUSA_MASK)
468
- && !(ctx->envflags & GUSA_EXCLUSIVE)) {
469
+ if (unlikely(ctx->envflags & TB_FLAG_GUSA_MASK)
470
+ && !(ctx->envflags & TB_FLAG_GUSA_EXCLUSIVE)) {
471
/* We're in an gUSA region, and we have not already fallen
472
back on using an exclusive region. Attempt to parse the
473
region into a single supported atomic operation. Failure
474
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
57
{
475
{
58
TCGv_vec ret = tcg_temp_new_vec(type);
476
DisasContext *ctx = container_of(dcbase, DisasContext, base);
59
- do_dupi_vec(ret, MO_REG, -1);
477
60
+ tcg_gen_dupi_vec(MO_64, ret, -1);
478
- if (ctx->tbflags & GUSA_EXCLUSIVE) {
61
return ret;
479
+ if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
62
}
480
/* Ending the region of exclusivity. Clear the bits. */
63
481
- ctx->envflags &= ~GUSA_MASK;
64
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
482
+ ctx->envflags &= ~TB_FLAG_GUSA_MASK;
65
483
}
66
void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
484
67
{
485
switch (ctx->base.is_jmp) {
68
- if (TCG_TARGET_REG_BITS == 64) {
69
- do_dupi_vec(r, MO_64, a);
70
- } else if (a == dup_const(MO_32, a)) {
71
- do_dupi_vec(r, MO_32, a);
72
- } else {
73
- TCGv_i64 c = tcg_const_i64(a);
74
- tcg_gen_dup_i64_vec(MO_64, r, c);
75
- tcg_temp_free_i64(c);
76
- }
77
+ tcg_gen_dupi_vec(MO_64, r, a);
78
}
79
80
void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
81
{
82
- do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
83
+ tcg_gen_dupi_vec(MO_32, r, a);
84
}
85
86
void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
87
{
88
- do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
89
+ tcg_gen_dupi_vec(MO_16, r, a);
90
}
91
92
void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
93
{
94
- do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
95
+ tcg_gen_dupi_vec(MO_8, r, a);
96
}
97
98
void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
99
{
100
- if (vece == MO_64) {
101
- tcg_gen_dup64i_vec(r, a);
102
- } else {
103
- do_dupi_vec(r, MO_REG, dup_const(vece, a));
104
- }
105
+ TCGTemp *rt = tcgv_vec_temp(r);
106
+ tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
107
}
108
109
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
110
@@ -XXX,XX +XXX,XX @@ void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
111
if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
112
tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
113
} else {
114
- do_dupi_vec(t, MO_REG, 0);
115
- tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t);
116
+ tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
117
+ tcg_constant_vec(type, vece, 0));
118
}
119
tcg_gen_xor_vec(vece, r, a, t);
120
tcg_gen_sub_vec(vece, r, r, t);
121
diff --git a/tcg/tcg.c b/tcg/tcg.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/tcg/tcg.c
124
+++ b/tcg/tcg.c
125
@@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
126
* The targets will, in general, have to do this search anyway,
127
* do this generically.
128
*/
129
- if (TCG_TARGET_REG_BITS == 32) {
130
- val = dup_const(MO_32, val);
131
- vece = MO_32;
132
- }
133
if (val == dup_const(MO_8, val)) {
134
vece = MO_8;
135
} else if (val == dup_const(MO_16, val)) {
136
vece = MO_16;
137
- } else if (TCG_TARGET_REG_BITS == 64 &&
138
- val == dup_const(MO_32, val)) {
139
+ } else if (val == dup_const(MO_32, val)) {
140
vece = MO_32;
141
}
142
143
--
486
--
144
2.25.1
487
2.34.1
145
146
diff view generated by jsdifflib
Deleted patch
1
There are several ways we can expand a vector dup of a 64-bit
2
element on a 32-bit host.
3
1
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/tcg.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
7
1 file changed, 97 insertions(+)
8
9
diff --git a/tcg/tcg.c b/tcg/tcg.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/tcg.c
12
+++ b/tcg/tcg.c
13
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
14
}
15
}
16
17
+static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
18
+{
19
+ const TCGLifeData arg_life = op->life;
20
+ TCGTemp *ots, *itsl, *itsh;
21
+ TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
22
+
23
+ /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
24
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
25
+ tcg_debug_assert(TCGOP_VECE(op) == MO_64);
26
+
27
+ ots = arg_temp(op->args[0]);
28
+ itsl = arg_temp(op->args[1]);
29
+ itsh = arg_temp(op->args[2]);
30
+
31
+ /* ENV should not be modified. */
32
+ tcg_debug_assert(!temp_readonly(ots));
33
+
34
+ /* Allocate the output register now. */
35
+ if (ots->val_type != TEMP_VAL_REG) {
36
+ TCGRegSet allocated_regs = s->reserved_regs;
37
+ TCGRegSet dup_out_regs =
38
+ tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
39
+
40
+ /* Make sure to not spill the input registers. */
41
+ if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
42
+ tcg_regset_set_reg(allocated_regs, itsl->reg);
43
+ }
44
+ if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
45
+ tcg_regset_set_reg(allocated_regs, itsh->reg);
46
+ }
47
+
48
+ ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
49
+ op->output_pref[0], ots->indirect_base);
50
+ ots->val_type = TEMP_VAL_REG;
51
+ ots->mem_coherent = 0;
52
+ s->reg_to_temp[ots->reg] = ots;
53
+ }
54
+
55
+ /* Promote dup2 of immediates to dupi_vec. */
56
+ if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
57
+ uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
58
+ MemOp vece = MO_64;
59
+
60
+ if (val == dup_const(MO_8, val)) {
61
+ vece = MO_8;
62
+ } else if (val == dup_const(MO_16, val)) {
63
+ vece = MO_16;
64
+ } else if (val == dup_const(MO_32, val)) {
65
+ vece = MO_32;
66
+ }
67
+
68
+ tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
69
+ goto done;
70
+ }
71
+
72
+ /* If the two inputs form one 64-bit value, try dupm_vec. */
73
+ if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
74
+ if (!itsl->mem_coherent) {
75
+ temp_sync(s, itsl, s->reserved_regs, 0, 0);
76
+ }
77
+ if (!itsh->mem_coherent) {
78
+ temp_sync(s, itsh, s->reserved_regs, 0, 0);
79
+ }
80
+#ifdef HOST_WORDS_BIGENDIAN
81
+ TCGTemp *its = itsh;
82
+#else
83
+ TCGTemp *its = itsl;
84
+#endif
85
+ if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
86
+ its->mem_base->reg, its->mem_offset)) {
87
+ goto done;
88
+ }
89
+ }
90
+
91
+ /* Fall back to generic expansion. */
92
+ return false;
93
+
94
+ done:
95
+ if (IS_DEAD_ARG(1)) {
96
+ temp_dead(s, itsl);
97
+ }
98
+ if (IS_DEAD_ARG(2)) {
99
+ temp_dead(s, itsh);
100
+ }
101
+ if (NEED_SYNC_ARG(0)) {
102
+ temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
103
+ } else if (IS_DEAD_ARG(0)) {
104
+ temp_dead(s, ots);
105
+ }
106
+ return true;
107
+}
108
+
109
#ifdef TCG_TARGET_STACK_GROWSUP
110
#define STACK_DIR(x) (-(x))
111
#else
112
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
113
case INDEX_op_call:
114
tcg_reg_alloc_call(s, op);
115
break;
116
+ case INDEX_op_dup2_vec:
117
+ if (tcg_reg_alloc_dup2(s, op)) {
118
+ break;
119
+ }
120
+ /* fall through */
121
default:
122
/* Sanity check that we've not introduced any unhandled opcodes. */
123
tcg_debug_assert(tcg_op_supported(opc));
124
--
125
2.25.1
126
127
diff view generated by jsdifflib
Deleted patch
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/i386/tcg-target.c.inc | 26 +++++++++++++-------------
5
1 file changed, 13 insertions(+), 13 deletions(-)
6
1
7
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/i386/tcg-target.c.inc
10
+++ b/tcg/i386/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
12
static void expand_vec_mul(TCGType type, unsigned vece,
13
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
14
{
15
- TCGv_vec t1, t2, t3, t4;
16
+ TCGv_vec t1, t2, t3, t4, zero;
17
18
tcg_debug_assert(vece == MO_8);
19
20
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece,
21
case TCG_TYPE_V64:
22
t1 = tcg_temp_new_vec(TCG_TYPE_V128);
23
t2 = tcg_temp_new_vec(TCG_TYPE_V128);
24
- tcg_gen_dup16i_vec(t2, 0);
25
+ zero = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0);
26
vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
27
- tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(t2));
28
+ tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(zero));
29
vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
30
- tcgv_vec_arg(t2), tcgv_vec_arg(t2), tcgv_vec_arg(v2));
31
+ tcgv_vec_arg(t2), tcgv_vec_arg(zero), tcgv_vec_arg(v2));
32
tcg_gen_mul_vec(MO_16, t1, t1, t2);
33
tcg_gen_shri_vec(MO_16, t1, t1, 8);
34
vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
35
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece,
36
t2 = tcg_temp_new_vec(type);
37
t3 = tcg_temp_new_vec(type);
38
t4 = tcg_temp_new_vec(type);
39
- tcg_gen_dup16i_vec(t4, 0);
40
+ zero = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0);
41
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
42
- tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(t4));
43
+ tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(zero));
44
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
45
- tcgv_vec_arg(t2), tcgv_vec_arg(t4), tcgv_vec_arg(v2));
46
+ tcgv_vec_arg(t2), tcgv_vec_arg(zero), tcgv_vec_arg(v2));
47
vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
48
- tcgv_vec_arg(t3), tcgv_vec_arg(v1), tcgv_vec_arg(t4));
49
+ tcgv_vec_arg(t3), tcgv_vec_arg(v1), tcgv_vec_arg(zero));
50
vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
51
- tcgv_vec_arg(t4), tcgv_vec_arg(t4), tcgv_vec_arg(v2));
52
+ tcgv_vec_arg(t4), tcgv_vec_arg(zero), tcgv_vec_arg(v2));
53
tcg_gen_mul_vec(MO_16, t1, t1, t2);
54
tcg_gen_mul_vec(MO_16, t3, t3, t4);
55
tcg_gen_shri_vec(MO_16, t1, t1, 8);
56
@@ -XXX,XX +XXX,XX @@ static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
57
NEED_UMIN = 8,
58
NEED_UMAX = 16,
59
};
60
- TCGv_vec t1, t2;
61
+ TCGv_vec t1, t2, t3;
62
uint8_t fixup;
63
64
switch (cond) {
65
@@ -XXX,XX +XXX,XX @@ static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
66
} else if (fixup & NEED_BIAS) {
67
t1 = tcg_temp_new_vec(type);
68
t2 = tcg_temp_new_vec(type);
69
- tcg_gen_dupi_vec(vece, t2, 1ull << ((8 << vece) - 1));
70
- tcg_gen_sub_vec(vece, t1, v1, t2);
71
- tcg_gen_sub_vec(vece, t2, v2, t2);
72
+ t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
73
+ tcg_gen_sub_vec(vece, t1, v1, t3);
74
+ tcg_gen_sub_vec(vece, t2, v2, t3);
75
v1 = t1;
76
v2 = t2;
77
cond = tcg_signed_cond(cond);
78
--
79
2.25.1
80
81
diff view generated by jsdifflib