The following changes since commit 222059a0fccf4af3be776fe35a5ea2d6a68f9a0b:

  Merge tag 'pull-ppc-20221221' of https://gitlab.com/danielhb/qemu into staging (2022-12-21 18:08:09 +0000)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20221229

for you to fetch changes up to b05e35533782a71a9fda472afd08442f50622a3e:

  tests/tcg/multiarch: add vma-pthread.c (2022-12-29 12:39:45 -0800)

----------------------------------------------------------------
Fix race conditions in new user-only vma tracking.
Add tcg backend paired register allocation.
Cleanup tcg backend function call abi.

----------------------------------------------------------------
Ilya Leoshkevich (1):
      tests/tcg/multiarch: add vma-pthread.c

Mark Cave-Ayland (1):
      tcg: convert tcg/README to rst

Philippe Mathieu-Daudé (5):
      tcg/s390x: Fix coding style
      tcg: Massage process_op_defs()
      tcg: Pass number of arguments to tcg_emit_op() / tcg_op_insert_*()
      tcg: Convert typecode_to_ffi from array to function
      tcg: Factor init_ffi_layouts() out of tcg_context_init()

Richard Henderson (40):
      meson: Move CONFIG_TCG_INTERPRETER to config_host
      tcg: Cleanup trailing whitespace
      qemu/main-loop: Introduce QEMU_IOTHREAD_LOCK_GUARD
      hw/mips: Use QEMU_IOTHREAD_LOCK_GUARD in cpu_mips_irq_request
      target/ppc: Use QEMU_IOTHREAD_LOCK_GUARD in ppc_maybe_interrupt
      target/ppc: Use QEMU_IOTHREAD_LOCK_GUARD in cpu_interrupt_exittb
      target/riscv: Use QEMU_IOTHREAD_LOCK_GUARD in riscv_cpu_update_mip
      hw/ppc: Use QEMU_IOTHREAD_LOCK_GUARD in ppc_set_irq
      accel/tcg: Use QEMU_IOTHREAD_LOCK_GUARD in io_readx/io_writex
      tcg: Tidy tcg_reg_alloc_op
      tcg: Remove TCG_TARGET_STACK_GROWSUP
      tci: MAX_OPC_PARAM_IARGS is no longer used
      tcg: Fix tcg_reg_alloc_dup*
      tcg: Centralize updates to reg_to_temp
      tcg: Remove check_regs
      tcg: Introduce paired register allocation
      accel/tcg: Set cflags_next_tb in cpu_common_initfn
      target/sparc: Avoid TCGV_{LOW,HIGH}
      tcg: Move TCG_{LOW,HIGH} to tcg-internal.h
      tcg: Add temp_subindex to TCGTemp
      tcg: Simplify calls to temp_sync vs mem_coherent
      tcg: Allocate TCGTemp pairs in host memory order
      tcg: Move TCG_TYPE_COUNT outside enum
      tcg: Introduce tcg_type_size
      tcg: Introduce TCGCallReturnKind and TCGCallArgumentKind
      tcg: Replace TCG_TARGET_CALL_ALIGN_ARGS with TCG_TARGET_CALL_ARG_I64
      tcg: Replace TCG_TARGET_EXTEND_ARGS with TCG_TARGET_CALL_ARG_I32
      tcg: Use TCG_CALL_ARG_EVEN for TCI special case
      accel/tcg/plugin: Don't search for the function pointer index
      accel/tcg/plugin: Avoid duplicate copy in copy_call
      accel/tcg/plugin: Use copy_op in append_{udata,mem}_cb
      tcg: Vary the allocation size for TCGOp
      tcg: Use output_pref wrapper function
      tcg: Reorg function calls
      tcg: Move ffi_cif pointer into TCGHelperInfo
      tcg/aarch64: Merge tcg_out_callr into tcg_out_call
      tcg: Add TCGHelperInfo argument to tcg_out_call
      accel/tcg: Fix tb_invalidate_phys_page_unwind
      accel/tcg: Use g_free_rcu for user-exec interval trees
      accel/tcg: Handle false negative lookup in page_check_range

 docs/devel/atomics.rst               |    2 +
 docs/devel/index-tcg.rst             |    1 +
 docs/devel/tcg-ops.rst               |  941 +++++++++++++++
 docs/devel/tcg.rst                   |    2 +-
 meson.build                          |    4 +-
 include/exec/helper-head.h           |    2 +-
 include/qemu/main-loop.h             |   29 +
 include/tcg/tcg-op.h                 |   35 +-
 include/tcg/tcg.h                    |   96 +-
 tcg/aarch64/tcg-target.h             |    4 +-
 tcg/arm/tcg-target.h                 |    4 +-
 tcg/i386/tcg-target.h                |    2 +
 tcg/loongarch64/tcg-target.h         |    3 +-
 tcg/mips/tcg-target.h                |    4 +-
 tcg/riscv/tcg-target.h               |    7 +-
 tcg/s390x/tcg-target.h               |    3 +-
 tcg/sparc64/tcg-target.h             |    3 +-
 tcg/tcg-internal.h                   |   58 +-
 tcg/tci/tcg-target.h                 |    7 +
 tests/tcg/multiarch/nop_func.h       |   25 +
 accel/tcg/cputlb.c                   |   25 +-
 accel/tcg/plugin-gen.c               |   54 +-
 accel/tcg/tb-maint.c                 |   78 +-
 accel/tcg/user-exec.c                |   59 +-
 hw/core/cpu-common.c                 |    1 +
 hw/mips/mips_int.c                   |   11 +-
 hw/ppc/ppc.c                         |   10 +-
 target/ppc/excp_helper.c             |   11 +-
 target/ppc/helper_regs.c             |   14 +-
 target/riscv/cpu_helper.c            |   10 +-
 target/sparc/translate.c             |   21 +-
 tcg/optimize.c                       |   10 +-
 tcg/tcg-op-vec.c                     |   10 +-
 tcg/tcg-op.c                         |   49 +-
 tcg/tcg.c                            | 1658 +++++++++++++++++++++++++++-------------
 tcg/tci.c                            |    1 -
 tests/tcg/multiarch/munmap-pthread.c |   16 +-
 tests/tcg/multiarch/vma-pthread.c    |  207 +++
 tcg/aarch64/tcg-target.c.inc         |   19 +-
 tcg/arm/tcg-target.c.inc             |   10 +-
 tcg/i386/tcg-target.c.inc            |    5 +-
 tcg/loongarch64/tcg-target.c.inc     |    7 +-
 tcg/mips/tcg-target.c.inc            |    3 +-
 tcg/ppc/tcg-target.c.inc             |   36 +-
 tcg/riscv/tcg-target.c.inc           |    7 +-
 tcg/s390x/tcg-target.c.inc           |   32 +-
 tcg/sparc64/tcg-target.c.inc         |    3 +-
 tcg/tci/tcg-target.c.inc             |    7 +-
 tcg/README                           |  784 ----------------
 tests/tcg/multiarch/Makefile.target  |    3 +
 50 files changed, 2630 insertions(+), 1763 deletions(-)
 create mode 100644 docs/devel/tcg-ops.rst
 create mode 100644 tests/tcg/multiarch/nop_func.h
 create mode 100644 tests/tcg/multiarch/vma-pthread.c
 delete mode 100644 tcg/README
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>

Convert tcg/README to rst and move it to docs/devel as a new "TCG Intermediate
Representation" page. There are a few minor changes to improve the aesthetic
of the final output which are as follows:

- Rename the title from "Tiny Code Generator - Fabrice Bellard" to "TCG
  Intermediate Representation"

- Remove the section numbering

- Add the missing parameters to the ssadd_vec operations in the "Host
  vector operations" section

- Change the path to the Atomic Operations document to use a proper
  reference

- Replace tcg/README in tcg.rst with a proper reference to the new document

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Message-Id: <20221130100434.64207-2-mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 docs/devel/atomics.rst   |   2 +
 docs/devel/index-tcg.rst |   1 +
 docs/devel/tcg-ops.rst   | 941 +++++++++++++++++++++++++++
 docs/devel/tcg.rst       |   2 +-
 tcg/README               | 784 --------------------------------
 5 files changed, 945 insertions(+), 785 deletions(-)
 create mode 100644 docs/devel/tcg-ops.rst
 delete mode 100644 tcg/README

diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/devel/atomics.rst
+++ b/docs/devel/atomics.rst
@@ -XXX,XX +XXX,XX @@
+.. _atomics-ref:
+
 =========================
 Atomic operations in QEMU
 =========================
diff --git a/docs/devel/index-tcg.rst b/docs/devel/index-tcg.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/devel/index-tcg.rst
+++ b/docs/devel/index-tcg.rst
@@ -XXX,XX +XXX,XX @@ are only implementing things for HW accelerated hypervisors.
    :maxdepth: 2

    tcg
+   tcg-ops
    decodetree
    multi-thread-tcg
    tcg-icount
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/docs/devel/tcg-ops.rst
@@ -XXX,XX +XXX,XX @@
+.. _tcg-ops-ref:
+
+*******************************
+TCG Intermediate Representation
+*******************************
+
+Introduction
+============
+
+TCG (Tiny Code Generator) began as a generic backend for a C
+compiler. It was simplified to be used in QEMU. It also has its roots
+in the QOP code generator written by Paul Brook.
+
+Definitions
+===========
+
+TCG receives RISC-like *TCG ops* and performs some optimizations on them,
+including liveness analysis and trivial constant expression
+evaluation. TCG ops are then implemented in the host CPU back end,
+also known as the TCG target.
+
+The TCG *target* is the architecture for which we generate the
+code. It is of course not the same as the "target" of QEMU, which is
+the emulated architecture. As TCG started as a generic C backend used
+for cross compiling, it is assumed that the TCG target is different
+from the host, although it is never the case for QEMU.
+
+In this document, we use *guest* to specify what architecture we are
+emulating; *target* always means the TCG target, the machine on which
+we are running QEMU.
+
+A TCG *function* corresponds to a QEMU Translated Block (TB).
+
+A TCG *temporary* is a variable only live in a basic block. Temporaries
+are allocated explicitly in each function.
+
+A TCG *local temporary* is a variable only live in a function. Local
+temporaries are allocated explicitly in each function.
+
+A TCG *global* is a variable which is live in all the functions
+(equivalent of a C global variable). They are defined before the
+functions are defined. A TCG global can be a memory location (e.g. a QEMU
+CPU register), a fixed host register (e.g. the QEMU CPU state pointer)
+or a memory location which is stored in a register outside QEMU TBs
+(not implemented yet).
+
+A TCG *basic block* corresponds to a list of instructions terminated
+by a branch instruction.
+
+An operation with *undefined behavior* may result in a crash.
+
+An operation with *unspecified behavior* shall not crash. However,
+the result may be one of several possibilities so may be considered
+an *undefined result*.
+
+Intermediate representation
+===========================
+
+Introduction
+------------
+
+TCG instructions operate on variables which are temporaries, local
+temporaries or globals. TCG instructions and variables are strongly
+typed. Two types are supported: 32 bit integers and 64 bit
+integers. Pointers are defined as an alias to 32 bit or 64 bit
+integers depending on the TCG target word size.
+
+Each instruction has a fixed number of output variable operands, input
+variable operands and always constant operands.
+
+The notable exception is the call instruction which has a variable
+number of outputs and inputs.
+
+In the textual form, output operands usually come first, followed by
+input operands, followed by constant operands. The output type is
+included in the instruction name. Constants are prefixed with a '$'.
+
+.. code-block:: none
+
+   add_i32 t0, t1, t2    /* (t0 <- t1 + t2) */
+
+
+Assumptions
+-----------
+
+Basic blocks
+^^^^^^^^^^^^
+
+* Basic blocks end after branches (e.g. brcond_i32 instruction),
+  goto_tb and exit_tb instructions.
+
+* Basic blocks start after the end of a previous basic block, or at a
+  set_label instruction.
+
+After the end of a basic block, the content of temporaries is
+destroyed, but local temporaries and globals are preserved.
+
+Floating point types
+^^^^^^^^^^^^^^^^^^^^
+
+* Floating point types are not supported yet
+
+Pointers
+^^^^^^^^
+
+* Depending on the TCG target, pointer size is 32 bit or 64
+  bit. The type ``TCG_TYPE_PTR`` is an alias to ``TCG_TYPE_I32`` or
+  ``TCG_TYPE_I64``.
+
+Helpers
+^^^^^^^
+
+* Using the tcg_gen_helper_x_y it is possible to call any function
+  taking i32, i64 or pointer types. By default, before calling a helper,
+  all globals are stored at their canonical location and it is assumed
+  that the function can modify them. By default, the helper is allowed to
+  modify the CPU state or raise an exception.
+
+  This can be overridden using the following function modifiers:
+
+  - ``TCG_CALL_NO_READ_GLOBALS`` means that the helper does not read globals,
+    either directly or via an exception. They will not be saved to their
+    canonical locations before calling the helper.
+
+  - ``TCG_CALL_NO_WRITE_GLOBALS`` means that the helper does not modify any globals.
+    They will only be saved to their canonical location before calling helpers,
+    but they won't be reloaded afterwards.
+
+  - ``TCG_CALL_NO_SIDE_EFFECTS`` means that the call to the function is removed if
+    the return value is not used.
+
+  Note that ``TCG_CALL_NO_READ_GLOBALS`` implies ``TCG_CALL_NO_WRITE_GLOBALS``.
+
+  On some TCG targets (e.g. x86), several calling conventions are
+  supported.
+
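+For illustration, a minimal sketch of declaring and invoking such a
+helper (``muldiv`` is a made-up name; the ``DEF_HELPER_FLAGS_*`` macros
+come from ``include/exec/helper-head.h``, and ``TCG_CALL_NO_RWG_SE``
+combines the no-read/write-globals and no-side-effects flags):
+
+.. code-block:: c
+
+   /* helper.h: a pure function of its arguments, removable if unused. */
+   DEF_HELPER_FLAGS_2(muldiv, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+
+   /* translate.c: the gen_helper_* wrapper is generated from the above. */
+   gen_helper_muldiv(dest, src1, src2);
+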
+Branches
+^^^^^^^^
+
+* Use the instruction 'br' to jump to a label.
+
+Code Optimizations
+------------------
+
+When generating instructions, you can count on at least the following
+optimizations:
+
+- Single instructions are simplified, e.g.
+
+  .. code-block:: none
+
+     and_i32 t0, t0, $0xffffffff
+
+  is suppressed.
+
+- A liveness analysis is done at the basic block level. The
+  information is used to suppress moves from a dead variable to
+  another one. It is also used to remove instructions which compute
+  dead results. The latter is especially useful for condition code
+  optimization in QEMU.
+
+  In the following example:
+
+  .. code-block:: none
+
+     add_i32 t0, t1, t2
+     add_i32 t0, t0, $1
+     mov_i32 t0, $1
+
+  only the last instruction is kept.
+
+
+Instruction Reference
+=====================
+
+Function call
+-------------
+
+.. list-table::
+
+   * - call *<ret>* *<params>* ptr
+
+     - | call function 'ptr' (pointer type)
+       |
+       | *<ret>* optional 32 bit or 64 bit return value
+       | *<params>* optional 32 bit or 64 bit parameters
+
+Jumps/Labels
+------------
+
+.. list-table::
+
+   * - set_label $label
+
+     - | Define label 'label' at the current program point.
+
+   * - br $label
+
+     - | Jump to label.
+
+   * - brcond_i32/i64 *t0*, *t1*, *cond*, *label*
+
+     - | Conditional jump if *t0* *cond* *t1* is true. *cond* can be:
+       |
+       |   ``TCG_COND_EQ``
+       |   ``TCG_COND_NE``
+       |   ``TCG_COND_LT /* signed */``
+       |   ``TCG_COND_GE /* signed */``
+       |   ``TCG_COND_LE /* signed */``
+       |   ``TCG_COND_GT /* signed */``
+       |   ``TCG_COND_LTU /* unsigned */``
+       |   ``TCG_COND_GEU /* unsigned */``
+       |   ``TCG_COND_LEU /* unsigned */``
+       |   ``TCG_COND_GTU /* unsigned */``
+
+Arithmetic
+----------
+
+.. list-table::
+
+   * - add_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* + *t2*
+
+   * - sub_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* - *t2*
+
+   * - neg_i32/i64 *t0*, *t1*
+
+     - | *t0* = -*t1* (two's complement)
+
+   * - mul_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* * *t2*
+
+   * - div_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* / *t2* (signed)
+       | Undefined behavior if division by zero or overflow.
+
+   * - divu_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* / *t2* (unsigned)
+       | Undefined behavior if division by zero.
+
+   * - rem_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* % *t2* (signed)
+       | Undefined behavior if division by zero or overflow.
+
+   * - remu_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* % *t2* (unsigned)
+       | Undefined behavior if division by zero.
+
+
+Logical
+-------
+
+.. list-table::
+
+   * - and_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* & *t2*
+
+   * - or_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* | *t2*
+
+   * - xor_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* ^ *t2*
+
+   * - not_i32/i64 *t0*, *t1*
+
+     - | *t0* = ~\ *t1*
+
+   * - andc_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* & ~\ *t2*
+
+   * - eqv_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = ~(*t1* ^ *t2*), or equivalently, *t0* = *t1* ^ ~\ *t2*
+
+   * - nand_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = ~(*t1* & *t2*)
+
+   * - nor_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = ~(*t1* | *t2*)
+
+   * - orc_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* | ~\ *t2*
+
+   * - clz_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* ? clz(*t1*) : *t2*
+
+   * - ctz_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* ? ctz(*t1*) : *t2*
+
+   * - ctpop_i32/i64 *t0*, *t1*
+
+     - | *t0* = number of bits set in *t1*
+       |
+       | With *ctpop* short for "count population", matching
+       | the function name used in ``include/qemu/host-utils.h``.
+
+
+Shifts/Rotates
+--------------
+
+.. list-table::
+
+   * - shl_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* << *t2*
+       | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64)
+
+   * - shr_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* >> *t2* (unsigned)
+       | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64)
+
+   * - sar_i32/i64 *t0*, *t1*, *t2*
+
+     - | *t0* = *t1* >> *t2* (signed)
+       | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64)
+
+   * - rotl_i32/i64 *t0*, *t1*, *t2*
+
+     - | Rotation of *t2* bits to the left
+       | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64)
+
+   * - rotr_i32/i64 *t0*, *t1*, *t2*
+
+     - | Rotation of *t2* bits to the right.
+       | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64)
+
+
+Misc
+----
+
+.. list-table::
+
+   * - mov_i32/i64 *t0*, *t1*
+
+     - | *t0* = *t1*
+       | Move *t1* to *t0* (both operands must have the same type).
+
+   * - ext8s_i32/i64 *t0*, *t1*
+
+       ext8u_i32/i64 *t0*, *t1*
+
+       ext16s_i32/i64 *t0*, *t1*
+
+       ext16u_i32/i64 *t0*, *t1*
+
+       ext32s_i64 *t0*, *t1*
+
+       ext32u_i64 *t0*, *t1*
+
+     - | 8, 16 or 32 bit sign/zero extension (both operands must have the same type)
+
+   * - bswap16_i32/i64 *t0*, *t1*, *flags*
+
+     - | 16 bit byte swap on the low bits of a 32/64 bit input.
+       |
+       | If *flags* & ``TCG_BSWAP_IZ``, then *t1* is known to be zero-extended from bit 15.
+       | If *flags* & ``TCG_BSWAP_OZ``, then *t0* will be zero-extended from bit 15.
+       | If *flags* & ``TCG_BSWAP_OS``, then *t0* will be sign-extended from bit 15.
+       |
+       | If neither ``TCG_BSWAP_OZ`` nor ``TCG_BSWAP_OS`` are set, then the bits of *t0* above bit 15 may contain any value.
+
+   * - bswap32_i64 *t0*, *t1*, *flags*
+
+     - | 32 bit byte swap on a 64-bit value. The flags are the same as for bswap16,
+         except they apply from bit 31 instead of bit 15.
+
+   * - bswap32_i32 *t0*, *t1*, *flags*
+
+       bswap64_i64 *t0*, *t1*, *flags*
+
+     - | 32/64 bit byte swap. The flags are ignored, but still present
+         for consistency with the other bswap opcodes.
+
+   * - discard_i32/i64 *t0*
+
+     - | Indicate that the value of *t0* won't be used later. It is useful to
+         force dead code elimination.
+
+   * - deposit_i32/i64 *dest*, *t1*, *t2*, *pos*, *len*
+
+     - | Deposit *t2* as a bitfield into *t1*, placing the result in *dest*.
+       |
+       | The bitfield is described by *pos*/*len*, which are immediate values:
+       |
+       |   *len* - the length of the bitfield
+       |   *pos* - the position of the first bit, counting from the LSB
+       |
+       | For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field
+         at bit 8. This operation would be equivalent to
+       |
+       |   *dest* = (*t1* & ~0x0f00) | ((*t2* << 8) & 0x0f00)
+
+   * - extract_i32/i64 *dest*, *t1*, *pos*, *len*
+
+       sextract_i32/i64 *dest*, *t1*, *pos*, *len*
+
+     - | Extract a bitfield from *t1*, placing the result in *dest*.
+       |
+       | The bitfield is described by *pos*/*len*, which are immediate values,
+         as above for deposit. For extract_*, the result will be extended
+         to the left with zeros; for sextract_*, the result will be extended
+         to the left with copies of the bitfield sign bit at *pos* + *len* - 1.
+       |
+       | For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field
+         at bit 8. This operation would be equivalent to
+       |
+       |   *dest* = (*t1* << 20) >> 28
+       |
+       | (using an arithmetic right shift).
+
+   * - extract2_i32/i64 *dest*, *t1*, *t2*, *pos*
+
+     - | For N = {32,64}, extract an N-bit quantity from the concatenation
+         of *t2*:*t1*, beginning at *pos*. The tcg_gen_extract2_{i32,i64} expander
+         accepts 0 <= *pos* <= N as inputs. The backend code generator will
+         not see either 0 or N as inputs for these opcodes.
+
+   * - extrl_i64_i32 *t0*, *t1*
+
+     - | For 64-bit hosts only, extract the low 32-bits of input *t1* and place it
+         into 32-bit output *t0*. Depending on the host, this may be a simple move,
+         or may require additional canonicalization.
+
+   * - extrh_i64_i32 *t0*, *t1*
+
+     - | For 64-bit hosts only, extract the high 32-bits of input *t1* and place it
+         into 32-bit output *t0*. Depending on the host, this may be a simple shift,
+         or may require additional canonicalization.
+
+
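+As a sketch, the deposit and sextract examples above could be emitted by
+a front end as follows (illustrative only; *dest*, *t1* and *t2* are
+assumed to be previously allocated temporaries):
+
+.. code-block:: c
+
+   /* dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) */
+   tcg_gen_deposit_i32(dest, t1, t2, 8, 4);
+
+   /* dest = (t1 << 20) >> 28, i.e. bits [11:8] sign-extended */
+   tcg_gen_sextract_i32(dest, t1, 8, 4);
+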
+Conditional moves
+-----------------
+
+.. list-table::
+
+   * - setcond_i32/i64 *dest*, *t1*, *t2*, *cond*
+
+     - | *dest* = (*t1* *cond* *t2*)
+       |
+       | Set *dest* to 1 if (*t1* *cond* *t2*) is true, otherwise set to 0.
+
+   * - movcond_i32/i64 *dest*, *c1*, *c2*, *v1*, *v2*, *cond*
+
+     - | *dest* = (*c1* *cond* *c2* ? *v1* : *v2*)
+       |
+       | Set *dest* to *v1* if (*c1* *cond* *c2*) is true, otherwise set to *v2*.
+
+
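+For example, a signed maximum can be computed without a branch
+(a sketch; the operand names are illustrative):
+
+.. code-block:: c
+
+   /* dest = (a > b) ? a : b */
+   tcg_gen_movcond_i32(TCG_COND_GT, dest, a, b, a, b);
+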
+Type conversions
+----------------
+
+.. list-table::
+
+   * - ext_i32_i64 *t0*, *t1*
+
+     - | Convert *t1* (32 bit) to *t0* (64 bit) with sign extension
+
+   * - extu_i32_i64 *t0*, *t1*
+
+     - | Convert *t1* (32 bit) to *t0* (64 bit) with zero extension
+
+   * - trunc_i64_i32 *t0*, *t1*
+
+     - | Truncate *t1* (64 bit) to *t0* (32 bit)
+
+   * - concat_i32_i64 *t0*, *t1*, *t2*
+
+     - | Construct *t0* (64-bit) taking the low half from *t1* (32 bit) and the high half
+         from *t2* (32 bit).
+
+   * - concat32_i64 *t0*, *t1*, *t2*
+
+     - | Construct *t0* (64-bit) taking the low half from *t1* (64 bit) and the high half
+         from *t2* (64 bit).
+
+
+Load/Store
+----------
+
+.. list-table::
+
+   * - ld_i32/i64 *t0*, *t1*, *offset*
+
+       ld8s_i32/i64 *t0*, *t1*, *offset*
+
+       ld8u_i32/i64 *t0*, *t1*, *offset*
+
+       ld16s_i32/i64 *t0*, *t1*, *offset*
+
+       ld16u_i32/i64 *t0*, *t1*, *offset*
+
+       ld32s_i64 *t0*, *t1*, *offset*
+
+       ld32u_i64 *t0*, *t1*, *offset*
+
+     - | *t0* = read(*t1* + *offset*)
+       |
+       | Load 8, 16, 32 or 64 bits with or without sign extension from host memory.
+         *offset* must be a constant.
+
+   * - st_i32/i64 *t0*, *t1*, *offset*
+
+       st8_i32/i64 *t0*, *t1*, *offset*
+
+       st16_i32/i64 *t0*, *t1*, *offset*
+
+       st32_i64 *t0*, *t1*, *offset*
+
+     - | write(*t0*, *t1* + *offset*)
+       |
+       | Write 8, 16, 32 or 64 bits to host memory.
+
+All these opcodes assume that the pointed host memory doesn't correspond
+to a global. In the latter case the behaviour is unpredictable.
+
+
+Multiword arithmetic support
+----------------------------
+
+.. list-table::
+
+   * - add2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high*
+
+       sub2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high*
+
+     - | Similar to add/sub, except that the double-word inputs *t1* and *t2* are
+         formed from two single-word arguments, and the double-word output *t0*
+         is returned in two single-word outputs.
+
+   * - mulu2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2*
+
+     - | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full
+         double-word product *t0*. The latter is returned in two single-word outputs.
+
+   * - muls2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2*
+
+     - | Similar to mulu2, except the two inputs *t1* and *t2* are signed.
+
+   * - mulsh_i32/i64 *t0*, *t1*, *t2*
+
+       muluh_i32/i64 *t0*, *t1*, *t2*
+
+     - | Provide the high part of a signed or unsigned multiply, respectively.
+       |
+       | If mulu2/muls2 are not provided by the backend, the tcg-op generator
+         can obtain the same results by emitting a pair of opcodes, mul + muluh/mulsh.
+
+
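+For instance, a 64-bit addition can be decomposed on a 32-bit host into
+a single add2 op (a sketch; the low/high temporaries are illustrative):
+
+.. code-block:: c
+
+   /* (rh:rl) = (ah:al) + (bh:bl), with carry between the halves */
+   tcg_gen_add2_i32(rl, rh, al, ah, bl, bh);
+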
+Memory Barrier support
+----------------------
+
+.. list-table::
+
+   * - mb *<$arg>*
+
+     - | Generate a target memory barrier instruction to ensure memory ordering
+         as being enforced by a corresponding guest memory barrier instruction.
+       |
+       | The ordering enforced by the backend may be stricter than the ordering
+         required by the guest. It cannot be weaker. This opcode takes a constant
+         argument which is required to generate the appropriate barrier
+         instruction. The backend should take care to emit the target barrier
+         instruction only when necessary i.e., for SMP guests and when MTTCG is
+         enabled.
+       |
+       | The guest translators should generate this opcode for all guest instructions
+         which have ordering side effects.
+       |
+       | Please see :ref:`atomics-ref` for more information on memory barriers.
+
+
+64-bit guest on 32-bit host support
+-----------------------------------
+
+The following opcodes are internal to TCG. Thus they are to be implemented by
+32-bit host code generators, but are not to be emitted by guest translators.
+They are emitted as needed by inline functions within ``tcg-op.h``.
+
+.. list-table::
+
+   * - brcond2_i32 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *cond*, *label*
+
+     - | Similar to brcond, except that the 64-bit values *t0* and *t1*
+         are formed from two 32-bit arguments.
+
+   * - setcond2_i32 *dest*, *t1_low*, *t1_high*, *t2_low*, *t2_high*, *cond*
+
+     - | Similar to setcond, except that the 64-bit values *t1* and *t2* are
+         formed from two 32-bit arguments. The result is a 32-bit value.
+
+
+QEMU specific operations
+------------------------
+
+.. list-table::
+
+   * - exit_tb *t0*
+
+     - | Exit the current TB and return the value *t0* (word type).
+
+   * - goto_tb *index*
+
+     - | Exit the current TB and jump to the TB index *index* (constant) if the
+         current TB was linked to this TB. Otherwise execute the next
+         instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
+         at most once with each slot index per TB.
+
+   * - lookup_and_goto_ptr *tb_addr*
+
+     - | Look up a TB address *tb_addr* and jump to it if valid. If not valid,
+         jump to the TCG epilogue to go back to the exec loop.
+       |
+       | This operation is optional. If the TCG backend does not implement the
+         goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
+
+   * - qemu_ld_i32/i64 *t0*, *t1*, *flags*, *memidx*
+
+       qemu_st_i32/i64 *t0*, *t1*, *flags*, *memidx*
+
+       qemu_st8_i32 *t0*, *t1*, *flags*, *memidx*
+
+     - | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest
+         address *t1*. The _i32/_i64 size applies to the size of the input/output
+         register *t0* only. The address *t1* is always sized according to the guest,
+         and the width of the memory operation is controlled by *flags*.
+       |
+       | Both *t0* and *t1* may be split into little-endian ordered pairs of registers
+         if dealing with 64-bit quantities on a 32-bit host.
+       |
+       | The *memidx* selects the qemu tlb index to use (e.g. user or kernel access).
+         The flags are the MemOp bits, selecting the sign, width, and endianness
+         of the memory access.
+       |
+       | For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
+         64-bit memory access specified in *flags*.
+       |
+       | For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
+         the memory operation is known to be 8-bit. This allows the backend to
+         provide a different set of register constraints.
+
+
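+Tying exit_tb and goto_tb together, a translator commonly ends a TB with
+a direct jump in roughly this shape (a sketch only; ``cpu_pc`` stands in
+for a target's program-counter global, ``dest`` for the computed
+destination, and ``ctx->base.tb`` is assumed to be the current
+TranslationBlock from the target's DisasContext):
+
+.. code-block:: c
+
+   tcg_gen_goto_tb(0);               /* try direct-jump slot 0 */
+   tcg_gen_movi_i64(cpu_pc, dest);   /* set PC for when the slot is unlinked */
+   tcg_gen_exit_tb(ctx->base.tb, 0); /* return to the exec loop, noting slot 0 */
+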
+Host vector operations
+----------------------
+
+All of the vector ops have two parameters, ``TCGOP_VECL`` & ``TCGOP_VECE``.
+The former specifies the length of the vector in log2 64-bit units; the
+latter specifies the length of the element (if applicable) in log2 8-bit units.
+E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
+
+.. list-table::
+
+   * - mov_vec *v0*, *v1*
+
+       ld_vec *v0*, *t1*
+
+       st_vec *v0*, *t1*
+
+     - | Move, load and store.
+
+   * - dup_vec *v0*, *r1*
+
+     - | Duplicate the low N bits of *r1* into VECL/VECE copies across *v0*.
+
+   * - dupi_vec *v0*, *c*
+
+     - | Similarly, for a constant.
+       | Smaller values will be replicated to host register size by the expanders.
+
+   * - dup2_vec *v0*, *r1*, *r2*
+
+     - | Duplicate *r2*:*r1* into VECL/64 copies across *v0*. This opcode is
+         only present for 32-bit hosts.
+
+   * - add_vec *v0*, *v1*, *v2*
+
+     - | *v0* = *v1* + *v2*, in elements across the vector.
+
+   * - sub_vec *v0*, *v1*, *v2*
+
+     - | Similarly, *v0* = *v1* - *v2*.
+
+   * - mul_vec *v0*, *v1*, *v2*
+
+     - | Similarly, *v0* = *v1* * *v2*.
+
+   * - neg_vec *v0*, *v1*
+
+     - | Similarly, *v0* = -*v1*.
+
+   * - abs_vec *v0*, *v1*
+
+     - | Similarly, *v0* = *v1* < 0 ? -*v1* : *v1*, in elements across the vector.
+
+   * - smin_vec *v0*, *v1*, *v2*
+
+       umin_vec *v0*, *v1*, *v2*
+
+     - | Similarly, *v0* = MIN(*v1*, *v2*), for signed and unsigned element types.
+
+   * - smax_vec *v0*, *v1*, *v2*
+
+       umax_vec *v0*, *v1*, *v2*
+
+     - | Similarly, *v0* = MAX(*v1*, *v2*), for signed and unsigned element types.
+
+   * - ssadd_vec *v0*, *v1*, *v2*
+
+       sssub_vec *v0*, *v1*, *v2*
+
+       usadd_vec *v0*, *v1*, *v2*
+
+       ussub_vec *v0*, *v1*, *v2*
+
+     - | Signed and unsigned saturating addition and subtraction.
+       |
+       | If the true result is not representable within the element type, the
+         element is set to the minimum or maximum value for the type.
+
+   * - and_vec *v0*, *v1*, *v2*
+
+       or_vec *v0*, *v1*, *v2*
+
+       xor_vec *v0*, *v1*, *v2*
+
+       andc_vec *v0*, *v1*, *v2*
+
+       orc_vec *v0*, *v1*, *v2*
+
+       not_vec *v0*, *v1*
+
+     - | Similarly, logical operations with and without complement.
+       |
+       | Note that VECE is unused.
+
+   * - shli_vec *v0*, *v1*, *i2*
+
+       shls_vec *v0*, *v1*, *s2*
+
+     - | Shift all elements from v1 by a scalar *i2*/*s2*. I.e.
+
+       .. code-block:: c
+
+          for (i = 0; i < VECL/VECE; ++i) {
+              v0[i] = v1[i] << s2;
+          }
+
+   * - shri_vec *v0*, *v1*, *i2*
+
+       sari_vec *v0*, *v1*, *i2*
+
+       rotli_vec *v0*, *v1*, *i2*
+
+       shrs_vec *v0*, *v1*, *s2*
+
+       sars_vec *v0*, *v1*, *s2*
+
+     - | Similarly for logical and arithmetic right shift, and left rotate.
+
+   * - shlv_vec *v0*, *v1*, *v2*
+
+     - | Shift elements from *v1* by elements from *v2*. I.e.
+
+       .. code-block:: c
+
+          for (i = 0; i < VECL/VECE; ++i) {
+              v0[i] = v1[i] << v2[i];
+          }
+
+   * - shrv_vec *v0*, *v1*, *v2*
+
+       sarv_vec *v0*, *v1*, *v2*
+
+       rotlv_vec *v0*, *v1*, *v2*
+
+       rotrv_vec *v0*, *v1*, *v2*
+
+     - | Similarly for logical and arithmetic right shift, and rotates.
+
+   * - cmp_vec *v0*, *v1*, *v2*, *cond*
+
+     - | Compare vectors by element, storing -1 for true and 0 for false.
+
+   * - bitsel_vec *v0*, *v1*, *v2*, *v3*
+
+     - | Bitwise select, *v0* = (*v2* & *v1*) | (*v3* & ~\ *v1*), across the entire vector.
+
+   * - cmpsel_vec *v0*, *c1*, *c2*, *v3*, *v4*, *cond*
+
+     - | Select elements based on comparison results:
+
+       .. code-block:: c
+
+          for (i = 0; i < n; ++i) {
+              v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i];
+          }
+
+**Note 1**: Some shortcuts are defined when the last operand is known to be
+a constant (e.g. addi for add, movi for mov).
+
+**Note 2**: When using TCG, the opcodes must never be generated directly
+as some of them may not be available as "real" opcodes. Always use the
+function tcg_gen_xxx(args).
+
+
+Backend
+=======
+
+``tcg-target.h`` contains the target specific definitions. ``tcg-target.c.inc``
+contains the target specific code; it is #included by ``tcg/tcg.c``, rather
+than being a standalone C file.
+
+Assumptions
+-----------
+
+The target word size (``TCG_TARGET_REG_BITS``) is expected to be 32 bit or
+64 bit. It is expected that the pointer has the same size as the word.
+
+On a 32 bit target, all 64 bit operations are converted to 32 bits. A
+few specific operations must be implemented to allow it (see add2_i32,
+sub2_i32, brcond2_i32).
+
+On a 64 bit target, the values are transferred between 32 and 64-bit
+registers using the following ops:
+
+- trunc_shr_i64_i32
+- ext_i32_i64
+- extu_i32_i64
+
+They ensure that the values are correctly truncated or extended when
+moved from a 32-bit to a 64-bit register or vice-versa. Note that the
+trunc_shr_i64_i32 is an optional op. It is not necessary to implement
+it if all the following conditions are met:
+
+- 64-bit registers can hold 32-bit values
+- 32-bit values in a 64-bit register do not need to stay zero or
+  sign extended
+- all 32-bit TCG ops ignore the high part of 64-bit registers
+
+Floating point operations are not supported in this version. A
+previous incarnation of the code generator had full support of them,
+but it is better to concentrate on integer operations first.
+
+Constraints
+-----------
+
+GCC like constraints are used to define the constraints of every
+instruction. Memory constraints are not supported in this
+version. Aliases are specified in the input operands as for GCC.
+
+The same register may be used for both an input and an output, even when
+they are not explicitly aliased. If an op expands to multiple target
+instructions then care must be taken to avoid clobbering input values.
+GCC style "early clobber" outputs are supported, with '``&``'.
+
+A target can define specific register or constant constraints. If an
+operation uses a constant input constraint which does not allow all
+constants, it must also accept registers in order to have a fallback.
+The constraint '``i``' is defined generically to accept any constant.
+The constraint '``r``' is not defined generically, but is consistently
+used by each backend to indicate all registers.
+
+The movi_i32 and movi_i64 operations must accept any constants.
+
+The mov_i32 and mov_i64 operations must accept any registers of the
+same type.
+
+The ld/st/sti instructions must accept signed 32 bit constant offsets.
+This can be implemented by reserving a specific register in which to
+compute the address if the offset is too big.
+
+The ld/st instructions must accept any destination (ld) or source (st)
+register.
+
+The sti instruction may fail if it cannot store the given constant.
+
+Function call assumptions
+-------------------------
+
+- The only supported types for parameters and return value are: 32 and
+  64 bit integers and pointer.
+- The stack grows downwards.
+- The first N parameters are passed in registers.
+- The next parameters are passed on the stack by storing them as words.
+- Some registers are clobbered during the call.
+- The function can return 0 or 1 value in registers. On a 32 bit
+  target, functions must be able to return 2 values in registers for
+  64 bit return type.
+
+
+Recommended coding rules for best performance
+=============================================
+
+- Use globals to represent the parts of the QEMU CPU state which are
+  often modified, e.g. the integer registers and the condition
+  codes. TCG will be able to use host registers to store them.
+
+- Avoid globals stored in fixed registers. They must be used only to
+  store the pointer to the CPU state and possibly to store a pointer
+  to a register window.
+
+- Use temporaries. Use local temporaries only when really needed,
+  e.g. when you need to use a value after a jump. Local temporaries
+  introduce a performance hit in the current TCG implementation: their
+  content is saved to memory at end of each basic block. (See the
+  sketch after this list.)
+
+- Free temporaries and local temporaries when they are no longer used
+  (tcg_temp_free). Since tcg_const_x() also creates a temporary, you
+  should free it after it is used. Freeing temporaries does not yield
+  a better generated code, but it reduces the memory usage of TCG and
+  improves the speed of the translation.
+
+- Don't hesitate to use helpers for complicated or seldom used guest
+  instructions. There is little performance advantage in using TCG to
+  implement guest instructions taking more than about twenty TCG
+  instructions. Note that this rule of thumb is more applicable to
+  helpers doing complex logic or arithmetic, where the C compiler has
+  scope to do a good job of optimisation; it is less relevant where
+  the instruction is mostly doing loads and stores, and in those cases
+  inline TCG may still be faster for longer sequences.
+
+- The hard limit on the number of TCG instructions you can generate
+  per guest instruction is set by ``MAX_OP_PER_INSTR`` in ``exec-all.h`` --
+  you cannot exceed this without risking a buffer overrun.
+
+- Use the 'discard' instruction if you know that TCG won't be able to
+  prove that a given global is "dead" at a given program point. The
+  x86 guest uses it to improve the condition codes optimisation.
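+
+As a sketch of the temporary-management rules above (illustrative only;
+``cpu_r0`` and ``cpu_r1`` stand in for globals a target would have
+registered for its CPU state):
+
+.. code-block:: c
+
+   TCGv_i32 t = tcg_temp_new_i32();    /* valid within the basic block */
+
+   tcg_gen_add_i32(t, cpu_r0, cpu_r1); /* t = r0 + r1 */
+   tcg_gen_sari_i32(t, t, 1);          /* t >>= 1 (signed) */
+   tcg_gen_mov_i32(cpu_r0, t);         /* write the result back to a global */
+
+   tcg_temp_free_i32(t);               /* release as soon as no longer used */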
diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/devel/tcg.rst
+++ b/docs/devel/tcg.rst
@@ -XXX,XX +XXX,XX @@ which make it relatively easily portable and simple while achieving good
 performances.

 QEMU's dynamic translation backend is called TCG, for "Tiny Code
-Generator". For more information, please take a look at ``tcg/README``.
+Generator". For more information, please take a look at :ref:`tcg-ops-ref`.

 The following sections outline some notable features and implementation
 details of QEMU's dynamic translator.
diff --git a/tcg/README b/tcg/README
deleted file mode 100644
index XXXXXXX..XXXXXXX
--- a/tcg/README
+++ /dev/null
@@ -XXX,XX +XXX,XX @@
1022 | -Tiny Code Generator - Fabrice Bellard. | ||
1023 | - | ||
1024 | -1) Introduction | ||
1025 | - | ||
1026 | -TCG (Tiny Code Generator) began as a generic backend for a C | ||
1027 | -compiler. It was simplified to be used in QEMU. It also has its roots | ||
1028 | -in the QOP code generator written by Paul Brook. | ||
1029 | - | ||
1030 | -2) Definitions | ||
1031 | - | ||
1032 | -TCG receives RISC-like "TCG ops" and performs some optimizations on them, | ||
1033 | -including liveness analysis and trivial constant expression | ||
1034 | -evaluation. TCG ops are then implemented in the host CPU back end, | ||
1035 | -also known as the TCG "target". | ||
1036 | - | ||
1037 | -The TCG "target" is the architecture for which we generate the | ||
1038 | -code. It is of course not the same as the "target" of QEMU which is | ||
1039 | -the emulated architecture. As TCG started as a generic C backend used | ||
1040 | -for cross compiling, it is assumed that the TCG target is different | ||
1041 | -from the host, although it is never the case for QEMU. | ||
1042 | - | ||
1043 | -In this document, we use "guest" to specify what architecture we are | ||
1044 | -emulating; "target" always means the TCG target, the machine on which | ||
1045 | -we are running QEMU. | ||
1046 | - | ||
1047 | -A TCG "function" corresponds to a QEMU Translated Block (TB). | ||
1048 | - | ||
1049 | -A TCG "temporary" is a variable only live in a basic | ||
1050 | -block. Temporaries are allocated explicitly in each function. | ||
1051 | - | ||
1052 | -A TCG "local temporary" is a variable only live in a function. Local | ||
1053 | -temporaries are allocated explicitly in each function. | ||
1054 | - | ||
1055 | -A TCG "global" is a variable which is live in all the functions | ||
1056 | -(equivalent of a C global variable). They are defined before the | ||
1057 | -functions defined. A TCG global can be a memory location (e.g. a QEMU | ||
1058 | -CPU register), a fixed host register (e.g. the QEMU CPU state pointer) | ||
1059 | -or a memory location which is stored in a register outside QEMU TBs | ||
1060 | -(not implemented yet). | ||
1061 | - | ||
1062 | -A TCG "basic block" corresponds to a list of instructions terminated | ||
1063 | -by a branch instruction. | ||
1064 | - | ||
1065 | -An operation with "undefined behavior" may result in a crash. | ||
1066 | - | ||
1067 | -An operation with "unspecified behavior" shall not crash. However, | ||
1068 | -the result may be one of several possibilities so may be considered | ||
1069 | -an "undefined result". | ||
1070 | - | ||
1071 | -3) Intermediate representation | ||
1072 | - | ||
1073 | -3.1) Introduction | ||
1074 | - | ||
1075 | -TCG instructions operate on variables which are temporaries, local | ||
1076 | -temporaries or globals. TCG instructions and variables are strongly | ||
1077 | -typed. Two types are supported: 32 bit integers and 64 bit | ||
1078 | -integers. Pointers are defined as an alias to 32 bit or 64 bit | ||
1079 | -integers depending on the TCG target word size. | ||
1080 | - | ||
1081 | -Each instruction has a fixed number of output variable operands, input | ||
1082 | -variable operands and always constant operands. | ||
1083 | - | ||
1084 | -The notable exception is the call instruction which has a variable | ||
1085 | -number of outputs and inputs. | ||
1086 | - | ||
1087 | -In the textual form, output operands usually come first, followed by | ||
1088 | -input operands, followed by constant operands. The output type is | ||
1089 | -included in the instruction name. Constants are prefixed with a '$'. | ||
1090 | - | ||
1091 | -add_i32 t0, t1, t2 (t0 <- t1 + t2) | ||
1092 | - | ||
1093 | -3.2) Assumptions | ||
1094 | - | ||
1095 | -* Basic blocks | ||
1096 | - | ||
1097 | -- Basic blocks end after branches (e.g. brcond_i32 instruction), | ||
1098 | - goto_tb and exit_tb instructions. | ||
1099 | -- Basic blocks start after the end of a previous basic block, or at a | ||
1100 | - set_label instruction. | ||
1101 | - | ||
1102 | -After the end of a basic block, the content of temporaries is | ||
1103 | -destroyed, but local temporaries and globals are preserved. | ||
1104 | - | ||
1105 | -* Floating point types are not supported yet | ||
1106 | - | ||
1107 | -* Pointers: depending on the TCG target, pointer size is 32 bit or 64 | ||
1108 | - bit. The type TCG_TYPE_PTR is an alias to TCG_TYPE_I32 or | ||
1109 | - TCG_TYPE_I64. | ||
1110 | - | ||
1111 | -* Helpers: | ||
1112 | - | ||
1113 | -Using the tcg_gen_helper_x_y it is possible to call any function | ||
1114 | -taking i32, i64 or pointer types. By default, before calling a helper, | ||
1115 | -all globals are stored at their canonical location and it is assumed | ||
1116 | -that the function can modify them. By default, the helper is allowed to | ||
1117 | -modify the CPU state or raise an exception. | ||
1118 | - | ||
1119 | -This can be overridden using the following function modifiers: | ||
1120 | -- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals, | ||
1121 | - either directly or via an exception. They will not be saved to their | ||
1122 | - canonical locations before calling the helper. | ||
1123 | -- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals. | ||
1124 | - They will only be saved to their canonical location before calling helpers, | ||
1125 | - but they won't be reloaded afterwards. | ||
1126 | -- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if | ||
1127 | - the return value is not used. | ||
1128 | - | ||
1129 | -Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS. | ||
1130 | - | ||
1131 | -On some TCG targets (e.g. x86), several calling conventions are | ||
1132 | -supported. | ||
1133 | - | ||
1134 | -* Branches: | ||
1135 | - | ||
1136 | -Use the instruction 'br' to jump to a label. | ||
1137 | - | ||
1138 | -3.3) Code Optimizations | ||
1139 | - | ||
1140 | -When generating instructions, you can count on at least the following | ||
1141 | -optimizations: | ||
1142 | - | ||
1143 | -- Single instructions are simplified, e.g. | ||
1144 | - | ||
1145 | - and_i32 t0, t0, $0xffffffff | ||
1146 | - | ||
1147 | - is suppressed. | ||
1148 | - | ||
1149 | -- A liveness analysis is done at the basic block level. The | ||
1150 | - information is used to suppress moves from a dead variable to | ||
1151 | - another one. It is also used to remove instructions which compute | ||
1152 | - dead results. The latter is especially useful for condition code | ||
1153 | - optimization in QEMU. | ||
1154 | - | ||
1155 | - In the following example: | ||
1156 | - | ||
1157 | - add_i32 t0, t1, t2 | ||
1158 | - add_i32 t0, t0, $1 | ||
1159 | - mov_i32 t0, $1 | ||
1160 | - | ||
1161 | - only the last instruction is kept. | ||
1162 | - | ||
1163 | -3.4) Instruction Reference | ||
1164 | - | ||
1165 | -********* Function call | ||
1166 | - | ||
1167 | -* call <ret> <params> ptr | ||
1168 | - | ||
1169 | -call function 'ptr' (pointer type) | ||
1170 | - | ||
1171 | -<ret> optional 32 bit or 64 bit return value | ||
1172 | -<params> optional 32 bit or 64 bit parameters | ||
1173 | - | ||
1174 | -********* Jumps/Labels | ||
1175 | - | ||
1176 | -* set_label $label | ||
1177 | - | ||
1178 | -Define label 'label' at the current program point. | ||
1179 | - | ||
1180 | -* br $label | ||
1181 | - | ||
1182 | -Jump to label. | ||
1183 | - | ||
1184 | -* brcond_i32/i64 t0, t1, cond, label | ||
1185 | - | ||
1186 | -Conditional jump if t0 cond t1 is true. cond can be: | ||
1187 | - TCG_COND_EQ | ||
1188 | - TCG_COND_NE | ||
1189 | - TCG_COND_LT /* signed */ | ||
1190 | - TCG_COND_GE /* signed */ | ||
1191 | - TCG_COND_LE /* signed */ | ||
1192 | - TCG_COND_GT /* signed */ | ||
1193 | - TCG_COND_LTU /* unsigned */ | ||
1194 | - TCG_COND_GEU /* unsigned */ | ||
1195 | - TCG_COND_LEU /* unsigned */ | ||
1196 | - TCG_COND_GTU /* unsigned */ | ||
1197 | - | ||
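As a hedged illustration of how a front end reaches this opcode (t0 and
t1 are assumed to be existing i32 temporaries), a conditional branch is
generated with the tcg_gen_* expanders and a label:

    /* Sketch: branch to 'done' when t0 < t1, unsigned comparison. */
    TCGLabel *done = gen_new_label();
    tcg_gen_brcond_i32(TCG_COND_LTU, t0, t1, done);
    /* ... code executed only when the condition is false ... */
    gen_set_label(done);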
1198 | -********* Arithmetic | ||
1199 | - | ||
1200 | -* add_i32/i64 t0, t1, t2 | ||
1201 | - | ||
1202 | -t0=t1+t2 | ||
1203 | - | ||
1204 | -* sub_i32/i64 t0, t1, t2 | ||
1205 | - | ||
1206 | -t0=t1-t2 | ||
1207 | - | ||
1208 | -* neg_i32/i64 t0, t1 | ||
1209 | - | ||
1210 | -t0=-t1 (two's complement) | ||
1211 | - | ||
1212 | -* mul_i32/i64 t0, t1, t2 | ||
1213 | - | ||
1214 | -t0=t1*t2 | ||
1215 | - | ||
1216 | -* div_i32/i64 t0, t1, t2 | ||
1217 | - | ||
1218 | -t0=t1/t2 (signed). Undefined behavior if division by zero or overflow. | ||
1219 | - | ||
1220 | -* divu_i32/i64 t0, t1, t2 | ||
1221 | - | ||
1222 | -t0=t1/t2 (unsigned). Undefined behavior if division by zero. | ||
1223 | - | ||
1224 | -* rem_i32/i64 t0, t1, t2 | ||
1225 | - | ||
1226 | -t0=t1%t2 (signed). Undefined behavior if division by zero or overflow. | ||
1227 | - | ||
1228 | -* remu_i32/i64 t0, t1, t2 | ||
1229 | - | ||
1230 | -t0=t1%t2 (unsigned). Undefined behavior if division by zero. | ||
1231 | - | ||
1232 | -********* Logical | ||
1233 | - | ||
1234 | -* and_i32/i64 t0, t1, t2 | ||
1235 | - | ||
1236 | -t0=t1&t2 | ||
1237 | - | ||
1238 | -* or_i32/i64 t0, t1, t2 | ||
1239 | - | ||
1240 | -t0=t1|t2 | ||
1241 | - | ||
1242 | -* xor_i32/i64 t0, t1, t2 | ||
1243 | - | ||
1244 | -t0=t1^t2 | ||
1245 | - | ||
1246 | -* not_i32/i64 t0, t1 | ||
1247 | - | ||
1248 | -t0=~t1 | ||
1249 | - | ||
1250 | -* andc_i32/i64 t0, t1, t2 | ||
1251 | - | ||
1252 | -t0=t1&~t2 | ||
1253 | - | ||
1254 | -* eqv_i32/i64 t0, t1, t2 | ||
1255 | - | ||
1256 | -t0=~(t1^t2), or equivalently, t0=t1^~t2 | ||
1257 | - | ||
1258 | -* nand_i32/i64 t0, t1, t2 | ||
1259 | - | ||
1260 | -t0=~(t1&t2) | ||
1261 | - | ||
1262 | -* nor_i32/i64 t0, t1, t2 | ||
1263 | - | ||
1264 | -t0=~(t1|t2) | ||
1265 | - | ||
1266 | -* orc_i32/i64 t0, t1, t2 | ||
1267 | - | ||
1268 | -t0=t1|~t2 | ||
1269 | - | ||
1270 | -* clz_i32/i64 t0, t1, t2 | ||
1271 | - | ||
1272 | -t0 = t1 ? clz(t1) : t2 | ||
1273 | - | ||
1274 | -* ctz_i32/i64 t0, t1, t2 | ||
1275 | - | ||
1276 | -t0 = t1 ? ctz(t1) : t2 | ||
1277 | - | ||
1278 | -* ctpop_i32/i64 t0, t1 | ||
1279 | - | ||
1280 | -t0 = number of bits set in t1 | ||
1281 | -With "ctpop" short for "count population", matching | ||
1282 | -the function name used in include/qemu/host-utils.h. | ||
1283 | - | ||
1284 | -********* Shifts/Rotates | ||
1285 | - | ||
1286 | -* shl_i32/i64 t0, t1, t2 | ||
1287 | - | ||
1288 | -t0=t1 << t2. Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) | ||
1289 | - | ||
1290 | -* shr_i32/i64 t0, t1, t2 | ||
1291 | - | ||
1292 | -t0=t1 >> t2 (unsigned). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) | ||
1293 | - | ||
1294 | -* sar_i32/i64 t0, t1, t2 | ||
1295 | - | ||
1296 | -t0=t1 >> t2 (signed). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) | ||
1297 | - | ||
1298 | -* rotl_i32/i64 t0, t1, t2 | ||
1299 | - | ||
1300 | -Rotation of t2 bits to the left. | ||
1301 | -Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) | ||
1302 | - | ||
1303 | -* rotr_i32/i64 t0, t1, t2 | ||
1304 | - | ||
1305 | -Rotation of t2 bits to the right. | ||
1306 | -Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) | ||
1307 | - | ||
1308 | -********* Misc | ||
1309 | - | ||
1310 | -* mov_i32/i64 t0, t1 | ||
1311 | - | ||
1312 | -t0 = t1 | ||
1313 | - | ||
1314 | -Move t1 to t0 (both operands must have the same type). | ||
1315 | - | ||
1316 | -* ext8s_i32/i64 t0, t1 | ||
1317 | -ext8u_i32/i64 t0, t1 | ||
1318 | -ext16s_i32/i64 t0, t1 | ||
1319 | -ext16u_i32/i64 t0, t1 | ||
1320 | -ext32s_i64 t0, t1 | ||
1321 | -ext32u_i64 t0, t1 | ||
1322 | - | ||
1323 | -8, 16 or 32 bit sign/zero extension (both operands must have the same type) | ||
1324 | - | ||
1325 | -* bswap16_i32/i64 t0, t1, flags | ||
1326 | - | ||
1327 | -16 bit byte swap on the low bits of a 32/64 bit input. | ||
1328 | -If flags & TCG_BSWAP_IZ, then t1 is known to be zero-extended from bit 15. | ||
1329 | -If flags & TCG_BSWAP_OZ, then t0 will be zero-extended from bit 15. | ||
1330 | -If flags & TCG_BSWAP_OS, then t0 will be sign-extended from bit 15. | ||
1331 | -If neither TCG_BSWAP_OZ nor TCG_BSWAP_OS is set, then the bits of | ||
1332 | -t0 above bit 15 may contain any value. | ||
1333 | - | ||
1334 | -* bswap32_i64 t0, t1, flags | ||
1335 | - | ||
1336 | -32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, | ||
1337 | -except they apply from bit 31 instead of bit 15. | ||
1338 | - | ||
1339 | -* bswap32_i32 t0, t1, flags | ||
1340 | -* bswap64_i64 t0, t1, flags | ||
1341 | - | ||
1342 | -32/64 bit byte swap. The flags are ignored, but still present | ||
1343 | -for consistency with the other bswap opcodes. | ||
1344 | - | ||
1345 | -* discard_i32/i64 t0 | ||
1346 | - | ||
1347 | -Indicate that the value of t0 won't be used later. It is useful to | ||
1348 | -force dead code elimination. | ||
1349 | - | ||
1350 | -* deposit_i32/i64 dest, t1, t2, pos, len | ||
1351 | - | ||
1352 | -Deposit T2 as a bitfield into T1, placing the result in DEST. | ||
1353 | -The bitfield is described by POS/LEN, which are immediate values: | ||
1354 | - | ||
1355 | - LEN - the length of the bitfield | ||
1356 | - POS - the position of the first bit, counting from the LSB | ||
1357 | - | ||
1358 | -For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field | ||
1359 | -at bit 8. This operation would be equivalent to | ||
1360 | - | ||
1361 | - dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) | ||
1362 | - | ||
1363 | -* extract_i32/i64 dest, t1, pos, len | ||
1364 | -* sextract_i32/i64 dest, t1, pos, len | ||
1365 | - | ||
1366 | -Extract a bitfield from T1, placing the result in DEST. | ||
1367 | -The bitfield is described by POS/LEN, which are immediate values, | ||
1368 | -as above for deposit. For extract_*, the result will be extended | ||
1369 | -to the left with zeros; for sextract_*, the result will be extended | ||
1370 | -to the left with copies of the bitfield sign bit at pos + len - 1. | ||
1371 | - | ||
1372 | -For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field | ||
1373 | -at bit 8. This operation would be equivalent to | ||
1374 | - | ||
1375 | - dest = (t1 << 20) >> 28 | ||
1376 | - | ||
1377 | -(using an arithmetic right shift). | ||
1378 | - | ||
1379 | -* extract2_i32/i64 dest, t1, t2, pos | ||
1380 | - | ||
1381 | -For N = {32,64}, extract an N-bit quantity from the concatenation | ||
1382 | -of t2:t1, beginning at pos. The tcg_gen_extract2_{i32,i64} expander | ||
1383 | -accepts 0 <= pos <= N as inputs. The backend code generator will | ||
1384 | -not see either 0 or N as inputs for these opcodes. | ||
1385 | - | ||
1386 | -* extrl_i64_i32 t0, t1 | ||
1387 | - | ||
1388 | -For 64-bit hosts only, extract the low 32-bits of input T1 and place it | ||
1389 | -into 32-bit output T0. Depending on the host, this may be a simple move, | ||
1390 | -or may require additional canonicalization. | ||
1391 | - | ||
1392 | -* extrh_i64_i32 t0, t1 | ||
1393 | - | ||
1394 | -For 64-bit hosts only, extract the high 32-bits of input T1 and place it | ||
1395 | -into 32-bit output T0. Depending on the host, this may be a simple shift, | ||
1396 | -or may require additional canonicalization. | ||
1397 | - | ||
1398 | -********* Conditional moves | ||
1399 | - | ||
1400 | -* setcond_i32/i64 dest, t1, t2, cond | ||
1401 | - | ||
1402 | -dest = (t1 cond t2) | ||
1403 | - | ||
1404 | -Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0. | ||
1405 | - | ||
1406 | -* movcond_i32/i64 dest, c1, c2, v1, v2, cond | ||
1407 | - | ||
1408 | -dest = (c1 cond c2 ? v1 : v2) | ||
1409 | - | ||
1410 | -Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2. | ||
1411 | - | ||
1412 | -********* Type conversions | ||
1413 | - | ||
1414 | -* ext_i32_i64 t0, t1 | ||
1415 | -Convert t1 (32 bit) to t0 (64 bit) with sign extension | ||
1416 | - | ||
1417 | -* extu_i32_i64 t0, t1 | ||
1418 | -Convert t1 (32 bit) to t0 (64 bit) with zero extension | ||
1419 | - | ||
1420 | -* trunc_i64_i32 t0, t1 | ||
1421 | -Truncate t1 (64 bit) to t0 (32 bit) | ||
1422 | - | ||
1423 | -* concat_i32_i64 t0, t1, t2 | ||
1424 | -Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half | ||
1425 | -from t2 (32 bit). | ||
1426 | - | ||
1427 | -* concat32_i64 t0, t1, t2 | ||
1428 | -Construct t0 (64-bit) taking the low half from t1 (64 bit) and the high half | ||
1429 | -from t2 (64 bit). | ||
1430 | - | ||
1431 | -********* Load/Store | ||
1432 | - | ||
1433 | -* ld_i32/i64 t0, t1, offset | ||
1434 | -ld8s_i32/i64 t0, t1, offset | ||
1435 | -ld8u_i32/i64 t0, t1, offset | ||
1436 | -ld16s_i32/i64 t0, t1, offset | ||
1437 | -ld16u_i32/i64 t0, t1, offset | ||
1438 | -ld32s_i64 t0, t1, offset | ||
1439 | -ld32u_i64 t0, t1, offset | ||
1440 | - | ||
1441 | -t0 = read(t1 + offset) | ||
1442 | -Load 8, 16, 32 or 64 bits with or without sign extension from host memory. | ||
1443 | -offset must be a constant. | ||
1444 | - | ||
1445 | -* st_i32/i64 t0, t1, offset | ||
1446 | -st8_i32/i64 t0, t1, offset | ||
1447 | -st16_i32/i64 t0, t1, offset | ||
1448 | -st32_i64 t0, t1, offset | ||
1449 | - | ||
1450 | -write(t0, t1 + offset) | ||
1451 | -Write 8, 16, 32 or 64 bits to host memory. | ||
1452 | - | ||
1453 | -All these opcodes assume that the addressed host memory does not | ||
1454 | -correspond to a global; otherwise the behaviour is unpredictable. | ||
1455 | - | ||
1456 | -********* Multiword arithmetic support | ||
1457 | - | ||
1458 | -* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high | ||
1459 | -* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high | ||
1460 | - | ||
1461 | -Similar to add/sub, except that the double-word inputs T1 and T2 are | ||
1462 | -formed from two single-word arguments, and the double-word output T0 | ||
1463 | -is returned in two single-word outputs. | ||
1464 | - | ||
1465 | -* mulu2_i32/i64 t0_low, t0_high, t1, t2 | ||
1466 | - | ||
1467 | -Similar to mul, except two unsigned inputs T1 and T2 yielding the full | ||
1468 | -double-word product T0. The latter is returned in two single-word outputs. | ||
1469 | - | ||
1470 | -* muls2_i32/i64 t0_low, t0_high, t1, t2 | ||
1471 | - | ||
1472 | -Similar to mulu2, except the two inputs T1 and T2 are signed. | ||
1473 | - | ||
1474 | -* mulsh_i32/i64 t0, t1, t2 | ||
1475 | -* muluh_i32/i64 t0, t1, t2 | ||
1476 | - | ||
1477 | -Provide the high part of a signed or unsigned multiply, respectively. | ||
1478 | -If mulu2/muls2 are not provided by the backend, the tcg-op generator | ||
1479 | -can obtain the same results by emitting a pair of opcodes, | ||
1480 | -mul + muluh/mulsh. | ||
1481 | - | ||
1482 | -********* Memory Barrier support | ||
1483 | - | ||
1484 | -* mb <$arg> | ||
1485 | - | ||
1486 | -Generate a target memory barrier instruction to ensure the memory ordering | ||
1487 | -enforced by a corresponding guest memory barrier instruction. The ordering | ||
1488 | -enforced by the backend may be stricter than the ordering required by the guest. | ||
1489 | -It cannot be weaker. This opcode takes a constant argument which is required to | ||
1490 | -generate the appropriate barrier instruction. The backend should take care to | ||
1491 | -emit the target barrier instruction only when necessary, i.e. for SMP guests | ||
1492 | -and when MTTCG is enabled. | ||
1493 | - | ||
1494 | -The guest translators should generate this opcode for all guest instructions | ||
1495 | -which have ordering side effects. | ||
1496 | - | ||
1497 | -Please see docs/devel/atomics.rst for more information on memory barriers. | ||
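As a hedged illustration, a guest translator reaches this opcode through
the tcg_gen_mb() expander; the full sequentially consistent barrier below
is one plausible flag combination, not a prescription:

    /* Sketch: order all earlier loads/stores before all later ones. */
    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);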
1498 | - | ||
1499 | -********* 64-bit guest on 32-bit host support | ||
1500 | - | ||
1501 | -The following opcodes are internal to TCG. Thus they are to be implemented by | ||
1502 | -32-bit host code generators, but are not to be emitted by guest translators. | ||
1503 | -They are emitted as needed by inline functions within "tcg-op.h". | ||
1504 | - | ||
1505 | -* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label | ||
1506 | - | ||
1507 | -Similar to brcond, except that the 64-bit values T0 and T1 | ||
1508 | -are formed from two 32-bit arguments. | ||
1509 | - | ||
1510 | -* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond | ||
1511 | - | ||
1512 | -Similar to setcond, except that the 64-bit values T1 and T2 are | ||
1513 | -formed from two 32-bit arguments. The result is a 32-bit value. | ||
1514 | - | ||
1515 | -********* QEMU specific operations | ||
1516 | - | ||
1517 | -* exit_tb t0 | ||
1518 | - | ||
1519 | -Exit the current TB and return the value t0 (word type). | ||
1520 | - | ||
1521 | -* goto_tb index | ||
1522 | - | ||
1523 | -Exit the current TB and jump to the TB index 'index' (constant) if the | ||
1524 | -current TB was linked to this TB. Otherwise execute the next | ||
1525 | -instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued | ||
1526 | -at most once with each slot index per TB. | ||
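A typical direct-jump sequence in a translator looks like the sketch
below; 'cpu_pc' stands for the guest program counter global and 'dest'
for the known destination address, both assumed for illustration:

    /* Sketch: chainable jump to a known destination, slot index 0. */
    tcg_gen_goto_tb(0);
    tcg_gen_movi_tl(cpu_pc, dest);   /* PC for the not-yet-linked case */
    tcg_gen_exit_tb(tb, 0);          /* exit, identifying slot 0 */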
1527 | - | ||
1528 | -* lookup_and_goto_ptr tb_addr | ||
1529 | - | ||
1530 | -Look up a TB address ('tb_addr') and jump to it if valid. If not valid, | ||
1531 | -jump to the TCG epilogue to go back to the exec loop. | ||
1532 | - | ||
1533 | -This operation is optional. If the TCG backend does not implement the | ||
1534 | -goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0). | ||
1535 | - | ||
1536 | -* qemu_ld_i32/i64 t0, t1, flags, memidx | ||
1537 | -* qemu_st_i32/i64 t0, t1, flags, memidx | ||
1538 | -* qemu_st8_i32 t0, t1, flags, memidx | ||
1539 | - | ||
1540 | -Load data at the guest address t1 into t0, or store data in t0 at guest | ||
1541 | -address t1. The _i32/_i64 size applies to the size of the input/output | ||
1542 | -register t0 only. The address t1 is always sized according to the guest, | ||
1543 | -and the width of the memory operation is controlled by flags. | ||
1544 | - | ||
1545 | -Both t0 and t1 may be split into little-endian ordered pairs of registers | ||
1546 | -if dealing with 64-bit quantities on a 32-bit host. | ||
1547 | - | ||
1548 | -The memidx selects the qemu tlb index to use (e.g. user or kernel access). | ||
1549 | -The flags are the MemOp bits, selecting the sign, width, and endianness | ||
1550 | -of the memory access. | ||
1551 | - | ||
1552 | -For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a | ||
1553 | -64-bit memory access specified in flags. | ||
1554 | - | ||
1555 | -For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of | ||
1556 | -the memory operation is known to be 8-bit. This allows the backend to | ||
1557 | -provide a different set of register constraints. | ||
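As a hedged front-end illustration, a 32-bit little-endian guest load is
normally emitted through the expander rather than the raw opcode;
'ctx->mem_idx' is assumed to be the translator's current mmu index:

    /* Sketch: val = 32-bit little-endian load from guest address addr. */
    tcg_gen_qemu_ld_i32(val, addr, ctx->mem_idx, MO_LEUL);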
1558 | - | ||
1559 | -********* Host vector operations | ||
1560 | - | ||
1561 | -All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE. | ||
1562 | -The former specifies the length of the vector in log2 64-bit units; the | ||
1563 | -latter specifies the length of the element (if applicable) in log2 8-bit units. | ||
1564 | -E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. | ||
1565 | - | ||
1566 | -* mov_vec v0, v1 | ||
1567 | -* ld_vec v0, t1 | ||
1568 | -* st_vec v0, t1 | ||
1569 | - | ||
1570 | - Move, load and store. | ||
1571 | - | ||
1572 | -* dup_vec v0, r1 | ||
1573 | - | ||
1574 | - Duplicate the low N bits of R1 into VECL/VECE copies across V0. | ||
1575 | - | ||
1576 | -* dupi_vec v0, c | ||
1577 | - | ||
1578 | - Similarly, for a constant. | ||
1579 | - Smaller values will be replicated to host register size by the expanders. | ||
1580 | - | ||
1581 | -* dup2_vec v0, r1, r2 | ||
1582 | - | ||
1583 | - Duplicate r2:r1 into VECL/64 copies across V0. This opcode is | ||
1584 | - only present for 32-bit hosts. | ||
1585 | - | ||
1586 | -* add_vec v0, v1, v2 | ||
1587 | - | ||
1588 | - v0 = v1 + v2, in elements across the vector. | ||
1589 | - | ||
1590 | -* sub_vec v0, v1, v2 | ||
1591 | - | ||
1592 | - Similarly, v0 = v1 - v2. | ||
1593 | - | ||
1594 | -* mul_vec v0, v1, v2 | ||
1595 | - | ||
1596 | - Similarly, v0 = v1 * v2. | ||
1597 | - | ||
1598 | -* neg_vec v0, v1 | ||
1599 | - | ||
1600 | - Similarly, v0 = -v1. | ||
1601 | - | ||
1602 | -* abs_vec v0, v1 | ||
1603 | - | ||
1604 | - Similarly, v0 = v1 < 0 ? -v1 : v1, in elements across the vector. | ||
1605 | - | ||
1606 | -* smin_vec: | ||
1607 | -* umin_vec: | ||
1608 | - | ||
1609 | - Similarly, v0 = MIN(v1, v2), for signed and unsigned element types. | ||
1610 | - | ||
1611 | -* smax_vec: | ||
1612 | -* umax_vec: | ||
1613 | - | ||
1614 | - Similarly, v0 = MAX(v1, v2), for signed and unsigned element types. | ||
1615 | - | ||
1616 | -* ssadd_vec: | ||
1617 | -* sssub_vec: | ||
1618 | -* usadd_vec: | ||
1619 | -* ussub_vec: | ||
1620 | - | ||
1621 | - Signed and unsigned saturating addition and subtraction. If the true | ||
1622 | - result is not representable within the element type, the element is | ||
1623 | - set to the minimum or maximum value for the type. | ||
1624 | - | ||
1625 | -* and_vec v0, v1, v2 | ||
1626 | -* or_vec v0, v1, v2 | ||
1627 | -* xor_vec v0, v1, v2 | ||
1628 | -* andc_vec v0, v1, v2 | ||
1629 | -* orc_vec v0, v1, v2 | ||
1630 | -* not_vec v0, v1 | ||
1631 | - | ||
1632 | - Similarly, logical operations with and without complement. | ||
1633 | - Note that VECE is unused. | ||
1634 | - | ||
1635 | -* shli_vec v0, v1, i2 | ||
1636 | -* shls_vec v0, v1, s2 | ||
1637 | - | ||
1638 | - Shift all elements from v1 by a scalar i2/s2. I.e. | ||
1639 | - | ||
1640 | - for (i = 0; i < VECL/VECE; ++i) { | ||
1641 | - v0[i] = v1[i] << s2; | ||
1642 | - } | ||
1643 | - | ||
1644 | -* shri_vec v0, v1, i2 | ||
1645 | -* sari_vec v0, v1, i2 | ||
1646 | -* rotli_vec v0, v1, i2 | ||
1647 | -* shrs_vec v0, v1, s2 | ||
1648 | -* sars_vec v0, v1, s2 | ||
1649 | - | ||
1650 | - Similarly for logical and arithmetic right shift, and left rotate. | ||
1651 | - | ||
1652 | -* shlv_vec v0, v1, v2 | ||
1653 | - | ||
1654 | - Shift elements from v1 by elements from v2. I.e. | ||
1655 | - | ||
1656 | - for (i = 0; i < VECL/VECE; ++i) { | ||
1657 | - v0[i] = v1[i] << v2[i]; | ||
1658 | - } | ||
1659 | - | ||
1660 | -* shrv_vec v0, v1, v2 | ||
1661 | -* sarv_vec v0, v1, v2 | ||
1662 | -* rotlv_vec v0, v1, v2 | ||
1663 | -* rotrv_vec v0, v1, v2 | ||
1664 | - | ||
1665 | - Similarly for logical and arithmetic right shift, and rotates. | ||
1666 | - | ||
1667 | -* cmp_vec v0, v1, v2, cond | ||
1668 | - | ||
1669 | - Compare vectors by element, storing -1 for true and 0 for false. | ||
1670 | - | ||
1671 | -* bitsel_vec v0, v1, v2, v3 | ||
1672 | - | ||
1673 | - Bitwise select, v0 = (v2 & v1) | (v3 & ~v1), across the entire vector. | ||
1674 | - | ||
1675 | -* cmpsel_vec v0, c1, c2, v3, v4, cond | ||
1676 | - | ||
1677 | - Select elements based on comparison results: | ||
1678 | - for (i = 0; i < n; ++i) { | ||
1679 | - v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i]; | ||
1680 | - } | ||
1681 | - | ||
1682 | -********* | ||
1683 | - | ||
1684 | -Note 1: Some shortcuts are defined when the last operand is known to be | ||
1685 | -a constant (e.g. addi for add, movi for mov). | ||
1686 | - | ||
1687 | -Note 2: When using TCG, the opcodes must never be generated directly | ||
1688 | -as some of them may not be available as "real" opcodes. Always use the | ||
1689 | -function tcg_gen_xxx(args). | ||
1690 | - | ||
1691 | -4) Backend | ||
1692 | - | ||
1693 | -tcg-target.h contains the target specific definitions. tcg-target.c.inc | ||
1694 | -contains the target specific code; it is #included by tcg/tcg.c, rather | ||
1695 | -than being a standalone C file. | ||
1696 | - | ||
1697 | -4.1) Assumptions | ||
1698 | - | ||
1699 | -The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or | ||
1700 | -64 bit. It is expected that the pointer has the same size as the word. | ||
1701 | - | ||
1702 | -On a 32 bit target, all 64 bit operations are converted to 32 bits. A | ||
1703 | -few specific operations must be implemented to allow this (see add2_i32, | ||
1704 | -sub2_i32, brcond2_i32). | ||
1705 | - | ||
1706 | -On a 64 bit target, the values are transferred between 32 and 64-bit | ||
1707 | -registers using the following ops: | ||
1708 | -- extrl_i64_i32 / extrh_i64_i32 | ||
1709 | -- ext_i32_i64 | ||
1710 | -- extu_i32_i64 | ||
1711 | - | ||
1712 | -They ensure that the values are correctly truncated or extended when | ||
1713 | -moved from a 32-bit to a 64-bit register or vice-versa. Note that | ||
1714 | -extrl_i64_i32 and extrh_i64_i32 are optional ops. It is not necessary | ||
1715 | -to implement them if all the following conditions are met: | ||
1716 | -- 64-bit registers can hold 32-bit values | ||
1717 | -- 32-bit values in a 64-bit register do not need to stay zero or | ||
1718 | - sign extended | ||
1719 | -- all 32-bit TCG ops ignore the high part of 64-bit registers | ||
1720 | - | ||
1721 | -Floating point operations are not supported in this version. A | ||
1722 | -previous incarnation of the code generator had full support of them, | ||
1723 | -but it is better to concentrate on integer operations first. | ||
1724 | - | ||
1725 | -4.2) Constraints | ||
1726 | - | ||
1727 | -GCC-like constraints are used to define the constraints of every | ||
1728 | -instruction. Memory constraints are not supported in this | ||
1729 | -version. Aliases are specified in the input operands as for GCC. | ||
1730 | - | ||
1731 | -The same register may be used for both an input and an output, even when | ||
1732 | -they are not explicitly aliased. If an op expands to multiple target | ||
1733 | -instructions then care must be taken to avoid clobbering input values. | ||
1734 | -GCC style "early clobber" outputs are supported, with '&'. | ||
1735 | - | ||
1736 | -A target can define specific register or constant constraints. If an | ||
1737 | -operation uses a constant input constraint which does not allow all | ||
1738 | -constants, it must also accept registers in order to have a fallback. | ||
1739 | -The constraint 'i' is defined generically to accept any constant. | ||
1740 | -The constraint 'r' is not defined generically, but is consistently | ||
1741 | -used by each backend to indicate all registers. | ||
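As a hedged example of the notation (the exact mechanism differs between
backends and QEMU versions), an integer add may constrain its output to
any register and its second input to a register or any constant:

    /* Sketch: one register output; register input; reg-or-const input. */
    case INDEX_op_add_i32:
        return C_O1_I2(r, r, ri);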
1742 | - | ||
1743 | -The movi_i32 and movi_i64 operations must accept any constants. | ||
1744 | - | ||
1745 | -The mov_i32 and mov_i64 operations must accept any registers of the | ||
1746 | -same type. | ||
1747 | - | ||
1748 | -The ld/st/sti instructions must accept signed 32 bit constant offsets. | ||
1749 | -This can be implemented by reserving a specific register in which to | ||
1750 | -compute the address if the offset is too big. | ||
1751 | - | ||
1752 | -The ld/st instructions must accept any destination (ld) or source (st) | ||
1753 | -register. | ||
1754 | - | ||
1755 | -The sti instruction may fail if it cannot store the given constant. | ||
1756 | - | ||
1757 | -4.3) Function call assumptions | ||
1758 | - | ||
1759 | -- The only supported types for parameters and return value are: 32 and | ||
1760 | - 64 bit integers and pointers. | ||
1761 | -- The stack grows downwards. | ||
1762 | -- The first N parameters are passed in registers. | ||
1763 | -- The next parameters are passed on the stack by storing them as words. | ||
1764 | -- Some registers are clobbered during the call. | ||
1765 | -- The function can return 0 or 1 value in registers. On a 32 bit | ||
1766 | - target, functions must be able to return 2 values in registers for | ||
1767 | - 64 bit return type. | ||
1768 | - | ||
1769 | -5) Recommended coding rules for best performance | ||
1770 | - | ||
1771 | -- Use globals to represent the parts of the QEMU CPU state which are | ||
1772 | - often modified, e.g. the integer registers and the condition | ||
1773 | - codes. TCG will be able to use host registers to store them. | ||
1774 | - | ||
1775 | -- Avoid globals stored in fixed registers. They must be used only to | ||
1776 | - store the pointer to the CPU state and possibly to store a pointer | ||
1777 | - to a register window. | ||
1778 | - | ||
1779 | -- Use temporaries. Use local temporaries only when really needed, | ||
1780 | - e.g. when you need to use a value after a jump. Local temporaries | ||
1781 | - introduce a performance hit in the current TCG implementation: their | ||
1782 | - content is saved to memory at end of each basic block. | ||
1783 | - | ||
1784 | -- Free temporaries and local temporaries when they are no longer used | ||
1785 | - (tcg_temp_free). Since tcg_const_x() also creates a temporary, you | ||
1786 | - should free it after it is used. Freeing temporaries does not yield | ||
1787 | - better generated code, but it reduces the memory usage of TCG and | ||
1788 | - improves the speed of translation (see the sketch after this list). | ||
1789 | - | ||
1790 | -- Don't hesitate to use helpers for complicated or seldom used guest | ||
1791 | - instructions. There is little performance advantage in using TCG to | ||
1792 | - implement guest instructions taking more than about twenty TCG | ||
1793 | - instructions. Note that this rule of thumb is more applicable to | ||
1794 | - helpers doing complex logic or arithmetic, where the C compiler has | ||
1795 | - scope to do a good job of optimisation; it is less relevant where | ||
1796 | - the instruction is mostly doing loads and stores, and in those cases | ||
1797 | - inline TCG may still be faster for longer sequences. | ||
1798 | - | ||
1799 | -- The hard limit on the number of TCG instructions you can generate | ||
1800 | - per guest instruction is set by MAX_OP_PER_INSTR in exec-all.h -- | ||
1801 | - you cannot exceed this without risking a buffer overrun. | ||
1802 | - | ||
1803 | -- Use the 'discard' instruction if you know that TCG won't be able to | ||
1804 | - prove that a given global is "dead" at a given program point. The | ||
1805 | - x86 guest uses it to improve the condition codes optimisation. | ||
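As referenced in the list above, a hedged sketch of temporary lifetime
management, using the APIs current at the time of this series ('dest'
and 'src' are assumed to exist):

    /* Sketch: create a constant temporary, use it, then free it. */
    TCGv_i32 t = tcg_const_i32(0xff);   /* tcg_const_x creates a temp */
    tcg_gen_and_i32(dest, src, t);
    tcg_temp_free_i32(t);               /* free once no longer used */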
1806 | -- | ||
1807 | 2.34.1
New patch | |||
---|---|---|---|
1 | Like CONFIG_TCG, the enabled method of execution is a host property, | ||
2 | not a guest property. This exposes the define to compile-once files. | ||
1 | 3 | ||
4 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | meson.build | 4 +--- | ||
9 | 1 file changed, 1 insertion(+), 3 deletions(-) | ||
10 | |||
11 | diff --git a/meson.build b/meson.build | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/meson.build | ||
14 | +++ b/meson.build | ||
15 | @@ -XXX,XX +XXX,XX @@ if get_option('tcg').allowed() | ||
16 | endif | ||
17 | if get_option('tcg_interpreter') | ||
18 | tcg_arch = 'tci' | ||
19 | + config_host += { 'CONFIG_TCG_INTERPRETER': 'y' } | ||
20 | elif host_arch == 'x86_64' | ||
21 | tcg_arch = 'i386' | ||
22 | elif host_arch == 'ppc64' | ||
23 | @@ -XXX,XX +XXX,XX @@ foreach target : target_dirs | ||
24 | if sym == 'CONFIG_TCG' or target in accelerator_targets.get(sym, []) | ||
25 | config_target += { sym: 'y' } | ||
26 | config_all += { sym: 'y' } | ||
27 | - if sym == 'CONFIG_TCG' and tcg_arch == 'tci' | ||
28 | - config_target += { 'CONFIG_TCG_INTERPRETER': 'y' } | ||
29 | - endif | ||
30 | if target in modular_tcg | ||
31 | config_target += { 'CONFIG_TCG_MODULAR': 'y' } | ||
32 | else | ||
33 | -- | ||
34 | 2.34.1 | ||
35 | |||
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | We are going to modify this code, so fix its style first to avoid: | ||
4 | |||
5 | ERROR: spaces required around that '*' (ctx:VxV) | ||
6 | #281: FILE: tcg/s390x/tcg-target.c.inc:1224: | ||
7 | + uintptr_t mask = ~(0xffffull << i*16); | ||
8 | ^ | ||
9 | |||
10 | Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com> | ||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | Message-Id: <20221130132654.76369-2-philmd@linaro.org> | ||
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | tcg/s390x/tcg-target.c.inc | 20 ++++++++++---------- | ||
16 | 1 file changed, 10 insertions(+), 10 deletions(-) | ||
17 | |||
18 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/tcg/s390x/tcg-target.c.inc | ||
21 | +++ b/tcg/s390x/tcg-target.c.inc | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool maybe_out_small_movi(TCGContext *s, TCGType type, | ||
23 | } | ||
24 | |||
25 | for (i = 0; i < 4; i++) { | ||
26 | - tcg_target_long mask = 0xffffull << i*16; | ||
27 | + tcg_target_long mask = 0xffffull << i * 16; | ||
28 | if ((uval & mask) == uval) { | ||
29 | - tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16); | ||
30 | + tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i * 16); | ||
31 | return true; | ||
32 | } | ||
33 | } | ||
34 | @@ -XXX,XX +XXX,XX @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) | ||
35 | |||
36 | /* Try all 32-bit insns that can perform it in one go. */ | ||
37 | for (i = 0; i < 4; i++) { | ||
38 | - tcg_target_ulong mask = ~(0xffffull << i*16); | ||
39 | + tcg_target_ulong mask = ~(0xffffull << i * 16); | ||
40 | if (((val | ~valid) & mask) == mask) { | ||
41 | - tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); | ||
42 | + tcg_out_insn_RI(s, ni_insns[i], dest, val >> i * 16); | ||
43 | return; | ||
44 | } | ||
45 | } | ||
46 | @@ -XXX,XX +XXX,XX @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) | ||
47 | /* Try all 48-bit insns that can perform it in one go. */ | ||
48 | if (HAVE_FACILITY(EXT_IMM)) { | ||
49 | for (i = 0; i < 2; i++) { | ||
50 | - tcg_target_ulong mask = ~(0xffffffffull << i*32); | ||
51 | + tcg_target_ulong mask = ~(0xffffffffull << i * 32); | ||
52 | if (((val | ~valid) & mask) == mask) { | ||
53 | - tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); | ||
54 | + tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i * 32); | ||
55 | return; | ||
56 | } | ||
57 | } | ||
58 | @@ -XXX,XX +XXX,XX @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) | ||
59 | |||
60 | /* Try all 32-bit insns that can perform it in one go. */ | ||
61 | for (i = 0; i < 4; i++) { | ||
62 | - tcg_target_ulong mask = (0xffffull << i*16); | ||
63 | + tcg_target_ulong mask = (0xffffull << i * 16); | ||
64 | if ((val & mask) != 0 && (val & ~mask) == 0) { | ||
65 | - tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); | ||
66 | + tcg_out_insn_RI(s, oi_insns[i], dest, val >> i * 16); | ||
67 | return; | ||
68 | } | ||
69 | } | ||
70 | @@ -XXX,XX +XXX,XX @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) | ||
71 | /* Try all 48-bit insns that can perform it in one go. */ | ||
72 | if (HAVE_FACILITY(EXT_IMM)) { | ||
73 | for (i = 0; i < 2; i++) { | ||
74 | - tcg_target_ulong mask = (0xffffffffull << i*32); | ||
75 | + tcg_target_ulong mask = (0xffffffffull << i * 32); | ||
76 | if ((val & mask) != 0 && (val & ~mask) == 0) { | ||
77 | - tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32); | ||
78 | + tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i * 32); | ||
79 | return; | ||
80 | } | ||
81 | } | ||
82 | -- | ||
83 | 2.34.1 | ||
84 | |||
New patch | |||
---|---|---|---|
1 | Remove trailing whitespace at line ends; in one place this also | ||
2 | highlights some missing braces. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/tcg.c | 33 +++++++++++++++++---------------- | ||
8 | tcg/ppc/tcg-target.c.inc | 2 +- | ||
9 | 2 files changed, 18 insertions(+), 17 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/tcg.c | ||
14 | +++ b/tcg/tcg.c | ||
15 | @@ -XXX,XX +XXX,XX @@ void *tcg_malloc_internal(TCGContext *s, int size) | ||
16 | { | ||
17 | TCGPool *p; | ||
18 | int pool_size; | ||
19 | - | ||
20 | + | ||
21 | if (size > TCG_POOL_CHUNK_SIZE) { | ||
22 | /* big malloc: insert a new pool (XXX: could optimize) */ | ||
23 | p = g_malloc(sizeof(TCGPool) + size); | ||
24 | @@ -XXX,XX +XXX,XX @@ void *tcg_malloc_internal(TCGContext *s, int size) | ||
25 | p = g_malloc(sizeof(TCGPool) + pool_size); | ||
26 | p->size = pool_size; | ||
27 | p->next = NULL; | ||
28 | - if (s->pool_current) | ||
29 | + if (s->pool_current) { | ||
30 | s->pool_current->next = p; | ||
31 | - else | ||
32 | + } else { | ||
33 | s->pool_first = p; | ||
34 | + } | ||
35 | } else { | ||
36 | p = p->next; | ||
37 | } | ||
38 | @@ -XXX,XX +XXX,XX @@ static void dump_regs(TCGContext *s) | ||
39 | |||
40 | for(i = 0; i < TCG_TARGET_NB_REGS; i++) { | ||
41 | if (s->reg_to_temp[i] != NULL) { | ||
42 | - printf("%s: %s\n", | ||
43 | - tcg_target_reg_names[i], | ||
44 | + printf("%s: %s\n", | ||
45 | + tcg_target_reg_names[i], | ||
46 | tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); | ||
47 | } | ||
48 | } | ||
49 | @@ -XXX,XX +XXX,XX @@ static void check_regs(TCGContext *s) | ||
50 | ts = s->reg_to_temp[reg]; | ||
51 | if (ts != NULL) { | ||
52 | if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { | ||
53 | - printf("Inconsistency for register %s:\n", | ||
54 | + printf("Inconsistency for register %s:\n", | ||
55 | tcg_target_reg_names[reg]); | ||
56 | goto fail; | ||
57 | } | ||
58 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
59 | nb_iargs = def->nb_iargs; | ||
60 | |||
61 | /* copy constants */ | ||
62 | - memcpy(new_args + nb_oargs + nb_iargs, | ||
63 | + memcpy(new_args + nb_oargs + nb_iargs, | ||
64 | op->args + nb_oargs + nb_iargs, | ||
65 | sizeof(TCGArg) * def->nb_cargs); | ||
66 | |||
67 | i_allocated_regs = s->reserved_regs; | ||
68 | o_allocated_regs = s->reserved_regs; | ||
69 | |||
70 | - /* satisfy input constraints */ | ||
71 | + /* satisfy input constraints */ | ||
72 | for (k = 0; k < nb_iargs; k++) { | ||
73 | TCGRegSet i_preferred_regs, o_preferred_regs; | ||
74 | |||
75 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
76 | const_args[i] = 0; | ||
77 | tcg_regset_set_reg(i_allocated_regs, reg); | ||
78 | } | ||
79 | - | ||
80 | + | ||
81 | /* mark dead temporaries and free the associated registers */ | ||
82 | for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { | ||
83 | if (IS_DEAD_ARG(i)) { | ||
84 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
85 | tcg_reg_alloc_bb_end(s, i_allocated_regs); | ||
86 | } else { | ||
87 | if (def->flags & TCG_OPF_CALL_CLOBBER) { | ||
88 | - /* XXX: permit generic clobber register list ? */ | ||
89 | + /* XXX: permit generic clobber register list ? */ | ||
90 | for (i = 0; i < TCG_TARGET_NB_REGS; i++) { | ||
91 | if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { | ||
92 | tcg_reg_free(s, i, i_allocated_regs); | ||
93 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
94 | an exception. */ | ||
95 | sync_globals(s, i_allocated_regs); | ||
96 | } | ||
97 | - | ||
98 | + | ||
99 | /* satisfy the output constraints */ | ||
100 | for(k = 0; k < nb_oargs; k++) { | ||
101 | i = def->args_ct[k].sort_index; | ||
102 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
103 | |||
104 | /* assign stack slots first */ | ||
105 | call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); | ||
106 | - call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & | ||
107 | + call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & | ||
108 | ~(TCG_TARGET_STACK_ALIGN - 1); | ||
109 | allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); | ||
110 | if (allocate_args) { | ||
111 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
112 | stack_offset += sizeof(tcg_target_long); | ||
113 | #endif | ||
114 | } | ||
115 | - | ||
116 | + | ||
117 | /* assign input registers */ | ||
118 | allocated_regs = s->reserved_regs; | ||
119 | for (i = 0; i < nb_regs; i++) { | ||
120 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
121 | tcg_regset_set_reg(allocated_regs, reg); | ||
122 | } | ||
123 | } | ||
124 | - | ||
125 | + | ||
126 | /* mark dead temporaries and free the associated registers */ | ||
127 | for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { | ||
128 | if (IS_DEAD_ARG(i)) { | ||
129 | temp_dead(s, arg_temp(op->args[i])); | ||
130 | } | ||
131 | } | ||
132 | - | ||
133 | + | ||
134 | /* clobber call registers */ | ||
135 | for (i = 0; i < TCG_TARGET_NB_REGS; i++) { | ||
136 | if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { | ||
137 | @@ -XXX,XX +XXX,XX @@ void tcg_dump_info(GString *buf) | ||
138 | (double)s->code_out_len / tb_div_count); | ||
139 | g_string_append_printf(buf, "avg search data/TB %0.1f\n", | ||
140 | (double)s->search_out_len / tb_div_count); | ||
141 | - | ||
142 | + | ||
143 | g_string_append_printf(buf, "cycles/op %0.1f\n", | ||
144 | s->op_count ? (double)tot / s->op_count : 0); | ||
145 | g_string_append_printf(buf, "cycles/in byte %0.1f\n", | ||
146 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
147 | index XXXXXXX..XXXXXXX 100644 | ||
148 | --- a/tcg/ppc/tcg-target.c.inc | ||
149 | +++ b/tcg/ppc/tcg-target.c.inc | ||
150 | @@ -XXX,XX +XXX,XX @@ | ||
151 | # else | ||
152 | # error "Unknown ABI" | ||
153 | # endif | ||
154 | -#endif | ||
155 | +#endif | ||
156 | |||
157 | #ifdef _CALL_SYSV | ||
158 | # define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
159 | -- | ||
160 | 2.34.1 | ||
161 | |||
New patch | |||
---|---|---|---|
1 | Create a wrapper for locking/unlocking the iothread lock. | ||
1 | 2 | ||
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | include/qemu/main-loop.h | 29 +++++++++++++++++++++++++++++ | ||
7 | 1 file changed, 29 insertions(+) | ||
8 | |||
9 | diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/include/qemu/main-loop.h | ||
12 | +++ b/include/qemu/main-loop.h | ||
13 | @@ -XXX,XX +XXX,XX @@ void qemu_mutex_lock_iothread_impl(const char *file, int line); | ||
14 | */ | ||
15 | void qemu_mutex_unlock_iothread(void); | ||
16 | |||
17 | +/** | ||
18 | + * QEMU_IOTHREAD_LOCK_GUARD | ||
19 | + * | ||
20 | + * Wrap a block of code in a conditional qemu_mutex_{lock,unlock}_iothread. | ||
21 | + */ | ||
22 | +typedef struct IOThreadLockAuto IOThreadLockAuto; | ||
23 | + | ||
24 | +static inline IOThreadLockAuto *qemu_iothread_auto_lock(const char *file, | ||
25 | + int line) | ||
26 | +{ | ||
27 | + if (qemu_mutex_iothread_locked()) { | ||
28 | + return NULL; | ||
29 | + } | ||
30 | + qemu_mutex_lock_iothread_impl(file, line); | ||
31 | + /* Anything non-NULL causes the cleanup function to be called */ | ||
32 | + return (IOThreadLockAuto *)(uintptr_t)1; | ||
33 | +} | ||
34 | + | ||
35 | +static inline void qemu_iothread_auto_unlock(IOThreadLockAuto *l) | ||
36 | +{ | ||
37 | + qemu_mutex_unlock_iothread(); | ||
38 | +} | ||
39 | + | ||
40 | +G_DEFINE_AUTOPTR_CLEANUP_FUNC(IOThreadLockAuto, qemu_iothread_auto_unlock) | ||
41 | + | ||
42 | +#define QEMU_IOTHREAD_LOCK_GUARD() \ | ||
43 | + g_autoptr(IOThreadLockAuto) _iothread_lock_auto __attribute__((unused)) \ | ||
44 | + = qemu_iothread_auto_lock(__FILE__, __LINE__) | ||
45 | + | ||
46 | /* | ||
47 | * qemu_cond_wait_iothread: Wait on condition for the main loop mutex | ||
48 | * | ||
49 | -- | ||
50 | 2.34.1 | ||
51 | |||
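A minimal usage sketch of the new guard (the surrounding function is
hypothetical; the real conversions follow in the next patches): the guard
takes the iothread lock only if the caller does not already hold it, and
releases it automatically at the end of the scope.

    static void example_raise_irq(CPUState *cs)   /* hypothetical */
    {
        QEMU_IOTHREAD_LOCK_GUARD();
        cpu_interrupt(cs, CPU_INTERRUPT_HARD);
    }   /* lock dropped here iff this guard acquired it */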
New patch | |||
---|---|---|---|
1 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | --- | ||
4 | hw/mips/mips_int.c | 11 +---------- | ||
5 | 1 file changed, 1 insertion(+), 10 deletions(-) | ||
1 | 6 | ||
7 | diff --git a/hw/mips/mips_int.c b/hw/mips/mips_int.c | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/hw/mips/mips_int.c | ||
10 | +++ b/hw/mips/mips_int.c | ||
11 | @@ -XXX,XX +XXX,XX @@ static void cpu_mips_irq_request(void *opaque, int irq, int level) | ||
12 | MIPSCPU *cpu = opaque; | ||
13 | CPUMIPSState *env = &cpu->env; | ||
14 | CPUState *cs = CPU(cpu); | ||
15 | - bool locked = false; | ||
16 | |||
17 | if (irq < 0 || irq > 7) { | ||
18 | return; | ||
19 | } | ||
20 | |||
21 | - /* Make sure locking works even if BQL is already held by the caller */ | ||
22 | - if (!qemu_mutex_iothread_locked()) { | ||
23 | - locked = true; | ||
24 | - qemu_mutex_lock_iothread(); | ||
25 | - } | ||
26 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
27 | |||
28 | if (level) { | ||
29 | env->CP0_Cause |= 1 << (irq + CP0Ca_IP); | ||
30 | @@ -XXX,XX +XXX,XX @@ static void cpu_mips_irq_request(void *opaque, int irq, int level) | ||
31 | } else { | ||
32 | cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); | ||
33 | } | ||
34 | - | ||
35 | - if (locked) { | ||
36 | - qemu_mutex_unlock_iothread(); | ||
37 | - } | ||
38 | } | ||
39 | |||
40 | void cpu_mips_irq_init_cpu(MIPSCPU *cpu) | ||
41 | -- | ||
42 | 2.34.1 | ||
43 | |||
New patch | |||
---|---|---|---|
1 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
2 | Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | --- | ||
5 | target/ppc/excp_helper.c | 11 +---------- | ||
6 | 1 file changed, 1 insertion(+), 10 deletions(-) | ||
1 | 7 | ||
8 | diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c | ||
9 | index XXXXXXX..XXXXXXX 100644 | ||
10 | --- a/target/ppc/excp_helper.c | ||
11 | +++ b/target/ppc/excp_helper.c | ||
12 | @@ -XXX,XX +XXX,XX @@ static int ppc_next_unmasked_interrupt(CPUPPCState *env) | ||
13 | void ppc_maybe_interrupt(CPUPPCState *env) | ||
14 | { | ||
15 | CPUState *cs = env_cpu(env); | ||
16 | - bool locked = false; | ||
17 | - | ||
18 | - if (!qemu_mutex_iothread_locked()) { | ||
19 | - locked = true; | ||
20 | - qemu_mutex_lock_iothread(); | ||
21 | - } | ||
22 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
23 | |||
24 | if (ppc_next_unmasked_interrupt(env)) { | ||
25 | cpu_interrupt(cs, CPU_INTERRUPT_HARD); | ||
26 | } else { | ||
27 | cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); | ||
28 | } | ||
29 | - | ||
30 | - if (locked) { | ||
31 | - qemu_mutex_unlock_iothread(); | ||
32 | - } | ||
33 | } | ||
34 | |||
35 | #if defined(TARGET_PPC64) | ||
36 | -- | ||
37 | 2.34.1 | ||
38 | |||
New patch | |||
---|---|---|---|
1 | In addition, use tcg_enabled instead of !kvm_enabled. | ||
1 | 2 | ||
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/ppc/helper_regs.c | 14 ++++---------- | ||
8 | 1 file changed, 4 insertions(+), 10 deletions(-) | ||
9 | |||
10 | diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/ppc/helper_regs.c | ||
13 | +++ b/target/ppc/helper_regs.c | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | #include "qemu/main-loop.h" | ||
16 | #include "exec/exec-all.h" | ||
17 | #include "sysemu/kvm.h" | ||
18 | +#include "sysemu/tcg.h" | ||
19 | #include "helper_regs.h" | ||
20 | #include "power8-pmu.h" | ||
21 | #include "cpu-models.h" | ||
22 | @@ -XXX,XX +XXX,XX @@ void cpu_interrupt_exittb(CPUState *cs) | ||
23 | { | ||
24 | /* | ||
25 | * We don't need to worry about translation blocks | ||
26 | - * when running with KVM. | ||
27 | + * unless running with TCG. | ||
28 | */ | ||
29 | - if (kvm_enabled()) { | ||
30 | - return; | ||
31 | - } | ||
32 | - | ||
33 | - if (!qemu_mutex_iothread_locked()) { | ||
34 | - qemu_mutex_lock_iothread(); | ||
35 | - cpu_interrupt(cs, CPU_INTERRUPT_EXITTB); | ||
36 | - qemu_mutex_unlock_iothread(); | ||
37 | - } else { | ||
38 | + if (tcg_enabled()) { | ||
39 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
40 | cpu_interrupt(cs, CPU_INTERRUPT_EXITTB); | ||
41 | } | ||
42 | } | ||
43 | -- | ||
44 | 2.34.1 | ||
45 | |||
1 | Reviewed-by: Emilio G. Cota <cota@braap.org> | 1 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
3 | --- | 4 | --- |
4 | target/ppc/helper.h | 2 +- | 5 | target/riscv/cpu_helper.c | 10 +--------- |
5 | target/ppc/mem_helper.c | 33 ++++++++++-- | 6 | 1 file changed, 1 insertion(+), 9 deletions(-) |
6 | target/ppc/translate.c | 115 +++++++++++++++++++++------------------- | ||
7 | 3 files changed, 88 insertions(+), 62 deletions(-) | ||
8 | 7 | ||
9 | diff --git a/target/ppc/helper.h b/target/ppc/helper.h | 8 | diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c |
10 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/target/ppc/helper.h | 10 | --- a/target/riscv/cpu_helper.c |
12 | +++ b/target/ppc/helper.h | 11 | +++ b/target/riscv/cpu_helper.c |
13 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32) | 12 | @@ -XXX,XX +XXX,XX @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value) |
14 | DEF_HELPER_1(tbegin, void, env) | 13 | CPURISCVState *env = &cpu->env; |
15 | DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env) | 14 | CPUState *cs = CPU(cpu); |
16 | 15 | uint64_t gein, vsgein = 0, vstip = 0, old = env->mip; | |
17 | -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) | 16 | - bool locked = false; |
18 | +#ifdef TARGET_PPC64 | 17 | |
19 | DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) | 18 | if (riscv_cpu_virt_enabled(env)) { |
20 | DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) | 19 | gein = get_field(env->hstatus, HSTATUS_VGEIN); |
21 | DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG, | 20 | @@ -XXX,XX +XXX,XX @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value) |
22 | diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c | 21 | mask = ((mask == MIP_VSTIP) && env->vstime_irq) ? 0 : mask; |
23 | index XXXXXXX..XXXXXXX 100644 | 22 | vstip = env->vstime_irq ? MIP_VSTIP : 0; |
24 | --- a/target/ppc/mem_helper.c | 23 | |
25 | +++ b/target/ppc/mem_helper.c | 24 | - if (!qemu_mutex_iothread_locked()) { |
26 | @@ -XXX,XX +XXX,XX @@ | 25 | - locked = true; |
27 | #include "exec/cpu_ldst.h" | 26 | - qemu_mutex_lock_iothread(); |
28 | #include "tcg.h" | 27 | - } |
29 | #include "internal.h" | 28 | + QEMU_IOTHREAD_LOCK_GUARD(); |
30 | +#include "qemu/atomic128.h" | 29 | |
31 | 30 | env->mip = (env->mip & ~mask) | (value & mask); | |
32 | //#define DEBUG_OP | 31 | |
33 | 32 | @@ -XXX,XX +XXX,XX @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value) | |
34 | @@ -XXX,XX +XXX,XX @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg, | 33 | cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); |
35 | return i; | 34 | } |
35 | |||
36 | - if (locked) { | ||
37 | - qemu_mutex_unlock_iothread(); | ||
38 | - } | ||
39 | - | ||
40 | return old; | ||
36 | } | 41 | } |
37 | 42 | ||
38 | -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) | ||
39 | +#ifdef TARGET_PPC64 | ||
40 | uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, | ||
41 | uint32_t opidx) | ||
42 | { | ||
43 | - Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); | ||
44 | + Int128 ret; | ||
45 | + | ||
46 | + /* We will have raised EXCP_ATOMIC from the translator. */ | ||
47 | + assert(HAVE_ATOMIC128); | ||
48 | + ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); | ||
49 | env->retxh = int128_gethi(ret); | ||
50 | return int128_getlo(ret); | ||
51 | } | ||
52 | @@ -XXX,XX +XXX,XX @@ uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, | ||
53 | uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, | ||
54 | uint32_t opidx) | ||
55 | { | ||
56 | - Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); | ||
57 | + Int128 ret; | ||
58 | + | ||
59 | + /* We will have raised EXCP_ATOMIC from the translator. */ | ||
60 | + assert(HAVE_ATOMIC128); | ||
61 | + ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); | ||
62 | env->retxh = int128_gethi(ret); | ||
63 | return int128_getlo(ret); | ||
64 | } | ||
65 | @@ -XXX,XX +XXX,XX @@ uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, | ||
66 | void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr, | ||
67 | uint64_t lo, uint64_t hi, uint32_t opidx) | ||
68 | { | ||
69 | - Int128 val = int128_make128(lo, hi); | ||
70 | + Int128 val; | ||
71 | + | ||
72 | + /* We will have raised EXCP_ATOMIC from the translator. */ | ||
73 | + assert(HAVE_ATOMIC128); | ||
74 | + val = int128_make128(lo, hi); | ||
75 | helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC()); | ||
76 | } | ||
77 | |||
78 | void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr, | ||
79 | uint64_t lo, uint64_t hi, uint32_t opidx) | ||
80 | { | ||
81 | - Int128 val = int128_make128(lo, hi); | ||
82 | + Int128 val; | ||
83 | + | ||
84 | + /* We will have raised EXCP_ATOMIC from the translator. */ | ||
85 | + assert(HAVE_ATOMIC128); | ||
86 | + val = int128_make128(lo, hi); | ||
87 | helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC()); | ||
88 | } | ||
89 | |||
90 | @@ -XXX,XX +XXX,XX @@ uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr, | ||
91 | { | ||
92 | bool success = false; | ||
93 | |||
94 | + /* We will have raised EXCP_ATOMIC from the translator. */ | ||
95 | + assert(HAVE_CMPXCHG128); | ||
96 | + | ||
97 | if (likely(addr == env->reserve_addr)) { | ||
98 | Int128 oldv, cmpv, newv; | ||
99 | |||
100 | @@ -XXX,XX +XXX,XX @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr, | ||
101 | { | ||
102 | bool success = false; | ||
103 | |||
104 | + /* We will have raised EXCP_ATOMIC from the translator. */ | ||
105 | + assert(HAVE_CMPXCHG128); | ||
106 | + | ||
107 | if (likely(addr == env->reserve_addr)) { | ||
108 | Int128 oldv, cmpv, newv; | ||
109 | |||
110 | diff --git a/target/ppc/translate.c b/target/ppc/translate.c | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/target/ppc/translate.c | ||
113 | +++ b/target/ppc/translate.c | ||
114 | @@ -XXX,XX +XXX,XX @@ | ||
115 | #include "trace-tcg.h" | ||
116 | #include "exec/translator.h" | ||
117 | #include "exec/log.h" | ||
118 | +#include "qemu/atomic128.h" | ||
119 | |||
120 | |||
121 | #define CPU_SINGLE_STEP 0x1 | ||
122 | @@ -XXX,XX +XXX,XX @@ static void gen_lq(DisasContext *ctx) | ||
123 | hi = cpu_gpr[rd]; | ||
124 | |||
125 | if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { | ||
126 | -#ifdef CONFIG_ATOMIC128 | ||
127 | - TCGv_i32 oi = tcg_temp_new_i32(); | ||
128 | - if (ctx->le_mode) { | ||
129 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); | ||
130 | - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); | ||
131 | + if (HAVE_ATOMIC128) { | ||
132 | + TCGv_i32 oi = tcg_temp_new_i32(); | ||
133 | + if (ctx->le_mode) { | ||
134 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); | ||
135 | + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); | ||
136 | + } else { | ||
137 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); | ||
138 | + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); | ||
139 | + } | ||
140 | + tcg_temp_free_i32(oi); | ||
141 | + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); | ||
142 | } else { | ||
143 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); | ||
144 | - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); | ||
145 | + /* Restart with exclusive lock. */ | ||
146 | + gen_helper_exit_atomic(cpu_env); | ||
147 | + ctx->base.is_jmp = DISAS_NORETURN; | ||
148 | } | ||
149 | - tcg_temp_free_i32(oi); | ||
150 | - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); | ||
151 | -#else | ||
152 | - /* Restart with exclusive lock. */ | ||
153 | - gen_helper_exit_atomic(cpu_env); | ||
154 | - ctx->base.is_jmp = DISAS_NORETURN; | ||
155 | -#endif | ||
156 | } else if (ctx->le_mode) { | ||
157 | tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ); | ||
158 | gen_addr_add(ctx, EA, EA, 8); | ||
159 | @@ -XXX,XX +XXX,XX @@ static void gen_std(DisasContext *ctx) | ||
160 | hi = cpu_gpr[rs]; | ||
161 | |||
162 | if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { | ||
163 | -#ifdef CONFIG_ATOMIC128 | ||
164 | - TCGv_i32 oi = tcg_temp_new_i32(); | ||
165 | - if (ctx->le_mode) { | ||
166 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); | ||
167 | - gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); | ||
168 | + if (HAVE_ATOMIC128) { | ||
169 | + TCGv_i32 oi = tcg_temp_new_i32(); | ||
170 | + if (ctx->le_mode) { | ||
171 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); | ||
172 | + gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); | ||
173 | + } else { | ||
174 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); | ||
175 | + gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); | ||
176 | + } | ||
177 | + tcg_temp_free_i32(oi); | ||
178 | } else { | ||
179 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); | ||
180 | - gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); | ||
181 | + /* Restart with exclusive lock. */ | ||
182 | + gen_helper_exit_atomic(cpu_env); | ||
183 | + ctx->base.is_jmp = DISAS_NORETURN; | ||
184 | } | ||
185 | - tcg_temp_free_i32(oi); | ||
186 | -#else | ||
187 | - /* Restart with exclusive lock. */ | ||
188 | - gen_helper_exit_atomic(cpu_env); | ||
189 | - ctx->base.is_jmp = DISAS_NORETURN; | ||
190 | -#endif | ||
191 | } else if (ctx->le_mode) { | ||
192 | tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ); | ||
193 | gen_addr_add(ctx, EA, EA, 8); | ||
194 | @@ -XXX,XX +XXX,XX @@ static void gen_lqarx(DisasContext *ctx) | ||
195 | hi = cpu_gpr[rd]; | ||
196 | |||
197 | if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { | ||
198 | -#ifdef CONFIG_ATOMIC128 | ||
199 | - TCGv_i32 oi = tcg_temp_new_i32(); | ||
200 | - if (ctx->le_mode) { | ||
201 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, | ||
202 | - ctx->mem_idx)); | ||
203 | - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); | ||
204 | + if (HAVE_ATOMIC128) { | ||
205 | + TCGv_i32 oi = tcg_temp_new_i32(); | ||
206 | + if (ctx->le_mode) { | ||
207 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, | ||
208 | + ctx->mem_idx)); | ||
209 | + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); | ||
210 | + } else { | ||
211 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, | ||
212 | + ctx->mem_idx)); | ||
213 | + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); | ||
214 | + } | ||
215 | + tcg_temp_free_i32(oi); | ||
216 | + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); | ||
217 | } else { | ||
218 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, | ||
219 | - ctx->mem_idx)); | ||
220 | - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); | ||
221 | + /* Restart with exclusive lock. */ | ||
222 | + gen_helper_exit_atomic(cpu_env); | ||
223 | + ctx->base.is_jmp = DISAS_NORETURN; | ||
224 | + tcg_temp_free(EA); | ||
225 | + return; | ||
226 | } | ||
227 | - tcg_temp_free_i32(oi); | ||
228 | - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); | ||
229 | -#else | ||
230 | - /* Restart with exclusive lock. */ | ||
231 | - gen_helper_exit_atomic(cpu_env); | ||
232 | - ctx->base.is_jmp = DISAS_NORETURN; | ||
233 | - tcg_temp_free(EA); | ||
234 | - return; | ||
235 | -#endif | ||
236 | } else if (ctx->le_mode) { | ||
237 | tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16); | ||
238 | tcg_gen_mov_tl(cpu_reserve, EA); | ||
239 | @@ -XXX,XX +XXX,XX @@ static void gen_stqcx_(DisasContext *ctx) | ||
240 | hi = cpu_gpr[rs]; | ||
241 | |||
242 | if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { | ||
243 | - TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); | ||
244 | -#ifdef CONFIG_ATOMIC128 | ||
245 | - if (ctx->le_mode) { | ||
246 | - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); | ||
247 | + if (HAVE_CMPXCHG128) { | ||
248 | + TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); | ||
249 | + if (ctx->le_mode) { | ||
250 | + gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, | ||
251 | + EA, lo, hi, oi); | ||
252 | + } else { | ||
253 | + gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env, | ||
254 | + EA, lo, hi, oi); | ||
255 | + } | ||
256 | + tcg_temp_free_i32(oi); | ||
257 | } else { | ||
258 | - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); | ||
259 | + /* Restart with exclusive lock. */ | ||
260 | + gen_helper_exit_atomic(cpu_env); | ||
261 | + ctx->base.is_jmp = DISAS_NORETURN; | ||
262 | } | ||
263 | -#else | ||
264 | - /* Restart with exclusive lock. */ | ||
265 | - gen_helper_exit_atomic(cpu_env); | ||
266 | - ctx->base.is_jmp = DISAS_NORETURN; | ||
267 | -#endif | ||
268 | tcg_temp_free(EA); | ||
269 | - tcg_temp_free_i32(oi); | ||
270 | } else { | ||
271 | TCGLabel *lab_fail = gen_new_label(); | ||
272 | TCGLabel *lab_over = gen_new_label(); | ||
273 | -- | 43 | -- |
274 | 2.17.2 | 44 | 2.34.1 |
275 | 45 | ||
276 | 46 | diff view generated by jsdifflib |
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com> | |
3 | Consistently access u16.high with atomics to avoid | ||
4 | undefined behaviour in MTTCG. | ||
5 | |||
6 | Note that icount_decr.u16.low is only used in icount mode, | ||
7 | so regular accesses to it are OK. | ||
8 | |||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
11 | Message-Id: <20181010144853.13005-2-cota@braap.org> | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
13 | --- | 4 | --- |
14 | accel/tcg/tcg-all.c | 2 +- | 5 | hw/ppc/ppc.c | 10 +--------- |
15 | accel/tcg/translate-all.c | 2 +- | 6 | 1 file changed, 1 insertion(+), 9 deletions(-) |
16 | qom/cpu.c | 2 +- | ||
17 | 3 files changed, 3 insertions(+), 3 deletions(-) | ||
18 | 7 | ||
19 | diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c | 8 | diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c |
20 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/accel/tcg/tcg-all.c | 10 | --- a/hw/ppc/ppc.c |
22 | +++ b/accel/tcg/tcg-all.c | 11 | +++ b/hw/ppc/ppc.c |
23 | @@ -XXX,XX +XXX,XX @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) | 12 | @@ -XXX,XX +XXX,XX @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level) |
24 | if (!qemu_cpu_is_self(cpu)) { | ||
25 | qemu_cpu_kick(cpu); | ||
26 | } else { | ||
27 | - cpu->icount_decr.u16.high = -1; | ||
28 | + atomic_set(&cpu->icount_decr.u16.high, -1); | ||
29 | if (use_icount && | ||
30 | !cpu->can_do_io | ||
31 | && (mask & ~old_mask) != 0) { | ||
32 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/accel/tcg/translate-all.c | ||
35 | +++ b/accel/tcg/translate-all.c | ||
36 | @@ -XXX,XX +XXX,XX @@ void cpu_interrupt(CPUState *cpu, int mask) | ||
37 | { | 13 | { |
38 | g_assert(qemu_mutex_iothread_locked()); | 14 | CPUPPCState *env = &cpu->env; |
39 | cpu->interrupt_request |= mask; | 15 | unsigned int old_pending; |
40 | - cpu->icount_decr.u16.high = -1; | 16 | - bool locked = false; |
41 | + atomic_set(&cpu->icount_decr.u16.high, -1); | 17 | |
18 | /* We may already have the BQL if coming from the reset path */ | ||
19 | - if (!qemu_mutex_iothread_locked()) { | ||
20 | - locked = true; | ||
21 | - qemu_mutex_lock_iothread(); | ||
22 | - } | ||
23 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
24 | |||
25 | old_pending = env->pending_interrupts; | ||
26 | |||
27 | @@ -XXX,XX +XXX,XX @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level) | ||
28 | |||
29 | trace_ppc_irq_set_exit(env, irq, level, env->pending_interrupts, | ||
30 | CPU(cpu)->interrupt_request); | ||
31 | - | ||
32 | - if (locked) { | ||
33 | - qemu_mutex_unlock_iothread(); | ||
34 | - } | ||
42 | } | 35 | } |
43 | 36 | ||
44 | /* | 37 | /* PowerPC 6xx / 7xx internal IRQ controller */ |
45 | diff --git a/qom/cpu.c b/qom/cpu.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/qom/cpu.c | ||
48 | +++ b/qom/cpu.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static void cpu_common_reset(CPUState *cpu) | ||
50 | cpu->mem_io_pc = 0; | ||
51 | cpu->mem_io_vaddr = 0; | ||
52 | cpu->icount_extra = 0; | ||
53 | - cpu->icount_decr.u32 = 0; | ||
54 | + atomic_set(&cpu->icount_decr.u32, 0); | ||
55 | cpu->can_do_io = 1; | ||
56 | cpu->exception_index = -1; | ||
57 | cpu->crash_occurred = false; | ||
58 | -- | 38 | -- |
59 | 2.17.2 | 39 | 2.34.1 |
60 | 40 | ||
61 | 41 | diff view generated by jsdifflib |
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | Narrow the scope of the lock to the actual read/write, |
---|---|---|---|
2 | moving the cpu_transation_failed call outside the lock. | ||
2 | 3 | ||
3 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
5 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
6 | Message-Id: <20181009174557.16125-5-cota@braap.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 6 | --- |
9 | accel/tcg/cputlb.c | 4 ++-- | 7 | accel/tcg/cputlb.c | 25 ++++++++----------------- |
10 | 1 file changed, 2 insertions(+), 2 deletions(-) | 8 | 1 file changed, 8 insertions(+), 17 deletions(-) |
11 | 9 | ||
12 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | 10 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c |
13 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/accel/tcg/cputlb.c | 12 | --- a/accel/tcg/cputlb.c |
15 | +++ b/accel/tcg/cputlb.c | 13 | +++ b/accel/tcg/cputlb.c |
16 | @@ -XXX,XX +XXX,XX @@ | 14 | @@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, |
17 | } \ | 15 | MemoryRegionSection *section; |
18 | } while (0) | 16 | MemoryRegion *mr; |
19 | 17 | uint64_t val; | |
20 | -#define assert_cpu_is_self(this_cpu) do { \ | 18 | - bool locked = false; |
21 | +#define assert_cpu_is_self(cpu) do { \ | 19 | MemTxResult r; |
22 | if (DEBUG_TLB_GATE) { \ | 20 | |
23 | - g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \ | 21 | section = iotlb_to_section(cpu, full->xlat_section, full->attrs); |
24 | + g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ | 22 | @@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, |
25 | } \ | 23 | cpu_io_recompile(cpu, retaddr); |
26 | } while (0) | 24 | } |
27 | 25 | ||
26 | - if (!qemu_mutex_iothread_locked()) { | ||
27 | - qemu_mutex_lock_iothread(); | ||
28 | - locked = true; | ||
29 | + { | ||
30 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
31 | + r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs); | ||
32 | } | ||
33 | - r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs); | ||
34 | + | ||
35 | if (r != MEMTX_OK) { | ||
36 | hwaddr physaddr = mr_offset + | ||
37 | section->offset_within_address_space - | ||
38 | @@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, | ||
39 | cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, | ||
40 | mmu_idx, full->attrs, r, retaddr); | ||
41 | } | ||
42 | - if (locked) { | ||
43 | - qemu_mutex_unlock_iothread(); | ||
44 | - } | ||
45 | - | ||
46 | return val; | ||
47 | } | ||
48 | |||
49 | @@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, | ||
50 | hwaddr mr_offset; | ||
51 | MemoryRegionSection *section; | ||
52 | MemoryRegion *mr; | ||
53 | - bool locked = false; | ||
54 | MemTxResult r; | ||
55 | |||
56 | section = iotlb_to_section(cpu, full->xlat_section, full->attrs); | ||
57 | @@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, | ||
58 | */ | ||
59 | save_iotlb_data(cpu, section, mr_offset); | ||
60 | |||
61 | - if (!qemu_mutex_iothread_locked()) { | ||
62 | - qemu_mutex_lock_iothread(); | ||
63 | - locked = true; | ||
64 | + { | ||
65 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
66 | + r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs); | ||
67 | } | ||
68 | - r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs); | ||
69 | + | ||
70 | if (r != MEMTX_OK) { | ||
71 | hwaddr physaddr = mr_offset + | ||
72 | section->offset_within_address_space - | ||
73 | @@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, | ||
74 | MMU_DATA_STORE, mmu_idx, full->attrs, r, | ||
75 | retaddr); | ||
76 | } | ||
77 | - if (locked) { | ||
78 | - qemu_mutex_unlock_iothread(); | ||
79 | - } | ||
80 | } | ||
81 | |||
82 | static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) | ||
28 | -- | 83 | -- |
29 | 2.17.2 | 84 | 2.34.1 |
30 | 85 | ||
31 | 86 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Replace goto allocate_in_reg with a boolean. | ||
2 | Remove o_preferred_regs which isn't used, except to copy. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/tcg.c | 45 +++++++++++++++++++++------------------------ | ||
8 | 1 file changed, 21 insertions(+), 24 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/tcg.c | ||
13 | +++ b/tcg/tcg.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
15 | |||
16 | /* satisfy input constraints */ | ||
17 | for (k = 0; k < nb_iargs; k++) { | ||
18 | - TCGRegSet i_preferred_regs, o_preferred_regs; | ||
19 | + TCGRegSet i_preferred_regs; | ||
20 | + bool allocate_new_reg; | ||
21 | |||
22 | i = def->args_ct[nb_oargs + k].sort_index; | ||
23 | arg = op->args[i]; | ||
24 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
25 | continue; | ||
26 | } | ||
27 | |||
28 | - i_preferred_regs = o_preferred_regs = 0; | ||
29 | + reg = ts->reg; | ||
30 | + i_preferred_regs = 0; | ||
31 | + allocate_new_reg = false; | ||
32 | + | ||
33 | if (arg_ct->ialias) { | ||
34 | - o_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
35 | + i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
36 | |||
37 | /* | ||
38 | * If the input is readonly, then it cannot also be an | ||
39 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
40 | * register and move it. | ||
41 | */ | ||
42 | if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { | ||
43 | - goto allocate_in_reg; | ||
44 | + allocate_new_reg = true; | ||
45 | + } else if (ts->val_type == TEMP_VAL_REG) { | ||
46 | + /* | ||
47 | + * Check if the current register has already been | ||
48 | + * allocated for another input. | ||
49 | + */ | ||
50 | + allocate_new_reg = tcg_regset_test_reg(i_allocated_regs, reg); | ||
51 | } | ||
52 | - | ||
53 | - /* | ||
54 | - * Check if the current register has already been allocated | ||
55 | - * for another input aliased to an output. | ||
56 | - */ | ||
57 | - if (ts->val_type == TEMP_VAL_REG) { | ||
58 | - reg = ts->reg; | ||
59 | - for (int k2 = 0; k2 < k; k2++) { | ||
60 | - int i2 = def->args_ct[nb_oargs + k2].sort_index; | ||
61 | - if (def->args_ct[i2].ialias && reg == new_args[i2]) { | ||
62 | - goto allocate_in_reg; | ||
63 | - } | ||
64 | - } | ||
65 | - } | ||
66 | - i_preferred_regs = o_preferred_regs; | ||
67 | } | ||
68 | |||
69 | - temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); | ||
70 | - reg = ts->reg; | ||
71 | + if (!allocate_new_reg) { | ||
72 | + temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); | ||
73 | + reg = ts->reg; | ||
74 | + allocate_new_reg = !tcg_regset_test_reg(arg_ct->regs, reg); | ||
75 | + } | ||
76 | |||
77 | - if (!tcg_regset_test_reg(arg_ct->regs, reg)) { | ||
78 | - allocate_in_reg: | ||
79 | + if (allocate_new_reg) { | ||
80 | /* | ||
81 | * Allocate a new register matching the constraint | ||
82 | * and move the temporary register into it. | ||
83 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
84 | temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
85 | i_allocated_regs, 0); | ||
86 | reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, | ||
87 | - o_preferred_regs, ts->indirect_base); | ||
88 | + i_preferred_regs, ts->indirect_base); | ||
89 | if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { | ||
90 | /* | ||
91 | * Cross register class move not supported. Sync the | ||
92 | -- | ||
93 | 2.34.1 | ||
94 | |||
95 | diff view generated by jsdifflib |
1 | Reviewed-by: Emilio G. Cota <cota@braap.org> | 1 | The hppa host code has been removed since 2013; this |
---|---|---|---|
2 | should have been deleted at the same time. | ||
3 | |||
4 | Fixes: 802b5081233a ("tcg-hppa: Remove tcg backend") | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
3 | --- | 7 | --- |
4 | target/arm/helper-a64.c | 259 +++++++++++++++++++++------------------- | 8 | tcg/aarch64/tcg-target.h | 1 - |
5 | 1 file changed, 133 insertions(+), 126 deletions(-) | 9 | tcg/arm/tcg-target.h | 1 - |
10 | tcg/tcg.c | 32 ++------------------------------ | ||
11 | 3 files changed, 2 insertions(+), 32 deletions(-) | ||
6 | 12 | ||
7 | diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c | 13 | diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h |
8 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
9 | --- a/target/arm/helper-a64.c | 15 | --- a/tcg/aarch64/tcg-target.h |
10 | +++ b/target/arm/helper-a64.c | 16 | +++ b/tcg/aarch64/tcg-target.h |
11 | @@ -XXX,XX +XXX,XX @@ | 17 | @@ -XXX,XX +XXX,XX @@ |
12 | #include "exec/exec-all.h" | 18 | #define TCG_TARGET_INSN_UNIT_SIZE 4 |
13 | #include "exec/cpu_ldst.h" | 19 | #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 |
14 | #include "qemu/int128.h" | 20 | #define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) |
15 | +#include "qemu/atomic128.h" | 21 | -#undef TCG_TARGET_STACK_GROWSUP |
16 | #include "tcg.h" | 22 | |
17 | #include "fpu/softfloat.h" | 23 | typedef enum { |
18 | #include <zlib.h> /* For crc32 */ | 24 | TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, |
19 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) | 25 | diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h |
20 | return crc32c(acc, buf, bytes) ^ 0xffffffff; | 26 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/tcg/arm/tcg-target.h | ||
28 | +++ b/tcg/arm/tcg-target.h | ||
29 | @@ -XXX,XX +XXX,XX @@ extern int arm_arch; | ||
30 | |||
31 | #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) | ||
32 | |||
33 | -#undef TCG_TARGET_STACK_GROWSUP | ||
34 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
35 | #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 | ||
36 | #define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX | ||
37 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/tcg/tcg.c | ||
40 | +++ b/tcg/tcg.c | ||
41 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
42 | } | ||
43 | |||
44 | if (TCG_TARGET_REG_BITS < 64 && is_64bit) { | ||
45 | - /* | ||
46 | - * If stack grows up, then we will be placing successive | ||
47 | - * arguments at lower addresses, which means we need to | ||
48 | - * reverse the order compared to how we would normally | ||
49 | - * treat either big or little-endian. For those arguments | ||
50 | - * that will wind up in registers, this still works for | ||
51 | - * HPPA (the only current STACK_GROWSUP target) since the | ||
52 | - * argument registers are *also* allocated in decreasing | ||
53 | - * order. If another such target is added, this logic may | ||
54 | - * have to get more complicated to differentiate between | ||
55 | - * stack arguments and register arguments. | ||
56 | - */ | ||
57 | -#if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP) | ||
58 | - op->args[pi++] = temp_arg(args[i] + 1); | ||
59 | - op->args[pi++] = temp_arg(args[i]); | ||
60 | -#else | ||
61 | - op->args[pi++] = temp_arg(args[i]); | ||
62 | - op->args[pi++] = temp_arg(args[i] + 1); | ||
63 | -#endif | ||
64 | + op->args[pi++] = temp_arg(args[i] + HOST_BIG_ENDIAN); | ||
65 | + op->args[pi++] = temp_arg(args[i] + !HOST_BIG_ENDIAN); | ||
66 | real_args += 2; | ||
67 | continue; | ||
68 | } | ||
69 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
70 | return true; | ||
21 | } | 71 | } |
22 | 72 | ||
23 | -/* Returns 0 on success; 1 otherwise. */ | 73 | -#ifdef TCG_TARGET_STACK_GROWSUP |
24 | -static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr, | 74 | -#define STACK_DIR(x) (-(x)) |
25 | - uint64_t new_lo, uint64_t new_hi, | 75 | -#else |
26 | - bool parallel, uintptr_t ra) | 76 | -#define STACK_DIR(x) (x) |
27 | +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, | 77 | -#endif |
28 | + uint64_t new_lo, uint64_t new_hi) | 78 | - |
79 | static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
29 | { | 80 | { |
30 | - Int128 oldv, cmpv, newv; | 81 | const int nb_oargs = TCGOP_CALLO(op); |
31 | + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); | 82 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) |
32 | + Int128 newv = int128_make128(new_lo, new_hi); | 83 | stack_offset = TCG_TARGET_CALL_STACK_OFFSET; |
33 | + Int128 oldv; | 84 | for (i = nb_regs; i < nb_iargs; i++) { |
34 | + uintptr_t ra = GETPC(); | 85 | arg = op->args[nb_oargs + i]; |
35 | + uint64_t o0, o1; | 86 | -#ifdef TCG_TARGET_STACK_GROWSUP |
36 | bool success; | 87 | - stack_offset -= sizeof(tcg_target_long); |
37 | |||
38 | - cmpv = int128_make128(env->exclusive_val, env->exclusive_high); | ||
39 | - newv = int128_make128(new_lo, new_hi); | ||
40 | - | ||
41 | - if (parallel) { | ||
42 | -#ifndef CONFIG_ATOMIC128 | ||
43 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
44 | -#else | ||
45 | - int mem_idx = cpu_mmu_index(env, false); | ||
46 | - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
47 | - oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); | ||
48 | - success = int128_eq(oldv, cmpv); | ||
49 | -#endif | 88 | -#endif |
50 | - } else { | 89 | if (arg != TCG_CALL_DUMMY_ARG) { |
51 | - uint64_t o0, o1; | 90 | ts = arg_temp(arg); |
52 | - | 91 | temp_load(s, ts, tcg_target_available_regs[ts->type], |
53 | #ifdef CONFIG_USER_ONLY | 92 | s->reserved_regs, 0); |
54 | - /* ??? Enforce alignment. */ | 93 | tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); |
55 | - uint64_t *haddr = g2h(addr); | 94 | } |
56 | + /* ??? Enforce alignment. */ | 95 | -#ifndef TCG_TARGET_STACK_GROWSUP |
57 | + uint64_t *haddr = g2h(addr); | 96 | stack_offset += sizeof(tcg_target_long); |
58 | |||
59 | - helper_retaddr = ra; | ||
60 | - o0 = ldq_le_p(haddr + 0); | ||
61 | - o1 = ldq_le_p(haddr + 1); | ||
62 | - oldv = int128_make128(o0, o1); | ||
63 | + helper_retaddr = ra; | ||
64 | + o0 = ldq_le_p(haddr + 0); | ||
65 | + o1 = ldq_le_p(haddr + 1); | ||
66 | + oldv = int128_make128(o0, o1); | ||
67 | |||
68 | - success = int128_eq(oldv, cmpv); | ||
69 | - if (success) { | ||
70 | - stq_le_p(haddr + 0, int128_getlo(newv)); | ||
71 | - stq_le_p(haddr + 1, int128_gethi(newv)); | ||
72 | - } | ||
73 | - helper_retaddr = 0; | ||
74 | -#else | ||
75 | - int mem_idx = cpu_mmu_index(env, false); | ||
76 | - TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
77 | - TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); | ||
78 | - | ||
79 | - o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); | ||
80 | - o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); | ||
81 | - oldv = int128_make128(o0, o1); | ||
82 | - | ||
83 | - success = int128_eq(oldv, cmpv); | ||
84 | - if (success) { | ||
85 | - helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); | ||
86 | - helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); | ||
87 | - } | ||
88 | -#endif | ||
89 | + success = int128_eq(oldv, cmpv); | ||
90 | + if (success) { | ||
91 | + stq_le_p(haddr + 0, int128_getlo(newv)); | ||
92 | + stq_le_p(haddr + 1, int128_gethi(newv)); | ||
93 | } | ||
94 | + helper_retaddr = 0; | ||
95 | +#else | ||
96 | + int mem_idx = cpu_mmu_index(env, false); | ||
97 | + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
98 | + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); | ||
99 | + | ||
100 | + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); | ||
101 | + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); | ||
102 | + oldv = int128_make128(o0, o1); | ||
103 | + | ||
104 | + success = int128_eq(oldv, cmpv); | ||
105 | + if (success) { | ||
106 | + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); | ||
107 | + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); | ||
108 | + } | ||
109 | +#endif | ||
110 | |||
111 | return !success; | ||
112 | } | ||
113 | |||
114 | -uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, | ||
115 | - uint64_t new_lo, uint64_t new_hi) | ||
116 | -{ | ||
117 | - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC()); | ||
118 | -} | ||
119 | - | ||
120 | uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, | ||
121 | uint64_t new_lo, uint64_t new_hi) | ||
122 | -{ | ||
123 | - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC()); | ||
124 | -} | ||
125 | - | ||
126 | -static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr, | ||
127 | - uint64_t new_lo, uint64_t new_hi, | ||
128 | - bool parallel, uintptr_t ra) | ||
129 | { | ||
130 | Int128 oldv, cmpv, newv; | ||
131 | + uintptr_t ra = GETPC(); | ||
132 | bool success; | ||
133 | + int mem_idx; | ||
134 | + TCGMemOpIdx oi; | ||
135 | |||
136 | - /* high and low need to be switched here because this is not actually a | ||
137 | - * 128bit store but two doublewords stored consecutively | ||
138 | - */ | ||
139 | - cmpv = int128_make128(env->exclusive_high, env->exclusive_val); | ||
140 | - newv = int128_make128(new_hi, new_lo); | ||
141 | - | ||
142 | - if (parallel) { | ||
143 | -#ifndef CONFIG_ATOMIC128 | ||
144 | + if (!HAVE_CMPXCHG128) { | ||
145 | cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
146 | -#else | ||
147 | - int mem_idx = cpu_mmu_index(env, false); | ||
148 | - TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); | ||
149 | - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); | ||
150 | - success = int128_eq(oldv, cmpv); | ||
151 | -#endif | ||
152 | - } else { | ||
153 | - uint64_t o0, o1; | ||
154 | - | ||
155 | -#ifdef CONFIG_USER_ONLY | ||
156 | - /* ??? Enforce alignment. */ | ||
157 | - uint64_t *haddr = g2h(addr); | ||
158 | - | ||
159 | - helper_retaddr = ra; | ||
160 | - o1 = ldq_be_p(haddr + 0); | ||
161 | - o0 = ldq_be_p(haddr + 1); | ||
162 | - oldv = int128_make128(o0, o1); | ||
163 | - | ||
164 | - success = int128_eq(oldv, cmpv); | ||
165 | - if (success) { | ||
166 | - stq_be_p(haddr + 0, int128_gethi(newv)); | ||
167 | - stq_be_p(haddr + 1, int128_getlo(newv)); | ||
168 | - } | ||
169 | - helper_retaddr = 0; | ||
170 | -#else | ||
171 | - int mem_idx = cpu_mmu_index(env, false); | ||
172 | - TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); | ||
173 | - TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); | ||
174 | - | ||
175 | - o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); | ||
176 | - o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); | ||
177 | - oldv = int128_make128(o0, o1); | ||
178 | - | ||
179 | - success = int128_eq(oldv, cmpv); | ||
180 | - if (success) { | ||
181 | - helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); | ||
182 | - helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); | ||
183 | - } | ||
184 | -#endif | 97 | -#endif |
185 | } | 98 | } |
186 | 99 | ||
187 | + mem_idx = cpu_mmu_index(env, false); | 100 | /* assign input registers */ |
188 | + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
189 | + | ||
190 | + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); | ||
191 | + newv = int128_make128(new_lo, new_hi); | ||
192 | + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); | ||
193 | + | ||
194 | + success = int128_eq(oldv, cmpv); | ||
195 | return !success; | ||
196 | } | ||
197 | |||
198 | uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, | ||
199 | uint64_t new_lo, uint64_t new_hi) | ||
200 | { | ||
201 | - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC()); | ||
202 | + /* | ||
203 | + * High and low need to be switched here because this is not actually a | ||
204 | + * 128bit store but two doublewords stored consecutively | ||
205 | + */ | ||
206 | + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); | ||
207 | + Int128 newv = int128_make128(new_lo, new_hi); | ||
208 | + Int128 oldv; | ||
209 | + uintptr_t ra = GETPC(); | ||
210 | + uint64_t o0, o1; | ||
211 | + bool success; | ||
212 | + | ||
213 | +#ifdef CONFIG_USER_ONLY | ||
214 | + /* ??? Enforce alignment. */ | ||
215 | + uint64_t *haddr = g2h(addr); | ||
216 | + | ||
217 | + helper_retaddr = ra; | ||
218 | + o1 = ldq_be_p(haddr + 0); | ||
219 | + o0 = ldq_be_p(haddr + 1); | ||
220 | + oldv = int128_make128(o0, o1); | ||
221 | + | ||
222 | + success = int128_eq(oldv, cmpv); | ||
223 | + if (success) { | ||
224 | + stq_be_p(haddr + 0, int128_gethi(newv)); | ||
225 | + stq_be_p(haddr + 1, int128_getlo(newv)); | ||
226 | + } | ||
227 | + helper_retaddr = 0; | ||
228 | +#else | ||
229 | + int mem_idx = cpu_mmu_index(env, false); | ||
230 | + TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); | ||
231 | + TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); | ||
232 | + | ||
233 | + o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); | ||
234 | + o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); | ||
235 | + oldv = int128_make128(o0, o1); | ||
236 | + | ||
237 | + success = int128_eq(oldv, cmpv); | ||
238 | + if (success) { | ||
239 | + helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); | ||
240 | + helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); | ||
241 | + } | ||
242 | +#endif | ||
243 | + | ||
244 | + return !success; | ||
245 | } | ||
246 | |||
247 | uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, | ||
248 | - uint64_t new_lo, uint64_t new_hi) | ||
249 | + uint64_t new_lo, uint64_t new_hi) | ||
250 | { | ||
251 | - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC()); | ||
252 | + Int128 oldv, cmpv, newv; | ||
253 | + uintptr_t ra = GETPC(); | ||
254 | + bool success; | ||
255 | + int mem_idx; | ||
256 | + TCGMemOpIdx oi; | ||
257 | + | ||
258 | + if (!HAVE_CMPXCHG128) { | ||
259 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
260 | + } | ||
261 | + | ||
262 | + mem_idx = cpu_mmu_index(env, false); | ||
263 | + oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); | ||
264 | + | ||
265 | + /* | ||
266 | + * High and low need to be switched here because this is not actually a | ||
267 | + * 128bit store but two doublewords stored consecutively | ||
268 | + */ | ||
269 | + cmpv = int128_make128(env->exclusive_high, env->exclusive_val); | ||
270 | + newv = int128_make128(new_hi, new_lo); | ||
271 | + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); | ||
272 | + | ||
273 | + success = int128_eq(oldv, cmpv); | ||
274 | + return !success; | ||
275 | } | ||
276 | |||
277 | /* Writes back the old data into Rs. */ | ||
278 | void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, | ||
279 | uint64_t new_lo, uint64_t new_hi) | ||
280 | { | ||
281 | - uintptr_t ra = GETPC(); | ||
282 | -#ifndef CONFIG_ATOMIC128 | ||
283 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
284 | -#else | ||
285 | Int128 oldv, cmpv, newv; | ||
286 | + uintptr_t ra = GETPC(); | ||
287 | + int mem_idx; | ||
288 | + TCGMemOpIdx oi; | ||
289 | + | ||
290 | + if (!HAVE_CMPXCHG128) { | ||
291 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
292 | + } | ||
293 | + | ||
294 | + mem_idx = cpu_mmu_index(env, false); | ||
295 | + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
296 | |||
297 | cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]); | ||
298 | newv = int128_make128(new_lo, new_hi); | ||
299 | - | ||
300 | - int mem_idx = cpu_mmu_index(env, false); | ||
301 | - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
302 | oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); | ||
303 | |||
304 | env->xregs[rs] = int128_getlo(oldv); | ||
305 | env->xregs[rs + 1] = int128_gethi(oldv); | ||
306 | -#endif | ||
307 | } | ||
308 | |||
309 | void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, | ||
310 | uint64_t new_hi, uint64_t new_lo) | ||
311 | { | ||
312 | - uintptr_t ra = GETPC(); | ||
313 | -#ifndef CONFIG_ATOMIC128 | ||
314 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
315 | -#else | ||
316 | Int128 oldv, cmpv, newv; | ||
317 | + uintptr_t ra = GETPC(); | ||
318 | + int mem_idx; | ||
319 | + TCGMemOpIdx oi; | ||
320 | + | ||
321 | + if (!HAVE_CMPXCHG128) { | ||
322 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
323 | + } | ||
324 | + | ||
325 | + mem_idx = cpu_mmu_index(env, false); | ||
326 | + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
327 | |||
328 | cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]); | ||
329 | newv = int128_make128(new_lo, new_hi); | ||
330 | - | ||
331 | - int mem_idx = cpu_mmu_index(env, false); | ||
332 | - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
333 | oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); | ||
334 | |||
335 | env->xregs[rs + 1] = int128_getlo(oldv); | ||
336 | env->xregs[rs] = int128_gethi(oldv); | ||
337 | -#endif | ||
338 | } | ||
339 | |||
340 | /* | ||
341 | -- | 101 | -- |
342 | 2.17.2 | 102 | 2.34.1 |
343 | 103 | ||
344 | 104 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Unused since commit 7b7d8b2d9a ("tcg/tci: Use ffi for calls"). | ||
1 | 2 | ||
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | tcg/tci.c | 1 - | ||
7 | tcg/tci/tcg-target.c.inc | 4 ---- | ||
8 | 2 files changed, 5 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/tci.c b/tcg/tci.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/tci.c | ||
13 | +++ b/tcg/tci.c | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | */ | ||
16 | |||
17 | #include "qemu/osdep.h" | ||
18 | -#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */ | ||
19 | #include "exec/cpu_ldst.h" | ||
20 | #include "tcg/tcg-op.h" | ||
21 | #include "tcg/tcg-ldst.h" | ||
22 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/tcg/tci/tcg-target.c.inc | ||
25 | +++ b/tcg/tci/tcg-target.c.inc | ||
26 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = { | ||
27 | TCG_REG_R0, | ||
28 | }; | ||
29 | |||
30 | -#if MAX_OPC_PARAM_IARGS != 7 | ||
31 | -# error Fix needed, number of supported input arguments changed! | ||
32 | -#endif | ||
33 | - | ||
34 | /* No call arguments via registers. All will be stored on the "stack". */ | ||
35 | static const int tcg_target_call_iarg_regs[] = { }; | ||
36 | |||
37 | -- | ||
38 | 2.34.1 | ||
39 | |||
40 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The assignment to mem_coherent should be done with any | ||
2 | modification, not simply with a newly allocated register. | ||
1 | 3 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | tcg/tcg.c | 4 ++-- | ||
7 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
8 | |||
9 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/tcg/tcg.c | ||
12 | +++ b/tcg/tcg.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
14 | ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
15 | op->output_pref[0], ots->indirect_base); | ||
16 | ots->val_type = TEMP_VAL_REG; | ||
17 | - ots->mem_coherent = 0; | ||
18 | s->reg_to_temp[ots->reg] = ots; | ||
19 | } | ||
20 | |||
21 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
22 | tcg_debug_assert(ok); | ||
23 | |||
24 | done: | ||
25 | + ots->mem_coherent = 0; | ||
26 | if (IS_DEAD_ARG(1)) { | ||
27 | temp_dead(s, its); | ||
28 | } | ||
29 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
30 | ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
31 | op->output_pref[0], ots->indirect_base); | ||
32 | ots->val_type = TEMP_VAL_REG; | ||
33 | - ots->mem_coherent = 0; | ||
34 | s->reg_to_temp[ots->reg] = ots; | ||
35 | } | ||
36 | |||
37 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
38 | return false; | ||
39 | |||
40 | done: | ||
41 | + ots->mem_coherent = 0; | ||
42 | if (IS_DEAD_ARG(1)) { | ||
43 | temp_dead(s, itsl); | ||
44 | } | ||
45 | -- | ||
46 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | Create two new functions, set_temp_val_{reg,nonreg}. | |
2 | Assert that the reg_to_temp mapping is correct before | ||
3 | any changes are made. | ||
4 | |||
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/tcg.c | 159 +++++++++++++++++++++++++++++------------------------- | ||
9 | 1 file changed, 85 insertions(+), 74 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/tcg.c | ||
14 | +++ b/tcg/tcg.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) | ||
16 | ts->mem_allocated = 1; | ||
17 | } | ||
18 | |||
19 | +/* Assign @reg to @ts, and update reg_to_temp[]. */ | ||
20 | +static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) | ||
21 | +{ | ||
22 | + if (ts->val_type == TEMP_VAL_REG) { | ||
23 | + TCGReg old = ts->reg; | ||
24 | + tcg_debug_assert(s->reg_to_temp[old] == ts); | ||
25 | + if (old == reg) { | ||
26 | + return; | ||
27 | + } | ||
28 | + s->reg_to_temp[old] = NULL; | ||
29 | + } | ||
30 | + tcg_debug_assert(s->reg_to_temp[reg] == NULL); | ||
31 | + s->reg_to_temp[reg] = ts; | ||
32 | + ts->val_type = TEMP_VAL_REG; | ||
33 | + ts->reg = reg; | ||
34 | +} | ||
35 | + | ||
36 | +/* Assign a non-register value type to @ts, and update reg_to_temp[]. */ | ||
37 | +static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) | ||
38 | +{ | ||
39 | + tcg_debug_assert(type != TEMP_VAL_REG); | ||
40 | + if (ts->val_type == TEMP_VAL_REG) { | ||
41 | + TCGReg reg = ts->reg; | ||
42 | + tcg_debug_assert(s->reg_to_temp[reg] == ts); | ||
43 | + s->reg_to_temp[reg] = NULL; | ||
44 | + } | ||
45 | + ts->val_type = type; | ||
46 | +} | ||
47 | + | ||
48 | static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); | ||
49 | |||
50 | /* Mark a temporary as free or dead. If 'free_or_dead' is negative, | ||
51 | @@ -XXX,XX +XXX,XX @@ static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) | ||
52 | default: | ||
53 | g_assert_not_reached(); | ||
54 | } | ||
55 | - if (ts->val_type == TEMP_VAL_REG) { | ||
56 | - s->reg_to_temp[ts->reg] = NULL; | ||
57 | - } | ||
58 | - ts->val_type = new_type; | ||
59 | + set_temp_val_nonreg(s, ts, new_type); | ||
60 | } | ||
61 | |||
62 | /* Mark a temporary as dead. */ | ||
63 | @@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, | ||
64 | default: | ||
65 | tcg_abort(); | ||
66 | } | ||
67 | - ts->reg = reg; | ||
68 | - ts->val_type = TEMP_VAL_REG; | ||
69 | - s->reg_to_temp[reg] = ts; | ||
70 | + set_temp_val_reg(s, ts, reg); | ||
71 | } | ||
72 | |||
73 | /* Save a temporary to memory. 'allocated_regs' is used in case a | ||
74 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, | ||
75 | tcg_debug_assert(!temp_readonly(ots)); | ||
76 | |||
77 | /* The movi is not explicitly generated here. */ | ||
78 | - if (ots->val_type == TEMP_VAL_REG) { | ||
79 | - s->reg_to_temp[ots->reg] = NULL; | ||
80 | - } | ||
81 | - ots->val_type = TEMP_VAL_CONST; | ||
82 | + set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); | ||
83 | ots->val = val; | ||
84 | ots->mem_coherent = 0; | ||
85 | if (NEED_SYNC_ARG(0)) { | ||
86 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) | ||
87 | TCGRegSet allocated_regs, preferred_regs; | ||
88 | TCGTemp *ts, *ots; | ||
89 | TCGType otype, itype; | ||
90 | + TCGReg oreg, ireg; | ||
91 | |||
92 | allocated_regs = s->reserved_regs; | ||
93 | preferred_regs = op->output_pref[0]; | ||
94 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) | ||
95 | temp_load(s, ts, tcg_target_available_regs[itype], | ||
96 | allocated_regs, preferred_regs); | ||
97 | } | ||
98 | - | ||
99 | tcg_debug_assert(ts->val_type == TEMP_VAL_REG); | ||
100 | + ireg = ts->reg; | ||
101 | + | ||
102 | if (IS_DEAD_ARG(0)) { | ||
103 | /* mov to a non-saved dead register makes no sense (even with | ||
104 | liveness analysis disabled). */ | ||
105 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) | ||
106 | if (!ots->mem_allocated) { | ||
107 | temp_allocate_frame(s, ots); | ||
108 | } | ||
109 | - tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); | ||
110 | + tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); | ||
111 | if (IS_DEAD_ARG(1)) { | ||
112 | temp_dead(s, ts); | ||
113 | } | ||
114 | temp_dead(s, ots); | ||
115 | + return; | ||
116 | + } | ||
117 | + | ||
118 | + if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { | ||
119 | + /* | ||
120 | + * The mov can be suppressed. Kill input first, so that it | ||
121 | + * is unlinked from reg_to_temp, then set the output to the | ||
122 | + * reg that we saved from the input. | ||
123 | + */ | ||
124 | + temp_dead(s, ts); | ||
125 | + oreg = ireg; | ||
126 | } else { | ||
127 | - if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { | ||
128 | - /* the mov can be suppressed */ | ||
129 | - if (ots->val_type == TEMP_VAL_REG) { | ||
130 | - s->reg_to_temp[ots->reg] = NULL; | ||
131 | - } | ||
132 | - ots->reg = ts->reg; | ||
133 | - temp_dead(s, ts); | ||
134 | + if (ots->val_type == TEMP_VAL_REG) { | ||
135 | + oreg = ots->reg; | ||
136 | } else { | ||
137 | - if (ots->val_type != TEMP_VAL_REG) { | ||
138 | - /* When allocating a new register, make sure to not spill the | ||
139 | - input one. */ | ||
140 | - tcg_regset_set_reg(allocated_regs, ts->reg); | ||
141 | - ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], | ||
142 | - allocated_regs, preferred_regs, | ||
143 | - ots->indirect_base); | ||
144 | - } | ||
145 | - if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { | ||
146 | - /* | ||
147 | - * Cross register class move not supported. | ||
148 | - * Store the source register into the destination slot | ||
149 | - * and leave the destination temp as TEMP_VAL_MEM. | ||
150 | - */ | ||
151 | - assert(!temp_readonly(ots)); | ||
152 | - if (!ts->mem_allocated) { | ||
153 | - temp_allocate_frame(s, ots); | ||
154 | - } | ||
155 | - tcg_out_st(s, ts->type, ts->reg, | ||
156 | - ots->mem_base->reg, ots->mem_offset); | ||
157 | - ots->mem_coherent = 1; | ||
158 | - temp_free_or_dead(s, ots, -1); | ||
159 | - return; | ||
160 | - } | ||
161 | + /* Make sure to not spill the input register during allocation. */ | ||
162 | + oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], | ||
163 | + allocated_regs | ((TCGRegSet)1 << ireg), | ||
164 | + preferred_regs, ots->indirect_base); | ||
165 | } | ||
166 | - ots->val_type = TEMP_VAL_REG; | ||
167 | - ots->mem_coherent = 0; | ||
168 | - s->reg_to_temp[ots->reg] = ots; | ||
169 | - if (NEED_SYNC_ARG(0)) { | ||
170 | - temp_sync(s, ots, allocated_regs, 0, 0); | ||
171 | + if (!tcg_out_mov(s, otype, oreg, ireg)) { | ||
172 | + /* | ||
173 | + * Cross register class move not supported. | ||
174 | + * Store the source register into the destination slot | ||
175 | + * and leave the destination temp as TEMP_VAL_MEM. | ||
176 | + */ | ||
177 | + assert(!temp_readonly(ots)); | ||
178 | + if (!ts->mem_allocated) { | ||
179 | + temp_allocate_frame(s, ots); | ||
180 | + } | ||
181 | + tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); | ||
182 | + set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); | ||
183 | + ots->mem_coherent = 1; | ||
184 | + return; | ||
185 | } | ||
186 | } | ||
187 | + set_temp_val_reg(s, ots, oreg); | ||
188 | + ots->mem_coherent = 0; | ||
189 | + | ||
190 | + if (NEED_SYNC_ARG(0)) { | ||
191 | + temp_sync(s, ots, allocated_regs, 0, 0); | ||
192 | + } | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
197 | /* Allocate the output register now. */ | ||
198 | if (ots->val_type != TEMP_VAL_REG) { | ||
199 | TCGRegSet allocated_regs = s->reserved_regs; | ||
200 | + TCGReg oreg; | ||
201 | |||
202 | if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { | ||
203 | /* Make sure to not spill the input register. */ | ||
204 | tcg_regset_set_reg(allocated_regs, its->reg); | ||
205 | } | ||
206 | - ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
207 | - op->output_pref[0], ots->indirect_base); | ||
208 | - ots->val_type = TEMP_VAL_REG; | ||
209 | - s->reg_to_temp[ots->reg] = ots; | ||
210 | + oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
211 | + op->output_pref[0], ots->indirect_base); | ||
212 | + set_temp_val_reg(s, ots, oreg); | ||
213 | } | ||
214 | |||
215 | switch (its->val_type) { | ||
216 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
217 | #else | ||
218 | endian_fixup = 0; | ||
219 | #endif | ||
220 | + /* Attempt to dup directly from the input memory slot. */ | ||
221 | if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, | ||
222 | its->mem_offset + endian_fixup)) { | ||
223 | goto done; | ||
224 | } | ||
225 | + /* Load the input into the destination vector register. */ | ||
226 | tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); | ||
227 | break; | ||
228 | |||
229 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
230 | op->output_pref[k], ts->indirect_base); | ||
231 | } | ||
232 | tcg_regset_set_reg(o_allocated_regs, reg); | ||
233 | - if (ts->val_type == TEMP_VAL_REG) { | ||
234 | - s->reg_to_temp[ts->reg] = NULL; | ||
235 | - } | ||
236 | - ts->val_type = TEMP_VAL_REG; | ||
237 | - ts->reg = reg; | ||
238 | - /* | ||
239 | - * Temp value is modified, so the value kept in memory is | ||
240 | - * potentially not the same. | ||
241 | - */ | ||
242 | + set_temp_val_reg(s, ts, reg); | ||
243 | ts->mem_coherent = 0; | ||
244 | - s->reg_to_temp[reg] = ts; | ||
245 | new_args[i] = reg; | ||
246 | } | ||
247 | } | ||
248 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
249 | TCGRegSet allocated_regs = s->reserved_regs; | ||
250 | TCGRegSet dup_out_regs = | ||
251 | tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; | ||
252 | + TCGReg oreg; | ||
253 | |||
254 | /* Make sure to not spill the input registers. */ | ||
255 | if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { | ||
256 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
257 | tcg_regset_set_reg(allocated_regs, itsh->reg); | ||
258 | } | ||
259 | |||
260 | - ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
261 | - op->output_pref[0], ots->indirect_base); | ||
262 | - ots->val_type = TEMP_VAL_REG; | ||
263 | - s->reg_to_temp[ots->reg] = ots; | ||
264 | + oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
265 | + op->output_pref[0], ots->indirect_base); | ||
266 | + set_temp_val_reg(s, ots, oreg); | ||
267 | } | ||
268 | |||
269 | /* Promote dup2 of immediates to dupi_vec. */ | ||
270 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
271 | tcg_debug_assert(!temp_readonly(ts)); | ||
272 | |||
273 | reg = tcg_target_call_oarg_regs[i]; | ||
274 | - tcg_debug_assert(s->reg_to_temp[reg] == NULL); | ||
275 | - if (ts->val_type == TEMP_VAL_REG) { | ||
276 | - s->reg_to_temp[ts->reg] = NULL; | ||
277 | - } | ||
278 | - ts->val_type = TEMP_VAL_REG; | ||
279 | - ts->reg = reg; | ||
280 | + set_temp_val_reg(s, ts, reg); | ||
281 | ts->mem_coherent = 0; | ||
282 | - s->reg_to_temp[reg] = ts; | ||
283 | if (NEED_SYNC_ARG(i)) { | ||
284 | temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); | ||
285 | } else if (IS_DEAD_ARG(i)) { | ||
286 | -- | ||
287 | 2.34.1 | ||
288 | |||
289 | diff view generated by jsdifflib |
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | We now check the consistency of reg_to_temp[] with each update, |
---|---|---|---|
2 | 2 | so the utility of checking consistency at the end of each | |
3 | As far as I can tell tlb_flush does not need to be called | 3 | opcode is minimal. In addition, the form of this check is |
4 | this early. tlb_flush is eventually called after the CPU | 4 | quite expensive, consuming 10% of a checking-enabled build. |
5 | has been realized. | ||
6 | |||
7 | This change paves the way to the introduction of tlb_init, | ||
8 | which will be called from cpu_exec_realizefn. | ||
9 | 5 | ||
10 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 6 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> |
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
13 | Message-Id: <20181009174557.16125-2-cota@braap.org> | ||
14 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
15 | --- | 8 | --- |
16 | target/alpha/cpu.c | 1 - | 9 | tcg/tcg.c | 76 ------------------------------------------------------- |
17 | 1 file changed, 1 deletion(-) | 10 | 1 file changed, 76 deletions(-) |
18 | 11 | ||
19 | diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c | 12 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
20 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/target/alpha/cpu.c | 14 | --- a/tcg/tcg.c |
22 | +++ b/target/alpha/cpu.c | 15 | +++ b/tcg/tcg.c |
23 | @@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj) | 16 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) |
24 | CPUAlphaState *env = &cpu->env; | 17 | return changes; |
25 | 18 | } | |
26 | cs->env_ptr = env; | 19 | |
27 | - tlb_flush(cs); | 20 | -#ifdef CONFIG_DEBUG_TCG |
28 | 21 | -static void dump_regs(TCGContext *s) | |
29 | env->lock_addr = -1; | 22 | -{ |
30 | #if defined(CONFIG_USER_ONLY) | 23 | - TCGTemp *ts; |
24 | - int i; | ||
25 | - char buf[64]; | ||
26 | - | ||
27 | - for(i = 0; i < s->nb_temps; i++) { | ||
28 | - ts = &s->temps[i]; | ||
29 | - printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); | ||
30 | - switch(ts->val_type) { | ||
31 | - case TEMP_VAL_REG: | ||
32 | - printf("%s", tcg_target_reg_names[ts->reg]); | ||
33 | - break; | ||
34 | - case TEMP_VAL_MEM: | ||
35 | - printf("%d(%s)", (int)ts->mem_offset, | ||
36 | - tcg_target_reg_names[ts->mem_base->reg]); | ||
37 | - break; | ||
38 | - case TEMP_VAL_CONST: | ||
39 | - printf("$0x%" PRIx64, ts->val); | ||
40 | - break; | ||
41 | - case TEMP_VAL_DEAD: | ||
42 | - printf("D"); | ||
43 | - break; | ||
44 | - default: | ||
45 | - printf("???"); | ||
46 | - break; | ||
47 | - } | ||
48 | - printf("\n"); | ||
49 | - } | ||
50 | - | ||
51 | - for(i = 0; i < TCG_TARGET_NB_REGS; i++) { | ||
52 | - if (s->reg_to_temp[i] != NULL) { | ||
53 | - printf("%s: %s\n", | ||
54 | - tcg_target_reg_names[i], | ||
55 | - tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); | ||
56 | - } | ||
57 | - } | ||
58 | -} | ||
59 | - | ||
60 | -static void check_regs(TCGContext *s) | ||
61 | -{ | ||
62 | - int reg; | ||
63 | - int k; | ||
64 | - TCGTemp *ts; | ||
65 | - char buf[64]; | ||
66 | - | ||
67 | - for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { | ||
68 | - ts = s->reg_to_temp[reg]; | ||
69 | - if (ts != NULL) { | ||
70 | - if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { | ||
71 | - printf("Inconsistency for register %s:\n", | ||
72 | - tcg_target_reg_names[reg]); | ||
73 | - goto fail; | ||
74 | - } | ||
75 | - } | ||
76 | - } | ||
77 | - for (k = 0; k < s->nb_temps; k++) { | ||
78 | - ts = &s->temps[k]; | ||
79 | - if (ts->val_type == TEMP_VAL_REG | ||
80 | - && ts->kind != TEMP_FIXED | ||
81 | - && s->reg_to_temp[ts->reg] != ts) { | ||
82 | - printf("Inconsistency for temp %s:\n", | ||
83 | - tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); | ||
84 | - fail: | ||
85 | - printf("reg state:\n"); | ||
86 | - dump_regs(s); | ||
87 | - tcg_abort(); | ||
88 | - } | ||
89 | - } | ||
90 | -} | ||
91 | -#endif | ||
92 | - | ||
93 | static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) | ||
94 | { | ||
95 | intptr_t off, size, align; | ||
96 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) | ||
97 | tcg_reg_alloc_op(s, op); | ||
98 | break; | ||
99 | } | ||
100 | -#ifdef CONFIG_DEBUG_TCG | ||
101 | - check_regs(s); | ||
102 | -#endif | ||
103 | /* Test for (pending) buffer overflow. The assumption is that any | ||
104 | one operation beginning below the high water mark cannot overrun | ||
105 | the buffer completely. Thus we can test for overflow after | ||
31 | -- | 106 | -- |
32 | 2.17.2 | 107 | 2.34.1 |
33 | 108 | ||
34 | 109 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | In preparation of introducing paired registers, | ||
4 | massage a bit process_op_defs()'s switch case. | ||
5 | |||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | [PMD: Split from bigger patch, 1/3] | ||
8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Message-Id: <20221219220925.79218-2-philmd@linaro.org> | ||
10 | --- | ||
11 | tcg/tcg.c | 61 +++++++++++++++++++++++++++++++------------------------ | ||
12 | 1 file changed, 34 insertions(+), 27 deletions(-) | ||
13 | |||
14 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/tcg/tcg.c | ||
17 | +++ b/tcg/tcg.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
19 | for (op = 0; op < NB_OPS; op++) { | ||
20 | TCGOpDef *def = &tcg_op_defs[op]; | ||
21 | const TCGTargetOpDef *tdefs; | ||
22 | - int i, nb_args; | ||
23 | + int i, o, nb_args; | ||
24 | |||
25 | if (def->flags & TCG_OPF_NOT_PRESENT) { | ||
26 | continue; | ||
27 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
28 | |||
29 | for (i = 0; i < nb_args; i++) { | ||
30 | const char *ct_str = tdefs->args_ct_str[i]; | ||
31 | + bool input_p = i >= def->nb_oargs; | ||
32 | + | ||
33 | /* Incomplete TCGTargetOpDef entry. */ | ||
34 | tcg_debug_assert(ct_str != NULL); | ||
35 | |||
36 | - while (*ct_str != '\0') { | ||
37 | - switch(*ct_str) { | ||
38 | - case '0' ... '9': | ||
39 | - { | ||
40 | - int oarg = *ct_str - '0'; | ||
41 | - tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); | ||
42 | - tcg_debug_assert(oarg < def->nb_oargs); | ||
43 | - tcg_debug_assert(def->args_ct[oarg].regs != 0); | ||
44 | - def->args_ct[i] = def->args_ct[oarg]; | ||
45 | - /* The output sets oalias. */ | ||
46 | - def->args_ct[oarg].oalias = true; | ||
47 | - def->args_ct[oarg].alias_index = i; | ||
48 | - /* The input sets ialias. */ | ||
49 | - def->args_ct[i].ialias = true; | ||
50 | - def->args_ct[i].alias_index = oarg; | ||
51 | - } | ||
52 | - ct_str++; | ||
53 | - break; | ||
54 | - case '&': | ||
55 | - def->args_ct[i].newreg = true; | ||
56 | - ct_str++; | ||
57 | - break; | ||
58 | + switch (*ct_str) { | ||
59 | + case '0' ... '9': | ||
60 | + o = *ct_str - '0'; | ||
61 | + tcg_debug_assert(input_p); | ||
62 | + tcg_debug_assert(o < def->nb_oargs); | ||
63 | + tcg_debug_assert(def->args_ct[o].regs != 0); | ||
64 | + tcg_debug_assert(!def->args_ct[o].oalias); | ||
65 | + def->args_ct[i] = def->args_ct[o]; | ||
66 | + /* The output sets oalias. */ | ||
67 | + def->args_ct[o].oalias = 1; | ||
68 | + def->args_ct[o].alias_index = i; | ||
69 | + /* The input sets ialias. */ | ||
70 | + def->args_ct[i].ialias = 1; | ||
71 | + def->args_ct[i].alias_index = o; | ||
72 | + tcg_debug_assert(ct_str[1] == '\0'); | ||
73 | + continue; | ||
74 | + | ||
75 | + case '&': | ||
76 | + tcg_debug_assert(!input_p); | ||
77 | + def->args_ct[i].newreg = true; | ||
78 | + ct_str++; | ||
79 | + break; | ||
80 | + } | ||
81 | + | ||
82 | + do { | ||
83 | + switch (*ct_str) { | ||
84 | case 'i': | ||
85 | def->args_ct[i].ct |= TCG_CT_CONST; | ||
86 | - ct_str++; | ||
87 | break; | ||
88 | |||
89 | /* Include all of the target-specific constraints. */ | ||
90 | |||
91 | #undef CONST | ||
92 | #define CONST(CASE, MASK) \ | ||
93 | - case CASE: def->args_ct[i].ct |= MASK; ct_str++; break; | ||
94 | + case CASE: def->args_ct[i].ct |= MASK; break; | ||
95 | #define REGS(CASE, MASK) \ | ||
96 | - case CASE: def->args_ct[i].regs |= MASK; ct_str++; break; | ||
97 | + case CASE: def->args_ct[i].regs |= MASK; break; | ||
98 | |||
99 | #include "tcg-target-con-str.h" | ||
100 | |||
101 | #undef REGS | ||
102 | #undef CONST | ||
103 | default: | ||
104 | + case '0' ... '9': | ||
105 | + case '&': | ||
106 | /* Typo in TCGTargetOpDef constraint. */ | ||
107 | g_assert_not_reached(); | ||
108 | } | ||
109 | - } | ||
110 | + } while (*++ct_str != '\0'); | ||
111 | } | ||
112 | |||
113 | /* TCGTargetOpDef entry with too much information? */ | ||
114 | -- | ||
115 | 2.34.1 | ||
116 | |||
117 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | There are several instances where we need to be able to | ||
2 | allocate a pair of registers to related inputs/outputs. | ||
3 | Add 'p' and 'm' register constraints for this, so that a paired | ||
4 | argument is allocated to the register just after ('p') or just before ('m') its mate. | ||
1 | 5 | ||
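As a sketch of the intended usage (a hypothetical entry, not taken from a real backend), an op whose second output must land in the register immediately following the first could be described as:

    /* "p" pairs this argument with the one before it, one register up;
     * "m" would instead request the register one below. */
    static const TCGTargetOpDef mulu2_example = {
        .op = INDEX_op_mulu2_i32,
        .args_ct_str = { "r", "p", "r", "r" },
    };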
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | include/tcg/tcg.h | 2 + | ||
9 | tcg/tcg.c | 419 ++++++++++++++++++++++++++++++++++++++++------ | ||
10 | 2 files changed, 373 insertions(+), 48 deletions(-) | ||
11 | |||
12 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/include/tcg/tcg.h | ||
15 | +++ b/include/tcg/tcg.h | ||
16 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGArgConstraint { | ||
17 | unsigned ct : 16; | ||
18 | unsigned alias_index : 4; | ||
19 | unsigned sort_index : 4; | ||
20 | + unsigned pair_index : 4; | ||
21 | + unsigned pair : 2; /* 0: none, 1: first, 2: second, 3: second alias */ | ||
22 | bool oalias : 1; | ||
23 | bool ialias : 1; | ||
24 | bool newreg : 1; | ||
25 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/tcg/tcg.c | ||
28 | +++ b/tcg/tcg.c | ||
29 | @@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) | ||
30 | static int get_constraint_priority(const TCGOpDef *def, int k) | ||
31 | { | ||
32 | const TCGArgConstraint *arg_ct = &def->args_ct[k]; | ||
33 | - int n; | ||
34 | + int n = ctpop64(arg_ct->regs); | ||
35 | |||
36 | - if (arg_ct->oalias) { | ||
37 | - /* an alias is equivalent to a single register */ | ||
38 | - n = 1; | ||
39 | - } else { | ||
40 | - n = ctpop64(arg_ct->regs); | ||
41 | + /* | ||
42 | + * Sort constraints of a single register first, which includes output | ||
43 | + * aliases (which must exactly match the input already allocated). | ||
44 | + */ | ||
45 | + if (n == 1 || arg_ct->oalias) { | ||
46 | + return INT_MAX; | ||
47 | } | ||
48 | - return TCG_TARGET_NB_REGS - n + 1; | ||
49 | + | ||
50 | + /* | ||
51 | + * Sort register pairs next, first then second immediately after. | ||
52 | + * Arbitrarily sort multiple pairs by the index of the first reg; | ||
53 | + * there shouldn't be many pairs. | ||
54 | + */ | ||
55 | + switch (arg_ct->pair) { | ||
56 | + case 1: | ||
57 | + case 3: | ||
58 | + return (k + 1) * 2; | ||
59 | + case 2: | ||
60 | + return (arg_ct->pair_index + 1) * 2 - 1; | ||
61 | + } | ||
62 | + | ||
63 | + /* Finally, sort by decreasing register count. */ | ||
64 | + assert(n > 1); | ||
65 | + return -n; | ||
66 | } | ||
67 | |||
68 | /* sort from highest priority to lowest */ | ||
69 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
70 | for (op = 0; op < NB_OPS; op++) { | ||
71 | TCGOpDef *def = &tcg_op_defs[op]; | ||
72 | const TCGTargetOpDef *tdefs; | ||
73 | - int i, o, nb_args; | ||
74 | + bool saw_alias_pair = false; | ||
75 | + int i, o, i2, o2, nb_args; | ||
76 | |||
77 | if (def->flags & TCG_OPF_NOT_PRESENT) { | ||
78 | continue; | ||
79 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
80 | /* The input sets ialias. */ | ||
81 | def->args_ct[i].ialias = 1; | ||
82 | def->args_ct[i].alias_index = o; | ||
83 | + if (def->args_ct[i].pair) { | ||
84 | + saw_alias_pair = true; | ||
85 | + } | ||
86 | tcg_debug_assert(ct_str[1] == '\0'); | ||
87 | continue; | ||
88 | |||
89 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
90 | def->args_ct[i].newreg = true; | ||
91 | ct_str++; | ||
92 | break; | ||
93 | + | ||
94 | + case 'p': /* plus */ | ||
95 | + /* Allocate to the register after the previous. */ | ||
96 | + tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); | ||
97 | + o = i - 1; | ||
98 | + tcg_debug_assert(!def->args_ct[o].pair); | ||
99 | + tcg_debug_assert(!def->args_ct[o].ct); | ||
100 | + def->args_ct[i] = (TCGArgConstraint){ | ||
101 | + .pair = 2, | ||
102 | + .pair_index = o, | ||
103 | + .regs = def->args_ct[o].regs << 1, | ||
104 | + }; | ||
105 | + def->args_ct[o].pair = 1; | ||
106 | + def->args_ct[o].pair_index = i; | ||
107 | + tcg_debug_assert(ct_str[1] == '\0'); | ||
108 | + continue; | ||
109 | + | ||
110 | + case 'm': /* minus */ | ||
111 | + /* Allocate to the register before the previous. */ | ||
112 | + tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); | ||
113 | + o = i - 1; | ||
114 | + tcg_debug_assert(!def->args_ct[o].pair); | ||
115 | + tcg_debug_assert(!def->args_ct[o].ct); | ||
116 | + def->args_ct[i] = (TCGArgConstraint){ | ||
117 | + .pair = 1, | ||
118 | + .pair_index = o, | ||
119 | + .regs = def->args_ct[o].regs >> 1, | ||
120 | + }; | ||
121 | + def->args_ct[o].pair = 2; | ||
122 | + def->args_ct[o].pair_index = i; | ||
123 | + tcg_debug_assert(ct_str[1] == '\0'); | ||
124 | + continue; | ||
125 | } | ||
126 | |||
127 | do { | ||
128 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
129 | default: | ||
130 | case '0' ... '9': | ||
131 | case '&': | ||
132 | + case 'p': | ||
133 | + case 'm': | ||
134 | /* Typo in TCGTargetOpDef constraint. */ | ||
135 | g_assert_not_reached(); | ||
136 | } | ||
137 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
138 | /* TCGTargetOpDef entry with too much information? */ | ||
139 | tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); | ||
140 | |||
141 | + /* | ||
142 | + * Fix up output pairs that are aliased with inputs. | ||
143 | + * When we created the alias, we copied pair from the output. | ||
144 | + * There are three cases: | ||
145 | + * (1a) Pairs of inputs alias pairs of outputs. | ||
146 | + * (1b) One input aliases the first of a pair of outputs. | ||
147 | + * (2) One input aliases the second of a pair of outputs. | ||
148 | + * | ||
149 | + * Case 1a is handled by making sure that the pair_index'es are | ||
150 | + * properly updated so that they appear the same as a pair of inputs. | ||
151 | + * | ||
152 | + * Case 1b is handled by setting the pair_index of the input to | ||
153 | + * itself, simply so it doesn't point to an unrelated argument. | ||
154 | + * Since we don't encounter the "second" during the input allocation | ||
155 | + * phase, nothing happens with the second half of the input pair. | ||
156 | + * | ||
157 | + * Case 2 is handled by setting the second input to pair=3, the | ||
158 | + * first output to pair=3, and the pair_index'es to match. | ||
159 | + */ | ||
160 | + if (saw_alias_pair) { | ||
161 | + for (i = def->nb_oargs; i < nb_args; i++) { | ||
162 | + /* | ||
163 | + * Since [0-9pm] must be alone in the constraint string, | ||
164 | + * the only way they can both be set is if the pair comes | ||
165 | + * from the output alias. | ||
166 | + */ | ||
167 | + if (!def->args_ct[i].ialias) { | ||
168 | + continue; | ||
169 | + } | ||
170 | + switch (def->args_ct[i].pair) { | ||
171 | + case 0: | ||
172 | + break; | ||
173 | + case 1: | ||
174 | + o = def->args_ct[i].alias_index; | ||
175 | + o2 = def->args_ct[o].pair_index; | ||
176 | + tcg_debug_assert(def->args_ct[o].pair == 1); | ||
177 | + tcg_debug_assert(def->args_ct[o2].pair == 2); | ||
178 | + if (def->args_ct[o2].oalias) { | ||
179 | + /* Case 1a */ | ||
180 | + i2 = def->args_ct[o2].alias_index; | ||
181 | + tcg_debug_assert(def->args_ct[i2].pair == 2); | ||
182 | + def->args_ct[i2].pair_index = i; | ||
183 | + def->args_ct[i].pair_index = i2; | ||
184 | + } else { | ||
185 | + /* Case 1b */ | ||
186 | + def->args_ct[i].pair_index = i; | ||
187 | + } | ||
188 | + break; | ||
189 | + case 2: | ||
190 | + o = def->args_ct[i].alias_index; | ||
191 | + o2 = def->args_ct[o].pair_index; | ||
192 | + tcg_debug_assert(def->args_ct[o].pair == 2); | ||
193 | + tcg_debug_assert(def->args_ct[o2].pair == 1); | ||
194 | + if (def->args_ct[o2].oalias) { | ||
195 | + /* Case 1a */ | ||
196 | + i2 = def->args_ct[o2].alias_index; | ||
197 | + tcg_debug_assert(def->args_ct[i2].pair == 1); | ||
198 | + def->args_ct[i2].pair_index = i; | ||
199 | + def->args_ct[i].pair_index = i2; | ||
200 | + } else { | ||
201 | + /* Case 2 */ | ||
202 | + def->args_ct[i].pair = 3; | ||
203 | + def->args_ct[o2].pair = 3; | ||
204 | + def->args_ct[i].pair_index = o2; | ||
205 | + def->args_ct[o2].pair_index = i; | ||
206 | + } | ||
207 | + break; | ||
208 | + default: | ||
209 | + g_assert_not_reached(); | ||
210 | + } | ||
211 | + } | ||
212 | + } | ||
213 | + | ||
214 | /* sort the constraints (XXX: this is just a heuristic) */ | ||
215 | sort_constraints(def, 0, def->nb_oargs); | ||
216 | sort_constraints(def, def->nb_oargs, def->nb_iargs); | ||
217 | @@ -XXX,XX +XXX,XX @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, | ||
218 | tcg_abort(); | ||
219 | } | ||
220 | |||
221 | +static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, | ||
222 | + TCGRegSet allocated_regs, | ||
223 | + TCGRegSet preferred_regs, bool rev) | ||
224 | +{ | ||
225 | + int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); | ||
226 | + TCGRegSet reg_ct[2]; | ||
227 | + const int *order; | ||
228 | + | ||
229 | + /* Ensure that if I is not in allocated_regs, I+1 is not either. */ | ||
230 | + reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); | ||
231 | + tcg_debug_assert(reg_ct[1] != 0); | ||
232 | + reg_ct[0] = reg_ct[1] & preferred_regs; | ||
233 | + | ||
234 | + order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; | ||
235 | + | ||
236 | + /* | ||
237 | + * Skip the preferred_regs option if it cannot be satisfied, | ||
238 | + * or if the preference made no difference. | ||
239 | + */ | ||
240 | + k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; | ||
241 | + | ||
242 | + /* | ||
243 | + * Minimize the number of flushes by looking for 2 free registers first, | ||
244 | + * then a single flush, then two flushes. | ||
245 | + */ | ||
246 | + for (fmin = 2; fmin >= 0; fmin--) { | ||
247 | + for (j = k; j < 2; j++) { | ||
248 | + TCGRegSet set = reg_ct[j]; | ||
249 | + | ||
250 | + for (i = 0; i < n; i++) { | ||
251 | + TCGReg reg = order[i]; | ||
252 | + | ||
253 | + if (tcg_regset_test_reg(set, reg)) { | ||
254 | + int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; | ||
255 | + if (f >= fmin) { | ||
256 | + tcg_reg_free(s, reg, allocated_regs); | ||
257 | + tcg_reg_free(s, reg + 1, allocated_regs); | ||
258 | + return reg; | ||
259 | + } | ||
260 | + } | ||
261 | + } | ||
262 | + } | ||
263 | + } | ||
264 | + tcg_abort(); | ||
265 | +} | ||
266 | + | ||
267 | /* Make sure the temporary is in a register. If needed, allocate the register | ||
268 | from DESIRED while avoiding ALLOCATED. */ | ||
269 | static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, | ||
270 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
271 | |||
272 | /* satisfy input constraints */ | ||
273 | for (k = 0; k < nb_iargs; k++) { | ||
274 | - TCGRegSet i_preferred_regs; | ||
275 | - bool allocate_new_reg; | ||
276 | + TCGRegSet i_preferred_regs, i_required_regs; | ||
277 | + bool allocate_new_reg, copyto_new_reg; | ||
278 | + TCGTemp *ts2; | ||
279 | + int i1, i2; | ||
280 | |||
281 | i = def->args_ct[nb_oargs + k].sort_index; | ||
282 | arg = op->args[i]; | ||
283 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
284 | |||
285 | reg = ts->reg; | ||
286 | i_preferred_regs = 0; | ||
287 | + i_required_regs = arg_ct->regs; | ||
288 | allocate_new_reg = false; | ||
289 | + copyto_new_reg = false; | ||
290 | |||
291 | - if (arg_ct->ialias) { | ||
292 | + switch (arg_ct->pair) { | ||
293 | + case 0: /* not paired */ | ||
294 | + if (arg_ct->ialias) { | ||
295 | + i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
296 | + | ||
297 | + /* | ||
298 | + * If the input is not dead after the instruction, | ||
299 | + * we must allocate a new register and move it. | ||
300 | + */ | ||
301 | + if (!IS_DEAD_ARG(i)) { | ||
302 | + allocate_new_reg = true; | ||
303 | + } else if (ts->val_type == TEMP_VAL_REG) { | ||
304 | + /* | ||
305 | + * Check if the current register has already been | ||
306 | + * allocated for another input. | ||
307 | + */ | ||
308 | + allocate_new_reg = | ||
309 | + tcg_regset_test_reg(i_allocated_regs, reg); | ||
310 | + } | ||
311 | + } | ||
312 | + if (!allocate_new_reg) { | ||
313 | + temp_load(s, ts, i_required_regs, i_allocated_regs, | ||
314 | + i_preferred_regs); | ||
315 | + reg = ts->reg; | ||
316 | + allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); | ||
317 | + } | ||
318 | + if (allocate_new_reg) { | ||
319 | + /* | ||
320 | + * Allocate a new register matching the constraint | ||
321 | + * and move the temporary register into it. | ||
322 | + */ | ||
323 | + temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
324 | + i_allocated_regs, 0); | ||
325 | + reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, | ||
326 | + i_preferred_regs, ts->indirect_base); | ||
327 | + copyto_new_reg = true; | ||
328 | + } | ||
329 | + break; | ||
330 | + | ||
331 | + case 1: | ||
332 | + /* First of an input pair; if i1 == i2, the second is an output. */ | ||
333 | + i1 = i; | ||
334 | + i2 = arg_ct->pair_index; | ||
335 | + ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; | ||
336 | + | ||
337 | + /* | ||
338 | + * It is easier to default to allocating a new pair | ||
339 | + * and to identify a few cases where it's not required. | ||
340 | + */ | ||
341 | + if (arg_ct->ialias) { | ||
342 | + i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
343 | + if (IS_DEAD_ARG(i1) && | ||
344 | + IS_DEAD_ARG(i2) && | ||
345 | + ts->val_type == TEMP_VAL_REG && | ||
346 | + ts->reg < TCG_TARGET_NB_REGS - 1 && | ||
347 | + tcg_regset_test_reg(i_required_regs, reg) && | ||
348 | + !tcg_regset_test_reg(i_allocated_regs, reg) && | ||
349 | + !tcg_regset_test_reg(i_allocated_regs, reg + 1) && | ||
350 | + (ts2 | ||
351 | + ? ts2->val_type == TEMP_VAL_REG && | ||
352 | + ts2->reg == reg + 1 | ||
353 | + : s->reg_to_temp[reg + 1] == NULL)) { | ||
354 | + break; | ||
355 | + } | ||
356 | + } else { | ||
357 | + /* Without aliasing, the pair must also be an input. */ | ||
358 | + tcg_debug_assert(ts2); | ||
359 | + if (ts->val_type == TEMP_VAL_REG && | ||
360 | + ts2->val_type == TEMP_VAL_REG && | ||
361 | + ts2->reg == reg + 1 && | ||
362 | + tcg_regset_test_reg(i_required_regs, reg)) { | ||
363 | + break; | ||
364 | + } | ||
365 | + } | ||
366 | + reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, | ||
367 | + 0, ts->indirect_base); | ||
368 | + goto do_pair; | ||
369 | + | ||
370 | + case 2: /* pair second */ | ||
371 | + reg = new_args[arg_ct->pair_index] + 1; | ||
372 | + goto do_pair; | ||
373 | + | ||
374 | + case 3: /* ialias with second output, no first input */ | ||
375 | + tcg_debug_assert(arg_ct->ialias); | ||
376 | i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
377 | |||
378 | - /* | ||
379 | - * If the input is readonly, then it cannot also be an | ||
380 | - * output and aliased to itself. If the input is not | ||
381 | - * dead after the instruction, we must allocate a new | ||
382 | - * register and move it. | ||
383 | - */ | ||
384 | - if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { | ||
385 | - allocate_new_reg = true; | ||
386 | - } else if (ts->val_type == TEMP_VAL_REG) { | ||
387 | - /* | ||
388 | - * Check if the current register has already been | ||
389 | - * allocated for another input. | ||
390 | - */ | ||
391 | - allocate_new_reg = tcg_regset_test_reg(i_allocated_regs, reg); | ||
392 | + if (IS_DEAD_ARG(i) && | ||
393 | + ts->val_type == TEMP_VAL_REG && | ||
394 | + reg > 0 && | ||
395 | + s->reg_to_temp[reg - 1] == NULL && | ||
396 | + tcg_regset_test_reg(i_required_regs, reg) && | ||
397 | + !tcg_regset_test_reg(i_allocated_regs, reg) && | ||
398 | + !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { | ||
399 | + tcg_regset_set_reg(i_allocated_regs, reg - 1); | ||
400 | + break; | ||
401 | } | ||
402 | - } | ||
403 | + reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, | ||
404 | + i_allocated_regs, 0, | ||
405 | + ts->indirect_base); | ||
406 | + tcg_regset_set_reg(i_allocated_regs, reg); | ||
407 | + reg += 1; | ||
408 | + goto do_pair; | ||
409 | |||
410 | - if (!allocate_new_reg) { | ||
411 | - temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); | ||
412 | - reg = ts->reg; | ||
413 | - allocate_new_reg = !tcg_regset_test_reg(arg_ct->regs, reg); | ||
414 | - } | ||
415 | - | ||
416 | - if (allocate_new_reg) { | ||
417 | + do_pair: | ||
418 | /* | ||
419 | - * Allocate a new register matching the constraint | ||
420 | - * and move the temporary register into it. | ||
421 | + * If an aliased input is not dead after the instruction, | ||
422 | + * we must allocate a new register and move it. | ||
423 | */ | ||
424 | - temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
425 | - i_allocated_regs, 0); | ||
426 | - reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, | ||
427 | - i_preferred_regs, ts->indirect_base); | ||
428 | + if (arg_ct->ialias && !IS_DEAD_ARG(i)) { | ||
429 | + TCGRegSet t_allocated_regs = i_allocated_regs; | ||
430 | + | ||
431 | + /* | ||
432 | + * Because of the alias, and the continued life, make sure | ||
433 | + * that the temp is somewhere *other* than the reg pair, | ||
434 | + * and we get a copy in reg. | ||
435 | + */ | ||
436 | + tcg_regset_set_reg(t_allocated_regs, reg); | ||
437 | + tcg_regset_set_reg(t_allocated_regs, reg + 1); | ||
438 | + if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { | ||
439 | + /* If ts was already in reg, copy it somewhere else. */ | ||
440 | + TCGReg nr; | ||
441 | + bool ok; | ||
442 | + | ||
443 | + tcg_debug_assert(ts->kind != TEMP_FIXED); | ||
444 | + nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], | ||
445 | + t_allocated_regs, 0, ts->indirect_base); | ||
446 | + ok = tcg_out_mov(s, ts->type, nr, reg); | ||
447 | + tcg_debug_assert(ok); | ||
448 | + | ||
449 | + set_temp_val_reg(s, ts, nr); | ||
450 | + } else { | ||
451 | + temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
452 | + t_allocated_regs, 0); | ||
453 | + copyto_new_reg = true; | ||
454 | + } | ||
455 | + } else { | ||
456 | + /* Preferably allocate to reg, otherwise copy. */ | ||
457 | + i_required_regs = (TCGRegSet)1 << reg; | ||
458 | + temp_load(s, ts, i_required_regs, i_allocated_regs, | ||
459 | + i_preferred_regs); | ||
460 | + copyto_new_reg = ts->reg != reg; | ||
461 | + } | ||
462 | + break; | ||
463 | + | ||
464 | + default: | ||
465 | + g_assert_not_reached(); | ||
466 | + } | ||
467 | + | ||
468 | + if (copyto_new_reg) { | ||
469 | if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { | ||
470 | /* | ||
471 | * Cross register class move not supported. Sync the | ||
472 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
473 | /* ENV should not be modified. */ | ||
474 | tcg_debug_assert(!temp_readonly(ts)); | ||
475 | |||
476 | - if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { | ||
477 | - reg = new_args[arg_ct->alias_index]; | ||
478 | - } else if (arg_ct->newreg) { | ||
479 | - reg = tcg_reg_alloc(s, arg_ct->regs, | ||
480 | - i_allocated_regs | o_allocated_regs, | ||
481 | - op->output_pref[k], ts->indirect_base); | ||
482 | - } else { | ||
483 | - reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, | ||
484 | - op->output_pref[k], ts->indirect_base); | ||
485 | + switch (arg_ct->pair) { | ||
486 | + case 0: /* not paired */ | ||
487 | + if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { | ||
488 | + reg = new_args[arg_ct->alias_index]; | ||
489 | + } else if (arg_ct->newreg) { | ||
490 | + reg = tcg_reg_alloc(s, arg_ct->regs, | ||
491 | + i_allocated_regs | o_allocated_regs, | ||
492 | + op->output_pref[k], ts->indirect_base); | ||
493 | + } else { | ||
494 | + reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, | ||
495 | + op->output_pref[k], ts->indirect_base); | ||
496 | + } | ||
497 | + break; | ||
498 | + | ||
499 | + case 1: /* first of pair */ | ||
500 | + tcg_debug_assert(!arg_ct->newreg); | ||
501 | + if (arg_ct->oalias) { | ||
502 | + reg = new_args[arg_ct->alias_index]; | ||
503 | + break; | ||
504 | + } | ||
505 | + reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, | ||
506 | + op->output_pref[k], ts->indirect_base); | ||
507 | + break; | ||
508 | + | ||
509 | + case 2: /* second of pair */ | ||
510 | + tcg_debug_assert(!arg_ct->newreg); | ||
511 | + if (arg_ct->oalias) { | ||
512 | + reg = new_args[arg_ct->alias_index]; | ||
513 | + } else { | ||
514 | + reg = new_args[arg_ct->pair_index] + 1; | ||
515 | + } | ||
516 | + break; | ||
517 | + | ||
518 | + case 3: /* first of pair, aliasing with a second input */ | ||
519 | + tcg_debug_assert(!arg_ct->newreg); | ||
520 | + reg = new_args[arg_ct->pair_index] - 1; | ||
521 | + break; | ||
522 | + | ||
523 | + default: | ||
524 | + g_assert_not_reached(); | ||
525 | } | ||
526 | tcg_regset_set_reg(o_allocated_regs, reg); | ||
527 | set_temp_val_reg(s, ts, reg); | ||
528 | -- | ||
529 | 2.34.1
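The availability mask computed at the top of tcg_reg_alloc_pair above rewards a second look: shifting allocated_regs right by one moves each register's busy bit onto its predecessor, so a single AND-NOT leaves only base registers r for which both r and r+1 are free. A standalone sketch with illustrative values:

    #include <stdio.h>

    int main(void)
    {
        unsigned required  = 0x0f; /* candidate base registers r0..r3 */
        unsigned allocated = 0x06; /* r1 and r2 are in use */
        unsigned pair_free = required & ~(allocated | (allocated >> 1));

        /* r0 fails because r1 is busy; only r3 (with r4 free) remains. */
        printf("%#x\n", pair_free); /* prints 0x8 */
        return 0;
    }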
New patch | |||
---|---|---|---|
1 | While we initialize this value in cpu_common_reset, that | ||
2 | isn't called during startup, so set it as well in init. | ||
3 | This fixes -singlestep versus the very first TB. | ||
1 | 4 | ||
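For context, a simplified sketch of how the lookup path consumes this field (paraphrased from accel/tcg/cpu-exec.c; details may differ):

    /* cflags_next_tb is a one-shot override: -1 means "no override".
     * A zero-initialized field would be taken as a valid override and
     * silently bypass curr_cflags(), and with it the -singlestep
     * handling, for the very first TB. */
    cflags = cpu->cflags_next_tb;
    if (cflags == -1) {
        cflags = curr_cflags(cpu);
    } else {
        cpu->cflags_next_tb = -1;
    }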
5 | Fixes: 04f5b647ed07 ("accel/tcg: Handle -singlestep in curr_cflags") | ||
6 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | hw/core/cpu-common.c | 1 + | ||
10 | 1 file changed, 1 insertion(+) | ||
11 | |||
12 | diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/hw/core/cpu-common.c | ||
15 | +++ b/hw/core/cpu-common.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static void cpu_common_initfn(Object *obj) | ||
17 | /* the default value is changed by qemu_init_vcpu() for softmmu */ | ||
18 | cpu->nr_cores = 1; | ||
19 | cpu->nr_threads = 1; | ||
20 | + cpu->cflags_next_tb = -1; | ||
21 | |||
22 | qemu_mutex_init(&cpu->work_mutex); | ||
23 | QSIMPLEQ_INIT(&cpu->work_list); | ||
24 | -- | ||
25 | 2.34.1 | ||
26 | |||
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | Use the official extend/extract functions instead of routines |
---|---|---|---|
2 | that will shortly be internal to tcg. | ||
2 | 3 | ||
3 | Currently we rely on atomic operations for cross-CPU invalidations. | 4 | Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> |
4 | There are two cases that these atomics miss: cross-CPU invalidations | 5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | can race with either (1) vCPU threads flushing their TLB, which | ||
6 | happens via memset, or (2) vCPUs calling tlb_reset_dirty on their TLB, | ||
7 | which updates .addr_write with a regular store. This results in | ||
8 | undefined behaviour, since we're mixing regular and atomic ops | ||
9 | on concurrent accesses. | ||
10 | |||
11 | Fix it by using tlb_lock, a per-vCPU lock. All updaters of tlb_table | ||
12 | and the corresponding victim cache now hold the lock. | ||
13 | The readers that do not hold tlb_lock must use atomic reads when | ||
14 | reading .addr_write, since this field can be updated by other threads; | ||
15 | the conversion to atomic reads is done in the next patch. | ||
16 | |||
17 | Note that an alternative fix would be to expand the use of atomic ops. | ||
18 | However, in the case of TLB flushes this would have a huge performance | ||
19 | impact, since (1) TLB flushes can happen very frequently and (2) we | ||
20 | currently use a full memory barrier to flush each TLB entry, and a TLB | ||
21 | has many entries. Instead, acquiring the lock is barely slower than a | ||
22 | full memory barrier since it is uncontended, and with a single lock | ||
23 | acquisition we can flush the entire TLB. | ||
24 | |||
25 | Tested-by: Alex Bennée <alex.bennee@linaro.org> | ||
26 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
27 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
28 | Message-Id: <20181009174557.16125-6-cota@braap.org> | ||
29 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
30 | --- | 7 | --- |
31 | include/exec/cpu-defs.h | 3 + | 8 | target/sparc/translate.c | 21 ++++----------------- |
32 | accel/tcg/cputlb.c | 155 ++++++++++++++++++++++------------------ | 9 | 1 file changed, 4 insertions(+), 17 deletions(-) |
33 | 2 files changed, 87 insertions(+), 71 deletions(-) | ||
34 | 10 | ||
35 | diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h | 11 | diff --git a/target/sparc/translate.c b/target/sparc/translate.c |
36 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
37 | --- a/include/exec/cpu-defs.h | 13 | --- a/target/sparc/translate.c |
38 | +++ b/include/exec/cpu-defs.h | 14 | +++ b/target/sparc/translate.c |
39 | @@ -XXX,XX +XXX,XX @@ | 15 | @@ -XXX,XX +XXX,XX @@ static inline void gen_update_fprs_dirty(DisasContext *dc, int rd) |
40 | #endif | 16 | /* floating point registers moves */ |
41 | 17 | static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src) | |
42 | #include "qemu/host-utils.h" | ||
43 | +#include "qemu/thread.h" | ||
44 | #include "qemu/queue.h" | ||
45 | #ifdef CONFIG_TCG | ||
46 | #include "tcg-target.h" | ||
47 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUIOTLBEntry { | ||
48 | |||
49 | #define CPU_COMMON_TLB \ | ||
50 | /* The meaning of the MMU modes is defined in the target code. */ \ | ||
51 | + /* tlb_lock serializes updates to tlb_table and tlb_v_table */ \ | ||
52 | + QemuSpin tlb_lock; \ | ||
53 | CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \ | ||
54 | CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \ | ||
55 | CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \ | ||
56 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/accel/tcg/cputlb.c | ||
59 | +++ b/accel/tcg/cputlb.c | ||
60 | @@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); | ||
61 | |||
62 | void tlb_init(CPUState *cpu) | ||
63 | { | 18 | { |
64 | + CPUArchState *env = cpu->env_ptr; | 19 | -#if TCG_TARGET_REG_BITS == 32 |
65 | + | 20 | - if (src & 1) { |
66 | + qemu_spin_init(&env->tlb_lock); | 21 | - return TCGV_LOW(cpu_fpr[src / 2]); |
67 | } | 22 | - } else { |
68 | 23 | - return TCGV_HIGH(cpu_fpr[src / 2]); | |
69 | /* flush_all_helper: run fn across all cpus | 24 | - } |
70 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_nocheck(CPUState *cpu) | 25 | -#else |
71 | atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1); | 26 | TCGv_i32 ret = get_temp_i32(dc); |
72 | tlb_debug("(count: %zu)\n", tlb_flush_count()); | 27 | if (src & 1) { |
73 | 28 | tcg_gen_extrl_i64_i32(ret, cpu_fpr[src / 2]); | |
74 | + /* | 29 | @@ -XXX,XX +XXX,XX @@ static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src) |
75 | + * tlb_table/tlb_v_table updates from any thread must hold tlb_lock. | 30 | tcg_gen_extrh_i64_i32(ret, cpu_fpr[src / 2]); |
76 | + * However, updates from the owner thread (as is the case here; see the | ||
77 | + * above assert_cpu_is_self) do not need atomic_set because all reads | ||
78 | + * that do not hold the lock are performed by the same owner thread. | ||
79 | + */ | ||
80 | + qemu_spin_lock(&env->tlb_lock); | ||
81 | memset(env->tlb_table, -1, sizeof(env->tlb_table)); | ||
82 | memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); | ||
83 | + qemu_spin_unlock(&env->tlb_lock); | ||
84 | + | ||
85 | cpu_tb_jmp_cache_clear(cpu); | ||
86 | |||
87 | env->vtlb_index = 0; | ||
88 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) | ||
89 | |||
90 | tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask); | ||
91 | |||
92 | + qemu_spin_lock(&env->tlb_lock); | ||
93 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
94 | |||
95 | if (test_bit(mmu_idx, &mmu_idx_bitmask)) { | ||
96 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) | ||
97 | memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0])); | ||
98 | } | ||
99 | } | 31 | } |
100 | + qemu_spin_unlock(&env->tlb_lock); | 32 | return ret; |
101 | |||
102 | cpu_tb_jmp_cache_clear(cpu); | ||
103 | |||
104 | @@ -XXX,XX +XXX,XX @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, | ||
105 | tlb_hit_page(tlb_entry->addr_code, page); | ||
106 | } | ||
107 | |||
108 | -static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong page) | ||
109 | +/* Called with tlb_lock held */ | ||
110 | +static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, | ||
111 | + target_ulong page) | ||
112 | { | ||
113 | if (tlb_hit_page_anyprot(tlb_entry, page)) { | ||
114 | memset(tlb_entry, -1, sizeof(*tlb_entry)); | ||
115 | } | ||
116 | } | ||
117 | |||
118 | -static inline void tlb_flush_vtlb_page(CPUArchState *env, int mmu_idx, | ||
119 | - target_ulong page) | ||
120 | +/* Called with tlb_lock held */ | ||
121 | +static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, | ||
122 | + target_ulong page) | ||
123 | { | ||
124 | int k; | ||
125 | + | ||
126 | + assert_cpu_is_self(ENV_GET_CPU(env)); | ||
127 | for (k = 0; k < CPU_VTLB_SIZE; k++) { | ||
128 | - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], page); | ||
129 | + tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) | ||
134 | |||
135 | addr &= TARGET_PAGE_MASK; | ||
136 | i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
137 | + qemu_spin_lock(&env->tlb_lock); | ||
138 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
139 | - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); | ||
140 | - tlb_flush_vtlb_page(env, mmu_idx, addr); | ||
141 | + tlb_flush_entry_locked(&env->tlb_table[mmu_idx][i], addr); | ||
142 | + tlb_flush_vtlb_page_locked(env, mmu_idx, addr); | ||
143 | } | ||
144 | + qemu_spin_unlock(&env->tlb_lock); | ||
145 | |||
146 | tb_flush_jmp_cache(cpu, addr); | ||
147 | } | ||
148 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, | ||
149 | tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", | ||
150 | page, addr, mmu_idx_bitmap); | ||
151 | |||
152 | + qemu_spin_lock(&env->tlb_lock); | ||
153 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
154 | if (test_bit(mmu_idx, &mmu_idx_bitmap)) { | ||
155 | - tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr); | ||
156 | - tlb_flush_vtlb_page(env, mmu_idx, addr); | ||
157 | + tlb_flush_entry_locked(&env->tlb_table[mmu_idx][page], addr); | ||
158 | + tlb_flush_vtlb_page_locked(env, mmu_idx, addr); | ||
159 | } | ||
160 | } | ||
161 | + qemu_spin_unlock(&env->tlb_lock); | ||
162 | |||
163 | tb_flush_jmp_cache(cpu, addr); | ||
164 | } | ||
165 | @@ -XXX,XX +XXX,XX @@ void tlb_unprotect_code(ram_addr_t ram_addr) | ||
166 | * most usual is detecting writes to code regions which may invalidate | ||
167 | * generated code. | ||
168 | * | ||
169 | - * Because we want other vCPUs to respond to changes straight away we | ||
170 | - * update the te->addr_write field atomically. If the TLB entry has | ||
171 | - * been changed by the vCPU in the mean time we skip the update. | ||
172 | + * Other vCPUs might be reading their TLBs during guest execution, so we update | ||
173 | + * te->addr_write with atomic_set. We don't need to worry about this for | ||
174 | + * oversized guests as MTTCG is disabled for them. | ||
175 | * | ||
176 | - * As this function uses atomic accesses we also need to ensure | ||
177 | - * updates to tlb_entries follow the same access rules. We don't need | ||
178 | - * to worry about this for oversized guests as MTTCG is disabled for | ||
179 | - * them. | ||
180 | + * Called with tlb_lock held. | ||
181 | */ | ||
182 | - | ||
183 | -static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start, | ||
184 | - uintptr_t length) | ||
185 | +static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, | ||
186 | + uintptr_t start, uintptr_t length) | ||
187 | { | ||
188 | -#if TCG_OVERSIZED_GUEST | ||
189 | uintptr_t addr = tlb_entry->addr_write; | ||
190 | |||
191 | if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { | ||
192 | addr &= TARGET_PAGE_MASK; | ||
193 | addr += tlb_entry->addend; | ||
194 | if ((addr - start) < length) { | ||
195 | +#if TCG_OVERSIZED_GUEST | ||
196 | tlb_entry->addr_write |= TLB_NOTDIRTY; | ||
197 | - } | ||
198 | - } | ||
199 | #else | ||
200 | - /* paired with atomic_mb_set in tlb_set_page_with_attrs */ | ||
201 | - uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write); | ||
202 | - uintptr_t addr = orig_addr; | ||
203 | - | ||
204 | - if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { | ||
205 | - addr &= TARGET_PAGE_MASK; | ||
206 | - addr += atomic_read(&tlb_entry->addend); | ||
207 | - if ((addr - start) < length) { | ||
208 | - uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY; | ||
209 | - atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr); | ||
210 | + atomic_set(&tlb_entry->addr_write, | ||
211 | + tlb_entry->addr_write | TLB_NOTDIRTY); | ||
212 | +#endif | ||
213 | } | ||
214 | } | ||
215 | -#endif | 33 | -#endif |
216 | } | 34 | } |
217 | 35 | ||
218 | -/* For atomic correctness when running MTTCG we need to use the right | 36 | static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v) |
219 | - * primitives when copying entries */ | ||
220 | -static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s, | ||
221 | - bool atomic_set) | ||
222 | +/* | ||
223 | + * Called with tlb_lock held. | ||
224 | + * Called only from the vCPU context, i.e. the TLB's owner thread. | ||
225 | + */ | ||
226 | +static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) | ||
227 | { | 37 | { |
228 | -#if TCG_OVERSIZED_GUEST | 38 | -#if TCG_TARGET_REG_BITS == 32 |
229 | *d = *s; | 39 | - if (dst & 1) { |
40 | - tcg_gen_mov_i32(TCGV_LOW(cpu_fpr[dst / 2]), v); | ||
41 | - } else { | ||
42 | - tcg_gen_mov_i32(TCGV_HIGH(cpu_fpr[dst / 2]), v); | ||
43 | - } | ||
230 | -#else | 44 | -#else |
231 | - if (atomic_set) { | 45 | - TCGv_i64 t = (TCGv_i64)v; |
232 | - d->addr_read = s->addr_read; | 46 | + TCGv_i64 t = tcg_temp_new_i64(); |
233 | - d->addr_code = s->addr_code; | 47 | + |
234 | - atomic_set(&d->addend, atomic_read(&s->addend)); | 48 | + tcg_gen_extu_i32_i64(t, v); |
235 | - /* Pairs with flag setting in tlb_reset_dirty_range */ | 49 | tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t, |
236 | - atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write)); | 50 | (dst & 1 ? 0 : 32), 32); |
237 | - } else { | ||
238 | - d->addr_read = s->addr_read; | ||
239 | - d->addr_write = atomic_read(&s->addr_write); | ||
240 | - d->addr_code = s->addr_code; | ||
241 | - d->addend = atomic_read(&s->addend); | ||
242 | - } | ||
243 | -#endif | 51 | -#endif |
52 | + tcg_temp_free_i64(t); | ||
53 | gen_update_fprs_dirty(dc, dst); | ||
244 | } | 54 | } |
245 | 55 | ||
246 | /* This is a cross vCPU call (i.e. another vCPU resetting the flags of | ||
247 | - * the target vCPU). As such care needs to be taken that we don't | ||
248 | - * dangerously race with another vCPU update. The only thing actually | ||
249 | - * updated is the target TLB entry ->addr_write flags. | ||
250 | + * the target vCPU). | ||
251 | + * We must take tlb_lock to avoid racing with another vCPU update. The only | ||
252 | + * thing actually updated is the target TLB entry ->addr_write flags. | ||
253 | */ | ||
254 | void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) | ||
255 | { | ||
256 | @@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) | ||
257 | int mmu_idx; | ||
258 | |||
259 | env = cpu->env_ptr; | ||
260 | + qemu_spin_lock(&env->tlb_lock); | ||
261 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
262 | unsigned int i; | ||
263 | |||
264 | for (i = 0; i < CPU_TLB_SIZE; i++) { | ||
265 | - tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i], | ||
266 | - start1, length); | ||
267 | + tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1, | ||
268 | + length); | ||
269 | } | ||
270 | |||
271 | for (i = 0; i < CPU_VTLB_SIZE; i++) { | ||
272 | - tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i], | ||
273 | - start1, length); | ||
274 | + tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1, | ||
275 | + length); | ||
276 | } | ||
277 | } | ||
278 | + qemu_spin_unlock(&env->tlb_lock); | ||
279 | } | ||
280 | |||
281 | -static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) | ||
282 | +/* Called with tlb_lock held */ | ||
283 | +static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, | ||
284 | + target_ulong vaddr) | ||
285 | { | ||
286 | if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { | ||
287 | tlb_entry->addr_write = vaddr; | ||
288 | @@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) | ||
289 | |||
290 | vaddr &= TARGET_PAGE_MASK; | ||
291 | i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
292 | + qemu_spin_lock(&env->tlb_lock); | ||
293 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
294 | - tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr); | ||
295 | + tlb_set_dirty1_locked(&env->tlb_table[mmu_idx][i], vaddr); | ||
296 | } | ||
297 | |||
298 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
299 | int k; | ||
300 | for (k = 0; k < CPU_VTLB_SIZE; k++) { | ||
301 | - tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr); | ||
302 | + tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr); | ||
303 | } | ||
304 | } | ||
305 | + qemu_spin_unlock(&env->tlb_lock); | ||
306 | } | ||
307 | |||
308 | /* Our TLB does not support large pages, so remember the area covered by | ||
309 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, | ||
310 | addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; | ||
311 | } | ||
312 | |||
313 | - /* Make sure there's no cached translation for the new page. */ | ||
314 | - tlb_flush_vtlb_page(env, mmu_idx, vaddr_page); | ||
315 | - | ||
316 | code_address = address; | ||
317 | iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, | ||
318 | paddr_page, xlat, prot, &address); | ||
319 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, | ||
320 | index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
321 | te = &env->tlb_table[mmu_idx][index]; | ||
322 | |||
323 | + /* | ||
324 | + * Hold the TLB lock for the rest of the function. We could acquire/release | ||
325 | + * the lock several times in the function, but it is faster to amortize the | ||
326 | + * acquisition cost by acquiring it just once. Note that this leads to | ||
327 | + * a longer critical section, but this is not a concern since the TLB lock | ||
328 | + * is unlikely to be contended. | ||
329 | + */ | ||
330 | + qemu_spin_lock(&env->tlb_lock); | ||
331 | + | ||
332 | + /* Make sure there's no cached translation for the new page. */ | ||
333 | + tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); | ||
334 | + | ||
335 | /* | ||
336 | * Only evict the old entry to the victim tlb if it's for a | ||
337 | * different page; otherwise just overwrite the stale data. | ||
338 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, | ||
339 | CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx]; | ||
340 | |||
341 | /* Evict the old entry into the victim tlb. */ | ||
342 | - copy_tlb_helper(tv, te, true); | ||
343 | + copy_tlb_helper_locked(tv, te); | ||
344 | env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index]; | ||
345 | } | ||
346 | |||
347 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, | ||
348 | } | ||
349 | } | ||
350 | |||
351 | - /* Pairs with flag setting in tlb_reset_dirty_range */ | ||
352 | - copy_tlb_helper(te, &tn, true); | ||
353 | - /* atomic_mb_set(&te->addr_write, write_address); */ | ||
354 | + copy_tlb_helper_locked(te, &tn); | ||
355 | + qemu_spin_unlock(&env->tlb_lock); | ||
356 | } | ||
357 | |||
358 | /* Add a new TLB entry, but without specifying the memory | ||
359 | @@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, | ||
360 | size_t elt_ofs, target_ulong page) | ||
361 | { | ||
362 | size_t vidx; | ||
363 | + | ||
364 | + assert_cpu_is_self(ENV_GET_CPU(env)); | ||
365 | for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { | ||
366 | CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; | ||
367 | target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); | ||
368 | @@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, | ||
369 | /* Found entry in victim tlb, swap tlb and iotlb. */ | ||
370 | CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index]; | ||
371 | |||
372 | - copy_tlb_helper(&tmptlb, tlb, false); | ||
373 | - copy_tlb_helper(tlb, vtlb, true); | ||
374 | - copy_tlb_helper(vtlb, &tmptlb, true); | ||
375 | + qemu_spin_lock(&env->tlb_lock); | ||
376 | + copy_tlb_helper_locked(&tmptlb, tlb); | ||
377 | + copy_tlb_helper_locked(tlb, vtlb); | ||
378 | + copy_tlb_helper_locked(vtlb, &tmptlb); | ||
379 | + qemu_spin_unlock(&env->tlb_lock); | ||
380 | |||
381 | CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index]; | ||
382 | CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx]; | ||
383 | -- | 56 | -- |
384 | 2.17.2 | 57 | 2.34.1 |
385 | 58 | ||
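In outline, the discipline the cputlb changes above establish looks like the following sketch of the pattern (not a complete excerpt; env and tlb_entry stand in for the patch's local variables):

    /* All updaters of tlb_table/tlb_v_table hold the per-vCPU lock... */
    qemu_spin_lock(&env->tlb_lock);
    memset(env->tlb_table, -1, sizeof(env->tlb_table));
    qemu_spin_unlock(&env->tlb_lock);

    /* ...while readers that do not take tlb_lock use an atomic read of
     * the one racily-updated field, completed in the next patch: */
    target_ulong addr = atomic_read(&tlb_entry->addr_write);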
1 | Rather than test NOCHAIN before linking, do not emit the | 1 | Move the error-generating fallback from tcg-op.c, and |
---|---|---|---|
2 | goto_tb opcode at all. We already do this for goto_ptr. | 2 | replace "_link_error" with modern QEMU_ERROR markup. |
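For reference, QEMU_ERROR comes from include/qemu/compiler.h and, on compilers that support it, wraps GCC's error attribute, roughly:

    /* roughly (see include/qemu/compiler.h for the real definition): */
    #define QEMU_ERROR(X) __attribute__((error(X)))

    /* Any call to TCGV_LOW()/TCGV_HIGH() that survives dead-code
     * elimination on a 64-bit host now fails at compile time, where the
     * old _link_error symbols only failed at link time. */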
3 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | --- | 6 | --- |
6 | accel/tcg/cpu-exec.c | 2 +- | 7 | include/tcg/tcg-op.h | 33 +++++---------------------------- |
7 | tcg/tcg-op.c | 9 ++++++++- | 8 | include/tcg/tcg.h | 12 ------------ |
8 | 2 files changed, 9 insertions(+), 2 deletions(-) | 9 | tcg/tcg-internal.h | 14 ++++++++++++++ |
10 | tcg/tcg-op-vec.c | 2 ++ | ||
11 | tcg/tcg-op.c | 37 ++++++++++++++++++++++++++++--------- | ||
12 | 5 files changed, 49 insertions(+), 49 deletions(-) | ||
9 | 13 | ||
10 | diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c | 14 | diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h |
11 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/accel/tcg/cpu-exec.c | 16 | --- a/include/tcg/tcg-op.h |
13 | +++ b/accel/tcg/cpu-exec.c | 17 | +++ b/include/tcg/tcg-op.h |
14 | @@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_find(CPUState *cpu, | 18 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) |
15 | } | 19 | tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); |
16 | #endif | 20 | } |
17 | /* See if we can patch the calling TB. */ | 21 | #else /* TCG_TARGET_REG_BITS == 32 */ |
18 | - if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { | 22 | -static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, |
19 | + if (last_tb) { | 23 | - tcg_target_long offset) |
20 | tb_add_jump(last_tb, tb_exit, tb); | 24 | -{ |
21 | } | 25 | - tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); |
22 | return tb; | 26 | -} |
27 | +void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); | ||
28 | +void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); | ||
29 | +void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); | ||
30 | |||
31 | -static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, | ||
32 | - tcg_target_long offset) | ||
33 | -{ | ||
34 | - tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); | ||
35 | -} | ||
36 | - | ||
37 | -static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, | ||
38 | - tcg_target_long offset) | ||
39 | -{ | ||
40 | - tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); | ||
41 | -} | ||
42 | - | ||
43 | -static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
44 | -{ | ||
45 | - tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), | ||
46 | - TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); | ||
47 | -} | ||
48 | - | ||
49 | -static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
50 | -{ | ||
51 | - tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), | ||
52 | - TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); | ||
53 | -} | ||
54 | +void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); | ||
55 | +void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); | ||
56 | |||
57 | void tcg_gen_discard_i64(TCGv_i64 arg); | ||
58 | void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg); | ||
59 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/include/tcg/tcg.h | ||
62 | +++ b/include/tcg/tcg.h | ||
63 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_vec temp_tcgv_vec(TCGTemp *t) | ||
64 | return (TCGv_vec)temp_tcgv_i32(t); | ||
65 | } | ||
66 | |||
67 | -#if TCG_TARGET_REG_BITS == 32 | ||
68 | -static inline TCGv_i32 TCGV_LOW(TCGv_i64 t) | ||
69 | -{ | ||
70 | - return temp_tcgv_i32(tcgv_i64_temp(t)); | ||
71 | -} | ||
72 | - | ||
73 | -static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t) | ||
74 | -{ | ||
75 | - return temp_tcgv_i32(tcgv_i64_temp(t) + 1); | ||
76 | -} | ||
77 | -#endif | ||
78 | - | ||
79 | static inline TCGArg tcg_get_insn_param(TCGOp *op, int arg) | ||
80 | { | ||
81 | return op->args[arg]; | ||
82 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | ||
83 | index XXXXXXX..XXXXXXX 100644 | ||
84 | --- a/tcg/tcg-internal.h | ||
85 | +++ b/tcg/tcg-internal.h | ||
86 | @@ -XXX,XX +XXX,XX @@ static inline unsigned tcg_call_flags(TCGOp *op) | ||
87 | return tcg_call_info(op)->flags; | ||
88 | } | ||
89 | |||
90 | +#if TCG_TARGET_REG_BITS == 32 | ||
91 | +static inline TCGv_i32 TCGV_LOW(TCGv_i64 t) | ||
92 | +{ | ||
93 | + return temp_tcgv_i32(tcgv_i64_temp(t)); | ||
94 | +} | ||
95 | +static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t) | ||
96 | +{ | ||
97 | + return temp_tcgv_i32(tcgv_i64_temp(t) + 1); | ||
98 | +} | ||
99 | +#else | ||
100 | +extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); | ||
101 | +extern TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); | ||
102 | +#endif | ||
103 | + | ||
104 | #endif /* TCG_INTERNAL_H */ | ||
105 | diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/tcg/tcg-op-vec.c | ||
108 | +++ b/tcg/tcg-op-vec.c | ||
109 | @@ -XXX,XX +XXX,XX @@ | ||
110 | #include "tcg/tcg.h" | ||
111 | #include "tcg/tcg-op.h" | ||
112 | #include "tcg/tcg-mo.h" | ||
113 | +#include "tcg-internal.h" | ||
114 | + | ||
115 | |||
116 | /* Reduce the number of ifdefs below. This assumes that all uses of | ||
117 | TCGV_HIGH and TCGV_LOW are properly protected by a conditional that | ||
23 | diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c | 118 | diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c |
24 | index XXXXXXX..XXXXXXX 100644 | 119 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/tcg/tcg-op.c | 120 | --- a/tcg/tcg-op.c |
26 | +++ b/tcg/tcg-op.c | 121 | +++ b/tcg/tcg-op.c |
27 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx) | 122 | @@ -XXX,XX +XXX,XX @@ |
28 | seen this numbered exit before, via tcg_gen_goto_tb. */ | 123 | #include "tcg/tcg-op.h" |
29 | tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx)); | 124 | #include "tcg/tcg-mo.h" |
125 | #include "exec/plugin-gen.h" | ||
126 | +#include "tcg-internal.h" | ||
127 | |||
128 | -/* Reduce the number of ifdefs below. This assumes that all uses of | ||
129 | - TCGV_HIGH and TCGV_LOW are properly protected by a conditional that | ||
130 | - the compiler can eliminate. */ | ||
131 | -#if TCG_TARGET_REG_BITS == 64 | ||
132 | -extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64); | ||
133 | -extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); | ||
134 | -#define TCGV_LOW TCGV_LOW_link_error | ||
135 | -#define TCGV_HIGH TCGV_HIGH_link_error | ||
136 | -#endif | ||
137 | |||
138 | void tcg_gen_op1(TCGOpcode opc, TCGArg a1) | ||
139 | { | ||
140 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) | ||
30 | #endif | 141 | #endif |
31 | + /* When not chaining, exit without indicating a link. */ | 142 | } |
32 | + if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { | 143 | |
33 | + val = 0; | 144 | +void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) |
34 | + } | 145 | +{ |
35 | } else { | 146 | + tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); |
36 | /* This is an exit via the exitreq label. */ | 147 | +} |
37 | tcg_debug_assert(idx == TB_EXIT_REQUESTED); | 148 | + |
38 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_goto_tb(unsigned idx) | 149 | +void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) |
39 | tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0); | 150 | +{ |
40 | tcg_ctx->goto_tb_issue_mask |= 1 << idx; | 151 | + tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); |
152 | +} | ||
153 | + | ||
154 | +void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) | ||
155 | +{ | ||
156 | + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); | ||
157 | +} | ||
158 | + | ||
159 | void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) | ||
160 | { | ||
161 | #if HOST_BIG_ENDIAN | ||
162 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) | ||
41 | #endif | 163 | #endif |
42 | - tcg_gen_op1i(INDEX_op_goto_tb, idx); | ||
43 | + /* When not chaining, we simply fall through to the "fallback" exit. */ | ||
44 | + if (!qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { | ||
45 | + tcg_gen_op1i(INDEX_op_goto_tb, idx); | ||
46 | + } | ||
47 | } | 164 | } |
48 | 165 | ||
49 | void tcg_gen_lookup_and_goto_ptr(void) | 166 | +void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) |
167 | +{ | ||
168 | + tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), | ||
169 | + TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); | ||
170 | +} | ||
171 | + | ||
172 | +void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
173 | +{ | ||
174 | + tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), | ||
175 | + TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); | ||
176 | +} | ||
177 | + | ||
178 | void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
179 | { | ||
180 | tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); | ||
50 | -- | 181 | -- |
51 | 2.17.2 | 182 | 2.34.1 |
52 | 183 | ||
New patch | |||
---|---|---|---|
1 | Record the location of a TCGTemp within a larger object. | ||
1 | 2 | ||
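A sketch of the intended use (the helper name here is hypothetical): because the halves of a split temporary are allocated adjacently, either half can recover the start of the whole value.

    static inline TCGTemp *temp_base(TCGTemp *ts)
    {
        /* hypothetical helper: subindex 0 is the first word */
        return ts - ts->temp_subindex;
    }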
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | include/tcg/tcg.h | 1 + | ||
7 | tcg/tcg.c | 3 +++ | ||
8 | 2 files changed, 4 insertions(+) | ||
9 | |||
10 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/include/tcg/tcg.h | ||
13 | +++ b/include/tcg/tcg.h | ||
14 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGTemp { | ||
15 | unsigned int mem_coherent:1; | ||
16 | unsigned int mem_allocated:1; | ||
17 | unsigned int temp_allocated:1; | ||
18 | + unsigned int temp_subindex:1; | ||
19 | |||
20 | int64_t val; | ||
21 | struct TCGTemp *mem_base; | ||
22 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/tcg/tcg.c | ||
25 | +++ b/tcg/tcg.c | ||
26 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, | ||
27 | ts2->mem_allocated = 1; | ||
28 | ts2->mem_base = base_ts; | ||
29 | ts2->mem_offset = offset + (1 - bigendian) * 4; | ||
30 | + ts2->temp_subindex = 1; | ||
31 | pstrcpy(buf, sizeof(buf), name); | ||
32 | pstrcat(buf, sizeof(buf), "_1"); | ||
33 | ts2->name = strdup(buf); | ||
34 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) | ||
35 | ts2->base_type = TCG_TYPE_I64; | ||
36 | ts2->type = TCG_TYPE_I32; | ||
37 | ts2->temp_allocated = 1; | ||
38 | + ts2->temp_subindex = 1; | ||
39 | ts2->kind = kind; | ||
40 | } else { | ||
41 | ts->base_type = type; | ||
42 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_constant_internal(TCGType type, int64_t val) | ||
43 | ts2->type = TCG_TYPE_I32; | ||
44 | ts2->kind = TEMP_CONST; | ||
45 | ts2->temp_allocated = 1; | ||
46 | + ts2->temp_subindex = 1; | ||
47 | ts2->val = val >> 32; | ||
48 | } else { | ||
49 | ts->base_type = type; | ||
50 | -- | ||
51 | 2.34.1 | ||
52 | |||
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | The first thing that temp_sync does is check mem_coherent, |
---|---|---|---|
2 | so there's no need for the caller to do so. | ||
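For reference, a paraphrase of the check at the top of temp_sync that makes the caller-side test redundant (simplified; see tcg/tcg.c for the real signature and body):

    static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                          TCGRegSet preferred_regs, int free_or_dead)
    {
        if (!temp_readonly(ts) && !ts->mem_coherent) {
            /* allocate a memory slot if needed and store the value back */
        }
        /* ... */
    }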
2 | 3 | ||
3 | We forgot to initialize n in commit 15fa08f845 ("tcg: Dynamically | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | allocate TCGOps", 2017-12-29). | ||
5 | |||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
7 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
8 | Message-Id: <20181010144853.13005-3-cota@braap.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 6 | --- |
11 | tcg/tcg.c | 2 +- | 7 | tcg/tcg.c | 8 ++------ |
12 | 1 file changed, 1 insertion(+), 1 deletion(-) | 8 | 1 file changed, 2 insertions(+), 6 deletions(-) |
13 | 9 | ||
14 | diff --git a/tcg/tcg.c b/tcg/tcg.c | 10 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
15 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/tcg/tcg.c | 12 | --- a/tcg/tcg.c |
17 | +++ b/tcg/tcg.c | 13 | +++ b/tcg/tcg.c |
18 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) | 14 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) |
19 | 15 | ||
20 | #ifdef CONFIG_PROFILER | 16 | /* If the two inputs form one 64-bit value, try dupm_vec. */ |
21 | { | 17 | if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) { |
22 | - int n; | 18 | - if (!itsl->mem_coherent) { |
23 | + int n = 0; | 19 | - temp_sync(s, itsl, s->reserved_regs, 0, 0); |
24 | 20 | - } | |
25 | QTAILQ_FOREACH(op, &s->ops, link) { | 21 | - if (!itsh->mem_coherent) { |
26 | n++; | 22 | - temp_sync(s, itsh, s->reserved_regs, 0, 0); |
23 | - } | ||
24 | + temp_sync(s, itsl, s->reserved_regs, 0, 0); | ||
25 | + temp_sync(s, itsh, s->reserved_regs, 0, 0); | ||
26 | #if HOST_BIG_ENDIAN | ||
27 | TCGTemp *its = itsh; | ||
28 | #else | ||
27 | -- | 29 | -- |
28 | 2.17.2 | 30 | 2.34.1 |
29 | 31 | ||
30 | 32 | diff view generated by jsdifflib |
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | Allocate the first of a pair at the lower address, and the |
---|---|---|---|
2 | second of a pair at the higher address. This will make it | ||
3 | easier to find the beginning of the larger memory block. | ||
2 | 4 | ||
3 | Updates can come from other threads, so readers that do not | 5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | take tlb_lock must use atomic_read to avoid undefined | ||
5 | behaviour (UB). | ||
6 | |||
7 | This completes the conversion to tlb_lock. This conversion results | ||
8 | on average in no performance loss, as the following experiments | ||
9 | (run on an Intel i7-6700K CPU @ 4.00GHz) show. | ||
10 | |||
11 | 1. aarch64 bootup+shutdown test: | ||
12 | |||
13 | - Before: | ||
14 | Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): | ||
15 | |||
16 | 7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% ) | ||
17 | 31,574,905,303 cycles # 4.217 GHz ( +- 0.12% ) | ||
18 | 57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% ) | ||
19 | 10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% ) | ||
20 | 173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% ) | ||
21 | |||
22 | 7.504481349 seconds time elapsed ( +- 0.14% ) | ||
23 | |||
24 | - After: | ||
25 | Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): | ||
26 | |||
27 | 7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% ) | ||
28 | 31,478,476,520 cycles # 4.218 GHz ( +- 0.07% ) | ||
29 | 57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% ) | ||
30 | 10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% ) | ||
31 | 173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% ) | ||
32 | |||
33 | 7.474970463 seconds time elapsed ( +- 0.07% ) | ||
34 | |||
35 | 2. SPEC06int: | ||
36 | SPEC06int (test set) | ||
37 | [Y axis: Speedup over master] | ||
38 | [gnuplot ASCII chart, garbled here: per-benchmark speedup over | ||
39 | master on a 0.75-1.15 scale for the SPEC06int tests plus geomean, | ||
40 | comparing tlb-lock-v2 (mutex) and tlb-lock-v3 (spinlock); see the | ||
41 | png link below for a readable rendering.] | ||
59 | |||
60 | png: https://imgur.com/a/BHzpPTW | ||
61 | |||
62 | Notes: | ||
63 | - tlb-lock-v2 corresponds to an implementation with a mutex. | ||
64 | - tlb-lock-v3 corresponds to the current implementation, i.e. | ||
65 | a spinlock and a single lock acquisition in tlb_set_page_with_attrs. | ||
66 | |||
67 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
68 | Message-Id: <20181016153840.25877-1-cota@braap.org> | ||
69 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
70 | --- | 7 | --- |
71 | accel/tcg/softmmu_template.h | 12 ++++++------ | 8 | tcg/tcg-internal.h | 4 ++-- |
72 | include/exec/cpu_ldst.h | 11 ++++++++++- | 9 | tcg/tcg.c | 58 ++++++++++++++++++++++------------------------ |
73 | include/exec/cpu_ldst_template.h | 2 +- | 10 | 2 files changed, 30 insertions(+), 32 deletions(-) |
74 | accel/tcg/cputlb.c | 19 +++++++++++++------ | ||
75 | 4 files changed, 30 insertions(+), 14 deletions(-) | ||
76 | 11 | ||
77 | diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h | 12 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h |
78 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
79 | --- a/accel/tcg/softmmu_template.h | 14 | --- a/tcg/tcg-internal.h |
80 | +++ b/accel/tcg/softmmu_template.h | 15 | +++ b/tcg/tcg-internal.h |
81 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | 16 | @@ -XXX,XX +XXX,XX @@ static inline unsigned tcg_call_flags(TCGOp *op) |
82 | uintptr_t mmu_idx = get_mmuidx(oi); | 17 | #if TCG_TARGET_REG_BITS == 32 |
83 | uintptr_t index = tlb_index(env, mmu_idx, addr); | 18 | static inline TCGv_i32 TCGV_LOW(TCGv_i64 t) |
84 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | 19 | { |
85 | - target_ulong tlb_addr = entry->addr_write; | 20 | - return temp_tcgv_i32(tcgv_i64_temp(t)); |
86 | + target_ulong tlb_addr = tlb_addr_write(entry); | 21 | + return temp_tcgv_i32(tcgv_i64_temp(t) + HOST_BIG_ENDIAN); |
87 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | 22 | } |
88 | uintptr_t haddr; | 23 | static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t) |
89 | 24 | { | |
90 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | 25 | - return temp_tcgv_i32(tcgv_i64_temp(t) + 1); |
91 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, | 26 | + return temp_tcgv_i32(tcgv_i64_temp(t) + !HOST_BIG_ENDIAN); |
92 | mmu_idx, retaddr); | 27 | } |
28 | #else | ||
29 | extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); | ||
30 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/tcg/tcg.c | ||
33 | +++ b/tcg/tcg.c | ||
34 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, | ||
35 | TCGContext *s = tcg_ctx; | ||
36 | TCGTemp *base_ts = tcgv_ptr_temp(base); | ||
37 | TCGTemp *ts = tcg_global_alloc(s); | ||
38 | - int indirect_reg = 0, bigendian = 0; | ||
39 | -#if HOST_BIG_ENDIAN | ||
40 | - bigendian = 1; | ||
41 | -#endif | ||
42 | + int indirect_reg = 0; | ||
43 | |||
44 | switch (base_ts->kind) { | ||
45 | case TEMP_FIXED: | ||
46 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, | ||
47 | ts->indirect_reg = indirect_reg; | ||
48 | ts->mem_allocated = 1; | ||
49 | ts->mem_base = base_ts; | ||
50 | - ts->mem_offset = offset + bigendian * 4; | ||
51 | + ts->mem_offset = offset; | ||
52 | pstrcpy(buf, sizeof(buf), name); | ||
53 | pstrcat(buf, sizeof(buf), "_0"); | ||
54 | ts->name = strdup(buf); | ||
55 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, | ||
56 | ts2->indirect_reg = indirect_reg; | ||
57 | ts2->mem_allocated = 1; | ||
58 | ts2->mem_base = base_ts; | ||
59 | - ts2->mem_offset = offset + (1 - bigendian) * 4; | ||
60 | + ts2->mem_offset = offset + 4; | ||
61 | ts2->temp_subindex = 1; | ||
62 | pstrcpy(buf, sizeof(buf), name); | ||
63 | pstrcat(buf, sizeof(buf), "_1"); | ||
64 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_constant_internal(TCGType type, int64_t val) | ||
65 | |||
66 | ts = g_hash_table_lookup(h, &val); | ||
67 | if (ts == NULL) { | ||
68 | + int64_t *val_ptr; | ||
69 | + | ||
70 | ts = tcg_temp_alloc(s); | ||
71 | |||
72 | if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { | ||
73 | TCGTemp *ts2 = tcg_temp_alloc(s); | ||
74 | |||
75 | + tcg_debug_assert(ts2 == ts + 1); | ||
76 | + | ||
77 | ts->base_type = TCG_TYPE_I64; | ||
78 | ts->type = TCG_TYPE_I32; | ||
79 | ts->kind = TEMP_CONST; | ||
80 | ts->temp_allocated = 1; | ||
81 | - /* | ||
82 | - * Retain the full value of the 64-bit constant in the low | ||
83 | - * part, so that the hash table works. Actual uses will | ||
84 | - * truncate the value to the low part. | ||
85 | - */ | ||
86 | - ts->val = val; | ||
87 | |||
88 | - tcg_debug_assert(ts2 == ts + 1); | ||
89 | ts2->base_type = TCG_TYPE_I64; | ||
90 | ts2->type = TCG_TYPE_I32; | ||
91 | ts2->kind = TEMP_CONST; | ||
92 | ts2->temp_allocated = 1; | ||
93 | ts2->temp_subindex = 1; | ||
94 | - ts2->val = val >> 32; | ||
95 | + | ||
96 | + /* | ||
97 | + * Retain the full value of the 64-bit constant in the low | ||
98 | + * part, so that the hash table works. Actual uses will | ||
99 | + * truncate the value to the low part. | ||
100 | + */ | ||
101 | + ts[HOST_BIG_ENDIAN].val = val; | ||
102 | + ts[!HOST_BIG_ENDIAN].val = val >> 32; | ||
103 | + val_ptr = &ts[HOST_BIG_ENDIAN].val; | ||
104 | } else { | ||
105 | ts->base_type = type; | ||
106 | ts->type = type; | ||
107 | ts->kind = TEMP_CONST; | ||
108 | ts->temp_allocated = 1; | ||
109 | ts->val = val; | ||
110 | + val_ptr = &ts->val; | ||
93 | } | 111 | } |
94 | - tlb_addr = entry->addr_write & ~TLB_INVALID_MASK; | 112 | - g_hash_table_insert(h, &ts->val, ts); |
95 | + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; | 113 | + g_hash_table_insert(h, val_ptr, ts); |
96 | } | 114 | } |
97 | 115 | ||
98 | /* Handle an IO access. */ | 116 | return ts; |
99 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | 117 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) |
100 | cannot evict the first. */ | 118 | pi = 0; |
101 | page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; | 119 | if (ret != NULL) { |
102 | entry2 = tlb_entry(env, mmu_idx, page2); | 120 | if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { |
103 | - if (!tlb_hit_page(entry2->addr_write, page2) | 121 | -#if HOST_BIG_ENDIAN |
104 | + if (!tlb_hit_page(tlb_addr_write(entry2), page2) | 122 | - op->args[pi++] = temp_arg(ret + 1); |
105 | && !VICTIM_TLB_HIT(addr_write, page2)) { | 123 | - op->args[pi++] = temp_arg(ret); |
106 | tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, | 124 | -#else |
107 | mmu_idx, retaddr); | 125 | op->args[pi++] = temp_arg(ret); |
108 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | 126 | op->args[pi++] = temp_arg(ret + 1); |
109 | uintptr_t mmu_idx = get_mmuidx(oi); | 127 | -#endif |
110 | uintptr_t index = tlb_index(env, mmu_idx, addr); | 128 | nb_rets = 2; |
111 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | 129 | } else { |
112 | - target_ulong tlb_addr = entry->addr_write; | 130 | op->args[pi++] = temp_arg(ret); |
113 | + target_ulong tlb_addr = tlb_addr_write(entry); | 131 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) |
114 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | ||
115 | uintptr_t haddr; | ||
116 | |||
117 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
118 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, | ||
119 | mmu_idx, retaddr); | ||
120 | } | 132 | } |
121 | - tlb_addr = entry->addr_write & ~TLB_INVALID_MASK; | 133 | |
122 | + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; | 134 | if (TCG_TARGET_REG_BITS < 64 && is_64bit) { |
135 | - op->args[pi++] = temp_arg(args[i] + HOST_BIG_ENDIAN); | ||
136 | - op->args[pi++] = temp_arg(args[i] + !HOST_BIG_ENDIAN); | ||
137 | + op->args[pi++] = temp_arg(args[i]); | ||
138 | + op->args[pi++] = temp_arg(args[i] + 1); | ||
139 | real_args += 2; | ||
140 | continue; | ||
141 | } | ||
142 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
123 | } | 143 | } |
124 | 144 | ||
125 | /* Handle an IO access. */ | 145 | /* If the two inputs form one 64-bit value, try dupm_vec. */ |
126 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | 146 | - if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) { |
127 | cannot evict the first. */ | 147 | - temp_sync(s, itsl, s->reserved_regs, 0, 0); |
128 | page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; | 148 | - temp_sync(s, itsh, s->reserved_regs, 0, 0); |
129 | entry2 = tlb_entry(env, mmu_idx, page2); | 149 | -#if HOST_BIG_ENDIAN |
130 | - if (!tlb_hit_page(entry2->addr_write, page2) | 150 | - TCGTemp *its = itsh; |
131 | + if (!tlb_hit_page(tlb_addr_write(entry2), page2) | 151 | -#else |
132 | && !VICTIM_TLB_HIT(addr_write, page2)) { | 152 | - TCGTemp *its = itsl; |
133 | tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, | 153 | -#endif |
134 | mmu_idx, retaddr); | 154 | + if (itsl->temp_subindex == HOST_BIG_ENDIAN && |
135 | diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h | 155 | + itsh->temp_subindex == !HOST_BIG_ENDIAN && |
136 | index XXXXXXX..XXXXXXX 100644 | 156 | + itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { |
137 | --- a/include/exec/cpu_ldst.h | 157 | + TCGTemp *its = itsl - HOST_BIG_ENDIAN; |
138 | +++ b/include/exec/cpu_ldst.h | ||
139 | @@ -XXX,XX +XXX,XX @@ extern __thread uintptr_t helper_retaddr; | ||
140 | /* The memory helpers for tcg-generated code need tcg_target_long etc. */ | ||
141 | #include "tcg.h" | ||
142 | |||
143 | +static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) | ||
144 | +{ | ||
145 | +#if TCG_OVERSIZED_GUEST | ||
146 | + return entry->addr_write; | ||
147 | +#else | ||
148 | + return atomic_read(&entry->addr_write); | ||
149 | +#endif | ||
150 | +} | ||
151 | + | 158 | + |
152 | /* Find the TLB index corresponding to the mmu_idx + address pair. */ | 159 | + temp_sync(s, its + 0, s->reserved_regs, 0, 0); |
153 | static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, | 160 | + temp_sync(s, its + 1, s->reserved_regs, 0, 0); |
154 | target_ulong addr) | ||
155 | @@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, | ||
156 | tlb_addr = tlbentry->addr_read; | ||
157 | break; | ||
158 | case 1: | ||
159 | - tlb_addr = tlbentry->addr_write; | ||
160 | + tlb_addr = tlb_addr_write(tlbentry); | ||
161 | break; | ||
162 | case 2: | ||
163 | tlb_addr = tlbentry->addr_code; | ||
164 | diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h | ||
165 | index XXXXXXX..XXXXXXX 100644 | ||
166 | --- a/include/exec/cpu_ldst_template.h | ||
167 | +++ b/include/exec/cpu_ldst_template.h | ||
168 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
169 | addr = ptr; | ||
170 | mmu_idx = CPU_MMU_INDEX; | ||
171 | entry = tlb_entry(env, mmu_idx, addr); | ||
172 | - if (unlikely(entry->addr_write != | ||
173 | + if (unlikely(tlb_addr_write(entry) != | ||
174 | (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { | ||
175 | oi = make_memop_idx(SHIFT, mmu_idx); | ||
176 | glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi, | ||
177 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
178 | index XXXXXXX..XXXXXXX 100644 | ||
179 | --- a/accel/tcg/cputlb.c | ||
180 | +++ b/accel/tcg/cputlb.c | ||
181 | @@ -XXX,XX +XXX,XX @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, | ||
182 | target_ulong page) | ||
183 | { | ||
184 | return tlb_hit_page(tlb_entry->addr_read, page) || | ||
185 | - tlb_hit_page(tlb_entry->addr_write, page) || | ||
186 | + tlb_hit_page(tlb_addr_write(tlb_entry), page) || | ||
187 | tlb_hit_page(tlb_entry->addr_code, page); | ||
188 | } | ||
189 | |||
190 | @@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, | ||
191 | tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); | ||
192 | |||
193 | entry = tlb_entry(env, mmu_idx, addr); | ||
194 | - tlb_addr = entry->addr_write; | ||
195 | + tlb_addr = tlb_addr_write(entry); | ||
196 | if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { | ||
197 | /* RAM access */ | ||
198 | uintptr_t haddr = addr + entry->addend; | ||
199 | @@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, | ||
200 | assert_cpu_is_self(ENV_GET_CPU(env)); | ||
201 | for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { | ||
202 | CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; | ||
203 | - target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); | ||
204 | + target_ulong cmp; | ||
205 | + | 161 | + |
206 | + /* elt_ofs might correspond to .addr_write, so use atomic_read */ | 162 | if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, |
207 | +#if TCG_OVERSIZED_GUEST | 163 | its->mem_base->reg, its->mem_offset)) { |
208 | + cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); | 164 | goto done; |
209 | +#else | ||
210 | + cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); | ||
211 | +#endif | ||
212 | |||
213 | if (cmp == page) { | ||
214 | /* Found entry in victim tlb, swap tlb and iotlb. */ | ||
215 | @@ -XXX,XX +XXX,XX @@ void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, | ||
216 | uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
217 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
218 | |||
219 | - if (!tlb_hit(entry->addr_write, addr)) { | ||
220 | + if (!tlb_hit(tlb_addr_write(entry), addr)) { | ||
221 | /* TLB entry is for a different page */ | ||
222 | if (!VICTIM_TLB_HIT(addr_write, addr)) { | ||
223 | tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, | ||
224 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
225 | size_t mmu_idx = get_mmuidx(oi); | ||
226 | uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
227 | CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); | ||
228 | - target_ulong tlb_addr = tlbe->addr_write; | ||
229 | + target_ulong tlb_addr = tlb_addr_write(tlbe); | ||
230 | TCGMemOp mop = get_memop(oi); | ||
231 | int a_bits = get_alignment_bits(mop); | ||
232 | int s_bits = mop & MO_SIZE; | ||
233 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
234 | tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE, | ||
235 | mmu_idx, retaddr); | ||
236 | } | ||
237 | - tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK; | ||
238 | + tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; | ||
239 | } | ||
240 | |||
241 | /* Notice an IO access or a needs-MMU-lookup access */ | ||
242 | -- | 165 | -- |
243 | 2.17.2 | 166 | 2.34.1 |
244 | 167 | ||
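
The rule in the left-hand commit message (lock-free readers must not
plain-load a field that other threads update) can be shown with C11
atomics; this sketch mirrors tlb_addr_write() but is not QEMU code,
and it omits the TCG_OVERSIZED_GUEST fallback visible in the diff:

    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct {
        _Atomic uintptr_t addr_write;   /* updated by others under tlb_lock */
    } EntrySketch;

    /* A relaxed load suffices: the goal is freedom from data races
     * (the UB mentioned above), not any particular ordering. */
    static inline uintptr_t entry_addr_write(EntrySketch *e)
    {
        return atomic_load_explicit(&e->addr_write, memory_order_relaxed);
    }
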
New patch | |||
---|---|---|---|
1 | The count is not itself an enumerator. Move it outside the enum | ||
2 | so that -Wswitch-enum does not require switches to handle it. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | include/tcg/tcg.h | 3 ++- | ||
8 | 1 file changed, 2 insertions(+), 1 deletion(-) | ||
9 | |||
10 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/include/tcg/tcg.h | ||
13 | +++ b/include/tcg/tcg.h | ||
14 | @@ -XXX,XX +XXX,XX @@ typedef enum TCGType { | ||
15 | TCG_TYPE_V128, | ||
16 | TCG_TYPE_V256, | ||
17 | |||
18 | - TCG_TYPE_COUNT, /* number of different types */ | ||
19 | + /* Number of different types (integer not enum) */ | ||
20 | +#define TCG_TYPE_COUNT (TCG_TYPE_V256 + 1) | ||
21 | |||
22 | /* An alias for the size of the host register. */ | ||
23 | #if TCG_TARGET_REG_BITS == 32 | ||
24 | -- | ||
25 | 2.34.1 | ||
26 | |||
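
A generic illustration of the warning being avoided (not QEMU code):
under -Wswitch-enum, a sentinel enumerator forces every switch over
the enum to name it, even when the switch covers all real values.

    typedef enum {
        KIND_A,
        KIND_B,
        /* A KIND_COUNT enumerator here would make the switch below
         * warn: "enumeration value 'KIND_COUNT' not handled". */
    } Kind;

    #define KIND_COUNT (KIND_B + 1)   /* integer: invisible to the warning */

    static int describe(Kind k)
    {
        switch (k) {
        case KIND_A: return 1;
        case KIND_B: return 2;
        }
        return 0;
    }
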
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | Add a helper function for computing the size of a type. |
---|---|---|---|
2 | 2 | ||
3 | Paves the way for the addition of a per-TLB lock. | 3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | |||
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
8 | Message-Id: <20181009174557.16125-4-cota@braap.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 5 | --- |
11 | include/exec/exec-all.h | 8 ++++++++ | 6 | include/tcg/tcg.h | 16 ++++++++++++++++ |
12 | accel/tcg/cputlb.c | 4 ++++ | 7 | tcg/tcg.c | 27 ++++++++++++--------------- |
13 | exec.c | 1 + | 8 | 2 files changed, 28 insertions(+), 15 deletions(-) |
14 | 3 files changed, 13 insertions(+) | ||
15 | 9 | ||
16 | diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h | 10 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h |
17 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/include/exec/exec-all.h | 12 | --- a/include/tcg/tcg.h |
19 | +++ b/include/exec/exec-all.h | 13 | +++ b/include/tcg/tcg.h |
20 | @@ -XXX,XX +XXX,XX @@ void cpu_address_space_init(CPUState *cpu, int asidx, | 14 | @@ -XXX,XX +XXX,XX @@ typedef enum TCGType { |
21 | 15 | #endif | |
22 | #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) | 16 | } TCGType; |
23 | /* cputlb.c */ | 17 | |
24 | +/** | 18 | +/** |
25 | + * tlb_init - initialize a CPU's TLB | 19 | + * tcg_type_size |
26 | + * @cpu: CPU whose TLB should be initialized | 20 | + * @t: type |
21 | + * | ||
22 | + * Return the size of the type in bytes. | ||
27 | + */ | 23 | + */ |
28 | +void tlb_init(CPUState *cpu); | 24 | +static inline int tcg_type_size(TCGType t) |
29 | /** | ||
30 | * tlb_flush_page: | ||
31 | * @cpu: CPU whose TLB should be flushed | ||
32 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, | ||
33 | void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, | ||
34 | uintptr_t retaddr); | ||
35 | #else | ||
36 | +static inline void tlb_init(CPUState *cpu) | ||
37 | +{ | 25 | +{ |
38 | +} | 26 | + unsigned i = t; |
39 | static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) | 27 | + if (i >= TCG_TYPE_V64) { |
40 | { | 28 | + tcg_debug_assert(i < TCG_TYPE_COUNT); |
41 | } | 29 | + i -= TCG_TYPE_V64 - 1; |
42 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | 30 | + } |
43 | index XXXXXXX..XXXXXXX 100644 | 31 | + return 4 << i; |
44 | --- a/accel/tcg/cputlb.c | ||
45 | +++ b/accel/tcg/cputlb.c | ||
46 | @@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); | ||
47 | QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); | ||
48 | #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) | ||
49 | |||
50 | +void tlb_init(CPUState *cpu) | ||
51 | +{ | ||
52 | +} | 32 | +} |
53 | + | 33 | + |
54 | /* flush_all_helper: run fn across all cpus | 34 | /** |
55 | * | 35 | * get_alignment_bits |
56 | * If the wait flag is set then the src cpu's helper will be queued as | 36 | * @memop: MemOp value |
57 | diff --git a/exec.c b/exec.c | 37 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
58 | index XXXXXXX..XXXXXXX 100644 | 38 | index XXXXXXX..XXXXXXX 100644 |
59 | --- a/exec.c | 39 | --- a/tcg/tcg.c |
60 | +++ b/exec.c | 40 | +++ b/tcg/tcg.c |
61 | @@ -XXX,XX +XXX,XX @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) | 41 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) |
62 | tcg_target_initialized = true; | 42 | |
63 | cc->tcg_initialize(); | 43 | static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) |
64 | } | 44 | { |
65 | + tlb_init(cpu); | 45 | - intptr_t off, size, align; |
66 | 46 | + int size = tcg_type_size(ts->type); | |
67 | #ifndef CONFIG_USER_ONLY | 47 | + int align; |
68 | if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { | 48 | + intptr_t off; |
49 | |||
50 | switch (ts->type) { | ||
51 | case TCG_TYPE_I32: | ||
52 | - size = align = 4; | ||
53 | + align = 4; | ||
54 | break; | ||
55 | case TCG_TYPE_I64: | ||
56 | case TCG_TYPE_V64: | ||
57 | - size = align = 8; | ||
58 | + align = 8; | ||
59 | break; | ||
60 | case TCG_TYPE_V128: | ||
61 | - size = align = 16; | ||
62 | - break; | ||
63 | case TCG_TYPE_V256: | ||
64 | /* Note that we do not require aligned storage for V256. */ | ||
65 | - size = 32, align = 16; | ||
66 | + align = 16; | ||
67 | break; | ||
68 | default: | ||
69 | g_assert_not_reached(); | ||
70 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
71 | TCGRegSet dup_out_regs, dup_in_regs; | ||
72 | TCGTemp *its, *ots; | ||
73 | TCGType itype, vtype; | ||
74 | - intptr_t endian_fixup; | ||
75 | unsigned vece; | ||
76 | + int lowpart_ofs; | ||
77 | bool ok; | ||
78 | |||
79 | ots = arg_temp(op->args[0]); | ||
80 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
81 | /* fall through */ | ||
82 | |||
83 | case TEMP_VAL_MEM: | ||
84 | -#if HOST_BIG_ENDIAN | ||
85 | - endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8; | ||
86 | - endian_fixup -= 1 << vece; | ||
87 | -#else | ||
88 | - endian_fixup = 0; | ||
89 | -#endif | ||
90 | - /* Attempt to dup directly from the input memory slot. */ | ||
91 | + lowpart_ofs = 0; | ||
92 | + if (HOST_BIG_ENDIAN) { | ||
93 | + lowpart_ofs = tcg_type_size(itype) - (1 << vece); | ||
94 | + } | ||
95 | if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, | ||
96 | - its->mem_offset + endian_fixup)) { | ||
97 | + its->mem_offset + lowpart_ofs)) { | ||
98 | goto done; | ||
99 | } | ||
100 | /* Load the input into the destination vector register. */ | ||
69 | -- | 101 | -- |
70 | 2.17.2 | 102 | 2.34.1 |
71 | 103 | ||
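
The arithmetic in tcg_type_size() is compact enough to sanity-check
standalone; this sketch assumes only that the enum orders I32, I64,
V64, V128, V256 consecutively, as the right-hand diff shows:

    #include <assert.h>

    enum { T_I32, T_I64, T_V64, T_V128, T_V256, T_COUNT };

    static int type_size(unsigned i)
    {
        if (i >= T_V64) {
            assert(i < T_COUNT);
            i -= T_V64 - 1;     /* V64 -> 1, V128 -> 2, V256 -> 3 */
        }
        return 4 << i;
    }

    int main(void)
    {
        assert(type_size(T_I32) == 4 && type_size(T_I64) == 8);
        assert(type_size(T_V64) == 8 && type_size(T_V128) == 16);
        assert(type_size(T_V256) == 32);
        return 0;
    }
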
1 | Reviewed-by: Emilio G. Cota <cota@braap.org> | 1 | Prepare to replace a bunch of separate ifdefs with a |
---|---|---|---|
2 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | 2 | consistent way to describe the ABI of a function call. |
3 | |||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | --- | 7 | --- |
5 | target/arm/helper-a64.c | 16 ++++------------ | 8 | tcg/tcg-internal.h | 15 +++++++++++++++ |
6 | target/arm/translate-a64.c | 38 ++++++++++++++++++++++---------------- | 9 | 1 file changed, 15 insertions(+) |
7 | 2 files changed, 26 insertions(+), 28 deletions(-) | ||
8 | 10 | ||
9 | diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c | 11 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h |
10 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/target/arm/helper-a64.c | 13 | --- a/tcg/tcg-internal.h |
12 | +++ b/target/arm/helper-a64.c | 14 | +++ b/tcg/tcg-internal.h |
13 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, | ||
14 | int mem_idx; | ||
15 | TCGMemOpIdx oi; | ||
16 | |||
17 | - if (!HAVE_CMPXCHG128) { | ||
18 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
19 | - } | ||
20 | + assert(HAVE_CMPXCHG128); | ||
21 | |||
22 | mem_idx = cpu_mmu_index(env, false); | ||
23 | oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
24 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, | ||
25 | int mem_idx; | ||
26 | TCGMemOpIdx oi; | ||
27 | |||
28 | - if (!HAVE_CMPXCHG128) { | ||
29 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
30 | - } | ||
31 | + assert(HAVE_CMPXCHG128); | ||
32 | |||
33 | mem_idx = cpu_mmu_index(env, false); | ||
34 | oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, | ||
36 | int mem_idx; | ||
37 | TCGMemOpIdx oi; | ||
38 | |||
39 | - if (!HAVE_CMPXCHG128) { | ||
40 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
41 | - } | ||
42 | + assert(HAVE_CMPXCHG128); | ||
43 | |||
44 | mem_idx = cpu_mmu_index(env, false); | ||
45 | oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, | ||
47 | int mem_idx; | ||
48 | TCGMemOpIdx oi; | ||
49 | |||
50 | - if (!HAVE_CMPXCHG128) { | ||
51 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
52 | - } | ||
53 | + assert(HAVE_CMPXCHG128); | ||
54 | |||
55 | mem_idx = cpu_mmu_index(env, false); | ||
56 | oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
57 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/target/arm/translate-a64.c | ||
60 | +++ b/target/arm/translate-a64.c | ||
61 | @@ -XXX,XX +XXX,XX @@ | 15 | @@ -XXX,XX +XXX,XX @@ |
62 | 16 | ||
63 | #include "trace-tcg.h" | 17 | #define TCG_HIGHWATER 1024 |
64 | #include "translate-a64.h" | 18 | |
65 | +#include "qemu/atomic128.h" | 19 | +/* |
66 | 20 | + * Describe the calling convention of a given argument type. | |
67 | static TCGv_i64 cpu_X[32]; | 21 | + */ |
68 | static TCGv_i64 cpu_pc; | 22 | +typedef enum { |
69 | @@ -XXX,XX +XXX,XX @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, | 23 | + TCG_CALL_RET_NORMAL, /* by registers */ |
70 | get_mem_index(s), | 24 | +} TCGCallReturnKind; |
71 | MO_64 | MO_ALIGN | s->be_data); | 25 | + |
72 | tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); | 26 | +typedef enum { |
73 | - } else if (s->be_data == MO_LE) { | 27 | + TCG_CALL_ARG_NORMAL, /* by registers (continuing onto stack) */ |
74 | - if (tb_cflags(s->base.tb) & CF_PARALLEL) { | 28 | + TCG_CALL_ARG_EVEN, /* like normal, but skipping odd slots */ |
75 | + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { | 29 | + TCG_CALL_ARG_EXTEND, /* for i32, as a sign/zero-extended i64 */ |
76 | + if (!HAVE_CMPXCHG128) { | 30 | + TCG_CALL_ARG_EXTEND_U, /* ... as a zero-extended i64 */ |
77 | + gen_helper_exit_atomic(cpu_env); | 31 | + TCG_CALL_ARG_EXTEND_S, /* ... as a sign-extended i64 */ |
78 | + s->base.is_jmp = DISAS_NORETURN; | 32 | +} TCGCallArgumentKind; |
79 | + } else if (s->be_data == MO_LE) { | 33 | + |
80 | gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env, | 34 | typedef struct TCGHelperInfo { |
81 | cpu_exclusive_addr, | 35 | void *func; |
82 | cpu_reg(s, rt), | 36 | const char *name; |
83 | cpu_reg(s, rt2)); | ||
84 | } else { | ||
85 | - gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, | ||
86 | - cpu_reg(s, rt), cpu_reg(s, rt2)); | ||
87 | - } | ||
88 | - } else { | ||
89 | - if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
90 | gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env, | ||
91 | cpu_exclusive_addr, | ||
92 | cpu_reg(s, rt), | ||
93 | cpu_reg(s, rt2)); | ||
94 | - } else { | ||
95 | - gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, | ||
96 | - cpu_reg(s, rt), cpu_reg(s, rt2)); | ||
97 | } | ||
98 | + } else if (s->be_data == MO_LE) { | ||
99 | + gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, | ||
100 | + cpu_reg(s, rt), cpu_reg(s, rt2)); | ||
101 | + } else { | ||
102 | + gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, | ||
103 | + cpu_reg(s, rt), cpu_reg(s, rt2)); | ||
104 | } | ||
105 | } else { | ||
106 | tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, | ||
107 | @@ -XXX,XX +XXX,XX @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, | ||
108 | } | ||
109 | tcg_temp_free_i64(cmp); | ||
110 | } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
111 | - TCGv_i32 tcg_rs = tcg_const_i32(rs); | ||
112 | - | ||
113 | - if (s->be_data == MO_LE) { | ||
114 | - gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); | ||
115 | + if (HAVE_CMPXCHG128) { | ||
116 | + TCGv_i32 tcg_rs = tcg_const_i32(rs); | ||
117 | + if (s->be_data == MO_LE) { | ||
118 | + gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); | ||
119 | + } else { | ||
120 | + gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); | ||
121 | + } | ||
122 | + tcg_temp_free_i32(tcg_rs); | ||
123 | } else { | ||
124 | - gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); | ||
125 | + gen_helper_exit_atomic(cpu_env); | ||
126 | + s->base.is_jmp = DISAS_NORETURN; | ||
127 | } | ||
128 | - tcg_temp_free_i32(tcg_rs); | ||
129 | } else { | ||
130 | TCGv_i64 d1 = tcg_temp_new_i64(); | ||
131 | TCGv_i64 d2 = tcg_temp_new_i64(); | ||
132 | -- | 37 | -- |
133 | 2.17.2 | 38 | 2.34.1 |
134 | 39 | ||
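
As a concrete reading of TCG_CALL_ARG_EVEN (a toy model, not the real
allocator): on a 32-bit host a 64-bit argument occupies two slots and
must start on an even slot index, so an odd index is padded first.

    /* Returns the next free slot after placing one 64-bit argument. */
    static int place_i64(int next_slot)
    {
        if (next_slot & 1) {
            next_slot++;        /* dummy slot, cf. TCG_CALL_DUMMY_ARG */
        }
        return next_slot + 2;   /* the i64 takes two 32-bit slots */
    }
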
New patch | |||
---|---|---|---|
1 | 1 | For 32-bit hosts when TCG_TARGET_CALL_ALIGN_ARGS was set, use | |
2 | TCG_CALL_ARG_EVEN. For 64-bit hosts, TCG_TARGET_CALL_ALIGN_ARGS | ||
3 | was silently ignored, so always use TCG_CALL_ARG_NORMAL. | ||
4 | |||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/aarch64/tcg-target.h | 2 +- | ||
9 | tcg/arm/tcg-target.h | 2 +- | ||
10 | tcg/i386/tcg-target.h | 1 + | ||
11 | tcg/loongarch64/tcg-target.h | 2 +- | ||
12 | tcg/mips/tcg-target.h | 3 ++- | ||
13 | tcg/riscv/tcg-target.h | 6 +++++- | ||
14 | tcg/s390x/tcg-target.h | 1 + | ||
15 | tcg/sparc64/tcg-target.h | 1 + | ||
16 | tcg/tci/tcg-target.h | 5 +++++ | ||
17 | tcg/tcg.c | 6 ++++-- | ||
18 | tcg/ppc/tcg-target.c.inc | 21 ++++++++------------- | ||
19 | 11 files changed, 30 insertions(+), 20 deletions(-) | ||
20 | |||
21 | diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/tcg/aarch64/tcg-target.h | ||
24 | +++ b/tcg/aarch64/tcg-target.h | ||
25 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
26 | /* used for function call generation */ | ||
27 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
28 | #define TCG_TARGET_STACK_ALIGN 16 | ||
29 | -#define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
30 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
31 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
32 | |||
33 | /* optional instructions */ | ||
34 | #define TCG_TARGET_HAS_div_i32 1 | ||
35 | diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/tcg/arm/tcg-target.h | ||
38 | +++ b/tcg/arm/tcg-target.h | ||
39 | @@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions; | ||
40 | |||
41 | /* used for function call generation */ | ||
42 | #define TCG_TARGET_STACK_ALIGN 8 | ||
43 | -#define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
44 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
45 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
46 | |||
47 | /* optional instructions */ | ||
48 | #define TCG_TARGET_HAS_ext8s_i32 1 | ||
49 | diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/tcg/i386/tcg-target.h | ||
52 | +++ b/tcg/i386/tcg-target.h | ||
53 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
54 | #else | ||
55 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
56 | #endif | ||
57 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
58 | |||
59 | extern bool have_bmi1; | ||
60 | extern bool have_popcnt; | ||
61 | diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/tcg/loongarch64/tcg-target.h | ||
64 | +++ b/tcg/loongarch64/tcg-target.h | ||
65 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
66 | /* used for function call generation */ | ||
67 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
68 | #define TCG_TARGET_STACK_ALIGN 16 | ||
69 | -#define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
70 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
71 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
72 | |||
73 | /* optional instructions */ | ||
74 | #define TCG_TARGET_HAS_movcond_i32 0 | ||
75 | diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/tcg/mips/tcg-target.h | ||
78 | +++ b/tcg/mips/tcg-target.h | ||
79 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
80 | #define TCG_TARGET_STACK_ALIGN 16 | ||
81 | #if _MIPS_SIM == _ABIO32 | ||
82 | # define TCG_TARGET_CALL_STACK_OFFSET 16 | ||
83 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
84 | #else | ||
85 | # define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
86 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
87 | #endif | ||
88 | -#define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
89 | |||
90 | /* MOVN/MOVZ instructions detection */ | ||
91 | #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ | ||
92 | diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/tcg/riscv/tcg-target.h | ||
95 | +++ b/tcg/riscv/tcg-target.h | ||
96 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
97 | /* used for function call generation */ | ||
98 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
99 | #define TCG_TARGET_STACK_ALIGN 16 | ||
100 | -#define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
101 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
102 | +#if TCG_TARGET_REG_BITS == 32 | ||
103 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
104 | +#else | ||
105 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
106 | +#endif | ||
107 | |||
108 | /* optional instructions */ | ||
109 | #define TCG_TARGET_HAS_movcond_i32 0 | ||
110 | diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/tcg/s390x/tcg-target.h | ||
113 | +++ b/tcg/s390x/tcg-target.h | ||
114 | @@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3]; | ||
115 | /* used for function call generation */ | ||
116 | #define TCG_TARGET_STACK_ALIGN 8 | ||
117 | #define TCG_TARGET_CALL_STACK_OFFSET 160 | ||
118 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
119 | |||
120 | #define TCG_TARGET_EXTEND_ARGS 1 | ||
121 | #define TCG_TARGET_HAS_MEMORY_BSWAP 1 | ||
122 | diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h | ||
123 | index XXXXXXX..XXXXXXX 100644 | ||
124 | --- a/tcg/sparc64/tcg-target.h | ||
125 | +++ b/tcg/sparc64/tcg-target.h | ||
126 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
127 | #define TCG_TARGET_STACK_ALIGN 16 | ||
128 | #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) | ||
129 | #define TCG_TARGET_EXTEND_ARGS 1 | ||
130 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
131 | |||
132 | #if defined(__VIS__) && __VIS__ >= 0x300 | ||
133 | #define use_vis3_instructions 1 | ||
134 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
135 | index XXXXXXX..XXXXXXX 100644 | ||
136 | --- a/tcg/tci/tcg-target.h | ||
137 | +++ b/tcg/tci/tcg-target.h | ||
138 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
139 | /* Used for function call generation. */ | ||
140 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
141 | #define TCG_TARGET_STACK_ALIGN 8 | ||
142 | +#if TCG_TARGET_REG_BITS == 32 | ||
143 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
144 | +#else | ||
145 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
146 | +#endif | ||
147 | |||
148 | #define HAVE_TCG_QEMU_TB_EXEC | ||
149 | #define TCG_TARGET_NEED_POOL_LABELS | ||
150 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
151 | index XXXXXXX..XXXXXXX 100644 | ||
152 | --- a/tcg/tcg.c | ||
153 | +++ b/tcg/tcg.c | ||
154 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
155 | * for passing off to ffi_call. | ||
156 | */ | ||
157 | want_align = true; | ||
158 | -#elif defined(TCG_TARGET_CALL_ALIGN_ARGS) | ||
159 | +#else | ||
160 | /* Some targets want aligned 64 bit args */ | ||
161 | - want_align = is_64bit; | ||
162 | + if (is_64bit) { | ||
163 | + want_align = TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN; | ||
164 | + } | ||
165 | #endif | ||
166 | |||
167 | if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) { | ||
168 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
169 | index XXXXXXX..XXXXXXX 100644 | ||
170 | --- a/tcg/ppc/tcg-target.c.inc | ||
171 | +++ b/tcg/ppc/tcg-target.c.inc | ||
172 | @@ -XXX,XX +XXX,XX @@ | ||
173 | #endif | ||
174 | |||
175 | #ifdef _CALL_SYSV | ||
176 | -# define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
177 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
178 | +#else | ||
179 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
180 | #endif | ||
181 | |||
182 | /* For some memory operations, we need a scratch that isn't R0. For the AIX | ||
183 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
184 | lo = lb->addrlo_reg; | ||
185 | hi = lb->addrhi_reg; | ||
186 | if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { | ||
187 | -#ifdef TCG_TARGET_CALL_ALIGN_ARGS | ||
188 | - arg |= 1; | ||
189 | -#endif | ||
190 | + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); | ||
191 | tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); | ||
192 | tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); | ||
193 | } else { | ||
194 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
195 | lo = lb->addrlo_reg; | ||
196 | hi = lb->addrhi_reg; | ||
197 | if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { | ||
198 | -#ifdef TCG_TARGET_CALL_ALIGN_ARGS | ||
199 | - arg |= 1; | ||
200 | -#endif | ||
201 | + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); | ||
202 | tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); | ||
203 | tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); | ||
204 | } else { | ||
205 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
206 | if (TCG_TARGET_REG_BITS == 32) { | ||
207 | switch (s_bits) { | ||
208 | case MO_64: | ||
209 | -#ifdef TCG_TARGET_CALL_ALIGN_ARGS | ||
210 | - arg |= 1; | ||
211 | -#endif | ||
212 | + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); | ||
213 | tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); | ||
214 | /* FALLTHRU */ | ||
215 | case MO_32: | ||
216 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) | ||
217 | |||
218 | if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { | ||
219 | TCGReg arg = TCG_REG_R4; | ||
220 | -#ifdef TCG_TARGET_CALL_ALIGN_ARGS | ||
221 | - arg |= 1; | ||
222 | -#endif | ||
223 | + | ||
224 | + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); | ||
225 | if (l->addrlo_reg != arg) { | ||
226 | tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg); | ||
227 | tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg); | ||
228 | -- | ||
229 | 2.34.1 | ||
230 | |||
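
The "arg |= ..." idiom in the ppc hunks deserves a note: on 32-bit
PPC SysV, a 64-bit argument is passed in a register pair that starts
with an odd-numbered GPR (r3:r4, r5:r6, ...), and OR-ing the register
number with 1 rounds an even register up to the next odd one while
leaving odd ones alone. A toy check of that rounding:

    #include <assert.h>

    int main(void)
    {
        assert((4 | 1) == 5);   /* r4 -> r5: skip to the next pair */
        assert((5 | 1) == 5);   /* r5 already starts a pair */
        return 0;
    }
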
New patch | |||
---|---|---|---|
1 | 1 | For 64-bit hosts that had TCG_TARGET_EXTEND_ARGS, set | |
2 | TCG_TARGET_CALL_ARG_I32 to TCG_CALL_ARG_EXTEND. | ||
3 | Otherwise, use TCG_CALL_ARG_NORMAL. | ||
4 | |||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/aarch64/tcg-target.h | 1 + | ||
9 | tcg/arm/tcg-target.h | 1 + | ||
10 | tcg/i386/tcg-target.h | 1 + | ||
11 | tcg/loongarch64/tcg-target.h | 1 + | ||
12 | tcg/mips/tcg-target.h | 1 + | ||
13 | tcg/riscv/tcg-target.h | 1 + | ||
14 | tcg/s390x/tcg-target.h | 2 +- | ||
15 | tcg/sparc64/tcg-target.h | 2 +- | ||
16 | tcg/tci/tcg-target.h | 1 + | ||
17 | tcg/tcg.c | 42 ++++++++++++++++++------------------ | ||
18 | tcg/ppc/tcg-target.c.inc | 6 +++++- | ||
19 | 11 files changed, 35 insertions(+), 24 deletions(-) | ||
20 | |||
21 | diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/tcg/aarch64/tcg-target.h | ||
24 | +++ b/tcg/aarch64/tcg-target.h | ||
25 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
26 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
27 | #define TCG_TARGET_STACK_ALIGN 16 | ||
28 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
29 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
30 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
31 | |||
32 | /* optional instructions */ | ||
33 | diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/tcg/arm/tcg-target.h | ||
36 | +++ b/tcg/arm/tcg-target.h | ||
37 | @@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions; | ||
38 | /* used for function call generation */ | ||
39 | #define TCG_TARGET_STACK_ALIGN 8 | ||
40 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
41 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
42 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
43 | |||
44 | /* optional instructions */ | ||
45 | diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/tcg/i386/tcg-target.h | ||
48 | +++ b/tcg/i386/tcg-target.h | ||
49 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
50 | #else | ||
51 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
52 | #endif | ||
53 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
54 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
55 | |||
56 | extern bool have_bmi1; | ||
57 | diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/tcg/loongarch64/tcg-target.h | ||
60 | +++ b/tcg/loongarch64/tcg-target.h | ||
61 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
62 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
63 | #define TCG_TARGET_STACK_ALIGN 16 | ||
64 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
65 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
66 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
67 | |||
68 | /* optional instructions */ | ||
69 | diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/tcg/mips/tcg-target.h | ||
72 | +++ b/tcg/mips/tcg-target.h | ||
73 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
74 | # define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
75 | # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
76 | #endif | ||
77 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
78 | |||
79 | /* MOVN/MOVZ instructions detection */ | ||
80 | #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ | ||
81 | diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/tcg/riscv/tcg-target.h | ||
84 | +++ b/tcg/riscv/tcg-target.h | ||
85 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
86 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
87 | #define TCG_TARGET_STACK_ALIGN 16 | ||
88 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
89 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
90 | #if TCG_TARGET_REG_BITS == 32 | ||
91 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
92 | #else | ||
93 | diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/tcg/s390x/tcg-target.h | ||
96 | +++ b/tcg/s390x/tcg-target.h | ||
97 | @@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3]; | ||
98 | /* used for function call generation */ | ||
99 | #define TCG_TARGET_STACK_ALIGN 8 | ||
100 | #define TCG_TARGET_CALL_STACK_OFFSET 160 | ||
101 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND | ||
102 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
103 | |||
104 | -#define TCG_TARGET_EXTEND_ARGS 1 | ||
105 | #define TCG_TARGET_HAS_MEMORY_BSWAP 1 | ||
106 | |||
107 | #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) | ||
108 | diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h | ||
109 | index XXXXXXX..XXXXXXX 100644 | ||
110 | --- a/tcg/sparc64/tcg-target.h | ||
111 | +++ b/tcg/sparc64/tcg-target.h | ||
112 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
113 | #define TCG_TARGET_STACK_BIAS 2047 | ||
114 | #define TCG_TARGET_STACK_ALIGN 16 | ||
115 | #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) | ||
116 | -#define TCG_TARGET_EXTEND_ARGS 1 | ||
117 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND | ||
118 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
119 | |||
120 | #if defined(__VIS__) && __VIS__ >= 0x300 | ||
121 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/tcg/tci/tcg-target.h | ||
124 | +++ b/tcg/tci/tcg-target.h | ||
125 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
126 | /* Used for function call generation. */ | ||
127 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
128 | #define TCG_TARGET_STACK_ALIGN 8 | ||
129 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
130 | #if TCG_TARGET_REG_BITS == 32 | ||
131 | # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
132 | #else | ||
133 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/tcg/tcg.c | ||
136 | +++ b/tcg/tcg.c | ||
137 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
138 | } | ||
139 | #endif | ||
140 | |||
141 | -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 | ||
142 | - for (i = 0; i < nargs; ++i) { | ||
143 | - int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
144 | - bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
145 | - bool is_signed = argtype & 1; | ||
146 | + if (TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_EXTEND) { | ||
147 | + for (i = 0; i < nargs; ++i) { | ||
148 | + int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
149 | + bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
150 | + bool is_signed = argtype & 1; | ||
151 | |||
152 | - if (is_32bit) { | ||
153 | - TCGv_i64 temp = tcg_temp_new_i64(); | ||
154 | - TCGv_i32 orig = temp_tcgv_i32(args[i]); | ||
155 | - if (is_signed) { | ||
156 | - tcg_gen_ext_i32_i64(temp, orig); | ||
157 | - } else { | ||
158 | - tcg_gen_extu_i32_i64(temp, orig); | ||
159 | + if (is_32bit) { | ||
160 | + TCGv_i64 temp = tcg_temp_new_i64(); | ||
161 | + TCGv_i32 orig = temp_tcgv_i32(args[i]); | ||
162 | + if (is_signed) { | ||
163 | + tcg_gen_ext_i32_i64(temp, orig); | ||
164 | + } else { | ||
165 | + tcg_gen_extu_i32_i64(temp, orig); | ||
166 | + } | ||
167 | + args[i] = tcgv_i64_temp(temp); | ||
168 | } | ||
169 | - args[i] = tcgv_i64_temp(temp); | ||
170 | } | ||
171 | } | ||
172 | -#endif /* TCG_TARGET_EXTEND_ARGS */ | ||
173 | |||
174 | op = tcg_emit_op(INDEX_op_call); | ||
175 | |||
176 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
177 | tcg_debug_assert(TCGOP_CALLI(op) == real_args); | ||
178 | tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); | ||
179 | |||
180 | -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 | ||
181 | - for (i = 0; i < nargs; ++i) { | ||
182 | - int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
183 | - bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
184 | + if (TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_EXTEND) { | ||
185 | + for (i = 0; i < nargs; ++i) { | ||
186 | + int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
187 | + bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
188 | |||
189 | - if (is_32bit) { | ||
190 | - tcg_temp_free_internal(args[i]); | ||
191 | + if (is_32bit) { | ||
192 | + tcg_temp_free_internal(args[i]); | ||
193 | + } | ||
194 | } | ||
195 | } | ||
196 | -#endif /* TCG_TARGET_EXTEND_ARGS */ | ||
197 | } | ||
198 | |||
199 | static void tcg_reg_alloc_start(TCGContext *s) | ||
200 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
201 | index XXXXXXX..XXXXXXX 100644 | ||
202 | --- a/tcg/ppc/tcg-target.c.inc | ||
203 | +++ b/tcg/ppc/tcg-target.c.inc | ||
204 | @@ -XXX,XX +XXX,XX @@ | ||
205 | # endif | ||
206 | #endif | ||
207 | |||
208 | +#if TCG_TARGET_REG_BITS == 64 | ||
209 | +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND | ||
210 | +#else | ||
211 | +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
212 | +#endif | ||
213 | #ifdef _CALL_SYSV | ||
214 | # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
215 | #else | ||
216 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count) | ||
217 | |||
218 | /* Parameters for function call generation, used in tcg.c. */ | ||
219 | #define TCG_TARGET_STACK_ALIGN 16 | ||
220 | -#define TCG_TARGET_EXTEND_ARGS 1 | ||
221 | |||
222 | #ifdef _CALL_AIX | ||
223 | # define LINK_AREA_SIZE (6 * SZR) | ||
224 | -- | ||
225 | 2.34.1 | ||
226 | |||
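
What TCG_CALL_ARG_EXTEND requires of the caller fits in two lines of
C: on ABIs such as s390x and sparc64, a 32-bit argument must be
widened to the full 64-bit register, signed or unsigned as
appropriate (a sketch of the contract, not the QEMU implementation):

    #include <stdint.h>

    static uint64_t extend_u32(uint32_t x) { return x; }  /* zero-extend */
    static int64_t  extend_s32(int32_t x)  { return x; }  /* sign-extend */
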
New patch | |||
---|---|---|---|
1 | Change 32-bit tci TCG_TARGET_CALL_ARG_I32 to TCG_CALL_ARG_EVEN, to | ||
2 | force 32-bit values to be 64-bit aligned. With a small reorg | ||
3 | to the argument processing loop, this neatly replaces an ifdef for | ||
4 | CONFIG_TCG_INTERPRETER. | ||
1 | 5 | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | tcg/tci/tcg-target.h | 3 +- | ||
10 | tcg/tcg.c | 70 ++++++++++++++++++++++++++++---------------- | ||
11 | 2 files changed, 47 insertions(+), 26 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/tci/tcg-target.h | ||
16 | +++ b/tcg/tci/tcg-target.h | ||
17 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
18 | /* Used for function call generation. */ | ||
19 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
20 | #define TCG_TARGET_STACK_ALIGN 8 | ||
21 | -#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
22 | #if TCG_TARGET_REG_BITS == 32 | ||
23 | +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN | ||
24 | # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
25 | #else | ||
26 | +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
27 | # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
28 | #endif | ||
29 | |||
30 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/tcg/tcg.c | ||
33 | +++ b/tcg/tcg.c | ||
34 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
35 | real_args = 0; | ||
36 | for (i = 0; i < nargs; i++) { | ||
37 | int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
38 | - bool is_64bit = (argtype & ~1) == dh_typecode_i64; | ||
39 | - bool want_align = false; | ||
40 | + TCGCallArgumentKind kind; | ||
41 | + TCGType type; | ||
42 | |||
43 | -#if defined(CONFIG_TCG_INTERPRETER) | ||
44 | - /* | ||
45 | - * Align all arguments, so that they land in predictable places | ||
46 | - * for passing off to ffi_call. | ||
47 | - */ | ||
48 | - want_align = true; | ||
49 | -#else | ||
50 | - /* Some targets want aligned 64 bit args */ | ||
51 | - if (is_64bit) { | ||
52 | - want_align = TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN; | ||
53 | - } | ||
54 | -#endif | ||
55 | - | ||
56 | - if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) { | ||
57 | - op->args[pi++] = TCG_CALL_DUMMY_ARG; | ||
58 | - real_args++; | ||
59 | + switch (argtype) { | ||
60 | + case dh_typecode_i32: | ||
61 | + case dh_typecode_s32: | ||
62 | + type = TCG_TYPE_I32; | ||
63 | + break; | ||
64 | + case dh_typecode_i64: | ||
65 | + case dh_typecode_s64: | ||
66 | + type = TCG_TYPE_I64; | ||
67 | + break; | ||
68 | + case dh_typecode_ptr: | ||
69 | + type = TCG_TYPE_PTR; | ||
70 | + break; | ||
71 | + default: | ||
72 | + g_assert_not_reached(); | ||
73 | } | ||
74 | |||
75 | - if (TCG_TARGET_REG_BITS < 64 && is_64bit) { | ||
76 | + switch (type) { | ||
77 | + case TCG_TYPE_I32: | ||
78 | + kind = TCG_TARGET_CALL_ARG_I32; | ||
79 | + break; | ||
80 | + case TCG_TYPE_I64: | ||
81 | + kind = TCG_TARGET_CALL_ARG_I64; | ||
82 | + break; | ||
83 | + default: | ||
84 | + g_assert_not_reached(); | ||
85 | + } | ||
86 | + | ||
87 | + switch (kind) { | ||
88 | + case TCG_CALL_ARG_EVEN: | ||
89 | + if (real_args & 1) { | ||
90 | + op->args[pi++] = TCG_CALL_DUMMY_ARG; | ||
91 | + real_args++; | ||
92 | + } | ||
93 | + /* fall through */ | ||
94 | + case TCG_CALL_ARG_NORMAL: | ||
95 | + if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { | ||
96 | + op->args[pi++] = temp_arg(args[i]); | ||
97 | + op->args[pi++] = temp_arg(args[i] + 1); | ||
98 | + real_args += 2; | ||
99 | + break; | ||
100 | + } | ||
101 | op->args[pi++] = temp_arg(args[i]); | ||
102 | - op->args[pi++] = temp_arg(args[i] + 1); | ||
103 | - real_args += 2; | ||
104 | - continue; | ||
105 | + real_args++; | ||
106 | + break; | ||
107 | + default: | ||
108 | + g_assert_not_reached(); | ||
109 | } | ||
110 | - | ||
111 | - op->args[pi++] = temp_arg(args[i]); | ||
112 | - real_args++; | ||
113 | } | ||
114 | op->args[pi++] = (uintptr_t)func; | ||
115 | op->args[pi++] = (uintptr_t)info; | ||
116 | -- | ||
117 | 2.34.1 | ||
118 | |||
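
The old loop's "(argtype & ~1) == dh_typecode_i64" test, preserved in
spirit by the new switch, works because each signed typecode is its
unsigned sibling with the low bit set. A sketch with illustrative
values (the real dh_typecode_* constants may differ):

    enum { tc_i32 = 2, tc_s32 = 3, tc_i64 = 4, tc_s64 = 5 };

    static int is_64bit(int tc)  { return (tc & ~1) == tc_i64; }
    static int is_signed(int tc) { return tc & 1; }
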
New patch | |||
---|---|---|---|
1 | The function pointer is immediately after the output and input | ||
2 | operands; no need to search. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | accel/tcg/plugin-gen.c | 29 +++++++++++------------------ | ||
8 | 1 file changed, 11 insertions(+), 18 deletions(-) | ||
9 | |||
10 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/accel/tcg/plugin-gen.c | ||
13 | +++ b/accel/tcg/plugin-gen.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op) | ||
15 | static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, | ||
16 | void *func, int *cb_idx) | ||
17 | { | ||
18 | + TCGOp *old_op; | ||
19 | + int func_idx; | ||
20 | + | ||
21 | /* copy all ops until the call */ | ||
22 | do { | ||
23 | op = copy_op_nocheck(begin_op, op); | ||
24 | } while (op->opc != INDEX_op_call); | ||
25 | |||
26 | /* fill in the op call */ | ||
27 | - op->param1 = (*begin_op)->param1; | ||
28 | - op->param2 = (*begin_op)->param2; | ||
29 | + old_op = *begin_op; | ||
30 | + TCGOP_CALLI(op) = TCGOP_CALLI(old_op); | ||
31 | + TCGOP_CALLO(op) = TCGOP_CALLO(old_op); | ||
32 | tcg_debug_assert(op->life == 0); | ||
33 | - if (*cb_idx == -1) { | ||
34 | - int i; | ||
35 | |||
36 | - /* | ||
37 | - * Instead of working out the position of the callback in args[], just | ||
38 | - * look for @empty_func, since it should be a unique pointer. | ||
39 | - */ | ||
40 | - for (i = 0; i < MAX_OPC_PARAM_ARGS; i++) { | ||
41 | - if ((uintptr_t)(*begin_op)->args[i] == (uintptr_t)empty_func) { | ||
42 | - *cb_idx = i; | ||
43 | - break; | ||
44 | - } | ||
45 | - } | ||
46 | - tcg_debug_assert(i < MAX_OPC_PARAM_ARGS); | ||
47 | - } | ||
48 | - op->args[*cb_idx] = (uintptr_t)func; | ||
49 | - op->args[*cb_idx + 1] = (*begin_op)->args[*cb_idx + 1]; | ||
50 | + func_idx = TCGOP_CALLO(op) + TCGOP_CALLI(op); | ||
51 | + *cb_idx = func_idx; | ||
52 | + | ||
53 | + op->args[func_idx] = (uintptr_t)func; | ||
54 | + op->args[func_idx + 1] = old_op->args[func_idx + 1]; | ||
55 | |||
56 | return op; | ||
57 | } | ||
58 | -- | ||
59 | 2.34.1 | ||
60 | |||
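
The layout the patch relies on can be modeled in a few lines of standalone C (assumed layout per the commit message; names are invented for the sketch): a call op stores its output operands first, then its input operands, then the function pointer and the helper-info pointer, so the function pointer sits at index CALLO + CALLI and no search is needed.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct op {
    unsigned callo;      /* number of output operands */
    unsigned calli;      /* number of input operands */
    uintptr_t args[8];   /* [outputs..., inputs..., func, info] */
};

static uintptr_t op_func(const struct op *op)
{
    return op->args[op->callo + op->calli];   /* no search needed */
}

int main(void)
{
    struct op op = { .callo = 1, .calli = 2 };

    op.args[0] = 100;               /* output temp */
    op.args[1] = 200;               /* input temp */
    op.args[2] = 300;               /* input temp */
    op.args[3] = (uintptr_t)0xf00;  /* function pointer */
    op.args[4] = (uintptr_t)0xbaa;  /* helper info */

    assert(op_func(&op) == 0xf00);
    printf("func at index %u\n", op.callo + op.calli);
    return 0;
}
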
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | We copied all of the arguments in copy_op_nocheck. |
---|---|---|---|
2 | We only need to replace the one argument that we change. | ||
2 | 3 | ||
3 | As far as I can tell tlb_flush does not need to be called | ||
4 | this early. tlb_flush is eventually called after the CPU | ||
5 | has been realized. | ||
6 | |||
7 | This change paves the way to the introduction of tlb_init, | ||
8 | which will be called from cpu_exec_realizefn. | ||
9 | |||
10 | Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> | ||
11 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> |
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
13 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
14 | Message-Id: <20181009174557.16125-3-cota@braap.org> | ||
15 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
16 | --- | 7 | --- |
17 | target/unicore32/cpu.c | 2 -- | 8 | accel/tcg/plugin-gen.c | 2 -- |
18 | 1 file changed, 2 deletions(-) | 9 | 1 file changed, 2 deletions(-) |
19 | 10 | ||
20 | diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c | 11 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c |
21 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/target/unicore32/cpu.c | 13 | --- a/accel/tcg/plugin-gen.c |
23 | +++ b/target/unicore32/cpu.c | 14 | +++ b/accel/tcg/plugin-gen.c |
24 | @@ -XXX,XX +XXX,XX @@ static void uc32_cpu_initfn(Object *obj) | 15 | @@ -XXX,XX +XXX,XX @@ static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, |
25 | env->uncached_asr = ASR_MODE_PRIV; | 16 | |
26 | env->regs[31] = 0x03000000; | 17 | func_idx = TCGOP_CALLO(op) + TCGOP_CALLI(op); |
27 | #endif | 18 | *cb_idx = func_idx; |
28 | - | 19 | - |
29 | - tlb_flush(cs); | 20 | op->args[func_idx] = (uintptr_t)func; |
21 | - op->args[func_idx + 1] = old_op->args[func_idx + 1]; | ||
22 | |||
23 | return op; | ||
30 | } | 24 | } |
31 | |||
32 | static const VMStateDescription vmstate_uc32_cpu = { | ||
33 | -- | 25 | -- |
34 | 2.17.2 | 26 | 2.34.1 |
35 | 27 | ||
1 | When op raises an exception, it may not have initialized the output | 1 | Better to re-use the existing function for copying ops. |
---|---|---|---|
2 | temps that would be written back by wout or cout. | ||
3 | 2 | ||
4 | Reviewed-by: David Hildenbrand <david@redhat.com> | 3 | Acked-by: Alex Bennée <alex.bennee@linaro.org> |
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 6 | --- |
7 | target/s390x/translate.c | 20 +++++++++++++++----- | 7 | accel/tcg/plugin-gen.c | 16 ++++++++-------- |
8 | 1 file changed, 15 insertions(+), 5 deletions(-) | 8 | 1 file changed, 8 insertions(+), 8 deletions(-) |
9 | 9 | ||
10 | diff --git a/target/s390x/translate.c b/target/s390x/translate.c | 10 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c |
11 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/target/s390x/translate.c | 12 | --- a/accel/tcg/plugin-gen.c |
13 | +++ b/target/s390x/translate.c | 13 | +++ b/accel/tcg/plugin-gen.c |
14 | @@ -XXX,XX +XXX,XX @@ struct DisasInsn { | 14 | @@ -XXX,XX +XXX,XX @@ static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb, |
15 | 15 | op = copy_const_ptr(&begin_op, op, cb->userp); | |
16 | const char *name; | 16 | |
17 | 17 | /* copy the ld_i32, but note that we only have to copy it once */ | |
18 | + /* Pre-process arguments before HELP_OP. */ | 18 | - begin_op = QTAILQ_NEXT(begin_op, link); |
19 | void (*help_in1)(DisasContext *, DisasFields *, DisasOps *); | 19 | - tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); |
20 | void (*help_in2)(DisasContext *, DisasFields *, DisasOps *); | 20 | if (*cb_idx == -1) { |
21 | void (*help_prep)(DisasContext *, DisasFields *, DisasOps *); | 21 | - op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32); |
22 | + | 22 | - memcpy(op->args, begin_op->args, sizeof(op->args)); |
23 | + /* | 23 | + op = copy_op(&begin_op, op, INDEX_op_ld_i32); |
24 | + * Post-process output after HELP_OP. | 24 | + } else { |
25 | + * Note that these are not called if HELP_OP returns DISAS_NORETURN. | 25 | + begin_op = QTAILQ_NEXT(begin_op, link); |
26 | + */ | 26 | + tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); |
27 | void (*help_wout)(DisasContext *, DisasFields *, DisasOps *); | ||
28 | void (*help_cout)(DisasContext *, DisasOps *); | ||
29 | + | ||
30 | + /* Implement the operation itself. */ | ||
31 | DisasJumpType (*help_op)(DisasContext *, DisasOps *); | ||
32 | |||
33 | uint64_t data; | ||
34 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) | ||
35 | if (insn->help_op) { | ||
36 | ret = insn->help_op(s, &o); | ||
37 | } | 27 | } |
38 | - if (insn->help_wout) { | 28 | |
39 | - insn->help_wout(s, &f, &o); | 29 | /* call */ |
40 | - } | 30 | @@ -XXX,XX +XXX,XX @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb, |
41 | - if (insn->help_cout) { | 31 | op = copy_const_ptr(&begin_op, op, cb->userp); |
42 | - insn->help_cout(s, &o); | 32 | |
43 | + if (ret != DISAS_NORETURN) { | 33 | /* copy the ld_i32, but note that we only have to copy it once */ |
44 | + if (insn->help_wout) { | 34 | - begin_op = QTAILQ_NEXT(begin_op, link); |
45 | + insn->help_wout(s, &f, &o); | 35 | - tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); |
46 | + } | 36 | if (*cb_idx == -1) { |
47 | + if (insn->help_cout) { | 37 | - op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32); |
48 | + insn->help_cout(s, &o); | 38 | - memcpy(op->args, begin_op->args, sizeof(op->args)); |
49 | + } | 39 | + op = copy_op(&begin_op, op, INDEX_op_ld_i32); |
40 | + } else { | ||
41 | + begin_op = QTAILQ_NEXT(begin_op, link); | ||
42 | + tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); | ||
50 | } | 43 | } |
51 | 44 | ||
52 | /* Free any temporaries created by the helpers. */ | 45 | /* extu_tl_i64 */ |
53 | -- | 46 | -- |
54 | 2.17.2 | 47 | 2.34.1 |
55 | 48 | ||
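
A minimal sketch of the consolidated pattern (invented names; a singly linked list stands in for the QTAILQ): one helper advances the template iterator and clones that node after the output cursor, so call sites stop open-coding the advance/insert/memcpy sequence.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
    int opc;
    int args[4];
    struct node *next;
};

static struct node *insert_after(struct node *pos, int opc)
{
    struct node *n = calloc(1, sizeof(*n));

    n->opc = opc;
    n->next = pos->next;
    pos->next = n;
    return n;
}

/* Advance *tmpl and emit a copy of the new template node after out. */
static struct node *copy_node(struct node **tmpl, struct node *out)
{
    *tmpl = (*tmpl)->next;
    out = insert_after(out, (*tmpl)->opc);
    memcpy(out->args, (*tmpl)->args, sizeof(out->args));
    return out;
}

int main(void)
{
    struct node tail = { .opc = 2, .args = { 7, 8, 9, 0 } };
    struct node head = { .opc = 1, .next = &tail };
    struct node out = { 0 };
    struct node *tmpl = &head;

    struct node *last = copy_node(&tmpl, &out);
    printf("copied opc %d, arg0 %d\n", last->opc, last->args[0]);
    free(last);
    return 0;
}
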
New patch | |||
---|---|---|---|
1 | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | |
2 | |||
3 | In order to allow variably sized TCGOp allocations, pass the number | ||
4 | of arguments we use (and would allocate) up to tcg_op_alloc(). | ||
5 | |||
6 | This alters the tcg_emit_op(), tcg_op_insert_before() and | ||
7 | tcg_op_insert_after() prototypes. | ||
8 | |||
9 | In tcg_op_alloc() ensure the number of arguments is in range. | ||
10 | |||
11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | [PMD: Extracted from bigger patch] | ||
14 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
15 | Message-Id: <20221218211832.73312-2-philmd@linaro.org> | ||
16 | --- | ||
17 | include/tcg/tcg-op.h | 2 +- | ||
18 | include/tcg/tcg.h | 8 +++++--- | ||
19 | accel/tcg/plugin-gen.c | 5 ++++- | ||
20 | tcg/optimize.c | 4 ++-- | ||
21 | tcg/tcg-op-vec.c | 8 ++++---- | ||
22 | tcg/tcg-op.c | 12 ++++++------ | ||
23 | tcg/tcg.c | 30 +++++++++++++++++------------- | ||
24 | 7 files changed, 39 insertions(+), 30 deletions(-) | ||
25 | |||
26 | diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/include/tcg/tcg-op.h | ||
29 | +++ b/include/tcg/tcg-op.h | ||
30 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_plugin_cb_start(unsigned from, unsigned type, | ||
31 | |||
32 | static inline void tcg_gen_plugin_cb_end(void) | ||
33 | { | ||
34 | - tcg_emit_op(INDEX_op_plugin_cb_end); | ||
35 | + tcg_emit_op(INDEX_op_plugin_cb_end, 0); | ||
36 | } | ||
37 | |||
38 | #if TARGET_LONG_BITS == 32 | ||
39 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/include/tcg/tcg.h | ||
42 | +++ b/include/tcg/tcg.h | ||
43 | @@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op); | ||
44 | |||
45 | void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args); | ||
46 | |||
47 | -TCGOp *tcg_emit_op(TCGOpcode opc); | ||
48 | +TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs); | ||
49 | void tcg_op_remove(TCGContext *s, TCGOp *op); | ||
50 | -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc); | ||
51 | -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc); | ||
52 | +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, | ||
53 | + TCGOpcode opc, unsigned nargs); | ||
54 | +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, | ||
55 | + TCGOpcode opc, unsigned nargs); | ||
56 | |||
57 | /** | ||
58 | * tcg_remove_ops_after: | ||
59 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/accel/tcg/plugin-gen.c | ||
62 | +++ b/accel/tcg/plugin-gen.c | ||
63 | @@ -XXX,XX +XXX,XX @@ static TCGOp *rm_ops(TCGOp *op) | ||
64 | |||
65 | static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op) | ||
66 | { | ||
67 | + unsigned nargs = ARRAY_SIZE(op->args); | ||
68 | + | ||
69 | *begin_op = QTAILQ_NEXT(*begin_op, link); | ||
70 | tcg_debug_assert(*begin_op); | ||
71 | - op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc); | ||
72 | + op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc, nargs); | ||
73 | memcpy(op->args, (*begin_op)->args, sizeof(op->args)); | ||
74 | + | ||
75 | return op; | ||
76 | } | ||
77 | |||
78 | diff --git a/tcg/optimize.c b/tcg/optimize.c | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/tcg/optimize.c | ||
81 | +++ b/tcg/optimize.c | ||
82 | @@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) | ||
83 | rh = op->args[1]; | ||
84 | |||
85 | /* The proper opcode is supplied by tcg_opt_gen_mov. */ | ||
86 | - op2 = tcg_op_insert_before(ctx->tcg, op, 0); | ||
87 | + op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); | ||
88 | |||
89 | tcg_opt_gen_movi(ctx, op, rl, al); | ||
90 | tcg_opt_gen_movi(ctx, op2, rh, ah); | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) | ||
92 | rh = op->args[1]; | ||
93 | |||
94 | /* The proper opcode is supplied by tcg_opt_gen_mov. */ | ||
95 | - op2 = tcg_op_insert_before(ctx->tcg, op, 0); | ||
96 | + op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); | ||
97 | |||
98 | tcg_opt_gen_movi(ctx, op, rl, l); | ||
99 | tcg_opt_gen_movi(ctx, op2, rh, h); | ||
100 | diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/tcg/tcg-op-vec.c | ||
103 | +++ b/tcg/tcg-op-vec.c | ||
104 | @@ -XXX,XX +XXX,XX @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list, | ||
105 | |||
106 | void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) | ||
107 | { | ||
108 | - TCGOp *op = tcg_emit_op(opc); | ||
109 | + TCGOp *op = tcg_emit_op(opc, 2); | ||
110 | TCGOP_VECL(op) = type - TCG_TYPE_V64; | ||
111 | TCGOP_VECE(op) = vece; | ||
112 | op->args[0] = r; | ||
113 | @@ -XXX,XX +XXX,XX @@ void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) | ||
114 | void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, | ||
115 | TCGArg r, TCGArg a, TCGArg b) | ||
116 | { | ||
117 | - TCGOp *op = tcg_emit_op(opc); | ||
118 | + TCGOp *op = tcg_emit_op(opc, 3); | ||
119 | TCGOP_VECL(op) = type - TCG_TYPE_V64; | ||
120 | TCGOP_VECE(op) = vece; | ||
121 | op->args[0] = r; | ||
122 | @@ -XXX,XX +XXX,XX @@ void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, | ||
123 | void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, | ||
124 | TCGArg r, TCGArg a, TCGArg b, TCGArg c) | ||
125 | { | ||
126 | - TCGOp *op = tcg_emit_op(opc); | ||
127 | + TCGOp *op = tcg_emit_op(opc, 4); | ||
128 | TCGOP_VECL(op) = type - TCG_TYPE_V64; | ||
129 | TCGOP_VECE(op) = vece; | ||
130 | op->args[0] = r; | ||
131 | @@ -XXX,XX +XXX,XX @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, | ||
132 | static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, | ||
133 | TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e) | ||
134 | { | ||
135 | - TCGOp *op = tcg_emit_op(opc); | ||
136 | + TCGOp *op = tcg_emit_op(opc, 6); | ||
137 | TCGOP_VECL(op) = type - TCG_TYPE_V64; | ||
138 | TCGOP_VECE(op) = vece; | ||
139 | op->args[0] = r; | ||
140 | diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c | ||
141 | index XXXXXXX..XXXXXXX 100644 | ||
142 | --- a/tcg/tcg-op.c | ||
143 | +++ b/tcg/tcg-op.c | ||
144 | @@ -XXX,XX +XXX,XX @@ | ||
145 | |||
146 | void tcg_gen_op1(TCGOpcode opc, TCGArg a1) | ||
147 | { | ||
148 | - TCGOp *op = tcg_emit_op(opc); | ||
149 | + TCGOp *op = tcg_emit_op(opc, 1); | ||
150 | op->args[0] = a1; | ||
151 | } | ||
152 | |||
153 | void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2) | ||
154 | { | ||
155 | - TCGOp *op = tcg_emit_op(opc); | ||
156 | + TCGOp *op = tcg_emit_op(opc, 2); | ||
157 | op->args[0] = a1; | ||
158 | op->args[1] = a2; | ||
159 | } | ||
160 | |||
161 | void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) | ||
162 | { | ||
163 | - TCGOp *op = tcg_emit_op(opc); | ||
164 | + TCGOp *op = tcg_emit_op(opc, 3); | ||
165 | op->args[0] = a1; | ||
166 | op->args[1] = a2; | ||
167 | op->args[2] = a3; | ||
168 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) | ||
169 | |||
170 | void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) | ||
171 | { | ||
172 | - TCGOp *op = tcg_emit_op(opc); | ||
173 | + TCGOp *op = tcg_emit_op(opc, 4); | ||
174 | op->args[0] = a1; | ||
175 | op->args[1] = a2; | ||
176 | op->args[2] = a3; | ||
177 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) | ||
178 | void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, | ||
179 | TCGArg a4, TCGArg a5) | ||
180 | { | ||
181 | - TCGOp *op = tcg_emit_op(opc); | ||
182 | + TCGOp *op = tcg_emit_op(opc, 5); | ||
183 | op->args[0] = a1; | ||
184 | op->args[1] = a2; | ||
185 | op->args[2] = a3; | ||
186 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, | ||
187 | void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, | ||
188 | TCGArg a4, TCGArg a5, TCGArg a6) | ||
189 | { | ||
190 | - TCGOp *op = tcg_emit_op(opc); | ||
191 | + TCGOp *op = tcg_emit_op(opc, 6); | ||
192 | op->args[0] = a1; | ||
193 | op->args[1] = a2; | ||
194 | op->args[2] = a3; | ||
195 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
196 | index XXXXXXX..XXXXXXX 100644 | ||
197 | --- a/tcg/tcg.c | ||
198 | +++ b/tcg/tcg.c | ||
199 | @@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op) | ||
200 | and endian swap in tcg_reg_alloc_call(). */ | ||
201 | void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
202 | { | ||
203 | - int i, real_args, nb_rets, pi; | ||
204 | + int i, real_args, nb_rets, pi, max_args; | ||
205 | unsigned typemask; | ||
206 | const TCGHelperInfo *info; | ||
207 | TCGOp *op; | ||
208 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
209 | } | ||
210 | } | ||
211 | |||
212 | - op = tcg_emit_op(INDEX_op_call); | ||
213 | + max_args = ARRAY_SIZE(op->args); | ||
214 | + op = tcg_emit_op(INDEX_op_call, max_args); | ||
215 | |||
216 | pi = 0; | ||
217 | if (ret != NULL) { | ||
218 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
219 | |||
220 | /* Make sure the fields didn't overflow. */ | ||
221 | tcg_debug_assert(TCGOP_CALLI(op) == real_args); | ||
222 | - tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); | ||
223 | + tcg_debug_assert(pi <= max_args); | ||
224 | |||
225 | if (TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_EXTEND) { | ||
226 | for (i = 0; i < nargs; ++i) { | ||
227 | @@ -XXX,XX +XXX,XX @@ void tcg_remove_ops_after(TCGOp *op) | ||
228 | } | ||
229 | } | ||
230 | |||
231 | -static TCGOp *tcg_op_alloc(TCGOpcode opc) | ||
232 | +static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) | ||
233 | { | ||
234 | TCGContext *s = tcg_ctx; | ||
235 | TCGOp *op; | ||
236 | |||
237 | + assert(nargs < ARRAY_SIZE(op->args)); | ||
238 | if (likely(QTAILQ_EMPTY(&s->free_ops))) { | ||
239 | op = tcg_malloc(sizeof(TCGOp)); | ||
240 | } else { | ||
241 | @@ -XXX,XX +XXX,XX @@ static TCGOp *tcg_op_alloc(TCGOpcode opc) | ||
242 | return op; | ||
243 | } | ||
244 | |||
245 | -TCGOp *tcg_emit_op(TCGOpcode opc) | ||
246 | +TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) | ||
247 | { | ||
248 | - TCGOp *op = tcg_op_alloc(opc); | ||
249 | + TCGOp *op = tcg_op_alloc(opc, nargs); | ||
250 | QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); | ||
251 | return op; | ||
252 | } | ||
253 | |||
254 | -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) | ||
255 | +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, | ||
256 | + TCGOpcode opc, unsigned nargs) | ||
257 | { | ||
258 | - TCGOp *new_op = tcg_op_alloc(opc); | ||
259 | + TCGOp *new_op = tcg_op_alloc(opc, nargs); | ||
260 | QTAILQ_INSERT_BEFORE(old_op, new_op, link); | ||
261 | return new_op; | ||
262 | } | ||
263 | |||
264 | -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) | ||
265 | +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, | ||
266 | + TCGOpcode opc, unsigned nargs) | ||
267 | { | ||
268 | - TCGOp *new_op = tcg_op_alloc(opc); | ||
269 | + TCGOp *new_op = tcg_op_alloc(opc, nargs); | ||
270 | QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); | ||
271 | return new_op; | ||
272 | } | ||
273 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
274 | TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 | ||
275 | ? INDEX_op_ld_i32 | ||
276 | : INDEX_op_ld_i64); | ||
277 | - TCGOp *lop = tcg_op_insert_before(s, op, lopc); | ||
278 | + TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); | ||
279 | |||
280 | lop->args[0] = temp_arg(dir_ts); | ||
281 | lop->args[1] = temp_arg(arg_ts->mem_base); | ||
282 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
283 | TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 | ||
284 | ? INDEX_op_st_i32 | ||
285 | : INDEX_op_st_i64); | ||
286 | - TCGOp *sop = tcg_op_insert_after(s, op, sopc); | ||
287 | + TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); | ||
288 | TCGTemp *out_ts = dir_ts; | ||
289 | |||
290 | if (IS_DEAD_ARG(0)) { | ||
291 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
292 | TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 | ||
293 | ? INDEX_op_st_i32 | ||
294 | : INDEX_op_st_i64); | ||
295 | - TCGOp *sop = tcg_op_insert_after(s, op, sopc); | ||
296 | + TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); | ||
297 | |||
298 | sop->args[0] = temp_arg(dir_ts); | ||
299 | sop->args[1] = temp_arg(arg_ts->mem_base); | ||
300 | -- | ||
301 | 2.34.1 | ||
302 | |||
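
A compressed model of the resulting API shape (standalone sketch, invented names): every fixed-arity emitter passes the argument count it will fill in, and the allocator range-checks it. At this point the capacity is still fixed; the next patch makes the allocation size actually depend on the count.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ARGS 16   /* still a fixed cap in this sketch */

struct op {
    int opc;
    int args[MAX_ARGS];
};

static struct op *op_alloc(int opc, unsigned nargs)
{
    struct op *op;

    assert(nargs <= MAX_ARGS);   /* the range check the patch adds */
    op = calloc(1, sizeof(*op));
    op->opc = opc;
    return op;
}

static struct op *emit_op(int opc, unsigned nargs)
{
    return op_alloc(opc, nargs);   /* queue insertion elided */
}

/* Fixed-arity wrappers pass their own count, as tcg_gen_opN now does. */
static struct op *gen_op3(int opc, int a1, int a2, int a3)
{
    struct op *op = emit_op(opc, 3);

    op->args[0] = a1;
    op->args[1] = a2;
    op->args[2] = a3;
    return op;
}

int main(void)
{
    struct op *op = gen_op3(42, 1, 2, 3);

    printf("opc %d, args %d %d %d\n", op->opc,
           op->args[0], op->args[1], op->args[2]);
    free(op);
    return 0;
}
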
1 | Reviewed-by: David Hildenbrand <david@redhat.com> | 1 | We have been allocating a worst-case number of arguments |
---|---|---|---|
2 | to support calls. Instead, allow the size to vary. | ||
3 | By default leave space for 4 args, to maximize reuse, | ||
4 | but allow calls to increase the number of args to 32. | ||
5 | |||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | [PMD: Split patch in two] | ||
8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Message-Id: <20221218211832.73312-3-philmd@linaro.org> | ||
3 | --- | 10 | --- |
4 | target/s390x/mem_helper.c | 92 +++++++++++++++++---------------------- | 11 | include/exec/helper-head.h | 2 -- |
5 | 1 file changed, 41 insertions(+), 51 deletions(-) | 12 | include/tcg/tcg.h | 46 +++++++++++++------------------------- |
13 | accel/tcg/plugin-gen.c | 10 ++++----- | ||
14 | tcg/tcg.c | 35 +++++++++++++++++++++-------- | ||
15 | 4 files changed, 47 insertions(+), 46 deletions(-) | ||
6 | 16 | ||
7 | diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c | 17 | diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h |
8 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
9 | --- a/target/s390x/mem_helper.c | 19 | --- a/include/exec/helper-head.h |
10 | +++ b/target/s390x/mem_helper.c | 20 | +++ b/include/exec/helper-head.h |
11 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ |
12 | #include "exec/exec-all.h" | 22 | #define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \ |
13 | #include "exec/cpu_ldst.h" | 23 | DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) |
14 | #include "qemu/int128.h" | 24 | |
15 | +#include "qemu/atomic128.h" | 25 | -/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */ |
16 | 26 | - | |
17 | #if !defined(CONFIG_USER_ONLY) | 27 | #endif /* EXEC_HELPER_HEAD_H */ |
18 | #include "hw/s390x/storage-keys.h" | 28 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h |
19 | @@ -XXX,XX +XXX,XX @@ static void do_cdsg(CPUS390XState *env, uint64_t addr, | 29 | index XXXXXXX..XXXXXXX 100644 |
20 | bool fail; | 30 | --- a/include/tcg/tcg.h |
21 | 31 | +++ b/include/tcg/tcg.h | |
22 | if (parallel) { | 32 | @@ -XXX,XX +XXX,XX @@ |
23 | -#ifndef CONFIG_ATOMIC128 | 33 | /* XXX: make safe guess about sizes */ |
24 | +#if !HAVE_CMPXCHG128 | 34 | #define MAX_OP_PER_INSTR 266 |
25 | cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | 35 | |
26 | #else | 36 | -#if HOST_LONG_BITS == 32 |
27 | int mem_idx = cpu_mmu_index(env, false); | 37 | -#define MAX_OPC_PARAM_PER_ARG 2 |
28 | @@ -XXX,XX +XXX,XX @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, | 38 | -#else |
29 | static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, | 39 | -#define MAX_OPC_PARAM_PER_ARG 1 |
30 | uint64_t a2, bool parallel) | 40 | -#endif |
41 | -#define MAX_OPC_PARAM_IARGS 7 | ||
42 | -#define MAX_OPC_PARAM_OARGS 1 | ||
43 | -#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS) | ||
44 | - | ||
45 | -/* A Call op needs up to 4 + 2N parameters on 32-bit archs, | ||
46 | - * and up to 4 + N parameters on 64-bit archs | ||
47 | - * (N = number of input arguments + output arguments). */ | ||
48 | -#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) | ||
49 | - | ||
50 | #define CPU_TEMP_BUF_NLONGS 128 | ||
51 | #define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long)) | ||
52 | |||
53 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGTempSet { | ||
54 | unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; | ||
55 | } TCGTempSet; | ||
56 | |||
57 | -/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding, | ||
58 | - this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands. | ||
59 | - There are never more than 2 outputs, which means that we can store all | ||
60 | - dead + sync data within 16 bits. */ | ||
61 | -#define DEAD_ARG 4 | ||
62 | -#define SYNC_ARG 1 | ||
63 | -typedef uint16_t TCGLifeData; | ||
64 | +/* | ||
65 | + * With 1 128-bit output, a 32-bit host requires 4 output parameters, | ||
66 | + * which leaves a maximum of 28 other slots. Which is enough for 7 | ||
67 | + * 128-bit operands. | ||
68 | + */ | ||
69 | +#define DEAD_ARG (1 << 4) | ||
70 | +#define SYNC_ARG (1 << 0) | ||
71 | +typedef uint32_t TCGLifeData; | ||
72 | |||
73 | -/* The layout here is designed to avoid a bitfield crossing of | ||
74 | - a 32-bit boundary, which would cause GCC to add extra padding. */ | ||
75 | typedef struct TCGOp { | ||
76 | - TCGOpcode opc : 8; /* 8 */ | ||
77 | + TCGOpcode opc : 8; | ||
78 | + unsigned nargs : 8; | ||
79 | |||
80 | /* Parameters for this opcode. See below. */ | ||
81 | - unsigned param1 : 4; /* 12 */ | ||
82 | - unsigned param2 : 4; /* 16 */ | ||
83 | + unsigned param1 : 8; | ||
84 | + unsigned param2 : 8; | ||
85 | |||
86 | /* Lifetime data of the operands. */ | ||
87 | - unsigned life : 16; /* 32 */ | ||
88 | + TCGLifeData life; | ||
89 | |||
90 | /* Next and previous opcodes. */ | ||
91 | QTAILQ_ENTRY(TCGOp) link; | ||
92 | |||
93 | - /* Arguments for the opcode. */ | ||
94 | - TCGArg args[MAX_OPC_PARAM]; | ||
95 | - | ||
96 | /* Register preferences for the output(s). */ | ||
97 | TCGRegSet output_pref[2]; | ||
98 | + | ||
99 | + /* Arguments for the opcode. */ | ||
100 | + TCGArg args[]; | ||
101 | } TCGOp; | ||
102 | |||
103 | #define TCGOP_CALLI(X) (X)->param1 | ||
104 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/accel/tcg/plugin-gen.c | ||
107 | +++ b/accel/tcg/plugin-gen.c | ||
108 | @@ -XXX,XX +XXX,XX @@ static TCGOp *rm_ops(TCGOp *op) | ||
109 | |||
110 | static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op) | ||
31 | { | 111 | { |
32 | -#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128) | 112 | - unsigned nargs = ARRAY_SIZE(op->args); |
33 | uint32_t mem_idx = cpu_mmu_index(env, false); | 113 | + TCGOp *old_op = QTAILQ_NEXT(*begin_op, link); |
34 | -#endif | 114 | + unsigned nargs = old_op->nargs; |
35 | uintptr_t ra = GETPC(); | 115 | |
36 | uint32_t fc = extract32(env->regs[0], 0, 8); | 116 | - *begin_op = QTAILQ_NEXT(*begin_op, link); |
37 | uint32_t sc = extract32(env->regs[0], 8, 8); | 117 | - tcg_debug_assert(*begin_op); |
38 | @@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, | 118 | - op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc, nargs); |
39 | probe_write(env, a2, 0, mem_idx, ra); | 119 | - memcpy(op->args, (*begin_op)->args, sizeof(op->args)); |
40 | #endif | 120 | + *begin_op = old_op; |
41 | 121 | + op = tcg_op_insert_after(tcg_ctx, op, old_op->opc, nargs); | |
42 | - /* Note that the compare-and-swap is atomic, and the store is atomic, but | 122 | + memcpy(op->args, old_op->args, sizeof(op->args[0]) * nargs); |
43 | - the complete operation is not. Therefore we do not need to assert serial | 123 | |
44 | - context in order to implement this. That said, restart early if we can't | 124 | return op; |
45 | - support either operation that is supposed to be atomic. */ | 125 | } |
46 | + /* | 126 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
47 | + * Note that the compare-and-swap is atomic, and the store is atomic, | 127 | index XXXXXXX..XXXXXXX 100644 |
48 | + * but the complete operation is not. Therefore we do not need to | 128 | --- a/tcg/tcg.c |
49 | + * assert serial context in order to implement this. That said, | 129 | +++ b/tcg/tcg.c |
50 | + * restart early if we can't support either operation that is supposed | 130 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) |
51 | + * to be atomic. | ||
52 | + */ | ||
53 | if (parallel) { | ||
54 | - int mask = 0; | ||
55 | -#if !defined(CONFIG_ATOMIC64) | ||
56 | - mask = -8; | ||
57 | -#elif !defined(CONFIG_ATOMIC128) | ||
58 | - mask = -16; | ||
59 | + uint32_t max = 2; | ||
60 | +#ifdef CONFIG_ATOMIC64 | ||
61 | + max = 3; | ||
62 | #endif | ||
63 | - if (((4 << fc) | (1 << sc)) & mask) { | ||
64 | + if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) || | ||
65 | + (HAVE_ATOMIC128 ? 0 : sc > max)) { | ||
66 | cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
67 | } | 131 | } |
68 | } | 132 | } |
69 | @@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, | 133 | |
70 | Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]); | 134 | - max_args = ARRAY_SIZE(op->args); |
71 | Int128 ov; | 135 | + /* |
72 | 136 | + * A Call op needs up to 4 + 2N parameters on 32-bit archs, | |
73 | - if (parallel) { | 137 | + * and up to 4 + N parameters on 64-bit archs |
74 | -#ifdef CONFIG_ATOMIC128 | 138 | + * (N = number of input arguments + output arguments). |
75 | - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | 139 | + */ |
76 | - ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); | 140 | + max_args = (64 / TCG_TARGET_REG_BITS) * nargs + 4; |
77 | - cc = !int128_eq(ov, cv); | 141 | op = tcg_emit_op(INDEX_op_call, max_args); |
78 | -#else | 142 | |
79 | - /* Note that we asserted !parallel above. */ | 143 | pi = 0; |
80 | - g_assert_not_reached(); | 144 | @@ -XXX,XX +XXX,XX @@ void tcg_remove_ops_after(TCGOp *op) |
81 | -#endif | 145 | static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) |
82 | - } else { | 146 | { |
83 | + if (!parallel) { | 147 | TCGContext *s = tcg_ctx; |
84 | uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra); | 148 | - TCGOp *op; |
85 | uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra); | 149 | + TCGOp *op = NULL; |
86 | 150 | ||
87 | @@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, | 151 | - assert(nargs < ARRAY_SIZE(op->args)); |
88 | 152 | - if (likely(QTAILQ_EMPTY(&s->free_ops))) { | |
89 | cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra); | 153 | - op = tcg_malloc(sizeof(TCGOp)); |
90 | cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra); | 154 | - } else { |
91 | + } else if (HAVE_CMPXCHG128) { | 155 | - op = QTAILQ_FIRST(&s->free_ops); |
92 | + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | 156 | - QTAILQ_REMOVE(&s->free_ops, op, link); |
93 | + ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); | 157 | + if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { |
94 | + cc = !int128_eq(ov, cv); | 158 | + QTAILQ_FOREACH(op, &s->free_ops, link) { |
95 | + } else { | 159 | + if (nargs <= op->nargs) { |
96 | + /* Note that we asserted !parallel above. */ | 160 | + QTAILQ_REMOVE(&s->free_ops, op, link); |
97 | + g_assert_not_reached(); | 161 | + nargs = op->nargs; |
98 | } | 162 | + goto found; |
99 | 163 | + } | |
100 | env->regs[r3 + 0] = int128_gethi(ov); | 164 | + } |
101 | @@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, | ||
102 | cpu_stq_data_ra(env, a2, svh, ra); | ||
103 | break; | ||
104 | case 4: | ||
105 | - if (parallel) { | ||
106 | -#ifdef CONFIG_ATOMIC128 | ||
107 | + if (!parallel) { | ||
108 | + cpu_stq_data_ra(env, a2 + 0, svh, ra); | ||
109 | + cpu_stq_data_ra(env, a2 + 8, svl, ra); | ||
110 | + } else if (HAVE_ATOMIC128) { | ||
111 | TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
112 | Int128 sv = int128_make128(svl, svh); | ||
113 | helper_atomic_sto_be_mmu(env, a2, sv, oi, ra); | ||
114 | -#else | ||
115 | + } else { | ||
116 | /* Note that we asserted !parallel above. */ | ||
117 | g_assert_not_reached(); | ||
118 | -#endif | ||
119 | - } else { | ||
120 | - cpu_stq_data_ra(env, a2 + 0, svh, ra); | ||
121 | - cpu_stq_data_ra(env, a2 + 8, svl, ra); | ||
122 | } | ||
123 | break; | ||
124 | default: | ||
125 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel) | ||
126 | uintptr_t ra = GETPC(); | ||
127 | uint64_t hi, lo; | ||
128 | |||
129 | - if (parallel) { | ||
130 | -#ifndef CONFIG_ATOMIC128 | ||
131 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
132 | -#else | ||
133 | + if (!parallel) { | ||
134 | + check_alignment(env, addr, 16, ra); | ||
135 | + hi = cpu_ldq_data_ra(env, addr + 0, ra); | ||
136 | + lo = cpu_ldq_data_ra(env, addr + 8, ra); | ||
137 | + } else if (HAVE_ATOMIC128) { | ||
138 | int mem_idx = cpu_mmu_index(env, false); | ||
139 | TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
140 | Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); | ||
141 | hi = int128_gethi(v); | ||
142 | lo = int128_getlo(v); | ||
143 | -#endif | ||
144 | } else { | ||
145 | - check_alignment(env, addr, 16, ra); | ||
146 | - | ||
147 | - hi = cpu_ldq_data_ra(env, addr + 0, ra); | ||
148 | - lo = cpu_ldq_data_ra(env, addr + 8, ra); | ||
149 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
150 | } | 165 | } |
151 | 166 | + | |
152 | env->retxl = lo; | 167 | + /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ |
153 | @@ -XXX,XX +XXX,XX @@ static void do_stpq(CPUS390XState *env, uint64_t addr, | 168 | + nargs = MAX(4, nargs); |
154 | { | 169 | + op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); |
155 | uintptr_t ra = GETPC(); | 170 | + |
156 | 171 | + found: | |
157 | - if (parallel) { | 172 | memset(op, 0, offsetof(TCGOp, link)); |
158 | -#ifndef CONFIG_ATOMIC128 | 173 | op->opc = opc; |
159 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | 174 | - s->nb_ops++; |
160 | -#else | 175 | + op->nargs = nargs; |
161 | - int mem_idx = cpu_mmu_index(env, false); | 176 | |
162 | - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | 177 | + /* Check for bitfield overflow. */ |
163 | - | 178 | + tcg_debug_assert(op->nargs == nargs); |
164 | - Int128 v = int128_make128(low, high); | 179 | + |
165 | - helper_atomic_sto_be_mmu(env, addr, v, oi, ra); | 180 | + s->nb_ops++; |
166 | -#endif | 181 | return op; |
167 | - } else { | ||
168 | + if (!parallel) { | ||
169 | check_alignment(env, addr, 16, ra); | ||
170 | - | ||
171 | cpu_stq_data_ra(env, addr + 0, high, ra); | ||
172 | cpu_stq_data_ra(env, addr + 8, low, ra); | ||
173 | + } else if (HAVE_ATOMIC128) { | ||
174 | + int mem_idx = cpu_mmu_index(env, false); | ||
175 | + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
176 | + Int128 v = int128_make128(low, high); | ||
177 | + helper_atomic_sto_be_mmu(env, addr, v, oi, ra); | ||
178 | + } else { | ||
179 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
180 | } | ||
181 | } | 182 | } |
182 | 183 | ||
183 | -- | 184 | -- |
184 | 2.17.2 | 185 | 2.34.1 |
185 | 186 | ||
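
The allocation scheme reads well as a standalone model (not the QEMU code; the QTAILQ free list is reduced to a singly linked list here, and names are invented): the argument array becomes a C99 flexible array member, allocations are sized by the argument count rounded up to four slots to maximize reuse, and a freed op is recycled whenever one with enough slots sits on the free list.

#include <stdio.h>
#include <stdlib.h>

struct op {
    unsigned nargs;
    struct op *next_free;   /* stand-in for the QTAILQ link */
    int args[];             /* C99 flexible array member */
};

static struct op *free_list;

static struct op *op_alloc(unsigned nargs)
{
    struct op *op;

    /* First look for a recycled op with enough argument slots. */
    for (struct op **p = &free_list; *p; p = &(*p)->next_free) {
        if (nargs <= (*p)->nargs) {
            op = *p;
            *p = op->next_free;
            return op;
        }
    }

    /* Most ops have 3 or 4 operands: round up to reduce fragmentation. */
    if (nargs < 4) {
        nargs = 4;
    }
    op = malloc(sizeof(*op) + sizeof(int) * nargs);
    op->nargs = nargs;
    return op;
}

static void op_free(struct op *op)
{
    op->next_free = free_list;
    free_list = op;
}

int main(void)
{
    struct op *a = op_alloc(2);   /* rounded up to 4 slots */

    op_free(a);
    struct op *b = op_alloc(3);   /* fits in a's 4 slots: reused */
    printf("reused: %s (%u slots)\n", a == b ? "yes" : "no", b->nargs);
    op_free(b);
    return 0;
}
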
1 | Isolate the computation of an index from an address into a | 1 | We will shortly have the possibility of more than two outputs, |
---|---|---|---|
2 | helper before we change that function. | 2 | though only for calls (for which preferences are moot). Avoid |
3 | direct references to op->output_pref[] when possible. | ||
3 | 4 | ||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | [ cota: convert tlb_vaddr_to_host; use atomic_read on addr_write ] | ||
7 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
8 | Message-Id: <20181009175129.17888-2-cota@braap.org> | ||
9 | --- | 7 | --- |
10 | accel/tcg/softmmu_template.h | 64 +++++++++++++++++--------------- | 8 | include/tcg/tcg.h | 5 +++++ |
11 | include/exec/cpu_ldst.h | 19 ++++++++-- | 9 | tcg/tcg.c | 34 ++++++++++++++++++---------------- |
12 | include/exec/cpu_ldst_template.h | 25 +++++++------ | 10 | 2 files changed, 23 insertions(+), 16 deletions(-) |
13 | accel/tcg/cputlb.c | 60 ++++++++++++++---------------- | ||
14 | 4 files changed, 90 insertions(+), 78 deletions(-) | ||
15 | 11 | ||
16 | diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h | 12 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h |
17 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/accel/tcg/softmmu_template.h | 14 | --- a/include/tcg/tcg.h |
19 | +++ b/accel/tcg/softmmu_template.h | 15 | +++ b/include/tcg/tcg.h |
20 | @@ -XXX,XX +XXX,XX @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, | 16 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGOp { |
21 | WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, | 17 | /* Make sure operands fit in the bitfields above. */ |
22 | TCGMemOpIdx oi, uintptr_t retaddr) | 18 | QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); |
23 | { | 19 | |
24 | - unsigned mmu_idx = get_mmuidx(oi); | 20 | +static inline TCGRegSet output_pref(const TCGOp *op, unsigned i) |
25 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | 21 | +{ |
26 | - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; | 22 | + return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0; |
27 | + uintptr_t mmu_idx = get_mmuidx(oi); | 23 | +} |
28 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | 24 | + |
29 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | 25 | typedef struct TCGProfile { |
30 | + target_ulong tlb_addr = entry->ADDR_READ; | 26 | int64_t cpu_exec_time; |
31 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | 27 | int64_t tb_count1; |
32 | uintptr_t haddr; | 28 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
33 | DATA_TYPE res; | 29 | index XXXXXXX..XXXXXXX 100644 |
34 | @@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, | 30 | --- a/tcg/tcg.c |
35 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, | 31 | +++ b/tcg/tcg.c |
36 | mmu_idx, retaddr); | 32 | @@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) |
33 | |||
34 | if (have_prefs) { | ||
35 | for (i = 0; i < nb_oargs; ++i) { | ||
36 | - TCGRegSet set = op->output_pref[i]; | ||
37 | + TCGRegSet set = output_pref(op, i); | ||
38 | |||
39 | if (i == 0) { | ||
40 | ne_fprintf(f, " pref="); | ||
41 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
42 | } | ||
43 | ts->state = TS_DEAD; | ||
44 | la_reset_pref(ts); | ||
45 | - | ||
46 | - /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ | ||
47 | - op->output_pref[i] = 0; | ||
48 | } | ||
49 | |||
50 | + /* Not used -- it will be tcg_target_call_oarg_reg(). */ | ||
51 | + memset(op->output_pref, 0, sizeof(op->output_pref)); | ||
52 | + | ||
53 | if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | | ||
54 | TCG_CALL_NO_READ_GLOBALS))) { | ||
55 | la_global_kill(s, nb_globals); | ||
56 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
57 | ts = arg_temp(op->args[i]); | ||
58 | |||
59 | /* Remember the preference of the uses that followed. */ | ||
60 | - op->output_pref[i] = *la_temp_pref(ts); | ||
61 | + if (i < ARRAY_SIZE(op->output_pref)) { | ||
62 | + op->output_pref[i] = *la_temp_pref(ts); | ||
63 | + } | ||
64 | |||
65 | /* Output args are dead. */ | ||
66 | if (ts->state & TS_DEAD) { | ||
67 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
68 | |||
69 | set &= ct->regs; | ||
70 | if (ct->ialias) { | ||
71 | - set &= op->output_pref[ct->alias_index]; | ||
72 | + set &= output_pref(op, ct->alias_index); | ||
73 | } | ||
74 | /* If the combination is not possible, restart. */ | ||
75 | if (set == 0) { | ||
76 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) | ||
77 | TCGReg oreg, ireg; | ||
78 | |||
79 | allocated_regs = s->reserved_regs; | ||
80 | - preferred_regs = op->output_pref[0]; | ||
81 | + preferred_regs = output_pref(op, 0); | ||
82 | ots = arg_temp(op->args[0]); | ||
83 | ts = arg_temp(op->args[1]); | ||
84 | |||
85 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
86 | if (IS_DEAD_ARG(1)) { | ||
87 | temp_dead(s, its); | ||
37 | } | 88 | } |
38 | - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; | 89 | - tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); |
39 | + tlb_addr = entry->ADDR_READ; | 90 | + tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); |
40 | } | ||
41 | |||
42 | /* Handle an IO access. */ | ||
43 | @@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, | ||
44 | return res; | ||
45 | } | ||
46 | |||
47 | - haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
48 | + haddr = addr + entry->addend; | ||
49 | #if DATA_SIZE == 1 | ||
50 | res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr); | ||
51 | #else | ||
52 | @@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, | ||
53 | WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, | ||
54 | TCGMemOpIdx oi, uintptr_t retaddr) | ||
55 | { | ||
56 | - unsigned mmu_idx = get_mmuidx(oi); | ||
57 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
58 | - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; | ||
59 | + uintptr_t mmu_idx = get_mmuidx(oi); | ||
60 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
61 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
62 | + target_ulong tlb_addr = entry->ADDR_READ; | ||
63 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | ||
64 | uintptr_t haddr; | ||
65 | DATA_TYPE res; | ||
66 | @@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, | ||
67 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, | ||
68 | mmu_idx, retaddr); | ||
69 | } | ||
70 | - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; | ||
71 | + tlb_addr = entry->ADDR_READ; | ||
72 | } | ||
73 | |||
74 | /* Handle an IO access. */ | ||
75 | @@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, | ||
76 | return res; | ||
77 | } | ||
78 | |||
79 | - haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
80 | + haddr = addr + entry->addend; | ||
81 | res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr); | ||
82 | return res; | ||
83 | } | ||
84 | @@ -XXX,XX +XXX,XX @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, | ||
85 | void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
86 | TCGMemOpIdx oi, uintptr_t retaddr) | ||
87 | { | ||
88 | - unsigned mmu_idx = get_mmuidx(oi); | ||
89 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
90 | - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; | ||
91 | + uintptr_t mmu_idx = get_mmuidx(oi); | ||
92 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
93 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
94 | + target_ulong tlb_addr = entry->addr_write; | ||
95 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | ||
96 | uintptr_t haddr; | ||
97 | |||
98 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
99 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, | ||
100 | mmu_idx, retaddr); | ||
101 | } | ||
102 | - tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK; | ||
103 | + tlb_addr = entry->addr_write & ~TLB_INVALID_MASK; | ||
104 | } | ||
105 | |||
106 | /* Handle an IO access. */ | ||
107 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
108 | if (DATA_SIZE > 1 | ||
109 | && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 | ||
110 | >= TARGET_PAGE_SIZE)) { | ||
111 | - int i, index2; | ||
112 | - target_ulong page2, tlb_addr2; | ||
113 | + int i; | ||
114 | + target_ulong page2; | ||
115 | + CPUTLBEntry *entry2; | ||
116 | do_unaligned_access: | ||
117 | /* Ensure the second page is in the TLB. Note that the first page | ||
118 | is already guaranteed to be filled, and that the second page | ||
119 | cannot evict the first. */ | ||
120 | page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; | ||
121 | - index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
122 | - tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write; | ||
123 | - if (!tlb_hit_page(tlb_addr2, page2) | ||
124 | + entry2 = tlb_entry(env, mmu_idx, page2); | ||
125 | + if (!tlb_hit_page(entry2->addr_write, page2) | ||
126 | && !VICTIM_TLB_HIT(addr_write, page2)) { | ||
127 | tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, | ||
128 | mmu_idx, retaddr); | ||
129 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
130 | return; | 91 | return; |
131 | } | 92 | } |
132 | 93 | ||
133 | - haddr = addr + env->tlb_table[mmu_idx][index].addend; | 94 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) |
134 | + haddr = addr + entry->addend; | 95 | tcg_regset_set_reg(allocated_regs, its->reg); |
135 | #if DATA_SIZE == 1 | ||
136 | glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val); | ||
137 | #else | ||
138 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
139 | void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
140 | TCGMemOpIdx oi, uintptr_t retaddr) | ||
141 | { | ||
142 | - unsigned mmu_idx = get_mmuidx(oi); | ||
143 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
144 | - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; | ||
145 | + uintptr_t mmu_idx = get_mmuidx(oi); | ||
146 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
147 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
148 | + target_ulong tlb_addr = entry->addr_write; | ||
149 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | ||
150 | uintptr_t haddr; | ||
151 | |||
152 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
153 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, | ||
154 | mmu_idx, retaddr); | ||
155 | } | 96 | } |
156 | - tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK; | 97 | oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, |
157 | + tlb_addr = entry->addr_write & ~TLB_INVALID_MASK; | 98 | - op->output_pref[0], ots->indirect_base); |
99 | + output_pref(op, 0), ots->indirect_base); | ||
100 | set_temp_val_reg(s, ots, oreg); | ||
158 | } | 101 | } |
159 | 102 | ||
160 | /* Handle an IO access. */ | 103 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) |
161 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | 104 | switch (arg_ct->pair) { |
162 | if (DATA_SIZE > 1 | 105 | case 0: /* not paired */ |
163 | && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 | 106 | if (arg_ct->ialias) { |
164 | >= TARGET_PAGE_SIZE)) { | 107 | - i_preferred_regs = op->output_pref[arg_ct->alias_index]; |
165 | - int i, index2; | 108 | + i_preferred_regs = output_pref(op, arg_ct->alias_index); |
166 | - target_ulong page2, tlb_addr2; | 109 | |
167 | + int i; | 110 | /* |
168 | + target_ulong page2; | 111 | * If the input is not dead after the instruction, |
169 | + CPUTLBEntry *entry2; | 112 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) |
170 | do_unaligned_access: | 113 | * and to identify a few cases where it's not required. |
171 | /* Ensure the second page is in the TLB. Note that the first page | 114 | */ |
172 | is already guaranteed to be filled, and that the second page | 115 | if (arg_ct->ialias) { |
173 | cannot evict the first. */ | 116 | - i_preferred_regs = op->output_pref[arg_ct->alias_index]; |
174 | page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; | 117 | + i_preferred_regs = output_pref(op, arg_ct->alias_index); |
175 | - index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | 118 | if (IS_DEAD_ARG(i1) && |
176 | - tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write; | 119 | IS_DEAD_ARG(i2) && |
177 | - if (!tlb_hit_page(tlb_addr2, page2) | 120 | ts->val_type == TEMP_VAL_REG && |
178 | + entry2 = tlb_entry(env, mmu_idx, page2); | 121 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) |
179 | + if (!tlb_hit_page(entry2->addr_write, page2) | 122 | |
180 | && !VICTIM_TLB_HIT(addr_write, page2)) { | 123 | case 3: /* ialias with second output, no first input */ |
181 | tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, | 124 | tcg_debug_assert(arg_ct->ialias); |
182 | mmu_idx, retaddr); | 125 | - i_preferred_regs = op->output_pref[arg_ct->alias_index]; |
183 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | 126 | + i_preferred_regs = output_pref(op, arg_ct->alias_index); |
184 | return; | 127 | |
128 | if (IS_DEAD_ARG(i) && | ||
129 | ts->val_type == TEMP_VAL_REG && | ||
130 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
131 | } else if (arg_ct->newreg) { | ||
132 | reg = tcg_reg_alloc(s, arg_ct->regs, | ||
133 | i_allocated_regs | o_allocated_regs, | ||
134 | - op->output_pref[k], ts->indirect_base); | ||
135 | + output_pref(op, k), ts->indirect_base); | ||
136 | } else { | ||
137 | reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, | ||
138 | - op->output_pref[k], ts->indirect_base); | ||
139 | + output_pref(op, k), ts->indirect_base); | ||
140 | } | ||
141 | break; | ||
142 | |||
143 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
144 | break; | ||
145 | } | ||
146 | reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, | ||
147 | - op->output_pref[k], ts->indirect_base); | ||
148 | + output_pref(op, k), ts->indirect_base); | ||
149 | break; | ||
150 | |||
151 | case 2: /* second of pair */ | ||
152 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
153 | } | ||
154 | |||
155 | oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
156 | - op->output_pref[0], ots->indirect_base); | ||
157 | + output_pref(op, 0), ots->indirect_base); | ||
158 | set_temp_val_reg(s, ots, oreg); | ||
185 | } | 159 | } |
186 | 160 | ||
187 | - haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
188 | + haddr = addr + entry->addend; | ||
189 | glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); | ||
190 | } | ||
191 | #endif /* DATA_SIZE > 1 */ | ||
192 | diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h | ||
193 | index XXXXXXX..XXXXXXX 100644 | ||
194 | --- a/include/exec/cpu_ldst.h | ||
195 | +++ b/include/exec/cpu_ldst.h | ||
196 | @@ -XXX,XX +XXX,XX @@ extern __thread uintptr_t helper_retaddr; | ||
197 | /* The memory helpers for tcg-generated code need tcg_target_long etc. */ | ||
198 | #include "tcg.h" | ||
199 | |||
200 | +/* Find the TLB index corresponding to the mmu_idx + address pair. */ | ||
201 | +static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, | ||
202 | + target_ulong addr) | ||
203 | +{ | ||
204 | + return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
205 | +} | ||
206 | + | ||
207 | +/* Find the TLB entry corresponding to the mmu_idx + address pair. */ | ||
208 | +static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, | ||
209 | + target_ulong addr) | ||
210 | +{ | ||
211 | + return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)]; | ||
212 | +} | ||
213 | + | ||
214 | #ifdef MMU_MODE0_SUFFIX | ||
215 | #define CPU_MMU_INDEX 0 | ||
216 | #define MEMSUFFIX MMU_MODE0_SUFFIX | ||
217 | @@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, | ||
218 | #if defined(CONFIG_USER_ONLY) | ||
219 | return g2h(addr); | ||
220 | #else | ||
221 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
222 | - CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index]; | ||
223 | + CPUTLBEntry *tlbentry = tlb_entry(env, mmu_idx, addr); | ||
224 | abi_ptr tlb_addr; | ||
225 | uintptr_t haddr; | ||
226 | |||
227 | @@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, | ||
228 | return NULL; | ||
229 | } | ||
230 | |||
231 | - haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
232 | + haddr = addr + tlbentry->addend; | ||
233 | return (void *)haddr; | ||
234 | #endif /* defined(CONFIG_USER_ONLY) */ | ||
235 | } | ||
236 | diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h | ||
237 | index XXXXXXX..XXXXXXX 100644 | ||
238 | --- a/include/exec/cpu_ldst_template.h | ||
239 | +++ b/include/exec/cpu_ldst_template.h | ||
240 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
241 | target_ulong ptr, | ||
242 | uintptr_t retaddr) | ||
243 | { | ||
244 | - int page_index; | ||
245 | + CPUTLBEntry *entry; | ||
246 | RES_TYPE res; | ||
247 | target_ulong addr; | ||
248 | int mmu_idx; | ||
249 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
250 | #endif | ||
251 | |||
252 | addr = ptr; | ||
253 | - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
254 | mmu_idx = CPU_MMU_INDEX; | ||
255 | - if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != | ||
256 | + entry = tlb_entry(env, mmu_idx, addr); | ||
257 | + if (unlikely(entry->ADDR_READ != | ||
258 | (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { | ||
259 | oi = make_memop_idx(SHIFT, mmu_idx); | ||
260 | res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr, | ||
261 | oi, retaddr); | ||
262 | } else { | ||
263 | - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; | ||
264 | + uintptr_t hostaddr = addr + entry->addend; | ||
265 | res = glue(glue(ld, USUFFIX), _p)((uint8_t *)hostaddr); | ||
266 | } | ||
267 | return res; | ||
268 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
269 | target_ulong ptr, | ||
270 | uintptr_t retaddr) | ||
271 | { | ||
272 | - int res, page_index; | ||
273 | + CPUTLBEntry *entry; | ||
274 | + int res; | ||
275 | target_ulong addr; | ||
276 | int mmu_idx; | ||
277 | TCGMemOpIdx oi; | ||
278 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
279 | #endif | ||
280 | |||
281 | addr = ptr; | ||
282 | - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
283 | mmu_idx = CPU_MMU_INDEX; | ||
284 | - if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != | ||
285 | + entry = tlb_entry(env, mmu_idx, addr); | ||
286 | + if (unlikely(entry->ADDR_READ != | ||
287 | (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { | ||
288 | oi = make_memop_idx(SHIFT, mmu_idx); | ||
289 | res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX), | ||
290 | MMUSUFFIX)(env, addr, oi, retaddr); | ||
291 | } else { | ||
292 | - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; | ||
293 | + uintptr_t hostaddr = addr + entry->addend; | ||
294 | res = glue(glue(lds, SUFFIX), _p)((uint8_t *)hostaddr); | ||
295 | } | ||
296 | return res; | ||
297 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
298 | target_ulong ptr, | ||
299 | RES_TYPE v, uintptr_t retaddr) | ||
300 | { | ||
301 | - int page_index; | ||
302 | + CPUTLBEntry *entry; | ||
303 | target_ulong addr; | ||
304 | int mmu_idx; | ||
305 | TCGMemOpIdx oi; | ||
306 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
307 | #endif | ||
308 | |||
309 | addr = ptr; | ||
310 | - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
311 | mmu_idx = CPU_MMU_INDEX; | ||
312 | - if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write != | ||
313 | + entry = tlb_entry(env, mmu_idx, addr); | ||
314 | + if (unlikely(entry->addr_write != | ||
315 | (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { | ||
316 | oi = make_memop_idx(SHIFT, mmu_idx); | ||
317 | glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi, | ||
318 | retaddr); | ||
319 | } else { | ||
320 | - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; | ||
321 | + uintptr_t hostaddr = addr + entry->addend; | ||
322 | glue(glue(st, SUFFIX), _p)((uint8_t *)hostaddr, v); | ||
323 | } | ||
324 | } | ||
325 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
326 | index XXXXXXX..XXXXXXX 100644 | ||
327 | --- a/accel/tcg/cputlb.c | ||
328 | +++ b/accel/tcg/cputlb.c | ||
329 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) | ||
330 | { | ||
331 | CPUArchState *env = cpu->env_ptr; | ||
332 | target_ulong addr = (target_ulong) data.target_ptr; | ||
333 | - int i; | ||
334 | int mmu_idx; | ||
335 | |||
336 | assert_cpu_is_self(cpu); | ||
337 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) | ||
338 | } | ||
339 | |||
340 | addr &= TARGET_PAGE_MASK; | ||
341 | - i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
342 | qemu_spin_lock(&env->tlb_lock); | ||
343 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
344 | - tlb_flush_entry_locked(&env->tlb_table[mmu_idx][i], addr); | ||
345 | + tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr); | ||
346 | tlb_flush_vtlb_page_locked(env, mmu_idx, addr); | ||
347 | } | ||
348 | qemu_spin_unlock(&env->tlb_lock); | ||
349 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, | ||
350 | target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; | ||
351 | target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; | ||
352 | unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; | ||
353 | - int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
354 | int mmu_idx; | ||
355 | |||
356 | assert_cpu_is_self(cpu); | ||
357 | |||
358 | - tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", | ||
359 | - page, addr, mmu_idx_bitmap); | ||
360 | + tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", | ||
361 | + addr, mmu_idx_bitmap); | ||
362 | |||
363 | qemu_spin_lock(&env->tlb_lock); | ||
364 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
365 | if (test_bit(mmu_idx, &mmu_idx_bitmap)) { | ||
366 | - tlb_flush_entry_locked(&env->tlb_table[mmu_idx][page], addr); | ||
367 | + tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr); | ||
368 | tlb_flush_vtlb_page_locked(env, mmu_idx, addr); | ||
369 | } | ||
370 | } | ||
371 | @@ -XXX,XX +XXX,XX @@ static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, | ||
372 | void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) | ||
373 | { | ||
374 | CPUArchState *env = cpu->env_ptr; | ||
375 | - int i; | ||
376 | int mmu_idx; | ||
377 | |||
378 | assert_cpu_is_self(cpu); | ||
379 | |||
380 | vaddr &= TARGET_PAGE_MASK; | ||
381 | - i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
382 | qemu_spin_lock(&env->tlb_lock); | ||
383 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
384 | - tlb_set_dirty1_locked(&env->tlb_table[mmu_idx][i], vaddr); | ||
385 | + tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); | ||
386 | } | ||
387 | |||
388 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
389 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, | ||
390 | iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, | ||
391 | paddr_page, xlat, prot, &address); | ||
392 | |||
393 | - index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
394 | - te = &env->tlb_table[mmu_idx][index]; | ||
395 | + index = tlb_index(env, mmu_idx, vaddr_page); | ||
396 | + te = tlb_entry(env, mmu_idx, vaddr_page); | ||
397 | |||
398 | /* | ||
399 | * Hold the TLB lock for the rest of the function. We could acquire/release | ||
400 | @@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, | ||
401 | * repeat the MMU check here. This tlb_fill() call might | ||
402 | * longjump out if this access should cause a guest exception. | ||
403 | */ | ||
404 | - int index; | ||
405 | + CPUTLBEntry *entry; | ||
406 | target_ulong tlb_addr; | ||
407 | |||
408 | tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); | ||
409 | |||
410 | - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
411 | - tlb_addr = env->tlb_table[mmu_idx][index].addr_read; | ||
412 | + entry = tlb_entry(env, mmu_idx, addr); | ||
413 | + tlb_addr = entry->addr_read; | ||
414 | if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { | ||
415 | /* RAM access */ | ||
416 | - uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
417 | + uintptr_t haddr = addr + entry->addend; | ||
418 | |||
419 | return ldn_p((void *)haddr, size); | ||
420 | } | ||
421 | @@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, | ||
422 | * repeat the MMU check here. This tlb_fill() call might | ||
423 | * longjump out if this access should cause a guest exception. | ||
424 | */ | ||
425 | - int index; | ||
426 | + CPUTLBEntry *entry; | ||
427 | target_ulong tlb_addr; | ||
428 | |||
429 | tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); | ||
430 | |||
431 | - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
432 | - tlb_addr = env->tlb_table[mmu_idx][index].addr_write; | ||
433 | + entry = tlb_entry(env, mmu_idx, addr); | ||
434 | + tlb_addr = entry->addr_write; | ||
435 | if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { | ||
436 | /* RAM access */ | ||
437 | - uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
438 | + uintptr_t haddr = addr + entry->addend; | ||
439 | |||
440 | stn_p((void *)haddr, size, val); | ||
441 | return; | ||
442 | @@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, | ||
443 | */ | ||
444 | tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) | ||
445 | { | ||
446 | - int mmu_idx, index; | ||
447 | + uintptr_t mmu_idx = cpu_mmu_index(env, true); | ||
448 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
449 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
450 | void *p; | ||
451 | |||
452 | - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
453 | - mmu_idx = cpu_mmu_index(env, true); | ||
454 | - if (unlikely(!tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr))) { | ||
455 | + if (unlikely(!tlb_hit(entry->addr_code, addr))) { | ||
456 | if (!VICTIM_TLB_HIT(addr_code, addr)) { | ||
457 | tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); | ||
458 | } | ||
459 | - assert(tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr)); | ||
460 | + assert(tlb_hit(entry->addr_code, addr)); | ||
461 | } | ||
462 | |||
463 | - if (unlikely(env->tlb_table[mmu_idx][index].addr_code & | ||
464 | - (TLB_RECHECK | TLB_MMIO))) { | ||
465 | + if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) { | ||
466 | /* | ||
467 | * Return -1 if we can't translate and execute from an entire | ||
468 | * page of RAM here, which will cause us to execute by loading | ||
469 | @@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) | ||
470 | return -1; | ||
471 | } | ||
472 | |||
473 | - p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend); | ||
474 | + p = (void *)((uintptr_t)addr + entry->addend); | ||
475 | return qemu_ram_addr_from_host_nofail(p); | ||
476 | } | ||
477 | |||
478 | @@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) | ||
479 | void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, | ||
480 | uintptr_t retaddr) | ||
481 | { | ||
482 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
483 | - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; | ||
484 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
485 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
486 | |||
487 | - if (!tlb_hit(tlb_addr, addr)) { | ||
488 | + if (!tlb_hit(entry->addr_write, addr)) { | ||
489 | /* TLB entry is for a different page */ | ||
490 | if (!VICTIM_TLB_HIT(addr_write, addr)) { | ||
491 | tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, | ||
492 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
493 | NotDirtyInfo *ndi) | ||
494 | { | ||
495 | size_t mmu_idx = get_mmuidx(oi); | ||
496 | - size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
497 | - CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index]; | ||
498 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
499 | + CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); | ||
500 | target_ulong tlb_addr = tlbe->addr_write; | ||
501 | TCGMemOp mop = get_memop(oi); | ||
502 | int a_bits = get_alignment_bits(mop); | ||
503 | -- | 161 | -- |
504 | 2.17.2 | 162 | 2.34.1 |
505 | 163 | ||
506 | 164 | diff view generated by jsdifflib |
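The hunks above apply one mechanical transformation: every open-coded TLB lookup of the form env->tlb_table[mmu_idx][(addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1)] becomes a call through a helper. As a self-contained sketch of that pattern -- with illustrative constants and a pared-down struct layout, not the actual QEMU definitions:

    #include <stdint.h>

    #define TARGET_PAGE_BITS 12      /* illustrative: 4 KiB pages */
    #define CPU_TLB_SIZE     256     /* illustrative: entries per MMU mode */
    #define NB_MMU_MODES     4

    typedef uint64_t target_ulong;

    typedef struct CPUTLBEntry {
        target_ulong addr_read, addr_write, addr_code;
        uintptr_t addend;            /* guest-to-host address delta */
    } CPUTLBEntry;

    typedef struct CPUArchState {
        CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];
    } CPUArchState;

    /* Find the TLB index corresponding to a guest address. */
    static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
                                      target_ulong addr)
    {
        return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    }

    /* Find the TLB entry corresponding to a guest address. */
    static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
                                         target_ulong addr)
    {
        return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)];
    }

Centralizing the index computation means a later change to the table layout or size only has to touch these two helpers, not every load/store path.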
New patch | |||
---|---|---|---|
1 | Pre-compute the function call layout for each helper at startup. | ||
2 | Drop TCG_CALL_DUMMY_ARG, as we no longer need to leave gaps | ||
3 | in the op->args[] array. This allows several places to stop | ||
4 | checking for NULL TCGTemp, to which TCG_CALL_DUMMY_ARG mapped. | ||
1 | 5 | ||
6 | For tcg_gen_callN, loop over the arguments once. Allocate the TCGOp | ||
7 | for the call early but delay emitting it, collecting arguments first. | ||
8 | This allows the argument processing loop to emit code for extensions | ||
9 | and have them sequenced before the call. | ||
10 | |||
11 | For tcg_reg_alloc_call, loop over the arguments in reverse order, | ||
12 | which allows stack slots to be filled first naturally. | ||
13 | |||
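As a deliberately simplified illustration of these two ideas -- a per-helper layout computed once, then walked in reverse so stack stores happen while every argument register is still free -- using invented constants and types rather than the real TCG structures:

    #include <stdio.h>

    #define NR_ARG_REGS 4                /* illustrative register count */

    typedef enum { LOC_REG, LOC_STACK } LocKind;

    typedef struct ArgLoc {
        LocKind kind;
        int slot;                        /* register number or stack slot */
    } ArgLoc;

    /* Startup, once per helper: decide where each argument will live. */
    static void init_layout(ArgLoc *loc, int nargs)
    {
        for (int i = 0; i < nargs; i++) {
            if (i < NR_ARG_REGS) {
                loc[i] = (ArgLoc){ LOC_REG, i };
            } else {
                loc[i] = (ArgLoc){ LOC_STACK, i - NR_ARG_REGS };
            }
        }
    }

    /* Per call: walk the precomputed layout in reverse, so stacked
       arguments are stored before any argument register is claimed. */
    static void place_args(const ArgLoc *loc, int nargs)
    {
        for (int i = nargs - 1; i >= 0; i--) {
            if (loc[i].kind == LOC_STACK) {
                printf("store arg %d to stack slot %d\n", i, loc[i].slot);
            } else {
                printf("load arg %d into reg %d\n", i, loc[i].slot);
            }
        }
    }

    int main(void)
    {
        ArgLoc loc[6];
        init_layout(loc, 6);
        place_args(loc, 6);
        return 0;
    }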
14 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | --- | ||
16 | include/exec/helper-head.h | 2 + | ||
17 | include/tcg/tcg.h | 5 +- | ||
18 | tcg/tcg-internal.h | 22 +- | ||
19 | tcg/optimize.c | 6 +- | ||
20 | tcg/tcg.c | 609 ++++++++++++++++++++++--------------- | ||
21 | 5 files changed, 394 insertions(+), 250 deletions(-) | ||
22 | |||
23 | diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/include/exec/helper-head.h | ||
26 | +++ b/include/exec/helper-head.h | ||
27 | @@ -XXX,XX +XXX,XX @@ | ||
28 | #define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \ | ||
29 | DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) | ||
30 | |||
31 | +/* MAX_CALL_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */ | ||
32 | + | ||
33 | #endif /* EXEC_HELPER_HEAD_H */ | ||
34 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/include/tcg/tcg.h | ||
37 | +++ b/include/tcg/tcg.h | ||
38 | @@ -XXX,XX +XXX,XX @@ | ||
39 | /* XXX: make safe guess about sizes */ | ||
40 | #define MAX_OP_PER_INSTR 266 | ||
41 | |||
42 | +#define MAX_CALL_IARGS 7 | ||
43 | + | ||
44 | #define CPU_TEMP_BUF_NLONGS 128 | ||
45 | #define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long)) | ||
46 | |||
47 | @@ -XXX,XX +XXX,XX @@ typedef TCGv_ptr TCGv_env; | ||
48 | #define TCG_CALL_NO_RWG_SE (TCG_CALL_NO_RWG | TCG_CALL_NO_SE) | ||
49 | #define TCG_CALL_NO_WG_SE (TCG_CALL_NO_WG | TCG_CALL_NO_SE) | ||
50 | |||
51 | -/* Used to align parameters. See the comment before tcgv_i32_temp. */ | ||
52 | -#define TCG_CALL_DUMMY_ARG ((TCGArg)0) | ||
53 | - | ||
54 | /* | ||
55 | * Flags for the bswap opcodes. | ||
56 | * If IZ, the input is zero-extended, otherwise unknown. | ||
57 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/tcg/tcg-internal.h | ||
60 | +++ b/tcg/tcg-internal.h | ||
61 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
62 | TCG_CALL_ARG_EXTEND_S, /* ... as a sign-extended i64 */ | ||
63 | } TCGCallArgumentKind; | ||
64 | |||
65 | +typedef struct TCGCallArgumentLoc { | ||
66 | + TCGCallArgumentKind kind : 8; | ||
67 | + unsigned arg_slot : 8; | ||
68 | + unsigned ref_slot : 8; | ||
69 | + unsigned arg_idx : 4; | ||
70 | + unsigned tmp_subindex : 2; | ||
71 | +} TCGCallArgumentLoc; | ||
72 | + | ||
73 | +/* Avoid "unsigned < 0 is always false" Werror, when iarg_regs is empty. */ | ||
74 | +#define REG_P(L) \ | ||
75 | + ((int)(L)->arg_slot < (int)ARRAY_SIZE(tcg_target_call_iarg_regs)) | ||
76 | + | ||
77 | typedef struct TCGHelperInfo { | ||
78 | void *func; | ||
79 | const char *name; | ||
80 | - unsigned flags; | ||
81 | - unsigned typemask; | ||
82 | + unsigned typemask : 32; | ||
83 | + unsigned flags : 8; | ||
84 | + unsigned nr_in : 8; | ||
85 | + unsigned nr_out : 8; | ||
86 | + TCGCallReturnKind out_kind : 8; | ||
87 | + | ||
88 | + /* Maximum physical arguments are constrained by TCG_TYPE_I128. */ | ||
89 | + TCGCallArgumentLoc in[MAX_CALL_IARGS * (128 / TCG_TARGET_REG_BITS)]; | ||
90 | } TCGHelperInfo; | ||
91 | |||
92 | extern TCGContext tcg_init_ctx; | ||
93 | diff --git a/tcg/optimize.c b/tcg/optimize.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/tcg/optimize.c | ||
96 | +++ b/tcg/optimize.c | ||
97 | @@ -XXX,XX +XXX,XX @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) | ||
98 | { | ||
99 | for (int i = 0; i < nb_args; i++) { | ||
100 | TCGTemp *ts = arg_temp(op->args[i]); | ||
101 | - if (ts) { | ||
102 | - init_ts_info(ctx, ts); | ||
103 | - } | ||
104 | + init_ts_info(ctx, ts); | ||
105 | } | ||
106 | } | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op, | ||
109 | |||
110 | for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { | ||
111 | TCGTemp *ts = arg_temp(op->args[i]); | ||
112 | - if (ts && ts_is_copy(ts)) { | ||
113 | + if (ts_is_copy(ts)) { | ||
114 | op->args[i] = temp_arg(find_better_copy(s, ts)); | ||
115 | } | ||
116 | } | ||
117 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
118 | index XXXXXXX..XXXXXXX 100644 | ||
119 | --- a/tcg/tcg.c | ||
120 | +++ b/tcg/tcg.c | ||
121 | @@ -XXX,XX +XXX,XX @@ void tcg_pool_reset(TCGContext *s) | ||
122 | |||
123 | #include "exec/helper-proto.h" | ||
124 | |||
125 | -static const TCGHelperInfo all_helpers[] = { | ||
126 | +static TCGHelperInfo all_helpers[] = { | ||
127 | #include "exec/helper-tcg.h" | ||
128 | }; | ||
129 | static GHashTable *helper_table; | ||
130 | @@ -XXX,XX +XXX,XX @@ static ffi_type * const typecode_to_ffi[8] = { | ||
131 | }; | ||
132 | #endif | ||
133 | |||
134 | +typedef struct TCGCumulativeArgs { | ||
135 | + int arg_idx; /* tcg_gen_callN args[] */ | ||
136 | + int info_in_idx; /* TCGHelperInfo in[] */ | ||
137 | + int arg_slot; /* regs+stack slot */ | ||
138 | + int ref_slot; /* stack slots for references */ | ||
139 | +} TCGCumulativeArgs; | ||
140 | + | ||
141 | +static void layout_arg_even(TCGCumulativeArgs *cum) | ||
142 | +{ | ||
143 | + cum->arg_slot += cum->arg_slot & 1; | ||
144 | +} | ||
145 | + | ||
146 | +static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info, | ||
147 | + TCGCallArgumentKind kind) | ||
148 | +{ | ||
149 | + TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; | ||
150 | + | ||
151 | + *loc = (TCGCallArgumentLoc){ | ||
152 | + .kind = kind, | ||
153 | + .arg_idx = cum->arg_idx, | ||
154 | + .arg_slot = cum->arg_slot, | ||
155 | + }; | ||
156 | + cum->info_in_idx++; | ||
157 | + cum->arg_slot++; | ||
158 | +} | ||
159 | + | ||
160 | +static void layout_arg_normal_n(TCGCumulativeArgs *cum, | ||
161 | + TCGHelperInfo *info, int n) | ||
162 | +{ | ||
163 | + TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; | ||
164 | + | ||
165 | + for (int i = 0; i < n; ++i) { | ||
166 | + /* Layout all using the same arg_idx, adjusting the subindex. */ | ||
167 | + loc[i] = (TCGCallArgumentLoc){ | ||
168 | + .kind = TCG_CALL_ARG_NORMAL, | ||
169 | + .arg_idx = cum->arg_idx, | ||
170 | + .tmp_subindex = i, | ||
171 | + .arg_slot = cum->arg_slot + i, | ||
172 | + }; | ||
173 | + } | ||
174 | + cum->info_in_idx += n; | ||
175 | + cum->arg_slot += n; | ||
176 | +} | ||
177 | + | ||
178 | +static void init_call_layout(TCGHelperInfo *info) | ||
179 | +{ | ||
180 | + int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); | ||
181 | + int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); | ||
182 | + unsigned typemask = info->typemask; | ||
183 | + unsigned typecode; | ||
184 | + TCGCumulativeArgs cum = { }; | ||
185 | + | ||
186 | + /* | ||
187 | + * Parse and place any function return value. | ||
188 | + */ | ||
189 | + typecode = typemask & 7; | ||
190 | + switch (typecode) { | ||
191 | + case dh_typecode_void: | ||
192 | + info->nr_out = 0; | ||
193 | + break; | ||
194 | + case dh_typecode_i32: | ||
195 | + case dh_typecode_s32: | ||
196 | + case dh_typecode_ptr: | ||
197 | + info->nr_out = 1; | ||
198 | + info->out_kind = TCG_CALL_RET_NORMAL; | ||
199 | + break; | ||
200 | + case dh_typecode_i64: | ||
201 | + case dh_typecode_s64: | ||
202 | + info->nr_out = 64 / TCG_TARGET_REG_BITS; | ||
203 | + info->out_kind = TCG_CALL_RET_NORMAL; | ||
204 | + break; | ||
205 | + default: | ||
206 | + g_assert_not_reached(); | ||
207 | + } | ||
208 | + assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs)); | ||
209 | + | ||
210 | + /* | ||
211 | + * Parse and place function arguments. | ||
212 | + */ | ||
213 | + for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { | ||
214 | + TCGCallArgumentKind kind; | ||
215 | + TCGType type; | ||
216 | + | ||
217 | + typecode = typemask & 7; | ||
218 | + switch (typecode) { | ||
219 | + case dh_typecode_i32: | ||
220 | + case dh_typecode_s32: | ||
221 | + type = TCG_TYPE_I32; | ||
222 | + break; | ||
223 | + case dh_typecode_i64: | ||
224 | + case dh_typecode_s64: | ||
225 | + type = TCG_TYPE_I64; | ||
226 | + break; | ||
227 | + case dh_typecode_ptr: | ||
228 | + type = TCG_TYPE_PTR; | ||
229 | + break; | ||
230 | + default: | ||
231 | + g_assert_not_reached(); | ||
232 | + } | ||
233 | + | ||
234 | + switch (type) { | ||
235 | + case TCG_TYPE_I32: | ||
236 | + switch (TCG_TARGET_CALL_ARG_I32) { | ||
237 | + case TCG_CALL_ARG_EVEN: | ||
238 | + layout_arg_even(&cum); | ||
239 | + /* fall through */ | ||
240 | + case TCG_CALL_ARG_NORMAL: | ||
241 | + layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); | ||
242 | + break; | ||
243 | + case TCG_CALL_ARG_EXTEND: | ||
244 | + kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); | ||
245 | + layout_arg_1(&cum, info, kind); | ||
246 | + break; | ||
247 | + default: | ||
248 | + qemu_build_not_reached(); | ||
249 | + } | ||
250 | + break; | ||
251 | + | ||
252 | + case TCG_TYPE_I64: | ||
253 | + switch (TCG_TARGET_CALL_ARG_I64) { | ||
254 | + case TCG_CALL_ARG_EVEN: | ||
255 | + layout_arg_even(&cum); | ||
256 | + /* fall through */ | ||
257 | + case TCG_CALL_ARG_NORMAL: | ||
258 | + if (TCG_TARGET_REG_BITS == 32) { | ||
259 | + layout_arg_normal_n(&cum, info, 2); | ||
260 | + } else { | ||
261 | + layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); | ||
262 | + } | ||
263 | + break; | ||
264 | + default: | ||
265 | + qemu_build_not_reached(); | ||
266 | + } | ||
267 | + break; | ||
268 | + | ||
269 | + default: | ||
270 | + g_assert_not_reached(); | ||
271 | + } | ||
272 | + } | ||
273 | + info->nr_in = cum.info_in_idx; | ||
274 | + | ||
275 | + /* Validate that we didn't overrun the input array. */ | ||
276 | + assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); | ||
277 | + /* Validate the backend has enough argument space. */ | ||
278 | + assert(cum.arg_slot <= max_reg_slots + max_stk_slots); | ||
279 | + assert(cum.ref_slot <= max_stk_slots); | ||
280 | +} | ||
281 | + | ||
282 | static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; | ||
283 | static void process_op_defs(TCGContext *s); | ||
284 | static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, | ||
285 | @@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus) | ||
286 | helper_table = g_hash_table_new(NULL, NULL); | ||
287 | |||
288 | for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { | ||
289 | + init_call_layout(&all_helpers[i]); | ||
290 | g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, | ||
291 | (gpointer)&all_helpers[i]); | ||
292 | } | ||
293 | @@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op) | ||
294 | } | ||
295 | } | ||
296 | |||
297 | -/* Note: we convert the 64 bit args to 32 bit and do some alignment | ||
298 | - and endian swap. Maybe it would be better to do the alignment | ||
299 | - and endian swap in tcg_reg_alloc_call(). */ | ||
300 | +static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); | ||
301 | + | ||
302 | void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
303 | { | ||
304 | - int i, real_args, nb_rets, pi, max_args; | ||
305 | - unsigned typemask; | ||
306 | const TCGHelperInfo *info; | ||
307 | + TCGv_i64 extend_free[MAX_CALL_IARGS]; | ||
308 | + int n_extend = 0; | ||
309 | TCGOp *op; | ||
310 | + int i, n, pi = 0, total_args; | ||
311 | |||
312 | info = g_hash_table_lookup(helper_table, (gpointer)func); | ||
313 | - typemask = info->typemask; | ||
314 | + total_args = info->nr_out + info->nr_in + 2; | ||
315 | + op = tcg_op_alloc(INDEX_op_call, total_args); | ||
316 | |||
317 | #ifdef CONFIG_PLUGIN | ||
318 | /* detect non-plugin helpers */ | ||
319 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
320 | } | ||
321 | #endif | ||
322 | |||
323 | - if (TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_EXTEND) { | ||
324 | - for (i = 0; i < nargs; ++i) { | ||
325 | - int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
326 | - bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
327 | - bool is_signed = argtype & 1; | ||
328 | + TCGOP_CALLO(op) = n = info->nr_out; | ||
329 | + switch (n) { | ||
330 | + case 0: | ||
331 | + tcg_debug_assert(ret == NULL); | ||
332 | + break; | ||
333 | + case 1: | ||
334 | + tcg_debug_assert(ret != NULL); | ||
335 | + op->args[pi++] = temp_arg(ret); | ||
336 | + break; | ||
337 | + case 2: | ||
338 | + tcg_debug_assert(ret != NULL); | ||
339 | + tcg_debug_assert(ret->base_type == ret->type + 1); | ||
340 | + tcg_debug_assert(ret->temp_subindex == 0); | ||
341 | + op->args[pi++] = temp_arg(ret); | ||
342 | + op->args[pi++] = temp_arg(ret + 1); | ||
343 | + break; | ||
344 | + default: | ||
345 | + g_assert_not_reached(); | ||
346 | + } | ||
347 | |||
348 | - if (is_32bit) { | ||
349 | + TCGOP_CALLI(op) = n = info->nr_in; | ||
350 | + for (i = 0; i < n; i++) { | ||
351 | + const TCGCallArgumentLoc *loc = &info->in[i]; | ||
352 | + TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; | ||
353 | + | ||
354 | + switch (loc->kind) { | ||
355 | + case TCG_CALL_ARG_NORMAL: | ||
356 | + op->args[pi++] = temp_arg(ts); | ||
357 | + break; | ||
358 | + | ||
359 | + case TCG_CALL_ARG_EXTEND_U: | ||
360 | + case TCG_CALL_ARG_EXTEND_S: | ||
361 | + { | ||
362 | TCGv_i64 temp = tcg_temp_new_i64(); | ||
363 | - TCGv_i32 orig = temp_tcgv_i32(args[i]); | ||
364 | - if (is_signed) { | ||
365 | + TCGv_i32 orig = temp_tcgv_i32(ts); | ||
366 | + | ||
367 | + if (loc->kind == TCG_CALL_ARG_EXTEND_S) { | ||
368 | tcg_gen_ext_i32_i64(temp, orig); | ||
369 | } else { | ||
370 | tcg_gen_extu_i32_i64(temp, orig); | ||
371 | } | ||
372 | - args[i] = tcgv_i64_temp(temp); | ||
373 | + op->args[pi++] = tcgv_i64_arg(temp); | ||
374 | + extend_free[n_extend++] = temp; | ||
375 | } | ||
376 | - } | ||
377 | - } | ||
378 | - | ||
379 | - /* | ||
380 | - * A Call op needs up to 4 + 2N parameters on 32-bit archs, | ||
381 | - * and up to 4 + N parameters on 64-bit archs | ||
382 | - * (N = number of input arguments + output arguments). | ||
383 | - */ | ||
384 | - max_args = (64 / TCG_TARGET_REG_BITS) * nargs + 4; | ||
385 | - op = tcg_emit_op(INDEX_op_call, max_args); | ||
386 | - | ||
387 | - pi = 0; | ||
388 | - if (ret != NULL) { | ||
389 | - if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { | ||
390 | - op->args[pi++] = temp_arg(ret); | ||
391 | - op->args[pi++] = temp_arg(ret + 1); | ||
392 | - nb_rets = 2; | ||
393 | - } else { | ||
394 | - op->args[pi++] = temp_arg(ret); | ||
395 | - nb_rets = 1; | ||
396 | - } | ||
397 | - } else { | ||
398 | - nb_rets = 0; | ||
399 | - } | ||
400 | - TCGOP_CALLO(op) = nb_rets; | ||
401 | - | ||
402 | - real_args = 0; | ||
403 | - for (i = 0; i < nargs; i++) { | ||
404 | - int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
405 | - TCGCallArgumentKind kind; | ||
406 | - TCGType type; | ||
407 | - | ||
408 | - switch (argtype) { | ||
409 | - case dh_typecode_i32: | ||
410 | - case dh_typecode_s32: | ||
411 | - type = TCG_TYPE_I32; | ||
412 | break; | ||
413 | - case dh_typecode_i64: | ||
414 | - case dh_typecode_s64: | ||
415 | - type = TCG_TYPE_I64; | ||
416 | - break; | ||
417 | - case dh_typecode_ptr: | ||
418 | - type = TCG_TYPE_PTR; | ||
419 | - break; | ||
420 | - default: | ||
421 | - g_assert_not_reached(); | ||
422 | - } | ||
423 | |||
424 | - switch (type) { | ||
425 | - case TCG_TYPE_I32: | ||
426 | - kind = TCG_TARGET_CALL_ARG_I32; | ||
427 | - break; | ||
428 | - case TCG_TYPE_I64: | ||
429 | - kind = TCG_TARGET_CALL_ARG_I64; | ||
430 | - break; | ||
431 | - default: | ||
432 | - g_assert_not_reached(); | ||
433 | - } | ||
434 | - | ||
435 | - switch (kind) { | ||
436 | - case TCG_CALL_ARG_EVEN: | ||
437 | - if (real_args & 1) { | ||
438 | - op->args[pi++] = TCG_CALL_DUMMY_ARG; | ||
439 | - real_args++; | ||
440 | - } | ||
441 | - /* fall through */ | ||
442 | - case TCG_CALL_ARG_NORMAL: | ||
443 | - if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { | ||
444 | - op->args[pi++] = temp_arg(args[i]); | ||
445 | - op->args[pi++] = temp_arg(args[i] + 1); | ||
446 | - real_args += 2; | ||
447 | - break; | ||
448 | - } | ||
449 | - op->args[pi++] = temp_arg(args[i]); | ||
450 | - real_args++; | ||
451 | - break; | ||
452 | default: | ||
453 | g_assert_not_reached(); | ||
454 | } | ||
455 | } | ||
456 | op->args[pi++] = (uintptr_t)func; | ||
457 | op->args[pi++] = (uintptr_t)info; | ||
458 | - TCGOP_CALLI(op) = real_args; | ||
459 | + tcg_debug_assert(pi == total_args); | ||
460 | |||
461 | - /* Make sure the fields didn't overflow. */ | ||
462 | - tcg_debug_assert(TCGOP_CALLI(op) == real_args); | ||
463 | - tcg_debug_assert(pi <= max_args); | ||
464 | + QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); | ||
465 | |||
466 | - if (TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_EXTEND) { | ||
467 | - for (i = 0; i < nargs; ++i) { | ||
468 | - int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
469 | - bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
470 | - | ||
471 | - if (is_32bit) { | ||
472 | - tcg_temp_free_internal(args[i]); | ||
473 | - } | ||
474 | - } | ||
475 | + tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); | ||
476 | + for (i = 0; i < n_extend; ++i) { | ||
477 | + tcg_temp_free_i64(extend_free[i]); | ||
478 | } | ||
479 | } | ||
480 | |||
481 | @@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) | ||
482 | } | ||
483 | for (i = 0; i < nb_iargs; i++) { | ||
484 | TCGArg arg = op->args[nb_oargs + i]; | ||
485 | - const char *t = "<dummy>"; | ||
486 | - if (arg != TCG_CALL_DUMMY_ARG) { | ||
487 | - t = tcg_get_arg_str(s, buf, sizeof(buf), arg); | ||
488 | - } | ||
489 | + const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); | ||
490 | col += ne_fprintf(f, ",%s", t); | ||
491 | } | ||
492 | } else { | ||
493 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
494 | switch (opc) { | ||
495 | case INDEX_op_call: | ||
496 | { | ||
497 | - int call_flags; | ||
498 | - int nb_call_regs; | ||
499 | + const TCGHelperInfo *info = tcg_call_info(op); | ||
500 | + int call_flags = tcg_call_flags(op); | ||
501 | |||
502 | nb_oargs = TCGOP_CALLO(op); | ||
503 | nb_iargs = TCGOP_CALLI(op); | ||
504 | - call_flags = tcg_call_flags(op); | ||
505 | |||
506 | /* pure functions can be removed if their result is unused */ | ||
507 | if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { | ||
508 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
509 | /* Record arguments that die in this helper. */ | ||
510 | for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { | ||
511 | ts = arg_temp(op->args[i]); | ||
512 | - if (ts && ts->state & TS_DEAD) { | ||
513 | + if (ts->state & TS_DEAD) { | ||
514 | arg_life |= DEAD_ARG << i; | ||
515 | } | ||
516 | } | ||
517 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
518 | /* For all live registers, remove call-clobbered prefs. */ | ||
519 | la_cross_call(s, nb_temps); | ||
520 | |||
521 | - nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); | ||
522 | + /* | ||
523 | + * Input arguments are live for preceding opcodes. | ||
524 | + * | ||
525 | + * For those arguments that die, and will be allocated in | ||
526 | + * registers, clear the register set for that arg, to be | ||
527 | + * filled in below. For args that will be on the stack, | ||
528 | + * reset to any available reg. Process arguments in reverse | ||
529 | + * order so that if a temp is used more than once, the stack | ||
530 | + * reset to max happens before the register reset to 0. | ||
531 | + */ | ||
532 | + for (i = nb_iargs - 1; i >= 0; i--) { | ||
533 | + const TCGCallArgumentLoc *loc = &info->in[i]; | ||
534 | + ts = arg_temp(op->args[nb_oargs + i]); | ||
535 | |||
536 | - /* Input arguments are live for preceding opcodes. */ | ||
537 | - for (i = 0; i < nb_iargs; i++) { | ||
538 | - ts = arg_temp(op->args[i + nb_oargs]); | ||
539 | - if (ts && ts->state & TS_DEAD) { | ||
540 | - /* For those arguments that die, and will be allocated | ||
541 | - * in registers, clear the register set for that arg, | ||
542 | - * to be filled in below. For args that will be on | ||
543 | - * the stack, reset to any available reg. | ||
544 | - */ | ||
545 | - *la_temp_pref(ts) | ||
546 | - = (i < nb_call_regs ? 0 : | ||
547 | - tcg_target_available_regs[ts->type]); | ||
548 | + if (ts->state & TS_DEAD) { | ||
549 | + switch (loc->kind) { | ||
550 | + case TCG_CALL_ARG_NORMAL: | ||
551 | + case TCG_CALL_ARG_EXTEND_U: | ||
552 | + case TCG_CALL_ARG_EXTEND_S: | ||
553 | + if (REG_P(loc)) { | ||
554 | + *la_temp_pref(ts) = 0; | ||
555 | + break; | ||
556 | + } | ||
557 | + /* fall through */ | ||
558 | + default: | ||
559 | + *la_temp_pref(ts) = | ||
560 | + tcg_target_available_regs[ts->type]; | ||
561 | + break; | ||
562 | + } | ||
563 | ts->state &= ~TS_DEAD; | ||
564 | } | ||
565 | } | ||
566 | |||
567 | - /* For each input argument, add its input register to prefs. | ||
568 | - If a temp is used once, this produces a single set bit. */ | ||
569 | - for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { | ||
570 | - ts = arg_temp(op->args[i + nb_oargs]); | ||
571 | - if (ts) { | ||
572 | - tcg_regset_set_reg(*la_temp_pref(ts), | ||
573 | - tcg_target_call_iarg_regs[i]); | ||
574 | + /* | ||
575 | + * For each input argument, add its input register to prefs. | ||
576 | + * If a temp is used once, this produces a single set bit; | ||
577 | + * if a temp is used multiple times, this produces a set. | ||
578 | + */ | ||
579 | + for (i = 0; i < nb_iargs; i++) { | ||
580 | + const TCGCallArgumentLoc *loc = &info->in[i]; | ||
581 | + ts = arg_temp(op->args[nb_oargs + i]); | ||
582 | + | ||
583 | + switch (loc->kind) { | ||
584 | + case TCG_CALL_ARG_NORMAL: | ||
585 | + case TCG_CALL_ARG_EXTEND_U: | ||
586 | + case TCG_CALL_ARG_EXTEND_S: | ||
587 | + if (REG_P(loc)) { | ||
588 | + tcg_regset_set_reg(*la_temp_pref(ts), | ||
589 | + tcg_target_call_iarg_regs[loc->arg_slot]); | ||
590 | + } | ||
591 | + break; | ||
592 | + default: | ||
593 | + break; | ||
594 | } | ||
595 | } | ||
596 | } | ||
597 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
598 | /* Make sure that input arguments are available. */ | ||
599 | for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { | ||
600 | arg_ts = arg_temp(op->args[i]); | ||
601 | - if (arg_ts) { | ||
602 | - dir_ts = arg_ts->state_ptr; | ||
603 | - if (dir_ts && arg_ts->state == TS_DEAD) { | ||
604 | - TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 | ||
605 | - ? INDEX_op_ld_i32 | ||
606 | - : INDEX_op_ld_i64); | ||
607 | - TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); | ||
608 | + dir_ts = arg_ts->state_ptr; | ||
609 | + if (dir_ts && arg_ts->state == TS_DEAD) { | ||
610 | + TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 | ||
611 | + ? INDEX_op_ld_i32 | ||
612 | + : INDEX_op_ld_i64); | ||
613 | + TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); | ||
614 | |||
615 | - lop->args[0] = temp_arg(dir_ts); | ||
616 | - lop->args[1] = temp_arg(arg_ts->mem_base); | ||
617 | - lop->args[2] = arg_ts->mem_offset; | ||
618 | + lop->args[0] = temp_arg(dir_ts); | ||
619 | + lop->args[1] = temp_arg(arg_ts->mem_base); | ||
620 | + lop->args[2] = arg_ts->mem_offset; | ||
621 | |||
622 | - /* Loaded, but synced with memory. */ | ||
623 | - arg_ts->state = TS_MEM; | ||
624 | - } | ||
625 | + /* Loaded, but synced with memory. */ | ||
626 | + arg_ts->state = TS_MEM; | ||
627 | } | ||
628 | } | ||
629 | |||
630 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
631 | so that we reload when needed. */ | ||
632 | for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { | ||
633 | arg_ts = arg_temp(op->args[i]); | ||
634 | - if (arg_ts) { | ||
635 | - dir_ts = arg_ts->state_ptr; | ||
636 | - if (dir_ts) { | ||
637 | - op->args[i] = temp_arg(dir_ts); | ||
638 | - changes = true; | ||
639 | - if (IS_DEAD_ARG(i)) { | ||
640 | - arg_ts->state = TS_DEAD; | ||
641 | - } | ||
642 | + dir_ts = arg_ts->state_ptr; | ||
643 | + if (dir_ts) { | ||
644 | + op->args[i] = temp_arg(dir_ts); | ||
645 | + changes = true; | ||
646 | + if (IS_DEAD_ARG(i)) { | ||
647 | + arg_ts->state = TS_DEAD; | ||
648 | } | ||
649 | } | ||
650 | } | ||
651 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
652 | return true; | ||
653 | } | ||
654 | |||
655 | +static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, | ||
656 | + TCGRegSet allocated_regs) | ||
657 | +{ | ||
658 | + if (ts->val_type == TEMP_VAL_REG) { | ||
659 | + if (ts->reg != reg) { | ||
660 | + tcg_reg_free(s, reg, allocated_regs); | ||
661 | + if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { | ||
662 | + /* | ||
663 | + * Cross register class move not supported. Sync the | ||
664 | + * temp back to its slot and load from there. | ||
665 | + */ | ||
666 | + temp_sync(s, ts, allocated_regs, 0, 0); | ||
667 | + tcg_out_ld(s, ts->type, reg, | ||
668 | + ts->mem_base->reg, ts->mem_offset); | ||
669 | + } | ||
670 | + } | ||
671 | + } else { | ||
672 | + TCGRegSet arg_set = 0; | ||
673 | + | ||
674 | + tcg_reg_free(s, reg, allocated_regs); | ||
675 | + tcg_regset_set_reg(arg_set, reg); | ||
676 | + temp_load(s, ts, arg_set, allocated_regs, 0); | ||
677 | + } | ||
678 | +} | ||
679 | + | ||
680 | +static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts, | ||
681 | + TCGRegSet allocated_regs) | ||
682 | +{ | ||
683 | + /* | ||
684 | + * When the destination is on the stack, load up the temp and store. | ||
685 | + * If there are many call-saved registers, the temp might live to | ||
686 | + * see another use; otherwise it'll be discarded. | ||
687 | + */ | ||
688 | + temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); | ||
689 | + tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, | ||
690 | + TCG_TARGET_CALL_STACK_OFFSET + | ||
691 | + stk_slot * sizeof(tcg_target_long)); | ||
692 | +} | ||
693 | + | ||
694 | +static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, | ||
695 | + TCGTemp *ts, TCGRegSet *allocated_regs) | ||
696 | +{ | ||
697 | + if (REG_P(l)) { | ||
698 | + TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; | ||
699 | + load_arg_reg(s, reg, ts, *allocated_regs); | ||
700 | + tcg_regset_set_reg(*allocated_regs, reg); | ||
701 | + } else { | ||
702 | + load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs), | ||
703 | + ts, *allocated_regs); | ||
704 | + } | ||
705 | +} | ||
706 | + | ||
707 | static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
708 | { | ||
709 | const int nb_oargs = TCGOP_CALLO(op); | ||
710 | const int nb_iargs = TCGOP_CALLI(op); | ||
711 | const TCGLifeData arg_life = op->life; | ||
712 | - const TCGHelperInfo *info; | ||
713 | - int flags, nb_regs, i; | ||
714 | - TCGReg reg; | ||
715 | - TCGArg arg; | ||
716 | - TCGTemp *ts; | ||
717 | - intptr_t stack_offset; | ||
718 | - size_t call_stack_size; | ||
719 | - tcg_insn_unit *func_addr; | ||
720 | - int allocate_args; | ||
721 | - TCGRegSet allocated_regs; | ||
722 | + const TCGHelperInfo *info = tcg_call_info(op); | ||
723 | + TCGRegSet allocated_regs = s->reserved_regs; | ||
724 | + int i; | ||
725 | |||
726 | - func_addr = tcg_call_func(op); | ||
727 | - info = tcg_call_info(op); | ||
728 | - flags = info->flags; | ||
729 | + /* | ||
730 | + * Move inputs into place in reverse order, | ||
731 | + * so that we place stacked arguments first. | ||
732 | + */ | ||
733 | + for (i = nb_iargs - 1; i >= 0; --i) { | ||
734 | + const TCGCallArgumentLoc *loc = &info->in[i]; | ||
735 | + TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); | ||
736 | |||
737 | - nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); | ||
738 | - if (nb_regs > nb_iargs) { | ||
739 | - nb_regs = nb_iargs; | ||
740 | - } | ||
741 | - | ||
742 | - /* assign stack slots first */ | ||
743 | - call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); | ||
744 | - call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & | ||
745 | - ~(TCG_TARGET_STACK_ALIGN - 1); | ||
746 | - allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); | ||
747 | - if (allocate_args) { | ||
748 | - /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, | ||
749 | - preallocate call stack */ | ||
750 | - tcg_abort(); | ||
751 | - } | ||
752 | - | ||
753 | - stack_offset = TCG_TARGET_CALL_STACK_OFFSET; | ||
754 | - for (i = nb_regs; i < nb_iargs; i++) { | ||
755 | - arg = op->args[nb_oargs + i]; | ||
756 | - if (arg != TCG_CALL_DUMMY_ARG) { | ||
757 | - ts = arg_temp(arg); | ||
758 | - temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
759 | - s->reserved_regs, 0); | ||
760 | - tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); | ||
761 | - } | ||
762 | - stack_offset += sizeof(tcg_target_long); | ||
763 | - } | ||
764 | - | ||
765 | - /* assign input registers */ | ||
766 | - allocated_regs = s->reserved_regs; | ||
767 | - for (i = 0; i < nb_regs; i++) { | ||
768 | - arg = op->args[nb_oargs + i]; | ||
769 | - if (arg != TCG_CALL_DUMMY_ARG) { | ||
770 | - ts = arg_temp(arg); | ||
771 | - reg = tcg_target_call_iarg_regs[i]; | ||
772 | - | ||
773 | - if (ts->val_type == TEMP_VAL_REG) { | ||
774 | - if (ts->reg != reg) { | ||
775 | - tcg_reg_free(s, reg, allocated_regs); | ||
776 | - if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { | ||
777 | - /* | ||
778 | - * Cross register class move not supported. Sync the | ||
779 | - * temp back to its slot and load from there. | ||
780 | - */ | ||
781 | - temp_sync(s, ts, allocated_regs, 0, 0); | ||
782 | - tcg_out_ld(s, ts->type, reg, | ||
783 | - ts->mem_base->reg, ts->mem_offset); | ||
784 | - } | ||
785 | - } | ||
786 | - } else { | ||
787 | - TCGRegSet arg_set = 0; | ||
788 | - | ||
789 | - tcg_reg_free(s, reg, allocated_regs); | ||
790 | - tcg_regset_set_reg(arg_set, reg); | ||
791 | - temp_load(s, ts, arg_set, allocated_regs, 0); | ||
792 | - } | ||
793 | - | ||
794 | - tcg_regset_set_reg(allocated_regs, reg); | ||
795 | + switch (loc->kind) { | ||
796 | + case TCG_CALL_ARG_NORMAL: | ||
797 | + case TCG_CALL_ARG_EXTEND_U: | ||
798 | + case TCG_CALL_ARG_EXTEND_S: | ||
799 | + load_arg_normal(s, loc, ts, &allocated_regs); | ||
800 | + break; | ||
801 | + default: | ||
802 | + g_assert_not_reached(); | ||
803 | } | ||
804 | } | ||
805 | |||
806 | - /* mark dead temporaries and free the associated registers */ | ||
807 | + /* Mark dead temporaries and free the associated registers. */ | ||
808 | for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { | ||
809 | if (IS_DEAD_ARG(i)) { | ||
810 | temp_dead(s, arg_temp(op->args[i])); | ||
811 | } | ||
812 | } | ||
813 | |||
814 | - /* clobber call registers */ | ||
815 | + /* Clobber call registers. */ | ||
816 | for (i = 0; i < TCG_TARGET_NB_REGS; i++) { | ||
817 | if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { | ||
818 | tcg_reg_free(s, i, allocated_regs); | ||
819 | } | ||
820 | } | ||
821 | |||
822 | - /* Save globals if they might be written by the helper, sync them if | ||
823 | - they might be read. */ | ||
824 | - if (flags & TCG_CALL_NO_READ_GLOBALS) { | ||
825 | + /* | ||
826 | + * Save globals if they might be written by the helper, | ||
827 | + * sync them if they might be read. | ||
828 | + */ | ||
829 | + if (info->flags & TCG_CALL_NO_READ_GLOBALS) { | ||
830 | /* Nothing to do */ | ||
831 | - } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { | ||
832 | + } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { | ||
833 | sync_globals(s, allocated_regs); | ||
834 | } else { | ||
835 | save_globals(s, allocated_regs); | ||
836 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
837 | gpointer hash = (gpointer)(uintptr_t)info->typemask; | ||
838 | ffi_cif *cif = g_hash_table_lookup(ffi_table, hash); | ||
839 | assert(cif != NULL); | ||
840 | - tcg_out_call(s, func_addr, cif); | ||
841 | + tcg_out_call(s, tcg_call_func(op), cif); | ||
842 | } | ||
843 | #else | ||
844 | - tcg_out_call(s, func_addr); | ||
845 | + tcg_out_call(s, tcg_call_func(op)); | ||
846 | #endif | ||
847 | |||
848 | - /* assign output registers and emit moves if needed */ | ||
849 | - for(i = 0; i < nb_oargs; i++) { | ||
850 | - arg = op->args[i]; | ||
851 | - ts = arg_temp(arg); | ||
852 | + /* Assign output registers and emit moves if needed. */ | ||
853 | + switch (info->out_kind) { | ||
854 | + case TCG_CALL_RET_NORMAL: | ||
855 | + for (i = 0; i < nb_oargs; i++) { | ||
856 | + TCGTemp *ts = arg_temp(op->args[i]); | ||
857 | + TCGReg reg = tcg_target_call_oarg_regs[i]; | ||
858 | |||
859 | - /* ENV should not be modified. */ | ||
860 | - tcg_debug_assert(!temp_readonly(ts)); | ||
861 | + /* ENV should not be modified. */ | ||
862 | + tcg_debug_assert(!temp_readonly(ts)); | ||
863 | |||
864 | - reg = tcg_target_call_oarg_regs[i]; | ||
865 | - set_temp_val_reg(s, ts, reg); | ||
866 | - ts->mem_coherent = 0; | ||
867 | + set_temp_val_reg(s, ts, reg); | ||
868 | + ts->mem_coherent = 0; | ||
869 | + } | ||
870 | + break; | ||
871 | + default: | ||
872 | + g_assert_not_reached(); | ||
873 | + } | ||
874 | + | ||
875 | + /* Flush or discard output registers as needed. */ | ||
876 | + for (i = 0; i < nb_oargs; i++) { | ||
877 | + TCGTemp *ts = arg_temp(op->args[i]); | ||
878 | if (NEED_SYNC_ARG(i)) { | ||
879 | - temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); | ||
880 | + temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); | ||
881 | } else if (IS_DEAD_ARG(i)) { | ||
882 | temp_dead(s, ts); | ||
883 | } | ||
884 | -- | ||
885 | 2.34.1 | diff view generated by jsdifflib |
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | When we implemented per-vCPU TCG contexts, we forgot to also | 3 | In the unlikely case of invalid typecode mask, the function |
4 | distribute the tcg_time counter, which has remained as a global | 4 | will abort instead of returning a NULL pointer. |
5 | accessed without any serialization, leading to potentially missed | ||
6 | counts. | ||
7 | 5 | ||
8 | Fix it by distributing the field over the TCG contexts, embedding | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | it into TCGProfile with a field called "cpu_exec_time", which is more | 7 | Message-Id: <20221111074101.2069454-27-richard.henderson@linaro.org> |
10 | descriptive than "tcg_time". Add a function to query this value | 8 | [PMD: Split from bigger patch] |
11 | directly, and for completeness, fill in the field in | 9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
12 | tcg_profile_snapshot, even though its callers do not use it. | 10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
11 | Message-Id: <20221122180804.938-2-philmd@linaro.org> | ||
12 | --- | ||
13 | tcg/tcg.c | 30 ++++++++++++++++++++---------- | ||
14 | 1 file changed, 20 insertions(+), 10 deletions(-) | ||
13 | 15 | ||
14 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
15 | Message-Id: <20181010144853.13005-5-cota@braap.org> | ||
16 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
17 | --- | ||
18 | include/qemu/timer.h | 1 - | ||
19 | tcg/tcg.h | 2 ++ | ||
20 | cpus.c | 3 ++- | ||
21 | monitor.c | 13 ++++++++++--- | ||
22 | tcg/tcg.c | 23 +++++++++++++++++++++++ | ||
23 | 5 files changed, 37 insertions(+), 5 deletions(-) | ||
24 | |||
25 | diff --git a/include/qemu/timer.h b/include/qemu/timer.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/include/qemu/timer.h | ||
28 | +++ b/include/qemu/timer.h | ||
29 | @@ -XXX,XX +XXX,XX @@ static inline int64_t profile_getclock(void) | ||
30 | return get_clock(); | ||
31 | } | ||
32 | |||
33 | -extern int64_t tcg_time; | ||
34 | extern int64_t dev_time; | ||
35 | #endif | ||
36 | |||
37 | diff --git a/tcg/tcg.h b/tcg/tcg.h | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/tcg/tcg.h | ||
40 | +++ b/tcg/tcg.h | ||
41 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGOp { | ||
42 | QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); | ||
43 | |||
44 | typedef struct TCGProfile { | ||
45 | + int64_t cpu_exec_time; | ||
46 | int64_t tb_count1; | ||
47 | int64_t tb_count; | ||
48 | int64_t op_count; /* total insn count */ | ||
49 | @@ -XXX,XX +XXX,XX @@ int tcg_check_temp_count(void); | ||
50 | #define tcg_check_temp_count() 0 | ||
51 | #endif | ||
52 | |||
53 | +int64_t tcg_cpu_exec_time(void); | ||
54 | void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf); | ||
55 | void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf); | ||
56 | |||
57 | diff --git a/cpus.c b/cpus.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/cpus.c | ||
60 | +++ b/cpus.c | ||
61 | @@ -XXX,XX +XXX,XX @@ static int tcg_cpu_exec(CPUState *cpu) | ||
62 | ret = cpu_exec(cpu); | ||
63 | cpu_exec_end(cpu); | ||
64 | #ifdef CONFIG_PROFILER | ||
65 | - tcg_time += profile_getclock() - ti; | ||
66 | + atomic_set(&tcg_ctx->prof.cpu_exec_time, | ||
67 | + tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); | ||
68 | #endif | ||
69 | return ret; | ||
70 | } | ||
71 | diff --git a/monitor.c b/monitor.c | ||
72 | index XXXXXXX..XXXXXXX 100644 | ||
73 | --- a/monitor.c | ||
74 | +++ b/monitor.c | ||
75 | @@ -XXX,XX +XXX,XX @@ | ||
76 | #include "sysemu/cpus.h" | ||
77 | #include "sysemu/iothread.h" | ||
78 | #include "qemu/cutils.h" | ||
79 | +#include "tcg/tcg.h" | ||
80 | |||
81 | #if defined(TARGET_S390X) | ||
82 | #include "hw/s390x/storage-keys.h" | ||
83 | @@ -XXX,XX +XXX,XX @@ static void hmp_info_numa(Monitor *mon, const QDict *qdict) | ||
84 | |||
85 | #ifdef CONFIG_PROFILER | ||
86 | |||
87 | -int64_t tcg_time; | ||
88 | int64_t dev_time; | ||
89 | |||
90 | static void hmp_info_profile(Monitor *mon, const QDict *qdict) | ||
91 | { | ||
92 | + static int64_t last_cpu_exec_time; | ||
93 | + int64_t cpu_exec_time; | ||
94 | + int64_t delta; | ||
95 | + | ||
96 | + cpu_exec_time = tcg_cpu_exec_time(); | ||
97 | + delta = cpu_exec_time - last_cpu_exec_time; | ||
98 | + | ||
99 | monitor_printf(mon, "async time %" PRId64 " (%0.3f)\n", | ||
100 | dev_time, dev_time / (double)NANOSECONDS_PER_SECOND); | ||
101 | monitor_printf(mon, "qemu time %" PRId64 " (%0.3f)\n", | ||
102 | - tcg_time, tcg_time / (double)NANOSECONDS_PER_SECOND); | ||
103 | - tcg_time = 0; | ||
104 | + delta, delta / (double)NANOSECONDS_PER_SECOND); | ||
105 | + last_cpu_exec_time = cpu_exec_time; | ||
106 | dev_time = 0; | ||
107 | } | ||
108 | #else | ||
109 | diff --git a/tcg/tcg.c b/tcg/tcg.c | 16 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
110 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
111 | --- a/tcg/tcg.c | 18 | --- a/tcg/tcg.c |
112 | +++ b/tcg/tcg.c | 19 | +++ b/tcg/tcg.c |
113 | @@ -XXX,XX +XXX,XX @@ | 20 | @@ -XXX,XX +XXX,XX @@ static GHashTable *helper_table; |
114 | /* Define to jump the ELF file used to communicate with GDB. */ | 21 | #ifdef CONFIG_TCG_INTERPRETER |
115 | #undef DEBUG_JIT | 22 | static GHashTable *ffi_table; |
116 | 23 | ||
117 | +#include "qemu/error-report.h" | 24 | -static ffi_type * const typecode_to_ffi[8] = { |
118 | #include "qemu/cutils.h" | 25 | - [dh_typecode_void] = &ffi_type_void, |
119 | #include "qemu/host-utils.h" | 26 | - [dh_typecode_i32] = &ffi_type_uint32, |
120 | #include "qemu/timer.h" | 27 | - [dh_typecode_s32] = &ffi_type_sint32, |
121 | @@ -XXX,XX +XXX,XX @@ void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) | 28 | - [dh_typecode_i64] = &ffi_type_uint64, |
122 | const TCGProfile *orig = &s->prof; | 29 | - [dh_typecode_s64] = &ffi_type_sint64, |
123 | 30 | - [dh_typecode_ptr] = &ffi_type_pointer, | |
124 | if (counters) { | 31 | -}; |
125 | + PROF_ADD(prof, orig, cpu_exec_time); | 32 | +static ffi_type *typecode_to_ffi(int argmask) |
126 | PROF_ADD(prof, orig, tb_count1); | ||
127 | PROF_ADD(prof, orig, tb_count); | ||
128 | PROF_ADD(prof, orig, op_count); | ||
129 | @@ -XXX,XX +XXX,XX @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) | ||
130 | prof.table_op_count[i]); | ||
131 | } | ||
132 | } | ||
133 | + | ||
134 | +int64_t tcg_cpu_exec_time(void) | ||
135 | +{ | 33 | +{ |
136 | + unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); | 34 | + switch (argmask) { |
137 | + unsigned int i; | 35 | + case dh_typecode_void: |
138 | + int64_t ret = 0; | 36 | + return &ffi_type_void; |
139 | + | 37 | + case dh_typecode_i32: |
140 | + for (i = 0; i < n_ctxs; i++) { | 38 | + return &ffi_type_uint32; |
141 | + const TCGContext *s = atomic_read(&tcg_ctxs[i]); | 39 | + case dh_typecode_s32: |
142 | + const TCGProfile *prof = &s->prof; | 40 | + return &ffi_type_sint32; |
143 | + | 41 | + case dh_typecode_i64: |
144 | + ret += atomic_read(&prof->cpu_exec_time); | 42 | + return &ffi_type_uint64; |
43 | + case dh_typecode_s64: | ||
44 | + return &ffi_type_sint64; | ||
45 | + case dh_typecode_ptr: | ||
46 | + return &ffi_type_pointer; | ||
145 | + } | 47 | + } |
146 | + return ret; | 48 | + g_assert_not_reached(); |
147 | +} | ||
148 | #else | ||
149 | void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) | ||
150 | { | ||
151 | cpu_fprintf(f, "[TCG profiler not compiled]\n"); | ||
152 | } | ||
153 | + | ||
154 | +int64_t tcg_cpu_exec_time(void) | ||
155 | +{ | ||
156 | + error_report("%s: TCG profiler not compiled", __func__); | ||
157 | + exit(EXIT_FAILURE); | ||
158 | +} | 49 | +} |
159 | #endif | 50 | #endif |
160 | 51 | ||
52 | typedef struct TCGCumulativeArgs { | ||
53 | @@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus) | ||
54 | nargs = DIV_ROUND_UP(nargs, 3); | ||
55 | |||
56 | ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); | ||
57 | - ca->cif.rtype = typecode_to_ffi[typemask & 7]; | ||
58 | + ca->cif.rtype = typecode_to_ffi(typemask & 7); | ||
59 | ca->cif.nargs = nargs; | ||
60 | |||
61 | if (nargs != 0) { | ||
62 | ca->cif.arg_types = ca->args; | ||
63 | for (int j = 0; j < nargs; ++j) { | ||
64 | int typecode = extract32(typemask, (j + 1) * 3, 3); | ||
65 | - ca->args[j] = typecode_to_ffi[typecode]; | ||
66 | + ca->args[j] = typecode_to_ffi(typecode); | ||
67 | } | ||
68 | } | ||
161 | 69 | ||
162 | -- | 70 | -- |
163 | 2.17.2 | 71 | 2.34.1 |
164 | 72 | ||
165 | 73 | diff view generated by jsdifflib |
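The right-hand side of the comparison above converts typecode_to_ffi from a lookup array to a function so that a bogus typecode fails fast. A minimal sketch of the difference, with invented names:

    #include <stdio.h>
    #include <stdlib.h>

    enum { TC_VOID, TC_I32, TC_S32, TC_I64, TC_S64, TC_PTR, TC_NB };

    /* Array form: a typecode missing an initializer (or out of range)
       silently yields NULL, deferring the crash to some later user. */
    static const char *const name_table[TC_NB] = {
        [TC_VOID] = "void", [TC_I32] = "i32", [TC_S32] = "s32",
        [TC_I64]  = "i64",  [TC_S64] = "s64", [TC_PTR] = "ptr",
    };

    /* Function form: an unexpected typecode aborts at the point of
       the bug instead of handing back a NULL pointer. */
    static const char *typecode_name(int tc)
    {
        switch (tc) {
        case TC_VOID: return "void";
        case TC_I32:  return "i32";
        case TC_S32:  return "s32";
        case TC_I64:  return "i64";
        case TC_S64:  return "s64";
        case TC_PTR:  return "ptr";
        default:
            fprintf(stderr, "invalid typecode %d\n", tc);
            abort();
        }
    }

    int main(void)
    {
        printf("%s %s\n", name_table[TC_S64], typecode_name(TC_S64));
        return 0;
    }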
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-Id: <20221111074101.2069454-27-richard.henderson@linaro.org> | ||
5 | [PMD: Split from bigger patch] | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Message-Id: <20221122180804.938-3-philmd@linaro.org> | ||
9 | --- | ||
10 | tcg/tcg.c | 83 +++++++++++++++++++++++++++++-------------------------- | ||
11 | 1 file changed, 44 insertions(+), 39 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/tcg.c | ||
16 | +++ b/tcg/tcg.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static ffi_type *typecode_to_ffi(int argmask) | ||
18 | } | ||
19 | g_assert_not_reached(); | ||
20 | } | ||
21 | -#endif | ||
22 | + | ||
23 | +static void init_ffi_layouts(void) | ||
24 | +{ | ||
25 | + /* g_direct_hash/equal for direct comparisons on uint32_t. */ | ||
26 | + ffi_table = g_hash_table_new(NULL, NULL); | ||
27 | + for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) { | ||
28 | + uint32_t typemask = all_helpers[i].typemask; | ||
29 | + gpointer hash = (gpointer)(uintptr_t)typemask; | ||
30 | + struct { | ||
31 | + ffi_cif cif; | ||
32 | + ffi_type *args[]; | ||
33 | + } *ca; | ||
34 | + ffi_status status; | ||
35 | + int nargs; | ||
36 | + | ||
37 | + if (g_hash_table_lookup(ffi_table, hash)) { | ||
38 | + continue; | ||
39 | + } | ||
40 | + | ||
41 | + /* Ignoring the return type, find the last non-zero field. */ | ||
42 | + nargs = 32 - clz32(typemask >> 3); | ||
43 | + nargs = DIV_ROUND_UP(nargs, 3); | ||
44 | + | ||
45 | + ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); | ||
46 | + ca->cif.rtype = typecode_to_ffi(typemask & 7); | ||
47 | + ca->cif.nargs = nargs; | ||
48 | + | ||
49 | + if (nargs != 0) { | ||
50 | + ca->cif.arg_types = ca->args; | ||
51 | + for (int j = 0; j < nargs; ++j) { | ||
52 | + int typecode = extract32(typemask, (j + 1) * 3, 3); | ||
53 | + ca->args[j] = typecode_to_ffi(typecode); | ||
54 | + } | ||
55 | + } | ||
56 | + | ||
57 | + status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, | ||
58 | + ca->cif.rtype, ca->cif.arg_types); | ||
59 | + assert(status == FFI_OK); | ||
60 | + | ||
61 | + g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif); | ||
62 | + } | ||
63 | +} | ||
64 | +#endif /* CONFIG_TCG_INTERPRETER */ | ||
65 | |||
66 | typedef struct TCGCumulativeArgs { | ||
67 | int arg_idx; /* tcg_gen_callN args[] */ | ||
68 | @@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus) | ||
69 | } | ||
70 | |||
71 | #ifdef CONFIG_TCG_INTERPRETER | ||
72 | - /* g_direct_hash/equal for direct comparisons on uint32_t. */ | ||
73 | - ffi_table = g_hash_table_new(NULL, NULL); | ||
74 | - for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { | ||
75 | - struct { | ||
76 | - ffi_cif cif; | ||
77 | - ffi_type *args[]; | ||
78 | - } *ca; | ||
79 | - uint32_t typemask = all_helpers[i].typemask; | ||
80 | - gpointer hash = (gpointer)(uintptr_t)typemask; | ||
81 | - ffi_status status; | ||
82 | - int nargs; | ||
83 | - | ||
84 | - if (g_hash_table_lookup(ffi_table, hash)) { | ||
85 | - continue; | ||
86 | - } | ||
87 | - | ||
88 | - /* Ignoring the return type, find the last non-zero field. */ | ||
89 | - nargs = 32 - clz32(typemask >> 3); | ||
90 | - nargs = DIV_ROUND_UP(nargs, 3); | ||
91 | - | ||
92 | - ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); | ||
93 | - ca->cif.rtype = typecode_to_ffi(typemask & 7); | ||
94 | - ca->cif.nargs = nargs; | ||
95 | - | ||
96 | - if (nargs != 0) { | ||
97 | - ca->cif.arg_types = ca->args; | ||
98 | - for (int j = 0; j < nargs; ++j) { | ||
99 | - int typecode = extract32(typemask, (j + 1) * 3, 3); | ||
100 | - ca->args[j] = typecode_to_ffi(typecode); | ||
101 | - } | ||
102 | - } | ||
103 | - | ||
104 | - status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, | ||
105 | - ca->cif.rtype, ca->cif.arg_types); | ||
106 | - assert(status == FFI_OK); | ||
107 | - | ||
108 | - g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif); | ||
109 | - } | ||
110 | + init_ffi_layouts(); | ||
111 | #endif | ||
112 | |||
113 | tcg_target_init(s); | ||
114 | -- | ||
115 | 2.34.1 | ||
116 | |||
117 | diff view generated by jsdifflib |
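For readers unfamiliar with libffi: the init_ffi_layouts() code above prepares one ffi_cif ("call interface") per distinct helper signature. A standalone example, independent of QEMU, of what a prepared CIF is used for:

    /* build with: cc cif-demo.c $(pkg-config --cflags --libs libffi) */
    #include <ffi.h>
    #include <stdio.h>

    static int add(int a, int b) { return a + b; }

    int main(void)
    {
        ffi_cif cif;
        ffi_type *arg_types[2] = { &ffi_type_sint, &ffi_type_sint };

        /* Prepare the CIF once; it fully describes the signature and
           can drive any number of calls to functions that match it. */
        if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2,
                         &ffi_type_sint, arg_types) != FFI_OK) {
            return 1;
        }

        int a = 2, b = 3;
        void *arg_values[2] = { &a, &b };
        ffi_arg result;              /* integral returns are widened */

        ffi_call(&cif, FFI_FN(add), &result, arg_values);
        printf("add(2, 3) = %d\n", (int)result);
        return 0;
    }

Because preparing a CIF is not free and many helpers share a signature, the loop above deduplicates them by typemask before calling ffi_prep_cif().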
New patch | |||
---|---|---|---|
1 | Instead of requiring a separate hash table lookup, | ||
2 | put a pointer to the CIF into TCGHelperInfo. | ||
1 | 3 | ||
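The pattern is a one-time memoization: resolve the hash lookup during initialization, cache the result in the per-helper struct, and the hot path becomes a plain field access. A minimal sketch with invented types (Cif stands in for ffi_cif):

    #include <glib.h>

    typedef struct Cif { unsigned typemask; } Cif;   /* stand-in */

    typedef struct HelperInfo {
        const char *name;
        unsigned typemask;
        Cif *cif;                    /* resolved once at startup */
    } HelperInfo;

    /* Share one Cif per distinct typemask, cache the pointer in each
       helper, then discard the table: no lookups remain at call time. */
    static void resolve_cifs(HelperInfo *infos, int n)
    {
        GHashTable *seen = g_hash_table_new(NULL, NULL);

        for (int i = 0; i < n; i++) {
            gpointer key = GUINT_TO_POINTER(infos[i].typemask);
            Cif *cif = g_hash_table_lookup(seen, key);

            if (!cif) {
                cif = g_new0(Cif, 1);
                cif->typemask = infos[i].typemask;
                g_hash_table_insert(seen, key, cif);
            }
            infos[i].cif = cif;
        }
        g_hash_table_destroy(seen);
    }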
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Message-Id: <20221111074101.2069454-27-richard.henderson@linaro.org> | ||
6 | [PMD: Split from bigger patch] | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Message-Id: <20221122180804.938-4-philmd@linaro.org> | ||
10 | --- | ||
11 | tcg/tcg-internal.h | 7 +++++++ | ||
12 | tcg/tcg.c | 30 ++++++++++++++---------------- | ||
13 | 2 files changed, 21 insertions(+), 16 deletions(-) | ||
14 | |||
15 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/tcg/tcg-internal.h | ||
18 | +++ b/tcg/tcg-internal.h | ||
19 | @@ -XXX,XX +XXX,XX @@ | ||
20 | #ifndef TCG_INTERNAL_H | ||
21 | #define TCG_INTERNAL_H | ||
22 | |||
23 | +#ifdef CONFIG_TCG_INTERPRETER | ||
24 | +#include <ffi.h> | ||
25 | +#endif | ||
26 | + | ||
27 | #define TCG_HIGHWATER 1024 | ||
28 | |||
29 | /* | ||
30 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGCallArgumentLoc { | ||
31 | typedef struct TCGHelperInfo { | ||
32 | void *func; | ||
33 | const char *name; | ||
34 | +#ifdef CONFIG_TCG_INTERPRETER | ||
35 | + ffi_cif *cif; | ||
36 | +#endif | ||
37 | unsigned typemask : 32; | ||
38 | unsigned flags : 8; | ||
39 | unsigned nr_in : 8; | ||
40 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/tcg/tcg.c | ||
43 | +++ b/tcg/tcg.c | ||
44 | @@ -XXX,XX +XXX,XX @@ | ||
45 | #include "tcg/tcg-ldst.h" | ||
46 | #include "tcg-internal.h" | ||
47 | |||
48 | -#ifdef CONFIG_TCG_INTERPRETER | ||
49 | -#include <ffi.h> | ||
50 | -#endif | ||
51 | - | ||
52 | /* Forward declarations for functions declared in tcg-target.c.inc and | ||
53 | used here. */ | ||
54 | static void tcg_target_init(TCGContext *s); | ||
55 | @@ -XXX,XX +XXX,XX @@ static TCGHelperInfo all_helpers[] = { | ||
56 | static GHashTable *helper_table; | ||
57 | |||
58 | #ifdef CONFIG_TCG_INTERPRETER | ||
59 | -static GHashTable *ffi_table; | ||
60 | - | ||
61 | static ffi_type *typecode_to_ffi(int argmask) | ||
62 | { | ||
63 | switch (argmask) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static ffi_type *typecode_to_ffi(int argmask) | ||
65 | static void init_ffi_layouts(void) | ||
66 | { | ||
67 | /* g_direct_hash/equal for direct comparisons on uint32_t. */ | ||
68 | - ffi_table = g_hash_table_new(NULL, NULL); | ||
69 | + GHashTable *ffi_table = g_hash_table_new(NULL, NULL); | ||
70 | + | ||
71 | for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) { | ||
72 | - uint32_t typemask = all_helpers[i].typemask; | ||
73 | + TCGHelperInfo *info = &all_helpers[i]; | ||
74 | + unsigned typemask = info->typemask; | ||
75 | gpointer hash = (gpointer)(uintptr_t)typemask; | ||
76 | struct { | ||
77 | ffi_cif cif; | ||
78 | @@ -XXX,XX +XXX,XX @@ static void init_ffi_layouts(void) | ||
79 | } *ca; | ||
80 | ffi_status status; | ||
81 | int nargs; | ||
82 | + ffi_cif *cif; | ||
83 | |||
84 | - if (g_hash_table_lookup(ffi_table, hash)) { | ||
85 | + cif = g_hash_table_lookup(ffi_table, hash); | ||
86 | + if (cif) { | ||
87 | + info->cif = cif; | ||
88 | continue; | ||
89 | } | ||
90 | |||
91 | @@ -XXX,XX +XXX,XX @@ static void init_ffi_layouts(void) | ||
92 | ca->cif.rtype, ca->cif.arg_types); | ||
93 | assert(status == FFI_OK); | ||
94 | |||
95 | - g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif); | ||
96 | + cif = &ca->cif; | ||
97 | + info->cif = cif; | ||
98 | + g_hash_table_insert(ffi_table, hash, (gpointer)cif); | ||
99 | } | ||
100 | + | ||
101 | + g_hash_table_destroy(ffi_table); | ||
102 | } | ||
103 | #endif /* CONFIG_TCG_INTERPRETER */ | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
106 | } | ||
107 | |||
108 | #ifdef CONFIG_TCG_INTERPRETER | ||
109 | - { | ||
110 | - gpointer hash = (gpointer)(uintptr_t)info->typemask; | ||
111 | - ffi_cif *cif = g_hash_table_lookup(ffi_table, hash); | ||
112 | - assert(cif != NULL); | ||
113 | - tcg_out_call(s, tcg_call_func(op), cif); | ||
114 | - } | ||
115 | + tcg_out_call(s, tcg_call_func(op), info->cif); | ||
116 | #else | ||
117 | tcg_out_call(s, tcg_call_func(op)); | ||
118 | #endif | ||
119 | -- | ||
120 | 2.34.1 | ||
121 | |||
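The dedup pattern in the patch above — a throwaway GHashTable that exists only for the duration of initialization, while the deduplicated objects are cached by pointer in each info structure — can be sketched in isolation (Layout and Info are illustrative stand-ins for ffi_cif and TCGHelperInfo):

    #include <glib.h>
    #include <stdint.h>

    typedef struct { int payload; } Layout;                 /* stands in for ffi_cif */
    typedef struct { uint32_t key; Layout *layout; } Info;  /* stands in for TCGHelperInfo */

    static void init_layouts(Info *infos, int n)
    {
        /* NULL, NULL == g_direct_hash/equal: keys compared as raw pointers. */
        GHashTable *tmp = g_hash_table_new(NULL, NULL);

        for (int i = 0; i < n; i++) {
            gpointer hash = (gpointer)(uintptr_t)infos[i].key;
            Layout *l = g_hash_table_lookup(tmp, hash);

            if (!l) {
                l = g_new0(Layout, 1);
                g_hash_table_insert(tmp, hash, l);
            }
            infos[i].layout = l;   /* cache the shared object in the info itself */
        }

        /* The table served only to deduplicate; the Layouts outlive it. */
        g_hash_table_destroy(tmp);
    }

    int main(void)
    {
        Info infos[3] = { { 7, NULL }, { 9, NULL }, { 7, NULL } };
        init_layouts(infos, 3);
        g_assert(infos[0].layout == infos[2].layout);  /* equal keys share one Layout */
        return 0;
    }

Once every lookup result is cached in its info, the table itself is dead weight, which is why the patch can destroy it at the end of initialization.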
1 | Reviewed-by: David Hildenbrand <david@redhat.com> | 1 | There is only one use, and BLR is perhaps even more |
---|---|---|---|
2 | self-documenting than CALLR. |
3 | |||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
3 | --- | 6 | --- |
4 | target/s390x/mem_helper.c | 128 ++++++++++++++++++-------------------- | 7 | tcg/aarch64/tcg-target.c.inc | 7 +------ |
5 | 1 file changed, 61 insertions(+), 67 deletions(-) | 8 | 1 file changed, 1 insertion(+), 6 deletions(-) |
6 | 9 | ||
7 | diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c | 10 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc |
8 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
9 | --- a/target/s390x/mem_helper.c | 12 | --- a/tcg/aarch64/tcg-target.c.inc |
10 | +++ b/target/s390x/mem_helper.c | 13 | +++ b/tcg/aarch64/tcg-target.c.inc |
11 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, | 14 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) |
12 | return cc; | 15 | } |
13 | } | 16 | } |
14 | 17 | ||
15 | -static void do_cdsg(CPUS390XState *env, uint64_t addr, | 18 | -static inline void tcg_out_callr(TCGContext *s, TCGReg reg) |
16 | - uint32_t r1, uint32_t r3, bool parallel) | ||
17 | +void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, | ||
18 | + uint32_t r1, uint32_t r3) | ||
19 | { | ||
20 | uintptr_t ra = GETPC(); | ||
21 | Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); | ||
22 | Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); | ||
23 | Int128 oldv; | ||
24 | + uint64_t oldh, oldl; | ||
25 | bool fail; | ||
26 | |||
27 | - if (parallel) { | ||
28 | -#if !HAVE_CMPXCHG128 | ||
29 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
30 | -#else | ||
31 | - int mem_idx = cpu_mmu_index(env, false); | ||
32 | - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
33 | - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); | ||
34 | - fail = !int128_eq(oldv, cmpv); | ||
35 | -#endif | ||
36 | - } else { | ||
37 | - uint64_t oldh, oldl; | ||
38 | + check_alignment(env, addr, 16, ra); | ||
39 | |||
40 | - check_alignment(env, addr, 16, ra); | ||
41 | + oldh = cpu_ldq_data_ra(env, addr + 0, ra); | ||
42 | + oldl = cpu_ldq_data_ra(env, addr + 8, ra); | ||
43 | |||
44 | - oldh = cpu_ldq_data_ra(env, addr + 0, ra); | ||
45 | - oldl = cpu_ldq_data_ra(env, addr + 8, ra); | ||
46 | - | ||
47 | - oldv = int128_make128(oldl, oldh); | ||
48 | - fail = !int128_eq(oldv, cmpv); | ||
49 | - if (fail) { | ||
50 | - newv = oldv; | ||
51 | - } | ||
52 | - | ||
53 | - cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); | ||
54 | - cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); | ||
55 | + oldv = int128_make128(oldl, oldh); | ||
56 | + fail = !int128_eq(oldv, cmpv); | ||
57 | + if (fail) { | ||
58 | + newv = oldv; | ||
59 | } | ||
60 | |||
61 | + cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); | ||
62 | + cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); | ||
63 | + | ||
64 | env->cc_op = fail; | ||
65 | env->regs[r1] = int128_gethi(oldv); | ||
66 | env->regs[r1 + 1] = int128_getlo(oldv); | ||
67 | } | ||
68 | |||
69 | -void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, | ||
70 | - uint32_t r1, uint32_t r3) | ||
71 | -{ | 19 | -{ |
72 | - do_cdsg(env, addr, r1, r3, false); | 20 | - tcg_out_insn(s, 3207, BLR, reg); |
73 | -} | 21 | -} |
74 | - | 22 | - |
75 | void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, | 23 | static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) |
76 | uint32_t r1, uint32_t r3) | ||
77 | { | 24 | { |
78 | - do_cdsg(env, addr, r1, r3, true); | 25 | ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; |
79 | + uintptr_t ra = GETPC(); | 26 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) |
80 | + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); | 27 | tcg_out_insn(s, 3206, BL, offset); |
81 | + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); | 28 | } else { |
82 | + int mem_idx; | 29 | tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); |
83 | + TCGMemOpIdx oi; | 30 | - tcg_out_callr(s, TCG_REG_TMP); |
84 | + Int128 oldv; | 31 | + tcg_out_insn(s, 3207, BLR, TCG_REG_TMP); |
85 | + bool fail; | ||
86 | + | ||
87 | + if (!HAVE_CMPXCHG128) { | ||
88 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
89 | + } | ||
90 | + | ||
91 | + mem_idx = cpu_mmu_index(env, false); | ||
92 | + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
93 | + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); | ||
94 | + fail = !int128_eq(oldv, cmpv); | ||
95 | + | ||
96 | + env->cc_op = fail; | ||
97 | + env->regs[r1] = int128_gethi(oldv); | ||
98 | + env->regs[r1 + 1] = int128_getlo(oldv); | ||
99 | } | ||
100 | |||
101 | static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, | ||
102 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr) | ||
103 | #endif | ||
104 | |||
105 | /* load pair from quadword */ | ||
106 | -static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel) | ||
107 | +uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) | ||
108 | { | ||
109 | uintptr_t ra = GETPC(); | ||
110 | uint64_t hi, lo; | ||
111 | |||
112 | - if (!parallel) { | ||
113 | - check_alignment(env, addr, 16, ra); | ||
114 | - hi = cpu_ldq_data_ra(env, addr + 0, ra); | ||
115 | - lo = cpu_ldq_data_ra(env, addr + 8, ra); | ||
116 | - } else if (HAVE_ATOMIC128) { | ||
117 | + check_alignment(env, addr, 16, ra); | ||
118 | + hi = cpu_ldq_data_ra(env, addr + 0, ra); | ||
119 | + lo = cpu_ldq_data_ra(env, addr + 8, ra); | ||
120 | + | ||
121 | + env->retxl = lo; | ||
122 | + return hi; | ||
123 | +} | ||
124 | + | ||
125 | +uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) | ||
126 | +{ | ||
127 | + uintptr_t ra = GETPC(); | ||
128 | + uint64_t hi, lo; | ||
129 | + | ||
130 | + if (HAVE_ATOMIC128) { | ||
131 | int mem_idx = cpu_mmu_index(env, false); | ||
132 | TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
133 | Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); | ||
134 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel) | ||
135 | return hi; | ||
136 | } | ||
137 | |||
138 | -uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) | ||
139 | -{ | ||
140 | - return do_lpq(env, addr, false); | ||
141 | -} | ||
142 | - | ||
143 | -uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) | ||
144 | -{ | ||
145 | - return do_lpq(env, addr, true); | ||
146 | -} | ||
147 | - | ||
148 | /* store pair to quadword */ | ||
149 | -static void do_stpq(CPUS390XState *env, uint64_t addr, | ||
150 | - uint64_t low, uint64_t high, bool parallel) | ||
151 | +void HELPER(stpq)(CPUS390XState *env, uint64_t addr, | ||
152 | + uint64_t low, uint64_t high) | ||
153 | { | ||
154 | uintptr_t ra = GETPC(); | ||
155 | |||
156 | - if (!parallel) { | ||
157 | - check_alignment(env, addr, 16, ra); | ||
158 | - cpu_stq_data_ra(env, addr + 0, high, ra); | ||
159 | - cpu_stq_data_ra(env, addr + 8, low, ra); | ||
160 | - } else if (HAVE_ATOMIC128) { | ||
161 | + check_alignment(env, addr, 16, ra); | ||
162 | + cpu_stq_data_ra(env, addr + 0, high, ra); | ||
163 | + cpu_stq_data_ra(env, addr + 8, low, ra); | ||
164 | +} | ||
165 | + | ||
166 | +void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, | ||
167 | + uint64_t low, uint64_t high) | ||
168 | +{ | ||
169 | + uintptr_t ra = GETPC(); | ||
170 | + | ||
171 | + if (HAVE_ATOMIC128) { | ||
172 | int mem_idx = cpu_mmu_index(env, false); | ||
173 | TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
174 | Int128 v = int128_make128(low, high); | ||
175 | @@ -XXX,XX +XXX,XX @@ static void do_stpq(CPUS390XState *env, uint64_t addr, | ||
176 | } | 32 | } |
177 | } | 33 | } |
178 | 34 | ||
179 | -void HELPER(stpq)(CPUS390XState *env, uint64_t addr, | ||
180 | - uint64_t low, uint64_t high) | ||
181 | -{ | ||
182 | - do_stpq(env, addr, low, high, false); | ||
183 | -} | ||
184 | - | ||
185 | -void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, | ||
186 | - uint64_t low, uint64_t high) | ||
187 | -{ | ||
188 | - do_stpq(env, addr, low, high, true); | ||
189 | -} | ||
190 | - | ||
191 | /* Execute instruction. This instruction executes an insn modified with | ||
192 | the contents of r1. It does not change the executed instruction in memory; | ||
193 | it does not change the program counter. | ||
194 | -- | 35 | -- |
195 | 2.17.2 | 36 | 2.34.1 |
196 | 37 | ||
1 | GCC7+ will no longer advertise support for 16-byte __atomic operations | 1 | This eliminates an ifdef for TCI, and will be required for |
---|---|---|---|
2 | if only cmpxchg is supported, as for x86_64. Fortunately, x86_64 still | 2 | expanding the call for TCGv_i128. |
3 | has support for __sync_compare_and_swap_16 and we can make use of that. | ||
4 | AArch64 does not have, nor has it ever had, such support, so open-code it. | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | 3 | ||
6 | Reviewed-by: Emilio G. Cota <cota@braap.org> | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 6 | --- |
9 | accel/tcg/atomic_template.h | 20 ++++- | 7 | tcg/tcg.c | 12 ++---------- |
10 | include/qemu/atomic128.h | 155 ++++++++++++++++++++++++++++++++++++ | 8 | tcg/aarch64/tcg-target.c.inc | 12 +++++++++--- |
11 | tcg/tcg.h | 16 ++-- | 9 | tcg/arm/tcg-target.c.inc | 10 ++++++++-- |
12 | accel/tcg/cputlb.c | 3 +- | 10 | tcg/i386/tcg-target.c.inc | 5 +++-- |
13 | accel/tcg/user-exec.c | 5 +- | 11 | tcg/loongarch64/tcg-target.c.inc | 7 ++++--- |
14 | configure | 19 +++++ | 12 | tcg/mips/tcg-target.c.inc | 3 ++- |
15 | 6 files changed, 204 insertions(+), 14 deletions(-) | 13 | tcg/ppc/tcg-target.c.inc | 7 ++++--- |
16 | create mode 100644 include/qemu/atomic128.h | 14 | tcg/riscv/tcg-target.c.inc | 7 ++++--- |
15 | tcg/s390x/tcg-target.c.inc | 12 +++++++++--- | ||
16 | tcg/sparc64/tcg-target.c.inc | 3 ++- | ||
17 | tcg/tci/tcg-target.c.inc | 3 ++- | ||
18 | 11 files changed, 49 insertions(+), 32 deletions(-) | ||
17 | 19 | ||
18 | diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h | 20 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
19 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/accel/tcg/atomic_template.h | 22 | --- a/tcg/tcg.c |
21 | +++ b/accel/tcg/atomic_template.h | 23 | +++ b/tcg/tcg.c |
22 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, | 24 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, |
23 | DATA_TYPE ret; | 25 | intptr_t arg2); |
24 | 26 | static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, | |
25 | ATOMIC_TRACE_RMW; | 27 | TCGReg base, intptr_t ofs); |
26 | +#if DATA_SIZE == 16 | 28 | -#ifdef CONFIG_TCG_INTERPRETER |
27 | + ret = atomic16_cmpxchg(haddr, cmpv, newv); | 29 | static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, |
28 | +#else | 30 | - ffi_cif *cif); |
29 | ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv); | 31 | -#else |
30 | +#endif | 32 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target); |
31 | ATOMIC_MMU_CLEANUP; | 33 | -#endif |
32 | return ret; | 34 | + const TCGHelperInfo *info); |
33 | } | 35 | static bool tcg_target_const_match(int64_t val, TCGType type, int ct); |
34 | 36 | #ifdef TCG_TARGET_NEED_LDST_LABELS | |
35 | #if DATA_SIZE >= 16 | 37 | static int tcg_out_ldst_finalize(TCGContext *s); |
36 | +#if HAVE_ATOMIC128 | 38 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) |
37 | ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) | 39 | save_globals(s, allocated_regs); |
38 | { | 40 | } |
39 | ATOMIC_MMU_DECLS; | 41 | |
40 | DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; | 42 | -#ifdef CONFIG_TCG_INTERPRETER |
41 | 43 | - tcg_out_call(s, tcg_call_func(op), info->cif); | |
42 | ATOMIC_TRACE_LD; | 44 | -#else |
43 | - __atomic_load(haddr, &val, __ATOMIC_RELAXED); | 45 | - tcg_out_call(s, tcg_call_func(op)); |
44 | + val = atomic16_read(haddr); | 46 | -#endif |
45 | ATOMIC_MMU_CLEANUP; | 47 | + tcg_out_call(s, tcg_call_func(op), info); |
46 | return val; | 48 | |
47 | } | 49 | /* Assign output registers and emit moves if needed. */ |
48 | @@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, | 50 | switch (info->out_kind) { |
49 | DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; | 51 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc |
50 | 52 | index XXXXXXX..XXXXXXX 100644 | |
51 | ATOMIC_TRACE_ST; | 53 | --- a/tcg/aarch64/tcg-target.c.inc |
52 | - __atomic_store(haddr, &val, __ATOMIC_RELAXED); | 54 | +++ b/tcg/aarch64/tcg-target.c.inc |
53 | + atomic16_set(haddr, val); | 55 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) |
54 | ATOMIC_MMU_CLEANUP; | 56 | } |
55 | } | 57 | } |
56 | +#endif | 58 | |
57 | #else | 59 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) |
58 | ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, | 60 | +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target) |
59 | ABI_TYPE val EXTRA_ARGS) | 61 | { |
60 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, | 62 | ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; |
61 | DATA_TYPE ret; | 63 | if (offset == sextract64(offset, 0, 26)) { |
62 | 64 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) | |
63 | ATOMIC_TRACE_RMW; | 65 | } |
64 | +#if DATA_SIZE == 16 | 66 | } |
65 | + ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)); | 67 | |
66 | +#else | 68 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, |
67 | ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); | 69 | + const TCGHelperInfo *info) |
68 | +#endif | ||
69 | ATOMIC_MMU_CLEANUP; | ||
70 | return BSWAP(ret); | ||
71 | } | ||
72 | |||
73 | #if DATA_SIZE >= 16 | ||
74 | +#if HAVE_ATOMIC128 | ||
75 | ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) | ||
76 | { | ||
77 | ATOMIC_MMU_DECLS; | ||
78 | DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; | ||
79 | |||
80 | ATOMIC_TRACE_LD; | ||
81 | - __atomic_load(haddr, &val, __ATOMIC_RELAXED); | ||
82 | + val = atomic16_read(haddr); | ||
83 | ATOMIC_MMU_CLEANUP; | ||
84 | return BSWAP(val); | ||
85 | } | ||
86 | @@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, | ||
87 | |||
88 | ATOMIC_TRACE_ST; | ||
89 | val = BSWAP(val); | ||
90 | - __atomic_store(haddr, &val, __ATOMIC_RELAXED); | ||
91 | + atomic16_set(haddr, val); | ||
92 | ATOMIC_MMU_CLEANUP; | ||
93 | } | ||
94 | +#endif | ||
95 | #else | ||
96 | ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, | ||
97 | ABI_TYPE val EXTRA_ARGS) | ||
98 | diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h | ||
99 | new file mode 100644 | ||
100 | index XXXXXXX..XXXXXXX | ||
101 | --- /dev/null | ||
102 | +++ b/include/qemu/atomic128.h | ||
103 | @@ -XXX,XX +XXX,XX @@ | ||
104 | +/* | ||
105 | + * Simple interface for 128-bit atomic operations. | ||
106 | + * | ||
107 | + * Copyright (C) 2018 Linaro, Ltd. | ||
108 | + * | ||
109 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
110 | + * See the COPYING file in the top-level directory. | ||
111 | + * | ||
112 | + * See docs/devel/atomics.txt for discussion about the guarantees each | ||
113 | + * atomic primitive is meant to provide. | ||
114 | + */ | ||
115 | + | ||
116 | +#ifndef QEMU_ATOMIC128_H | ||
117 | +#define QEMU_ATOMIC128_H | ||
118 | + | ||
119 | +/* | ||
120 | + * GCC is a house divided about supporting large atomic operations. | ||
121 | + * | ||
122 | + * For hosts that only have large compare-and-swap, a legalistic reading | ||
123 | + * of the C++ standard means that one cannot implement __atomic_read on | ||
124 | + * read-only memory, and thus all atomic operations must synchronize | ||
125 | + * through libatomic. | ||
126 | + * | ||
127 | + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878 | ||
128 | + * | ||
129 | + * This interpretation is not especially helpful for QEMU. | ||
130 | + * For softmmu, all RAM is always read/write from the hypervisor. | ||
131 | + * For user-only, if the guest doesn't implement such an __atomic_read | ||
132 | + * then the host need not worry about it either. | ||
133 | + * | ||
134 | + * Moreover, using libatomic is not an option, because its interface is | ||
135 | + * built for std::atomic<T>, and requires that *all* accesses to such an | ||
136 | + * object go through the library. In our case we do not have an object | ||
137 | + * in the C/C++ sense, but a view of memory as seen by the guest. | ||
138 | + * The guest may issue a large atomic operation and then access those | ||
139 | + * pieces using word-sized accesses. From the hypervisor, we have no | ||
140 | + * way to connect those two actions. | ||
141 | + * | ||
142 | + * Therefore, special case each platform. | ||
143 | + */ | ||
144 | + | ||
145 | +#if defined(CONFIG_ATOMIC128) | ||
146 | +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) | ||
147 | +{ | 70 | +{ |
148 | + return atomic_cmpxchg__nocheck(ptr, cmp, new); | 71 | + tcg_out_call_int(s, target); |
149 | +} | ||
150 | +# define HAVE_CMPXCHG128 1 | ||
151 | +#elif defined(CONFIG_CMPXCHG128) | ||
152 | +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) | ||
153 | +{ | ||
154 | + return __sync_val_compare_and_swap_16(ptr, cmp, new); | ||
155 | +} | ||
156 | +# define HAVE_CMPXCHG128 1 | ||
157 | +#elif defined(__aarch64__) | ||
158 | +/* Through gcc 8, aarch64 has no support for 128-bit at all. */ | ||
159 | +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) | ||
160 | +{ | ||
161 | + uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp); | ||
162 | + uint64_t newl = int128_getlo(new), newh = int128_gethi(new); | ||
163 | + uint64_t oldl, oldh; | ||
164 | + uint32_t tmp; | ||
165 | + | ||
166 | + asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t" | ||
167 | + "cmp %[oldl], %[cmpl]\n\t" | ||
168 | + "ccmp %[oldh], %[cmph], #0, eq\n\t" | ||
169 | + "b.ne 1f\n\t" | ||
170 | + "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t" | ||
171 | + "cbnz %w[tmp], 0b\n" | ||
172 | + "1:" | ||
173 | + : [mem] "+m"(*ptr), [tmp] "=&r"(tmp), | ||
174 | + [oldl] "=&r"(oldl), [oldh] "=r"(oldh) | ||
175 | + : [cmpl] "r"(cmpl), [cmph] "r"(cmph), | ||
176 | + [newl] "r"(newl), [newh] "r"(newh) | ||
177 | + : "memory", "cc"); | ||
178 | + | ||
179 | + return int128_make128(oldl, oldh); | ||
180 | +} | ||
181 | +# define HAVE_CMPXCHG128 1 | ||
182 | +#else | ||
183 | +/* Fallback definition that must be optimized away, or error. */ | ||
184 | +Int128 __attribute__((error("unsupported atomic"))) | ||
185 | + atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new); | ||
186 | +# define HAVE_CMPXCHG128 0 | ||
187 | +#endif /* Some definition for HAVE_CMPXCHG128 */ | ||
188 | + | ||
189 | + | ||
190 | +#if defined(CONFIG_ATOMIC128) | ||
191 | +static inline Int128 atomic16_read(Int128 *ptr) | ||
192 | +{ | ||
193 | + return atomic_read__nocheck(ptr); | ||
194 | +} | 72 | +} |
195 | + | 73 | + |
196 | +static inline void atomic16_set(Int128 *ptr, Int128 val) | 74 | void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, |
75 | uintptr_t jmp_rw, uintptr_t addr) | ||
76 | { | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
78 | tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); | ||
79 | tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); | ||
80 | tcg_out_adr(s, TCG_REG_X3, lb->raddr); | ||
81 | - tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); | ||
82 | + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); | ||
83 | if (opc & MO_SIGN) { | ||
84 | tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); | ||
85 | } else { | ||
86 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
87 | tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); | ||
88 | tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); | ||
89 | tcg_out_adr(s, TCG_REG_X4, lb->raddr); | ||
90 | - tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); | ||
91 | + tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]); | ||
92 | tcg_out_goto(s, lb->raddr); | ||
93 | return true; | ||
94 | } | ||
95 | diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/tcg/arm/tcg-target.c.inc | ||
98 | +++ b/tcg/arm/tcg-target.c.inc | ||
99 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, ARMCond cond, const tcg_insn_unit *addr) | ||
100 | * The call case is mostly used for helpers - so it's not unreasonable | ||
101 | * for them to be beyond branch range. | ||
102 | */ | ||
103 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr) | ||
104 | +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *addr) | ||
105 | { | ||
106 | intptr_t addri = (intptr_t)addr; | ||
107 | ptrdiff_t disp = tcg_pcrel_diff(s, addr); | ||
108 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr) | ||
109 | tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP); | ||
110 | } | ||
111 | |||
112 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr, | ||
113 | + const TCGHelperInfo *info) | ||
197 | +{ | 114 | +{ |
198 | + atomic_set__nocheck(ptr, val); | 115 | + tcg_out_call_int(s, addr); |
199 | +} | 116 | +} |
200 | + | 117 | + |
201 | +# define HAVE_ATOMIC128 1 | 118 | static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l) |
202 | +#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__) | 119 | { |
203 | +/* We can do better than cmpxchg for AArch64. */ | 120 | if (l->has_value) { |
204 | +static inline Int128 atomic16_read(Int128 *ptr) | 121 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) |
122 | argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); | ||
123 | |||
124 | /* Use the canonical unsigned helpers and minimize icache usage. */ | ||
125 | - tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); | ||
126 | + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); | ||
127 | |||
128 | datalo = lb->datalo_reg; | ||
129 | datahi = lb->datahi_reg; | ||
130 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc | ||
131 | index XXXXXXX..XXXXXXX 100644 | ||
132 | --- a/tcg/i386/tcg-target.c.inc | ||
133 | +++ b/tcg/i386/tcg-target.c.inc | ||
134 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) | ||
135 | } | ||
136 | } | ||
137 | |||
138 | -static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) | ||
139 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, | ||
140 | + const TCGHelperInfo *info) | ||
141 | { | ||
142 | tcg_out_branch(s, 1, dest); | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
145 | (uintptr_t)l->raddr); | ||
146 | } | ||
147 | |||
148 | - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
149 | + tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
150 | |||
151 | data_reg = l->datalo_reg; | ||
152 | switch (opc & MO_SSIZE) { | ||
153 | diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc | ||
154 | index XXXXXXX..XXXXXXX 100644 | ||
155 | --- a/tcg/loongarch64/tcg-target.c.inc | ||
156 | +++ b/tcg/loongarch64/tcg-target.c.inc | ||
157 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) | ||
158 | } | ||
159 | } | ||
160 | |||
161 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) | ||
162 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, | ||
163 | + const TCGHelperInfo *info) | ||
164 | { | ||
165 | tcg_out_call_int(s, arg, false); | ||
166 | } | ||
167 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
168 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi); | ||
169 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr); | ||
170 | |||
171 | - tcg_out_call(s, qemu_ld_helpers[size]); | ||
172 | + tcg_out_call_int(s, qemu_ld_helpers[size], false); | ||
173 | |||
174 | switch (opc & MO_SSIZE) { | ||
175 | case MO_SB: | ||
176 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
177 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi); | ||
178 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr); | ||
179 | |||
180 | - tcg_out_call(s, qemu_st_helpers[size]); | ||
181 | + tcg_out_call_int(s, qemu_st_helpers[size], false); | ||
182 | |||
183 | return tcg_out_goto(s, l->raddr); | ||
184 | } | ||
185 | diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc | ||
186 | index XXXXXXX..XXXXXXX 100644 | ||
187 | --- a/tcg/mips/tcg-target.c.inc | ||
188 | +++ b/tcg/mips/tcg-target.c.inc | ||
189 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) | ||
190 | } | ||
191 | } | ||
192 | |||
193 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) | ||
194 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, | ||
195 | + const TCGHelperInfo *info) | ||
196 | { | ||
197 | tcg_out_call_int(s, arg, false); | ||
198 | tcg_out_nop(s); | ||
199 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
200 | index XXXXXXX..XXXXXXX 100644 | ||
201 | --- a/tcg/ppc/tcg-target.c.inc | ||
202 | +++ b/tcg/ppc/tcg-target.c.inc | ||
203 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, int lk, | ||
204 | #endif | ||
205 | } | ||
206 | |||
207 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) | ||
208 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, | ||
209 | + const TCGHelperInfo *info) | ||
210 | { | ||
211 | tcg_out_call_int(s, LK, target); | ||
212 | } | ||
213 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
214 | tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); | ||
215 | tcg_out32(s, MFSPR | RT(arg) | LR); | ||
216 | |||
217 | - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
218 | + tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
219 | |||
220 | lo = lb->datalo_reg; | ||
221 | hi = lb->datahi_reg; | ||
222 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
223 | tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); | ||
224 | tcg_out32(s, MFSPR | RT(arg) | LR); | ||
225 | |||
226 | - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
227 | + tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
228 | |||
229 | tcg_out_b(s, 0, lb->raddr); | ||
230 | return true; | ||
231 | diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc | ||
232 | index XXXXXXX..XXXXXXX 100644 | ||
233 | --- a/tcg/riscv/tcg-target.c.inc | ||
234 | +++ b/tcg/riscv/tcg-target.c.inc | ||
235 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) | ||
236 | } | ||
237 | } | ||
238 | |||
239 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) | ||
240 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, | ||
241 | + const TCGHelperInfo *info) | ||
242 | { | ||
243 | tcg_out_call_int(s, arg, false); | ||
244 | } | ||
245 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
246 | tcg_out_movi(s, TCG_TYPE_PTR, a2, oi); | ||
247 | tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr); | ||
248 | |||
249 | - tcg_out_call(s, qemu_ld_helpers[opc & MO_SSIZE]); | ||
250 | + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false); | ||
251 | tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); | ||
252 | |||
253 | tcg_out_goto(s, l->raddr); | ||
254 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
255 | tcg_out_movi(s, TCG_TYPE_PTR, a3, oi); | ||
256 | tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr); | ||
257 | |||
258 | - tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); | ||
259 | + tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); | ||
260 | |||
261 | tcg_out_goto(s, l->raddr); | ||
262 | return true; | ||
263 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | ||
264 | index XXXXXXX..XXXXXXX 100644 | ||
265 | --- a/tcg/s390x/tcg-target.c.inc | ||
266 | +++ b/tcg/s390x/tcg-target.c.inc | ||
267 | @@ -XXX,XX +XXX,XX @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, | ||
268 | tgen_branch(s, cc, l); | ||
269 | } | ||
270 | |||
271 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) | ||
272 | +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest) | ||
273 | { | ||
274 | ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1; | ||
275 | if (off == (int32_t)off) { | ||
276 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) | ||
277 | } | ||
278 | } | ||
279 | |||
280 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, | ||
281 | + const TCGHelperInfo *info) | ||
205 | +{ | 282 | +{ |
206 | + uint64_t l, h; | 283 | + tcg_out_call_int(s, dest); |
207 | + uint32_t tmp; | ||
208 | + | ||
209 | + /* The load must be paired with the store to guarantee not tearing. */ | ||
210 | + asm("0: ldxp %[l], %[h], %[mem]\n\t" | ||
211 | + "stxp %w[tmp], %[l], %[h], %[mem]\n\t" | ||
212 | + "cbnz %w[tmp], 0b" | ||
213 | + : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h)); | ||
214 | + | ||
215 | + return int128_make128(l, h); | ||
216 | +} | 284 | +} |
217 | + | 285 | + |
218 | +static inline void atomic16_set(Int128 *ptr, Int128 val) | 286 | static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data, |
219 | +{ | 287 | TCGReg base, TCGReg index, int disp) |
220 | + uint64_t l = int128_getlo(val), h = int128_gethi(val); | 288 | { |
221 | + uint64_t t1, t2; | 289 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) |
222 | + | 290 | } |
223 | + /* Load into temporaries to acquire the exclusive access lock. */ | 291 | tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); |
224 | + asm("0: ldxp %[t1], %[t2], %[mem]\n\t" | 292 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); |
225 | + "stxp %w[t1], %[l], %[h], %[mem]\n\t" | 293 | - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); |
226 | + "cbnz %w[t1], 0b" | 294 | + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); |
227 | + : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2) | 295 | tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); |
228 | + : [l] "r"(l), [h] "r"(h)); | 296 | |
229 | +} | 297 | tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); |
230 | + | 298 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) |
231 | +# define HAVE_ATOMIC128 1 | 299 | } |
232 | +#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128 | 300 | tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); |
233 | +static inline Int128 atomic16_read(Int128 *ptr) | 301 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); |
234 | +{ | 302 | - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); |
235 | + /* Maybe replace 0 with 0, returning the old value. */ | 303 | + tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); |
236 | + return atomic16_cmpxchg(ptr, 0, 0); | 304 | |
237 | +} | 305 | tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); |
238 | + | 306 | return true; |
239 | +static inline void atomic16_set(Int128 *ptr, Int128 val) | 307 | diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc |
240 | +{ | 308 | index XXXXXXX..XXXXXXX 100644 |
241 | + Int128 old = *ptr, cmp; | 309 | --- a/tcg/sparc64/tcg-target.c.inc |
242 | + do { | 310 | +++ b/tcg/sparc64/tcg-target.c.inc |
243 | + cmp = old; | 311 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, |
244 | + old = atomic16_cmpxchg(ptr, cmp, val); | 312 | } |
245 | + } while (old != cmp); | 313 | } |
246 | +} | 314 | |
247 | + | 315 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) |
248 | +# define HAVE_ATOMIC128 1 | 316 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, |
249 | +#else | 317 | + const TCGHelperInfo *info) |
250 | +/* Fallback definitions that must be optimized away, or error. */ | 318 | { |
251 | +Int128 __attribute__((error("unsupported atomic"))) | 319 | tcg_out_call_nodelay(s, dest, false); |
252 | + atomic16_read(Int128 *ptr); | 320 | tcg_out_nop(s); |
253 | +void __attribute__((error("unsupported atomic"))) | 321 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc |
254 | + atomic16_set(Int128 *ptr, Int128 val); | 322 | index XXXXXXX..XXXXXXX 100644 |
255 | +# define HAVE_ATOMIC128 0 | 323 | --- a/tcg/tci/tcg-target.c.inc |
256 | +#endif /* Some definition for HAVE_ATOMIC128 */ | 324 | +++ b/tcg/tci/tcg-target.c.inc |
257 | + | 325 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, |
258 | +#endif /* QEMU_ATOMIC128_H */ | 326 | } |
259 | diff --git a/tcg/tcg.h b/tcg/tcg.h | 327 | |
260 | index XXXXXXX..XXXXXXX 100644 | 328 | static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, |
261 | --- a/tcg/tcg.h | 329 | - ffi_cif *cif) |
262 | +++ b/tcg/tcg.h | 330 | + const TCGHelperInfo *info) |
263 | @@ -XXX,XX +XXX,XX @@ | 331 | { |
264 | #include "qemu/queue.h" | 332 | + ffi_cif *cif = info->cif; |
265 | #include "tcg-mo.h" | 333 | tcg_insn_unit insn = 0; |
266 | #include "tcg-target.h" | 334 | uint8_t which; |
267 | +#include "qemu/int128.h" | 335 | |
268 | |||
269 | /* XXX: make safe guess about sizes */ | ||
270 | #define MAX_OP_PER_INSTR 266 | ||
271 | @@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_ALL(xchg) | ||
272 | #undef GEN_ATOMIC_HELPER | ||
273 | #endif /* CONFIG_SOFTMMU */ | ||
274 | |||
275 | -#ifdef CONFIG_ATOMIC128 | ||
276 | -#include "qemu/int128.h" | ||
277 | - | ||
278 | -/* These aren't really a "proper" helpers because TCG cannot manage Int128. | ||
279 | - However, use the same format as the others, for use by the backends. */ | ||
280 | +/* | ||
281 | + * These aren't really "proper" helpers because TCG cannot manage Int128. | ||
282 | + * However, use the same format as the others, for use by the backends. | ||
283 | + * | ||
284 | + * The cmpxchg functions are only defined if HAVE_CMPXCHG128; | ||
285 | + * the ld/st functions are only defined if HAVE_ATOMIC128, | ||
286 | + * as defined by <qemu/atomic128.h>. | ||
287 | + */ | ||
288 | Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, | ||
289 | Int128 cmpv, Int128 newv, | ||
290 | TCGMemOpIdx oi, uintptr_t retaddr); | ||
291 | @@ -XXX,XX +XXX,XX @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, | ||
292 | void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, | ||
293 | TCGMemOpIdx oi, uintptr_t retaddr); | ||
294 | |||
295 | -#endif /* CONFIG_ATOMIC128 */ | ||
296 | - | ||
297 | #endif /* TCG_H */ | ||
298 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
299 | index XXXXXXX..XXXXXXX 100644 | ||
300 | --- a/accel/tcg/cputlb.c | ||
301 | +++ b/accel/tcg/cputlb.c | ||
302 | @@ -XXX,XX +XXX,XX @@ | ||
303 | #include "exec/log.h" | ||
304 | #include "exec/helper-proto.h" | ||
305 | #include "qemu/atomic.h" | ||
306 | +#include "qemu/atomic128.h" | ||
307 | |||
308 | /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ | ||
309 | /* #define DEBUG_TLB */ | ||
310 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
311 | #include "atomic_template.h" | ||
312 | #endif | ||
313 | |||
314 | -#ifdef CONFIG_ATOMIC128 | ||
315 | +#if HAVE_CMPXCHG128 || HAVE_ATOMIC128 | ||
316 | #define DATA_SIZE 16 | ||
317 | #include "atomic_template.h" | ||
318 | #endif | ||
319 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c | ||
320 | index XXXXXXX..XXXXXXX 100644 | ||
321 | --- a/accel/tcg/user-exec.c | ||
322 | +++ b/accel/tcg/user-exec.c | ||
323 | @@ -XXX,XX +XXX,XX @@ | ||
324 | #include "exec/cpu_ldst.h" | ||
325 | #include "translate-all.h" | ||
326 | #include "exec/helper-proto.h" | ||
327 | +#include "qemu/atomic128.h" | ||
328 | |||
329 | #undef EAX | ||
330 | #undef ECX | ||
331 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
332 | /* The following is only callable from other helpers, and matches up | ||
333 | with the softmmu version. */ | ||
334 | |||
335 | -#ifdef CONFIG_ATOMIC128 | ||
336 | +#if HAVE_ATOMIC128 || HAVE_CMPXCHG128 | ||
337 | |||
338 | #undef EXTRA_ARGS | ||
339 | #undef ATOMIC_NAME | ||
340 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
341 | |||
342 | #define DATA_SIZE 16 | ||
343 | #include "atomic_template.h" | ||
344 | -#endif /* CONFIG_ATOMIC128 */ | ||
345 | +#endif | ||
346 | diff --git a/configure b/configure | ||
347 | index XXXXXXX..XXXXXXX 100755 | ||
348 | --- a/configure | ||
349 | +++ b/configure | ||
350 | @@ -XXX,XX +XXX,XX @@ EOF | ||
351 | fi | ||
352 | fi | ||
353 | |||
354 | +cmpxchg128=no | ||
355 | +if test "$int128" = yes -a "$atomic128" = no; then | ||
356 | + cat > $TMPC << EOF | ||
357 | +int main(void) | ||
358 | +{ | ||
359 | + unsigned __int128 x = 0, y = 0; | ||
360 | + __sync_val_compare_and_swap_16(&x, y, x); | ||
361 | + return 0; | ||
362 | +} | ||
363 | +EOF | ||
364 | + if compile_prog "" "" ; then | ||
365 | + cmpxchg128=yes | ||
366 | + fi | ||
367 | +fi | ||
368 | + | ||
369 | ######################################### | ||
370 | # See if 64-bit atomic operations are supported. | ||
371 | # Note that without __atomic builtins, we can only | ||
372 | @@ -XXX,XX +XXX,XX @@ if test "$atomic128" = "yes" ; then | ||
373 | echo "CONFIG_ATOMIC128=y" >> $config_host_mak | ||
374 | fi | ||
375 | |||
376 | +if test "$cmpxchg128" = "yes" ; then | ||
377 | + echo "CONFIG_CMPXCHG128=y" >> $config_host_mak | ||
378 | +fi | ||
379 | + | ||
380 | if test "$atomic64" = "yes" ; then | ||
381 | echo "CONFIG_ATOMIC64=y" >> $config_host_mak | ||
382 | fi | ||
383 | -- | 336 | -- |
384 | 2.17.2 | 337 | 2.34.1 |
385 | 338 | ||
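A recurring shape in the conversion above: when a hook implemented by every backend grows a parameter that only one backend needs, the old body is kept as a *_int variant for internal callers (the qemu_ld/st slow paths), and the public hook becomes a thin wrapper. A toy version of the pattern, with hypothetical names:

    #include <stdio.h>

    typedef struct { const char *name; } HelperInfo;  /* stands in for TCGHelperInfo */

    /* The original emission logic survives as the _int variant, so
     * internal callers that have no HelperInfo keep working unchanged. */
    static void emit_call_int(const char *target)
    {
        printf("call %s\n", target);
    }

    /* The public hook gains the parameter; most backends ignore it
     * (in this series, only TCI consults info->cif). */
    static void emit_call(const char *target, const HelperInfo *info)
    {
        (void)info;
        emit_call_int(target);
    }

    int main(void)
    {
        HelperInfo hi = { "helper_foo" };     /* hypothetical helper */
        emit_call("helper_foo", &hi);         /* the tcg_reg_alloc_call path */
        emit_call_int("slow_path_helper");    /* a ldst slow-path caller */
        return 0;
    }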
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | When called from syscall(), we are not within a TB and pc == 0. |
---|---|---|---|
2 | We can skip the check for invalidating the current TB. | ||
2 | 3 | ||
3 | This plugs two 4-byte holes in 64-bit. | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | |||
5 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
6 | Message-Id: <20181010144853.13005-4-cota@braap.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 6 | --- |
9 | tcg/tcg.h | 2 +- | 7 | accel/tcg/tb-maint.c | 78 ++++++++++++++++++++++++-------------------- |
10 | 1 file changed, 1 insertion(+), 1 deletion(-) | 8 | 1 file changed, 43 insertions(+), 35 deletions(-) |
11 | 9 | ||
12 | diff --git a/tcg/tcg.h b/tcg/tcg.h | 10 | diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c |
13 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/tcg.h | 12 | --- a/accel/tcg/tb-maint.c |
15 | +++ b/tcg/tcg.h | 13 | +++ b/accel/tcg/tb-maint.c |
16 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGProfile { | 14 | @@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_page(tb_page_addr_t addr) |
17 | int64_t tb_count; | 15 | */ |
18 | int64_t op_count; /* total insn count */ | 16 | bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc) |
19 | int op_count_max; /* max insn per TB */ | 17 | { |
20 | - int64_t temp_count; | 18 | - assert(pc != 0); |
21 | int temp_count_max; | 19 | -#ifdef TARGET_HAS_PRECISE_SMC |
22 | + int64_t temp_count; | 20 | - assert_memory_lock(); |
23 | int64_t del_op_count; | 21 | - { |
24 | int64_t code_in_len; | 22 | - TranslationBlock *current_tb = tcg_tb_lookup(pc); |
25 | int64_t code_out_len; | 23 | - bool current_tb_modified = false; |
24 | - TranslationBlock *tb; | ||
25 | - PageForEachNext n; | ||
26 | + TranslationBlock *current_tb; | ||
27 | + bool current_tb_modified; | ||
28 | + TranslationBlock *tb; | ||
29 | + PageForEachNext n; | ||
30 | |||
31 | - addr &= TARGET_PAGE_MASK; | ||
32 | - | ||
33 | - PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) { | ||
34 | - if (current_tb == tb && | ||
35 | - (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) { | ||
36 | - /* | ||
37 | - * If we are modifying the current TB, we must stop its | ||
38 | - * execution. We could be more precise by checking that | ||
39 | - * the modification is after the current PC, but it would | ||
40 | - * require a specialized function to partially restore | ||
41 | - * the CPU state. | ||
42 | - */ | ||
43 | - current_tb_modified = true; | ||
44 | - cpu_restore_state_from_tb(current_cpu, current_tb, pc); | ||
45 | - } | ||
46 | - tb_phys_invalidate__locked(tb); | ||
47 | - } | ||
48 | - | ||
49 | - if (current_tb_modified) { | ||
50 | - /* Force execution of one insn next time. */ | ||
51 | - CPUState *cpu = current_cpu; | ||
52 | - cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); | ||
53 | - return true; | ||
54 | - } | ||
55 | + /* | ||
56 | + * Without precise smc semantics, or when outside of a TB, | ||
57 | + * we can skip to invalidate. | ||
58 | + */ | ||
59 | +#ifndef TARGET_HAS_PRECISE_SMC | ||
60 | + pc = 0; | ||
61 | +#endif | ||
62 | + if (!pc) { | ||
63 | + tb_invalidate_phys_page(addr); | ||
64 | + return false; | ||
65 | + } | ||
66 | + | ||
67 | + assert_memory_lock(); | ||
68 | + current_tb = tcg_tb_lookup(pc); | ||
69 | + | ||
70 | + addr &= TARGET_PAGE_MASK; | ||
71 | + current_tb_modified = false; | ||
72 | + | ||
73 | + PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) { | ||
74 | + if (current_tb == tb && | ||
75 | + (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) { | ||
76 | + /* | ||
77 | + * If we are modifying the current TB, we must stop its | ||
78 | + * execution. We could be more precise by checking that | ||
79 | + * the modification is after the current PC, but it would | ||
80 | + * require a specialized function to partially restore | ||
81 | + * the CPU state. | ||
82 | + */ | ||
83 | + current_tb_modified = true; | ||
84 | + cpu_restore_state_from_tb(current_cpu, current_tb, pc); | ||
85 | + } | ||
86 | + tb_phys_invalidate__locked(tb); | ||
87 | + } | ||
88 | + | ||
89 | + if (current_tb_modified) { | ||
90 | + /* Force execution of one insn next time. */ | ||
91 | + CPUState *cpu = current_cpu; | ||
92 | + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); | ||
93 | + return true; | ||
94 | } | ||
95 | -#else | ||
96 | - tb_invalidate_phys_page(addr); | ||
97 | -#endif /* TARGET_HAS_PRECISE_SMC */ | ||
98 | return false; | ||
99 | } | ||
100 | #else | ||
26 | -- | 101 | -- |
27 | 2.17.2 | 102 | 2.34.1 |
28 | 103 | ||
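The restructuring above trades an #ifdef'ed function body for a normalized parameter: when TARGET_HAS_PRECISE_SMC is not defined, pc is forced to 0, so the no-SMC build and the from-syscall call site (pc == 0) share one early-exit path. A reduced sketch of the idiom (all names hypothetical):

    #include <stdbool.h>
    #include <stdio.h>

    /* #define TARGET_HAS_PRECISE_SMC */

    static void invalidate_page(unsigned long addr)
    {
        printf("invalidate 0x%lx\n", addr);
    }

    static bool invalidate_unwind(unsigned long addr, unsigned long pc)
    {
    #ifndef TARGET_HAS_PRECISE_SMC
        pc = 0;            /* fold the config choice into the parameter */
    #endif
        if (!pc) {         /* outside a TB (e.g. from syscall), or SMC disabled */
            invalidate_page(addr);
            return false;
        }
        /* ...precise-SMC handling of the current TB would go here... */
        return true;
    }

    int main(void)
    {
        invalidate_unwind(0x1000, 0);
        return 0;
    }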
1 | Reviewed-by: Emilio G. Cota <cota@braap.org> | 1 | Because we allow lockless lookups, we have to be careful |
---|---|---|---|
2 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | 2 | when a node is freed. Use RCU to delay the free until it is safe. |
3 | |||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | --- | 6 | --- |
5 | target/i386/mem_helper.c | 9 ++++----- | 7 | accel/tcg/user-exec.c | 18 ++++++++++-------- |
6 | 1 file changed, 4 insertions(+), 5 deletions(-) | 8 | 1 file changed, 10 insertions(+), 8 deletions(-) |
7 | 9 | ||
8 | diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c | 10 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c |
9 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
10 | --- a/target/i386/mem_helper.c | 12 | --- a/accel/tcg/user-exec.c |
11 | +++ b/target/i386/mem_helper.c | 13 | +++ b/accel/tcg/user-exec.c |
12 | @@ -XXX,XX +XXX,XX @@ | 14 | @@ -XXX,XX +XXX,XX @@ |
13 | #include "exec/exec-all.h" | 15 | #include "exec/exec-all.h" |
16 | #include "tcg/tcg.h" | ||
17 | #include "qemu/bitops.h" | ||
18 | +#include "qemu/rcu.h" | ||
14 | #include "exec/cpu_ldst.h" | 19 | #include "exec/cpu_ldst.h" |
15 | #include "qemu/int128.h" | 20 | #include "exec/translate-all.h" |
16 | +#include "qemu/atomic128.h" | 21 | #include "exec/helper-proto.h" |
17 | #include "tcg.h" | 22 | @@ -XXX,XX +XXX,XX @@ bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, |
18 | 23 | } | |
19 | void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) | 24 | |
20 | @@ -XXX,XX +XXX,XX @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) | 25 | typedef struct PageFlagsNode { |
21 | 26 | + struct rcu_head rcu; | |
22 | if ((a0 & 0xf) != 0) { | 27 | IntervalTreeNode itree; |
23 | raise_exception_ra(env, EXCP0D_GPF, ra); | 28 | int flags; |
24 | - } else { | 29 | } PageFlagsNode; |
25 | -#ifndef CONFIG_ATOMIC128 | 30 | @@ -XXX,XX +XXX,XX @@ static bool pageflags_unset(target_ulong start, target_ulong last) |
26 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | 31 | } |
27 | -#else | 32 | } else if (p_last <= last) { |
28 | + } else if (HAVE_CMPXCHG128) { | 33 | /* Range completely covers node -- remove it. */ |
29 | int eflags = cpu_cc_compute_all(env, CC_OP); | 34 | - g_free(p); |
30 | 35 | + g_free_rcu(p, rcu); | |
31 | Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); | 36 | } else { |
32 | @@ -XXX,XX +XXX,XX @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) | 37 | /* Truncate the node from the start. */ |
33 | eflags &= ~CC_Z; | 38 | p->itree.start = last + 1; |
39 | @@ -XXX,XX +XXX,XX @@ static void pageflags_create_merge(target_ulong start, target_ulong last, | ||
40 | if (prev) { | ||
41 | if (next) { | ||
42 | prev->itree.last = next->itree.last; | ||
43 | - g_free(next); | ||
44 | + g_free_rcu(next, rcu); | ||
45 | } else { | ||
46 | prev->itree.last = last; | ||
34 | } | 47 | } |
35 | CC_SRC = eflags; | 48 | @@ -XXX,XX +XXX,XX @@ static bool pageflags_set_clear(target_ulong start, target_ulong last, |
36 | -#endif | 49 | p->flags = merge_flags; |
37 | + } else { | 50 | } else { |
38 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | 51 | interval_tree_remove(&p->itree, &pageflags_root); |
52 | - g_free(p); | ||
53 | + g_free_rcu(p, rcu); | ||
54 | } | ||
55 | goto done; | ||
56 | } | ||
57 | @@ -XXX,XX +XXX,XX @@ static bool pageflags_set_clear(target_ulong start, target_ulong last, | ||
58 | p->flags = merge_flags; | ||
59 | } else { | ||
60 | interval_tree_remove(&p->itree, &pageflags_root); | ||
61 | - g_free(p); | ||
62 | + g_free_rcu(p, rcu); | ||
63 | } | ||
64 | if (p_last < last) { | ||
65 | start = p_last + 1; | ||
66 | @@ -XXX,XX +XXX,XX @@ static bool pageflags_set_clear(target_ulong start, target_ulong last, | ||
67 | p->itree.start = last + 1; | ||
68 | interval_tree_insert(&p->itree, &pageflags_root); | ||
69 | } else { | ||
70 | - g_free(p); | ||
71 | + g_free_rcu(p, rcu); | ||
72 | goto restart; | ||
73 | } | ||
74 | if (set_flags) { | ||
75 | @@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, | ||
76 | #define TBD_MASK (TARGET_PAGE_MASK * TPD_PAGES) | ||
77 | |||
78 | typedef struct TargetPageDataNode { | ||
79 | + struct rcu_head rcu; | ||
80 | IntervalTreeNode itree; | ||
81 | char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned)); | ||
82 | } TargetPageDataNode; | ||
83 | @@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong end) | ||
84 | n = next, | ||
85 | next = next ? interval_tree_iter_next(n, start, last) : NULL) { | ||
86 | target_ulong n_start, n_last, p_ofs, p_len; | ||
87 | - TargetPageDataNode *t; | ||
88 | + TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree); | ||
89 | |||
90 | if (n->start >= start && n->last <= last) { | ||
91 | interval_tree_remove(n, &targetdata_root); | ||
92 | - g_free(n); | ||
93 | + g_free_rcu(t, rcu); | ||
94 | continue; | ||
95 | } | ||
96 | |||
97 | @@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong end) | ||
98 | n_last = MIN(last, n->last); | ||
99 | p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS; | ||
100 | |||
101 | - t = container_of(n, TargetPageDataNode, itree); | ||
102 | memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE); | ||
39 | } | 103 | } |
40 | } | 104 | } |
41 | #endif | ||
42 | -- | 105 | -- |
43 | 2.17.2 | 106 | 2.34.1 |
44 | 107 | ||
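The mechanical change above is small, but the idiom is the point: each node that lockless readers may still be traversing gains a struct rcu_head, and every g_free() on such a node becomes g_free_rcu(), deferring the actual free until after an RCU grace period. Shape of the change using QEMU's qemu/rcu.h and qemu/interval-tree.h (a fragment meant to be read inside the QEMU tree, with a hypothetical tree root):

    #include "qemu/osdep.h"
    #include "qemu/rcu.h"
    #include "qemu/interval-tree.h"

    static IntervalTreeRoot example_root;  /* hypothetical root, cf. pageflags_root */

    typedef struct PageNode {
        struct rcu_head rcu;   /* storage for the deferred-free callback */
        IntervalTreeNode itree;
        int flags;
    } PageNode;

    static void page_node_remove(PageNode *p)
    {
        /* Unlink under the writer (mmap) lock, as before... */
        interval_tree_remove(&p->itree, &example_root);
        /* ...but let in-flight lockless readers drain before freeing. */
        g_free_rcu(p, rcu);    /* was: g_free(p) */
    }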
1 | Reviewed-by: David Hildenbrand <david@redhat.com> | 1 | As in page_get_flags, we need to try again with the mmap |
---|---|---|---|
2 | lock held if we fail a page lookup. | ||
3 | |||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
3 | --- | 6 | --- |
4 | target/s390x/mem_helper.c | 40 +++++++++++++++++++-------------------- | 7 | accel/tcg/user-exec.c | 41 ++++++++++++++++++++++++++++++++++------- |
5 | target/s390x/translate.c | 25 +++++++++++++++++------- | 8 | 1 file changed, 34 insertions(+), 7 deletions(-) |
6 | 2 files changed, 38 insertions(+), 27 deletions(-) | ||
7 | 9 | ||
8 | diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c | 10 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c |
9 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
10 | --- a/target/s390x/mem_helper.c | 12 | --- a/accel/tcg/user-exec.c |
11 | +++ b/target/s390x/mem_helper.c | 13 | +++ b/accel/tcg/user-exec.c |
12 | @@ -XXX,XX +XXX,XX @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, | 14 | @@ -XXX,XX +XXX,XX @@ void page_set_flags(target_ulong start, target_ulong end, int flags) |
13 | Int128 oldv; | 15 | int page_check_range(target_ulong start, target_ulong len, int flags) |
14 | bool fail; | ||
15 | |||
16 | - if (!HAVE_CMPXCHG128) { | ||
17 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
18 | - } | ||
19 | + assert(HAVE_CMPXCHG128); | ||
20 | |||
21 | mem_idx = cpu_mmu_index(env, false); | ||
22 | oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
23 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) | ||
24 | { | 16 | { |
25 | uintptr_t ra = GETPC(); | 17 | target_ulong last; |
26 | uint64_t hi, lo; | 18 | + int locked; /* tri-state: =0: unlocked, +1: global, -1: local */ |
27 | + int mem_idx; | 19 | + int ret; |
28 | + TCGMemOpIdx oi; | 20 | |
29 | + Int128 v; | 21 | if (len == 0) { |
30 | 22 | return 0; /* trivial length */ | |
31 | - if (HAVE_ATOMIC128) { | 23 | @@ -XXX,XX +XXX,XX @@ int page_check_range(target_ulong start, target_ulong len, int flags) |
32 | - int mem_idx = cpu_mmu_index(env, false); | 24 | return -1; /* wrap around */ |
33 | - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | 25 | } |
34 | - Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); | 26 | |
35 | - hi = int128_gethi(v); | 27 | + locked = have_mmap_lock(); |
36 | - lo = int128_getlo(v); | 28 | while (true) { |
37 | - } else { | 29 | PageFlagsNode *p = pageflags_find(start, last); |
38 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | 30 | int missing; |
39 | - } | 31 | |
40 | + assert(HAVE_ATOMIC128); | 32 | if (!p) { |
33 | - return -1; /* entire region invalid */ | ||
34 | + if (!locked) { | ||
35 | + /* | ||
36 | + * Lockless lookups have false negatives. | ||
37 | + * Retry with the lock held. | ||
38 | + */ | ||
39 | + mmap_lock(); | ||
40 | + locked = -1; | ||
41 | + p = pageflags_find(start, last); | ||
42 | + } | ||
43 | + if (!p) { | ||
44 | + ret = -1; /* entire region invalid */ | ||
45 | + break; | ||
46 | + } | ||
47 | } | ||
48 | if (start < p->itree.start) { | ||
49 | - return -1; /* initial bytes invalid */ | ||
50 | + ret = -1; /* initial bytes invalid */ | ||
51 | + break; | ||
52 | } | ||
53 | |||
54 | missing = flags & ~p->flags; | ||
55 | if (missing & PAGE_READ) { | ||
56 | - return -1; /* page not readable */ | ||
57 | + ret = -1; /* page not readable */ | ||
58 | + break; | ||
59 | } | ||
60 | if (missing & PAGE_WRITE) { | ||
61 | if (!(p->flags & PAGE_WRITE_ORG)) { | ||
62 | - return -1; /* page not writable */ | ||
63 | + ret = -1; /* page not writable */ | ||
64 | + break; | ||
65 | } | ||
66 | /* Asking about writable, but has been protected: undo. */ | ||
67 | if (!page_unprotect(start, 0)) { | ||
68 | - return -1; | ||
69 | + ret = -1; | ||
70 | + break; | ||
71 | } | ||
72 | /* TODO: page_unprotect should take a range, not a single page. */ | ||
73 | if (last - start < TARGET_PAGE_SIZE) { | ||
74 | - return 0; /* ok */ | ||
75 | + ret = 0; /* ok */ | ||
76 | + break; | ||
77 | } | ||
78 | start += TARGET_PAGE_SIZE; | ||
79 | continue; | ||
80 | } | ||
81 | |||
82 | if (last <= p->itree.last) { | ||
83 | - return 0; /* ok */ | ||
84 | + ret = 0; /* ok */ | ||
85 | + break; | ||
86 | } | ||
87 | start = p->itree.last + 1; | ||
88 | } | ||
41 | + | 89 | + |
42 | + mem_idx = cpu_mmu_index(env, false); | 90 | + /* Release the lock if acquired locally. */ |
43 | + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | 91 | + if (locked < 0) { |
44 | + v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); | 92 | + mmap_unlock(); |
45 | + hi = int128_gethi(v); | 93 | + } |
46 | + lo = int128_getlo(v); | ||
47 | |||
48 | env->retxl = lo; | ||
49 | return hi; | ||
50 | @@ -XXX,XX +XXX,XX @@ void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, | ||
51 | uint64_t low, uint64_t high) | ||
52 | { | ||
53 | uintptr_t ra = GETPC(); | ||
54 | + int mem_idx; | ||
55 | + TCGMemOpIdx oi; | ||
56 | + Int128 v; | ||
57 | |||
58 | - if (HAVE_ATOMIC128) { | ||
59 | - int mem_idx = cpu_mmu_index(env, false); | ||
60 | - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
61 | - Int128 v = int128_make128(low, high); | ||
62 | - helper_atomic_sto_be_mmu(env, addr, v, oi, ra); | ||
63 | - } else { | ||
64 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
65 | - } | ||
66 | + assert(HAVE_ATOMIC128); | ||
67 | + | ||
68 | + mem_idx = cpu_mmu_index(env, false); | ||
69 | + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
70 | + v = int128_make128(low, high); | ||
71 | + helper_atomic_sto_be_mmu(env, addr, v, oi, ra); | ||
72 | } | ||
73 | |||
74 | /* Execute instruction. This instruction executes an insn modified with | ||
75 | diff --git a/target/s390x/translate.c b/target/s390x/translate.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/s390x/translate.c | ||
78 | +++ b/target/s390x/translate.c | ||
79 | @@ -XXX,XX +XXX,XX @@ | ||
80 | #include "trace-tcg.h" | ||
81 | #include "exec/translator.h" | ||
82 | #include "exec/log.h" | ||
83 | +#include "qemu/atomic128.h" | ||
84 | |||
85 | |||
86 | /* Information that (most) every instruction needs to manipulate. */ | ||
87 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) | ||
88 | int r3 = get_field(s->fields, r3); | ||
89 | int d2 = get_field(s->fields, d2); | ||
90 | int b2 = get_field(s->fields, b2); | ||
91 | + DisasJumpType ret = DISAS_NEXT; | ||
92 | TCGv_i64 addr; | ||
93 | TCGv_i32 t_r1, t_r3; | ||
94 | |||
95 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) | ||
96 | addr = get_address(s, 0, b2, d2); | ||
97 | t_r1 = tcg_const_i32(r1); | ||
98 | t_r3 = tcg_const_i32(r3); | ||
99 | - if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
100 | + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { | ||
101 | + gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); | ||
102 | + } else if (HAVE_CMPXCHG128) { | ||
103 | gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3); | ||
104 | } else { | ||
105 | - gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); | ||
106 | + gen_helper_exit_atomic(cpu_env); | ||
107 | + ret = DISAS_NORETURN; | ||
108 | } | ||
109 | tcg_temp_free_i64(addr); | ||
110 | tcg_temp_free_i32(t_r1); | ||
111 | tcg_temp_free_i32(t_r3); | ||
112 | |||
113 | set_cc_static(s); | ||
114 | - return DISAS_NEXT; | ||
115 | + return ret; | 94 | + return ret; |
116 | } | 95 | } |
117 | 96 | ||
118 | static DisasJumpType op_csst(DisasContext *s, DisasOps *o) | 97 | void page_protect(tb_page_addr_t address) |
119 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o) | ||
120 | |||
121 | static DisasJumpType op_lpq(DisasContext *s, DisasOps *o) | ||
122 | { | ||
123 | - if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
124 | + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { | ||
125 | + gen_helper_lpq(o->out, cpu_env, o->in2); | ||
126 | + } else if (HAVE_ATOMIC128) { | ||
127 | gen_helper_lpq_parallel(o->out, cpu_env, o->in2); | ||
128 | } else { | ||
129 | - gen_helper_lpq(o->out, cpu_env, o->in2); | ||
130 | + gen_helper_exit_atomic(cpu_env); | ||
131 | + return DISAS_NORETURN; | ||
132 | } | ||
133 | return_low128(o->out2); | ||
134 | return DISAS_NEXT; | ||
135 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o) | ||
136 | |||
137 | static DisasJumpType op_stpq(DisasContext *s, DisasOps *o) | ||
138 | { | ||
139 | - if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
140 | + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { | ||
141 | + gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); | ||
142 | + } else if (HAVE_ATOMIC128) { | ||
143 | gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out); | ||
144 | } else { | ||
145 | - gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); | ||
146 | + gen_helper_exit_atomic(cpu_env); | ||
147 | + return DISAS_NORETURN; | ||
148 | } | ||
149 | return DISAS_NEXT; | ||
150 | } | ||
151 | -- | 98 | -- |
152 | 2.17.2 | 99 | 2.34.1 |
153 | 100 | ||
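A minimal standalone sketch of the tri-state locking pattern used in page_check_range above: do a lockless lookup first, retry once with the lock held on a miss, and unlock only if the lock was taken locally. The lookup(), take_lock() and drop_lock() helpers below are hypothetical stand-ins for pageflags_find(), mmap_lock() and mmap_unlock(), not QEMU APIs; only the per-thread counter mirrors how QEMU's real have_mmap_lock() works.

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
    static __thread int lock_count;   /* per-thread, as in have_mmap_lock() */

    static int have_lock(void) { return lock_count > 0; }
    static void take_lock(void) { pthread_mutex_lock(&mu); lock_count++; }
    static void drop_lock(void) { lock_count--; pthread_mutex_unlock(&mu); }

    /* Hypothetical lookup; assume it can miss spuriously when unlocked. */
    static void *lookup(int key)
    {
        static int table[4];
        return (key >= 0 && key < 4) ? &table[key] : NULL;
    }

    static int check(int key)
    {
        /* tri-state: 0 = unlocked, +1 = caller already held, -1 = taken here */
        int locked = have_lock();
        void *p = lookup(key);
        int ret;

        if (!p && !locked) {
            /* Lockless lookups have false negatives: retry under the lock. */
            take_lock();
            locked = -1;
            p = lookup(key);
        }
        ret = p ? 0 : -1;

        /* Release the lock only if it was acquired in this function. */
        if (locked < 0) {
            drop_lock();
        }
        return ret;
    }

    int main(void)
    {
        printf("%d %d\n", check(2), check(9));   /* prints: 0 -1 */
        return 0;
    }

The tri-state matters only on the unlock path: a caller that already held the lock (+1) keeps it across the call, while a lock taken just for the retry (-1) is dropped before returning.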
New patch | |||
---|---|---|---|
1 | 1 | From: Ilya Leoshkevich <iii@linux.ibm.com> | |
2 | |||
3 | Add a test that locklessly changes and exercises page protection bits | ||
4 | from various threads. This helps catch race conditions in the VMA | ||
5 | handling. | ||
6 | |||
7 | Acked-by: Alex Bennée <alex.bennee@linaro.org> | ||
8 | Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> | ||
9 | Message-Id: <20221223120252.513319-1-iii@linux.ibm.com> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | ||
12 | tests/tcg/multiarch/nop_func.h | 25 ++++ | ||
13 | tests/tcg/multiarch/munmap-pthread.c | 16 +-- | ||
14 | tests/tcg/multiarch/vma-pthread.c | 207 +++++++++++++++++++++++++++ | ||
15 | tests/tcg/multiarch/Makefile.target | 3 + | ||
16 | 4 files changed, 236 insertions(+), 15 deletions(-) | ||
17 | create mode 100644 tests/tcg/multiarch/nop_func.h | ||
18 | create mode 100644 tests/tcg/multiarch/vma-pthread.c | ||
19 | |||
20 | diff --git a/tests/tcg/multiarch/nop_func.h b/tests/tcg/multiarch/nop_func.h | ||
21 | new file mode 100644 | ||
22 | index XXXXXXX..XXXXXXX | ||
23 | --- /dev/null | ||
24 | +++ b/tests/tcg/multiarch/nop_func.h | ||
25 | @@ -XXX,XX +XXX,XX @@ | ||
26 | +/* | ||
27 | + * No-op functions that can be safely copied. | ||
28 | + * | ||
29 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
30 | + */ | ||
31 | +#ifndef NOP_FUNC_H | ||
32 | +#define NOP_FUNC_H | ||
33 | + | ||
34 | +static const char nop_func[] = { | ||
35 | +#if defined(__aarch64__) | ||
36 | + 0xc0, 0x03, 0x5f, 0xd6, /* ret */ | ||
37 | +#elif defined(__alpha__) | ||
38 | + 0x01, 0x80, 0xFA, 0x6B, /* ret */ | ||
39 | +#elif defined(__arm__) | ||
40 | + 0x1e, 0xff, 0x2f, 0xe1, /* bx lr */ | ||
41 | +#elif defined(__riscv) | ||
42 | + 0x67, 0x80, 0x00, 0x00, /* ret */ | ||
43 | +#elif defined(__s390__) | ||
44 | + 0x07, 0xfe, /* br %r14 */ | ||
45 | +#elif defined(__i386__) || defined(__x86_64__) | ||
46 | + 0xc3, /* ret */ | ||
47 | +#endif | ||
48 | +}; | ||
49 | + | ||
50 | +#endif | ||
51 | diff --git a/tests/tcg/multiarch/munmap-pthread.c b/tests/tcg/multiarch/munmap-pthread.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/tests/tcg/multiarch/munmap-pthread.c | ||
54 | +++ b/tests/tcg/multiarch/munmap-pthread.c | ||
55 | @@ -XXX,XX +XXX,XX @@ | ||
56 | #include <sys/mman.h> | ||
57 | #include <unistd.h> | ||
58 | |||
59 | -static const char nop_func[] = { | ||
60 | -#if defined(__aarch64__) | ||
61 | - 0xc0, 0x03, 0x5f, 0xd6, /* ret */ | ||
62 | -#elif defined(__alpha__) | ||
63 | - 0x01, 0x80, 0xFA, 0x6B, /* ret */ | ||
64 | -#elif defined(__arm__) | ||
65 | - 0x1e, 0xff, 0x2f, 0xe1, /* bx lr */ | ||
66 | -#elif defined(__riscv) | ||
67 | - 0x67, 0x80, 0x00, 0x00, /* ret */ | ||
68 | -#elif defined(__s390__) | ||
69 | - 0x07, 0xfe, /* br %r14 */ | ||
70 | -#elif defined(__i386__) || defined(__x86_64__) | ||
71 | - 0xc3, /* ret */ | ||
72 | -#endif | ||
73 | -}; | ||
74 | +#include "nop_func.h" | ||
75 | |||
76 | static void *thread_mmap_munmap(void *arg) | ||
77 | { | ||
78 | diff --git a/tests/tcg/multiarch/vma-pthread.c b/tests/tcg/multiarch/vma-pthread.c | ||
79 | new file mode 100644 | ||
80 | index XXXXXXX..XXXXXXX | ||
81 | --- /dev/null | ||
82 | +++ b/tests/tcg/multiarch/vma-pthread.c | ||
83 | @@ -XXX,XX +XXX,XX @@ | ||
84 | +/* | ||
85 | + * Test that VMA updates do not race. | ||
86 | + * | ||
87 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
88 | + * | ||
89 | + * Map a contiguous chunk of RWX memory. Split it into 8 equally sized | ||
90 | + * regions, each of which is guaranteed to have a certain combination of | ||
91 | + * protection bits set. | ||
92 | + * | ||
93 | + * Reader, writer and executor threads perform the respective operations on | ||
94 | + * pages, which are guaranteed to have the respective protection bit set. | ||
95 | + * Two mutator threads change the non-fixed protection bits randomly. | ||
96 | + */ | ||
97 | +#include <assert.h> | ||
98 | +#include <fcntl.h> | ||
99 | +#include <pthread.h> | ||
100 | +#include <stdbool.h> | ||
101 | +#include <stdlib.h> | ||
102 | +#include <string.h> | ||
103 | +#include <stdio.h> | ||
104 | +#include <time.h>      /* clock_gettime(), time() */
105 | +#include <sys/mman.h>
105 | +#include <unistd.h> | ||
106 | + | ||
107 | +#include "nop_func.h" | ||
108 | + | ||
109 | +#define PAGE_IDX_BITS 10 | ||
110 | +#define PAGE_COUNT (1 << PAGE_IDX_BITS) | ||
111 | +#define PAGE_IDX_MASK (PAGE_COUNT - 1) | ||
112 | +#define REGION_IDX_BITS 3 | ||
113 | +#define PAGE_IDX_R_MASK (1 << 7) | ||
114 | +#define PAGE_IDX_W_MASK (1 << 8) | ||
115 | +#define PAGE_IDX_X_MASK (1 << 9) | ||
116 | +#define REGION_MASK (PAGE_IDX_R_MASK | PAGE_IDX_W_MASK | PAGE_IDX_X_MASK) | ||
117 | +#define PAGES_PER_REGION (1 << (PAGE_IDX_BITS - REGION_IDX_BITS)) | ||
118 | + | ||
119 | +struct context { | ||
120 | + int pagesize; | ||
121 | + char *ptr; | ||
122 | + int dev_null_fd; | ||
123 | + volatile int mutator_count; | ||
124 | +}; | ||
125 | + | ||
126 | +static void *thread_read(void *arg) | ||
127 | +{ | ||
128 | + struct context *ctx = arg; | ||
129 | + ssize_t sret; | ||
130 | + size_t i, j; | ||
131 | + int ret; | ||
132 | + | ||
133 | + for (i = 0; ctx->mutator_count; i++) { | ||
134 | + char *p; | ||
135 | + | ||
136 | + j = (i & PAGE_IDX_MASK) | PAGE_IDX_R_MASK; | ||
137 | + p = &ctx->ptr[j * ctx->pagesize]; | ||
138 | + | ||
139 | + /* Read directly. */ | ||
140 | + ret = memcmp(p, nop_func, sizeof(nop_func)); | ||
141 | + if (ret != 0) { | ||
142 | + fprintf(stderr, "fail direct read %p\n", p); | ||
143 | + abort(); | ||
144 | + } | ||
145 | + | ||
146 | + /* Read indirectly. */ | ||
147 | + sret = write(ctx->dev_null_fd, p, 1); | ||
148 | + if (sret != 1) { | ||
149 | + if (sret < 0) { | ||
150 | + fprintf(stderr, "fail indirect read %p (%m)\n", p); | ||
151 | + } else { | ||
152 | + fprintf(stderr, "fail indirect read %p (%zd)\n", p, sret); | ||
153 | + } | ||
154 | + abort(); | ||
155 | + } | ||
156 | + } | ||
157 | + | ||
158 | + return NULL; | ||
159 | +} | ||
160 | + | ||
161 | +static void *thread_write(void *arg) | ||
162 | +{ | ||
163 | + struct context *ctx = arg; | ||
164 | + struct timespec *ts; | ||
165 | + size_t i, j; | ||
166 | + int ret; | ||
167 | + | ||
168 | + for (i = 0; ctx->mutator_count; i++) { | ||
169 | + j = (i & PAGE_IDX_MASK) | PAGE_IDX_W_MASK; | ||
170 | + | ||
171 | + /* Write directly. */ | ||
172 | + memcpy(&ctx->ptr[j * ctx->pagesize], nop_func, sizeof(nop_func)); | ||
173 | + | ||
174 | + /* Write using a syscall. */ | ||
175 | + ts = (struct timespec *)(&ctx->ptr[(j + 1) * ctx->pagesize] - | ||
176 | + sizeof(struct timespec)); | ||
177 | + ret = clock_gettime(CLOCK_REALTIME, ts); | ||
178 | + if (ret != 0) { | ||
179 | + fprintf(stderr, "fail indirect write %p (%m)\n", ts); | ||
180 | + abort(); | ||
181 | + } | ||
182 | + } | ||
183 | + | ||
184 | + return NULL; | ||
185 | +} | ||
186 | + | ||
187 | +static void *thread_execute(void *arg) | ||
188 | +{ | ||
189 | + struct context *ctx = arg; | ||
190 | + size_t i, j; | ||
191 | + | ||
192 | + for (i = 0; ctx->mutator_count; i++) { | ||
193 | + j = (i & PAGE_IDX_MASK) | PAGE_IDX_X_MASK; | ||
194 | + ((void(*)(void))&ctx->ptr[j * ctx->pagesize])(); | ||
195 | + } | ||
196 | + | ||
197 | + return NULL; | ||
198 | +} | ||
199 | + | ||
200 | +static void *thread_mutate(void *arg) | ||
201 | +{ | ||
202 | + size_t i, start_idx, end_idx, page_idx, tmp; | ||
203 | + struct context *ctx = arg; | ||
204 | + unsigned int seed; | ||
205 | + int prot, ret; | ||
206 | + | ||
207 | + seed = (unsigned int)time(NULL); | ||
208 | + for (i = 0; i < 50000; i++) { | ||
209 | + start_idx = rand_r(&seed) & PAGE_IDX_MASK; | ||
210 | + end_idx = rand_r(&seed) & PAGE_IDX_MASK; | ||
211 | + if (start_idx > end_idx) { | ||
212 | + tmp = start_idx; | ||
213 | + start_idx = end_idx; | ||
214 | + end_idx = tmp; | ||
215 | + } | ||
216 | + prot = rand_r(&seed) & (PROT_READ | PROT_WRITE | PROT_EXEC); | ||
217 | + for (page_idx = start_idx & REGION_MASK; page_idx <= end_idx; | ||
218 | + page_idx += PAGES_PER_REGION) { | ||
219 | + if (page_idx & PAGE_IDX_R_MASK) { | ||
220 | + prot |= PROT_READ; | ||
221 | + } | ||
222 | + if (page_idx & PAGE_IDX_W_MASK) { | ||
223 | + /* FIXME: qemu syscalls check for both read+write. */ | ||
224 | + prot |= PROT_WRITE | PROT_READ; | ||
225 | + } | ||
226 | + if (page_idx & PAGE_IDX_X_MASK) { | ||
227 | + prot |= PROT_EXEC; | ||
228 | + } | ||
229 | + } | ||
230 | + ret = mprotect(&ctx->ptr[start_idx * ctx->pagesize], | ||
231 | + (end_idx - start_idx + 1) * ctx->pagesize, prot); | ||
232 | + assert(ret == 0); | ||
233 | + } | ||
234 | + | ||
235 | + __atomic_fetch_sub(&ctx->mutator_count, 1, __ATOMIC_SEQ_CST); | ||
236 | + | ||
237 | + return NULL; | ||
238 | +} | ||
239 | + | ||
240 | +int main(void) | ||
241 | +{ | ||
242 | + pthread_t threads[5]; | ||
243 | + struct context ctx; | ||
244 | + size_t i; | ||
245 | + int ret; | ||
246 | + | ||
247 | + /* Without a template, nothing to test. */ | ||
248 | + if (sizeof(nop_func) == 0) { | ||
249 | + return EXIT_SUCCESS; | ||
250 | + } | ||
251 | + | ||
252 | + /* Initialize memory chunk. */ | ||
253 | + ctx.pagesize = getpagesize(); | ||
254 | + ctx.ptr = mmap(NULL, PAGE_COUNT * ctx.pagesize, | ||
255 | + PROT_READ | PROT_WRITE | PROT_EXEC, | ||
256 | + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | ||
257 | + assert(ctx.ptr != MAP_FAILED); | ||
258 | + for (i = 0; i < PAGE_COUNT; i++) { | ||
259 | + memcpy(&ctx.ptr[i * ctx.pagesize], nop_func, sizeof(nop_func)); | ||
260 | + } | ||
261 | + ctx.dev_null_fd = open("/dev/null", O_WRONLY); | ||
262 | + assert(ctx.dev_null_fd >= 0); | ||
263 | + ctx.mutator_count = 2; | ||
264 | + | ||
265 | + /* Start threads. */ | ||
266 | + ret = pthread_create(&threads[0], NULL, thread_read, &ctx); | ||
267 | + assert(ret == 0); | ||
268 | + ret = pthread_create(&threads[1], NULL, thread_write, &ctx); | ||
269 | + assert(ret == 0); | ||
270 | + ret = pthread_create(&threads[2], NULL, thread_execute, &ctx); | ||
271 | + assert(ret == 0); | ||
272 | + for (i = 3; i <= 4; i++) { | ||
273 | + ret = pthread_create(&threads[i], NULL, thread_mutate, &ctx); | ||
274 | + assert(ret == 0); | ||
275 | + } | ||
276 | + | ||
277 | + /* Wait for threads to stop. */ | ||
278 | + for (i = 0; i < sizeof(threads) / sizeof(threads[0]); i++) { | ||
279 | + ret = pthread_join(threads[i], NULL); | ||
280 | + assert(ret == 0); | ||
281 | + } | ||
282 | + | ||
283 | + /* Destroy memory chunk. */ | ||
284 | + ret = close(ctx.dev_null_fd); | ||
285 | + assert(ret == 0); | ||
286 | + ret = munmap(ctx.ptr, PAGE_COUNT * ctx.pagesize); | ||
287 | + assert(ret == 0); | ||
288 | + | ||
289 | + return EXIT_SUCCESS; | ||
290 | +} | ||
291 | diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target | ||
292 | index XXXXXXX..XXXXXXX 100644 | ||
293 | --- a/tests/tcg/multiarch/Makefile.target | ||
294 | +++ b/tests/tcg/multiarch/Makefile.target | ||
295 | @@ -XXX,XX +XXX,XX @@ signals: LDFLAGS+=-lrt -lpthread | ||
296 | munmap-pthread: CFLAGS+=-pthread | ||
297 | munmap-pthread: LDFLAGS+=-pthread | ||
298 | |||
299 | +vma-pthread: CFLAGS+=-pthread | ||
300 | +vma-pthread: LDFLAGS+=-pthread | ||
301 | + | ||
302 | # We define the runner for test-mmap after the individual | ||
303 | # architectures have defined their supported pages sizes. If no | ||
304 | # additional page sizes are defined we only run the default test. | ||
305 | -- | ||
306 | 2.34.1 | ||
307 | |||
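For reference, a small standalone sketch of the page-index encoding the test above relies on: with PAGE_IDX_BITS = 10 there are 1024 pages, and the top three index bits (masks 1 << 7, 1 << 8, 1 << 9) each pin one protection flag on, so the reader, writer and executor threads only ever touch pages whose needed bit survives any mutation. guaranteed_prot() is a hypothetical helper restating the per-region part of thread_mutate, not code from the patch.

    #include <stdio.h>
    #include <sys/mman.h>

    #define PAGE_IDX_R_MASK (1 << 7)
    #define PAGE_IDX_W_MASK (1 << 8)
    #define PAGE_IDX_X_MASK (1 << 9)

    /* Protection bits that the mutators always leave set for this page. */
    static int guaranteed_prot(unsigned page_idx)
    {
        int prot = 0;

        if (page_idx & PAGE_IDX_R_MASK) {
            prot |= PROT_READ;
        }
        if (page_idx & PAGE_IDX_W_MASK) {
            /* Matches the FIXME above: writable pages stay readable too. */
            prot |= PROT_WRITE | PROT_READ;
        }
        if (page_idx & PAGE_IDX_X_MASK) {
            prot |= PROT_EXEC;
        }
        return prot;
    }

    int main(void)
    {
        /* Page 0x280 has bits 7 and 9 set: always readable and executable. */
        printf("prot(0x280) = %d (PROT_READ|PROT_EXEC = %d)\n",
               guaranteed_prot(0x280), PROT_READ | PROT_EXEC);
        return 0;
    }

This is why the workers can run without any synchronization against the mutators: whatever random protections thread_mutate applies, it ORs the guaranteed bits back in for every region its mprotect range covers.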