1 | The following changes since commit e18e5501d8ac692d32657a3e1ef545b14e72b730: | 1 | The following changes since commit 222059a0fccf4af3be776fe35a5ea2d6a68f9a0b: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-virtiofs-20200210' into staging (2020-02-10 18:09:14 +0000) | 3 | Merge tag 'pull-ppc-20221221' of https://gitlab.com/danielhb/qemu into staging (2022-12-21 18:08:09 +0000) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/rth7680/qemu.git tags/pull-tcg-20200212 | 7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20221229 |
8 | 8 | ||
9 | for you to fetch changes up to 2445971604c1cfd3ec484457159f4ac300fb04d2: | 9 | for you to fetch changes up to b05e35533782a71a9fda472afd08442f50622a3e: |
10 | 10 | ||
11 | tcg: Add tcg_gen_gvec_5_ptr (2020-02-12 14:58:36 -0800) | 11 | tests/tcg/multiarch: add vma-pthread.c (2022-12-29 12:39:45 -0800) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Fix breakpoint invalidation. | 14 | Fix race conditions in new user-only vma tracking. |
15 | Add support for tcg helpers with 7 arguments. | 15 | Add tcg backend paired register allocation. |
16 | Add support for gvec helpers with 5 arguments. | 16 | Cleanup tcg backend function call abi. |
17 | 17 | ||
18 | ---------------------------------------------------------------- | 18 | ---------------------------------------------------------------- |
19 | Max Filippov (1): | 19 | Ilya Leoshkevich (1): |
20 | exec: flush CPU TB cache in breakpoint_invalidate | 20 | tests/tcg/multiarch: add vma-pthread.c |
21 | 21 | ||
22 | Richard Henderson (1): | 22 | Mark Cave-Ayland (1): |
23 | tcg: Add tcg_gen_gvec_5_ptr | 23 | tcg: convert tcg/README to rst |
24 | 24 | ||
25 | Taylor Simpson (1): | 25 | Philippe Mathieu-Daudé (5): |
26 | tcg: Add support for a helper with 7 arguments | 26 | tcg/s390x: Fix coding style |
27 | tcg: Massage process_op_defs() | ||
28 | tcg: Pass number of arguments to tcg_emit_op() / tcg_op_insert_*() | ||
29 | tcg: Convert typecode_to_ffi from array to function | ||
30 | tcg: Factor init_ffi_layouts() out of tcg_context_init() | ||
27 | 31 | ||
28 | include/exec/helper-gen.h | 13 +++++++++++++ | 32 | Richard Henderson (40): |
29 | include/exec/helper-head.h | 2 ++ | 33 | meson: Move CONFIG_TCG_INTERPRETER to config_host |
30 | include/exec/helper-proto.h | 6 ++++++ | 34 | tcg: Cleanup trailing whitespace |
31 | include/exec/helper-tcg.h | 7 +++++++ | 35 | qemu/main-loop: Introduce QEMU_IOTHREAD_LOCK_GUARD |
32 | include/tcg/tcg-op-gvec.h | 7 +++++++ | 36 | hw/mips: Use QEMU_IOTHREAD_LOCK_GUARD in cpu_mips_irq_request |
33 | exec.c | 15 +++++++-------- | 37 | target/ppc: Use QEMU_IOTHREAD_LOCK_GUARD in ppc_maybe_interrupt |
34 | tcg/tcg-op-gvec.c | 32 ++++++++++++++++++++++++++++++++ | 38 | target/ppc: Use QEMU_IOTHREAD_LOCK_GUARD in cpu_interrupt_exittb |
35 | 7 files changed, 74 insertions(+), 8 deletions(-) | 39 | target/riscv: Use QEMU_IOTHREAD_LOCK_GUARD in riscv_cpu_update_mip |
40 | hw/ppc: Use QEMU_IOTHREAD_LOCK_GUARD in ppc_set_irq | ||
41 | accel/tcg: Use QEMU_IOTHREAD_LOCK_GUARD in io_readx/io_writex | ||
42 | tcg: Tidy tcg_reg_alloc_op | ||
43 | tcg: Remove TCG_TARGET_STACK_GROWSUP | ||
44 | tci: MAX_OPC_PARAM_IARGS is no longer used | ||
45 | tcg: Fix tcg_reg_alloc_dup* | ||
46 | tcg: Centralize updates to reg_to_temp | ||
47 | tcg: Remove check_regs | ||
48 | tcg: Introduce paired register allocation | ||
49 | accel/tcg: Set cflags_next_tb in cpu_common_initfn | ||
50 | target/sparc: Avoid TCGV_{LOW,HIGH} | ||
51 | tcg: Move TCG_{LOW,HIGH} to tcg-internal.h | ||
52 | tcg: Add temp_subindex to TCGTemp | ||
53 | tcg: Simplify calls to temp_sync vs mem_coherent | ||
54 | tcg: Allocate TCGTemp pairs in host memory order | ||
55 | tcg: Move TCG_TYPE_COUNT outside enum | ||
56 | tcg: Introduce tcg_type_size | ||
57 | tcg: Introduce TCGCallReturnKind and TCGCallArgumentKind | ||
58 | tcg: Replace TCG_TARGET_CALL_ALIGN_ARGS with TCG_TARGET_CALL_ARG_I64 | ||
59 | tcg: Replace TCG_TARGET_EXTEND_ARGS with TCG_TARGET_CALL_ARG_I32 | ||
60 | tcg: Use TCG_CALL_ARG_EVEN for TCI special case | ||
61 | accel/tcg/plugin: Don't search for the function pointer index | ||
62 | accel/tcg/plugin: Avoid duplicate copy in copy_call | ||
63 | accel/tcg/plugin: Use copy_op in append_{udata,mem}_cb | ||
64 | tcg: Vary the allocation size for TCGOp | ||
65 | tcg: Use output_pref wrapper function | ||
66 | tcg: Reorg function calls | ||
67 | tcg: Move ffi_cif pointer into TCGHelperInfo | ||
68 | tcg/aarch64: Merge tcg_out_callr into tcg_out_call | ||
69 | tcg: Add TCGHelperInfo argument to tcg_out_call | ||
70 | accel/tcg: Fix tb_invalidate_phys_page_unwind | ||
71 | accel/tcg: Use g_free_rcu for user-exec interval trees | ||
72 | accel/tcg: Handle false negative lookup in page_check_range | ||
36 | 73 | ||
74 | docs/devel/atomics.rst | 2 + | ||
75 | docs/devel/index-tcg.rst | 1 + | ||
76 | docs/devel/tcg-ops.rst | 941 +++++++++++++++++++ | ||
77 | docs/devel/tcg.rst | 2 +- | ||
78 | meson.build | 4 +- | ||
79 | include/exec/helper-head.h | 2 +- | ||
80 | include/qemu/main-loop.h | 29 + | ||
81 | include/tcg/tcg-op.h | 35 +- | ||
82 | include/tcg/tcg.h | 96 +- | ||
83 | tcg/aarch64/tcg-target.h | 4 +- | ||
84 | tcg/arm/tcg-target.h | 4 +- | ||
85 | tcg/i386/tcg-target.h | 2 + | ||
86 | tcg/loongarch64/tcg-target.h | 3 +- | ||
87 | tcg/mips/tcg-target.h | 4 +- | ||
88 | tcg/riscv/tcg-target.h | 7 +- | ||
89 | tcg/s390x/tcg-target.h | 3 +- | ||
90 | tcg/sparc64/tcg-target.h | 3 +- | ||
91 | tcg/tcg-internal.h | 58 +- | ||
92 | tcg/tci/tcg-target.h | 7 + | ||
93 | tests/tcg/multiarch/nop_func.h | 25 + | ||
94 | accel/tcg/cputlb.c | 25 +- | ||
95 | accel/tcg/plugin-gen.c | 54 +- | ||
96 | accel/tcg/tb-maint.c | 78 +- | ||
97 | accel/tcg/user-exec.c | 59 +- | ||
98 | hw/core/cpu-common.c | 1 + | ||
99 | hw/mips/mips_int.c | 11 +- | ||
100 | hw/ppc/ppc.c | 10 +- | ||
101 | target/ppc/excp_helper.c | 11 +- | ||
102 | target/ppc/helper_regs.c | 14 +- | ||
103 | target/riscv/cpu_helper.c | 10 +- | ||
104 | target/sparc/translate.c | 21 +- | ||
105 | tcg/optimize.c | 10 +- | ||
106 | tcg/tcg-op-vec.c | 10 +- | ||
107 | tcg/tcg-op.c | 49 +- | ||
108 | tcg/tcg.c | 1658 +++++++++++++++++++++------------- | ||
109 | tcg/tci.c | 1 - | ||
110 | tests/tcg/multiarch/munmap-pthread.c | 16 +- | ||
111 | tests/tcg/multiarch/vma-pthread.c | 207 +++++ | ||
112 | tcg/aarch64/tcg-target.c.inc | 19 +- | ||
113 | tcg/arm/tcg-target.c.inc | 10 +- | ||
114 | tcg/i386/tcg-target.c.inc | 5 +- | ||
115 | tcg/loongarch64/tcg-target.c.inc | 7 +- | ||
116 | tcg/mips/tcg-target.c.inc | 3 +- | ||
117 | tcg/ppc/tcg-target.c.inc | 36 +- | ||
118 | tcg/riscv/tcg-target.c.inc | 7 +- | ||
119 | tcg/s390x/tcg-target.c.inc | 32 +- | ||
120 | tcg/sparc64/tcg-target.c.inc | 3 +- | ||
121 | tcg/tci/tcg-target.c.inc | 7 +- | ||
122 | tcg/README | 784 ---------------- | ||
123 | tests/tcg/multiarch/Makefile.target | 3 + | ||
124 | 50 files changed, 2630 insertions(+), 1763 deletions(-) | ||
125 | create mode 100644 docs/devel/tcg-ops.rst | ||
126 | create mode 100644 tests/tcg/multiarch/nop_func.h | ||
127 | create mode 100644 tests/tcg/multiarch/vma-pthread.c | ||
128 | delete mode 100644 tcg/README | ||
129 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> | ||
1 | 2 | ||
3 | Convert tcg/README to rst and move it to docs/devel as a new "TCG Intermediate | ||
4 | Representation" page. There are a few minor changes to improve the aesthetic | ||
5 | of the final output which are as follows: | ||
6 | |||
7 | - Rename the title from "Tiny Code Generator - Fabrice Bellard" to "TCG | ||
8 | Intermediate Representation" | ||
9 | |||
10 | - Remove the section numbering | ||
11 | |||
12 | - Add the missing parameters to the ssadd_vec operations in the "Host | ||
13 | vector operations" section | ||
14 | |||
15 | - Change the path to the Atomic Operations document to use a proper | ||
16 | reference | ||
17 | |||
18 | - Replace tcg/README in tcg.rst with a proper reference to the new document | ||
19 | |||
20 | Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> | ||
21 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
22 | Message-Id: <20221130100434.64207-2-mark.cave-ayland@ilande.co.uk> | ||
23 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
24 | --- | ||
25 | docs/devel/atomics.rst | 2 + | ||
26 | docs/devel/index-tcg.rst | 1 + | ||
27 | docs/devel/tcg-ops.rst | 941 +++++++++++++++++++++++++++++++++++++++ | ||
28 | docs/devel/tcg.rst | 2 +- | ||
29 | tcg/README | 784 -------------------------------- | ||
30 | 5 files changed, 945 insertions(+), 785 deletions(-) | ||
31 | create mode 100644 docs/devel/tcg-ops.rst | ||
32 | delete mode 100644 tcg/README | ||
33 | |||
34 | diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/docs/devel/atomics.rst | ||
37 | +++ b/docs/devel/atomics.rst | ||
38 | @@ -XXX,XX +XXX,XX @@ | ||
39 | +.. _atomics-ref: | ||
40 | + | ||
41 | ========================= | ||
42 | Atomic operations in QEMU | ||
43 | ========================= | ||
44 | diff --git a/docs/devel/index-tcg.rst b/docs/devel/index-tcg.rst | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/docs/devel/index-tcg.rst | ||
47 | +++ b/docs/devel/index-tcg.rst | ||
48 | @@ -XXX,XX +XXX,XX @@ are only implementing things for HW accelerated hypervisors. | ||
49 | :maxdepth: 2 | ||
50 | |||
51 | tcg | ||
52 | + tcg-ops | ||
53 | decodetree | ||
54 | multi-thread-tcg | ||
55 | tcg-icount | ||
56 | diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst | ||
57 | new file mode 100644 | ||
58 | index XXXXXXX..XXXXXXX | ||
59 | --- /dev/null | ||
60 | +++ b/docs/devel/tcg-ops.rst | ||
61 | @@ -XXX,XX +XXX,XX @@ | ||
62 | +.. _tcg-ops-ref: | ||
63 | + | ||
64 | +******************************* | ||
65 | +TCG Intermediate Representation | ||
66 | +******************************* | ||
67 | + | ||
68 | +Introduction | ||
69 | +============ | ||
70 | + | ||
71 | +TCG (Tiny Code Generator) began as a generic backend for a C | ||
72 | +compiler. It was simplified to be used in QEMU. It also has its roots | ||
73 | +in the QOP code generator written by Paul Brook. | ||
74 | + | ||
75 | +Definitions | ||
76 | +=========== | ||
77 | + | ||
78 | +TCG receives RISC-like *TCG ops* and performs some optimizations on them, | ||
79 | +including liveness analysis and trivial constant expression | ||
80 | +evaluation. TCG ops are then implemented in the host CPU back end, | ||
81 | +also known as the TCG target. | ||
82 | + | ||
83 | +The TCG *target* is the architecture for which we generate the | ||
84 | +code. It is of course not the same as the "target" of QEMU which is | ||
85 | +the emulated architecture. As TCG started as a generic C backend used | ||
86 | +for cross compiling, it is assumed that the TCG target is different | ||
87 | +from the host, although it is never the case for QEMU. | ||
88 | + | ||
89 | +In this document, we use *guest* to specify what architecture we are | ||
90 | +emulating; *target* always means the TCG target, the machine on which | ||
91 | +we are running QEMU. | ||
92 | + | ||
93 | +A TCG *function* corresponds to a QEMU Translated Block (TB). | ||
94 | + | ||
95 | +A TCG *temporary* is a variable only live in a basic block. Temporaries are allocated explicitly in each function. | ||
96 | + | ||
97 | +A TCG *local temporary* is a variable only live in a function. Local temporaries are allocated explicitly in each function. | ||
98 | + | ||
99 | +A TCG *global* is a variable which is live in all the functions | ||
100 | +(equivalent of a C global variable). They are defined before the | ||
101 | +functions are defined. A TCG global can be a memory location (e.g. a QEMU | ||
102 | +CPU register), a fixed host register (e.g. the QEMU CPU state pointer) | ||
103 | +or a memory location which is stored in a register outside QEMU TBs | ||
104 | +(not implemented yet). | ||
105 | + | ||
106 | +A TCG *basic block* corresponds to a list of instructions terminated | ||
107 | +by a branch instruction. | ||
108 | + | ||
109 | +An operation with *undefined behavior* may result in a crash. | ||
110 | + | ||
111 | +An operation with *unspecified behavior* shall not crash. However, | ||
112 | +the result may be one of several possibilities so may be considered | ||
113 | +an *undefined result*. | ||
114 | + | ||
115 | +Intermediate representation | ||
116 | +=========================== | ||
117 | + | ||
118 | +Introduction | ||
119 | +------------ | ||
120 | + | ||
121 | +TCG instructions operate on variables which are temporaries, local | ||
122 | +temporaries or globals. TCG instructions and variables are strongly | ||
123 | +typed. Two types are supported: 32 bit integers and 64 bit | ||
124 | +integers. Pointers are defined as an alias to 32 bit or 64 bit | ||
125 | +integers depending on the TCG target word size. | ||
126 | + | ||
127 | +Each instruction has a fixed number of output variable operands, input | ||
128 | +variable operands and always constant operands. | ||
129 | + | ||
130 | +The notable exception is the call instruction which has a variable | ||
131 | +number of outputs and inputs. | ||
132 | + | ||
133 | +In the textual form, output operands usually come first, followed by | ||
134 | +input operands, followed by constant operands. The output type is | ||
135 | +included in the instruction name. Constants are prefixed with a '$'. | ||
136 | + | ||
137 | +.. code-block:: none | ||
138 | + | ||
139 | + add_i32 t0, t1, t2 /* (t0 <- t1 + t2) */ | ||
140 | + | ||
141 | + | ||
142 | +Assumptions | ||
143 | +----------- | ||
144 | + | ||
145 | +Basic blocks | ||
146 | +^^^^^^^^^^^^ | ||
147 | + | ||
148 | +* Basic blocks end after branches (e.g. brcond_i32 instruction), | ||
149 | + goto_tb and exit_tb instructions. | ||
150 | + | ||
151 | +* Basic blocks start after the end of a previous basic block, or at a | ||
152 | + set_label instruction. | ||
153 | + | ||
154 | +After the end of a basic block, the content of temporaries is | ||
155 | +destroyed, but local temporaries and globals are preserved. | ||
156 | + | ||
157 | +Floating point types | ||
158 | +^^^^^^^^^^^^^^^^^^^^ | ||
159 | + | ||
160 | +* Floating point types are not supported yet | ||
161 | + | ||
162 | +Pointers | ||
163 | +^^^^^^^^ | ||
164 | + | ||
165 | +* Depending on the TCG target, pointer size is 32 bit or 64 | ||
166 | + bit. The type ``TCG_TYPE_PTR`` is an alias to ``TCG_TYPE_I32`` or | ||
167 | + ``TCG_TYPE_I64``. | ||
168 | + | ||
169 | +Helpers | ||
170 | +^^^^^^^ | ||
171 | + | ||
172 | +* Using the tcg_gen_helper_x_y it is possible to call any function | ||
173 | + taking i32, i64 or pointer types. By default, before calling a helper, | ||
174 | + all globals are stored at their canonical location and it is assumed | ||
175 | + that the function can modify them. By default, the helper is allowed to | ||
176 | + modify the CPU state or raise an exception. | ||
177 | + | ||
178 | + This can be overridden using the following function modifiers: | ||
179 | + | ||
180 | + - ``TCG_CALL_NO_READ_GLOBALS`` means that the helper does not read globals, | ||
181 | + either directly or via an exception. They will not be saved to their | ||
182 | + canonical locations before calling the helper. | ||
183 | + | ||
184 | + - ``TCG_CALL_NO_WRITE_GLOBALS`` means that the helper does not modify any globals. | ||
185 | + They will only be saved to their canonical location before calling helpers, | ||
186 | + but they won't be reloaded afterwards. | ||
187 | + | ||
188 | + - ``TCG_CALL_NO_SIDE_EFFECTS`` means that the call to the function is removed if | ||
189 | + the return value is not used. | ||
190 | + | ||
191 | + Note that ``TCG_CALL_NO_READ_GLOBALS`` implies ``TCG_CALL_NO_WRITE_GLOBALS``. | ||
192 | + | ||
193 | + On some TCG targets (e.g. x86), several calling conventions are | ||
194 | + supported. | ||
195 | + | ||
196 | +Branches | ||
197 | +^^^^^^^^ | ||
198 | + | ||
199 | +* Use the instruction 'br' to jump to a label. | ||
200 | + | ||
201 | +Code Optimizations | ||
202 | +------------------ | ||
203 | + | ||
204 | +When generating instructions, you can count on at least the following | ||
205 | +optimizations: | ||
206 | + | ||
207 | +- Single instructions are simplified, e.g. | ||
208 | + | ||
209 | + .. code-block:: none | ||
210 | + | ||
211 | + and_i32 t0, t0, $0xffffffff | ||
212 | + | ||
213 | + is suppressed. | ||
214 | + | ||
215 | +- A liveness analysis is done at the basic block level. The | ||
216 | + information is used to suppress moves from a dead variable to | ||
217 | + another one. It is also used to remove instructions which compute | ||
218 | + dead results. The latter is especially useful for condition code | ||
219 | + optimization in QEMU. | ||
220 | + | ||
221 | + In the following example: | ||
222 | + | ||
223 | + .. code-block:: none | ||
224 | + | ||
225 | + add_i32 t0, t1, t2 | ||
226 | + add_i32 t0, t0, $1 | ||
227 | + mov_i32 t0, $1 | ||
228 | + | ||
229 | + only the last instruction is kept. | ||
230 | + | ||
231 | + | ||
232 | +Instruction Reference | ||
233 | +===================== | ||
234 | + | ||
235 | +Function call | ||
236 | +------------- | ||
237 | + | ||
238 | +.. list-table:: | ||
239 | + | ||
240 | + * - call *<ret>* *<params>* ptr | ||
241 | + | ||
242 | + - | call function 'ptr' (pointer type) | ||
243 | + | | ||
244 | + | *<ret>* optional 32 bit or 64 bit return value | ||
245 | + | *<params>* optional 32 bit or 64 bit parameters | ||
246 | + | ||
247 | +Jumps/Labels | ||
248 | +------------ | ||
249 | + | ||
250 | +.. list-table:: | ||
251 | + | ||
252 | + * - set_label $label | ||
253 | + | ||
254 | + - | Define label 'label' at the current program point. | ||
255 | + | ||
256 | + * - br $label | ||
257 | + | ||
258 | + - | Jump to label. | ||
259 | + | ||
260 | + * - brcond_i32/i64 *t0*, *t1*, *cond*, *label* | ||
261 | + | ||
262 | + - | Conditional jump if *t0* *cond* *t1* is true. *cond* can be: | ||
263 | + | | ||
264 | + | ``TCG_COND_EQ`` | ||
265 | + | ``TCG_COND_NE`` | ||
266 | + | ``TCG_COND_LT /* signed */`` | ||
267 | + | ``TCG_COND_GE /* signed */`` | ||
268 | + | ``TCG_COND_LE /* signed */`` | ||
269 | + | ``TCG_COND_GT /* signed */`` | ||
270 | + | ``TCG_COND_LTU /* unsigned */`` | ||
271 | + | ``TCG_COND_GEU /* unsigned */`` | ||
272 | + | ``TCG_COND_LEU /* unsigned */`` | ||
273 | + | ``TCG_COND_GTU /* unsigned */`` | ||
274 | + | ||
275 | +Arithmetic | ||
276 | +---------- | ||
277 | + | ||
278 | +.. list-table:: | ||
279 | + | ||
280 | + * - add_i32/i64 *t0*, *t1*, *t2* | ||
281 | + | ||
282 | + - | *t0* = *t1* + *t2* | ||
283 | + | ||
284 | + * - sub_i32/i64 *t0*, *t1*, *t2* | ||
285 | + | ||
286 | + - | *t0* = *t1* - *t2* | ||
287 | + | ||
288 | + * - neg_i32/i64 *t0*, *t1* | ||
289 | + | ||
290 | + - | *t0* = -*t1* (two's complement) | ||
291 | + | ||
292 | + * - mul_i32/i64 *t0*, *t1*, *t2* | ||
293 | + | ||
294 | + - | *t0* = *t1* * *t2* | ||
295 | + | ||
296 | + * - div_i32/i64 *t0*, *t1*, *t2* | ||
297 | + | ||
298 | + - | *t0* = *t1* / *t2* (signed) | ||
299 | + | Undefined behavior if division by zero or overflow. | ||
300 | + | ||
301 | + * - divu_i32/i64 *t0*, *t1*, *t2* | ||
302 | + | ||
303 | + - | *t0* = *t1* / *t2* (unsigned) | ||
304 | + | Undefined behavior if division by zero. | ||
305 | + | ||
306 | + * - rem_i32/i64 *t0*, *t1*, *t2* | ||
307 | + | ||
308 | + - | *t0* = *t1* % *t2* (signed) | ||
309 | + | Undefined behavior if division by zero or overflow. | ||
310 | + | ||
311 | + * - remu_i32/i64 *t0*, *t1*, *t2* | ||
312 | + | ||
313 | + - | *t0* = *t1* % *t2* (unsigned) | ||
314 | + | Undefined behavior if division by zero. | ||
315 | + | ||
316 | + | ||
317 | +Logical | ||
318 | +------- | ||
319 | + | ||
320 | +.. list-table:: | ||
321 | + | ||
322 | + * - and_i32/i64 *t0*, *t1*, *t2* | ||
323 | + | ||
324 | + - | *t0* = *t1* & *t2* | ||
325 | + | ||
326 | + * - or_i32/i64 *t0*, *t1*, *t2* | ||
327 | + | ||
328 | + - | *t0* = *t1* | *t2* | ||
329 | + | ||
330 | + * - xor_i32/i64 *t0*, *t1*, *t2* | ||
331 | + | ||
332 | + - | *t0* = *t1* ^ *t2* | ||
333 | + | ||
334 | + * - not_i32/i64 *t0*, *t1* | ||
335 | + | ||
336 | + - | *t0* = ~\ *t1* | ||
337 | + | ||
338 | + * - andc_i32/i64 *t0*, *t1*, *t2* | ||
339 | + | ||
340 | + - | *t0* = *t1* & ~\ *t2* | ||
341 | + | ||
342 | + * - eqv_i32/i64 *t0*, *t1*, *t2* | ||
343 | + | ||
344 | + - | *t0* = ~(*t1* ^ *t2*), or equivalently, *t0* = *t1* ^ ~\ *t2* | ||
345 | + | ||
346 | + * - nand_i32/i64 *t0*, *t1*, *t2* | ||
347 | + | ||
348 | + - | *t0* = ~(*t1* & *t2*) | ||
349 | + | ||
350 | + * - nor_i32/i64 *t0*, *t1*, *t2* | ||
351 | + | ||
352 | + - | *t0* = ~(*t1* | *t2*) | ||
353 | + | ||
354 | + * - orc_i32/i64 *t0*, *t1*, *t2* | ||
355 | + | ||
356 | + - | *t0* = *t1* | ~\ *t2* | ||
357 | + | ||
358 | + * - clz_i32/i64 *t0*, *t1*, *t2* | ||
359 | + | ||
360 | + - | *t0* = *t1* ? clz(*t1*) : *t2* | ||
361 | + | ||
362 | + * - ctz_i32/i64 *t0*, *t1*, *t2* | ||
363 | + | ||
364 | + - | *t0* = *t1* ? ctz(*t1*) : *t2* | ||
365 | + | ||
366 | + * - ctpop_i32/i64 *t0*, *t1* | ||
367 | + | ||
368 | + - | *t0* = number of bits set in *t1* | ||
369 | + | | ||
370 | + | With *ctpop* short for "count population", matching | ||
371 | + | the function name used in ``include/qemu/host-utils.h``. | ||
372 | + | ||
373 | + | ||
374 | +Shifts/Rotates | ||
375 | +-------------- | ||
376 | + | ||
377 | +.. list-table:: | ||
378 | + | ||
379 | + * - shl_i32/i64 *t0*, *t1*, *t2* | ||
380 | + | ||
381 | + - | *t0* = *t1* << *t2* | ||
382 | + | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) | ||
383 | + | ||
384 | + * - shr_i32/i64 *t0*, *t1*, *t2* | ||
385 | + | ||
386 | + - | *t0* = *t1* >> *t2* (unsigned) | ||
387 | + | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) | ||
388 | + | ||
389 | + * - sar_i32/i64 *t0*, *t1*, *t2* | ||
390 | + | ||
391 | + - | *t0* = *t1* >> *t2* (signed) | ||
392 | + | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) | ||
393 | + | ||
394 | + * - rotl_i32/i64 *t0*, *t1*, *t2* | ||
395 | + | ||
396 | + - | Rotation of *t2* bits to the left | ||
397 | + | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) | ||
398 | + | ||
399 | + * - rotr_i32/i64 *t0*, *t1*, *t2* | ||
400 | + | ||
401 | + - | Rotation of *t2* bits to the right. | ||
402 | + | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) | ||
403 | + | ||
404 | + | ||
405 | +Misc | ||
406 | +---- | ||
407 | + | ||
408 | +.. list-table:: | ||
409 | + | ||
410 | + * - mov_i32/i64 *t0*, *t1* | ||
411 | + | ||
412 | + - | *t0* = *t1* | ||
413 | + | Move *t1* to *t0* (both operands must have the same type). | ||
414 | + | ||
415 | + * - ext8s_i32/i64 *t0*, *t1* | ||
416 | + | ||
417 | + ext8u_i32/i64 *t0*, *t1* | ||
418 | + | ||
419 | + ext16s_i32/i64 *t0*, *t1* | ||
420 | + | ||
421 | + ext16u_i32/i64 *t0*, *t1* | ||
422 | + | ||
423 | + ext32s_i64 *t0*, *t1* | ||
424 | + | ||
425 | + ext32u_i64 *t0*, *t1* | ||
426 | + | ||
427 | + - | 8, 16 or 32 bit sign/zero extension (both operands must have the same type) | ||
428 | + | ||
429 | + * - bswap16_i32/i64 *t0*, *t1*, *flags* | ||
430 | + | ||
431 | + - | 16 bit byte swap on the low bits of a 32/64 bit input. | ||
432 | + | | ||
433 | + | If *flags* & ``TCG_BSWAP_IZ``, then *t1* is known to be zero-extended from bit 15. | ||
434 | + | If *flags* & ``TCG_BSWAP_OZ``, then *t0* will be zero-extended from bit 15. | ||
435 | + | If *flags* & ``TCG_BSWAP_OS``, then *t0* will be sign-extended from bit 15. | ||
436 | + | | ||
437 | + | If neither ``TCG_BSWAP_OZ`` nor ``TCG_BSWAP_OS`` is set, then the bits of *t0* above bit 15 may contain any value. | ||
438 | + | ||
439 | + * - bswap32_i64 *t0*, *t1*, *flags* | ||
440 | + | ||
441 | + - | 32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, | ||
442 | + except they apply from bit 31 instead of bit 15. | ||
443 | + | ||
444 | + * - bswap32_i32 *t0*, *t1*, *flags* | ||
445 | + | ||
446 | + bswap64_i64 *t0*, *t1*, *flags* | ||
447 | + | ||
448 | + - | 32/64 bit byte swap. The flags are ignored, but still present | ||
449 | + for consistency with the other bswap opcodes. | ||
450 | + | ||
451 | + * - discard_i32/i64 *t0* | ||
452 | + | ||
453 | + - | Indicate that the value of *t0* won't be used later. It is useful to | ||
454 | + force dead code elimination. | ||
455 | + | ||
456 | + * - deposit_i32/i64 *dest*, *t1*, *t2*, *pos*, *len* | ||
457 | + | ||
458 | + - | Deposit *t2* as a bitfield into *t1*, placing the result in *dest*. | ||
459 | + | | ||
460 | + | The bitfield is described by *pos*/*len*, which are immediate values: | ||
461 | + | | ||
462 | + | *len* - the length of the bitfield | ||
463 | + | *pos* - the position of the first bit, counting from the LSB | ||
464 | + | | ||
465 | + | For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field | ||
466 | + at bit 8. This operation would be equivalent to | ||
467 | + | | ||
468 | + | *dest* = (*t1* & ~0x0f00) | ((*t2* << 8) & 0x0f00) | ||
469 | + | ||
470 | + * - extract_i32/i64 *dest*, *t1*, *pos*, *len* | ||
471 | + | ||
472 | + sextract_i32/i64 *dest*, *t1*, *pos*, *len* | ||
473 | + | ||
474 | + - | Extract a bitfield from *t1*, placing the result in *dest*. | ||
475 | + | | ||
476 | + | The bitfield is described by *pos*/*len*, which are immediate values, | ||
477 | + as above for deposit. For extract_*, the result will be extended | ||
478 | + to the left with zeros; for sextract_*, the result will be extended | ||
479 | + to the left with copies of the bitfield sign bit at *pos* + *len* - 1. | ||
480 | + | | ||
481 | + | For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field | ||
482 | + at bit 8. This operation would be equivalent to | ||
483 | + | | ||
484 | + | *dest* = (*t1* << 20) >> 28 | ||
485 | + | | ||
486 | + | (using an arithmetic right shift). | ||
487 | + | ||
488 | + * - extract2_i32/i64 *dest*, *t1*, *t2*, *pos* | ||
489 | + | ||
490 | + - | For N = {32,64}, extract an N-bit quantity from the concatenation | ||
491 | + of *t2*:*t1*, beginning at *pos*. The tcg_gen_extract2_{i32,i64} expander | ||
492 | + accepts 0 <= *pos* <= N as inputs. The backend code generator will | ||
493 | + not see either 0 or N as inputs for these opcodes. | ||
494 | + | ||
495 | + * - extrl_i64_i32 *t0*, *t1* | ||
496 | + | ||
497 | + - | For 64-bit hosts only, extract the low 32-bits of input *t1* and place it | ||
498 | + into 32-bit output *t0*. Depending on the host, this may be a simple move, | ||
499 | + or may require additional canonicalization. | ||
500 | + | ||
501 | + * - extrh_i64_i32 *t0*, *t1* | ||
502 | + | ||
503 | + - | For 64-bit hosts only, extract the high 32-bits of input *t1* and place it | ||
504 | + into 32-bit output *t0*. Depending on the host, this may be a simple shift, | ||
505 | + or may require additional canonicalization. | ||
506 | + | ||
507 | + | ||
508 | +Conditional moves | ||
509 | +----------------- | ||
510 | + | ||
511 | +.. list-table:: | ||
512 | + | ||
513 | + * - setcond_i32/i64 *dest*, *t1*, *t2*, *cond* | ||
514 | + | ||
515 | + - | *dest* = (*t1* *cond* *t2*) | ||
516 | + | | ||
517 | + | Set *dest* to 1 if (*t1* *cond* *t2*) is true, otherwise set to 0. | ||
518 | + | ||
519 | + * - movcond_i32/i64 *dest*, *c1*, *c2*, *v1*, *v2*, *cond* | ||
520 | + | ||
521 | + - | *dest* = (*c1* *cond* *c2* ? *v1* : *v2*) | ||
522 | + | | ||
523 | + | Set *dest* to *v1* if (*c1* *cond* *c2*) is true, otherwise set to *v2*. | ||
524 | + | ||
525 | + | ||
526 | +Type conversions | ||
527 | +---------------- | ||
528 | + | ||
529 | +.. list-table:: | ||
530 | + | ||
531 | + * - ext_i32_i64 *t0*, *t1* | ||
532 | + | ||
533 | + - | Convert *t1* (32 bit) to *t0* (64 bit) with sign extension | ||
534 | + | ||
535 | + * - extu_i32_i64 *t0*, *t1* | ||
536 | + | ||
537 | + - | Convert *t1* (32 bit) to *t0* (64 bit) with zero extension | ||
538 | + | ||
539 | + * - trunc_i64_i32 *t0*, *t1* | ||
540 | + | ||
541 | + - | Truncate *t1* (64 bit) to *t0* (32 bit) | ||
542 | + | ||
543 | + * - concat_i32_i64 *t0*, *t1*, *t2* | ||
544 | + | ||
545 | + - | Construct *t0* (64-bit) taking the low half from *t1* (32 bit) and the high half | ||
546 | + from *t2* (32 bit). | ||
547 | + | ||
548 | + * - concat32_i64 *t0*, *t1*, *t2* | ||
549 | + | ||
550 | + - | Construct *t0* (64-bit) taking the low half from *t1* (64 bit) and the high half | ||
551 | + from *t2* (64 bit). | ||
552 | + | ||
553 | + | ||
554 | +Load/Store | ||
555 | +---------- | ||
556 | + | ||
557 | +.. list-table:: | ||
558 | + | ||
559 | + * - ld_i32/i64 *t0*, *t1*, *offset* | ||
560 | + | ||
561 | + ld8s_i32/i64 *t0*, *t1*, *offset* | ||
562 | + | ||
563 | + ld8u_i32/i64 *t0*, *t1*, *offset* | ||
564 | + | ||
565 | + ld16s_i32/i64 *t0*, *t1*, *offset* | ||
566 | + | ||
567 | + ld16u_i32/i64 *t0*, *t1*, *offset* | ||
568 | + | ||
569 | + ld32s_i64 *t0*, *t1*, *offset* | ||
570 | + | ||
571 | + ld32u_i64 *t0*, *t1*, *offset* | ||
572 | + | ||
573 | + - | *t0* = read(*t1* + *offset*) | ||
574 | + | | ||
575 | + | Load 8, 16, 32 or 64 bits with or without sign extension from host memory. | ||
576 | + *offset* must be a constant. | ||
577 | + | ||
578 | + * - st_i32/i64 *t0*, *t1*, *offset* | ||
579 | + | ||
580 | + st8_i32/i64 *t0*, *t1*, *offset* | ||
581 | + | ||
582 | + st16_i32/i64 *t0*, *t1*, *offset* | ||
583 | + | ||
584 | + st32_i64 *t0*, *t1*, *offset* | ||
585 | + | ||
586 | + - | write(*t0*, *t1* + *offset*) | ||
587 | + | | ||
588 | + | Write 8, 16, 32 or 64 bits to host memory. | ||
589 | + | ||
590 | +All these opcodes assume that the pointed host memory doesn't correspond | ||
591 | +to a global. In the latter case the behaviour is unpredictable. | ||
592 | + | ||
593 | + | ||
594 | +Multiword arithmetic support | ||
595 | +---------------------------- | ||
596 | + | ||
597 | +.. list-table:: | ||
598 | + | ||
599 | + * - add2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* | ||
600 | + | ||
601 | + sub2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* | ||
602 | + | ||
603 | + - | Similar to add/sub, except that the double-word inputs *t1* and *t2* are | ||
604 | + formed from two single-word arguments, and the double-word output *t0* | ||
605 | + is returned in two single-word outputs. | ||
606 | + | ||
607 | + * - mulu2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* | ||
608 | + | ||
609 | + - | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full | ||
610 | + double-word product *t0*. The latter is returned in two single-word outputs. | ||
611 | + | ||
612 | + * - muls2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* | ||
613 | + | ||
614 | + - | Similar to mulu2, except the two inputs *t1* and *t2* are signed. | ||
615 | + | ||
616 | + * - mulsh_i32/i64 *t0*, *t1*, *t2* | ||
617 | + | ||
618 | + muluh_i32/i64 *t0*, *t1*, *t2* | ||
619 | + | ||
620 | + - | Provide the high part of a signed or unsigned multiply, respectively. | ||
621 | + | | ||
622 | + | If mulu2/muls2 are not provided by the backend, the tcg-op generator | ||
623 | + can obtain the same results by emitting a pair of opcodes, mul + muluh/mulsh. | ||
624 | + | ||
625 | + | ||
626 | +Memory Barrier support | ||
627 | +---------------------- | ||
628 | + | ||
629 | +.. list-table:: | ||
630 | + | ||
631 | + * - mb *<$arg>* | ||
632 | + | ||
633 | + - | Generate a target memory barrier instruction to ensure memory ordering | ||
634 | + as being enforced by a corresponding guest memory barrier instruction. | ||
635 | + | | ||
636 | + | The ordering enforced by the backend may be stricter than the ordering | ||
637 | + required by the guest. It cannot be weaker. This opcode takes a constant | ||
638 | + argument which is required to generate the appropriate barrier | ||
639 | + instruction. The backend should take care to emit the target barrier | ||
640 | + instruction only when necessary, i.e. for SMP guests and when MTTCG is | ||
641 | + enabled. | ||
642 | + | | ||
643 | + | The guest translators should generate this opcode for all guest instructions | ||
644 | + which have ordering side effects. | ||
645 | + | | ||
646 | + | Please see :ref:`atomics-ref` for more information on memory barriers. | ||
647 | + | ||
648 | + | ||
649 | +64-bit guest on 32-bit host support | ||
650 | +----------------------------------- | ||
651 | + | ||
652 | +The following opcodes are internal to TCG. Thus they are to be implemented by | ||
653 | +32-bit host code generators, but are not to be emitted by guest translators. | ||
654 | +They are emitted as needed by inline functions within ``tcg-op.h``. | ||
655 | + | ||
656 | +.. list-table:: | ||
657 | + | ||
658 | + * - brcond2_i32 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *cond*, *label* | ||
659 | + | ||
660 | + - | Similar to brcond, except that the 64-bit values *t0* and *t1* | ||
661 | + are formed from two 32-bit arguments. | ||
662 | + | ||
663 | + * - setcond2_i32 *dest*, *t1_low*, *t1_high*, *t2_low*, *t2_high*, *cond* | ||
664 | + | ||
665 | + - | Similar to setcond, except that the 64-bit values *t1* and *t2* are | ||
666 | + formed from two 32-bit arguments. The result is a 32-bit value. | ||
667 | + | ||
668 | + | ||
669 | +QEMU specific operations | ||
670 | +------------------------ | ||
671 | + | ||
672 | +.. list-table:: | ||
673 | + | ||
674 | + * - exit_tb *t0* | ||
675 | + | ||
676 | + - | Exit the current TB and return the value *t0* (word type). | ||
677 | + | ||
678 | + * - goto_tb *index* | ||
679 | + | ||
680 | + - | Exit the current TB and jump to the TB index *index* (constant) if the | ||
681 | + current TB was linked to this TB. Otherwise execute the next | ||
682 | + instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued | ||
683 | + at most once with each slot index per TB. | ||
684 | + | ||
685 | + * - lookup_and_goto_ptr *tb_addr* | ||
686 | + | ||
687 | + - | Look up a TB address *tb_addr* and jump to it if valid. If not valid, | ||
688 | + jump to the TCG epilogue to go back to the exec loop. | ||
689 | + | | ||
690 | + | This operation is optional. If the TCG backend does not implement the | ||
691 | + goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0). | ||
692 | + | ||
693 | + * - qemu_ld_i32/i64 *t0*, *t1*, *flags*, *memidx* | ||
694 | + | ||
695 | + qemu_st_i32/i64 *t0*, *t1*, *flags*, *memidx* | ||
696 | + | ||
697 | + qemu_st8_i32 *t0*, *t1*, *flags*, *memidx* | ||
698 | + | ||
699 | + - | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest | ||
700 | + address *t1*. The _i32/_i64 size applies to the size of the input/output | ||
701 | + register *t0* only. The address *t1* is always sized according to the guest, | ||
702 | + and the width of the memory operation is controlled by *flags*. | ||
703 | + | | ||
704 | + | Both *t0* and *t1* may be split into little-endian ordered pairs of registers | ||
705 | + if dealing with 64-bit quantities on a 32-bit host. | ||
706 | + | | ||
707 | + | The *memidx* selects the qemu tlb index to use (e.g. user or kernel access). | ||
708 | + The flags are the MemOp bits, selecting the sign, width, and endianness | ||
709 | + of the memory access. | ||
710 | + | | ||
711 | + | For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a | ||
712 | + 64-bit memory access specified in *flags*. | ||
713 | + | | ||
714 | + | For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of | ||
715 | + the memory operation is known to be 8-bit. This allows the backend to | ||
716 | + provide a different set of register constraints. | ||
717 | + | ||
718 | + | ||
719 | +Host vector operations | ||
720 | +---------------------- | ||
721 | + | ||
722 | +All of the vector ops have two parameters, ``TCGOP_VECL`` & ``TCGOP_VECE``. | ||
723 | +The former specifies the length of the vector in log2 64-bit units; the | ||
724 | +latter specifies the length of the element (if applicable) in log2 8-bit units. | ||
725 | +E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32. | ||
726 | + | ||
727 | +.. list-table:: | ||
728 | + | ||
729 | + * - mov_vec *v0*, *v1* | ||
730 | + ld_vec *v0*, *t1* | ||
731 | + st_vec *v0*, *t1* | ||
732 | + | ||
733 | + - | Move, load and store. | ||
734 | + | ||
735 | + * - dup_vec *v0*, *r1* | ||
736 | + | ||
737 | + - | Duplicate the low N bits of *r1* into VECL/VECE copies across *v0*. | ||
738 | + | ||
739 | + * - dupi_vec *v0*, *c* | ||
740 | + | ||
741 | + - | Similarly, for a constant. | ||
742 | + | Smaller values will be replicated to host register size by the expanders. | ||
743 | + | ||
744 | + * - dup2_vec *v0*, *r1*, *r2* | ||
745 | + | ||
746 | + - | Duplicate *r2*:*r1* into VECL/64 copies across *v0*. This opcode is | ||
747 | + only present for 32-bit hosts. | ||
748 | + | ||
749 | + * - add_vec *v0*, *v1*, *v2* | ||
750 | + | ||
751 | + - | *v0* = *v1* + *v2*, in elements across the vector. | ||
752 | + | ||
753 | + * - sub_vec *v0*, *v1*, *v2* | ||
754 | + | ||
755 | + - | Similarly, *v0* = *v1* - *v2*. | ||
756 | + | ||
757 | + * - mul_vec *v0*, *v1*, *v2* | ||
758 | + | ||
759 | + - | Similarly, *v0* = *v1* * *v2*. | ||
760 | + | ||
761 | + * - neg_vec *v0*, *v1* | ||
762 | + | ||
763 | + - | Similarly, *v0* = -*v1*. | ||
764 | + | ||
765 | + * - abs_vec *v0*, *v1* | ||
766 | + | ||
767 | + - | Similarly, *v0* = *v1* < 0 ? -*v1* : *v1*, in elements across the vector. | ||
768 | + | ||
769 | + * - smin_vec *v0*, *v1*, *v2* | ||
770 | + | ||
771 | + umin_vec *v0*, *v1*, *v2* | ||
772 | + | ||
773 | + - | Similarly, *v0* = MIN(*v1*, *v2*), for signed and unsigned element types. | ||
774 | + | ||
775 | + * - smax_vec *v0*, *v1*, *v2* | ||
776 | + | ||
777 | + umax_vec *v0*, *v1*, *v2* | ||
778 | + | ||
779 | + - | Similarly, *v0* = MAX(*v1*, *v2*), for signed and unsigned element types. | ||
780 | + | ||
781 | + * - ssadd_vec *v0*, *v1*, *v2* | ||
782 | + | ||
783 | + sssub_vec *v0*, *v1*, *v2* | ||
784 | + | ||
785 | + usadd_vec *v0*, *v1*, *v2* | ||
786 | + | ||
787 | + ussub_vec *v0*, *v1*, *v2* | ||
788 | + | ||
789 | + - | Signed and unsigned saturating addition and subtraction. | ||
790 | + | | ||
791 | + | If the true result is not representable within the element type, the | ||
792 | + element is set to the minimum or maximum value for the type. | ||
793 | + | ||
794 | + * - and_vec *v0*, *v1*, *v2* | ||
795 | + | ||
796 | + or_vec *v0*, *v1*, *v2* | ||
797 | + | ||
798 | + xor_vec *v0*, *v1*, *v2* | ||
799 | + | ||
800 | + andc_vec *v0*, *v1*, *v2* | ||
801 | + | ||
802 | + orc_vec *v0*, *v1*, *v2* | ||
803 | + | ||
804 | + not_vec *v0*, *v1* | ||
805 | + | ||
806 | + - | Similarly, logical operations with and without complement. | ||
807 | + | | ||
808 | + | Note that VECE is unused. | ||
809 | + | ||
810 | + * - shli_vec *v0*, *v1*, *i2* | ||
811 | + | ||
812 | + shls_vec *v0*, *v1*, *s2* | ||
813 | + | ||
814 | + - | Shift all elements from *v1* by a scalar *i2*/*s2*. I.e. | ||
815 | + | ||
816 | + .. code-block:: c | ||
817 | + | ||
818 | + for (i = 0; i < VECL/VECE; ++i) { | ||
819 | + v0[i] = v1[i] << s2; | ||
820 | + } | ||
821 | + | ||
822 | + * - shri_vec *v0*, *v1*, *i2* | ||
823 | + | ||
824 | + sari_vec *v0*, *v1*, *i2* | ||
825 | + | ||
826 | + rotli_vec *v0*, *v1*, *i2* | ||
827 | + | ||
828 | + shrs_vec *v0*, *v1*, *s2* | ||
829 | + | ||
830 | + sars_vec *v0*, *v1*, *s2* | ||
831 | + | ||
832 | + - | Similarly for logical and arithmetic right shift, and left rotate. | ||
833 | + | ||
834 | + * - shlv_vec *v0*, *v1*, *v2* | ||
835 | + | ||
836 | + - | Shift elements from *v1* by elements from *v2*. I.e. | ||
837 | + | ||
838 | + .. code-block:: c | ||
839 | + | ||
840 | + for (i = 0; i < VECL/VECE; ++i) { | ||
841 | + v0[i] = v1[i] << v2[i]; | ||
842 | + } | ||
843 | + | ||
844 | + * - shrv_vec *v0*, *v1*, *v2* | ||
845 | + | ||
846 | + sarv_vec *v0*, *v1*, *v2* | ||
847 | + | ||
848 | + rotlv_vec *v0*, *v1*, *v2* | ||
849 | + | ||
850 | + rotrv_vec *v0*, *v1*, *v2* | ||
851 | + | ||
852 | + - | Similarly for logical and arithmetic right shift, and rotates. | ||
853 | + | ||
854 | + * - cmp_vec *v0*, *v1*, *v2*, *cond* | ||
855 | + | ||
856 | + - | Compare vectors by element, storing -1 for true and 0 for false. | ||
857 | + | ||
858 | + * - bitsel_vec *v0*, *v1*, *v2*, *v3* | ||
859 | + | ||
860 | + - | Bitwise select, *v0* = (*v2* & *v1*) | (*v3* & ~\ *v1*), across the entire vector. | ||
861 | + | ||
862 | + * - cmpsel_vec *v0*, *c1*, *c2*, *v3*, *v4*, *cond* | ||
863 | + | ||
864 | + - | Select elements based on comparison results: | ||
865 | + | ||
866 | + .. code-block:: c | ||
867 | + | ||
868 | + for (i = 0; i < n; ++i) { | ||
869 | + v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i]; | ||
870 | + } | ||
871 | + | ||
872 | +**Note 1**: Some shortcuts are defined when the last operand is known to be | ||
873 | +a constant (e.g. addi for add, movi for mov). | ||
874 | + | ||
875 | +**Note 2**: When using TCG, the opcodes must never be generated directly | ||
876 | +as some of them may not be available as "real" opcodes. Always use the | ||
877 | +function tcg_gen_xxx(args). | ||
878 | + | ||
879 | + | ||
880 | +Backend | ||
881 | +======= | ||
882 | + | ||
883 | +``tcg-target.h`` contains the target specific definitions. ``tcg-target.c.inc`` | ||
884 | +contains the target specific code; it is #included by ``tcg/tcg.c``, rather | ||
885 | +than being a standalone C file. | ||
886 | + | ||
887 | +Assumptions | ||
888 | +----------- | ||
889 | + | ||
890 | +The target word size (``TCG_TARGET_REG_BITS``) is expected to be 32 bit or | ||
891 | +64 bit. It is expected that the pointer has the same size as the word. | ||
892 | + | ||
893 | +On a 32 bit target, all 64 bit operations are converted to 32 bits. A | ||
894 | +few specific operations must be implemented to allow it (see add2_i32, | ||
895 | +sub2_i32, brcond2_i32). | ||
896 | + | ||
897 | +On a 64 bit target, the values are transferred between 32 and 64-bit | ||
898 | +registers using the following ops: | ||
899 | + | ||
900 | +- trunc_shr_i64_i32 | ||
901 | +- ext_i32_i64 | ||
902 | +- extu_i32_i64 | ||
903 | + | ||
904 | +They ensure that the values are correctly truncated or extended when | ||
905 | +moved from a 32-bit to a 64-bit register or vice-versa. Note that the | ||
906 | +trunc_shr_i64_i32 is an optional op. It is not necessary to implement | ||
907 | +it if all the following conditions are met: | ||
908 | + | ||
909 | +- 64-bit registers can hold 32-bit values | ||
910 | +- 32-bit values in a 64-bit register do not need to stay zero or | ||
911 | + sign extended | ||
912 | +- all 32-bit TCG ops ignore the high part of 64-bit registers | ||
913 | + | ||
914 | +Floating point operations are not supported in this version. A | ||
915 | +previous incarnation of the code generator had full support of them, | ||
916 | +but it is better to concentrate on integer operations first. | ||
917 | + | ||
918 | +Constraints | ||
919 | +---------------- | ||
920 | + | ||
921 | +GCC-like constraints are used to define the constraints of every | ||
922 | +instruction. Memory constraints are not supported in this | ||
923 | +version. Aliases are specified in the input operands as for GCC. | ||
924 | + | ||
925 | +The same register may be used for both an input and an output, even when | ||
926 | +they are not explicitly aliased. If an op expands to multiple target | ||
927 | +instructions then care must be taken to avoid clobbering input values. | ||
928 | +GCC style "early clobber" outputs are supported, with '``&``'. | ||
929 | + | ||
930 | +A target can define specific register or constant constraints. If an | ||
931 | +operation uses a constant input constraint which does not allow all | ||
932 | +constants, it must also accept registers in order to have a fallback. | ||
933 | +The constraint '``i``' is defined generically to accept any constant. | ||
934 | +The constraint '``r``' is not defined generically, but is consistently | ||
935 | +used by each backend to indicate all registers. | ||
936 | + | ||
937 | +The movi_i32 and movi_i64 operations must accept any constants. | ||
938 | + | ||
939 | +The mov_i32 and mov_i64 operations must accept any registers of the | ||
940 | +same type. | ||
941 | + | ||
942 | +The ld/st/sti instructions must accept signed 32 bit constant offsets. | ||
943 | +This can be implemented by reserving a specific register in which to | ||
944 | +compute the address if the offset is too big. | ||
945 | + | ||
946 | +The ld/st instructions must accept any destination (ld) or source (st) | ||
947 | +register. | ||
948 | + | ||
949 | +The sti instruction may fail if it cannot store the given constant. | ||
950 | + | ||
951 | +Function call assumptions | ||
952 | +------------------------- | ||
953 | + | ||
954 | +- The only supported types for parameters and return value are: 32 and | ||
955 | + 64 bit integers and pointers. | ||
956 | +- The stack grows downwards. | ||
957 | +- The first N parameters are passed in registers. | ||
958 | +- The next parameters are passed on the stack by storing them as words. | ||
959 | +- Some registers are clobbered during the call. | ||
960 | +- The function can return 0 or 1 value in registers. On a 32 bit | ||
961 | + target, functions must be able to return 2 values in registers for | ||
962 | + 64 bit return type. | ||
963 | + | ||
964 | + | ||
965 | +Recommended coding rules for best performance | ||
966 | +============================================= | ||
967 | + | ||
968 | +- Use globals to represent the parts of the QEMU CPU state which are | ||
969 | + often modified, e.g. the integer registers and the condition | ||
970 | + codes. TCG will be able to use host registers to store them. | ||
971 | + | ||
972 | +- Avoid globals stored in fixed registers. They must be used only to | ||
973 | + store the pointer to the CPU state and possibly to store a pointer | ||
974 | + to a register window. | ||
975 | + | ||
976 | +- Use temporaries. Use local temporaries only when really needed, | ||
977 | + e.g. when you need to use a value after a jump. Local temporaries | ||
978 | + introduce a performance hit in the current TCG implementation: their | ||
979 | + content is saved to memory at end of each basic block. | ||
980 | + | ||
981 | +- Free temporaries and local temporaries when they are no longer used | ||
982 | + (tcg_temp_free). Since tcg_const_x() also creates a temporary, you | ||
983 | + should free it after it is used. Freeing temporaries does not yield | ||
984 | + better generated code, but it reduces the memory usage of TCG and | ||
985 | + speeds up the translation. | ||
986 | + | ||
987 | +- Don't hesitate to use helpers for complicated or seldom used guest | ||
988 | + instructions. There is little performance advantage in using TCG to | ||
989 | + implement guest instructions taking more than about twenty TCG | ||
990 | + instructions. Note that this rule of thumb is more applicable to | ||
991 | + helpers doing complex logic or arithmetic, where the C compiler has | ||
992 | + scope to do a good job of optimisation; it is less relevant where | ||
993 | + the instruction is mostly doing loads and stores, and in those cases | ||
994 | + inline TCG may still be faster for longer sequences. | ||
995 | + | ||
996 | +- The hard limit on the number of TCG instructions you can generate | ||
997 | + per guest instruction is set by ``MAX_OP_PER_INSTR`` in ``exec-all.h`` -- | ||
998 | + you cannot exceed this without risking a buffer overrun. | ||
999 | + | ||
1000 | +- Use the 'discard' instruction if you know that TCG won't be able to | ||
1001 | + prove that a given global is "dead" at a given program point. The | ||
1002 | + x86 guest uses it to improve the condition codes optimisation. | ||
1003 | diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst | ||
1004 | index XXXXXXX..XXXXXXX 100644 | ||
1005 | --- a/docs/devel/tcg.rst | ||
1006 | +++ b/docs/devel/tcg.rst | ||
1007 | @@ -XXX,XX +XXX,XX @@ which make it relatively easily portable and simple while achieving good | ||
1008 | performances. | ||
1009 | |||
1010 | QEMU's dynamic translation backend is called TCG, for "Tiny Code | ||
1011 | -Generator". For more information, please take a look at ``tcg/README``. | ||
1012 | +Generator". For more information, please take a look at :ref:`tcg-ops-ref`. | ||
1013 | |||
1014 | The following sections outline some notable features and implementation | ||
1015 | details of QEMU's dynamic translator. | ||
1016 | diff --git a/tcg/README b/tcg/README | ||
1017 | deleted file mode 100644 | ||
1018 | index XXXXXXX..XXXXXXX | ||
1019 | --- a/tcg/README | ||
1020 | +++ /dev/null | ||
1021 | @@ -XXX,XX +XXX,XX @@ | ||
1022 | -Tiny Code Generator - Fabrice Bellard. | ||
1023 | - | ||
1024 | -1) Introduction | ||
1025 | - | ||
1026 | -TCG (Tiny Code Generator) began as a generic backend for a C | ||
1027 | -compiler. It was simplified to be used in QEMU. It also has its roots | ||
1028 | -in the QOP code generator written by Paul Brook. | ||
1029 | - | ||
1030 | -2) Definitions | ||
1031 | - | ||
1032 | -TCG receives RISC-like "TCG ops" and performs some optimizations on them, | ||
1033 | -including liveness analysis and trivial constant expression | ||
1034 | -evaluation. TCG ops are then implemented in the host CPU back end, | ||
1035 | -also known as the TCG "target". | ||
1036 | - | ||
1037 | -The TCG "target" is the architecture for which we generate the | ||
1038 | -code. It is of course not the same as the "target" of QEMU which is | ||
1039 | -the emulated architecture. As TCG started as a generic C backend used | ||
1040 | -for cross compiling, it is assumed that the TCG target is different | ||
1041 | -from the host, although it is never the case for QEMU. | ||
1042 | - | ||
1043 | -In this document, we use "guest" to specify what architecture we are | ||
1044 | -emulating; "target" always means the TCG target, the machine on which | ||
1045 | -we are running QEMU. | ||
1046 | - | ||
1047 | -A TCG "function" corresponds to a QEMU Translated Block (TB). | ||
1048 | - | ||
1049 | -A TCG "temporary" is a variable only live in a basic | ||
1050 | -block. Temporaries are allocated explicitly in each function. | ||
1051 | - | ||
1052 | -A TCG "local temporary" is a variable only live in a function. Local | ||
1053 | -temporaries are allocated explicitly in each function. | ||
1054 | - | ||
1055 | -A TCG "global" is a variable which is live in all the functions | ||
1056 | -(equivalent of a C global variable). They are defined before the | ||
1057 | -functions defined. A TCG global can be a memory location (e.g. a QEMU | ||
1058 | -CPU register), a fixed host register (e.g. the QEMU CPU state pointer) | ||
1059 | -or a memory location which is stored in a register outside QEMU TBs | ||
1060 | -(not implemented yet). | ||
1061 | - | ||
1062 | -A TCG "basic block" corresponds to a list of instructions terminated | ||
1063 | -by a branch instruction. | ||
1064 | - | ||
1065 | -An operation with "undefined behavior" may result in a crash. | ||
1066 | - | ||
1067 | -An operation with "unspecified behavior" shall not crash. However, | ||
1068 | -the result may be one of several possibilities so may be considered | ||
1069 | -an "undefined result". | ||
1070 | - | ||
1071 | -3) Intermediate representation | ||
1072 | - | ||
1073 | -3.1) Introduction | ||
1074 | - | ||
1075 | -TCG instructions operate on variables which are temporaries, local | ||
1076 | -temporaries or globals. TCG instructions and variables are strongly | ||
1077 | -typed. Two types are supported: 32 bit integers and 64 bit | ||
1078 | -integers. Pointers are defined as an alias to 32 bit or 64 bit | ||
1079 | -integers depending on the TCG target word size. | ||
1080 | - | ||
1081 | -Each instruction has a fixed number of output variable operands, input | ||
1082 | -variable operands and always constant operands. | ||
1083 | - | ||
1084 | -The notable exception is the call instruction which has a variable | ||
1085 | -number of outputs and inputs. | ||
1086 | - | ||
1087 | -In the textual form, output operands usually come first, followed by | ||
1088 | -input operands, followed by constant operands. The output type is | ||
1089 | -included in the instruction name. Constants are prefixed with a '$'. | ||
1090 | - | ||
1091 | -add_i32 t0, t1, t2 (t0 <- t1 + t2) | ||
1092 | - | ||
1093 | -3.2) Assumptions | ||
1094 | - | ||
1095 | -* Basic blocks | ||
1096 | - | ||
1097 | -- Basic blocks end after branches (e.g. brcond_i32 instruction), | ||
1098 | - goto_tb and exit_tb instructions. | ||
1099 | -- Basic blocks start after the end of a previous basic block, or at a | ||
1100 | - set_label instruction. | ||
1101 | - | ||
1102 | -After the end of a basic block, the content of temporaries is | ||
1103 | -destroyed, but local temporaries and globals are preserved. | ||
1104 | - | ||
1105 | -* Floating point types are not supported yet | ||
1106 | - | ||
1107 | -* Pointers: depending on the TCG target, pointer size is 32 bit or 64 | ||
1108 | - bit. The type TCG_TYPE_PTR is an alias to TCG_TYPE_I32 or | ||
1109 | - TCG_TYPE_I64. | ||
1110 | - | ||
1111 | -* Helpers: | ||
1112 | - | ||
1113 | -Using the tcg_gen_helper_x_y it is possible to call any function | ||
1114 | -taking i32, i64 or pointer types. By default, before calling a helper, | ||
1115 | -all globals are stored at their canonical location and it is assumed | ||
1116 | -that the function can modify them. By default, the helper is allowed to | ||
1117 | -modify the CPU state or raise an exception. | ||
1118 | - | ||
1119 | -This can be overridden using the following function modifiers: | ||
1120 | -- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals, | ||
1121 | - either directly or via an exception. They will not be saved to their | ||
1122 | - canonical locations before calling the helper. | ||
1123 | -- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals. | ||
1124 | - They will only be saved to their canonical location before calling helpers, | ||
1125 | - but they won't be reloaded afterwards. | ||
1126 | -- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if | ||
1127 | - the return value is not used. | ||
1128 | - | ||
1129 | -Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS. | ||
1130 | - | ||
1131 | -On some TCG targets (e.g. x86), several calling conventions are | ||
1132 | -supported. | ||
1133 | - | ||
1134 | -* Branches: | ||
1135 | - | ||
1136 | -Use the instruction 'br' to jump to a label. | ||
1137 | - | ||
1138 | -3.3) Code Optimizations | ||
1139 | - | ||
1140 | -When generating instructions, you can count on at least the following | ||
1141 | -optimizations: | ||
1142 | - | ||
1143 | -- Single instructions are simplified, e.g. | ||
1144 | - | ||
1145 | - and_i32 t0, t0, $0xffffffff | ||
1146 | - | ||
1147 | - is suppressed. | ||
1148 | - | ||
1149 | -- A liveness analysis is done at the basic block level. The | ||
1150 | - information is used to suppress moves from a dead variable to | ||
1151 | - another one. It is also used to remove instructions which compute | ||
1152 | - dead results. The later is especially useful for condition code | ||
1153 | - optimization in QEMU. | ||
1154 | - | ||
1155 | - In the following example: | ||
1156 | - | ||
1157 | - add_i32 t0, t1, t2 | ||
1158 | - add_i32 t0, t0, $1 | ||
1159 | - mov_i32 t0, $1 | ||
1160 | - | ||
1161 | - only the last instruction is kept. | ||
1162 | - | ||
1163 | -3.4) Instruction Reference | ||
1164 | - | ||
1165 | -********* Function call | ||
1166 | - | ||
1167 | -* call <ret> <params> ptr | ||
1168 | - | ||
1169 | -call function 'ptr' (pointer type) | ||
1170 | - | ||
1171 | -<ret> optional 32 bit or 64 bit return value | ||
1172 | -<params> optional 32 bit or 64 bit parameters | ||
1173 | - | ||
1174 | -********* Jumps/Labels | ||
1175 | - | ||
1176 | -* set_label $label | ||
1177 | - | ||
1178 | -Define label 'label' at the current program point. | ||
1179 | - | ||
1180 | -* br $label | ||
1181 | - | ||
1182 | -Jump to label. | ||
1183 | - | ||
1184 | -* brcond_i32/i64 t0, t1, cond, label | ||
1185 | - | ||
1186 | -Conditional jump if t0 cond t1 is true. cond can be: | ||
1187 | - TCG_COND_EQ | ||
1188 | - TCG_COND_NE | ||
1189 | - TCG_COND_LT /* signed */ | ||
1190 | - TCG_COND_GE /* signed */ | ||
1191 | - TCG_COND_LE /* signed */ | ||
1192 | - TCG_COND_GT /* signed */ | ||
1193 | - TCG_COND_LTU /* unsigned */ | ||
1194 | - TCG_COND_GEU /* unsigned */ | ||
1195 | - TCG_COND_LEU /* unsigned */ | ||
1196 | - TCG_COND_GTU /* unsigned */ | ||
1197 | - | ||
1198 | -********* Arithmetic | ||
1199 | - | ||
1200 | -* add_i32/i64 t0, t1, t2 | ||
1201 | - | ||
1202 | -t0=t1+t2 | ||
1203 | - | ||
1204 | -* sub_i32/i64 t0, t1, t2 | ||
1205 | - | ||
1206 | -t0=t1-t2 | ||
1207 | - | ||
1208 | -* neg_i32/i64 t0, t1 | ||
1209 | - | ||
1210 | -t0=-t1 (two's complement) | ||
1211 | - | ||
1212 | -* mul_i32/i64 t0, t1, t2 | ||
1213 | - | ||
1214 | -t0=t1*t2 | ||
1215 | - | ||
1216 | -* div_i32/i64 t0, t1, t2 | ||
1217 | - | ||
1218 | -t0=t1/t2 (signed). Undefined behavior if division by zero or overflow. | ||
1219 | - | ||
1220 | -* divu_i32/i64 t0, t1, t2 | ||
1221 | - | ||
1222 | -t0=t1/t2 (unsigned). Undefined behavior if division by zero. | ||
1223 | - | ||
1224 | -* rem_i32/i64 t0, t1, t2 | ||
1225 | - | ||
1226 | -t0=t1%t2 (signed). Undefined behavior if division by zero or overflow. | ||
1227 | - | ||
1228 | -* remu_i32/i64 t0, t1, t2 | ||
1229 | - | ||
1230 | -t0=t1%t2 (unsigned). Undefined behavior if division by zero. | ||
1231 | - | ||
1232 | -********* Logical | ||
1233 | - | ||
1234 | -* and_i32/i64 t0, t1, t2 | ||
1235 | - | ||
1236 | -t0=t1&t2 | ||
1237 | - | ||
1238 | -* or_i32/i64 t0, t1, t2 | ||
1239 | - | ||
1240 | -t0=t1|t2 | ||
1241 | - | ||
1242 | -* xor_i32/i64 t0, t1, t2 | ||
1243 | - | ||
1244 | -t0=t1^t2 | ||
1245 | - | ||
1246 | -* not_i32/i64 t0, t1 | ||
1247 | - | ||
1248 | -t0=~t1 | ||
1249 | - | ||
1250 | -* andc_i32/i64 t0, t1, t2 | ||
1251 | - | ||
1252 | -t0=t1&~t2 | ||
1253 | - | ||
1254 | -* eqv_i32/i64 t0, t1, t2 | ||
1255 | - | ||
1256 | -t0=~(t1^t2), or equivalently, t0=t1^~t2 | ||
1257 | - | ||
1258 | -* nand_i32/i64 t0, t1, t2 | ||
1259 | - | ||
1260 | -t0=~(t1&t2) | ||
1261 | - | ||
1262 | -* nor_i32/i64 t0, t1, t2 | ||
1263 | - | ||
1264 | -t0=~(t1|t2) | ||
1265 | - | ||
1266 | -* orc_i32/i64 t0, t1, t2 | ||
1267 | - | ||
1268 | -t0=t1|~t2 | ||
1269 | - | ||
1270 | -* clz_i32/i64 t0, t1, t2 | ||
1271 | - | ||
1272 | -t0 = t1 ? clz(t1) : t2 | ||
1273 | - | ||
1274 | -* ctz_i32/i64 t0, t1, t2 | ||
1275 | - | ||
1276 | -t0 = t1 ? ctz(t1) : t2 | ||
1277 | - | ||
1278 | -* ctpop_i32/i64 t0, t1 | ||
1279 | - | ||
1280 | -t0 = number of bits set in t1 | ||
1281 | -With "ctpop" short for "count population", matching | ||
1282 | -the function name used in include/qemu/host-utils.h. | ||
1283 | - | ||
1284 | -********* Shifts/Rotates | ||
1285 | - | ||
1286 | -* shl_i32/i64 t0, t1, t2 | ||
1287 | - | ||
1288 | -t0=t1 << t2. Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) | ||
1289 | - | ||
1290 | -* shr_i32/i64 t0, t1, t2 | ||
1291 | - | ||
1292 | -t0=t1 >> t2 (unsigned). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) | ||
1293 | - | ||
1294 | -* sar_i32/i64 t0, t1, t2 | ||
1295 | - | ||
1296 | -t0=t1 >> t2 (signed). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) | ||
1297 | - | ||
1298 | -* rotl_i32/i64 t0, t1, t2 | ||
1299 | - | ||
1300 | -Rotation of t2 bits to the left. | ||
1301 | -Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) | ||
1302 | - | ||
1303 | -* rotr_i32/i64 t0, t1, t2 | ||
1304 | - | ||
1305 | -Rotation of t2 bits to the right. | ||
1306 | -Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) | ||
1307 | - | ||
1308 | -********* Misc | ||
1309 | - | ||
1310 | -* mov_i32/i64 t0, t1 | ||
1311 | - | ||
1312 | -t0 = t1 | ||
1313 | - | ||
1314 | -Move t1 to t0 (both operands must have the same type). | ||
1315 | - | ||
1316 | -* ext8s_i32/i64 t0, t1 | ||
1317 | -ext8u_i32/i64 t0, t1 | ||
1318 | -ext16s_i32/i64 t0, t1 | ||
1319 | -ext16u_i32/i64 t0, t1 | ||
1320 | -ext32s_i64 t0, t1 | ||
1321 | -ext32u_i64 t0, t1 | ||
1322 | - | ||
1323 | -8, 16 or 32 bit sign/zero extension (both operands must have the same type) | ||
1324 | - | ||
1325 | -* bswap16_i32/i64 t0, t1, flags | ||
1326 | - | ||
1327 | -16 bit byte swap on the low bits of a 32/64 bit input. | ||
1328 | -If flags & TCG_BSWAP_IZ, then t1 is known to be zero-extended from bit 15. | ||
1329 | -If flags & TCG_BSWAP_OZ, then t0 will be zero-extended from bit 15. | ||
1330 | -If flags & TCG_BSWAP_OS, then t0 will be sign-extended from bit 15. | ||
1331 | -If neither TCG_BSWAP_OZ nor TCG_BSWAP_OS are set, then the bits of | ||
1332 | -t0 above bit 15 may contain any value. | ||
1333 | - | ||
1334 | -* bswap32_i64 t0, t1, flags | ||
1335 | - | ||
1336 | -32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, | ||
1337 | -except they apply from bit 31 instead of bit 15. | ||
1338 | - | ||
1339 | -* bswap32_i32 t0, t1, flags | ||
1340 | -* bswap64_i64 t0, t1, flags | ||
1341 | - | ||
1342 | -32/64 bit byte swap. The flags are ignored, but still present | ||
1343 | -for consistency with the other bswap opcodes. | ||
1344 | - | ||
1345 | -* discard_i32/i64 t0 | ||
1346 | - | ||
1347 | -Indicate that the value of t0 won't be used later. It is useful to | ||
1348 | -force dead code elimination. | ||
1349 | - | ||
1350 | -* deposit_i32/i64 dest, t1, t2, pos, len | ||
1351 | - | ||
1352 | -Deposit T2 as a bitfield into T1, placing the result in DEST. | ||
1353 | -The bitfield is described by POS/LEN, which are immediate values: | ||
1354 | - | ||
1355 | - LEN - the length of the bitfield | ||
1356 | - POS - the position of the first bit, counting from the LSB | ||
1357 | - | ||
1358 | -For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field | ||
1359 | -at bit 8. This operation would be equivalent to | ||
1360 | - | ||
1361 | - dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) | ||
1362 | - | ||
1363 | -* extract_i32/i64 dest, t1, pos, len | ||
1364 | -* sextract_i32/i64 dest, t1, pos, len | ||
1365 | - | ||
1366 | -Extract a bitfield from T1, placing the result in DEST. | ||
1367 | -The bitfield is described by POS/LEN, which are immediate values, | ||
1368 | -as above for deposit. For extract_*, the result will be extended | ||
1369 | -to the left with zeros; for sextract_*, the result will be extended | ||
1370 | -to the left with copies of the bitfield sign bit at pos + len - 1. | ||
1371 | - | ||
1372 | -For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field | ||
1373 | -at bit 8. This operation would be equivalent to | ||
1374 | - | ||
1375 | - dest = (t1 << 20) >> 28 | ||
1376 | - | ||
1377 | -(using an arithmetic right shift). | ||
1378 | - | ||
1379 | -* extract2_i32/i64 dest, t1, t2, pos | ||
1380 | - | ||
1381 | -For N = {32,64}, extract an N-bit quantity from the concatenation | ||
1382 | -of t2:t1, beginning at pos. The tcg_gen_extract2_{i32,i64} expander | ||
1383 | -accepts 0 <= pos <= N as inputs. The backend code generator will | ||
1384 | -not see either 0 or N as inputs for these opcodes. | ||
1385 | - | ||
1386 | -* extrl_i64_i32 t0, t1 | ||
1387 | - | ||
1388 | -For 64-bit hosts only, extract the low 32-bits of input T1 and place it | ||
1389 | -into 32-bit output T0. Depending on the host, this may be a simple move, | ||
1390 | -or may require additional canonicalization. | ||
1391 | - | ||
1392 | -* extrh_i64_i32 t0, t1 | ||
1393 | - | ||
1394 | -For 64-bit hosts only, extract the high 32-bits of input T1 and place it | ||
1395 | -into 32-bit output T0. Depending on the host, this may be a simple shift, | ||
1396 | -or may require additional canonicalization. | ||
1397 | - | ||
1398 | -********* Conditional moves | ||
1399 | - | ||
1400 | -* setcond_i32/i64 dest, t1, t2, cond | ||
1401 | - | ||
1402 | -dest = (t1 cond t2) | ||
1403 | - | ||
1404 | -Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0. | ||
1405 | - | ||
1406 | -* movcond_i32/i64 dest, c1, c2, v1, v2, cond | ||
1407 | - | ||
1408 | -dest = (c1 cond c2 ? v1 : v2) | ||
1409 | - | ||
1410 | -Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2. | ||
1411 | - | ||
1412 | -********* Type conversions | ||
1413 | - | ||
1414 | -* ext_i32_i64 t0, t1 | ||
1415 | -Convert t1 (32 bit) to t0 (64 bit) with sign extension | ||
1416 | - | ||
1417 | -* extu_i32_i64 t0, t1 | ||
1418 | -Convert t1 (32 bit) to t0 (64 bit) with zero extension | ||
1419 | - | ||
1420 | -* trunc_i64_i32 t0, t1 | ||
1421 | -Truncate t1 (64 bit) to t0 (32 bit) | ||
1422 | - | ||
1423 | -* concat_i32_i64 t0, t1, t2 | ||
1424 | -Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half | ||
1425 | -from t2 (32 bit). | ||
1426 | - | ||
1427 | -* concat32_i64 t0, t1, t2 | ||
1428 | -Construct t0 (64-bit) taking the low half from t1 (64 bit) and the high half | ||
1429 | -from t2 (64 bit). | ||
1430 | - | ||
1431 | -********* Load/Store | ||
1432 | - | ||
1433 | -* ld_i32/i64 t0, t1, offset | ||
1434 | -ld8s_i32/i64 t0, t1, offset | ||
1435 | -ld8u_i32/i64 t0, t1, offset | ||
1436 | -ld16s_i32/i64 t0, t1, offset | ||
1437 | -ld16u_i32/i64 t0, t1, offset | ||
1438 | -ld32s_i64 t0, t1, offset | ||
1439 | -ld32u_i64 t0, t1, offset | ||
1440 | - | ||
1441 | -t0 = read(t1 + offset) | ||
1442 | -Load 8, 16, 32 or 64 bits with or without sign extension from host memory. | ||
1443 | -offset must be a constant. | ||
1444 | - | ||
1445 | -* st_i32/i64 t0, t1, offset | ||
1446 | -st8_i32/i64 t0, t1, offset | ||
1447 | -st16_i32/i64 t0, t1, offset | ||
1448 | -st32_i64 t0, t1, offset | ||
1449 | - | ||
1450 | -write(t0, t1 + offset) | ||
1451 | -Write 8, 16, 32 or 64 bits to host memory. | ||
1452 | - | ||
1453 | -All these opcodes assume that the pointed host memory doesn't correspond | ||
1454 | -to a global. In the latter case the behaviour is unpredictable. | ||
1455 | - | ||
1456 | -********* Multiword arithmetic support | ||
1457 | - | ||
1458 | -* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high | ||
1459 | -* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high | ||
1460 | - | ||
1461 | -Similar to add/sub, except that the double-word inputs T1 and T2 are | ||
1462 | -formed from two single-word arguments, and the double-word output T0 | ||
1463 | -is returned in two single-word outputs. | ||
1464 | - | ||
1465 | -* mulu2_i32/i64 t0_low, t0_high, t1, t2 | ||
1466 | - | ||
1467 | -Similar to mul, except two unsigned inputs T1 and T2 yielding the full | ||
1468 | -double-word product T0. The latter is returned in two single-word outputs. | ||
1469 | - | ||
1470 | -* muls2_i32/i64 t0_low, t0_high, t1, t2 | ||
1471 | - | ||
1472 | -Similar to mulu2, except the two inputs T1 and T2 are signed. | ||
1473 | - | ||
1474 | -* mulsh_i32/i64 t0, t1, t2 | ||
1475 | -* muluh_i32/i64 t0, t1, t2 | ||
1476 | - | ||
1477 | -Provide the high part of a signed or unsigned multiply, respectively. | ||
1478 | -If mulu2/muls2 are not provided by the backend, the tcg-op generator | ||
1479 | -can obtain the same results by emitting a pair of | ||
1480 | -opcodes, mul+muluh/mulsh. | ||
1481 | - | ||
1482 | -********* Memory Barrier support | ||
1483 | - | ||
1484 | -* mb <$arg> | ||
1485 | - | ||
1486 | -Generate a target memory barrier instruction to ensure memory ordering as being | ||
1487 | -enforced by a corresponding guest memory barrier instruction. The ordering | ||
1488 | -enforced by the backend may be stricter than the ordering required by the guest. | ||
1489 | -It cannot be weaker. This opcode takes a constant argument which is required to | ||
1490 | -generate the appropriate barrier instruction. The backend should take care to | ||
1491 | -emit the target barrier instruction only when necessary i.e., for SMP guests and | ||
1492 | -when MTTCG is enabled. | ||
1493 | - | ||
1494 | -The guest translators should generate this opcode for all guest instructions | ||
1495 | -which have ordering side effects. | ||
1496 | - | ||
1497 | -Please see docs/devel/atomics.rst for more information on memory barriers. | ||
1498 | - | ||
1499 | -********* 64-bit guest on 32-bit host support | ||
1500 | - | ||
1501 | -The following opcodes are internal to TCG. Thus they are to be implemented by | ||
1502 | -32-bit host code generators, but are not to be emitted by guest translators. | ||
1503 | -They are emitted as needed by inline functions within "tcg-op.h". | ||
1504 | - | ||
1505 | -* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label | ||
1506 | - | ||
1507 | -Similar to brcond, except that the 64-bit values T0 and T1 | ||
1508 | -are formed from two 32-bit arguments. | ||
1509 | - | ||
1510 | -* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond | ||
1511 | - | ||
1512 | -Similar to setcond, except that the 64-bit values T1 and T2 are | ||
1513 | -formed from two 32-bit arguments. The result is a 32-bit value. | ||
1514 | - | ||
1515 | -********* QEMU specific operations | ||
1516 | - | ||
1517 | -* exit_tb t0 | ||
1518 | - | ||
1519 | -Exit the current TB and return the value t0 (word type). | ||
1520 | - | ||
1521 | -* goto_tb index | ||
1522 | - | ||
1523 | -Exit the current TB and jump to the TB index 'index' (constant) if the | ||
1524 | -current TB was linked to this TB. Otherwise execute the next | ||
1525 | -instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued | ||
1526 | -at most once with each slot index per TB. | ||
1527 | - | ||
1528 | -* lookup_and_goto_ptr tb_addr | ||
1529 | - | ||
1530 | -Look up a TB address ('tb_addr') and jump to it if valid. If not valid, | ||
1531 | -jump to the TCG epilogue to go back to the exec loop. | ||
1532 | - | ||
1533 | -This operation is optional. If the TCG backend does not implement the | ||
1534 | -goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0). | ||
1535 | - | ||
1536 | -* qemu_ld_i32/i64 t0, t1, flags, memidx | ||
1537 | -* qemu_st_i32/i64 t0, t1, flags, memidx | ||
1538 | -* qemu_st8_i32 t0, t1, flags, memidx | ||
1539 | - | ||
1540 | -Load data at the guest address t1 into t0, or store data in t0 at guest | ||
1541 | -address t1. The _i32/_i64 size applies to the size of the input/output | ||
1542 | -register t0 only. The address t1 is always sized according to the guest, | ||
1543 | -and the width of the memory operation is controlled by flags. | ||
1544 | - | ||
1545 | -Both t0 and t1 may be split into little-endian ordered pairs of registers | ||
1546 | -if dealing with 64-bit quantities on a 32-bit host. | ||
1547 | - | ||
1548 | -The memidx selects the qemu tlb index to use (e.g. user or kernel access). | ||
1549 | -The flags are the MemOp bits, selecting the sign, width, and endianness | ||
1550 | -of the memory access. | ||
1551 | - | ||
1552 | -For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a | ||
1553 | -64-bit memory access specified in flags. | ||
1554 | - | ||
1555 | -For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of | ||
1556 | -the memory operation is known to be 8-bit. This allows the backend to | ||
1557 | -provide a different set of register constraints. | ||
1558 | - | ||
1559 | -********* Host vector operations | ||
1560 | - | ||
1561 | -All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE. | ||
1562 | -The former specifies the length of the vector in log2 64-bit units; the | ||
1563 | -latter specifies the length of the element (if applicable) in log2 8-bit units. | ||
1564 | -E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. | ||
1565 | - | ||
1566 | -* mov_vec v0, v1 | ||
1567 | -* ld_vec v0, t1 | ||
1568 | -* st_vec v0, t1 | ||
1569 | - | ||
1570 | - Move, load and store. | ||
1571 | - | ||
1572 | -* dup_vec v0, r1 | ||
1573 | - | ||
1574 | - Duplicate the low N bits of R1 into VECL/VECE copies across V0. | ||
1575 | - | ||
1576 | -* dupi_vec v0, c | ||
1577 | - | ||
1578 | - Similarly, for a constant. | ||
1579 | - Smaller values will be replicated to host register size by the expanders. | ||
1580 | - | ||
1581 | -* dup2_vec v0, r1, r2 | ||
1582 | - | ||
1583 | - Duplicate r2:r1 into VECL/64 copies across V0. This opcode is | ||
1584 | - only present for 32-bit hosts. | ||
1585 | - | ||
1586 | -* add_vec v0, v1, v2 | ||
1587 | - | ||
1588 | - v0 = v1 + v2, in elements across the vector. | ||
1589 | - | ||
1590 | -* sub_vec v0, v1, v2 | ||
1591 | - | ||
1592 | - Similarly, v0 = v1 - v2. | ||
1593 | - | ||
1594 | -* mul_vec v0, v1, v2 | ||
1595 | - | ||
1596 | - Similarly, v0 = v1 * v2. | ||
1597 | - | ||
1598 | -* neg_vec v0, v1 | ||
1599 | - | ||
1600 | - Similarly, v0 = -v1. | ||
1601 | - | ||
1602 | -* abs_vec v0, v1 | ||
1603 | - | ||
1604 | - Similarly, v0 = v1 < 0 ? -v1 : v1, in elements across the vector. | ||
1605 | - | ||
1606 | -* smin_vec: | ||
1607 | -* umin_vec: | ||
1608 | - | ||
1609 | - Similarly, v0 = MIN(v1, v2), for signed and unsigned element types. | ||
1610 | - | ||
1611 | -* smax_vec: | ||
1612 | -* umax_vec: | ||
1613 | - | ||
1614 | - Similarly, v0 = MAX(v1, v2), for signed and unsigned element types. | ||
1615 | - | ||
1616 | -* ssadd_vec: | ||
1617 | -* sssub_vec: | ||
1618 | -* usadd_vec: | ||
1619 | -* ussub_vec: | ||
1620 | - | ||
1621 | - Signed and unsigned saturating addition and subtraction. If the true | ||
1622 | - result is not representable within the element type, the element is | ||
1623 | - set to the minimum or maximum value for the type. | ||
1624 | - | ||
1625 | -* and_vec v0, v1, v2 | ||
1626 | -* or_vec v0, v1, v2 | ||
1627 | -* xor_vec v0, v1, v2 | ||
1628 | -* andc_vec v0, v1, v2 | ||
1629 | -* orc_vec v0, v1, v2 | ||
1630 | -* not_vec v0, v1 | ||
1631 | - | ||
1632 | - Similarly, logical operations with and without complement. | ||
1633 | - Note that VECE is unused. | ||
1634 | - | ||
1635 | -* shli_vec v0, v1, i2 | ||
1636 | -* shls_vec v0, v1, s2 | ||
1637 | - | ||
1638 | - Shift all elements from v1 by a scalar i2/s2. I.e. | ||
1639 | - | ||
1640 | - for (i = 0; i < VECL/VECE; ++i) { | ||
1641 | - v0[i] = v1[i] << s2; | ||
1642 | - } | ||
1643 | - | ||
1644 | -* shri_vec v0, v1, i2 | ||
1645 | -* sari_vec v0, v1, i2 | ||
1646 | -* rotli_vec v0, v1, i2 | ||
1647 | -* shrs_vec v0, v1, s2 | ||
1648 | -* sars_vec v0, v1, s2 | ||
1649 | - | ||
1650 | - Similarly for logical and arithmetic right shift, and left rotate. | ||
1651 | - | ||
1652 | -* shlv_vec v0, v1, v2 | ||
1653 | - | ||
1654 | - Shift elements from v1 by elements from v2. I.e. | ||
1655 | - | ||
1656 | - for (i = 0; i < VECL/VECE; ++i) { | ||
1657 | - v0[i] = v1[i] << v2[i]; | ||
1658 | - } | ||
1659 | - | ||
1660 | -* shrv_vec v0, v1, v2 | ||
1661 | -* sarv_vec v0, v1, v2 | ||
1662 | -* rotlv_vec v0, v1, v2 | ||
1663 | -* rotrv_vec v0, v1, v2 | ||
1664 | - | ||
1665 | - Similarly for logical and arithmetic right shift, and rotates. | ||
1666 | - | ||
1667 | -* cmp_vec v0, v1, v2, cond | ||
1668 | - | ||
1669 | - Compare vectors by element, storing -1 for true and 0 for false. | ||
1670 | - | ||
1671 | -* bitsel_vec v0, v1, v2, v3 | ||
1672 | - | ||
1673 | - Bitwise select, v0 = (v2 & v1) | (v3 & ~v1), across the entire vector. | ||
1674 | - | ||
1675 | -* cmpsel_vec v0, c1, c2, v3, v4, cond | ||
1676 | - | ||
1677 | - Select elements based on comparison results: | ||
1678 | - for (i = 0; i < n; ++i) { | ||
1679 | - v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i]. | ||
1680 | - } | ||
1681 | - | ||
1682 | -********* | ||
1683 | - | ||
1684 | -Note 1: Some shortcuts are defined when the last operand is known to be | ||
1685 | -a constant (e.g. addi for add, movi for mov). | ||
1686 | - | ||
1687 | -Note 2: When using TCG, the opcodes must never be generated directly | ||
1688 | -as some of them may not be available as "real" opcodes. Always use the | ||
1689 | -function tcg_gen_xxx(args). | ||
1690 | - | ||
1691 | -4) Backend | ||
1692 | - | ||
1693 | -tcg-target.h contains the target specific definitions. tcg-target.c.inc | ||
1694 | -contains the target specific code; it is #included by tcg/tcg.c, rather | ||
1695 | -than being a standalone C file. | ||
1696 | - | ||
1697 | -4.1) Assumptions | ||
1698 | - | ||
1699 | -The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or | ||
1700 | -64 bit. It is expected that the pointer has the same size as the word. | ||
1701 | - | ||
1702 | -On a 32 bit target, all 64 bit operations are converted to 32 bits. A | ||
1703 | -few specific operations must be implemented to allow it (see add2_i32, | ||
1704 | -sub2_i32, brcond2_i32). | ||
1705 | - | ||
1706 | -On a 64 bit target, the values are transferred between 32 and 64-bit | ||
1707 | -registers using the following ops: | ||
1708 | -- trunc_shr_i64_i32 | ||
1709 | -- ext_i32_i64 | ||
1710 | -- extu_i32_i64 | ||
1711 | - | ||
1712 | -They ensure that the values are correctly truncated or extended when | ||
1713 | -moved from a 32-bit to a 64-bit register or vice-versa. Note that the | ||
1714 | -trunc_shr_i64_i32 is an optional op. It is not necessary to implement | ||
1715 | -it if all the following conditions are met: | ||
1716 | -- 64-bit registers can hold 32-bit values | ||
1717 | -- 32-bit values in a 64-bit register do not need to stay zero or | ||
1718 | - sign extended | ||
1719 | -- all 32-bit TCG ops ignore the high part of 64-bit registers | ||
1720 | - | ||
1721 | -Floating point operations are not supported in this version. A | ||
1722 | -previous incarnation of the code generator had full support of them, | ||
1723 | -but it is better to concentrate on integer operations first. | ||
1724 | - | ||
1725 | -4.2) Constraints | ||
1726 | - | ||
1727 | -GCC like constraints are used to define the constraints of every | ||
1728 | -instruction. Memory constraints are not supported in this | ||
1729 | -version. Aliases are specified in the input operands as for GCC. | ||
1730 | - | ||
1731 | -The same register may be used for both an input and an output, even when | ||
1732 | -they are not explicitly aliased. If an op expands to multiple target | ||
1733 | -instructions then care must be taken to avoid clobbering input values. | ||
1734 | -GCC style "early clobber" outputs are supported, with '&'. | ||
1735 | - | ||
1736 | -A target can define specific register or constant constraints. If an | ||
1737 | -operation uses a constant input constraint which does not allow all | ||
1738 | -constants, it must also accept registers in order to have a fallback. | ||
1739 | -The constraint 'i' is defined generically to accept any constant. | ||
1740 | -The constraint 'r' is not defined generically, but is consistently | ||
1741 | -used by each backend to indicate all registers. | ||
1742 | - | ||
1743 | -The movi_i32 and movi_i64 operations must accept any constants. | ||
1744 | - | ||
1745 | -The mov_i32 and mov_i64 operations must accept any registers of the | ||
1746 | -same type. | ||
1747 | - | ||
1748 | -The ld/st/sti instructions must accept signed 32 bit constant offsets. | ||
1749 | -This can be implemented by reserving a specific register in which to | ||
1750 | -compute the address if the offset is too big. | ||
1751 | - | ||
1752 | -The ld/st instructions must accept any destination (ld) or source (st) | ||
1753 | -register. | ||
1754 | - | ||
1755 | -The sti instruction may fail if it cannot store the given constant. | ||
1756 | - | ||
1757 | -4.3) Function call assumptions | ||
1758 | - | ||
1759 | -- The only supported types for parameters and return value are: 32 and | ||
1760 | - 64 bit integers and pointer. | ||
1761 | -- The stack grows downwards. | ||
1762 | -- The first N parameters are passed in registers. | ||
1763 | -- The next parameters are passed on the stack by storing them as words. | ||
1764 | -- Some registers are clobbered during the call. | ||
1765 | -- The function can return 0 or 1 value in registers. On a 32 bit | ||
1766 | - target, functions must be able to return 2 values in registers for | ||
1767 | - 64 bit return type. | ||
1768 | - | ||
1769 | -5) Recommended coding rules for best performance | ||
1770 | - | ||
1771 | -- Use globals to represent the parts of the QEMU CPU state which are | ||
1772 | - often modified, e.g. the integer registers and the condition | ||
1773 | - codes. TCG will be able to use host registers to store them. | ||
1774 | - | ||
1775 | -- Avoid globals stored in fixed registers. They must be used only to | ||
1776 | - store the pointer to the CPU state and possibly to store a pointer | ||
1777 | - to a register window. | ||
1778 | - | ||
1779 | -- Use temporaries. Use local temporaries only when really needed, | ||
1780 | - e.g. when you need to use a value after a jump. Local temporaries | ||
1781 | - introduce a performance hit in the current TCG implementation: their | ||
1782 | - content is saved to memory at end of each basic block. | ||
1783 | - | ||
1784 | -- Free temporaries and local temporaries when they are no longer used | ||
1785 | - (tcg_temp_free). Since tcg_const_x() also creates a temporary, you | ||
1786 | - should free it after it is used. Freeing temporaries does not yield | ||
1787 | - a better generated code, but it reduces the memory usage of TCG and | ||
1788 | - the speed of the translation. | ||
1789 | - | ||
1790 | -- Don't hesitate to use helpers for complicated or seldom used guest | ||
1791 | - instructions. There is little performance advantage in using TCG to | ||
1792 | - implement guest instructions taking more than about twenty TCG | ||
1793 | - instructions. Note that this rule of thumb is more applicable to | ||
1794 | - helpers doing complex logic or arithmetic, where the C compiler has | ||
1795 | - scope to do a good job of optimisation; it is less relevant where | ||
1796 | - the instruction is mostly doing loads and stores, and in those cases | ||
1797 | - inline TCG may still be faster for longer sequences. | ||
1798 | - | ||
1799 | -- The hard limit on the number of TCG instructions you can generate | ||
1800 | - per guest instruction is set by MAX_OP_PER_INSTR in exec-all.h -- | ||
1801 | - you cannot exceed this without risking a buffer overrun. | ||
1802 | - | ||
1803 | -- Use the 'discard' instruction if you know that TCG won't be able to | ||
1804 | - prove that a given global is "dead" at a given program point. The | ||
1805 | - x86 guest uses it to improve the condition codes optimisation. | ||
1806 | -- | ||
1807 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Like CONFIG_TCG, the enabled method of execution is a host property | ||
2 | not a guest property. This exposes the define to compile-once files. | ||
1 | 3 | ||
4 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | meson.build | 4 +--- | ||
9 | 1 file changed, 1 insertion(+), 3 deletions(-) | ||
10 | |||
11 | diff --git a/meson.build b/meson.build | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/meson.build | ||
14 | +++ b/meson.build | ||
15 | @@ -XXX,XX +XXX,XX @@ if get_option('tcg').allowed() | ||
16 | endif | ||
17 | if get_option('tcg_interpreter') | ||
18 | tcg_arch = 'tci' | ||
19 | + config_host += { 'CONFIG_TCG_INTERPRETER': 'y' } | ||
20 | elif host_arch == 'x86_64' | ||
21 | tcg_arch = 'i386' | ||
22 | elif host_arch == 'ppc64' | ||
23 | @@ -XXX,XX +XXX,XX @@ foreach target : target_dirs | ||
24 | if sym == 'CONFIG_TCG' or target in accelerator_targets.get(sym, []) | ||
25 | config_target += { sym: 'y' } | ||
26 | config_all += { sym: 'y' } | ||
27 | - if sym == 'CONFIG_TCG' and tcg_arch == 'tci' | ||
28 | - config_target += { 'CONFIG_TCG_INTERPRETER': 'y' } | ||
29 | - endif | ||
30 | if target in modular_tcg | ||
31 | config_target += { 'CONFIG_TCG_MODULAR': 'y' } | ||
32 | else | ||
33 | -- | ||
34 | 2.34.1 | ||
35 | |||
36 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | We are going to modify this code, so fix its style first to avoid: | ||
4 | |||
5 | ERROR: spaces required around that '*' (ctx:VxV) | ||
6 | #281: FILE: tcg/s390x/tcg-target.c.inc:1224: | ||
7 | + uintptr_t mask = ~(0xffffull << i*16); | ||
8 | ^ | ||
9 | |||
10 | Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com> | ||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | Message-Id: <20221130132654.76369-2-philmd@linaro.org> | ||
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | tcg/s390x/tcg-target.c.inc | 20 ++++++++++---------- | ||
16 | 1 file changed, 10 insertions(+), 10 deletions(-) | ||
17 | |||
18 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/tcg/s390x/tcg-target.c.inc | ||
21 | +++ b/tcg/s390x/tcg-target.c.inc | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool maybe_out_small_movi(TCGContext *s, TCGType type, | ||
23 | } | ||
24 | |||
25 | for (i = 0; i < 4; i++) { | ||
26 | - tcg_target_long mask = 0xffffull << i*16; | ||
27 | + tcg_target_long mask = 0xffffull << i * 16; | ||
28 | if ((uval & mask) == uval) { | ||
29 | - tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16); | ||
30 | + tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i * 16); | ||
31 | return true; | ||
32 | } | ||
33 | } | ||
34 | @@ -XXX,XX +XXX,XX @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) | ||
35 | |||
36 | /* Try all 32-bit insns that can perform it in one go. */ | ||
37 | for (i = 0; i < 4; i++) { | ||
38 | - tcg_target_ulong mask = ~(0xffffull << i*16); | ||
39 | + tcg_target_ulong mask = ~(0xffffull << i * 16); | ||
40 | if (((val | ~valid) & mask) == mask) { | ||
41 | - tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); | ||
42 | + tcg_out_insn_RI(s, ni_insns[i], dest, val >> i * 16); | ||
43 | return; | ||
44 | } | ||
45 | } | ||
46 | @@ -XXX,XX +XXX,XX @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) | ||
47 | /* Try all 48-bit insns that can perform it in one go. */ | ||
48 | if (HAVE_FACILITY(EXT_IMM)) { | ||
49 | for (i = 0; i < 2; i++) { | ||
50 | - tcg_target_ulong mask = ~(0xffffffffull << i*32); | ||
51 | + tcg_target_ulong mask = ~(0xffffffffull << i * 32); | ||
52 | if (((val | ~valid) & mask) == mask) { | ||
53 | - tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); | ||
54 | + tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i * 32); | ||
55 | return; | ||
56 | } | ||
57 | } | ||
58 | @@ -XXX,XX +XXX,XX @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) | ||
59 | |||
60 | /* Try all 32-bit insns that can perform it in one go. */ | ||
61 | for (i = 0; i < 4; i++) { | ||
62 | - tcg_target_ulong mask = (0xffffull << i*16); | ||
63 | + tcg_target_ulong mask = (0xffffull << i * 16); | ||
64 | if ((val & mask) != 0 && (val & ~mask) == 0) { | ||
65 | - tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); | ||
66 | + tcg_out_insn_RI(s, oi_insns[i], dest, val >> i * 16); | ||
67 | return; | ||
68 | } | ||
69 | } | ||
70 | @@ -XXX,XX +XXX,XX @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) | ||
71 | /* Try all 48-bit insns that can perform it in one go. */ | ||
72 | if (HAVE_FACILITY(EXT_IMM)) { | ||
73 | for (i = 0; i < 2; i++) { | ||
74 | - tcg_target_ulong mask = (0xffffffffull << i*32); | ||
75 | + tcg_target_ulong mask = (0xffffffffull << i * 32); | ||
76 | if ((val & mask) != 0 && (val & ~mask) == 0) { | ||
77 | - tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32); | ||
78 | + tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i * 32); | ||
79 | return; | ||
80 | } | ||
81 | } | ||
82 | -- | ||
83 | 2.34.1 | ||
84 | |||
85 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Remove whitespace at end of line, plus one place this also | ||
2 | highlights some missing braces. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/tcg.c | 33 +++++++++++++++++---------------- | ||
8 | tcg/ppc/tcg-target.c.inc | 2 +- | ||
9 | 2 files changed, 18 insertions(+), 17 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/tcg.c | ||
14 | +++ b/tcg/tcg.c | ||
15 | @@ -XXX,XX +XXX,XX @@ void *tcg_malloc_internal(TCGContext *s, int size) | ||
16 | { | ||
17 | TCGPool *p; | ||
18 | int pool_size; | ||
19 | - | ||
20 | + | ||
21 | if (size > TCG_POOL_CHUNK_SIZE) { | ||
22 | /* big malloc: insert a new pool (XXX: could optimize) */ | ||
23 | p = g_malloc(sizeof(TCGPool) + size); | ||
24 | @@ -XXX,XX +XXX,XX @@ void *tcg_malloc_internal(TCGContext *s, int size) | ||
25 | p = g_malloc(sizeof(TCGPool) + pool_size); | ||
26 | p->size = pool_size; | ||
27 | p->next = NULL; | ||
28 | - if (s->pool_current) | ||
29 | + if (s->pool_current) { | ||
30 | s->pool_current->next = p; | ||
31 | - else | ||
32 | + } else { | ||
33 | s->pool_first = p; | ||
34 | + } | ||
35 | } else { | ||
36 | p = p->next; | ||
37 | } | ||
38 | @@ -XXX,XX +XXX,XX @@ static void dump_regs(TCGContext *s) | ||
39 | |||
40 | for(i = 0; i < TCG_TARGET_NB_REGS; i++) { | ||
41 | if (s->reg_to_temp[i] != NULL) { | ||
42 | - printf("%s: %s\n", | ||
43 | - tcg_target_reg_names[i], | ||
44 | + printf("%s: %s\n", | ||
45 | + tcg_target_reg_names[i], | ||
46 | tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); | ||
47 | } | ||
48 | } | ||
49 | @@ -XXX,XX +XXX,XX @@ static void check_regs(TCGContext *s) | ||
50 | ts = s->reg_to_temp[reg]; | ||
51 | if (ts != NULL) { | ||
52 | if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { | ||
53 | - printf("Inconsistency for register %s:\n", | ||
54 | + printf("Inconsistency for register %s:\n", | ||
55 | tcg_target_reg_names[reg]); | ||
56 | goto fail; | ||
57 | } | ||
58 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
59 | nb_iargs = def->nb_iargs; | ||
60 | |||
61 | /* copy constants */ | ||
62 | - memcpy(new_args + nb_oargs + nb_iargs, | ||
63 | + memcpy(new_args + nb_oargs + nb_iargs, | ||
64 | op->args + nb_oargs + nb_iargs, | ||
65 | sizeof(TCGArg) * def->nb_cargs); | ||
66 | |||
67 | i_allocated_regs = s->reserved_regs; | ||
68 | o_allocated_regs = s->reserved_regs; | ||
69 | |||
70 | - /* satisfy input constraints */ | ||
71 | + /* satisfy input constraints */ | ||
72 | for (k = 0; k < nb_iargs; k++) { | ||
73 | TCGRegSet i_preferred_regs, o_preferred_regs; | ||
74 | |||
75 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
76 | const_args[i] = 0; | ||
77 | tcg_regset_set_reg(i_allocated_regs, reg); | ||
78 | } | ||
79 | - | ||
80 | + | ||
81 | /* mark dead temporaries and free the associated registers */ | ||
82 | for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { | ||
83 | if (IS_DEAD_ARG(i)) { | ||
84 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
85 | tcg_reg_alloc_bb_end(s, i_allocated_regs); | ||
86 | } else { | ||
87 | if (def->flags & TCG_OPF_CALL_CLOBBER) { | ||
88 | - /* XXX: permit generic clobber register list ? */ | ||
89 | + /* XXX: permit generic clobber register list ? */ | ||
90 | for (i = 0; i < TCG_TARGET_NB_REGS; i++) { | ||
91 | if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { | ||
92 | tcg_reg_free(s, i, i_allocated_regs); | ||
93 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
94 | an exception. */ | ||
95 | sync_globals(s, i_allocated_regs); | ||
96 | } | ||
97 | - | ||
98 | + | ||
99 | /* satisfy the output constraints */ | ||
100 | for(k = 0; k < nb_oargs; k++) { | ||
101 | i = def->args_ct[k].sort_index; | ||
102 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
103 | |||
104 | /* assign stack slots first */ | ||
105 | call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); | ||
106 | - call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & | ||
107 | + call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & | ||
108 | ~(TCG_TARGET_STACK_ALIGN - 1); | ||
109 | allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); | ||
110 | if (allocate_args) { | ||
111 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
112 | stack_offset += sizeof(tcg_target_long); | ||
113 | #endif | ||
114 | } | ||
115 | - | ||
116 | + | ||
117 | /* assign input registers */ | ||
118 | allocated_regs = s->reserved_regs; | ||
119 | for (i = 0; i < nb_regs; i++) { | ||
120 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
121 | tcg_regset_set_reg(allocated_regs, reg); | ||
122 | } | ||
123 | } | ||
124 | - | ||
125 | + | ||
126 | /* mark dead temporaries and free the associated registers */ | ||
127 | for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { | ||
128 | if (IS_DEAD_ARG(i)) { | ||
129 | temp_dead(s, arg_temp(op->args[i])); | ||
130 | } | ||
131 | } | ||
132 | - | ||
133 | + | ||
134 | /* clobber call registers */ | ||
135 | for (i = 0; i < TCG_TARGET_NB_REGS; i++) { | ||
136 | if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { | ||
137 | @@ -XXX,XX +XXX,XX @@ void tcg_dump_info(GString *buf) | ||
138 | (double)s->code_out_len / tb_div_count); | ||
139 | g_string_append_printf(buf, "avg search data/TB %0.1f\n", | ||
140 | (double)s->search_out_len / tb_div_count); | ||
141 | - | ||
142 | + | ||
143 | g_string_append_printf(buf, "cycles/op %0.1f\n", | ||
144 | s->op_count ? (double)tot / s->op_count : 0); | ||
145 | g_string_append_printf(buf, "cycles/in byte %0.1f\n", | ||
146 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
147 | index XXXXXXX..XXXXXXX 100644 | ||
148 | --- a/tcg/ppc/tcg-target.c.inc | ||
149 | +++ b/tcg/ppc/tcg-target.c.inc | ||
150 | @@ -XXX,XX +XXX,XX @@ | ||
151 | # else | ||
152 | # error "Unknown ABI" | ||
153 | # endif | ||
154 | -#endif | ||
155 | +#endif | ||
156 | |||
157 | #ifdef _CALL_SYSV | ||
158 | # define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
159 | -- | ||
160 | 2.34.1 | ||
161 | |||
162 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Create a wrapper for locking/unlocking the iothread lock. | ||
1 | 2 | ||
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | include/qemu/main-loop.h | 29 +++++++++++++++++++++++++++++ | ||
7 | 1 file changed, 29 insertions(+) | ||
8 | |||
9 | diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/include/qemu/main-loop.h | ||
12 | +++ b/include/qemu/main-loop.h | ||
13 | @@ -XXX,XX +XXX,XX @@ void qemu_mutex_lock_iothread_impl(const char *file, int line); | ||
14 | */ | ||
15 | void qemu_mutex_unlock_iothread(void); | ||
16 | |||
17 | +/** | ||
18 | + * QEMU_IOTHREAD_LOCK_GUARD | ||
19 | + * | ||
20 | + * Wrap a block of code in a conditional qemu_mutex_{lock,unlock}_iothread. | ||
21 | + */ | ||
22 | +typedef struct IOThreadLockAuto IOThreadLockAuto; | ||
23 | + | ||
24 | +static inline IOThreadLockAuto *qemu_iothread_auto_lock(const char *file, | ||
25 | + int line) | ||
26 | +{ | ||
27 | + if (qemu_mutex_iothread_locked()) { | ||
28 | + return NULL; | ||
29 | + } | ||
30 | + qemu_mutex_lock_iothread_impl(file, line); | ||
31 | + /* Anything non-NULL causes the cleanup function to be called */ | ||
32 | + return (IOThreadLockAuto *)(uintptr_t)1; | ||
33 | +} | ||
34 | + | ||
35 | +static inline void qemu_iothread_auto_unlock(IOThreadLockAuto *l) | ||
36 | +{ | ||
37 | + qemu_mutex_unlock_iothread(); | ||
38 | +} | ||
39 | + | ||
40 | +G_DEFINE_AUTOPTR_CLEANUP_FUNC(IOThreadLockAuto, qemu_iothread_auto_unlock) | ||
41 | + | ||
42 | +#define QEMU_IOTHREAD_LOCK_GUARD() \ | ||
43 | + g_autoptr(IOThreadLockAuto) _iothread_lock_auto __attribute__((unused)) \ | ||
44 | + = qemu_iothread_auto_lock(__FILE__, __LINE__) | ||
45 | + | ||
46 | /* | ||
47 | * qemu_cond_wait_iothread: Wait on condition for the main loop mutex | ||
48 | * | ||
49 | -- | ||
50 | 2.34.1 | ||
51 | |||
52 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | --- | ||
4 | hw/mips/mips_int.c | 11 +---------- | ||
5 | 1 file changed, 1 insertion(+), 10 deletions(-) | ||
1 | 6 | ||
7 | diff --git a/hw/mips/mips_int.c b/hw/mips/mips_int.c | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/hw/mips/mips_int.c | ||
10 | +++ b/hw/mips/mips_int.c | ||
11 | @@ -XXX,XX +XXX,XX @@ static void cpu_mips_irq_request(void *opaque, int irq, int level) | ||
12 | MIPSCPU *cpu = opaque; | ||
13 | CPUMIPSState *env = &cpu->env; | ||
14 | CPUState *cs = CPU(cpu); | ||
15 | - bool locked = false; | ||
16 | |||
17 | if (irq < 0 || irq > 7) { | ||
18 | return; | ||
19 | } | ||
20 | |||
21 | - /* Make sure locking works even if BQL is already held by the caller */ | ||
22 | - if (!qemu_mutex_iothread_locked()) { | ||
23 | - locked = true; | ||
24 | - qemu_mutex_lock_iothread(); | ||
25 | - } | ||
26 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
27 | |||
28 | if (level) { | ||
29 | env->CP0_Cause |= 1 << (irq + CP0Ca_IP); | ||
30 | @@ -XXX,XX +XXX,XX @@ static void cpu_mips_irq_request(void *opaque, int irq, int level) | ||
31 | } else { | ||
32 | cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); | ||
33 | } | ||
34 | - | ||
35 | - if (locked) { | ||
36 | - qemu_mutex_unlock_iothread(); | ||
37 | - } | ||
38 | } | ||
39 | |||
40 | void cpu_mips_irq_init_cpu(MIPSCPU *cpu) | ||
41 | -- | ||
42 | 2.34.1 | ||
43 | |||
44 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
2 | Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | --- | ||
5 | target/ppc/excp_helper.c | 11 +---------- | ||
6 | 1 file changed, 1 insertion(+), 10 deletions(-) | ||
1 | 7 | ||
8 | diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c | ||
9 | index XXXXXXX..XXXXXXX 100644 | ||
10 | --- a/target/ppc/excp_helper.c | ||
11 | +++ b/target/ppc/excp_helper.c | ||
12 | @@ -XXX,XX +XXX,XX @@ static int ppc_next_unmasked_interrupt(CPUPPCState *env) | ||
13 | void ppc_maybe_interrupt(CPUPPCState *env) | ||
14 | { | ||
15 | CPUState *cs = env_cpu(env); | ||
16 | - bool locked = false; | ||
17 | - | ||
18 | - if (!qemu_mutex_iothread_locked()) { | ||
19 | - locked = true; | ||
20 | - qemu_mutex_lock_iothread(); | ||
21 | - } | ||
22 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
23 | |||
24 | if (ppc_next_unmasked_interrupt(env)) { | ||
25 | cpu_interrupt(cs, CPU_INTERRUPT_HARD); | ||
26 | } else { | ||
27 | cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); | ||
28 | } | ||
29 | - | ||
30 | - if (locked) { | ||
31 | - qemu_mutex_unlock_iothread(); | ||
32 | - } | ||
33 | } | ||
34 | |||
35 | #if defined(TARGET_PPC64) | ||
36 | -- | ||
37 | 2.34.1 | ||
38 | |||
39 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | In addition, use tcg_enabled instead of !kvm_enabled. | ||
1 | 2 | ||
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | target/ppc/helper_regs.c | 14 ++++---------- | ||
8 | 1 file changed, 4 insertions(+), 10 deletions(-) | ||
9 | |||
10 | diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/target/ppc/helper_regs.c | ||
13 | +++ b/target/ppc/helper_regs.c | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | #include "qemu/main-loop.h" | ||
16 | #include "exec/exec-all.h" | ||
17 | #include "sysemu/kvm.h" | ||
18 | +#include "sysemu/tcg.h" | ||
19 | #include "helper_regs.h" | ||
20 | #include "power8-pmu.h" | ||
21 | #include "cpu-models.h" | ||
22 | @@ -XXX,XX +XXX,XX @@ void cpu_interrupt_exittb(CPUState *cs) | ||
23 | { | ||
24 | /* | ||
25 | * We don't need to worry about translation blocks | ||
26 | - * when running with KVM. | ||
27 | + * unless running with TCG. | ||
28 | */ | ||
29 | - if (kvm_enabled()) { | ||
30 | - return; | ||
31 | - } | ||
32 | - | ||
33 | - if (!qemu_mutex_iothread_locked()) { | ||
34 | - qemu_mutex_lock_iothread(); | ||
35 | - cpu_interrupt(cs, CPU_INTERRUPT_EXITTB); | ||
36 | - qemu_mutex_unlock_iothread(); | ||
37 | - } else { | ||
38 | + if (tcg_enabled()) { | ||
39 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
40 | cpu_interrupt(cs, CPU_INTERRUPT_EXITTB); | ||
41 | } | ||
42 | } | ||
43 | -- | ||
44 | 2.34.1 | ||
45 | |||
46 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
2 | Reviewed-by: Alistair Francis <alistair.francis@wdc.com> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | --- | ||
5 | target/riscv/cpu_helper.c | 10 +--------- | ||
6 | 1 file changed, 1 insertion(+), 9 deletions(-) | ||
1 | 7 | ||
8 | diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c | ||
9 | index XXXXXXX..XXXXXXX 100644 | ||
10 | --- a/target/riscv/cpu_helper.c | ||
11 | +++ b/target/riscv/cpu_helper.c | ||
12 | @@ -XXX,XX +XXX,XX @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value) | ||
13 | CPURISCVState *env = &cpu->env; | ||
14 | CPUState *cs = CPU(cpu); | ||
15 | uint64_t gein, vsgein = 0, vstip = 0, old = env->mip; | ||
16 | - bool locked = false; | ||
17 | |||
18 | if (riscv_cpu_virt_enabled(env)) { | ||
19 | gein = get_field(env->hstatus, HSTATUS_VGEIN); | ||
20 | @@ -XXX,XX +XXX,XX @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value) | ||
21 | mask = ((mask == MIP_VSTIP) && env->vstime_irq) ? 0 : mask; | ||
22 | vstip = env->vstime_irq ? MIP_VSTIP : 0; | ||
23 | |||
24 | - if (!qemu_mutex_iothread_locked()) { | ||
25 | - locked = true; | ||
26 | - qemu_mutex_lock_iothread(); | ||
27 | - } | ||
28 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
29 | |||
30 | env->mip = (env->mip & ~mask) | (value & mask); | ||
31 | |||
32 | @@ -XXX,XX +XXX,XX @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value) | ||
33 | cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); | ||
34 | } | ||
35 | |||
36 | - if (locked) { | ||
37 | - qemu_mutex_unlock_iothread(); | ||
38 | - } | ||
39 | - | ||
40 | return old; | ||
41 | } | ||
42 | |||
43 | -- | ||
44 | 2.34.1 | ||
45 | |||
46 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
2 | Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | --- | ||
5 | hw/ppc/ppc.c | 10 +--------- | ||
6 | 1 file changed, 1 insertion(+), 9 deletions(-) | ||
1 | 7 | ||
8 | diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c | ||
9 | index XXXXXXX..XXXXXXX 100644 | ||
10 | --- a/hw/ppc/ppc.c | ||
11 | +++ b/hw/ppc/ppc.c | ||
12 | @@ -XXX,XX +XXX,XX @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level) | ||
13 | { | ||
14 | CPUPPCState *env = &cpu->env; | ||
15 | unsigned int old_pending; | ||
16 | - bool locked = false; | ||
17 | |||
18 | /* We may already have the BQL if coming from the reset path */ | ||
19 | - if (!qemu_mutex_iothread_locked()) { | ||
20 | - locked = true; | ||
21 | - qemu_mutex_lock_iothread(); | ||
22 | - } | ||
23 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
24 | |||
25 | old_pending = env->pending_interrupts; | ||
26 | |||
27 | @@ -XXX,XX +XXX,XX @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level) | ||
28 | |||
29 | trace_ppc_irq_set_exit(env, irq, level, env->pending_interrupts, | ||
30 | CPU(cpu)->interrupt_request); | ||
31 | - | ||
32 | - if (locked) { | ||
33 | - qemu_mutex_unlock_iothread(); | ||
34 | - } | ||
35 | } | ||
36 | |||
37 | /* PowerPC 6xx / 7xx internal IRQ controller */ | ||
38 | -- | ||
39 | 2.34.1 | ||
40 | |||
41 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Narrow the scope of the lock to the actual read/write, | ||
2 | moving the cpu_transaction_failed call outside the lock. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | accel/tcg/cputlb.c | 25 ++++++++----------------- | ||
8 | 1 file changed, 8 insertions(+), 17 deletions(-) | ||
9 | |||
10 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/accel/tcg/cputlb.c | ||
13 | +++ b/accel/tcg/cputlb.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, | ||
15 | MemoryRegionSection *section; | ||
16 | MemoryRegion *mr; | ||
17 | uint64_t val; | ||
18 | - bool locked = false; | ||
19 | MemTxResult r; | ||
20 | |||
21 | section = iotlb_to_section(cpu, full->xlat_section, full->attrs); | ||
22 | @@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, | ||
23 | cpu_io_recompile(cpu, retaddr); | ||
24 | } | ||
25 | |||
26 | - if (!qemu_mutex_iothread_locked()) { | ||
27 | - qemu_mutex_lock_iothread(); | ||
28 | - locked = true; | ||
29 | + { | ||
30 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
31 | + r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs); | ||
32 | } | ||
33 | - r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs); | ||
34 | + | ||
35 | if (r != MEMTX_OK) { | ||
36 | hwaddr physaddr = mr_offset + | ||
37 | section->offset_within_address_space - | ||
38 | @@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, | ||
39 | cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, | ||
40 | mmu_idx, full->attrs, r, retaddr); | ||
41 | } | ||
42 | - if (locked) { | ||
43 | - qemu_mutex_unlock_iothread(); | ||
44 | - } | ||
45 | - | ||
46 | return val; | ||
47 | } | ||
48 | |||
49 | @@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, | ||
50 | hwaddr mr_offset; | ||
51 | MemoryRegionSection *section; | ||
52 | MemoryRegion *mr; | ||
53 | - bool locked = false; | ||
54 | MemTxResult r; | ||
55 | |||
56 | section = iotlb_to_section(cpu, full->xlat_section, full->attrs); | ||
57 | @@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, | ||
58 | */ | ||
59 | save_iotlb_data(cpu, section, mr_offset); | ||
60 | |||
61 | - if (!qemu_mutex_iothread_locked()) { | ||
62 | - qemu_mutex_lock_iothread(); | ||
63 | - locked = true; | ||
64 | + { | ||
65 | + QEMU_IOTHREAD_LOCK_GUARD(); | ||
66 | + r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs); | ||
67 | } | ||
68 | - r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs); | ||
69 | + | ||
70 | if (r != MEMTX_OK) { | ||
71 | hwaddr physaddr = mr_offset + | ||
72 | section->offset_within_address_space - | ||
73 | @@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, | ||
74 | MMU_DATA_STORE, mmu_idx, full->attrs, r, | ||
75 | retaddr); | ||
76 | } | ||
77 | - if (locked) { | ||
78 | - qemu_mutex_unlock_iothread(); | ||
79 | - } | ||
80 | } | ||
81 | |||
82 | static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) | ||
83 | -- | ||
84 | 2.34.1 | ||
85 | |||
86 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Replace goto allocate_in_reg with a boolean. | ||
2 | Remove o_preferred_regs which isn't used, except to copy. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/tcg.c | 45 +++++++++++++++++++++------------------------ | ||
8 | 1 file changed, 21 insertions(+), 24 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/tcg.c | ||
13 | +++ b/tcg/tcg.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
15 | |||
16 | /* satisfy input constraints */ | ||
17 | for (k = 0; k < nb_iargs; k++) { | ||
18 | - TCGRegSet i_preferred_regs, o_preferred_regs; | ||
19 | + TCGRegSet i_preferred_regs; | ||
20 | + bool allocate_new_reg; | ||
21 | |||
22 | i = def->args_ct[nb_oargs + k].sort_index; | ||
23 | arg = op->args[i]; | ||
24 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
25 | continue; | ||
26 | } | ||
27 | |||
28 | - i_preferred_regs = o_preferred_regs = 0; | ||
29 | + reg = ts->reg; | ||
30 | + i_preferred_regs = 0; | ||
31 | + allocate_new_reg = false; | ||
32 | + | ||
33 | if (arg_ct->ialias) { | ||
34 | - o_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
35 | + i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
36 | |||
37 | /* | ||
38 | * If the input is readonly, then it cannot also be an | ||
39 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
40 | * register and move it. | ||
41 | */ | ||
42 | if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { | ||
43 | - goto allocate_in_reg; | ||
44 | + allocate_new_reg = true; | ||
45 | + } else if (ts->val_type == TEMP_VAL_REG) { | ||
46 | + /* | ||
47 | + * Check if the current register has already been | ||
48 | + * allocated for another input. | ||
49 | + */ | ||
50 | + allocate_new_reg = tcg_regset_test_reg(i_allocated_regs, reg); | ||
51 | } | ||
52 | - | ||
53 | - /* | ||
54 | - * Check if the current register has already been allocated | ||
55 | - * for another input aliased to an output. | ||
56 | - */ | ||
57 | - if (ts->val_type == TEMP_VAL_REG) { | ||
58 | - reg = ts->reg; | ||
59 | - for (int k2 = 0; k2 < k; k2++) { | ||
60 | - int i2 = def->args_ct[nb_oargs + k2].sort_index; | ||
61 | - if (def->args_ct[i2].ialias && reg == new_args[i2]) { | ||
62 | - goto allocate_in_reg; | ||
63 | - } | ||
64 | - } | ||
65 | - } | ||
66 | - i_preferred_regs = o_preferred_regs; | ||
67 | } | ||
68 | |||
69 | - temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); | ||
70 | - reg = ts->reg; | ||
71 | + if (!allocate_new_reg) { | ||
72 | + temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); | ||
73 | + reg = ts->reg; | ||
74 | + allocate_new_reg = !tcg_regset_test_reg(arg_ct->regs, reg); | ||
75 | + } | ||
76 | |||
77 | - if (!tcg_regset_test_reg(arg_ct->regs, reg)) { | ||
78 | - allocate_in_reg: | ||
79 | + if (allocate_new_reg) { | ||
80 | /* | ||
81 | * Allocate a new register matching the constraint | ||
82 | * and move the temporary register into it. | ||
83 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
84 | temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
85 | i_allocated_regs, 0); | ||
86 | reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, | ||
87 | - o_preferred_regs, ts->indirect_base); | ||
88 | + i_preferred_regs, ts->indirect_base); | ||
89 | if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { | ||
90 | /* | ||
91 | * Cross register class move not supported. Sync the | ||
92 | -- | ||
93 | 2.34.1 | ||
94 | |||
95 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The hppa host code has been removed since 2013; this | ||
2 | should have been deleted at the same time. | ||
1 | 3 | ||
4 | Fixes: 802b5081233a ("tcg-hppa: Remove tcg backend") | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/aarch64/tcg-target.h | 1 - | ||
9 | tcg/arm/tcg-target.h | 1 - | ||
10 | tcg/tcg.c | 32 ++------------------------------ | ||
11 | 3 files changed, 2 insertions(+), 32 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/aarch64/tcg-target.h | ||
16 | +++ b/tcg/aarch64/tcg-target.h | ||
17 | @@ -XXX,XX +XXX,XX @@ | ||
18 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
19 | #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 | ||
20 | #define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) | ||
21 | -#undef TCG_TARGET_STACK_GROWSUP | ||
22 | |||
23 | typedef enum { | ||
24 | TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, | ||
25 | diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/tcg/arm/tcg-target.h | ||
28 | +++ b/tcg/arm/tcg-target.h | ||
29 | @@ -XXX,XX +XXX,XX @@ extern int arm_arch; | ||
30 | |||
31 | #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) | ||
32 | |||
33 | -#undef TCG_TARGET_STACK_GROWSUP | ||
34 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
35 | #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 | ||
36 | #define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX | ||
37 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/tcg/tcg.c | ||
40 | +++ b/tcg/tcg.c | ||
41 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
42 | } | ||
43 | |||
44 | if (TCG_TARGET_REG_BITS < 64 && is_64bit) { | ||
45 | - /* | ||
46 | - * If stack grows up, then we will be placing successive | ||
47 | - * arguments at lower addresses, which means we need to | ||
48 | - * reverse the order compared to how we would normally | ||
49 | - * treat either big or little-endian. For those arguments | ||
50 | - * that will wind up in registers, this still works for | ||
51 | - * HPPA (the only current STACK_GROWSUP target) since the | ||
52 | - * argument registers are *also* allocated in decreasing | ||
53 | - * order. If another such target is added, this logic may | ||
54 | - * have to get more complicated to differentiate between | ||
55 | - * stack arguments and register arguments. | ||
56 | - */ | ||
57 | -#if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP) | ||
58 | - op->args[pi++] = temp_arg(args[i] + 1); | ||
59 | - op->args[pi++] = temp_arg(args[i]); | ||
60 | -#else | ||
61 | - op->args[pi++] = temp_arg(args[i]); | ||
62 | - op->args[pi++] = temp_arg(args[i] + 1); | ||
63 | -#endif | ||
64 | + op->args[pi++] = temp_arg(args[i] + HOST_BIG_ENDIAN); | ||
65 | + op->args[pi++] = temp_arg(args[i] + !HOST_BIG_ENDIAN); | ||
66 | real_args += 2; | ||
67 | continue; | ||
68 | } | ||
69 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
70 | return true; | ||
71 | } | ||
72 | |||
73 | -#ifdef TCG_TARGET_STACK_GROWSUP | ||
74 | -#define STACK_DIR(x) (-(x)) | ||
75 | -#else | ||
76 | -#define STACK_DIR(x) (x) | ||
77 | -#endif | ||
78 | - | ||
79 | static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
80 | { | ||
81 | const int nb_oargs = TCGOP_CALLO(op); | ||
82 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
83 | stack_offset = TCG_TARGET_CALL_STACK_OFFSET; | ||
84 | for (i = nb_regs; i < nb_iargs; i++) { | ||
85 | arg = op->args[nb_oargs + i]; | ||
86 | -#ifdef TCG_TARGET_STACK_GROWSUP | ||
87 | - stack_offset -= sizeof(tcg_target_long); | ||
88 | -#endif | ||
89 | if (arg != TCG_CALL_DUMMY_ARG) { | ||
90 | ts = arg_temp(arg); | ||
91 | temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
92 | s->reserved_regs, 0); | ||
93 | tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); | ||
94 | } | ||
95 | -#ifndef TCG_TARGET_STACK_GROWSUP | ||
96 | stack_offset += sizeof(tcg_target_long); | ||
97 | -#endif | ||
98 | } | ||
99 | |||
100 | /* assign input registers */ | ||
101 | -- | ||
102 | 2.34.1 | ||
103 | |||
104 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Unused since commit 7b7d8b2d9a ("tcg/tci: Use ffi for calls"). | ||
1 | 2 | ||
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | tcg/tci.c | 1 - | ||
7 | tcg/tci/tcg-target.c.inc | 4 ---- | ||
8 | 2 files changed, 5 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/tci.c b/tcg/tci.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/tci.c | ||
13 | +++ b/tcg/tci.c | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | */ | ||
16 | |||
17 | #include "qemu/osdep.h" | ||
18 | -#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */ | ||
19 | #include "exec/cpu_ldst.h" | ||
20 | #include "tcg/tcg-op.h" | ||
21 | #include "tcg/tcg-ldst.h" | ||
22 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/tcg/tci/tcg-target.c.inc | ||
25 | +++ b/tcg/tci/tcg-target.c.inc | ||
26 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = { | ||
27 | TCG_REG_R0, | ||
28 | }; | ||
29 | |||
30 | -#if MAX_OPC_PARAM_IARGS != 7 | ||
31 | -# error Fix needed, number of supported input arguments changed! | ||
32 | -#endif | ||
33 | - | ||
34 | /* No call arguments via registers. All will be stored on the "stack". */ | ||
35 | static const int tcg_target_call_iarg_regs[] = { }; | ||
36 | |||
37 | -- | ||
38 | 2.34.1 | ||
39 | |||
40 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The assignment to mem_coherent should be done with any | ||
2 | modification, not simply with a newly allocated register. | ||
1 | 3 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | tcg/tcg.c | 4 ++-- | ||
7 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
8 | |||
9 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/tcg/tcg.c | ||
12 | +++ b/tcg/tcg.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
14 | ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
15 | op->output_pref[0], ots->indirect_base); | ||
16 | ots->val_type = TEMP_VAL_REG; | ||
17 | - ots->mem_coherent = 0; | ||
18 | s->reg_to_temp[ots->reg] = ots; | ||
19 | } | ||
20 | |||
21 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
22 | tcg_debug_assert(ok); | ||
23 | |||
24 | done: | ||
25 | + ots->mem_coherent = 0; | ||
26 | if (IS_DEAD_ARG(1)) { | ||
27 | temp_dead(s, its); | ||
28 | } | ||
29 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
30 | ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
31 | op->output_pref[0], ots->indirect_base); | ||
32 | ots->val_type = TEMP_VAL_REG; | ||
33 | - ots->mem_coherent = 0; | ||
34 | s->reg_to_temp[ots->reg] = ots; | ||
35 | } | ||
36 | |||
37 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
38 | return false; | ||
39 | |||
40 | done: | ||
41 | + ots->mem_coherent = 0; | ||
42 | if (IS_DEAD_ARG(1)) { | ||
43 | temp_dead(s, itsl); | ||
44 | } | ||
45 | -- | ||
46 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | Create two new functions, set_temp_val_{reg,nonreg}. | |
2 | Assert that the reg_to_temp mapping is correct before | ||
3 | any changes are made. | ||
4 | |||
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/tcg.c | 159 +++++++++++++++++++++++++++++------------------------- | ||
9 | 1 file changed, 85 insertions(+), 74 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/tcg.c | ||
14 | +++ b/tcg/tcg.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) | ||
16 | ts->mem_allocated = 1; | ||
17 | } | ||
18 | |||
19 | +/* Assign @reg to @ts, and update reg_to_temp[]. */ | ||
20 | +static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) | ||
21 | +{ | ||
22 | + if (ts->val_type == TEMP_VAL_REG) { | ||
23 | + TCGReg old = ts->reg; | ||
24 | + tcg_debug_assert(s->reg_to_temp[old] == ts); | ||
25 | + if (old == reg) { | ||
26 | + return; | ||
27 | + } | ||
28 | + s->reg_to_temp[old] = NULL; | ||
29 | + } | ||
30 | + tcg_debug_assert(s->reg_to_temp[reg] == NULL); | ||
31 | + s->reg_to_temp[reg] = ts; | ||
32 | + ts->val_type = TEMP_VAL_REG; | ||
33 | + ts->reg = reg; | ||
34 | +} | ||
35 | + | ||
36 | +/* Assign a non-register value type to @ts, and update reg_to_temp[]. */ | ||
37 | +static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) | ||
38 | +{ | ||
39 | + tcg_debug_assert(type != TEMP_VAL_REG); | ||
40 | + if (ts->val_type == TEMP_VAL_REG) { | ||
41 | + TCGReg reg = ts->reg; | ||
42 | + tcg_debug_assert(s->reg_to_temp[reg] == ts); | ||
43 | + s->reg_to_temp[reg] = NULL; | ||
44 | + } | ||
45 | + ts->val_type = type; | ||
46 | +} | ||
47 | + | ||
48 | static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); | ||
49 | |||
50 | /* Mark a temporary as free or dead. If 'free_or_dead' is negative, | ||
51 | @@ -XXX,XX +XXX,XX @@ static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) | ||
52 | default: | ||
53 | g_assert_not_reached(); | ||
54 | } | ||
55 | - if (ts->val_type == TEMP_VAL_REG) { | ||
56 | - s->reg_to_temp[ts->reg] = NULL; | ||
57 | - } | ||
58 | - ts->val_type = new_type; | ||
59 | + set_temp_val_nonreg(s, ts, new_type); | ||
60 | } | ||
61 | |||
62 | /* Mark a temporary as dead. */ | ||
63 | @@ -XXX,XX +XXX,XX @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, | ||
64 | default: | ||
65 | tcg_abort(); | ||
66 | } | ||
67 | - ts->reg = reg; | ||
68 | - ts->val_type = TEMP_VAL_REG; | ||
69 | - s->reg_to_temp[reg] = ts; | ||
70 | + set_temp_val_reg(s, ts, reg); | ||
71 | } | ||
72 | |||
73 | /* Save a temporary to memory. 'allocated_regs' is used in case a | ||
74 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, | ||
75 | tcg_debug_assert(!temp_readonly(ots)); | ||
76 | |||
77 | /* The movi is not explicitly generated here. */ | ||
78 | - if (ots->val_type == TEMP_VAL_REG) { | ||
79 | - s->reg_to_temp[ots->reg] = NULL; | ||
80 | - } | ||
81 | - ots->val_type = TEMP_VAL_CONST; | ||
82 | + set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); | ||
83 | ots->val = val; | ||
84 | ots->mem_coherent = 0; | ||
85 | if (NEED_SYNC_ARG(0)) { | ||
86 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) | ||
87 | TCGRegSet allocated_regs, preferred_regs; | ||
88 | TCGTemp *ts, *ots; | ||
89 | TCGType otype, itype; | ||
90 | + TCGReg oreg, ireg; | ||
91 | |||
92 | allocated_regs = s->reserved_regs; | ||
93 | preferred_regs = op->output_pref[0]; | ||
94 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) | ||
95 | temp_load(s, ts, tcg_target_available_regs[itype], | ||
96 | allocated_regs, preferred_regs); | ||
97 | } | ||
98 | - | ||
99 | tcg_debug_assert(ts->val_type == TEMP_VAL_REG); | ||
100 | + ireg = ts->reg; | ||
101 | + | ||
102 | if (IS_DEAD_ARG(0)) { | ||
103 | /* mov to a non-saved dead register makes no sense (even with | ||
104 | liveness analysis disabled). */ | ||
105 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) | ||
106 | if (!ots->mem_allocated) { | ||
107 | temp_allocate_frame(s, ots); | ||
108 | } | ||
109 | - tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); | ||
110 | + tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); | ||
111 | if (IS_DEAD_ARG(1)) { | ||
112 | temp_dead(s, ts); | ||
113 | } | ||
114 | temp_dead(s, ots); | ||
115 | + return; | ||
116 | + } | ||
117 | + | ||
118 | + if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { | ||
119 | + /* | ||
120 | + * The mov can be suppressed. Kill input first, so that it | ||
121 | + * is unlinked from reg_to_temp, then set the output to the | ||
122 | + * reg that we saved from the input. | ||
123 | + */ | ||
124 | + temp_dead(s, ts); | ||
125 | + oreg = ireg; | ||
126 | } else { | ||
127 | - if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { | ||
128 | - /* the mov can be suppressed */ | ||
129 | - if (ots->val_type == TEMP_VAL_REG) { | ||
130 | - s->reg_to_temp[ots->reg] = NULL; | ||
131 | - } | ||
132 | - ots->reg = ts->reg; | ||
133 | - temp_dead(s, ts); | ||
134 | + if (ots->val_type == TEMP_VAL_REG) { | ||
135 | + oreg = ots->reg; | ||
136 | } else { | ||
137 | - if (ots->val_type != TEMP_VAL_REG) { | ||
138 | - /* When allocating a new register, make sure to not spill the | ||
139 | - input one. */ | ||
140 | - tcg_regset_set_reg(allocated_regs, ts->reg); | ||
141 | - ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], | ||
142 | - allocated_regs, preferred_regs, | ||
143 | - ots->indirect_base); | ||
144 | - } | ||
145 | - if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { | ||
146 | - /* | ||
147 | - * Cross register class move not supported. | ||
148 | - * Store the source register into the destination slot | ||
149 | - * and leave the destination temp as TEMP_VAL_MEM. | ||
150 | - */ | ||
151 | - assert(!temp_readonly(ots)); | ||
152 | - if (!ts->mem_allocated) { | ||
153 | - temp_allocate_frame(s, ots); | ||
154 | - } | ||
155 | - tcg_out_st(s, ts->type, ts->reg, | ||
156 | - ots->mem_base->reg, ots->mem_offset); | ||
157 | - ots->mem_coherent = 1; | ||
158 | - temp_free_or_dead(s, ots, -1); | ||
159 | - return; | ||
160 | - } | ||
161 | + /* Make sure to not spill the input register during allocation. */ | ||
162 | + oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], | ||
163 | + allocated_regs | ((TCGRegSet)1 << ireg), | ||
164 | + preferred_regs, ots->indirect_base); | ||
165 | } | ||
166 | - ots->val_type = TEMP_VAL_REG; | ||
167 | - ots->mem_coherent = 0; | ||
168 | - s->reg_to_temp[ots->reg] = ots; | ||
169 | - if (NEED_SYNC_ARG(0)) { | ||
170 | - temp_sync(s, ots, allocated_regs, 0, 0); | ||
171 | + if (!tcg_out_mov(s, otype, oreg, ireg)) { | ||
172 | + /* | ||
173 | + * Cross register class move not supported. | ||
174 | + * Store the source register into the destination slot | ||
175 | + * and leave the destination temp as TEMP_VAL_MEM. | ||
176 | + */ | ||
177 | + assert(!temp_readonly(ots)); | ||
178 | + if (!ts->mem_allocated) { | ||
179 | + temp_allocate_frame(s, ots); | ||
180 | + } | ||
181 | + tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); | ||
182 | + set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); | ||
183 | + ots->mem_coherent = 1; | ||
184 | + return; | ||
185 | } | ||
186 | } | ||
187 | + set_temp_val_reg(s, ots, oreg); | ||
188 | + ots->mem_coherent = 0; | ||
189 | + | ||
190 | + if (NEED_SYNC_ARG(0)) { | ||
191 | + temp_sync(s, ots, allocated_regs, 0, 0); | ||
192 | + } | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
197 | /* Allocate the output register now. */ | ||
198 | if (ots->val_type != TEMP_VAL_REG) { | ||
199 | TCGRegSet allocated_regs = s->reserved_regs; | ||
200 | + TCGReg oreg; | ||
201 | |||
202 | if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { | ||
203 | /* Make sure to not spill the input register. */ | ||
204 | tcg_regset_set_reg(allocated_regs, its->reg); | ||
205 | } | ||
206 | - ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
207 | - op->output_pref[0], ots->indirect_base); | ||
208 | - ots->val_type = TEMP_VAL_REG; | ||
209 | - s->reg_to_temp[ots->reg] = ots; | ||
210 | + oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
211 | + op->output_pref[0], ots->indirect_base); | ||
212 | + set_temp_val_reg(s, ots, oreg); | ||
213 | } | ||
214 | |||
215 | switch (its->val_type) { | ||
216 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
217 | #else | ||
218 | endian_fixup = 0; | ||
219 | #endif | ||
220 | + /* Attempt to dup directly from the input memory slot. */ | ||
221 | if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, | ||
222 | its->mem_offset + endian_fixup)) { | ||
223 | goto done; | ||
224 | } | ||
225 | + /* Load the input into the destination vector register. */ | ||
226 | tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); | ||
227 | break; | ||
228 | |||
229 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
230 | op->output_pref[k], ts->indirect_base); | ||
231 | } | ||
232 | tcg_regset_set_reg(o_allocated_regs, reg); | ||
233 | - if (ts->val_type == TEMP_VAL_REG) { | ||
234 | - s->reg_to_temp[ts->reg] = NULL; | ||
235 | - } | ||
236 | - ts->val_type = TEMP_VAL_REG; | ||
237 | - ts->reg = reg; | ||
238 | - /* | ||
239 | - * Temp value is modified, so the value kept in memory is | ||
240 | - * potentially not the same. | ||
241 | - */ | ||
242 | + set_temp_val_reg(s, ts, reg); | ||
243 | ts->mem_coherent = 0; | ||
244 | - s->reg_to_temp[reg] = ts; | ||
245 | new_args[i] = reg; | ||
246 | } | ||
247 | } | ||
248 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
249 | TCGRegSet allocated_regs = s->reserved_regs; | ||
250 | TCGRegSet dup_out_regs = | ||
251 | tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; | ||
252 | + TCGReg oreg; | ||
253 | |||
254 | /* Make sure to not spill the input registers. */ | ||
255 | if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { | ||
256 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
257 | tcg_regset_set_reg(allocated_regs, itsh->reg); | ||
258 | } | ||
259 | |||
260 | - ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
261 | - op->output_pref[0], ots->indirect_base); | ||
262 | - ots->val_type = TEMP_VAL_REG; | ||
263 | - s->reg_to_temp[ots->reg] = ots; | ||
264 | + oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
265 | + op->output_pref[0], ots->indirect_base); | ||
266 | + set_temp_val_reg(s, ots, oreg); | ||
267 | } | ||
268 | |||
269 | /* Promote dup2 of immediates to dupi_vec. */ | ||
270 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
271 | tcg_debug_assert(!temp_readonly(ts)); | ||
272 | |||
273 | reg = tcg_target_call_oarg_regs[i]; | ||
274 | - tcg_debug_assert(s->reg_to_temp[reg] == NULL); | ||
275 | - if (ts->val_type == TEMP_VAL_REG) { | ||
276 | - s->reg_to_temp[ts->reg] = NULL; | ||
277 | - } | ||
278 | - ts->val_type = TEMP_VAL_REG; | ||
279 | - ts->reg = reg; | ||
280 | + set_temp_val_reg(s, ts, reg); | ||
281 | ts->mem_coherent = 0; | ||
282 | - s->reg_to_temp[reg] = ts; | ||
283 | if (NEED_SYNC_ARG(i)) { | ||
284 | temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); | ||
285 | } else if (IS_DEAD_ARG(i)) { | ||
286 | -- | ||
287 | 2.34.1 | ||
288 | |||
289 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | We now check the consistency of reg_to_temp[] with each update, | ||
2 | so the utility of checking consistency at the end of each | ||
3 | opcode is minimal. In addition, the form of this check is | ||
4 | quite expensive, consuming 10% of a checking-enabled build. | ||
1 | 5 | ||
6 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | tcg/tcg.c | 76 ------------------------------------------------------- | ||
10 | 1 file changed, 76 deletions(-) | ||
11 | |||
12 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/tcg/tcg.c | ||
15 | +++ b/tcg/tcg.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
17 | return changes; | ||
18 | } | ||
19 | |||
20 | -#ifdef CONFIG_DEBUG_TCG | ||
21 | -static void dump_regs(TCGContext *s) | ||
22 | -{ | ||
23 | - TCGTemp *ts; | ||
24 | - int i; | ||
25 | - char buf[64]; | ||
26 | - | ||
27 | - for(i = 0; i < s->nb_temps; i++) { | ||
28 | - ts = &s->temps[i]; | ||
29 | - printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); | ||
30 | - switch(ts->val_type) { | ||
31 | - case TEMP_VAL_REG: | ||
32 | - printf("%s", tcg_target_reg_names[ts->reg]); | ||
33 | - break; | ||
34 | - case TEMP_VAL_MEM: | ||
35 | - printf("%d(%s)", (int)ts->mem_offset, | ||
36 | - tcg_target_reg_names[ts->mem_base->reg]); | ||
37 | - break; | ||
38 | - case TEMP_VAL_CONST: | ||
39 | - printf("$0x%" PRIx64, ts->val); | ||
40 | - break; | ||
41 | - case TEMP_VAL_DEAD: | ||
42 | - printf("D"); | ||
43 | - break; | ||
44 | - default: | ||
45 | - printf("???"); | ||
46 | - break; | ||
47 | - } | ||
48 | - printf("\n"); | ||
49 | - } | ||
50 | - | ||
51 | - for(i = 0; i < TCG_TARGET_NB_REGS; i++) { | ||
52 | - if (s->reg_to_temp[i] != NULL) { | ||
53 | - printf("%s: %s\n", | ||
54 | - tcg_target_reg_names[i], | ||
55 | - tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); | ||
56 | - } | ||
57 | - } | ||
58 | -} | ||
59 | - | ||
60 | -static void check_regs(TCGContext *s) | ||
61 | -{ | ||
62 | - int reg; | ||
63 | - int k; | ||
64 | - TCGTemp *ts; | ||
65 | - char buf[64]; | ||
66 | - | ||
67 | - for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { | ||
68 | - ts = s->reg_to_temp[reg]; | ||
69 | - if (ts != NULL) { | ||
70 | - if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { | ||
71 | - printf("Inconsistency for register %s:\n", | ||
72 | - tcg_target_reg_names[reg]); | ||
73 | - goto fail; | ||
74 | - } | ||
75 | - } | ||
76 | - } | ||
77 | - for (k = 0; k < s->nb_temps; k++) { | ||
78 | - ts = &s->temps[k]; | ||
79 | - if (ts->val_type == TEMP_VAL_REG | ||
80 | - && ts->kind != TEMP_FIXED | ||
81 | - && s->reg_to_temp[ts->reg] != ts) { | ||
82 | - printf("Inconsistency for temp %s:\n", | ||
83 | - tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); | ||
84 | - fail: | ||
85 | - printf("reg state:\n"); | ||
86 | - dump_regs(s); | ||
87 | - tcg_abort(); | ||
88 | - } | ||
89 | - } | ||
90 | -} | ||
91 | -#endif | ||
92 | - | ||
93 | static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) | ||
94 | { | ||
95 | intptr_t off, size, align; | ||
96 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) | ||
97 | tcg_reg_alloc_op(s, op); | ||
98 | break; | ||
99 | } | ||
100 | -#ifdef CONFIG_DEBUG_TCG | ||
101 | - check_regs(s); | ||
102 | -#endif | ||
103 | /* Test for (pending) buffer overflow. The assumption is that any | ||
104 | one operation beginning below the high water mark cannot overrun | ||
105 | the buffer completely. Thus we can test for overflow after | ||
106 | -- | ||
107 | 2.34.1 | ||
108 | |||
109 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | In preparation for introducing paired registers, | ||
4 | slightly restructure process_op_defs()'s switch statement. | ||
5 | |||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | [PMD: Split from bigger patch, 1/3] | ||
8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Message-Id: <20221219220925.79218-2-philmd@linaro.org> | ||
10 | --- | ||
11 | tcg/tcg.c | 61 +++++++++++++++++++++++++++++++------------------------ | ||
12 | 1 file changed, 34 insertions(+), 27 deletions(-) | ||
13 | |||
14 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/tcg/tcg.c | ||
17 | +++ b/tcg/tcg.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
19 | for (op = 0; op < NB_OPS; op++) { | ||
20 | TCGOpDef *def = &tcg_op_defs[op]; | ||
21 | const TCGTargetOpDef *tdefs; | ||
22 | - int i, nb_args; | ||
23 | + int i, o, nb_args; | ||
24 | |||
25 | if (def->flags & TCG_OPF_NOT_PRESENT) { | ||
26 | continue; | ||
27 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
28 | |||
29 | for (i = 0; i < nb_args; i++) { | ||
30 | const char *ct_str = tdefs->args_ct_str[i]; | ||
31 | + bool input_p = i >= def->nb_oargs; | ||
32 | + | ||
33 | /* Incomplete TCGTargetOpDef entry. */ | ||
34 | tcg_debug_assert(ct_str != NULL); | ||
35 | |||
36 | - while (*ct_str != '\0') { | ||
37 | - switch(*ct_str) { | ||
38 | - case '0' ... '9': | ||
39 | - { | ||
40 | - int oarg = *ct_str - '0'; | ||
41 | - tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); | ||
42 | - tcg_debug_assert(oarg < def->nb_oargs); | ||
43 | - tcg_debug_assert(def->args_ct[oarg].regs != 0); | ||
44 | - def->args_ct[i] = def->args_ct[oarg]; | ||
45 | - /* The output sets oalias. */ | ||
46 | - def->args_ct[oarg].oalias = true; | ||
47 | - def->args_ct[oarg].alias_index = i; | ||
48 | - /* The input sets ialias. */ | ||
49 | - def->args_ct[i].ialias = true; | ||
50 | - def->args_ct[i].alias_index = oarg; | ||
51 | - } | ||
52 | - ct_str++; | ||
53 | - break; | ||
54 | - case '&': | ||
55 | - def->args_ct[i].newreg = true; | ||
56 | - ct_str++; | ||
57 | - break; | ||
58 | + switch (*ct_str) { | ||
59 | + case '0' ... '9': | ||
60 | + o = *ct_str - '0'; | ||
61 | + tcg_debug_assert(input_p); | ||
62 | + tcg_debug_assert(o < def->nb_oargs); | ||
63 | + tcg_debug_assert(def->args_ct[o].regs != 0); | ||
64 | + tcg_debug_assert(!def->args_ct[o].oalias); | ||
65 | + def->args_ct[i] = def->args_ct[o]; | ||
66 | + /* The output sets oalias. */ | ||
67 | + def->args_ct[o].oalias = 1; | ||
68 | + def->args_ct[o].alias_index = i; | ||
69 | + /* The input sets ialias. */ | ||
70 | + def->args_ct[i].ialias = 1; | ||
71 | + def->args_ct[i].alias_index = o; | ||
72 | + tcg_debug_assert(ct_str[1] == '\0'); | ||
73 | + continue; | ||
74 | + | ||
75 | + case '&': | ||
76 | + tcg_debug_assert(!input_p); | ||
77 | + def->args_ct[i].newreg = true; | ||
78 | + ct_str++; | ||
79 | + break; | ||
80 | + } | ||
81 | + | ||
82 | + do { | ||
83 | + switch (*ct_str) { | ||
84 | case 'i': | ||
85 | def->args_ct[i].ct |= TCG_CT_CONST; | ||
86 | - ct_str++; | ||
87 | break; | ||
88 | |||
89 | /* Include all of the target-specific constraints. */ | ||
90 | |||
91 | #undef CONST | ||
92 | #define CONST(CASE, MASK) \ | ||
93 | - case CASE: def->args_ct[i].ct |= MASK; ct_str++; break; | ||
94 | + case CASE: def->args_ct[i].ct |= MASK; break; | ||
95 | #define REGS(CASE, MASK) \ | ||
96 | - case CASE: def->args_ct[i].regs |= MASK; ct_str++; break; | ||
97 | + case CASE: def->args_ct[i].regs |= MASK; break; | ||
98 | |||
99 | #include "tcg-target-con-str.h" | ||
100 | |||
101 | #undef REGS | ||
102 | #undef CONST | ||
103 | default: | ||
104 | + case '0' ... '9': | ||
105 | + case '&': | ||
106 | /* Typo in TCGTargetOpDef constraint. */ | ||
107 | g_assert_not_reached(); | ||
108 | } | ||
109 | - } | ||
110 | + } while (*++ct_str != '\0'); | ||
111 | } | ||
112 | |||
113 | /* TCGTargetOpDef entry with too much information? */ | ||
114 | -- | ||
115 | 2.34.1 | ||
116 | |||
117 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | There are several instances where we need to be able to | ||
2 | allocate a pair of registers to related inputs/outputs. | ||
3 | Add 'p' and 'm' register constraints for this, in order to | ||
4 | be able to allocate the even/odd register first or second. | ||
1 | 5 | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | include/tcg/tcg.h | 2 + | ||
9 | tcg/tcg.c | 419 ++++++++++++++++++++++++++++++++++++++++------ | ||
10 | 2 files changed, 373 insertions(+), 48 deletions(-) | ||
11 | |||
12 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/include/tcg/tcg.h | ||
15 | +++ b/include/tcg/tcg.h | ||
16 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGArgConstraint { | ||
17 | unsigned ct : 16; | ||
18 | unsigned alias_index : 4; | ||
19 | unsigned sort_index : 4; | ||
20 | + unsigned pair_index : 4; | ||
21 | + unsigned pair : 2; /* 0: none, 1: first, 2: second, 3: second alias */ | ||
22 | bool oalias : 1; | ||
23 | bool ialias : 1; | ||
24 | bool newreg : 1; | ||
25 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/tcg/tcg.c | ||
28 | +++ b/tcg/tcg.c | ||
29 | @@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) | ||
30 | static int get_constraint_priority(const TCGOpDef *def, int k) | ||
31 | { | ||
32 | const TCGArgConstraint *arg_ct = &def->args_ct[k]; | ||
33 | - int n; | ||
34 | + int n = ctpop64(arg_ct->regs); | ||
35 | |||
36 | - if (arg_ct->oalias) { | ||
37 | - /* an alias is equivalent to a single register */ | ||
38 | - n = 1; | ||
39 | - } else { | ||
40 | - n = ctpop64(arg_ct->regs); | ||
41 | + /* | ||
42 | + * Sort constraints of a single register first, which includes output | ||
43 | + * aliases (which must exactly match the input already allocated). | ||
44 | + */ | ||
45 | + if (n == 1 || arg_ct->oalias) { | ||
46 | + return INT_MAX; | ||
47 | } | ||
48 | - return TCG_TARGET_NB_REGS - n + 1; | ||
49 | + | ||
50 | + /* | ||
51 | + * Sort register pairs next, first then second immediately after. | ||
52 | + * Arbitrarily sort multiple pairs by the index of the first reg; | ||
53 | + * there shouldn't be many pairs. | ||
54 | + */ | ||
55 | + switch (arg_ct->pair) { | ||
56 | + case 1: | ||
57 | + case 3: | ||
58 | + return (k + 1) * 2; | ||
59 | + case 2: | ||
60 | + return (arg_ct->pair_index + 1) * 2 - 1; | ||
61 | + } | ||
62 | + | ||
63 | + /* Finally, sort by decreasing register count. */ | ||
64 | + assert(n > 1); | ||
65 | + return -n; | ||
66 | } | ||
67 | |||
68 | /* sort from highest priority to lowest */ | ||
69 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
70 | for (op = 0; op < NB_OPS; op++) { | ||
71 | TCGOpDef *def = &tcg_op_defs[op]; | ||
72 | const TCGTargetOpDef *tdefs; | ||
73 | - int i, o, nb_args; | ||
74 | + bool saw_alias_pair = false; | ||
75 | + int i, o, i2, o2, nb_args; | ||
76 | |||
77 | if (def->flags & TCG_OPF_NOT_PRESENT) { | ||
78 | continue; | ||
79 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
80 | /* The input sets ialias. */ | ||
81 | def->args_ct[i].ialias = 1; | ||
82 | def->args_ct[i].alias_index = o; | ||
83 | + if (def->args_ct[i].pair) { | ||
84 | + saw_alias_pair = true; | ||
85 | + } | ||
86 | tcg_debug_assert(ct_str[1] == '\0'); | ||
87 | continue; | ||
88 | |||
89 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
90 | def->args_ct[i].newreg = true; | ||
91 | ct_str++; | ||
92 | break; | ||
93 | + | ||
94 | + case 'p': /* plus */ | ||
95 | + /* Allocate to the register after the previous. */ | ||
96 | + tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); | ||
97 | + o = i - 1; | ||
98 | + tcg_debug_assert(!def->args_ct[o].pair); | ||
99 | + tcg_debug_assert(!def->args_ct[o].ct); | ||
100 | + def->args_ct[i] = (TCGArgConstraint){ | ||
101 | + .pair = 2, | ||
102 | + .pair_index = o, | ||
103 | + .regs = def->args_ct[o].regs << 1, | ||
104 | + }; | ||
105 | + def->args_ct[o].pair = 1; | ||
106 | + def->args_ct[o].pair_index = i; | ||
107 | + tcg_debug_assert(ct_str[1] == '\0'); | ||
108 | + continue; | ||
109 | + | ||
110 | + case 'm': /* minus */ | ||
111 | + /* Allocate to the register before the previous. */ | ||
112 | + tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); | ||
113 | + o = i - 1; | ||
114 | + tcg_debug_assert(!def->args_ct[o].pair); | ||
115 | + tcg_debug_assert(!def->args_ct[o].ct); | ||
116 | + def->args_ct[i] = (TCGArgConstraint){ | ||
117 | + .pair = 1, | ||
118 | + .pair_index = o, | ||
119 | + .regs = def->args_ct[o].regs >> 1, | ||
120 | + }; | ||
121 | + def->args_ct[o].pair = 2; | ||
122 | + def->args_ct[o].pair_index = i; | ||
123 | + tcg_debug_assert(ct_str[1] == '\0'); | ||
124 | + continue; | ||
125 | } | ||
126 | |||
127 | do { | ||
128 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
129 | default: | ||
130 | case '0' ... '9': | ||
131 | case '&': | ||
132 | + case 'p': | ||
133 | + case 'm': | ||
134 | /* Typo in TCGTargetOpDef constraint. */ | ||
135 | g_assert_not_reached(); | ||
136 | } | ||
137 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
138 | /* TCGTargetOpDef entry with too much information? */ | ||
139 | tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); | ||
140 | |||
141 | + /* | ||
142 | + * Fix up output pairs that are aliased with inputs. | ||
143 | + * When we created the alias, we copied pair from the output. | ||
144 | + * There are three cases: | ||
145 | + * (1a) Pairs of inputs alias pairs of outputs. | ||
146 | + * (1b) One input aliases the first of a pair of outputs. | ||
147 | + * (2) One input aliases the second of a pair of outputs. | ||
148 | + * | ||
149 | + * Case 1a is handled by making sure that the pair_index'es are | ||
150 | + * properly updated so that they appear the same as a pair of inputs. | ||
151 | + * | ||
152 | + * Case 1b is handled by setting the pair_index of the input to | ||
153 | + * itself, simply so it doesn't point to an unrelated argument. | ||
154 | + * Since we don't encounter the "second" during the input allocation | ||
155 | + * phase, nothing happens with the second half of the input pair. | ||
156 | + * | ||
157 | + * Case 2 is handled by setting the second input to pair=3, the | ||
158 | + * first output to pair=3, and the pair_index'es to match. | ||
159 | + */ | ||
160 | + if (saw_alias_pair) { | ||
161 | + for (i = def->nb_oargs; i < nb_args; i++) { | ||
162 | + /* | ||
163 | + * Since [0-9pm] must be alone in the constraint string, | ||
164 | + * the only way they can both be set is if the pair comes | ||
165 | + * from the output alias. | ||
166 | + */ | ||
167 | + if (!def->args_ct[i].ialias) { | ||
168 | + continue; | ||
169 | + } | ||
170 | + switch (def->args_ct[i].pair) { | ||
171 | + case 0: | ||
172 | + break; | ||
173 | + case 1: | ||
174 | + o = def->args_ct[i].alias_index; | ||
175 | + o2 = def->args_ct[o].pair_index; | ||
176 | + tcg_debug_assert(def->args_ct[o].pair == 1); | ||
177 | + tcg_debug_assert(def->args_ct[o2].pair == 2); | ||
178 | + if (def->args_ct[o2].oalias) { | ||
179 | + /* Case 1a */ | ||
180 | + i2 = def->args_ct[o2].alias_index; | ||
181 | + tcg_debug_assert(def->args_ct[i2].pair == 2); | ||
182 | + def->args_ct[i2].pair_index = i; | ||
183 | + def->args_ct[i].pair_index = i2; | ||
184 | + } else { | ||
185 | + /* Case 1b */ | ||
186 | + def->args_ct[i].pair_index = i; | ||
187 | + } | ||
188 | + break; | ||
189 | + case 2: | ||
190 | + o = def->args_ct[i].alias_index; | ||
191 | + o2 = def->args_ct[o].pair_index; | ||
192 | + tcg_debug_assert(def->args_ct[o].pair == 2); | ||
193 | + tcg_debug_assert(def->args_ct[o2].pair == 1); | ||
194 | + if (def->args_ct[o2].oalias) { | ||
195 | + /* Case 1a */ | ||
196 | + i2 = def->args_ct[o2].alias_index; | ||
197 | + tcg_debug_assert(def->args_ct[i2].pair == 1); | ||
198 | + def->args_ct[i2].pair_index = i; | ||
199 | + def->args_ct[i].pair_index = i2; | ||
200 | + } else { | ||
201 | + /* Case 2 */ | ||
202 | + def->args_ct[i].pair = 3; | ||
203 | + def->args_ct[o2].pair = 3; | ||
204 | + def->args_ct[i].pair_index = o2; | ||
205 | + def->args_ct[o2].pair_index = i; | ||
206 | + } | ||
207 | + break; | ||
208 | + default: | ||
209 | + g_assert_not_reached(); | ||
210 | + } | ||
211 | + } | ||
212 | + } | ||
213 | + | ||
214 | /* sort the constraints (XXX: this is just an heuristic) */ | ||
215 | sort_constraints(def, 0, def->nb_oargs); | ||
216 | sort_constraints(def, def->nb_oargs, def->nb_iargs); | ||
217 | @@ -XXX,XX +XXX,XX @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, | ||
218 | tcg_abort(); | ||
219 | } | ||
220 | |||
221 | +static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, | ||
222 | + TCGRegSet allocated_regs, | ||
223 | + TCGRegSet preferred_regs, bool rev) | ||
224 | +{ | ||
225 | + int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); | ||
226 | + TCGRegSet reg_ct[2]; | ||
227 | + const int *order; | ||
228 | + | ||
229 | + /* Ensure that if I is not in allocated_regs, I+1 is not either. */ | ||
230 | + reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); | ||
231 | + tcg_debug_assert(reg_ct[1] != 0); | ||
232 | + reg_ct[0] = reg_ct[1] & preferred_regs; | ||
233 | + | ||
234 | + order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; | ||
235 | + | ||
236 | + /* | ||
237 | + * Skip the preferred_regs option if it cannot be satisfied, | ||
238 | + * or if the preference made no difference. | ||
239 | + */ | ||
240 | + k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; | ||
241 | + | ||
242 | + /* | ||
243 | + * Minimize the number of flushes by looking for 2 free registers first, | ||
244 | + * then a single flush, then two flushes. | ||
245 | + */ | ||
246 | + for (fmin = 2; fmin >= 0; fmin--) { | ||
247 | + for (j = k; j < 2; j++) { | ||
248 | + TCGRegSet set = reg_ct[j]; | ||
249 | + | ||
250 | + for (i = 0; i < n; i++) { | ||
251 | + TCGReg reg = order[i]; | ||
252 | + | ||
253 | + if (tcg_regset_test_reg(set, reg)) { | ||
254 | + int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; | ||
255 | + if (f >= fmin) { | ||
256 | + tcg_reg_free(s, reg, allocated_regs); | ||
257 | + tcg_reg_free(s, reg + 1, allocated_regs); | ||
258 | + return reg; | ||
259 | + } | ||
260 | + } | ||
261 | + } | ||
262 | + } | ||
263 | + } | ||
264 | + tcg_abort(); | ||
265 | +} | ||
266 | + | ||
267 | /* Make sure the temporary is in a register. If needed, allocate the register | ||
268 | from DESIRED while avoiding ALLOCATED. */ | ||
269 | static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, | ||
270 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
271 | |||
272 | /* satisfy input constraints */ | ||
273 | for (k = 0; k < nb_iargs; k++) { | ||
274 | - TCGRegSet i_preferred_regs; | ||
275 | - bool allocate_new_reg; | ||
276 | + TCGRegSet i_preferred_regs, i_required_regs; | ||
277 | + bool allocate_new_reg, copyto_new_reg; | ||
278 | + TCGTemp *ts2; | ||
279 | + int i1, i2; | ||
280 | |||
281 | i = def->args_ct[nb_oargs + k].sort_index; | ||
282 | arg = op->args[i]; | ||
283 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
284 | |||
285 | reg = ts->reg; | ||
286 | i_preferred_regs = 0; | ||
287 | + i_required_regs = arg_ct->regs; | ||
288 | allocate_new_reg = false; | ||
289 | + copyto_new_reg = false; | ||
290 | |||
291 | - if (arg_ct->ialias) { | ||
292 | + switch (arg_ct->pair) { | ||
293 | + case 0: /* not paired */ | ||
294 | + if (arg_ct->ialias) { | ||
295 | + i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
296 | + | ||
297 | + /* | ||
298 | + * If the input is not dead after the instruction, | ||
299 | + * we must allocate a new register and move it. | ||
300 | + */ | ||
301 | + if (!IS_DEAD_ARG(i)) { | ||
302 | + allocate_new_reg = true; | ||
303 | + } else if (ts->val_type == TEMP_VAL_REG) { | ||
304 | + /* | ||
305 | + * Check if the current register has already been | ||
306 | + * allocated for another input. | ||
307 | + */ | ||
308 | + allocate_new_reg = | ||
309 | + tcg_regset_test_reg(i_allocated_regs, reg); | ||
310 | + } | ||
311 | + } | ||
312 | + if (!allocate_new_reg) { | ||
313 | + temp_load(s, ts, i_required_regs, i_allocated_regs, | ||
314 | + i_preferred_regs); | ||
315 | + reg = ts->reg; | ||
316 | + allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); | ||
317 | + } | ||
318 | + if (allocate_new_reg) { | ||
319 | + /* | ||
320 | + * Allocate a new register matching the constraint | ||
321 | + * and move the temporary register into it. | ||
322 | + */ | ||
323 | + temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
324 | + i_allocated_regs, 0); | ||
325 | + reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, | ||
326 | + i_preferred_regs, ts->indirect_base); | ||
327 | + copyto_new_reg = true; | ||
328 | + } | ||
329 | + break; | ||
330 | + | ||
331 | + case 1: | ||
332 | + /* First of an input pair; if i1 == i2, the second is an output. */ | ||
333 | + i1 = i; | ||
334 | + i2 = arg_ct->pair_index; | ||
335 | + ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; | ||
336 | + | ||
337 | + /* | ||
338 | + * It is easier to default to allocating a new pair | ||
339 | + * and to identify a few cases where it's not required. | ||
340 | + */ | ||
341 | + if (arg_ct->ialias) { | ||
342 | + i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
343 | + if (IS_DEAD_ARG(i1) && | ||
344 | + IS_DEAD_ARG(i2) && | ||
345 | + ts->val_type == TEMP_VAL_REG && | ||
346 | + ts->reg < TCG_TARGET_NB_REGS - 1 && | ||
347 | + tcg_regset_test_reg(i_required_regs, reg) && | ||
348 | + !tcg_regset_test_reg(i_allocated_regs, reg) && | ||
349 | + !tcg_regset_test_reg(i_allocated_regs, reg + 1) && | ||
350 | + (ts2 | ||
351 | + ? ts2->val_type == TEMP_VAL_REG && | ||
352 | + ts2->reg == reg + 1 | ||
353 | + : s->reg_to_temp[reg + 1] == NULL)) { | ||
354 | + break; | ||
355 | + } | ||
356 | + } else { | ||
357 | + /* Without aliasing, the pair must also be an input. */ | ||
358 | + tcg_debug_assert(ts2); | ||
359 | + if (ts->val_type == TEMP_VAL_REG && | ||
360 | + ts2->val_type == TEMP_VAL_REG && | ||
361 | + ts2->reg == reg + 1 && | ||
362 | + tcg_regset_test_reg(i_required_regs, reg)) { | ||
363 | + break; | ||
364 | + } | ||
365 | + } | ||
366 | + reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, | ||
367 | + 0, ts->indirect_base); | ||
368 | + goto do_pair; | ||
369 | + | ||
370 | + case 2: /* pair second */ | ||
371 | + reg = new_args[arg_ct->pair_index] + 1; | ||
372 | + goto do_pair; | ||
373 | + | ||
374 | + case 3: /* ialias with second output, no first input */ | ||
375 | + tcg_debug_assert(arg_ct->ialias); | ||
376 | i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
377 | |||
378 | - /* | ||
379 | - * If the input is readonly, then it cannot also be an | ||
380 | - * output and aliased to itself. If the input is not | ||
381 | - * dead after the instruction, we must allocate a new | ||
382 | - * register and move it. | ||
383 | - */ | ||
384 | - if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { | ||
385 | - allocate_new_reg = true; | ||
386 | - } else if (ts->val_type == TEMP_VAL_REG) { | ||
387 | - /* | ||
388 | - * Check if the current register has already been | ||
389 | - * allocated for another input. | ||
390 | - */ | ||
391 | - allocate_new_reg = tcg_regset_test_reg(i_allocated_regs, reg); | ||
392 | + if (IS_DEAD_ARG(i) && | ||
393 | + ts->val_type == TEMP_VAL_REG && | ||
394 | + reg > 0 && | ||
395 | + s->reg_to_temp[reg - 1] == NULL && | ||
396 | + tcg_regset_test_reg(i_required_regs, reg) && | ||
397 | + !tcg_regset_test_reg(i_allocated_regs, reg) && | ||
398 | + !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { | ||
399 | + tcg_regset_set_reg(i_allocated_regs, reg - 1); | ||
400 | + break; | ||
401 | } | ||
402 | - } | ||
403 | + reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, | ||
404 | + i_allocated_regs, 0, | ||
405 | + ts->indirect_base); | ||
406 | + tcg_regset_set_reg(i_allocated_regs, reg); | ||
407 | + reg += 1; | ||
408 | + goto do_pair; | ||
409 | |||
410 | - if (!allocate_new_reg) { | ||
411 | - temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); | ||
412 | - reg = ts->reg; | ||
413 | - allocate_new_reg = !tcg_regset_test_reg(arg_ct->regs, reg); | ||
414 | - } | ||
415 | - | ||
416 | - if (allocate_new_reg) { | ||
417 | + do_pair: | ||
418 | /* | ||
419 | - * Allocate a new register matching the constraint | ||
420 | - * and move the temporary register into it. | ||
421 | + * If an aliased input is not dead after the instruction, | ||
422 | + * we must allocate a new register and move it. | ||
423 | */ | ||
424 | - temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
425 | - i_allocated_regs, 0); | ||
426 | - reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, | ||
427 | - i_preferred_regs, ts->indirect_base); | ||
428 | + if (arg_ct->ialias && !IS_DEAD_ARG(i)) { | ||
429 | + TCGRegSet t_allocated_regs = i_allocated_regs; | ||
430 | + | ||
431 | + /* | ||
432 | + * Because of the alias, and the continued life, make sure | ||
433 | + * that the temp is somewhere *other* than the reg pair, | ||
434 | + * and we get a copy in reg. | ||
435 | + */ | ||
436 | + tcg_regset_set_reg(t_allocated_regs, reg); | ||
437 | + tcg_regset_set_reg(t_allocated_regs, reg + 1); | ||
438 | + if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { | ||
439 | + /* If ts was already in reg, copy it somewhere else. */ | ||
440 | + TCGReg nr; | ||
441 | + bool ok; | ||
442 | + | ||
443 | + tcg_debug_assert(ts->kind != TEMP_FIXED); | ||
444 | + nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], | ||
445 | + t_allocated_regs, 0, ts->indirect_base); | ||
446 | + ok = tcg_out_mov(s, ts->type, nr, reg); | ||
447 | + tcg_debug_assert(ok); | ||
448 | + | ||
449 | + set_temp_val_reg(s, ts, nr); | ||
450 | + } else { | ||
451 | + temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
452 | + t_allocated_regs, 0); | ||
453 | + copyto_new_reg = true; | ||
454 | + } | ||
455 | + } else { | ||
456 | + /* Preferably allocate to reg, otherwise copy. */ | ||
457 | + i_required_regs = (TCGRegSet)1 << reg; | ||
458 | + temp_load(s, ts, i_required_regs, i_allocated_regs, | ||
459 | + i_preferred_regs); | ||
460 | + copyto_new_reg = ts->reg != reg; | ||
461 | + } | ||
462 | + break; | ||
463 | + | ||
464 | + default: | ||
465 | + g_assert_not_reached(); | ||
466 | + } | ||
467 | + | ||
468 | + if (copyto_new_reg) { | ||
469 | if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { | ||
470 | /* | ||
471 | * Cross register class move not supported. Sync the | ||
472 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
473 | /* ENV should not be modified. */ | ||
474 | tcg_debug_assert(!temp_readonly(ts)); | ||
475 | |||
476 | - if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { | ||
477 | - reg = new_args[arg_ct->alias_index]; | ||
478 | - } else if (arg_ct->newreg) { | ||
479 | - reg = tcg_reg_alloc(s, arg_ct->regs, | ||
480 | - i_allocated_regs | o_allocated_regs, | ||
481 | - op->output_pref[k], ts->indirect_base); | ||
482 | - } else { | ||
483 | - reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, | ||
484 | - op->output_pref[k], ts->indirect_base); | ||
485 | + switch (arg_ct->pair) { | ||
486 | + case 0: /* not paired */ | ||
487 | + if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { | ||
488 | + reg = new_args[arg_ct->alias_index]; | ||
489 | + } else if (arg_ct->newreg) { | ||
490 | + reg = tcg_reg_alloc(s, arg_ct->regs, | ||
491 | + i_allocated_regs | o_allocated_regs, | ||
492 | + op->output_pref[k], ts->indirect_base); | ||
493 | + } else { | ||
494 | + reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, | ||
495 | + op->output_pref[k], ts->indirect_base); | ||
496 | + } | ||
497 | + break; | ||
498 | + | ||
499 | + case 1: /* first of pair */ | ||
500 | + tcg_debug_assert(!arg_ct->newreg); | ||
501 | + if (arg_ct->oalias) { | ||
502 | + reg = new_args[arg_ct->alias_index]; | ||
503 | + break; | ||
504 | + } | ||
505 | + reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, | ||
506 | + op->output_pref[k], ts->indirect_base); | ||
507 | + break; | ||
508 | + | ||
509 | + case 2: /* second of pair */ | ||
510 | + tcg_debug_assert(!arg_ct->newreg); | ||
511 | + if (arg_ct->oalias) { | ||
512 | + reg = new_args[arg_ct->alias_index]; | ||
513 | + } else { | ||
514 | + reg = new_args[arg_ct->pair_index] + 1; | ||
515 | + } | ||
516 | + break; | ||
517 | + | ||
518 | + case 3: /* first of pair, aliasing with a second input */ | ||
519 | + tcg_debug_assert(!arg_ct->newreg); | ||
520 | + reg = new_args[arg_ct->pair_index] - 1; | ||
521 | + break; | ||
522 | + | ||
523 | + default: | ||
524 | + g_assert_not_reached(); | ||
525 | } | ||
526 | tcg_regset_set_reg(o_allocated_regs, reg); | ||
527 | set_temp_val_reg(s, ts, reg); | ||
528 | -- | ||
529 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | While we initialize cflags_next_tb in cpu_common_reset, that | ||
2 | function isn't called during startup, so set it in init as well. | ||
3 | This fixes -singlestep for the very first TB. | ||
1 | 4 | ||
5 | Fixes: 04f5b647ed07 ("accel/tcg: Handle -singlestep in curr_cflags") | ||
6 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | hw/core/cpu-common.c | 1 + | ||
10 | 1 file changed, 1 insertion(+) | ||
11 | |||
12 | diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/hw/core/cpu-common.c | ||
15 | +++ b/hw/core/cpu-common.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static void cpu_common_initfn(Object *obj) | ||
17 | /* the default value is changed by qemu_init_vcpu() for softmmu */ | ||
18 | cpu->nr_cores = 1; | ||
19 | cpu->nr_threads = 1; | ||
20 | + cpu->cflags_next_tb = -1; | ||
21 | |||
22 | qemu_mutex_init(&cpu->work_mutex); | ||
23 | QSIMPLEQ_INIT(&cpu->work_list); | ||
24 | -- | ||
25 | 2.34.1 | ||
26 | |||
27 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Use the official extend/extract functions instead of routines | ||
2 | that will shortly be internal to tcg. | ||
1 | 3 | ||
4 | Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | target/sparc/translate.c | 21 ++++----------------- | ||
9 | 1 file changed, 4 insertions(+), 17 deletions(-) | ||
10 | |||
11 | diff --git a/target/sparc/translate.c b/target/sparc/translate.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/sparc/translate.c | ||
14 | +++ b/target/sparc/translate.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static inline void gen_update_fprs_dirty(DisasContext *dc, int rd) | ||
16 | /* floating point registers moves */ | ||
17 | static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src) | ||
18 | { | ||
19 | -#if TCG_TARGET_REG_BITS == 32 | ||
20 | - if (src & 1) { | ||
21 | - return TCGV_LOW(cpu_fpr[src / 2]); | ||
22 | - } else { | ||
23 | - return TCGV_HIGH(cpu_fpr[src / 2]); | ||
24 | - } | ||
25 | -#else | ||
26 | TCGv_i32 ret = get_temp_i32(dc); | ||
27 | if (src & 1) { | ||
28 | tcg_gen_extrl_i64_i32(ret, cpu_fpr[src / 2]); | ||
29 | @@ -XXX,XX +XXX,XX @@ static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src) | ||
30 | tcg_gen_extrh_i64_i32(ret, cpu_fpr[src / 2]); | ||
31 | } | ||
32 | return ret; | ||
33 | -#endif | ||
34 | } | ||
35 | |||
36 | static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v) | ||
37 | { | ||
38 | -#if TCG_TARGET_REG_BITS == 32 | ||
39 | - if (dst & 1) { | ||
40 | - tcg_gen_mov_i32(TCGV_LOW(cpu_fpr[dst / 2]), v); | ||
41 | - } else { | ||
42 | - tcg_gen_mov_i32(TCGV_HIGH(cpu_fpr[dst / 2]), v); | ||
43 | - } | ||
44 | -#else | ||
45 | - TCGv_i64 t = (TCGv_i64)v; | ||
46 | + TCGv_i64 t = tcg_temp_new_i64(); | ||
47 | + | ||
48 | + tcg_gen_extu_i32_i64(t, v); | ||
49 | tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t, | ||
50 | (dst & 1 ? 0 : 32), 32); | ||
51 | -#endif | ||
52 | + tcg_temp_free_i64(t); | ||
53 | gen_update_fprs_dirty(dc, dst); | ||
54 | } | ||
55 | |||
56 | -- | ||
57 | 2.34.1 | ||
58 | |||
59 | diff view generated by jsdifflib |
1 | Extend the vector generator infrastructure to handle | 1 | Move the error-generating fallback from tcg-op.c, and |
---|---|---|---|
2 | 5 vector arguments. | 2 | replace "_link_error" with modern QEMU_ERROR markup. |
3 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
6 | Reviewed-by: Taylor Simpson <tsimpson@quicinc.com> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 6 | --- |
9 | include/tcg/tcg-op-gvec.h | 7 +++++++ | 7 | include/tcg/tcg-op.h | 33 +++++---------------------------- |
10 | tcg/tcg-op-gvec.c | 32 ++++++++++++++++++++++++++++++++ | 8 | include/tcg/tcg.h | 12 ------------ |
11 | 2 files changed, 39 insertions(+) | 9 | tcg/tcg-internal.h | 14 ++++++++++++++ |
10 | tcg/tcg-op-vec.c | 2 ++ | ||
11 | tcg/tcg-op.c | 37 ++++++++++++++++++++++++++++--------- | ||
12 | 5 files changed, 49 insertions(+), 49 deletions(-) | ||
12 | 13 | ||
13 | diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h | 14 | diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h |
14 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/include/tcg/tcg-op-gvec.h | 16 | --- a/include/tcg/tcg-op.h |
16 | +++ b/include/tcg/tcg-op-gvec.h | 17 | +++ b/include/tcg/tcg-op.h |
17 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, | 18 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) |
18 | uint32_t maxsz, int32_t data, | 19 | tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); |
19 | gen_helper_gvec_4_ptr *fn); | 20 | } |
20 | 21 | #else /* TCG_TARGET_REG_BITS == 32 */ | |
21 | +typedef void gen_helper_gvec_5_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, | 22 | -static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, |
22 | + TCGv_ptr, TCGv_ptr, TCGv_i32); | 23 | - tcg_target_long offset) |
23 | +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, | 24 | -{ |
24 | + uint32_t cofs, uint32_t eofs, TCGv_ptr ptr, | 25 | - tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); |
25 | + uint32_t oprsz, uint32_t maxsz, int32_t data, | 26 | -} |
26 | + gen_helper_gvec_5_ptr *fn); | 27 | +void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); |
28 | +void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); | ||
29 | +void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); | ||
30 | |||
31 | -static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, | ||
32 | - tcg_target_long offset) | ||
33 | -{ | ||
34 | - tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); | ||
35 | -} | ||
36 | - | ||
37 | -static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, | ||
38 | - tcg_target_long offset) | ||
39 | -{ | ||
40 | - tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); | ||
41 | -} | ||
42 | - | ||
43 | -static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
44 | -{ | ||
45 | - tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), | ||
46 | - TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); | ||
47 | -} | ||
48 | - | ||
49 | -static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
50 | -{ | ||
51 | - tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), | ||
52 | - TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); | ||
53 | -} | ||
54 | +void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); | ||
55 | +void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); | ||
56 | |||
57 | void tcg_gen_discard_i64(TCGv_i64 arg); | ||
58 | void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg); | ||
59 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/include/tcg/tcg.h | ||
62 | +++ b/include/tcg/tcg.h | ||
63 | @@ -XXX,XX +XXX,XX @@ static inline TCGv_vec temp_tcgv_vec(TCGTemp *t) | ||
64 | return (TCGv_vec)temp_tcgv_i32(t); | ||
65 | } | ||
66 | |||
67 | -#if TCG_TARGET_REG_BITS == 32 | ||
68 | -static inline TCGv_i32 TCGV_LOW(TCGv_i64 t) | ||
69 | -{ | ||
70 | - return temp_tcgv_i32(tcgv_i64_temp(t)); | ||
71 | -} | ||
72 | - | ||
73 | -static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t) | ||
74 | -{ | ||
75 | - return temp_tcgv_i32(tcgv_i64_temp(t) + 1); | ||
76 | -} | ||
77 | -#endif | ||
78 | - | ||
79 | static inline TCGArg tcg_get_insn_param(TCGOp *op, int arg) | ||
80 | { | ||
81 | return op->args[arg]; | ||
82 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | ||
83 | index XXXXXXX..XXXXXXX 100644 | ||
84 | --- a/tcg/tcg-internal.h | ||
85 | +++ b/tcg/tcg-internal.h | ||
86 | @@ -XXX,XX +XXX,XX @@ static inline unsigned tcg_call_flags(TCGOp *op) | ||
87 | return tcg_call_info(op)->flags; | ||
88 | } | ||
89 | |||
90 | +#if TCG_TARGET_REG_BITS == 32 | ||
91 | +static inline TCGv_i32 TCGV_LOW(TCGv_i64 t) | ||
92 | +{ | ||
93 | + return temp_tcgv_i32(tcgv_i64_temp(t)); | ||
94 | +} | ||
95 | +static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t) | ||
96 | +{ | ||
97 | + return temp_tcgv_i32(tcgv_i64_temp(t) + 1); | ||
98 | +} | ||
99 | +#else | ||
100 | +extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); | ||
101 | +extern TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); | ||
102 | +#endif | ||
27 | + | 103 | + |
28 | /* Expand a gvec operation. Either inline or out-of-line depending on | 104 | #endif /* TCG_INTERNAL_H */ |
29 | the actual vector size and the operations supported by the host. */ | 105 | diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c |
30 | typedef struct { | ||
31 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | 106 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/tcg/tcg-op-gvec.c | 107 | --- a/tcg/tcg-op-vec.c |
34 | +++ b/tcg/tcg-op-gvec.c | 108 | +++ b/tcg/tcg-op-vec.c |
35 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, | 109 | @@ -XXX,XX +XXX,XX @@ |
36 | tcg_temp_free_i32(desc); | 110 | #include "tcg/tcg.h" |
111 | #include "tcg/tcg-op.h" | ||
112 | #include "tcg/tcg-mo.h" | ||
113 | +#include "tcg-internal.h" | ||
114 | + | ||
115 | |||
116 | /* Reduce the number of ifdefs below. This assumes that all uses of | ||
117 | TCGV_HIGH and TCGV_LOW are properly protected by a conditional that | ||
118 | diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c | ||
119 | index XXXXXXX..XXXXXXX 100644 | ||
120 | --- a/tcg/tcg-op.c | ||
121 | +++ b/tcg/tcg-op.c | ||
122 | @@ -XXX,XX +XXX,XX @@ | ||
123 | #include "tcg/tcg-op.h" | ||
124 | #include "tcg/tcg-mo.h" | ||
125 | #include "exec/plugin-gen.h" | ||
126 | +#include "tcg-internal.h" | ||
127 | |||
128 | -/* Reduce the number of ifdefs below. This assumes that all uses of | ||
129 | - TCGV_HIGH and TCGV_LOW are properly protected by a conditional that | ||
130 | - the compiler can eliminate. */ | ||
131 | -#if TCG_TARGET_REG_BITS == 64 | ||
132 | -extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64); | ||
133 | -extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); | ||
134 | -#define TCGV_LOW TCGV_LOW_link_error | ||
135 | -#define TCGV_HIGH TCGV_HIGH_link_error | ||
136 | -#endif | ||
137 | |||
138 | void tcg_gen_op1(TCGOpcode opc, TCGArg a1) | ||
139 | { | ||
140 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) | ||
141 | #endif | ||
37 | } | 142 | } |
38 | 143 | ||
39 | +/* Generate a call to a gvec-style helper with five vector operands | 144 | +void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) |
40 | + and an extra pointer operand. */ | ||
41 | +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, | ||
42 | + uint32_t cofs, uint32_t eofs, TCGv_ptr ptr, | ||
43 | + uint32_t oprsz, uint32_t maxsz, int32_t data, | ||
44 | + gen_helper_gvec_5_ptr *fn) | ||
45 | +{ | 145 | +{ |
46 | + TCGv_ptr a0, a1, a2, a3, a4; | 146 | + tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); |
47 | + TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data)); | ||
48 | + | ||
49 | + a0 = tcg_temp_new_ptr(); | ||
50 | + a1 = tcg_temp_new_ptr(); | ||
51 | + a2 = tcg_temp_new_ptr(); | ||
52 | + a3 = tcg_temp_new_ptr(); | ||
53 | + a4 = tcg_temp_new_ptr(); | ||
54 | + | ||
55 | + tcg_gen_addi_ptr(a0, cpu_env, dofs); | ||
56 | + tcg_gen_addi_ptr(a1, cpu_env, aofs); | ||
57 | + tcg_gen_addi_ptr(a2, cpu_env, bofs); | ||
58 | + tcg_gen_addi_ptr(a3, cpu_env, cofs); | ||
59 | + tcg_gen_addi_ptr(a4, cpu_env, eofs); | ||
60 | + | ||
61 | + fn(a0, a1, a2, a3, a4, ptr, desc); | ||
62 | + | ||
63 | + tcg_temp_free_ptr(a0); | ||
64 | + tcg_temp_free_ptr(a1); | ||
65 | + tcg_temp_free_ptr(a2); | ||
66 | + tcg_temp_free_ptr(a3); | ||
67 | + tcg_temp_free_ptr(a4); | ||
68 | + tcg_temp_free_i32(desc); | ||
69 | +} | 147 | +} |
70 | + | 148 | + |
71 | /* Return true if we want to implement something of OPRSZ bytes | 149 | +void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) |
72 | in units of LNSZ. This limits the expansion of inline code. */ | 150 | +{ |
73 | static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz) | 151 | + tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); |
152 | +} | ||
153 | + | ||
154 | +void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) | ||
155 | +{ | ||
156 | + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); | ||
157 | +} | ||
158 | + | ||
159 | void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) | ||
160 | { | ||
161 | #if HOST_BIG_ENDIAN | ||
162 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) | ||
163 | #endif | ||
164 | } | ||
165 | |||
166 | +void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
167 | +{ | ||
168 | + tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), | ||
169 | + TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); | ||
170 | +} | ||
171 | + | ||
172 | +void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
173 | +{ | ||
174 | + tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), | ||
175 | + TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); | ||
176 | +} | ||
177 | + | ||
178 | void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
179 | { | ||
180 | tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); | ||
74 | -- | 181 | -- |
75 | 2.20.1 | 182 | 2.34.1 |
76 | 183 | ||
77 | 184 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Add temp_subindex to record the location of a TCGTemp within a larger object. | ||
1 | 2 | ||
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | include/tcg/tcg.h | 1 + | ||
7 | tcg/tcg.c | 3 +++ | ||
8 | 2 files changed, 4 insertions(+) | ||
9 | |||
10 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/include/tcg/tcg.h | ||
13 | +++ b/include/tcg/tcg.h | ||
14 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGTemp { | ||
15 | unsigned int mem_coherent:1; | ||
16 | unsigned int mem_allocated:1; | ||
17 | unsigned int temp_allocated:1; | ||
18 | + unsigned int temp_subindex:1; | ||
19 | |||
20 | int64_t val; | ||
21 | struct TCGTemp *mem_base; | ||
22 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/tcg/tcg.c | ||
25 | +++ b/tcg/tcg.c | ||
26 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, | ||
27 | ts2->mem_allocated = 1; | ||
28 | ts2->mem_base = base_ts; | ||
29 | ts2->mem_offset = offset + (1 - bigendian) * 4; | ||
30 | + ts2->temp_subindex = 1; | ||
31 | pstrcpy(buf, sizeof(buf), name); | ||
32 | pstrcat(buf, sizeof(buf), "_1"); | ||
33 | ts2->name = strdup(buf); | ||
34 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) | ||
35 | ts2->base_type = TCG_TYPE_I64; | ||
36 | ts2->type = TCG_TYPE_I32; | ||
37 | ts2->temp_allocated = 1; | ||
38 | + ts2->temp_subindex = 1; | ||
39 | ts2->kind = kind; | ||
40 | } else { | ||
41 | ts->base_type = type; | ||
42 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_constant_internal(TCGType type, int64_t val) | ||
43 | ts2->type = TCG_TYPE_I32; | ||
44 | ts2->kind = TEMP_CONST; | ||
45 | ts2->temp_allocated = 1; | ||
46 | + ts2->temp_subindex = 1; | ||
47 | ts2->val = val >> 32; | ||
48 | } else { | ||
49 | ts->base_type = type; | ||
50 | -- | ||
51 | 2.34.1 | ||
52 | |||
53 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The first thing that temp_sync does is check mem_coherent, | ||
2 | so there's no need for the caller to do so. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/tcg.c | 8 ++------ | ||
8 | 1 file changed, 2 insertions(+), 6 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/tcg.c | ||
13 | +++ b/tcg/tcg.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
15 | |||
16 | /* If the two inputs form one 64-bit value, try dupm_vec. */ | ||
17 | if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) { | ||
18 | - if (!itsl->mem_coherent) { | ||
19 | - temp_sync(s, itsl, s->reserved_regs, 0, 0); | ||
20 | - } | ||
21 | - if (!itsh->mem_coherent) { | ||
22 | - temp_sync(s, itsh, s->reserved_regs, 0, 0); | ||
23 | - } | ||
24 | + temp_sync(s, itsl, s->reserved_regs, 0, 0); | ||
25 | + temp_sync(s, itsh, s->reserved_regs, 0, 0); | ||
26 | #if HOST_BIG_ENDIAN | ||
27 | TCGTemp *its = itsh; | ||
28 | #else | ||
29 | -- | ||
30 | 2.34.1 | ||
31 | |||
32 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Allocate the first of a pair at the lower address, and the | ||
2 | second of a pair at the higher address. This will make it | ||
3 | easier to find the beginning of the larger memory block. | ||
1 | 4 | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/tcg-internal.h | 4 ++-- | ||
9 | tcg/tcg.c | 58 ++++++++++++++++++++++------------------------ | ||
10 | 2 files changed, 30 insertions(+), 32 deletions(-) | ||
11 | |||
12 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/tcg/tcg-internal.h | ||
15 | +++ b/tcg/tcg-internal.h | ||
16 | @@ -XXX,XX +XXX,XX @@ static inline unsigned tcg_call_flags(TCGOp *op) | ||
17 | #if TCG_TARGET_REG_BITS == 32 | ||
18 | static inline TCGv_i32 TCGV_LOW(TCGv_i64 t) | ||
19 | { | ||
20 | - return temp_tcgv_i32(tcgv_i64_temp(t)); | ||
21 | + return temp_tcgv_i32(tcgv_i64_temp(t) + HOST_BIG_ENDIAN); | ||
22 | } | ||
23 | static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t) | ||
24 | { | ||
25 | - return temp_tcgv_i32(tcgv_i64_temp(t) + 1); | ||
26 | + return temp_tcgv_i32(tcgv_i64_temp(t) + !HOST_BIG_ENDIAN); | ||
27 | } | ||
28 | #else | ||
29 | extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); | ||
30 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/tcg/tcg.c | ||
33 | +++ b/tcg/tcg.c | ||
34 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, | ||
35 | TCGContext *s = tcg_ctx; | ||
36 | TCGTemp *base_ts = tcgv_ptr_temp(base); | ||
37 | TCGTemp *ts = tcg_global_alloc(s); | ||
38 | - int indirect_reg = 0, bigendian = 0; | ||
39 | -#if HOST_BIG_ENDIAN | ||
40 | - bigendian = 1; | ||
41 | -#endif | ||
42 | + int indirect_reg = 0; | ||
43 | |||
44 | switch (base_ts->kind) { | ||
45 | case TEMP_FIXED: | ||
46 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, | ||
47 | ts->indirect_reg = indirect_reg; | ||
48 | ts->mem_allocated = 1; | ||
49 | ts->mem_base = base_ts; | ||
50 | - ts->mem_offset = offset + bigendian * 4; | ||
51 | + ts->mem_offset = offset; | ||
52 | pstrcpy(buf, sizeof(buf), name); | ||
53 | pstrcat(buf, sizeof(buf), "_0"); | ||
54 | ts->name = strdup(buf); | ||
55 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, | ||
56 | ts2->indirect_reg = indirect_reg; | ||
57 | ts2->mem_allocated = 1; | ||
58 | ts2->mem_base = base_ts; | ||
59 | - ts2->mem_offset = offset + (1 - bigendian) * 4; | ||
60 | + ts2->mem_offset = offset + 4; | ||
61 | ts2->temp_subindex = 1; | ||
62 | pstrcpy(buf, sizeof(buf), name); | ||
63 | pstrcat(buf, sizeof(buf), "_1"); | ||
64 | @@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_constant_internal(TCGType type, int64_t val) | ||
65 | |||
66 | ts = g_hash_table_lookup(h, &val); | ||
67 | if (ts == NULL) { | ||
68 | + int64_t *val_ptr; | ||
69 | + | ||
70 | ts = tcg_temp_alloc(s); | ||
71 | |||
72 | if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { | ||
73 | TCGTemp *ts2 = tcg_temp_alloc(s); | ||
74 | |||
75 | + tcg_debug_assert(ts2 == ts + 1); | ||
76 | + | ||
77 | ts->base_type = TCG_TYPE_I64; | ||
78 | ts->type = TCG_TYPE_I32; | ||
79 | ts->kind = TEMP_CONST; | ||
80 | ts->temp_allocated = 1; | ||
81 | - /* | ||
82 | - * Retain the full value of the 64-bit constant in the low | ||
83 | - * part, so that the hash table works. Actual uses will | ||
84 | - * truncate the value to the low part. | ||
85 | - */ | ||
86 | - ts->val = val; | ||
87 | |||
88 | - tcg_debug_assert(ts2 == ts + 1); | ||
89 | ts2->base_type = TCG_TYPE_I64; | ||
90 | ts2->type = TCG_TYPE_I32; | ||
91 | ts2->kind = TEMP_CONST; | ||
92 | ts2->temp_allocated = 1; | ||
93 | ts2->temp_subindex = 1; | ||
94 | - ts2->val = val >> 32; | ||
95 | + | ||
96 | + /* | ||
97 | + * Retain the full value of the 64-bit constant in the low | ||
98 | + * part, so that the hash table works. Actual uses will | ||
99 | + * truncate the value to the low part. | ||
100 | + */ | ||
101 | + ts[HOST_BIG_ENDIAN].val = val; | ||
102 | + ts[!HOST_BIG_ENDIAN].val = val >> 32; | ||
103 | + val_ptr = &ts[HOST_BIG_ENDIAN].val; | ||
104 | } else { | ||
105 | ts->base_type = type; | ||
106 | ts->type = type; | ||
107 | ts->kind = TEMP_CONST; | ||
108 | ts->temp_allocated = 1; | ||
109 | ts->val = val; | ||
110 | + val_ptr = &ts->val; | ||
111 | } | ||
112 | - g_hash_table_insert(h, &ts->val, ts); | ||
113 | + g_hash_table_insert(h, val_ptr, ts); | ||
114 | } | ||
115 | |||
116 | return ts; | ||
117 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
118 | pi = 0; | ||
119 | if (ret != NULL) { | ||
120 | if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { | ||
121 | -#if HOST_BIG_ENDIAN | ||
122 | - op->args[pi++] = temp_arg(ret + 1); | ||
123 | - op->args[pi++] = temp_arg(ret); | ||
124 | -#else | ||
125 | op->args[pi++] = temp_arg(ret); | ||
126 | op->args[pi++] = temp_arg(ret + 1); | ||
127 | -#endif | ||
128 | nb_rets = 2; | ||
129 | } else { | ||
130 | op->args[pi++] = temp_arg(ret); | ||
131 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
132 | } | ||
133 | |||
134 | if (TCG_TARGET_REG_BITS < 64 && is_64bit) { | ||
135 | - op->args[pi++] = temp_arg(args[i] + HOST_BIG_ENDIAN); | ||
136 | - op->args[pi++] = temp_arg(args[i] + !HOST_BIG_ENDIAN); | ||
137 | + op->args[pi++] = temp_arg(args[i]); | ||
138 | + op->args[pi++] = temp_arg(args[i] + 1); | ||
139 | real_args += 2; | ||
140 | continue; | ||
141 | } | ||
142 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
143 | } | ||
144 | |||
145 | /* If the two inputs form one 64-bit value, try dupm_vec. */ | ||
146 | - if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) { | ||
147 | - temp_sync(s, itsl, s->reserved_regs, 0, 0); | ||
148 | - temp_sync(s, itsh, s->reserved_regs, 0, 0); | ||
149 | -#if HOST_BIG_ENDIAN | ||
150 | - TCGTemp *its = itsh; | ||
151 | -#else | ||
152 | - TCGTemp *its = itsl; | ||
153 | -#endif | ||
154 | + if (itsl->temp_subindex == HOST_BIG_ENDIAN && | ||
155 | + itsh->temp_subindex == !HOST_BIG_ENDIAN && | ||
156 | + itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { | ||
157 | + TCGTemp *its = itsl - HOST_BIG_ENDIAN; | ||
158 | + | ||
159 | + temp_sync(s, its + 0, s->reserved_regs, 0, 0); | ||
160 | + temp_sync(s, its + 1, s->reserved_regs, 0, 0); | ||
161 | + | ||
162 | if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, | ||
163 | its->mem_base->reg, its->mem_offset)) { | ||
164 | goto done; | ||
165 | -- | ||
166 | 2.34.1 | ||
167 | |||
168 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The count is not itself an enumerator. Move it outside to | ||
2 | prevent the compiler from considering it with -Wswitch-enum. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | include/tcg/tcg.h | 3 ++- | ||
8 | 1 file changed, 2 insertions(+), 1 deletion(-) | ||
9 | |||
10 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/include/tcg/tcg.h | ||
13 | +++ b/include/tcg/tcg.h | ||
14 | @@ -XXX,XX +XXX,XX @@ typedef enum TCGType { | ||
15 | TCG_TYPE_V128, | ||
16 | TCG_TYPE_V256, | ||
17 | |||
18 | - TCG_TYPE_COUNT, /* number of different types */ | ||
19 | + /* Number of different types (integer not enum) */ | ||
20 | +#define TCG_TYPE_COUNT (TCG_TYPE_V256 + 1) | ||
21 | |||
22 | /* An alias for the size of the host register. */ | ||
23 | #if TCG_TARGET_REG_BITS == 32 | ||
24 | -- | ||
25 | 2.34.1 | ||
26 | |||
27 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Add a helper function, tcg_type_size, for computing the size of a type in bytes. | ||
1 | 2 | ||
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | include/tcg/tcg.h | 16 ++++++++++++++++ | ||
7 | tcg/tcg.c | 27 ++++++++++++--------------- | ||
8 | 2 files changed, 28 insertions(+), 15 deletions(-) | ||
9 | |||
10 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/include/tcg/tcg.h | ||
13 | +++ b/include/tcg/tcg.h | ||
14 | @@ -XXX,XX +XXX,XX @@ typedef enum TCGType { | ||
15 | #endif | ||
16 | } TCGType; | ||
17 | |||
18 | +/** | ||
19 | + * tcg_type_size | ||
20 | + * @t: type | ||
21 | + * | ||
22 | + * Return the size of the type in bytes. | ||
23 | + */ | ||
24 | +static inline int tcg_type_size(TCGType t) | ||
25 | +{ | ||
26 | + unsigned i = t; | ||
27 | + if (i >= TCG_TYPE_V64) { | ||
28 | + tcg_debug_assert(i < TCG_TYPE_COUNT); | ||
29 | + i -= TCG_TYPE_V64 - 1; | ||
30 | + } | ||
31 | + return 4 << i; | ||
32 | +} | ||
33 | + | ||
34 | /** | ||
35 | * get_alignment_bits | ||
36 | * @memop: MemOp value | ||
37 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/tcg/tcg.c | ||
40 | +++ b/tcg/tcg.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
42 | |||
43 | static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) | ||
44 | { | ||
45 | - intptr_t off, size, align; | ||
46 | + int size = tcg_type_size(ts->type); | ||
47 | + int align; | ||
48 | + intptr_t off; | ||
49 | |||
50 | switch (ts->type) { | ||
51 | case TCG_TYPE_I32: | ||
52 | - size = align = 4; | ||
53 | + align = 4; | ||
54 | break; | ||
55 | case TCG_TYPE_I64: | ||
56 | case TCG_TYPE_V64: | ||
57 | - size = align = 8; | ||
58 | + align = 8; | ||
59 | break; | ||
60 | case TCG_TYPE_V128: | ||
61 | - size = align = 16; | ||
62 | - break; | ||
63 | case TCG_TYPE_V256: | ||
64 | /* Note that we do not require aligned storage for V256. */ | ||
65 | - size = 32, align = 16; | ||
66 | + align = 16; | ||
67 | break; | ||
68 | default: | ||
69 | g_assert_not_reached(); | ||
70 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
71 | TCGRegSet dup_out_regs, dup_in_regs; | ||
72 | TCGTemp *its, *ots; | ||
73 | TCGType itype, vtype; | ||
74 | - intptr_t endian_fixup; | ||
75 | unsigned vece; | ||
76 | + int lowpart_ofs; | ||
77 | bool ok; | ||
78 | |||
79 | ots = arg_temp(op->args[0]); | ||
80 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
81 | /* fall through */ | ||
82 | |||
83 | case TEMP_VAL_MEM: | ||
84 | -#if HOST_BIG_ENDIAN | ||
85 | - endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8; | ||
86 | - endian_fixup -= 1 << vece; | ||
87 | -#else | ||
88 | - endian_fixup = 0; | ||
89 | -#endif | ||
90 | - /* Attempt to dup directly from the input memory slot. */ | ||
91 | + lowpart_ofs = 0; | ||
92 | + if (HOST_BIG_ENDIAN) { | ||
93 | + lowpart_ofs = tcg_type_size(itype) - (1 << vece); | ||
94 | + } | ||
95 | if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, | ||
96 | - its->mem_offset + endian_fixup)) { | ||
97 | + its->mem_offset + lowpart_ofs)) { | ||
98 | goto done; | ||
99 | } | ||
100 | /* Load the input into the destination vector register. */ | ||
101 | -- | ||
102 | 2.34.1 | ||
103 | |||
104 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Prepare to replace a bunch of separate ifdefs with a | ||
2 | consistent way to describe the ABI of a function call. | ||
1 | 3 | ||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/tcg-internal.h | 15 +++++++++++++++ | ||
9 | 1 file changed, 15 insertions(+) | ||
10 | |||
11 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/tcg-internal.h | ||
14 | +++ b/tcg/tcg-internal.h | ||
15 | @@ -XXX,XX +XXX,XX @@ | ||
16 | |||
17 | #define TCG_HIGHWATER 1024 | ||
18 | |||
19 | +/* | ||
20 | + * Describe the calling convention of a given argument type. | ||
21 | + */ | ||
22 | +typedef enum { | ||
23 | + TCG_CALL_RET_NORMAL, /* by registers */ | ||
24 | +} TCGCallReturnKind; | ||
25 | + | ||
26 | +typedef enum { | ||
27 | + TCG_CALL_ARG_NORMAL, /* by registers (continuing onto stack) */ | ||
28 | + TCG_CALL_ARG_EVEN, /* like normal, but skipping odd slots */ | ||
29 | + TCG_CALL_ARG_EXTEND, /* for i32, as a sign/zero-extended i64 */ | ||
30 | + TCG_CALL_ARG_EXTEND_U, /* ... as a zero-extended i64 */ | ||
31 | + TCG_CALL_ARG_EXTEND_S, /* ... as a sign-extended i64 */ | ||
32 | +} TCGCallArgumentKind; | ||
33 | + | ||
34 | typedef struct TCGHelperInfo { | ||
35 | void *func; | ||
36 | const char *name; | ||
37 | -- | ||
38 | 2.34.1 | ||
39 | |||
40 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | For 32-bit hosts when TCG_TARGET_CALL_ALIGN_ARGS was set, use | |
2 | TCG_CALL_ARG_EVEN. For 64-bit hosts, TCG_TARGET_CALL_ALIGN_ARGS | ||
3 | was silently ignored, so always use TCG_CALL_ARG_NORMAL. | ||
4 | |||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/aarch64/tcg-target.h | 2 +- | ||
9 | tcg/arm/tcg-target.h | 2 +- | ||
10 | tcg/i386/tcg-target.h | 1 + | ||
11 | tcg/loongarch64/tcg-target.h | 2 +- | ||
12 | tcg/mips/tcg-target.h | 3 ++- | ||
13 | tcg/riscv/tcg-target.h | 6 +++++- | ||
14 | tcg/s390x/tcg-target.h | 1 + | ||
15 | tcg/sparc64/tcg-target.h | 1 + | ||
16 | tcg/tci/tcg-target.h | 5 +++++ | ||
17 | tcg/tcg.c | 6 ++++-- | ||
18 | tcg/ppc/tcg-target.c.inc | 21 ++++++++------------- | ||
19 | 11 files changed, 30 insertions(+), 20 deletions(-) | ||
20 | |||
21 | diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/tcg/aarch64/tcg-target.h | ||
24 | +++ b/tcg/aarch64/tcg-target.h | ||
25 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
26 | /* used for function call generation */ | ||
27 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
28 | #define TCG_TARGET_STACK_ALIGN 16 | ||
29 | -#define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
30 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
31 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
32 | |||
33 | /* optional instructions */ | ||
34 | #define TCG_TARGET_HAS_div_i32 1 | ||
35 | diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/tcg/arm/tcg-target.h | ||
38 | +++ b/tcg/arm/tcg-target.h | ||
39 | @@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions; | ||
40 | |||
41 | /* used for function call generation */ | ||
42 | #define TCG_TARGET_STACK_ALIGN 8 | ||
43 | -#define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
44 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
45 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
46 | |||
47 | /* optional instructions */ | ||
48 | #define TCG_TARGET_HAS_ext8s_i32 1 | ||
49 | diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/tcg/i386/tcg-target.h | ||
52 | +++ b/tcg/i386/tcg-target.h | ||
53 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
54 | #else | ||
55 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
56 | #endif | ||
57 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
58 | |||
59 | extern bool have_bmi1; | ||
60 | extern bool have_popcnt; | ||
61 | diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/tcg/loongarch64/tcg-target.h | ||
64 | +++ b/tcg/loongarch64/tcg-target.h | ||
65 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
66 | /* used for function call generation */ | ||
67 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
68 | #define TCG_TARGET_STACK_ALIGN 16 | ||
69 | -#define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
70 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
71 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
72 | |||
73 | /* optional instructions */ | ||
74 | #define TCG_TARGET_HAS_movcond_i32 0 | ||
75 | diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/tcg/mips/tcg-target.h | ||
78 | +++ b/tcg/mips/tcg-target.h | ||
79 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
80 | #define TCG_TARGET_STACK_ALIGN 16 | ||
81 | #if _MIPS_SIM == _ABIO32 | ||
82 | # define TCG_TARGET_CALL_STACK_OFFSET 16 | ||
83 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
84 | #else | ||
85 | # define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
86 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
87 | #endif | ||
88 | -#define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
89 | |||
90 | /* MOVN/MOVZ instructions detection */ | ||
91 | #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ | ||
92 | diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/tcg/riscv/tcg-target.h | ||
95 | +++ b/tcg/riscv/tcg-target.h | ||
96 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
97 | /* used for function call generation */ | ||
98 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
99 | #define TCG_TARGET_STACK_ALIGN 16 | ||
100 | -#define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
101 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
102 | +#if TCG_TARGET_REG_BITS == 32 | ||
103 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
104 | +#else | ||
105 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
106 | +#endif | ||
107 | |||
108 | /* optional instructions */ | ||
109 | #define TCG_TARGET_HAS_movcond_i32 0 | ||
110 | diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/tcg/s390x/tcg-target.h | ||
113 | +++ b/tcg/s390x/tcg-target.h | ||
114 | @@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3]; | ||
115 | /* used for function call generation */ | ||
116 | #define TCG_TARGET_STACK_ALIGN 8 | ||
117 | #define TCG_TARGET_CALL_STACK_OFFSET 160 | ||
118 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
119 | |||
120 | #define TCG_TARGET_EXTEND_ARGS 1 | ||
121 | #define TCG_TARGET_HAS_MEMORY_BSWAP 1 | ||
122 | diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h | ||
123 | index XXXXXXX..XXXXXXX 100644 | ||
124 | --- a/tcg/sparc64/tcg-target.h | ||
125 | +++ b/tcg/sparc64/tcg-target.h | ||
126 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
127 | #define TCG_TARGET_STACK_ALIGN 16 | ||
128 | #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) | ||
129 | #define TCG_TARGET_EXTEND_ARGS 1 | ||
130 | +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
131 | |||
132 | #if defined(__VIS__) && __VIS__ >= 0x300 | ||
133 | #define use_vis3_instructions 1 | ||
134 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
135 | index XXXXXXX..XXXXXXX 100644 | ||
136 | --- a/tcg/tci/tcg-target.h | ||
137 | +++ b/tcg/tci/tcg-target.h | ||
138 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
139 | /* Used for function call generation. */ | ||
140 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
141 | #define TCG_TARGET_STACK_ALIGN 8 | ||
142 | +#if TCG_TARGET_REG_BITS == 32 | ||
143 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
144 | +#else | ||
145 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
146 | +#endif | ||
147 | |||
148 | #define HAVE_TCG_QEMU_TB_EXEC | ||
149 | #define TCG_TARGET_NEED_POOL_LABELS | ||
150 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
151 | index XXXXXXX..XXXXXXX 100644 | ||
152 | --- a/tcg/tcg.c | ||
153 | +++ b/tcg/tcg.c | ||
154 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
155 | * for passing off to ffi_call. | ||
156 | */ | ||
157 | want_align = true; | ||
158 | -#elif defined(TCG_TARGET_CALL_ALIGN_ARGS) | ||
159 | +#else | ||
160 | /* Some targets want aligned 64 bit args */ | ||
161 | - want_align = is_64bit; | ||
162 | + if (is_64bit) { | ||
163 | + want_align = TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN; | ||
164 | + } | ||
165 | #endif | ||
166 | |||
167 | if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) { | ||
168 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
169 | index XXXXXXX..XXXXXXX 100644 | ||
170 | --- a/tcg/ppc/tcg-target.c.inc | ||
171 | +++ b/tcg/ppc/tcg-target.c.inc | ||
172 | @@ -XXX,XX +XXX,XX @@ | ||
173 | #endif | ||
174 | |||
175 | #ifdef _CALL_SYSV | ||
176 | -# define TCG_TARGET_CALL_ALIGN_ARGS 1 | ||
177 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
178 | +#else | ||
179 | +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
180 | #endif | ||
181 | |||
182 | /* For some memory operations, we need a scratch that isn't R0. For the AIX | ||
183 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
184 | lo = lb->addrlo_reg; | ||
185 | hi = lb->addrhi_reg; | ||
186 | if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { | ||
187 | -#ifdef TCG_TARGET_CALL_ALIGN_ARGS | ||
188 | - arg |= 1; | ||
189 | -#endif | ||
190 | + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); | ||
191 | tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); | ||
192 | tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); | ||
193 | } else { | ||
194 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
195 | lo = lb->addrlo_reg; | ||
196 | hi = lb->addrhi_reg; | ||
197 | if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { | ||
198 | -#ifdef TCG_TARGET_CALL_ALIGN_ARGS | ||
199 | - arg |= 1; | ||
200 | -#endif | ||
201 | + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); | ||
202 | tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); | ||
203 | tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); | ||
204 | } else { | ||
205 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
206 | if (TCG_TARGET_REG_BITS == 32) { | ||
207 | switch (s_bits) { | ||
208 | case MO_64: | ||
209 | -#ifdef TCG_TARGET_CALL_ALIGN_ARGS | ||
210 | - arg |= 1; | ||
211 | -#endif | ||
212 | + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); | ||
213 | tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); | ||
214 | /* FALLTHRU */ | ||
215 | case MO_32: | ||
216 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) | ||
217 | |||
218 | if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { | ||
219 | TCGReg arg = TCG_REG_R4; | ||
220 | -#ifdef TCG_TARGET_CALL_ALIGN_ARGS | ||
221 | - arg |= 1; | ||
222 | -#endif | ||
223 | + | ||
224 | + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); | ||
225 | if (l->addrlo_reg != arg) { | ||
226 | tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg); | ||
227 | tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg); | ||
228 | -- | ||
229 | 2.34.1 | ||
230 | |||
231 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | For 64-bit hosts that had TCG_TARGET_EXTEND_ARGS, set | |
2 | TCG_TARGET_CALL_ARG_I32 to TCG_CALL_ARG_EXTEND. | ||
3 | Otherwise, use TCG_CALL_ARG_NORMAL. | ||
4 | |||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/aarch64/tcg-target.h | 1 + | ||
9 | tcg/arm/tcg-target.h | 1 + | ||
10 | tcg/i386/tcg-target.h | 1 + | ||
11 | tcg/loongarch64/tcg-target.h | 1 + | ||
12 | tcg/mips/tcg-target.h | 1 + | ||
13 | tcg/riscv/tcg-target.h | 1 + | ||
14 | tcg/s390x/tcg-target.h | 2 +- | ||
15 | tcg/sparc64/tcg-target.h | 2 +- | ||
16 | tcg/tci/tcg-target.h | 1 + | ||
17 | tcg/tcg.c | 42 ++++++++++++++++++------------------ | ||
18 | tcg/ppc/tcg-target.c.inc | 6 +++++- | ||
19 | 11 files changed, 35 insertions(+), 24 deletions(-) | ||
20 | |||
21 | diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/tcg/aarch64/tcg-target.h | ||
24 | +++ b/tcg/aarch64/tcg-target.h | ||
25 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
26 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
27 | #define TCG_TARGET_STACK_ALIGN 16 | ||
28 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
29 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
30 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
31 | |||
32 | /* optional instructions */ | ||
33 | diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/tcg/arm/tcg-target.h | ||
36 | +++ b/tcg/arm/tcg-target.h | ||
37 | @@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions; | ||
38 | /* used for function call generation */ | ||
39 | #define TCG_TARGET_STACK_ALIGN 8 | ||
40 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
41 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
42 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
43 | |||
44 | /* optional instructions */ | ||
45 | diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/tcg/i386/tcg-target.h | ||
48 | +++ b/tcg/i386/tcg-target.h | ||
49 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
50 | #else | ||
51 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
52 | #endif | ||
53 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
54 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
55 | |||
56 | extern bool have_bmi1; | ||
57 | diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/tcg/loongarch64/tcg-target.h | ||
60 | +++ b/tcg/loongarch64/tcg-target.h | ||
61 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
62 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
63 | #define TCG_TARGET_STACK_ALIGN 16 | ||
64 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
65 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
66 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
67 | |||
68 | /* optional instructions */ | ||
69 | diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/tcg/mips/tcg-target.h | ||
72 | +++ b/tcg/mips/tcg-target.h | ||
73 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
74 | # define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
75 | # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
76 | #endif | ||
77 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
78 | |||
79 | /* MOVN/MOVZ instructions detection */ | ||
80 | #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ | ||
81 | diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/tcg/riscv/tcg-target.h | ||
84 | +++ b/tcg/riscv/tcg-target.h | ||
85 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
86 | #define TCG_REG_CALL_STACK TCG_REG_SP | ||
87 | #define TCG_TARGET_STACK_ALIGN 16 | ||
88 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
89 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
90 | #if TCG_TARGET_REG_BITS == 32 | ||
91 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
92 | #else | ||
93 | diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/tcg/s390x/tcg-target.h | ||
96 | +++ b/tcg/s390x/tcg-target.h | ||
97 | @@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3]; | ||
98 | /* used for function call generation */ | ||
99 | #define TCG_TARGET_STACK_ALIGN 8 | ||
100 | #define TCG_TARGET_CALL_STACK_OFFSET 160 | ||
101 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND | ||
102 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
103 | |||
104 | -#define TCG_TARGET_EXTEND_ARGS 1 | ||
105 | #define TCG_TARGET_HAS_MEMORY_BSWAP 1 | ||
106 | |||
107 | #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) | ||
108 | diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h | ||
109 | index XXXXXXX..XXXXXXX 100644 | ||
110 | --- a/tcg/sparc64/tcg-target.h | ||
111 | +++ b/tcg/sparc64/tcg-target.h | ||
112 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
113 | #define TCG_TARGET_STACK_BIAS 2047 | ||
114 | #define TCG_TARGET_STACK_ALIGN 16 | ||
115 | #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) | ||
116 | -#define TCG_TARGET_EXTEND_ARGS 1 | ||
117 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND | ||
118 | #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
119 | |||
120 | #if defined(__VIS__) && __VIS__ >= 0x300 | ||
121 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/tcg/tci/tcg-target.h | ||
124 | +++ b/tcg/tci/tcg-target.h | ||
125 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
126 | /* Used for function call generation. */ | ||
127 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
128 | #define TCG_TARGET_STACK_ALIGN 8 | ||
129 | +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
130 | #if TCG_TARGET_REG_BITS == 32 | ||
131 | # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
132 | #else | ||
133 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/tcg/tcg.c | ||
136 | +++ b/tcg/tcg.c | ||
137 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
138 | } | ||
139 | #endif | ||
140 | |||
141 | -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 | ||
142 | - for (i = 0; i < nargs; ++i) { | ||
143 | - int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
144 | - bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
145 | - bool is_signed = argtype & 1; | ||
146 | + if (TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_EXTEND) { | ||
147 | + for (i = 0; i < nargs; ++i) { | ||
148 | + int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
149 | + bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
150 | + bool is_signed = argtype & 1; | ||
151 | |||
152 | - if (is_32bit) { | ||
153 | - TCGv_i64 temp = tcg_temp_new_i64(); | ||
154 | - TCGv_i32 orig = temp_tcgv_i32(args[i]); | ||
155 | - if (is_signed) { | ||
156 | - tcg_gen_ext_i32_i64(temp, orig); | ||
157 | - } else { | ||
158 | - tcg_gen_extu_i32_i64(temp, orig); | ||
159 | + if (is_32bit) { | ||
160 | + TCGv_i64 temp = tcg_temp_new_i64(); | ||
161 | + TCGv_i32 orig = temp_tcgv_i32(args[i]); | ||
162 | + if (is_signed) { | ||
163 | + tcg_gen_ext_i32_i64(temp, orig); | ||
164 | + } else { | ||
165 | + tcg_gen_extu_i32_i64(temp, orig); | ||
166 | + } | ||
167 | + args[i] = tcgv_i64_temp(temp); | ||
168 | } | ||
169 | - args[i] = tcgv_i64_temp(temp); | ||
170 | } | ||
171 | } | ||
172 | -#endif /* TCG_TARGET_EXTEND_ARGS */ | ||
173 | |||
174 | op = tcg_emit_op(INDEX_op_call); | ||
175 | |||
176 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
177 | tcg_debug_assert(TCGOP_CALLI(op) == real_args); | ||
178 | tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); | ||
179 | |||
180 | -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 | ||
181 | - for (i = 0; i < nargs; ++i) { | ||
182 | - int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
183 | - bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
184 | + if (TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_EXTEND) { | ||
185 | + for (i = 0; i < nargs; ++i) { | ||
186 | + int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
187 | + bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
188 | |||
189 | - if (is_32bit) { | ||
190 | - tcg_temp_free_internal(args[i]); | ||
191 | + if (is_32bit) { | ||
192 | + tcg_temp_free_internal(args[i]); | ||
193 | + } | ||
194 | } | ||
195 | } | ||
196 | -#endif /* TCG_TARGET_EXTEND_ARGS */ | ||
197 | } | ||
198 | |||
199 | static void tcg_reg_alloc_start(TCGContext *s) | ||
200 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
201 | index XXXXXXX..XXXXXXX 100644 | ||
202 | --- a/tcg/ppc/tcg-target.c.inc | ||
203 | +++ b/tcg/ppc/tcg-target.c.inc | ||
204 | @@ -XXX,XX +XXX,XX @@ | ||
205 | # endif | ||
206 | #endif | ||
207 | |||
208 | +#if TCG_TARGET_REG_BITS == 64 | ||
209 | +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND | ||
210 | +#else | ||
211 | +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
212 | +#endif | ||
213 | #ifdef _CALL_SYSV | ||
214 | # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
215 | #else | ||
216 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count) | ||
217 | |||
218 | /* Parameters for function call generation, used in tcg.c. */ | ||
219 | #define TCG_TARGET_STACK_ALIGN 16 | ||
220 | -#define TCG_TARGET_EXTEND_ARGS 1 | ||
221 | |||
222 | #ifdef _CALL_AIX | ||
223 | # define LINK_AREA_SIZE (6 * SZR) | ||
224 | -- | ||
225 | 2.34.1 | ||
226 | |||
227 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Change 32-bit tci TCG_TARGET_CALL_ARG_I32 to TCG_CALL_ARG_EVEN, to | ||
2 | force 32-bit values to be aligned to 64-bit. With a small reorg | ||
3 | to the argument processing loop, this neatly replaces an ifdef for | ||
4 | CONFIG_TCG_INTERPRETER. | ||
1 | 5 | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | tcg/tci/tcg-target.h | 3 +- | ||
10 | tcg/tcg.c | 70 ++++++++++++++++++++++++++++---------------- | ||
11 | 2 files changed, 47 insertions(+), 26 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/tci/tcg-target.h | ||
16 | +++ b/tcg/tci/tcg-target.h | ||
17 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
18 | /* Used for function call generation. */ | ||
19 | #define TCG_TARGET_CALL_STACK_OFFSET 0 | ||
20 | #define TCG_TARGET_STACK_ALIGN 8 | ||
21 | -#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
22 | #if TCG_TARGET_REG_BITS == 32 | ||
23 | +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN | ||
24 | # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN | ||
25 | #else | ||
26 | +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL | ||
27 | # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL | ||
28 | #endif | ||
29 | |||
30 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/tcg/tcg.c | ||
33 | +++ b/tcg/tcg.c | ||
34 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
35 | real_args = 0; | ||
36 | for (i = 0; i < nargs; i++) { | ||
37 | int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
38 | - bool is_64bit = (argtype & ~1) == dh_typecode_i64; | ||
39 | - bool want_align = false; | ||
40 | + TCGCallArgumentKind kind; | ||
41 | + TCGType type; | ||
42 | |||
43 | -#if defined(CONFIG_TCG_INTERPRETER) | ||
44 | - /* | ||
45 | - * Align all arguments, so that they land in predictable places | ||
46 | - * for passing off to ffi_call. | ||
47 | - */ | ||
48 | - want_align = true; | ||
49 | -#else | ||
50 | - /* Some targets want aligned 64 bit args */ | ||
51 | - if (is_64bit) { | ||
52 | - want_align = TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN; | ||
53 | - } | ||
54 | -#endif | ||
55 | - | ||
56 | - if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) { | ||
57 | - op->args[pi++] = TCG_CALL_DUMMY_ARG; | ||
58 | - real_args++; | ||
59 | + switch (argtype) { | ||
60 | + case dh_typecode_i32: | ||
61 | + case dh_typecode_s32: | ||
62 | + type = TCG_TYPE_I32; | ||
63 | + break; | ||
64 | + case dh_typecode_i64: | ||
65 | + case dh_typecode_s64: | ||
66 | + type = TCG_TYPE_I64; | ||
67 | + break; | ||
68 | + case dh_typecode_ptr: | ||
69 | + type = TCG_TYPE_PTR; | ||
70 | + break; | ||
71 | + default: | ||
72 | + g_assert_not_reached(); | ||
73 | } | ||
74 | |||
75 | - if (TCG_TARGET_REG_BITS < 64 && is_64bit) { | ||
76 | + switch (type) { | ||
77 | + case TCG_TYPE_I32: | ||
78 | + kind = TCG_TARGET_CALL_ARG_I32; | ||
79 | + break; | ||
80 | + case TCG_TYPE_I64: | ||
81 | + kind = TCG_TARGET_CALL_ARG_I64; | ||
82 | + break; | ||
83 | + default: | ||
84 | + g_assert_not_reached(); | ||
85 | + } | ||
86 | + | ||
87 | + switch (kind) { | ||
88 | + case TCG_CALL_ARG_EVEN: | ||
89 | + if (real_args & 1) { | ||
90 | + op->args[pi++] = TCG_CALL_DUMMY_ARG; | ||
91 | + real_args++; | ||
92 | + } | ||
93 | + /* fall through */ | ||
94 | + case TCG_CALL_ARG_NORMAL: | ||
95 | + if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { | ||
96 | + op->args[pi++] = temp_arg(args[i]); | ||
97 | + op->args[pi++] = temp_arg(args[i] + 1); | ||
98 | + real_args += 2; | ||
99 | + break; | ||
100 | + } | ||
101 | op->args[pi++] = temp_arg(args[i]); | ||
102 | - op->args[pi++] = temp_arg(args[i] + 1); | ||
103 | - real_args += 2; | ||
104 | - continue; | ||
105 | + real_args++; | ||
106 | + break; | ||
107 | + default: | ||
108 | + g_assert_not_reached(); | ||
109 | } | ||
110 | - | ||
111 | - op->args[pi++] = temp_arg(args[i]); | ||
112 | - real_args++; | ||
113 | } | ||
114 | op->args[pi++] = (uintptr_t)func; | ||
115 | op->args[pi++] = (uintptr_t)info; | ||
116 | -- | ||
117 | 2.34.1 | ||
118 | |||
119 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The function pointer is immediately after the output and input | ||
2 | operands; no need to search. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | accel/tcg/plugin-gen.c | 29 +++++++++++------------------ | ||
8 | 1 file changed, 11 insertions(+), 18 deletions(-) | ||
9 | |||
10 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/accel/tcg/plugin-gen.c | ||
13 | +++ b/accel/tcg/plugin-gen.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op) | ||
15 | static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, | ||
16 | void *func, int *cb_idx) | ||
17 | { | ||
18 | + TCGOp *old_op; | ||
19 | + int func_idx; | ||
20 | + | ||
21 | /* copy all ops until the call */ | ||
22 | do { | ||
23 | op = copy_op_nocheck(begin_op, op); | ||
24 | } while (op->opc != INDEX_op_call); | ||
25 | |||
26 | /* fill in the op call */ | ||
27 | - op->param1 = (*begin_op)->param1; | ||
28 | - op->param2 = (*begin_op)->param2; | ||
29 | + old_op = *begin_op; | ||
30 | + TCGOP_CALLI(op) = TCGOP_CALLI(old_op); | ||
31 | + TCGOP_CALLO(op) = TCGOP_CALLO(old_op); | ||
32 | tcg_debug_assert(op->life == 0); | ||
33 | - if (*cb_idx == -1) { | ||
34 | - int i; | ||
35 | |||
36 | - /* | ||
37 | - * Instead of working out the position of the callback in args[], just | ||
38 | - * look for @empty_func, since it should be a unique pointer. | ||
39 | - */ | ||
40 | - for (i = 0; i < MAX_OPC_PARAM_ARGS; i++) { | ||
41 | - if ((uintptr_t)(*begin_op)->args[i] == (uintptr_t)empty_func) { | ||
42 | - *cb_idx = i; | ||
43 | - break; | ||
44 | - } | ||
45 | - } | ||
46 | - tcg_debug_assert(i < MAX_OPC_PARAM_ARGS); | ||
47 | - } | ||
48 | - op->args[*cb_idx] = (uintptr_t)func; | ||
49 | - op->args[*cb_idx + 1] = (*begin_op)->args[*cb_idx + 1]; | ||
50 | + func_idx = TCGOP_CALLO(op) + TCGOP_CALLI(op); | ||
51 | + *cb_idx = func_idx; | ||
52 | + | ||
53 | + op->args[func_idx] = (uintptr_t)func; | ||
54 | + op->args[func_idx + 1] = old_op->args[func_idx + 1]; | ||
55 | |||
56 | return op; | ||
57 | } | ||
58 | -- | ||
59 | 2.34.1 | ||
60 | |||
61 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | We copied all of the arguments in copy_op_nocheck. | ||
2 | We only need to replace the one argument that we change. | ||
1 | 3 | ||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | accel/tcg/plugin-gen.c | 2 -- | ||
9 | 1 file changed, 2 deletions(-) | ||
10 | |||
11 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/accel/tcg/plugin-gen.c | ||
14 | +++ b/accel/tcg/plugin-gen.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, | ||
16 | |||
17 | func_idx = TCGOP_CALLO(op) + TCGOP_CALLI(op); | ||
18 | *cb_idx = func_idx; | ||
19 | - | ||
20 | op->args[func_idx] = (uintptr_t)func; | ||
21 | - op->args[func_idx + 1] = old_op->args[func_idx + 1]; | ||
22 | |||
23 | return op; | ||
24 | } | ||
25 | -- | ||
26 | 2.34.1 | ||
27 | |||
28 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Better to re-use the existing function for copying ops. | ||
1 | 2 | ||
3 | Acked-by: Alex Bennée <alex.bennee@linaro.org> | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | accel/tcg/plugin-gen.c | 16 ++++++++-------- | ||
8 | 1 file changed, 8 insertions(+), 8 deletions(-) | ||
9 | |||
10 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/accel/tcg/plugin-gen.c | ||
13 | +++ b/accel/tcg/plugin-gen.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb, | ||
15 | op = copy_const_ptr(&begin_op, op, cb->userp); | ||
16 | |||
17 | /* copy the ld_i32, but note that we only have to copy it once */ | ||
18 | - begin_op = QTAILQ_NEXT(begin_op, link); | ||
19 | - tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); | ||
20 | if (*cb_idx == -1) { | ||
21 | - op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32); | ||
22 | - memcpy(op->args, begin_op->args, sizeof(op->args)); | ||
23 | + op = copy_op(&begin_op, op, INDEX_op_ld_i32); | ||
24 | + } else { | ||
25 | + begin_op = QTAILQ_NEXT(begin_op, link); | ||
26 | + tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); | ||
27 | } | ||
28 | |||
29 | /* call */ | ||
30 | @@ -XXX,XX +XXX,XX @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb, | ||
31 | op = copy_const_ptr(&begin_op, op, cb->userp); | ||
32 | |||
33 | /* copy the ld_i32, but note that we only have to copy it once */ | ||
34 | - begin_op = QTAILQ_NEXT(begin_op, link); | ||
35 | - tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); | ||
36 | if (*cb_idx == -1) { | ||
37 | - op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32); | ||
38 | - memcpy(op->args, begin_op->args, sizeof(op->args)); | ||
39 | + op = copy_op(&begin_op, op, INDEX_op_ld_i32); | ||
40 | + } else { | ||
41 | + begin_op = QTAILQ_NEXT(begin_op, link); | ||
42 | + tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); | ||
43 | } | ||
44 | |||
45 | /* extu_tl_i64 */ | ||
46 | -- | ||
47 | 2.34.1 | ||
48 | |||
49 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | |
2 | |||
3 | In order to allow variable-sized TCGOp allocation, pass the number | ||
4 | of arguments we use (and would allocate) up to tcg_op_alloc(). | ||
5 | |||
6 | This alters tcg_emit_op(), tcg_op_insert_before() and | ||
7 | tcg_op_insert_after() prototypes. | ||
8 | |||
9 | In tcg_op_alloc() ensure the number of arguments is in range. | ||
10 | |||
11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | [PMD: Extracted from bigger patch] | ||
14 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
15 | Message-Id: <20221218211832.73312-2-philmd@linaro.org> | ||
16 | --- | ||
17 | include/tcg/tcg-op.h | 2 +- | ||
18 | include/tcg/tcg.h | 8 +++++--- | ||
19 | accel/tcg/plugin-gen.c | 5 ++++- | ||
20 | tcg/optimize.c | 4 ++-- | ||
21 | tcg/tcg-op-vec.c | 8 ++++---- | ||
22 | tcg/tcg-op.c | 12 ++++++------ | ||
23 | tcg/tcg.c | 30 +++++++++++++++++------------- | ||
24 | 7 files changed, 39 insertions(+), 30 deletions(-) | ||
25 | |||
26 | diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/include/tcg/tcg-op.h | ||
29 | +++ b/include/tcg/tcg-op.h | ||
30 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_plugin_cb_start(unsigned from, unsigned type, | ||
31 | |||
32 | static inline void tcg_gen_plugin_cb_end(void) | ||
33 | { | ||
34 | - tcg_emit_op(INDEX_op_plugin_cb_end); | ||
35 | + tcg_emit_op(INDEX_op_plugin_cb_end, 0); | ||
36 | } | ||
37 | |||
38 | #if TARGET_LONG_BITS == 32 | ||
39 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/include/tcg/tcg.h | ||
42 | +++ b/include/tcg/tcg.h | ||
43 | @@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op); | ||
44 | |||
45 | void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args); | ||
46 | |||
47 | -TCGOp *tcg_emit_op(TCGOpcode opc); | ||
48 | +TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs); | ||
49 | void tcg_op_remove(TCGContext *s, TCGOp *op); | ||
50 | -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc); | ||
51 | -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc); | ||
52 | +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, | ||
53 | + TCGOpcode opc, unsigned nargs); | ||
54 | +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, | ||
55 | + TCGOpcode opc, unsigned nargs); | ||
56 | |||
57 | /** | ||
58 | * tcg_remove_ops_after: | ||
59 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/accel/tcg/plugin-gen.c | ||
62 | +++ b/accel/tcg/plugin-gen.c | ||
63 | @@ -XXX,XX +XXX,XX @@ static TCGOp *rm_ops(TCGOp *op) | ||
64 | |||
65 | static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op) | ||
66 | { | ||
67 | + unsigned nargs = ARRAY_SIZE(op->args); | ||
68 | + | ||
69 | *begin_op = QTAILQ_NEXT(*begin_op, link); | ||
70 | tcg_debug_assert(*begin_op); | ||
71 | - op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc); | ||
72 | + op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc, nargs); | ||
73 | memcpy(op->args, (*begin_op)->args, sizeof(op->args)); | ||
74 | + | ||
75 | return op; | ||
76 | } | ||
77 | |||
78 | diff --git a/tcg/optimize.c b/tcg/optimize.c | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/tcg/optimize.c | ||
81 | +++ b/tcg/optimize.c | ||
82 | @@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) | ||
83 | rh = op->args[1]; | ||
84 | |||
85 | /* The proper opcode is supplied by tcg_opt_gen_mov. */ | ||
86 | - op2 = tcg_op_insert_before(ctx->tcg, op, 0); | ||
87 | + op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); | ||
88 | |||
89 | tcg_opt_gen_movi(ctx, op, rl, al); | ||
90 | tcg_opt_gen_movi(ctx, op2, rh, ah); | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) | ||
92 | rh = op->args[1]; | ||
93 | |||
94 | /* The proper opcode is supplied by tcg_opt_gen_mov. */ | ||
95 | - op2 = tcg_op_insert_before(ctx->tcg, op, 0); | ||
96 | + op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); | ||
97 | |||
98 | tcg_opt_gen_movi(ctx, op, rl, l); | ||
99 | tcg_opt_gen_movi(ctx, op2, rh, h); | ||
100 | diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/tcg/tcg-op-vec.c | ||
103 | +++ b/tcg/tcg-op-vec.c | ||
104 | @@ -XXX,XX +XXX,XX @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list, | ||
105 | |||
106 | void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) | ||
107 | { | ||
108 | - TCGOp *op = tcg_emit_op(opc); | ||
109 | + TCGOp *op = tcg_emit_op(opc, 2); | ||
110 | TCGOP_VECL(op) = type - TCG_TYPE_V64; | ||
111 | TCGOP_VECE(op) = vece; | ||
112 | op->args[0] = r; | ||
113 | @@ -XXX,XX +XXX,XX @@ void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) | ||
114 | void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, | ||
115 | TCGArg r, TCGArg a, TCGArg b) | ||
116 | { | ||
117 | - TCGOp *op = tcg_emit_op(opc); | ||
118 | + TCGOp *op = tcg_emit_op(opc, 3); | ||
119 | TCGOP_VECL(op) = type - TCG_TYPE_V64; | ||
120 | TCGOP_VECE(op) = vece; | ||
121 | op->args[0] = r; | ||
122 | @@ -XXX,XX +XXX,XX @@ void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, | ||
123 | void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, | ||
124 | TCGArg r, TCGArg a, TCGArg b, TCGArg c) | ||
125 | { | ||
126 | - TCGOp *op = tcg_emit_op(opc); | ||
127 | + TCGOp *op = tcg_emit_op(opc, 4); | ||
128 | TCGOP_VECL(op) = type - TCG_TYPE_V64; | ||
129 | TCGOP_VECE(op) = vece; | ||
130 | op->args[0] = r; | ||
131 | @@ -XXX,XX +XXX,XX @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, | ||
132 | static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, | ||
133 | TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e) | ||
134 | { | ||
135 | - TCGOp *op = tcg_emit_op(opc); | ||
136 | + TCGOp *op = tcg_emit_op(opc, 6); | ||
137 | TCGOP_VECL(op) = type - TCG_TYPE_V64; | ||
138 | TCGOP_VECE(op) = vece; | ||
139 | op->args[0] = r; | ||
140 | diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c | ||
141 | index XXXXXXX..XXXXXXX 100644 | ||
142 | --- a/tcg/tcg-op.c | ||
143 | +++ b/tcg/tcg-op.c | ||
144 | @@ -XXX,XX +XXX,XX @@ | ||
145 | |||
146 | void tcg_gen_op1(TCGOpcode opc, TCGArg a1) | ||
147 | { | ||
148 | - TCGOp *op = tcg_emit_op(opc); | ||
149 | + TCGOp *op = tcg_emit_op(opc, 1); | ||
150 | op->args[0] = a1; | ||
151 | } | ||
152 | |||
153 | void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2) | ||
154 | { | ||
155 | - TCGOp *op = tcg_emit_op(opc); | ||
156 | + TCGOp *op = tcg_emit_op(opc, 2); | ||
157 | op->args[0] = a1; | ||
158 | op->args[1] = a2; | ||
159 | } | ||
160 | |||
161 | void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) | ||
162 | { | ||
163 | - TCGOp *op = tcg_emit_op(opc); | ||
164 | + TCGOp *op = tcg_emit_op(opc, 3); | ||
165 | op->args[0] = a1; | ||
166 | op->args[1] = a2; | ||
167 | op->args[2] = a3; | ||
168 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) | ||
169 | |||
170 | void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) | ||
171 | { | ||
172 | - TCGOp *op = tcg_emit_op(opc); | ||
173 | + TCGOp *op = tcg_emit_op(opc, 4); | ||
174 | op->args[0] = a1; | ||
175 | op->args[1] = a2; | ||
176 | op->args[2] = a3; | ||
177 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) | ||
178 | void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, | ||
179 | TCGArg a4, TCGArg a5) | ||
180 | { | ||
181 | - TCGOp *op = tcg_emit_op(opc); | ||
182 | + TCGOp *op = tcg_emit_op(opc, 5); | ||
183 | op->args[0] = a1; | ||
184 | op->args[1] = a2; | ||
185 | op->args[2] = a3; | ||
186 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, | ||
187 | void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, | ||
188 | TCGArg a4, TCGArg a5, TCGArg a6) | ||
189 | { | ||
190 | - TCGOp *op = tcg_emit_op(opc); | ||
191 | + TCGOp *op = tcg_emit_op(opc, 6); | ||
192 | op->args[0] = a1; | ||
193 | op->args[1] = a2; | ||
194 | op->args[2] = a3; | ||
195 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
196 | index XXXXXXX..XXXXXXX 100644 | ||
197 | --- a/tcg/tcg.c | ||
198 | +++ b/tcg/tcg.c | ||
199 | @@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op) | ||
200 | and endian swap in tcg_reg_alloc_call(). */ | ||
201 | void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
202 | { | ||
203 | - int i, real_args, nb_rets, pi; | ||
204 | + int i, real_args, nb_rets, pi, max_args; | ||
205 | unsigned typemask; | ||
206 | const TCGHelperInfo *info; | ||
207 | TCGOp *op; | ||
208 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
209 | } | ||
210 | } | ||
211 | |||
212 | - op = tcg_emit_op(INDEX_op_call); | ||
213 | + max_args = ARRAY_SIZE(op->args); | ||
214 | + op = tcg_emit_op(INDEX_op_call, max_args); | ||
215 | |||
216 | pi = 0; | ||
217 | if (ret != NULL) { | ||
218 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
219 | |||
220 | /* Make sure the fields didn't overflow. */ | ||
221 | tcg_debug_assert(TCGOP_CALLI(op) == real_args); | ||
222 | - tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); | ||
223 | + tcg_debug_assert(pi <= max_args); | ||
224 | |||
225 | if (TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_EXTEND) { | ||
226 | for (i = 0; i < nargs; ++i) { | ||
227 | @@ -XXX,XX +XXX,XX @@ void tcg_remove_ops_after(TCGOp *op) | ||
228 | } | ||
229 | } | ||
230 | |||
231 | -static TCGOp *tcg_op_alloc(TCGOpcode opc) | ||
232 | +static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) | ||
233 | { | ||
234 | TCGContext *s = tcg_ctx; | ||
235 | TCGOp *op; | ||
236 | |||
237 | + assert(nargs < ARRAY_SIZE(op->args)); | ||
238 | if (likely(QTAILQ_EMPTY(&s->free_ops))) { | ||
239 | op = tcg_malloc(sizeof(TCGOp)); | ||
240 | } else { | ||
241 | @@ -XXX,XX +XXX,XX @@ static TCGOp *tcg_op_alloc(TCGOpcode opc) | ||
242 | return op; | ||
243 | } | ||
244 | |||
245 | -TCGOp *tcg_emit_op(TCGOpcode opc) | ||
246 | +TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) | ||
247 | { | ||
248 | - TCGOp *op = tcg_op_alloc(opc); | ||
249 | + TCGOp *op = tcg_op_alloc(opc, nargs); | ||
250 | QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); | ||
251 | return op; | ||
252 | } | ||
253 | |||
254 | -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) | ||
255 | +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, | ||
256 | + TCGOpcode opc, unsigned nargs) | ||
257 | { | ||
258 | - TCGOp *new_op = tcg_op_alloc(opc); | ||
259 | + TCGOp *new_op = tcg_op_alloc(opc, nargs); | ||
260 | QTAILQ_INSERT_BEFORE(old_op, new_op, link); | ||
261 | return new_op; | ||
262 | } | ||
263 | |||
264 | -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) | ||
265 | +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, | ||
266 | + TCGOpcode opc, unsigned nargs) | ||
267 | { | ||
268 | - TCGOp *new_op = tcg_op_alloc(opc); | ||
269 | + TCGOp *new_op = tcg_op_alloc(opc, nargs); | ||
270 | QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); | ||
271 | return new_op; | ||
272 | } | ||
273 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
274 | TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 | ||
275 | ? INDEX_op_ld_i32 | ||
276 | : INDEX_op_ld_i64); | ||
277 | - TCGOp *lop = tcg_op_insert_before(s, op, lopc); | ||
278 | + TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); | ||
279 | |||
280 | lop->args[0] = temp_arg(dir_ts); | ||
281 | lop->args[1] = temp_arg(arg_ts->mem_base); | ||
282 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
283 | TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 | ||
284 | ? INDEX_op_st_i32 | ||
285 | : INDEX_op_st_i64); | ||
286 | - TCGOp *sop = tcg_op_insert_after(s, op, sopc); | ||
287 | + TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); | ||
288 | TCGTemp *out_ts = dir_ts; | ||
289 | |||
290 | if (IS_DEAD_ARG(0)) { | ||
291 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
292 | TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 | ||
293 | ? INDEX_op_st_i32 | ||
294 | : INDEX_op_st_i64); | ||
295 | - TCGOp *sop = tcg_op_insert_after(s, op, sopc); | ||
296 | + TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); | ||
297 | |||
298 | sop->args[0] = temp_arg(dir_ts); | ||
299 | sop->args[1] = temp_arg(arg_ts->mem_base); | ||
300 | -- | ||
301 | 2.34.1 | ||
302 | |||
303 | diff view generated by jsdifflib |
1 | From: Taylor Simpson <tsimpson@quicinc.com> | 1 | We have been allocating a worst case number of arguments |
---|---|---|---|
2 | to support calls. Instead, allow the size to vary. | ||
3 | By default leave space for 4 args, to maximize reuse, | ||
4 | but allow calls to increase the number of args to 32. | ||
2 | 5 | ||
3 | Currently, helpers can only take up to 6 arguments. This patch adds the | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | capability for up to 7 arguments. I have tested it with the Hexagon port | 7 | [PMD: Split patch in two] |
5 | that I am preparing for submission. | 8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
9 | Message-Id: <20221218211832.73312-3-philmd@linaro.org> | ||
10 | --- | ||
11 | include/exec/helper-head.h | 2 -- | ||
12 | include/tcg/tcg.h | 46 +++++++++++++------------------------- | ||
13 | accel/tcg/plugin-gen.c | 10 ++++----- | ||
14 | tcg/tcg.c | 35 +++++++++++++++++++++-------- | ||
15 | 4 files changed, 47 insertions(+), 46 deletions(-) | ||
6 | 16 | ||
7 | Signed-off-by: Taylor Simpson <tsimpson@quicinc.com> | ||
8 | Message-Id: <1580942510-2820-1-git-send-email-tsimpson@quicinc.com> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | include/exec/helper-gen.h | 13 +++++++++++++ | ||
12 | include/exec/helper-head.h | 2 ++ | ||
13 | include/exec/helper-proto.h | 6 ++++++ | ||
14 | include/exec/helper-tcg.h | 7 +++++++ | ||
15 | 4 files changed, 28 insertions(+) | ||
16 | |||
17 | diff --git a/include/exec/helper-gen.h b/include/exec/helper-gen.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/include/exec/helper-gen.h | ||
20 | +++ b/include/exec/helper-gen.h | ||
21 | @@ -XXX,XX +XXX,XX @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ | ||
22 | tcg_gen_callN(HELPER(name), dh_retvar(ret), 6, args); \ | ||
23 | } | ||
24 | |||
25 | +#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7)\ | ||
26 | +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ | ||
27 | + dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \ | ||
28 | + dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6), \ | ||
29 | + dh_arg_decl(t7, 7)) \ | ||
30 | +{ \ | ||
31 | + TCGTemp *args[7] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \ | ||
32 | + dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6), \ | ||
33 | + dh_arg(t7, 7) }; \ | ||
34 | + tcg_gen_callN(HELPER(name), dh_retvar(ret), 7, args); \ | ||
35 | +} | ||
36 | + | ||
37 | #include "helper.h" | ||
38 | #include "trace/generated-helpers.h" | ||
39 | #include "trace/generated-helpers-wrappers.h" | ||
40 | @@ -XXX,XX +XXX,XX @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ | ||
41 | #undef DEF_HELPER_FLAGS_4 | ||
42 | #undef DEF_HELPER_FLAGS_5 | ||
43 | #undef DEF_HELPER_FLAGS_6 | ||
44 | +#undef DEF_HELPER_FLAGS_7 | ||
45 | #undef GEN_HELPER | ||
46 | |||
47 | #endif /* HELPER_GEN_H */ | ||
48 | diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h | 17 | diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h |
49 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
50 | --- a/include/exec/helper-head.h | 19 | --- a/include/exec/helper-head.h |
51 | +++ b/include/exec/helper-head.h | 20 | +++ b/include/exec/helper-head.h |
52 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ |
53 | DEF_HELPER_FLAGS_5(name, 0, ret, t1, t2, t3, t4, t5) | 22 | #define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \ |
54 | #define DEF_HELPER_6(name, ret, t1, t2, t3, t4, t5, t6) \ | 23 | DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) |
55 | DEF_HELPER_FLAGS_6(name, 0, ret, t1, t2, t3, t4, t5, t6) | 24 | |
56 | +#define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \ | 25 | -/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */ |
57 | + DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) | 26 | - |
58 | 27 | #endif /* EXEC_HELPER_HEAD_H */ | |
59 | /* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */ | 28 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h |
60 | |||
61 | diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h | ||
62 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
63 | --- a/include/exec/helper-proto.h | 30 | --- a/include/tcg/tcg.h |
64 | +++ b/include/exec/helper-proto.h | 31 | +++ b/include/tcg/tcg.h |
65 | @@ -XXX,XX +XXX,XX @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ | 32 | @@ -XXX,XX +XXX,XX @@ |
66 | dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ | 33 | /* XXX: make safe guess about sizes */ |
67 | dh_ctype(t4), dh_ctype(t5), dh_ctype(t6)); | 34 | #define MAX_OP_PER_INSTR 266 |
68 | 35 | ||
69 | +#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \ | 36 | -#if HOST_LONG_BITS == 32 |
70 | +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ | 37 | -#define MAX_OPC_PARAM_PER_ARG 2 |
71 | + dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \ | 38 | -#else |
72 | + dh_ctype(t7)); | 39 | -#define MAX_OPC_PARAM_PER_ARG 1 |
40 | -#endif | ||
41 | -#define MAX_OPC_PARAM_IARGS 7 | ||
42 | -#define MAX_OPC_PARAM_OARGS 1 | ||
43 | -#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS) | ||
44 | - | ||
45 | -/* A Call op needs up to 4 + 2N parameters on 32-bit archs, | ||
46 | - * and up to 4 + N parameters on 64-bit archs | ||
47 | - * (N = number of input arguments + output arguments). */ | ||
48 | -#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) | ||
49 | - | ||
50 | #define CPU_TEMP_BUF_NLONGS 128 | ||
51 | #define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long)) | ||
52 | |||
53 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGTempSet { | ||
54 | unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; | ||
55 | } TCGTempSet; | ||
56 | |||
57 | -/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding, | ||
58 | - this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands. | ||
59 | - There are never more than 2 outputs, which means that we can store all | ||
60 | - dead + sync data within 16 bits. */ | ||
61 | -#define DEAD_ARG 4 | ||
62 | -#define SYNC_ARG 1 | ||
63 | -typedef uint16_t TCGLifeData; | ||
64 | +/* | ||
65 | + * With 1 128-bit output, a 32-bit host requires 4 output parameters, | ||
66 | + * which leaves a maximum of 28 other slots. Which is enough for 7 | ||
67 | + * 128-bit operands. | ||
68 | + */ | ||
69 | +#define DEAD_ARG (1 << 4) | ||
70 | +#define SYNC_ARG (1 << 0) | ||
71 | +typedef uint32_t TCGLifeData; | ||
72 | |||
73 | -/* The layout here is designed to avoid a bitfield crossing of | ||
74 | - a 32-bit boundary, which would cause GCC to add extra padding. */ | ||
75 | typedef struct TCGOp { | ||
76 | - TCGOpcode opc : 8; /* 8 */ | ||
77 | + TCGOpcode opc : 8; | ||
78 | + unsigned nargs : 8; | ||
79 | |||
80 | /* Parameters for this opcode. See below. */ | ||
81 | - unsigned param1 : 4; /* 12 */ | ||
82 | - unsigned param2 : 4; /* 16 */ | ||
83 | + unsigned param1 : 8; | ||
84 | + unsigned param2 : 8; | ||
85 | |||
86 | /* Lifetime data of the operands. */ | ||
87 | - unsigned life : 16; /* 32 */ | ||
88 | + TCGLifeData life; | ||
89 | |||
90 | /* Next and previous opcodes. */ | ||
91 | QTAILQ_ENTRY(TCGOp) link; | ||
92 | |||
93 | - /* Arguments for the opcode. */ | ||
94 | - TCGArg args[MAX_OPC_PARAM]; | ||
95 | - | ||
96 | /* Register preferences for the output(s). */ | ||
97 | TCGRegSet output_pref[2]; | ||
73 | + | 98 | + |
74 | #include "helper.h" | 99 | + /* Arguments for the opcode. */ |
75 | #include "trace/generated-helpers.h" | 100 | + TCGArg args[]; |
76 | #include "tcg-runtime.h" | 101 | } TCGOp; |
77 | @@ -XXX,XX +XXX,XX @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ | 102 | |
78 | #undef DEF_HELPER_FLAGS_4 | 103 | #define TCGOP_CALLI(X) (X)->param1 |
79 | #undef DEF_HELPER_FLAGS_5 | 104 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c |
80 | #undef DEF_HELPER_FLAGS_6 | ||
81 | +#undef DEF_HELPER_FLAGS_7 | ||
82 | |||
83 | #endif /* HELPER_PROTO_H */ | ||
84 | diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h | ||
85 | index XXXXXXX..XXXXXXX 100644 | 105 | index XXXXXXX..XXXXXXX 100644 |
86 | --- a/include/exec/helper-tcg.h | 106 | --- a/accel/tcg/plugin-gen.c |
87 | +++ b/include/exec/helper-tcg.h | 107 | +++ b/accel/tcg/plugin-gen.c |
88 | @@ -XXX,XX +XXX,XX @@ | 108 | @@ -XXX,XX +XXX,XX @@ static TCGOp *rm_ops(TCGOp *op) |
89 | | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | 109 | |
90 | | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) }, | 110 | static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op) |
91 | 111 | { | |
92 | +#define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \ | 112 | - unsigned nargs = ARRAY_SIZE(op->args); |
93 | + { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ | 113 | + TCGOp *old_op = QTAILQ_NEXT(*begin_op, link); |
94 | + .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | 114 | + unsigned nargs = old_op->nargs; |
95 | + | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | 115 | |
96 | + | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) }, | 116 | - *begin_op = QTAILQ_NEXT(*begin_op, link); |
117 | - tcg_debug_assert(*begin_op); | ||
118 | - op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc, nargs); | ||
119 | - memcpy(op->args, (*begin_op)->args, sizeof(op->args)); | ||
120 | + *begin_op = old_op; | ||
121 | + op = tcg_op_insert_after(tcg_ctx, op, old_op->opc, nargs); | ||
122 | + memcpy(op->args, old_op->args, sizeof(op->args[0]) * nargs); | ||
123 | |||
124 | return op; | ||
125 | } | ||
126 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
127 | index XXXXXXX..XXXXXXX 100644 | ||
128 | --- a/tcg/tcg.c | ||
129 | +++ b/tcg/tcg.c | ||
130 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
131 | } | ||
132 | } | ||
133 | |||
134 | - max_args = ARRAY_SIZE(op->args); | ||
135 | + /* | ||
136 | + * A Call op needs up to 4 + 2N parameters on 32-bit archs, | ||
137 | + * and up to 4 + N parameters on 64-bit archs | ||
138 | + * (N = number of input arguments + output arguments). | ||
139 | + */ | ||
140 | + max_args = (64 / TCG_TARGET_REG_BITS) * nargs + 4; | ||
141 | op = tcg_emit_op(INDEX_op_call, max_args); | ||
142 | |||
143 | pi = 0; | ||
144 | @@ -XXX,XX +XXX,XX @@ void tcg_remove_ops_after(TCGOp *op) | ||
145 | static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) | ||
146 | { | ||
147 | TCGContext *s = tcg_ctx; | ||
148 | - TCGOp *op; | ||
149 | + TCGOp *op = NULL; | ||
150 | |||
151 | - assert(nargs < ARRAY_SIZE(op->args)); | ||
152 | - if (likely(QTAILQ_EMPTY(&s->free_ops))) { | ||
153 | - op = tcg_malloc(sizeof(TCGOp)); | ||
154 | - } else { | ||
155 | - op = QTAILQ_FIRST(&s->free_ops); | ||
156 | - QTAILQ_REMOVE(&s->free_ops, op, link); | ||
157 | + if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { | ||
158 | + QTAILQ_FOREACH(op, &s->free_ops, link) { | ||
159 | + if (nargs <= op->nargs) { | ||
160 | + QTAILQ_REMOVE(&s->free_ops, op, link); | ||
161 | + nargs = op->nargs; | ||
162 | + goto found; | ||
163 | + } | ||
164 | + } | ||
165 | } | ||
97 | + | 166 | + |
98 | #include "helper.h" | 167 | + /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ |
99 | #include "trace/generated-helpers.h" | 168 | + nargs = MAX(4, nargs); |
100 | #include "tcg-runtime.h" | 169 | + op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); |
101 | @@ -XXX,XX +XXX,XX @@ | 170 | + |
102 | #undef DEF_HELPER_FLAGS_4 | 171 | + found: |
103 | #undef DEF_HELPER_FLAGS_5 | 172 | memset(op, 0, offsetof(TCGOp, link)); |
104 | #undef DEF_HELPER_FLAGS_6 | 173 | op->opc = opc; |
105 | +#undef DEF_HELPER_FLAGS_7 | 174 | - s->nb_ops++; |
106 | 175 | + op->nargs = nargs; | |
107 | #endif /* HELPER_TCG_H */ | 176 | |
177 | + /* Check for bitfield overflow. */ | ||
178 | + tcg_debug_assert(op->nargs == nargs); | ||
179 | + | ||
180 | + s->nb_ops++; | ||
181 | return op; | ||
182 | } | ||
183 | |||
108 | -- | 184 | -- |
109 | 2.20.1 | 185 | 2.34.1 |
110 | 186 | ||
111 | 187 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | We will shortly have the possibility of more than two outputs, | |
2 | though only for calls (for which preferences are moot). Avoid | ||
3 | direct references to op->output_pref[] when possible. | ||
1 | 4 | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | include/tcg/tcg.h | 5 +++++ | ||
9 | tcg/tcg.c | 34 ++++++++++++++++++---------------- | ||
10 | 2 files changed, 23 insertions(+), 16 deletions(-) | ||
11 | |||
12 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/include/tcg/tcg.h | ||
15 | +++ b/include/tcg/tcg.h | ||
16 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGOp { | ||
17 | /* Make sure operands fit in the bitfields above. */ | ||
18 | QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); | ||
19 | |||
20 | +static inline TCGRegSet output_pref(const TCGOp *op, unsigned i) | ||
21 | +{ | ||
22 | + return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0; | ||
23 | +} | ||
24 | + | ||
25 | typedef struct TCGProfile { | ||
26 | int64_t cpu_exec_time; | ||
27 | int64_t tb_count1; | ||
28 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/tcg/tcg.c | ||
31 | +++ b/tcg/tcg.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) | ||
33 | |||
34 | if (have_prefs) { | ||
35 | for (i = 0; i < nb_oargs; ++i) { | ||
36 | - TCGRegSet set = op->output_pref[i]; | ||
37 | + TCGRegSet set = output_pref(op, i); | ||
38 | |||
39 | if (i == 0) { | ||
40 | ne_fprintf(f, " pref="); | ||
41 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
42 | } | ||
43 | ts->state = TS_DEAD; | ||
44 | la_reset_pref(ts); | ||
45 | - | ||
46 | - /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ | ||
47 | - op->output_pref[i] = 0; | ||
48 | } | ||
49 | |||
50 | + /* Not used -- it will be tcg_target_call_oarg_reg(). */ | ||
51 | + memset(op->output_pref, 0, sizeof(op->output_pref)); | ||
52 | + | ||
53 | if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | | ||
54 | TCG_CALL_NO_READ_GLOBALS))) { | ||
55 | la_global_kill(s, nb_globals); | ||
56 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
57 | ts = arg_temp(op->args[i]); | ||
58 | |||
59 | /* Remember the preference of the uses that followed. */ | ||
60 | - op->output_pref[i] = *la_temp_pref(ts); | ||
61 | + if (i < ARRAY_SIZE(op->output_pref)) { | ||
62 | + op->output_pref[i] = *la_temp_pref(ts); | ||
63 | + } | ||
64 | |||
65 | /* Output args are dead. */ | ||
66 | if (ts->state & TS_DEAD) { | ||
67 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
68 | |||
69 | set &= ct->regs; | ||
70 | if (ct->ialias) { | ||
71 | - set &= op->output_pref[ct->alias_index]; | ||
72 | + set &= output_pref(op, ct->alias_index); | ||
73 | } | ||
74 | /* If the combination is not possible, restart. */ | ||
75 | if (set == 0) { | ||
76 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) | ||
77 | TCGReg oreg, ireg; | ||
78 | |||
79 | allocated_regs = s->reserved_regs; | ||
80 | - preferred_regs = op->output_pref[0]; | ||
81 | + preferred_regs = output_pref(op, 0); | ||
82 | ots = arg_temp(op->args[0]); | ||
83 | ts = arg_temp(op->args[1]); | ||
84 | |||
85 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
86 | if (IS_DEAD_ARG(1)) { | ||
87 | temp_dead(s, its); | ||
88 | } | ||
89 | - tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); | ||
90 | + tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); | ||
91 | return; | ||
92 | } | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) | ||
95 | tcg_regset_set_reg(allocated_regs, its->reg); | ||
96 | } | ||
97 | oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
98 | - op->output_pref[0], ots->indirect_base); | ||
99 | + output_pref(op, 0), ots->indirect_base); | ||
100 | set_temp_val_reg(s, ots, oreg); | ||
101 | } | ||
102 | |||
103 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
104 | switch (arg_ct->pair) { | ||
105 | case 0: /* not paired */ | ||
106 | if (arg_ct->ialias) { | ||
107 | - i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
108 | + i_preferred_regs = output_pref(op, arg_ct->alias_index); | ||
109 | |||
110 | /* | ||
111 | * If the input is not dead after the instruction, | ||
112 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
113 | * and to identify a few cases where it's not required. | ||
114 | */ | ||
115 | if (arg_ct->ialias) { | ||
116 | - i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
117 | + i_preferred_regs = output_pref(op, arg_ct->alias_index); | ||
118 | if (IS_DEAD_ARG(i1) && | ||
119 | IS_DEAD_ARG(i2) && | ||
120 | ts->val_type == TEMP_VAL_REG && | ||
121 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
122 | |||
123 | case 3: /* ialias with second output, no first input */ | ||
124 | tcg_debug_assert(arg_ct->ialias); | ||
125 | - i_preferred_regs = op->output_pref[arg_ct->alias_index]; | ||
126 | + i_preferred_regs = output_pref(op, arg_ct->alias_index); | ||
127 | |||
128 | if (IS_DEAD_ARG(i) && | ||
129 | ts->val_type == TEMP_VAL_REG && | ||
130 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
131 | } else if (arg_ct->newreg) { | ||
132 | reg = tcg_reg_alloc(s, arg_ct->regs, | ||
133 | i_allocated_regs | o_allocated_regs, | ||
134 | - op->output_pref[k], ts->indirect_base); | ||
135 | + output_pref(op, k), ts->indirect_base); | ||
136 | } else { | ||
137 | reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, | ||
138 | - op->output_pref[k], ts->indirect_base); | ||
139 | + output_pref(op, k), ts->indirect_base); | ||
140 | } | ||
141 | break; | ||
142 | |||
143 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
144 | break; | ||
145 | } | ||
146 | reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, | ||
147 | - op->output_pref[k], ts->indirect_base); | ||
148 | + output_pref(op, k), ts->indirect_base); | ||
149 | break; | ||
150 | |||
151 | case 2: /* second of pair */ | ||
152 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
153 | } | ||
154 | |||
155 | oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, | ||
156 | - op->output_pref[0], ots->indirect_base); | ||
157 | + output_pref(op, 0), ots->indirect_base); | ||
158 | set_temp_val_reg(s, ots, oreg); | ||
159 | } | ||
160 | |||
161 | -- | ||
162 | 2.34.1 | ||
163 | |||
164 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Pre-compute the function call layout for each helper at startup. | ||
2 | Drop TCG_CALL_DUMMY_ARG, as we no longer need to leave gaps | ||
3 | in the op->args[] array. This allows several places to stop | ||
4 | checking for NULL TCGTemp, to which TCG_CALL_DUMMY_ARG mapped. | ||
1 | 5 | ||
6 | For tcg_gen_callN, loop over the arguments once. Allocate the TCGOp | ||
7 | for the call early but delay emitting it, collecting arguments first. | ||
8 | This allows the argument processing loop to emit code for extensions | ||
9 | and have them sequenced before the call. | ||
10 | |||
11 | For tcg_reg_alloc_call, loop over the arguments in reverse order, | ||
12 | which allows stack slots to be filled first naturally. | ||
13 | |||
14 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | --- | ||
16 | include/exec/helper-head.h | 2 + | ||
17 | include/tcg/tcg.h | 5 +- | ||
18 | tcg/tcg-internal.h | 22 +- | ||
19 | tcg/optimize.c | 6 +- | ||
20 | tcg/tcg.c | 609 ++++++++++++++++++++++--------------- | ||
21 | 5 files changed, 394 insertions(+), 250 deletions(-) | ||
22 | |||
23 | diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/include/exec/helper-head.h | ||
26 | +++ b/include/exec/helper-head.h | ||
27 | @@ -XXX,XX +XXX,XX @@ | ||
28 | #define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \ | ||
29 | DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) | ||
30 | |||
31 | +/* MAX_CALL_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */ | ||
32 | + | ||
33 | #endif /* EXEC_HELPER_HEAD_H */ | ||
34 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/include/tcg/tcg.h | ||
37 | +++ b/include/tcg/tcg.h | ||
38 | @@ -XXX,XX +XXX,XX @@ | ||
39 | /* XXX: make safe guess about sizes */ | ||
40 | #define MAX_OP_PER_INSTR 266 | ||
41 | |||
42 | +#define MAX_CALL_IARGS 7 | ||
43 | + | ||
44 | #define CPU_TEMP_BUF_NLONGS 128 | ||
45 | #define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long)) | ||
46 | |||
47 | @@ -XXX,XX +XXX,XX @@ typedef TCGv_ptr TCGv_env; | ||
48 | #define TCG_CALL_NO_RWG_SE (TCG_CALL_NO_RWG | TCG_CALL_NO_SE) | ||
49 | #define TCG_CALL_NO_WG_SE (TCG_CALL_NO_WG | TCG_CALL_NO_SE) | ||
50 | |||
51 | -/* Used to align parameters. See the comment before tcgv_i32_temp. */ | ||
52 | -#define TCG_CALL_DUMMY_ARG ((TCGArg)0) | ||
53 | - | ||
54 | /* | ||
55 | * Flags for the bswap opcodes. | ||
56 | * If IZ, the input is zero-extended, otherwise unknown. | ||
57 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/tcg/tcg-internal.h | ||
60 | +++ b/tcg/tcg-internal.h | ||
61 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
62 | TCG_CALL_ARG_EXTEND_S, /* ... as a sign-extended i64 */ | ||
63 | } TCGCallArgumentKind; | ||
64 | |||
65 | +typedef struct TCGCallArgumentLoc { | ||
66 | + TCGCallArgumentKind kind : 8; | ||
67 | + unsigned arg_slot : 8; | ||
68 | + unsigned ref_slot : 8; | ||
69 | + unsigned arg_idx : 4; | ||
70 | + unsigned tmp_subindex : 2; | ||
71 | +} TCGCallArgumentLoc; | ||
72 | + | ||
73 | +/* Avoid "unsigned < 0 is always false" Werror, when iarg_regs is empty. */ | ||
74 | +#define REG_P(L) \ | ||
75 | + ((int)(L)->arg_slot < (int)ARRAY_SIZE(tcg_target_call_iarg_regs)) | ||
76 | + | ||
77 | typedef struct TCGHelperInfo { | ||
78 | void *func; | ||
79 | const char *name; | ||
80 | - unsigned flags; | ||
81 | - unsigned typemask; | ||
82 | + unsigned typemask : 32; | ||
83 | + unsigned flags : 8; | ||
84 | + unsigned nr_in : 8; | ||
85 | + unsigned nr_out : 8; | ||
86 | + TCGCallReturnKind out_kind : 8; | ||
87 | + | ||
88 | + /* Maximum physical arguments are constrained by TCG_TYPE_I128. */ | ||
89 | + TCGCallArgumentLoc in[MAX_CALL_IARGS * (128 / TCG_TARGET_REG_BITS)]; | ||
90 | } TCGHelperInfo; | ||
91 | |||
92 | extern TCGContext tcg_init_ctx; | ||
93 | diff --git a/tcg/optimize.c b/tcg/optimize.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/tcg/optimize.c | ||
96 | +++ b/tcg/optimize.c | ||
97 | @@ -XXX,XX +XXX,XX @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) | ||
98 | { | ||
99 | for (int i = 0; i < nb_args; i++) { | ||
100 | TCGTemp *ts = arg_temp(op->args[i]); | ||
101 | - if (ts) { | ||
102 | - init_ts_info(ctx, ts); | ||
103 | - } | ||
104 | + init_ts_info(ctx, ts); | ||
105 | } | ||
106 | } | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op, | ||
109 | |||
110 | for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { | ||
111 | TCGTemp *ts = arg_temp(op->args[i]); | ||
112 | - if (ts && ts_is_copy(ts)) { | ||
113 | + if (ts_is_copy(ts)) { | ||
114 | op->args[i] = temp_arg(find_better_copy(s, ts)); | ||
115 | } | ||
116 | } | ||
117 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
118 | index XXXXXXX..XXXXXXX 100644 | ||
119 | --- a/tcg/tcg.c | ||
120 | +++ b/tcg/tcg.c | ||
121 | @@ -XXX,XX +XXX,XX @@ void tcg_pool_reset(TCGContext *s) | ||
122 | |||
123 | #include "exec/helper-proto.h" | ||
124 | |||
125 | -static const TCGHelperInfo all_helpers[] = { | ||
126 | +static TCGHelperInfo all_helpers[] = { | ||
127 | #include "exec/helper-tcg.h" | ||
128 | }; | ||
129 | static GHashTable *helper_table; | ||
130 | @@ -XXX,XX +XXX,XX @@ static ffi_type * const typecode_to_ffi[8] = { | ||
131 | }; | ||
132 | #endif | ||
133 | |||
134 | +typedef struct TCGCumulativeArgs { | ||
135 | + int arg_idx; /* tcg_gen_callN args[] */ | ||
136 | + int info_in_idx; /* TCGHelperInfo in[] */ | ||
137 | + int arg_slot; /* regs+stack slot */ | ||
138 | + int ref_slot; /* stack slots for references */ | ||
139 | +} TCGCumulativeArgs; | ||
140 | + | ||
141 | +static void layout_arg_even(TCGCumulativeArgs *cum) | ||
142 | +{ | ||
143 | + cum->arg_slot += cum->arg_slot & 1; | ||
144 | +} | ||
145 | + | ||
146 | +static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info, | ||
147 | + TCGCallArgumentKind kind) | ||
148 | +{ | ||
149 | + TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; | ||
150 | + | ||
151 | + *loc = (TCGCallArgumentLoc){ | ||
152 | + .kind = kind, | ||
153 | + .arg_idx = cum->arg_idx, | ||
154 | + .arg_slot = cum->arg_slot, | ||
155 | + }; | ||
156 | + cum->info_in_idx++; | ||
157 | + cum->arg_slot++; | ||
158 | +} | ||
159 | + | ||
160 | +static void layout_arg_normal_n(TCGCumulativeArgs *cum, | ||
161 | + TCGHelperInfo *info, int n) | ||
162 | +{ | ||
163 | + TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; | ||
164 | + | ||
165 | + for (int i = 0; i < n; ++i) { | ||
166 | + /* Layout all using the same arg_idx, adjusting the subindex. */ | ||
167 | + loc[i] = (TCGCallArgumentLoc){ | ||
168 | + .kind = TCG_CALL_ARG_NORMAL, | ||
169 | + .arg_idx = cum->arg_idx, | ||
170 | + .tmp_subindex = i, | ||
171 | + .arg_slot = cum->arg_slot + i, | ||
172 | + }; | ||
173 | + } | ||
174 | + cum->info_in_idx += n; | ||
175 | + cum->arg_slot += n; | ||
176 | +} | ||
177 | + | ||
178 | +static void init_call_layout(TCGHelperInfo *info) | ||
179 | +{ | ||
180 | + int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); | ||
181 | + int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); | ||
182 | + unsigned typemask = info->typemask; | ||
183 | + unsigned typecode; | ||
184 | + TCGCumulativeArgs cum = { }; | ||
185 | + | ||
186 | + /* | ||
187 | + * Parse and place any function return value. | ||
188 | + */ | ||
189 | + typecode = typemask & 7; | ||
190 | + switch (typecode) { | ||
191 | + case dh_typecode_void: | ||
192 | + info->nr_out = 0; | ||
193 | + break; | ||
194 | + case dh_typecode_i32: | ||
195 | + case dh_typecode_s32: | ||
196 | + case dh_typecode_ptr: | ||
197 | + info->nr_out = 1; | ||
198 | + info->out_kind = TCG_CALL_RET_NORMAL; | ||
199 | + break; | ||
200 | + case dh_typecode_i64: | ||
201 | + case dh_typecode_s64: | ||
202 | + info->nr_out = 64 / TCG_TARGET_REG_BITS; | ||
203 | + info->out_kind = TCG_CALL_RET_NORMAL; | ||
204 | + break; | ||
205 | + default: | ||
206 | + g_assert_not_reached(); | ||
207 | + } | ||
208 | + assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs)); | ||
209 | + | ||
210 | + /* | ||
211 | + * Parse and place function arguments. | ||
212 | + */ | ||
213 | + for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { | ||
214 | + TCGCallArgumentKind kind; | ||
215 | + TCGType type; | ||
216 | + | ||
217 | + typecode = typemask & 7; | ||
218 | + switch (typecode) { | ||
219 | + case dh_typecode_i32: | ||
220 | + case dh_typecode_s32: | ||
221 | + type = TCG_TYPE_I32; | ||
222 | + break; | ||
223 | + case dh_typecode_i64: | ||
224 | + case dh_typecode_s64: | ||
225 | + type = TCG_TYPE_I64; | ||
226 | + break; | ||
227 | + case dh_typecode_ptr: | ||
228 | + type = TCG_TYPE_PTR; | ||
229 | + break; | ||
230 | + default: | ||
231 | + g_assert_not_reached(); | ||
232 | + } | ||
233 | + | ||
234 | + switch (type) { | ||
235 | + case TCG_TYPE_I32: | ||
236 | + switch (TCG_TARGET_CALL_ARG_I32) { | ||
237 | + case TCG_CALL_ARG_EVEN: | ||
238 | + layout_arg_even(&cum); | ||
239 | + /* fall through */ | ||
240 | + case TCG_CALL_ARG_NORMAL: | ||
241 | + layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); | ||
242 | + break; | ||
243 | + case TCG_CALL_ARG_EXTEND: | ||
244 | + kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); | ||
245 | + layout_arg_1(&cum, info, kind); | ||
246 | + break; | ||
247 | + default: | ||
248 | + qemu_build_not_reached(); | ||
249 | + } | ||
250 | + break; | ||
251 | + | ||
252 | + case TCG_TYPE_I64: | ||
253 | + switch (TCG_TARGET_CALL_ARG_I64) { | ||
254 | + case TCG_CALL_ARG_EVEN: | ||
255 | + layout_arg_even(&cum); | ||
256 | + /* fall through */ | ||
257 | + case TCG_CALL_ARG_NORMAL: | ||
258 | + if (TCG_TARGET_REG_BITS == 32) { | ||
259 | + layout_arg_normal_n(&cum, info, 2); | ||
260 | + } else { | ||
261 | + layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); | ||
262 | + } | ||
263 | + break; | ||
264 | + default: | ||
265 | + qemu_build_not_reached(); | ||
266 | + } | ||
267 | + break; | ||
268 | + | ||
269 | + default: | ||
270 | + g_assert_not_reached(); | ||
271 | + } | ||
272 | + } | ||
273 | + info->nr_in = cum.info_in_idx; | ||
274 | + | ||
275 | + /* Validate that we didn't overrun the input array. */ | ||
276 | + assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); | ||
277 | + /* Validate the backend has enough argument space. */ | ||
278 | + assert(cum.arg_slot <= max_reg_slots + max_stk_slots); | ||
279 | + assert(cum.ref_slot <= max_stk_slots); | ||
280 | +} | ||
281 | + | ||
282 | static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; | ||
283 | static void process_op_defs(TCGContext *s); | ||
284 | static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, | ||
285 | @@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus) | ||
286 | helper_table = g_hash_table_new(NULL, NULL); | ||
287 | |||
288 | for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { | ||
289 | + init_call_layout(&all_helpers[i]); | ||
290 | g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, | ||
291 | (gpointer)&all_helpers[i]); | ||
292 | } | ||
293 | @@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op) | ||
294 | } | ||
295 | } | ||
296 | |||
297 | -/* Note: we convert the 64 bit args to 32 bit and do some alignment | ||
298 | - and endian swap. Maybe it would be better to do the alignment | ||
299 | - and endian swap in tcg_reg_alloc_call(). */ | ||
300 | +static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); | ||
301 | + | ||
302 | void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
303 | { | ||
304 | - int i, real_args, nb_rets, pi, max_args; | ||
305 | - unsigned typemask; | ||
306 | const TCGHelperInfo *info; | ||
307 | + TCGv_i64 extend_free[MAX_CALL_IARGS]; | ||
308 | + int n_extend = 0; | ||
309 | TCGOp *op; | ||
310 | + int i, n, pi = 0, total_args; | ||
311 | |||
312 | info = g_hash_table_lookup(helper_table, (gpointer)func); | ||
313 | - typemask = info->typemask; | ||
314 | + total_args = info->nr_out + info->nr_in + 2; | ||
315 | + op = tcg_op_alloc(INDEX_op_call, total_args); | ||
316 | |||
317 | #ifdef CONFIG_PLUGIN | ||
318 | /* detect non-plugin helpers */ | ||
319 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
320 | } | ||
321 | #endif | ||
322 | |||
323 | - if (TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_EXTEND) { | ||
324 | - for (i = 0; i < nargs; ++i) { | ||
325 | - int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
326 | - bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
327 | - bool is_signed = argtype & 1; | ||
328 | + TCGOP_CALLO(op) = n = info->nr_out; | ||
329 | + switch (n) { | ||
330 | + case 0: | ||
331 | + tcg_debug_assert(ret == NULL); | ||
332 | + break; | ||
333 | + case 1: | ||
334 | + tcg_debug_assert(ret != NULL); | ||
335 | + op->args[pi++] = temp_arg(ret); | ||
336 | + break; | ||
337 | + case 2: | ||
338 | + tcg_debug_assert(ret != NULL); | ||
339 | + tcg_debug_assert(ret->base_type == ret->type + 1); | ||
340 | + tcg_debug_assert(ret->temp_subindex == 0); | ||
341 | + op->args[pi++] = temp_arg(ret); | ||
342 | + op->args[pi++] = temp_arg(ret + 1); | ||
343 | + break; | ||
344 | + default: | ||
345 | + g_assert_not_reached(); | ||
346 | + } | ||
347 | |||
348 | - if (is_32bit) { | ||
349 | + TCGOP_CALLI(op) = n = info->nr_in; | ||
350 | + for (i = 0; i < n; i++) { | ||
351 | + const TCGCallArgumentLoc *loc = &info->in[i]; | ||
352 | + TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; | ||
353 | + | ||
354 | + switch (loc->kind) { | ||
355 | + case TCG_CALL_ARG_NORMAL: | ||
356 | + op->args[pi++] = temp_arg(ts); | ||
357 | + break; | ||
358 | + | ||
359 | + case TCG_CALL_ARG_EXTEND_U: | ||
360 | + case TCG_CALL_ARG_EXTEND_S: | ||
361 | + { | ||
362 | TCGv_i64 temp = tcg_temp_new_i64(); | ||
363 | - TCGv_i32 orig = temp_tcgv_i32(args[i]); | ||
364 | - if (is_signed) { | ||
365 | + TCGv_i32 orig = temp_tcgv_i32(ts); | ||
366 | + | ||
367 | + if (loc->kind == TCG_CALL_ARG_EXTEND_S) { | ||
368 | tcg_gen_ext_i32_i64(temp, orig); | ||
369 | } else { | ||
370 | tcg_gen_extu_i32_i64(temp, orig); | ||
371 | } | ||
372 | - args[i] = tcgv_i64_temp(temp); | ||
373 | + op->args[pi++] = tcgv_i64_arg(temp); | ||
374 | + extend_free[n_extend++] = temp; | ||
375 | } | ||
376 | - } | ||
377 | - } | ||
378 | - | ||
379 | - /* | ||
380 | - * A Call op needs up to 4 + 2N parameters on 32-bit archs, | ||
381 | - * and up to 4 + N parameters on 64-bit archs | ||
382 | - * (N = number of input arguments + output arguments). | ||
383 | - */ | ||
384 | - max_args = (64 / TCG_TARGET_REG_BITS) * nargs + 4; | ||
385 | - op = tcg_emit_op(INDEX_op_call, max_args); | ||
386 | - | ||
387 | - pi = 0; | ||
388 | - if (ret != NULL) { | ||
389 | - if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { | ||
390 | - op->args[pi++] = temp_arg(ret); | ||
391 | - op->args[pi++] = temp_arg(ret + 1); | ||
392 | - nb_rets = 2; | ||
393 | - } else { | ||
394 | - op->args[pi++] = temp_arg(ret); | ||
395 | - nb_rets = 1; | ||
396 | - } | ||
397 | - } else { | ||
398 | - nb_rets = 0; | ||
399 | - } | ||
400 | - TCGOP_CALLO(op) = nb_rets; | ||
401 | - | ||
402 | - real_args = 0; | ||
403 | - for (i = 0; i < nargs; i++) { | ||
404 | - int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
405 | - TCGCallArgumentKind kind; | ||
406 | - TCGType type; | ||
407 | - | ||
408 | - switch (argtype) { | ||
409 | - case dh_typecode_i32: | ||
410 | - case dh_typecode_s32: | ||
411 | - type = TCG_TYPE_I32; | ||
412 | break; | ||
413 | - case dh_typecode_i64: | ||
414 | - case dh_typecode_s64: | ||
415 | - type = TCG_TYPE_I64; | ||
416 | - break; | ||
417 | - case dh_typecode_ptr: | ||
418 | - type = TCG_TYPE_PTR; | ||
419 | - break; | ||
420 | - default: | ||
421 | - g_assert_not_reached(); | ||
422 | - } | ||
423 | |||
424 | - switch (type) { | ||
425 | - case TCG_TYPE_I32: | ||
426 | - kind = TCG_TARGET_CALL_ARG_I32; | ||
427 | - break; | ||
428 | - case TCG_TYPE_I64: | ||
429 | - kind = TCG_TARGET_CALL_ARG_I64; | ||
430 | - break; | ||
431 | - default: | ||
432 | - g_assert_not_reached(); | ||
433 | - } | ||
434 | - | ||
435 | - switch (kind) { | ||
436 | - case TCG_CALL_ARG_EVEN: | ||
437 | - if (real_args & 1) { | ||
438 | - op->args[pi++] = TCG_CALL_DUMMY_ARG; | ||
439 | - real_args++; | ||
440 | - } | ||
441 | - /* fall through */ | ||
442 | - case TCG_CALL_ARG_NORMAL: | ||
443 | - if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { | ||
444 | - op->args[pi++] = temp_arg(args[i]); | ||
445 | - op->args[pi++] = temp_arg(args[i] + 1); | ||
446 | - real_args += 2; | ||
447 | - break; | ||
448 | - } | ||
449 | - op->args[pi++] = temp_arg(args[i]); | ||
450 | - real_args++; | ||
451 | - break; | ||
452 | default: | ||
453 | g_assert_not_reached(); | ||
454 | } | ||
455 | } | ||
456 | op->args[pi++] = (uintptr_t)func; | ||
457 | op->args[pi++] = (uintptr_t)info; | ||
458 | - TCGOP_CALLI(op) = real_args; | ||
459 | + tcg_debug_assert(pi == total_args); | ||
460 | |||
461 | - /* Make sure the fields didn't overflow. */ | ||
462 | - tcg_debug_assert(TCGOP_CALLI(op) == real_args); | ||
463 | - tcg_debug_assert(pi <= max_args); | ||
464 | + QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); | ||
465 | |||
466 | - if (TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_EXTEND) { | ||
467 | - for (i = 0; i < nargs; ++i) { | ||
468 | - int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
469 | - bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
470 | - | ||
471 | - if (is_32bit) { | ||
472 | - tcg_temp_free_internal(args[i]); | ||
473 | - } | ||
474 | - } | ||
475 | + tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); | ||
476 | + for (i = 0; i < n_extend; ++i) { | ||
477 | + tcg_temp_free_i64(extend_free[i]); | ||
478 | } | ||
479 | } | ||
480 | |||
481 | @@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) | ||
482 | } | ||
483 | for (i = 0; i < nb_iargs; i++) { | ||
484 | TCGArg arg = op->args[nb_oargs + i]; | ||
485 | - const char *t = "<dummy>"; | ||
486 | - if (arg != TCG_CALL_DUMMY_ARG) { | ||
487 | - t = tcg_get_arg_str(s, buf, sizeof(buf), arg); | ||
488 | - } | ||
489 | + const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); | ||
490 | col += ne_fprintf(f, ",%s", t); | ||
491 | } | ||
492 | } else { | ||
493 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
494 | switch (opc) { | ||
495 | case INDEX_op_call: | ||
496 | { | ||
497 | - int call_flags; | ||
498 | - int nb_call_regs; | ||
499 | + const TCGHelperInfo *info = tcg_call_info(op); | ||
500 | + int call_flags = tcg_call_flags(op); | ||
501 | |||
502 | nb_oargs = TCGOP_CALLO(op); | ||
503 | nb_iargs = TCGOP_CALLI(op); | ||
504 | - call_flags = tcg_call_flags(op); | ||
505 | |||
506 | /* pure functions can be removed if their result is unused */ | ||
507 | if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { | ||
508 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
509 | /* Record arguments that die in this helper. */ | ||
510 | for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { | ||
511 | ts = arg_temp(op->args[i]); | ||
512 | - if (ts && ts->state & TS_DEAD) { | ||
513 | + if (ts->state & TS_DEAD) { | ||
514 | arg_life |= DEAD_ARG << i; | ||
515 | } | ||
516 | } | ||
517 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
518 | /* For all live registers, remove call-clobbered prefs. */ | ||
519 | la_cross_call(s, nb_temps); | ||
520 | |||
521 | - nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); | ||
522 | + /* | ||
523 | + * Input arguments are live for preceding opcodes. | ||
524 | + * | ||
525 | + * For those arguments that die, and will be allocated in | ||
526 | + * registers, clear the register set for that arg, to be | ||
527 | + * filled in below. For args that will be on the stack, | ||
528 | + * reset to any available reg. Process arguments in reverse | ||
529 | + * order so that if a temp is used more than once, the stack | ||
530 | + * reset to max happens before the register reset to 0. | ||
531 | + */ | ||
532 | + for (i = nb_iargs - 1; i >= 0; i--) { | ||
533 | + const TCGCallArgumentLoc *loc = &info->in[i]; | ||
534 | + ts = arg_temp(op->args[nb_oargs + i]); | ||
535 | |||
536 | - /* Input arguments are live for preceding opcodes. */ | ||
537 | - for (i = 0; i < nb_iargs; i++) { | ||
538 | - ts = arg_temp(op->args[i + nb_oargs]); | ||
539 | - if (ts && ts->state & TS_DEAD) { | ||
540 | - /* For those arguments that die, and will be allocated | ||
541 | - * in registers, clear the register set for that arg, | ||
542 | - * to be filled in below. For args that will be on | ||
543 | - * the stack, reset to any available reg. | ||
544 | - */ | ||
545 | - *la_temp_pref(ts) | ||
546 | - = (i < nb_call_regs ? 0 : | ||
547 | - tcg_target_available_regs[ts->type]); | ||
548 | + if (ts->state & TS_DEAD) { | ||
549 | + switch (loc->kind) { | ||
550 | + case TCG_CALL_ARG_NORMAL: | ||
551 | + case TCG_CALL_ARG_EXTEND_U: | ||
552 | + case TCG_CALL_ARG_EXTEND_S: | ||
553 | + if (REG_P(loc)) { | ||
554 | + *la_temp_pref(ts) = 0; | ||
555 | + break; | ||
556 | + } | ||
557 | + /* fall through */ | ||
558 | + default: | ||
559 | + *la_temp_pref(ts) = | ||
560 | + tcg_target_available_regs[ts->type]; | ||
561 | + break; | ||
562 | + } | ||
563 | ts->state &= ~TS_DEAD; | ||
564 | } | ||
565 | } | ||
566 | |||
567 | - /* For each input argument, add its input register to prefs. | ||
568 | - If a temp is used once, this produces a single set bit. */ | ||
569 | - for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { | ||
570 | - ts = arg_temp(op->args[i + nb_oargs]); | ||
571 | - if (ts) { | ||
572 | - tcg_regset_set_reg(*la_temp_pref(ts), | ||
573 | - tcg_target_call_iarg_regs[i]); | ||
574 | + /* | ||
575 | + * For each input argument, add its input register to prefs. | ||
576 | + * If a temp is used once, this produces a single set bit; | ||
577 | + * if a temp is used multiple times, this produces a set. | ||
578 | + */ | ||
579 | + for (i = 0; i < nb_iargs; i++) { | ||
580 | + const TCGCallArgumentLoc *loc = &info->in[i]; | ||
581 | + ts = arg_temp(op->args[nb_oargs + i]); | ||
582 | + | ||
583 | + switch (loc->kind) { | ||
584 | + case TCG_CALL_ARG_NORMAL: | ||
585 | + case TCG_CALL_ARG_EXTEND_U: | ||
586 | + case TCG_CALL_ARG_EXTEND_S: | ||
587 | + if (REG_P(loc)) { | ||
588 | + tcg_regset_set_reg(*la_temp_pref(ts), | ||
589 | + tcg_target_call_iarg_regs[loc->arg_slot]); | ||
590 | + } | ||
591 | + break; | ||
592 | + default: | ||
593 | + break; | ||
594 | } | ||
595 | } | ||
596 | } | ||
597 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
598 | /* Make sure that input arguments are available. */ | ||
599 | for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { | ||
600 | arg_ts = arg_temp(op->args[i]); | ||
601 | - if (arg_ts) { | ||
602 | - dir_ts = arg_ts->state_ptr; | ||
603 | - if (dir_ts && arg_ts->state == TS_DEAD) { | ||
604 | - TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 | ||
605 | - ? INDEX_op_ld_i32 | ||
606 | - : INDEX_op_ld_i64); | ||
607 | - TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); | ||
608 | + dir_ts = arg_ts->state_ptr; | ||
609 | + if (dir_ts && arg_ts->state == TS_DEAD) { | ||
610 | + TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 | ||
611 | + ? INDEX_op_ld_i32 | ||
612 | + : INDEX_op_ld_i64); | ||
613 | + TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); | ||
614 | |||
615 | - lop->args[0] = temp_arg(dir_ts); | ||
616 | - lop->args[1] = temp_arg(arg_ts->mem_base); | ||
617 | - lop->args[2] = arg_ts->mem_offset; | ||
618 | + lop->args[0] = temp_arg(dir_ts); | ||
619 | + lop->args[1] = temp_arg(arg_ts->mem_base); | ||
620 | + lop->args[2] = arg_ts->mem_offset; | ||
621 | |||
622 | - /* Loaded, but synced with memory. */ | ||
623 | - arg_ts->state = TS_MEM; | ||
624 | - } | ||
625 | + /* Loaded, but synced with memory. */ | ||
626 | + arg_ts->state = TS_MEM; | ||
627 | } | ||
628 | } | ||
629 | |||
630 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
631 | so that we reload when needed. */ | ||
632 | for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { | ||
633 | arg_ts = arg_temp(op->args[i]); | ||
634 | - if (arg_ts) { | ||
635 | - dir_ts = arg_ts->state_ptr; | ||
636 | - if (dir_ts) { | ||
637 | - op->args[i] = temp_arg(dir_ts); | ||
638 | - changes = true; | ||
639 | - if (IS_DEAD_ARG(i)) { | ||
640 | - arg_ts->state = TS_DEAD; | ||
641 | - } | ||
642 | + dir_ts = arg_ts->state_ptr; | ||
643 | + if (dir_ts) { | ||
644 | + op->args[i] = temp_arg(dir_ts); | ||
645 | + changes = true; | ||
646 | + if (IS_DEAD_ARG(i)) { | ||
647 | + arg_ts->state = TS_DEAD; | ||
648 | } | ||
649 | } | ||
650 | } | ||
651 | @@ -XXX,XX +XXX,XX @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) | ||
652 | return true; | ||
653 | } | ||
654 | |||
655 | +static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, | ||
656 | + TCGRegSet allocated_regs) | ||
657 | +{ | ||
658 | + if (ts->val_type == TEMP_VAL_REG) { | ||
659 | + if (ts->reg != reg) { | ||
660 | + tcg_reg_free(s, reg, allocated_regs); | ||
661 | + if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { | ||
662 | + /* | ||
663 | + * Cross register class move not supported. Sync the | ||
664 | + * temp back to its slot and load from there. | ||
665 | + */ | ||
666 | + temp_sync(s, ts, allocated_regs, 0, 0); | ||
667 | + tcg_out_ld(s, ts->type, reg, | ||
668 | + ts->mem_base->reg, ts->mem_offset); | ||
669 | + } | ||
670 | + } | ||
671 | + } else { | ||
672 | + TCGRegSet arg_set = 0; | ||
673 | + | ||
674 | + tcg_reg_free(s, reg, allocated_regs); | ||
675 | + tcg_regset_set_reg(arg_set, reg); | ||
676 | + temp_load(s, ts, arg_set, allocated_regs, 0); | ||
677 | + } | ||
678 | +} | ||
679 | + | ||
680 | +static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts, | ||
681 | + TCGRegSet allocated_regs) | ||
682 | +{ | ||
683 | + /* | ||
684 | + * When the destination is on the stack, load up the temp and store. | ||
685 | + * If there are many call-saved registers, the temp might live to | ||
686 | + * see another use; otherwise it'll be discarded. | ||
687 | + */ | ||
688 | + temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); | ||
689 | + tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, | ||
690 | + TCG_TARGET_CALL_STACK_OFFSET + | ||
691 | + stk_slot * sizeof(tcg_target_long)); | ||
692 | +} | ||
693 | + | ||
694 | +static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, | ||
695 | + TCGTemp *ts, TCGRegSet *allocated_regs) | ||
696 | +{ | ||
697 | + if (REG_P(l)) { | ||
698 | + TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; | ||
699 | + load_arg_reg(s, reg, ts, *allocated_regs); | ||
700 | + tcg_regset_set_reg(*allocated_regs, reg); | ||
701 | + } else { | ||
702 | + load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs), | ||
703 | + ts, *allocated_regs); | ||
704 | + } | ||
705 | +} | ||
706 | + | ||
707 | static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
708 | { | ||
709 | const int nb_oargs = TCGOP_CALLO(op); | ||
710 | const int nb_iargs = TCGOP_CALLI(op); | ||
711 | const TCGLifeData arg_life = op->life; | ||
712 | - const TCGHelperInfo *info; | ||
713 | - int flags, nb_regs, i; | ||
714 | - TCGReg reg; | ||
715 | - TCGArg arg; | ||
716 | - TCGTemp *ts; | ||
717 | - intptr_t stack_offset; | ||
718 | - size_t call_stack_size; | ||
719 | - tcg_insn_unit *func_addr; | ||
720 | - int allocate_args; | ||
721 | - TCGRegSet allocated_regs; | ||
722 | + const TCGHelperInfo *info = tcg_call_info(op); | ||
723 | + TCGRegSet allocated_regs = s->reserved_regs; | ||
724 | + int i; | ||
725 | |||
726 | - func_addr = tcg_call_func(op); | ||
727 | - info = tcg_call_info(op); | ||
728 | - flags = info->flags; | ||
729 | + /* | ||
730 | + * Move inputs into place in reverse order, | ||
731 | + * so that we place stacked arguments first. | ||
732 | + */ | ||
733 | + for (i = nb_iargs - 1; i >= 0; --i) { | ||
734 | + const TCGCallArgumentLoc *loc = &info->in[i]; | ||
735 | + TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); | ||
736 | |||
737 | - nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); | ||
738 | - if (nb_regs > nb_iargs) { | ||
739 | - nb_regs = nb_iargs; | ||
740 | - } | ||
741 | - | ||
742 | - /* assign stack slots first */ | ||
743 | - call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); | ||
744 | - call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & | ||
745 | - ~(TCG_TARGET_STACK_ALIGN - 1); | ||
746 | - allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); | ||
747 | - if (allocate_args) { | ||
748 | - /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, | ||
749 | - preallocate call stack */ | ||
750 | - tcg_abort(); | ||
751 | - } | ||
752 | - | ||
753 | - stack_offset = TCG_TARGET_CALL_STACK_OFFSET; | ||
754 | - for (i = nb_regs; i < nb_iargs; i++) { | ||
755 | - arg = op->args[nb_oargs + i]; | ||
756 | - if (arg != TCG_CALL_DUMMY_ARG) { | ||
757 | - ts = arg_temp(arg); | ||
758 | - temp_load(s, ts, tcg_target_available_regs[ts->type], | ||
759 | - s->reserved_regs, 0); | ||
760 | - tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); | ||
761 | - } | ||
762 | - stack_offset += sizeof(tcg_target_long); | ||
763 | - } | ||
764 | - | ||
765 | - /* assign input registers */ | ||
766 | - allocated_regs = s->reserved_regs; | ||
767 | - for (i = 0; i < nb_regs; i++) { | ||
768 | - arg = op->args[nb_oargs + i]; | ||
769 | - if (arg != TCG_CALL_DUMMY_ARG) { | ||
770 | - ts = arg_temp(arg); | ||
771 | - reg = tcg_target_call_iarg_regs[i]; | ||
772 | - | ||
773 | - if (ts->val_type == TEMP_VAL_REG) { | ||
774 | - if (ts->reg != reg) { | ||
775 | - tcg_reg_free(s, reg, allocated_regs); | ||
776 | - if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { | ||
777 | - /* | ||
778 | - * Cross register class move not supported. Sync the | ||
779 | - * temp back to its slot and load from there. | ||
780 | - */ | ||
781 | - temp_sync(s, ts, allocated_regs, 0, 0); | ||
782 | - tcg_out_ld(s, ts->type, reg, | ||
783 | - ts->mem_base->reg, ts->mem_offset); | ||
784 | - } | ||
785 | - } | ||
786 | - } else { | ||
787 | - TCGRegSet arg_set = 0; | ||
788 | - | ||
789 | - tcg_reg_free(s, reg, allocated_regs); | ||
790 | - tcg_regset_set_reg(arg_set, reg); | ||
791 | - temp_load(s, ts, arg_set, allocated_regs, 0); | ||
792 | - } | ||
793 | - | ||
794 | - tcg_regset_set_reg(allocated_regs, reg); | ||
795 | + switch (loc->kind) { | ||
796 | + case TCG_CALL_ARG_NORMAL: | ||
797 | + case TCG_CALL_ARG_EXTEND_U: | ||
798 | + case TCG_CALL_ARG_EXTEND_S: | ||
799 | + load_arg_normal(s, loc, ts, &allocated_regs); | ||
800 | + break; | ||
801 | + default: | ||
802 | + g_assert_not_reached(); | ||
803 | } | ||
804 | } | ||
805 | |||
806 | - /* mark dead temporaries and free the associated registers */ | ||
807 | + /* Mark dead temporaries and free the associated registers. */ | ||
808 | for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { | ||
809 | if (IS_DEAD_ARG(i)) { | ||
810 | temp_dead(s, arg_temp(op->args[i])); | ||
811 | } | ||
812 | } | ||
813 | |||
814 | - /* clobber call registers */ | ||
815 | + /* Clobber call registers. */ | ||
816 | for (i = 0; i < TCG_TARGET_NB_REGS; i++) { | ||
817 | if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { | ||
818 | tcg_reg_free(s, i, allocated_regs); | ||
819 | } | ||
820 | } | ||
821 | |||
822 | - /* Save globals if they might be written by the helper, sync them if | ||
823 | - they might be read. */ | ||
824 | - if (flags & TCG_CALL_NO_READ_GLOBALS) { | ||
825 | + /* | ||
826 | + * Save globals if they might be written by the helper, | ||
827 | + * sync them if they might be read. | ||
828 | + */ | ||
829 | + if (info->flags & TCG_CALL_NO_READ_GLOBALS) { | ||
830 | /* Nothing to do */ | ||
831 | - } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { | ||
832 | + } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { | ||
833 | sync_globals(s, allocated_regs); | ||
834 | } else { | ||
835 | save_globals(s, allocated_regs); | ||
836 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
837 | gpointer hash = (gpointer)(uintptr_t)info->typemask; | ||
838 | ffi_cif *cif = g_hash_table_lookup(ffi_table, hash); | ||
839 | assert(cif != NULL); | ||
840 | - tcg_out_call(s, func_addr, cif); | ||
841 | + tcg_out_call(s, tcg_call_func(op), cif); | ||
842 | } | ||
843 | #else | ||
844 | - tcg_out_call(s, func_addr); | ||
845 | + tcg_out_call(s, tcg_call_func(op)); | ||
846 | #endif | ||
847 | |||
848 | - /* assign output registers and emit moves if needed */ | ||
849 | - for(i = 0; i < nb_oargs; i++) { | ||
850 | - arg = op->args[i]; | ||
851 | - ts = arg_temp(arg); | ||
852 | + /* Assign output registers and emit moves if needed. */ | ||
853 | + switch (info->out_kind) { | ||
854 | + case TCG_CALL_RET_NORMAL: | ||
855 | + for (i = 0; i < nb_oargs; i++) { | ||
856 | + TCGTemp *ts = arg_temp(op->args[i]); | ||
857 | + TCGReg reg = tcg_target_call_oarg_regs[i]; | ||
858 | |||
859 | - /* ENV should not be modified. */ | ||
860 | - tcg_debug_assert(!temp_readonly(ts)); | ||
861 | + /* ENV should not be modified. */ | ||
862 | + tcg_debug_assert(!temp_readonly(ts)); | ||
863 | |||
864 | - reg = tcg_target_call_oarg_regs[i]; | ||
865 | - set_temp_val_reg(s, ts, reg); | ||
866 | - ts->mem_coherent = 0; | ||
867 | + set_temp_val_reg(s, ts, reg); | ||
868 | + ts->mem_coherent = 0; | ||
869 | + } | ||
870 | + break; | ||
871 | + default: | ||
872 | + g_assert_not_reached(); | ||
873 | + } | ||
874 | + | ||
875 | + /* Flush or discard output registers as needed. */ | ||
876 | + for (i = 0; i < nb_oargs; i++) { | ||
877 | + TCGTemp *ts = arg_temp(op->args[i]); | ||
878 | if (NEED_SYNC_ARG(i)) { | ||
879 | - temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); | ||
880 | + temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); | ||
881 | } else if (IS_DEAD_ARG(i)) { | ||
882 | temp_dead(s, ts); | ||
883 | } | ||
884 | -- | ||
885 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | In the unlikely case of an invalid typecode mask, the function | ||
4 | will abort instead of returning a NULL pointer. | ||
5 | |||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-Id: <20221111074101.2069454-27-richard.henderson@linaro.org> | ||
8 | [PMD: Split from bigger patch] | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
11 | Message-Id: <20221122180804.938-2-philmd@linaro.org> | ||
12 | --- | ||
13 | tcg/tcg.c | 30 ++++++++++++++++++++---------- | ||
14 | 1 file changed, 20 insertions(+), 10 deletions(-) | ||
15 | |||
16 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/tcg/tcg.c | ||
19 | +++ b/tcg/tcg.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static GHashTable *helper_table; | ||
21 | #ifdef CONFIG_TCG_INTERPRETER | ||
22 | static GHashTable *ffi_table; | ||
23 | |||
24 | -static ffi_type * const typecode_to_ffi[8] = { | ||
25 | - [dh_typecode_void] = &ffi_type_void, | ||
26 | - [dh_typecode_i32] = &ffi_type_uint32, | ||
27 | - [dh_typecode_s32] = &ffi_type_sint32, | ||
28 | - [dh_typecode_i64] = &ffi_type_uint64, | ||
29 | - [dh_typecode_s64] = &ffi_type_sint64, | ||
30 | - [dh_typecode_ptr] = &ffi_type_pointer, | ||
31 | -}; | ||
32 | +static ffi_type *typecode_to_ffi(int argmask) | ||
33 | +{ | ||
34 | + switch (argmask) { | ||
35 | + case dh_typecode_void: | ||
36 | + return &ffi_type_void; | ||
37 | + case dh_typecode_i32: | ||
38 | + return &ffi_type_uint32; | ||
39 | + case dh_typecode_s32: | ||
40 | + return &ffi_type_sint32; | ||
41 | + case dh_typecode_i64: | ||
42 | + return &ffi_type_uint64; | ||
43 | + case dh_typecode_s64: | ||
44 | + return &ffi_type_sint64; | ||
45 | + case dh_typecode_ptr: | ||
46 | + return &ffi_type_pointer; | ||
47 | + } | ||
48 | + g_assert_not_reached(); | ||
49 | +} | ||
50 | #endif | ||
51 | |||
52 | typedef struct TCGCumulativeArgs { | ||
53 | @@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus) | ||
54 | nargs = DIV_ROUND_UP(nargs, 3); | ||
55 | |||
56 | ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); | ||
57 | - ca->cif.rtype = typecode_to_ffi[typemask & 7]; | ||
58 | + ca->cif.rtype = typecode_to_ffi(typemask & 7); | ||
59 | ca->cif.nargs = nargs; | ||
60 | |||
61 | if (nargs != 0) { | ||
62 | ca->cif.arg_types = ca->args; | ||
63 | for (int j = 0; j < nargs; ++j) { | ||
64 | int typecode = extract32(typemask, (j + 1) * 3, 3); | ||
65 | - ca->args[j] = typecode_to_ffi[typecode]; | ||
66 | + ca->args[j] = typecode_to_ffi(typecode); | ||
67 | } | ||
68 | } | ||
69 | |||
70 | -- | ||
71 | 2.34.1 | ||
72 | |||
73 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-Id: <20221111074101.2069454-27-richard.henderson@linaro.org> | ||
5 | [PMD: Split from bigger patch] | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Message-Id: <20221122180804.938-3-philmd@linaro.org> | ||
9 | --- | ||
10 | tcg/tcg.c | 83 +++++++++++++++++++++++++++++-------------------------- | ||
11 | 1 file changed, 44 insertions(+), 39 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/tcg.c | ||
16 | +++ b/tcg/tcg.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static ffi_type *typecode_to_ffi(int argmask) | ||
18 | } | ||
19 | g_assert_not_reached(); | ||
20 | } | ||
21 | -#endif | ||
22 | + | ||
23 | +static void init_ffi_layouts(void) | ||
24 | +{ | ||
25 | + /* g_direct_hash/equal for direct comparisons on uint32_t. */ | ||
26 | + ffi_table = g_hash_table_new(NULL, NULL); | ||
27 | + for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) { | ||
28 | + uint32_t typemask = all_helpers[i].typemask; | ||
29 | + gpointer hash = (gpointer)(uintptr_t)typemask; | ||
30 | + struct { | ||
31 | + ffi_cif cif; | ||
32 | + ffi_type *args[]; | ||
33 | + } *ca; | ||
34 | + ffi_status status; | ||
35 | + int nargs; | ||
36 | + | ||
37 | + if (g_hash_table_lookup(ffi_table, hash)) { | ||
38 | + continue; | ||
39 | + } | ||
40 | + | ||
41 | + /* Ignoring the return type, find the last non-zero field. */ | ||
42 | + nargs = 32 - clz32(typemask >> 3); | ||
43 | + nargs = DIV_ROUND_UP(nargs, 3); | ||
44 | + | ||
45 | + ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); | ||
46 | + ca->cif.rtype = typecode_to_ffi(typemask & 7); | ||
47 | + ca->cif.nargs = nargs; | ||
48 | + | ||
49 | + if (nargs != 0) { | ||
50 | + ca->cif.arg_types = ca->args; | ||
51 | + for (int j = 0; j < nargs; ++j) { | ||
52 | + int typecode = extract32(typemask, (j + 1) * 3, 3); | ||
53 | + ca->args[j] = typecode_to_ffi(typecode); | ||
54 | + } | ||
55 | + } | ||
56 | + | ||
57 | + status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, | ||
58 | + ca->cif.rtype, ca->cif.arg_types); | ||
59 | + assert(status == FFI_OK); | ||
60 | + | ||
61 | + g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif); | ||
62 | + } | ||
63 | +} | ||
64 | +#endif /* CONFIG_TCG_INTERPRETER */ | ||
65 | |||
66 | typedef struct TCGCumulativeArgs { | ||
67 | int arg_idx; /* tcg_gen_callN args[] */ | ||
68 | @@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus) | ||
69 | } | ||
70 | |||
71 | #ifdef CONFIG_TCG_INTERPRETER | ||
72 | - /* g_direct_hash/equal for direct comparisons on uint32_t. */ | ||
73 | - ffi_table = g_hash_table_new(NULL, NULL); | ||
74 | - for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { | ||
75 | - struct { | ||
76 | - ffi_cif cif; | ||
77 | - ffi_type *args[]; | ||
78 | - } *ca; | ||
79 | - uint32_t typemask = all_helpers[i].typemask; | ||
80 | - gpointer hash = (gpointer)(uintptr_t)typemask; | ||
81 | - ffi_status status; | ||
82 | - int nargs; | ||
83 | - | ||
84 | - if (g_hash_table_lookup(ffi_table, hash)) { | ||
85 | - continue; | ||
86 | - } | ||
87 | - | ||
88 | - /* Ignoring the return type, find the last non-zero field. */ | ||
89 | - nargs = 32 - clz32(typemask >> 3); | ||
90 | - nargs = DIV_ROUND_UP(nargs, 3); | ||
91 | - | ||
92 | - ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); | ||
93 | - ca->cif.rtype = typecode_to_ffi(typemask & 7); | ||
94 | - ca->cif.nargs = nargs; | ||
95 | - | ||
96 | - if (nargs != 0) { | ||
97 | - ca->cif.arg_types = ca->args; | ||
98 | - for (int j = 0; j < nargs; ++j) { | ||
99 | - int typecode = extract32(typemask, (j + 1) * 3, 3); | ||
100 | - ca->args[j] = typecode_to_ffi(typecode); | ||
101 | - } | ||
102 | - } | ||
103 | - | ||
104 | - status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, | ||
105 | - ca->cif.rtype, ca->cif.arg_types); | ||
106 | - assert(status == FFI_OK); | ||
107 | - | ||
108 | - g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif); | ||
109 | - } | ||
110 | + init_ffi_layouts(); | ||
111 | #endif | ||
112 | |||
113 | tcg_target_init(s); | ||
114 | -- | ||
115 | 2.34.1 | ||
116 | |||
117 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Instead of requiring a separate hash table lookup, | ||
2 | put a pointer to the CIF into TCGHelperInfo. | ||
1 | 3 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Message-Id: <20221111074101.2069454-27-richard.henderson@linaro.org> | ||
6 | [PMD: Split from bigger patch] | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Message-Id: <20221122180804.938-4-philmd@linaro.org> | ||
10 | --- | ||
11 | tcg/tcg-internal.h | 7 +++++++ | ||
12 | tcg/tcg.c | 30 ++++++++++++++---------------- | ||
13 | 2 files changed, 21 insertions(+), 16 deletions(-) | ||
14 | |||
15 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/tcg/tcg-internal.h | ||
18 | +++ b/tcg/tcg-internal.h | ||
19 | @@ -XXX,XX +XXX,XX @@ | ||
20 | #ifndef TCG_INTERNAL_H | ||
21 | #define TCG_INTERNAL_H | ||
22 | |||
23 | +#ifdef CONFIG_TCG_INTERPRETER | ||
24 | +#include <ffi.h> | ||
25 | +#endif | ||
26 | + | ||
27 | #define TCG_HIGHWATER 1024 | ||
28 | |||
29 | /* | ||
30 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGCallArgumentLoc { | ||
31 | typedef struct TCGHelperInfo { | ||
32 | void *func; | ||
33 | const char *name; | ||
34 | +#ifdef CONFIG_TCG_INTERPRETER | ||
35 | + ffi_cif *cif; | ||
36 | +#endif | ||
37 | unsigned typemask : 32; | ||
38 | unsigned flags : 8; | ||
39 | unsigned nr_in : 8; | ||
40 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/tcg/tcg.c | ||
43 | +++ b/tcg/tcg.c | ||
44 | @@ -XXX,XX +XXX,XX @@ | ||
45 | #include "tcg/tcg-ldst.h" | ||
46 | #include "tcg-internal.h" | ||
47 | |||
48 | -#ifdef CONFIG_TCG_INTERPRETER | ||
49 | -#include <ffi.h> | ||
50 | -#endif | ||
51 | - | ||
52 | /* Forward declarations for functions declared in tcg-target.c.inc and | ||
53 | used here. */ | ||
54 | static void tcg_target_init(TCGContext *s); | ||
55 | @@ -XXX,XX +XXX,XX @@ static TCGHelperInfo all_helpers[] = { | ||
56 | static GHashTable *helper_table; | ||
57 | |||
58 | #ifdef CONFIG_TCG_INTERPRETER | ||
59 | -static GHashTable *ffi_table; | ||
60 | - | ||
61 | static ffi_type *typecode_to_ffi(int argmask) | ||
62 | { | ||
63 | switch (argmask) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static ffi_type *typecode_to_ffi(int argmask) | ||
65 | static void init_ffi_layouts(void) | ||
66 | { | ||
67 | /* g_direct_hash/equal for direct comparisons on uint32_t. */ | ||
68 | - ffi_table = g_hash_table_new(NULL, NULL); | ||
69 | + GHashTable *ffi_table = g_hash_table_new(NULL, NULL); | ||
70 | + | ||
71 | for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) { | ||
72 | - uint32_t typemask = all_helpers[i].typemask; | ||
73 | + TCGHelperInfo *info = &all_helpers[i]; | ||
74 | + unsigned typemask = info->typemask; | ||
75 | gpointer hash = (gpointer)(uintptr_t)typemask; | ||
76 | struct { | ||
77 | ffi_cif cif; | ||
78 | @@ -XXX,XX +XXX,XX @@ static void init_ffi_layouts(void) | ||
79 | } *ca; | ||
80 | ffi_status status; | ||
81 | int nargs; | ||
82 | + ffi_cif *cif; | ||
83 | |||
84 | - if (g_hash_table_lookup(ffi_table, hash)) { | ||
85 | + cif = g_hash_table_lookup(ffi_table, hash); | ||
86 | + if (cif) { | ||
87 | + info->cif = cif; | ||
88 | continue; | ||
89 | } | ||
90 | |||
91 | @@ -XXX,XX +XXX,XX @@ static void init_ffi_layouts(void) | ||
92 | ca->cif.rtype, ca->cif.arg_types); | ||
93 | assert(status == FFI_OK); | ||
94 | |||
95 | - g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif); | ||
96 | + cif = &ca->cif; | ||
97 | + info->cif = cif; | ||
98 | + g_hash_table_insert(ffi_table, hash, (gpointer)cif); | ||
99 | } | ||
100 | + | ||
101 | + g_hash_table_destroy(ffi_table); | ||
102 | } | ||
103 | #endif /* CONFIG_TCG_INTERPRETER */ | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
106 | } | ||
107 | |||
108 | #ifdef CONFIG_TCG_INTERPRETER | ||
109 | - { | ||
110 | - gpointer hash = (gpointer)(uintptr_t)info->typemask; | ||
111 | - ffi_cif *cif = g_hash_table_lookup(ffi_table, hash); | ||
112 | - assert(cif != NULL); | ||
113 | - tcg_out_call(s, tcg_call_func(op), cif); | ||
114 | - } | ||
115 | + tcg_out_call(s, tcg_call_func(op), info->cif); | ||
116 | #else | ||
117 | tcg_out_call(s, tcg_call_func(op)); | ||
118 | #endif | ||
119 | -- | ||
120 | 2.34.1 | ||
121 | |||
122 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | There is only one use, and BLR is perhaps even more | ||
2 | self-documenting than CALLR. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/aarch64/tcg-target.c.inc | 7 +------ | ||
8 | 1 file changed, 1 insertion(+), 6 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/aarch64/tcg-target.c.inc | ||
13 | +++ b/tcg/aarch64/tcg-target.c.inc | ||
14 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) | ||
15 | } | ||
16 | } | ||
17 | |||
18 | -static inline void tcg_out_callr(TCGContext *s, TCGReg reg) | ||
19 | -{ | ||
20 | - tcg_out_insn(s, 3207, BLR, reg); | ||
21 | -} | ||
22 | - | ||
23 | static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) | ||
24 | { | ||
25 | ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; | ||
26 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) | ||
27 | tcg_out_insn(s, 3206, BL, offset); | ||
28 | } else { | ||
29 | tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); | ||
30 | - tcg_out_callr(s, TCG_REG_TMP); | ||
31 | + tcg_out_insn(s, 3207, BLR, TCG_REG_TMP); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | -- | ||
36 | 2.34.1 | ||
37 | |||
38 | diff view generated by jsdifflib |
1 | From: Max Filippov <jcmvbkbc@gmail.com> | 1 | This eliminates an ifdef for TCI, and will be required for |
---|---|---|---|
2 | expanding the call for TCGv_i128. | ||
2 | 3 | ||
3 | When a breakpoint is inserted at a location for which there's currently no | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | virtual to physical translation, no action is taken on CPU TB cache. If a | ||
5 | TB for that virtual address already exists but is not visible ATM the | ||
6 | breakpoint won't be hit next time an instruction at that address will be | ||
7 | executed. | ||
8 | |||
9 | Flush entire CPU TB cache in breakpoint_invalidate to force | ||
10 | re-translation of all TBs for the breakpoint address. | ||
11 | |||
12 | This change fixes the following scenario: | ||
13 | - linux user application is running | ||
14 | - a breakpoint is inserted from QEMU gdbstub for a user address that is | ||
15 | not currently present in the target CPU TLB | ||
16 | - an instruction at that address is executed, but the external debugger | ||
17 | doesn't get control. | ||
18 | |||
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
20 | Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> | ||
21 | Message-Id: <20191127220602.10827-2-jcmvbkbc@gmail.com> | ||
22 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
23 | --- | 6 | --- |
24 | exec.c | 15 +++++++-------- | 7 | tcg/tcg.c | 12 ++---------- |
25 | 1 file changed, 7 insertions(+), 8 deletions(-) | 8 | tcg/aarch64/tcg-target.c.inc | 12 +++++++++--- |
9 | tcg/arm/tcg-target.c.inc | 10 ++++++++-- | ||
10 | tcg/i386/tcg-target.c.inc | 5 +++-- | ||
11 | tcg/loongarch64/tcg-target.c.inc | 7 ++++--- | ||
12 | tcg/mips/tcg-target.c.inc | 3 ++- | ||
13 | tcg/ppc/tcg-target.c.inc | 7 ++++--- | ||
14 | tcg/riscv/tcg-target.c.inc | 7 ++++--- | ||
15 | tcg/s390x/tcg-target.c.inc | 12 +++++++++--- | ||
16 | tcg/sparc64/tcg-target.c.inc | 3 ++- | ||
17 | tcg/tci/tcg-target.c.inc | 3 ++- | ||
18 | 11 files changed, 49 insertions(+), 32 deletions(-) | ||
26 | 19 | ||
27 | diff --git a/exec.c b/exec.c | 20 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
28 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/exec.c | 22 | --- a/tcg/tcg.c |
30 | +++ b/exec.c | 23 | +++ b/tcg/tcg.c |
31 | @@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) | 24 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, |
32 | 25 | intptr_t arg2); | |
33 | static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) | 26 | static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, |
34 | { | 27 | TCGReg base, intptr_t ofs); |
35 | - MemTxAttrs attrs; | 28 | -#ifdef CONFIG_TCG_INTERPRETER |
36 | - hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs); | 29 | static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, |
37 | - int asidx = cpu_asidx_from_attrs(cpu, attrs); | 30 | - ffi_cif *cif); |
38 | - if (phys != -1) { | 31 | -#else |
39 | - /* Locks grabbed by tb_invalidate_phys_addr */ | 32 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target); |
40 | - tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as, | 33 | -#endif |
41 | - phys | (pc & ~TARGET_PAGE_MASK), attrs); | 34 | + const TCGHelperInfo *info); |
42 | - } | 35 | static bool tcg_target_const_match(int64_t val, TCGType type, int ct); |
43 | + /* | 36 | #ifdef TCG_TARGET_NEED_LDST_LABELS |
44 | + * There may not be a virtual to physical translation for the pc | 37 | static int tcg_out_ldst_finalize(TCGContext *s); |
45 | + * right now, but there may exist cached TB for this pc. | 38 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) |
46 | + * Flush the whole TB cache to force re-translation of such TBs. | 39 | save_globals(s, allocated_regs); |
47 | + * This is heavyweight, but we're debugging anyway. | 40 | } |
48 | + */ | 41 | |
49 | + tb_flush(cpu); | 42 | -#ifdef CONFIG_TCG_INTERPRETER |
50 | } | 43 | - tcg_out_call(s, tcg_call_func(op), info->cif); |
44 | -#else | ||
45 | - tcg_out_call(s, tcg_call_func(op)); | ||
46 | -#endif | ||
47 | + tcg_out_call(s, tcg_call_func(op), info); | ||
48 | |||
49 | /* Assign output registers and emit moves if needed. */ | ||
50 | switch (info->out_kind) { | ||
51 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/tcg/aarch64/tcg-target.c.inc | ||
54 | +++ b/tcg/aarch64/tcg-target.c.inc | ||
55 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) | ||
56 | } | ||
57 | } | ||
58 | |||
59 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) | ||
60 | +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target) | ||
61 | { | ||
62 | ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; | ||
63 | if (offset == sextract64(offset, 0, 26)) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) | ||
65 | } | ||
66 | } | ||
67 | |||
68 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, | ||
69 | + const TCGHelperInfo *info) | ||
70 | +{ | ||
71 | + tcg_out_call_int(s, target); | ||
72 | +} | ||
73 | + | ||
74 | void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, | ||
75 | uintptr_t jmp_rw, uintptr_t addr) | ||
76 | { | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
78 | tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); | ||
79 | tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); | ||
80 | tcg_out_adr(s, TCG_REG_X3, lb->raddr); | ||
81 | - tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); | ||
82 | + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); | ||
83 | if (opc & MO_SIGN) { | ||
84 | tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); | ||
85 | } else { | ||
86 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
87 | tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); | ||
88 | tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); | ||
89 | tcg_out_adr(s, TCG_REG_X4, lb->raddr); | ||
90 | - tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); | ||
91 | + tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]); | ||
92 | tcg_out_goto(s, lb->raddr); | ||
93 | return true; | ||
94 | } | ||
95 | diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/tcg/arm/tcg-target.c.inc | ||
98 | +++ b/tcg/arm/tcg-target.c.inc | ||
99 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, ARMCond cond, const tcg_insn_unit *addr) | ||
100 | * The call case is mostly used for helpers - so it's not unreasonable | ||
101 | * for them to be beyond branch range. | ||
102 | */ | ||
103 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr) | ||
104 | +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *addr) | ||
105 | { | ||
106 | intptr_t addri = (intptr_t)addr; | ||
107 | ptrdiff_t disp = tcg_pcrel_diff(s, addr); | ||
108 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr) | ||
109 | tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP); | ||
110 | } | ||
111 | |||
112 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr, | ||
113 | + const TCGHelperInfo *info) | ||
114 | +{ | ||
115 | + tcg_out_call_int(s, addr); | ||
116 | +} | ||
117 | + | ||
118 | static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l) | ||
119 | { | ||
120 | if (l->has_value) { | ||
121 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
122 | argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); | ||
123 | |||
124 | /* Use the canonical unsigned helpers and minimize icache usage. */ | ||
125 | - tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); | ||
126 | + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); | ||
127 | |||
128 | datalo = lb->datalo_reg; | ||
129 | datahi = lb->datahi_reg; | ||
130 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc | ||
131 | index XXXXXXX..XXXXXXX 100644 | ||
132 | --- a/tcg/i386/tcg-target.c.inc | ||
133 | +++ b/tcg/i386/tcg-target.c.inc | ||
134 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) | ||
135 | } | ||
136 | } | ||
137 | |||
138 | -static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) | ||
139 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, | ||
140 | + const TCGHelperInfo *info) | ||
141 | { | ||
142 | tcg_out_branch(s, 1, dest); | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
145 | (uintptr_t)l->raddr); | ||
146 | } | ||
147 | |||
148 | - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
149 | + tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
150 | |||
151 | data_reg = l->datalo_reg; | ||
152 | switch (opc & MO_SSIZE) { | ||
153 | diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc | ||
154 | index XXXXXXX..XXXXXXX 100644 | ||
155 | --- a/tcg/loongarch64/tcg-target.c.inc | ||
156 | +++ b/tcg/loongarch64/tcg-target.c.inc | ||
157 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) | ||
158 | } | ||
159 | } | ||
160 | |||
161 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) | ||
162 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, | ||
163 | + const TCGHelperInfo *info) | ||
164 | { | ||
165 | tcg_out_call_int(s, arg, false); | ||
166 | } | ||
167 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
168 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi); | ||
169 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr); | ||
170 | |||
171 | - tcg_out_call(s, qemu_ld_helpers[size]); | ||
172 | + tcg_out_call_int(s, qemu_ld_helpers[size], false); | ||
173 | |||
174 | switch (opc & MO_SSIZE) { | ||
175 | case MO_SB: | ||
176 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
177 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi); | ||
178 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr); | ||
179 | |||
180 | - tcg_out_call(s, qemu_st_helpers[size]); | ||
181 | + tcg_out_call_int(s, qemu_st_helpers[size], false); | ||
182 | |||
183 | return tcg_out_goto(s, l->raddr); | ||
184 | } | ||
185 | diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc | ||
186 | index XXXXXXX..XXXXXXX 100644 | ||
187 | --- a/tcg/mips/tcg-target.c.inc | ||
188 | +++ b/tcg/mips/tcg-target.c.inc | ||
189 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) | ||
190 | } | ||
191 | } | ||
192 | |||
193 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) | ||
194 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, | ||
195 | + const TCGHelperInfo *info) | ||
196 | { | ||
197 | tcg_out_call_int(s, arg, false); | ||
198 | tcg_out_nop(s); | ||
199 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
200 | index XXXXXXX..XXXXXXX 100644 | ||
201 | --- a/tcg/ppc/tcg-target.c.inc | ||
202 | +++ b/tcg/ppc/tcg-target.c.inc | ||
203 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, int lk, | ||
51 | #endif | 204 | #endif |
205 | } | ||
206 | |||
207 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) | ||
208 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, | ||
209 | + const TCGHelperInfo *info) | ||
210 | { | ||
211 | tcg_out_call_int(s, LK, target); | ||
212 | } | ||
213 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
214 | tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); | ||
215 | tcg_out32(s, MFSPR | RT(arg) | LR); | ||
216 | |||
217 | - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
218 | + tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
219 | |||
220 | lo = lb->datalo_reg; | ||
221 | hi = lb->datahi_reg; | ||
222 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
223 | tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); | ||
224 | tcg_out32(s, MFSPR | RT(arg) | LR); | ||
225 | |||
226 | - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
227 | + tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
228 | |||
229 | tcg_out_b(s, 0, lb->raddr); | ||
230 | return true; | ||
231 | diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc | ||
232 | index XXXXXXX..XXXXXXX 100644 | ||
233 | --- a/tcg/riscv/tcg-target.c.inc | ||
234 | +++ b/tcg/riscv/tcg-target.c.inc | ||
235 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) | ||
236 | } | ||
237 | } | ||
238 | |||
239 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) | ||
240 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, | ||
241 | + const TCGHelperInfo *info) | ||
242 | { | ||
243 | tcg_out_call_int(s, arg, false); | ||
244 | } | ||
245 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
246 | tcg_out_movi(s, TCG_TYPE_PTR, a2, oi); | ||
247 | tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr); | ||
248 | |||
249 | - tcg_out_call(s, qemu_ld_helpers[opc & MO_SSIZE]); | ||
250 | + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false); | ||
251 | tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); | ||
252 | |||
253 | tcg_out_goto(s, l->raddr); | ||
254 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
255 | tcg_out_movi(s, TCG_TYPE_PTR, a3, oi); | ||
256 | tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr); | ||
257 | |||
258 | - tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); | ||
259 | + tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); | ||
260 | |||
261 | tcg_out_goto(s, l->raddr); | ||
262 | return true; | ||
263 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | ||
264 | index XXXXXXX..XXXXXXX 100644 | ||
265 | --- a/tcg/s390x/tcg-target.c.inc | ||
266 | +++ b/tcg/s390x/tcg-target.c.inc | ||
267 | @@ -XXX,XX +XXX,XX @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, | ||
268 | tgen_branch(s, cc, l); | ||
269 | } | ||
270 | |||
271 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) | ||
272 | +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest) | ||
273 | { | ||
274 | ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1; | ||
275 | if (off == (int32_t)off) { | ||
276 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) | ||
277 | } | ||
278 | } | ||
279 | |||
280 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, | ||
281 | + const TCGHelperInfo *info) | ||
282 | +{ | ||
283 | + tcg_out_call_int(s, dest); | ||
284 | +} | ||
285 | + | ||
286 | static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data, | ||
287 | TCGReg base, TCGReg index, int disp) | ||
288 | { | ||
289 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
290 | } | ||
291 | tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); | ||
292 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); | ||
293 | - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); | ||
294 | + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); | ||
295 | tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); | ||
296 | |||
297 | tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); | ||
298 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
299 | } | ||
300 | tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); | ||
301 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); | ||
302 | - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
303 | + tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
304 | |||
305 | tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); | ||
306 | return true; | ||
307 | diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc | ||
308 | index XXXXXXX..XXXXXXX 100644 | ||
309 | --- a/tcg/sparc64/tcg-target.c.inc | ||
310 | +++ b/tcg/sparc64/tcg-target.c.inc | ||
311 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, | ||
312 | } | ||
313 | } | ||
314 | |||
315 | -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) | ||
316 | +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, | ||
317 | + const TCGHelperInfo *info) | ||
318 | { | ||
319 | tcg_out_call_nodelay(s, dest, false); | ||
320 | tcg_out_nop(s); | ||
321 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
322 | index XXXXXXX..XXXXXXX 100644 | ||
323 | --- a/tcg/tci/tcg-target.c.inc | ||
324 | +++ b/tcg/tci/tcg-target.c.inc | ||
325 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, | ||
326 | } | ||
327 | |||
328 | static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, | ||
329 | - ffi_cif *cif) | ||
330 | + const TCGHelperInfo *info) | ||
331 | { | ||
332 | + ffi_cif *cif = info->cif; | ||
333 | tcg_insn_unit insn = 0; | ||
334 | uint8_t which; | ||
52 | 335 | ||
53 | -- | 336 | -- |
54 | 2.20.1 | 337 | 2.34.1 |
55 | 338 | ||
56 | 339 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | When called from syscall(), we are not within a TB and pc == 0. | ||
2 | We can skip the check for invalidating the current TB. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | accel/tcg/tb-maint.c | 78 ++++++++++++++++++++++++-------------------- | ||
8 | 1 file changed, 43 insertions(+), 35 deletions(-) | ||
9 | |||
10 | diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/accel/tcg/tb-maint.c | ||
13 | +++ b/accel/tcg/tb-maint.c | ||
14 | @@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_page(tb_page_addr_t addr) | ||
15 | */ | ||
16 | bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc) | ||
17 | { | ||
18 | - assert(pc != 0); | ||
19 | -#ifdef TARGET_HAS_PRECISE_SMC | ||
20 | - assert_memory_lock(); | ||
21 | - { | ||
22 | - TranslationBlock *current_tb = tcg_tb_lookup(pc); | ||
23 | - bool current_tb_modified = false; | ||
24 | - TranslationBlock *tb; | ||
25 | - PageForEachNext n; | ||
26 | + TranslationBlock *current_tb; | ||
27 | + bool current_tb_modified; | ||
28 | + TranslationBlock *tb; | ||
29 | + PageForEachNext n; | ||
30 | |||
31 | - addr &= TARGET_PAGE_MASK; | ||
32 | - | ||
33 | - PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) { | ||
34 | - if (current_tb == tb && | ||
35 | - (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) { | ||
36 | - /* | ||
37 | - * If we are modifying the current TB, we must stop its | ||
38 | - * execution. We could be more precise by checking that | ||
39 | - * the modification is after the current PC, but it would | ||
40 | - * require a specialized function to partially restore | ||
41 | - * the CPU state. | ||
42 | - */ | ||
43 | - current_tb_modified = true; | ||
44 | - cpu_restore_state_from_tb(current_cpu, current_tb, pc); | ||
45 | - } | ||
46 | - tb_phys_invalidate__locked(tb); | ||
47 | - } | ||
48 | - | ||
49 | - if (current_tb_modified) { | ||
50 | - /* Force execution of one insn next time. */ | ||
51 | - CPUState *cpu = current_cpu; | ||
52 | - cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); | ||
53 | - return true; | ||
54 | - } | ||
55 | + /* | ||
56 | + * Without precise smc semantics, or when outside of a TB, | ||
57 | + * we can skip the current-TB check and simply invalidate. | ||
58 | + */ | ||
59 | +#ifndef TARGET_HAS_PRECISE_SMC | ||
60 | + pc = 0; | ||
61 | +#endif | ||
62 | + if (!pc) { | ||
63 | + tb_invalidate_phys_page(addr); | ||
64 | + return false; | ||
65 | + } | ||
66 | + | ||
67 | + assert_memory_lock(); | ||
68 | + current_tb = tcg_tb_lookup(pc); | ||
69 | + | ||
70 | + addr &= TARGET_PAGE_MASK; | ||
71 | + current_tb_modified = false; | ||
72 | + | ||
73 | + PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) { | ||
74 | + if (current_tb == tb && | ||
75 | + (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) { | ||
76 | + /* | ||
77 | + * If we are modifying the current TB, we must stop its | ||
78 | + * execution. We could be more precise by checking that | ||
79 | + * the modification is after the current PC, but it would | ||
80 | + * require a specialized function to partially restore | ||
81 | + * the CPU state. | ||
82 | + */ | ||
83 | + current_tb_modified = true; | ||
84 | + cpu_restore_state_from_tb(current_cpu, current_tb, pc); | ||
85 | + } | ||
86 | + tb_phys_invalidate__locked(tb); | ||
87 | + } | ||
88 | + | ||
89 | + if (current_tb_modified) { | ||
90 | + /* Force execution of one insn next time. */ | ||
91 | + CPUState *cpu = current_cpu; | ||
92 | + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); | ||
93 | + return true; | ||
94 | } | ||
95 | -#else | ||
96 | - tb_invalidate_phys_page(addr); | ||
97 | -#endif /* TARGET_HAS_PRECISE_SMC */ | ||
98 | return false; | ||
99 | } | ||
100 | #else | ||
101 | -- | ||
102 | 2.34.1 | ||
103 | |||
104 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Because we allow lockless lookups, we have to be careful | ||
2 | when a node is freed. Use RCU to delay the free until it is safe. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | accel/tcg/user-exec.c | 18 ++++++++++-------- | ||
8 | 1 file changed, 10 insertions(+), 8 deletions(-) | ||
9 | |||
10 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/accel/tcg/user-exec.c | ||
13 | +++ b/accel/tcg/user-exec.c | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | #include "exec/exec-all.h" | ||
16 | #include "tcg/tcg.h" | ||
17 | #include "qemu/bitops.h" | ||
18 | +#include "qemu/rcu.h" | ||
19 | #include "exec/cpu_ldst.h" | ||
20 | #include "exec/translate-all.h" | ||
21 | #include "exec/helper-proto.h" | ||
22 | @@ -XXX,XX +XXX,XX @@ bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, | ||
23 | } | ||
24 | |||
25 | typedef struct PageFlagsNode { | ||
26 | + struct rcu_head rcu; | ||
27 | IntervalTreeNode itree; | ||
28 | int flags; | ||
29 | } PageFlagsNode; | ||
30 | @@ -XXX,XX +XXX,XX @@ static bool pageflags_unset(target_ulong start, target_ulong last) | ||
31 | } | ||
32 | } else if (p_last <= last) { | ||
33 | /* Range completely covers node -- remove it. */ | ||
34 | - g_free(p); | ||
35 | + g_free_rcu(p, rcu); | ||
36 | } else { | ||
37 | /* Truncate the node from the start. */ | ||
38 | p->itree.start = last + 1; | ||
39 | @@ -XXX,XX +XXX,XX @@ static void pageflags_create_merge(target_ulong start, target_ulong last, | ||
40 | if (prev) { | ||
41 | if (next) { | ||
42 | prev->itree.last = next->itree.last; | ||
43 | - g_free(next); | ||
44 | + g_free_rcu(next, rcu); | ||
45 | } else { | ||
46 | prev->itree.last = last; | ||
47 | } | ||
48 | @@ -XXX,XX +XXX,XX @@ static bool pageflags_set_clear(target_ulong start, target_ulong last, | ||
49 | p->flags = merge_flags; | ||
50 | } else { | ||
51 | interval_tree_remove(&p->itree, &pageflags_root); | ||
52 | - g_free(p); | ||
53 | + g_free_rcu(p, rcu); | ||
54 | } | ||
55 | goto done; | ||
56 | } | ||
57 | @@ -XXX,XX +XXX,XX @@ static bool pageflags_set_clear(target_ulong start, target_ulong last, | ||
58 | p->flags = merge_flags; | ||
59 | } else { | ||
60 | interval_tree_remove(&p->itree, &pageflags_root); | ||
61 | - g_free(p); | ||
62 | + g_free_rcu(p, rcu); | ||
63 | } | ||
64 | if (p_last < last) { | ||
65 | start = p_last + 1; | ||
66 | @@ -XXX,XX +XXX,XX @@ static bool pageflags_set_clear(target_ulong start, target_ulong last, | ||
67 | p->itree.start = last + 1; | ||
68 | interval_tree_insert(&p->itree, &pageflags_root); | ||
69 | } else { | ||
70 | - g_free(p); | ||
71 | + g_free_rcu(p, rcu); | ||
72 | goto restart; | ||
73 | } | ||
74 | if (set_flags) { | ||
75 | @@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, | ||
76 | #define TBD_MASK (TARGET_PAGE_MASK * TPD_PAGES) | ||
77 | |||
78 | typedef struct TargetPageDataNode { | ||
79 | + struct rcu_head rcu; | ||
80 | IntervalTreeNode itree; | ||
81 | char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned)); | ||
82 | } TargetPageDataNode; | ||
83 | @@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong end) | ||
84 | n = next, | ||
85 | next = next ? interval_tree_iter_next(n, start, last) : NULL) { | ||
86 | target_ulong n_start, n_last, p_ofs, p_len; | ||
87 | - TargetPageDataNode *t; | ||
88 | + TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree); | ||
89 | |||
90 | if (n->start >= start && n->last <= last) { | ||
91 | interval_tree_remove(n, &targetdata_root); | ||
92 | - g_free(n); | ||
93 | + g_free_rcu(t, rcu); | ||
94 | continue; | ||
95 | } | ||
96 | |||
97 | @@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong end) | ||
98 | n_last = MIN(last, n->last); | ||
99 | p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS; | ||
100 | |||
101 | - t = container_of(n, TargetPageDataNode, itree); | ||
102 | memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE); | ||
103 | } | ||
104 | } | ||
105 | -- | ||
106 | 2.34.1 | ||
107 | |||
108 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | As in page_get_flags, we need to try again with the mmap | ||
2 | lock held if we fail a page lookup. | ||
1 | 3 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | accel/tcg/user-exec.c | 41 ++++++++++++++++++++++++++++++++++------- | ||
8 | 1 file changed, 34 insertions(+), 7 deletions(-) | ||
9 | |||
10 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/accel/tcg/user-exec.c | ||
13 | +++ b/accel/tcg/user-exec.c | ||
14 | @@ -XXX,XX +XXX,XX @@ void page_set_flags(target_ulong start, target_ulong end, int flags) | ||
15 | int page_check_range(target_ulong start, target_ulong len, int flags) | ||
16 | { | ||
17 | target_ulong last; | ||
18 | + int locked; /* tri-state: =0: unlocked, +1: global, -1: local */ | ||
19 | + int ret; | ||
20 | |||
21 | if (len == 0) { | ||
22 | return 0; /* trivial length */ | ||
23 | @@ -XXX,XX +XXX,XX @@ int page_check_range(target_ulong start, target_ulong len, int flags) | ||
24 | return -1; /* wrap around */ | ||
25 | } | ||
26 | |||
27 | + locked = have_mmap_lock(); | ||
28 | while (true) { | ||
29 | PageFlagsNode *p = pageflags_find(start, last); | ||
30 | int missing; | ||
31 | |||
32 | if (!p) { | ||
33 | - return -1; /* entire region invalid */ | ||
34 | + if (!locked) { | ||
35 | + /* | ||
36 | + * Lockless lookups have false negatives. | ||
37 | + * Retry with the lock held. | ||
38 | + */ | ||
39 | + mmap_lock(); | ||
40 | + locked = -1; | ||
41 | + p = pageflags_find(start, last); | ||
42 | + } | ||
43 | + if (!p) { | ||
44 | + ret = -1; /* entire region invalid */ | ||
45 | + break; | ||
46 | + } | ||
47 | } | ||
48 | if (start < p->itree.start) { | ||
49 | - return -1; /* initial bytes invalid */ | ||
50 | + ret = -1; /* initial bytes invalid */ | ||
51 | + break; | ||
52 | } | ||
53 | |||
54 | missing = flags & ~p->flags; | ||
55 | if (missing & PAGE_READ) { | ||
56 | - return -1; /* page not readable */ | ||
57 | + ret = -1; /* page not readable */ | ||
58 | + break; | ||
59 | } | ||
60 | if (missing & PAGE_WRITE) { | ||
61 | if (!(p->flags & PAGE_WRITE_ORG)) { | ||
62 | - return -1; /* page not writable */ | ||
63 | + ret = -1; /* page not writable */ | ||
64 | + break; | ||
65 | } | ||
66 | /* Asking about writable, but has been protected: undo. */ | ||
67 | if (!page_unprotect(start, 0)) { | ||
68 | - return -1; | ||
69 | + ret = -1; | ||
70 | + break; | ||
71 | } | ||
72 | /* TODO: page_unprotect should take a range, not a single page. */ | ||
73 | if (last - start < TARGET_PAGE_SIZE) { | ||
74 | - return 0; /* ok */ | ||
75 | + ret = 0; /* ok */ | ||
76 | + break; | ||
77 | } | ||
78 | start += TARGET_PAGE_SIZE; | ||
79 | continue; | ||
80 | } | ||
81 | |||
82 | if (last <= p->itree.last) { | ||
83 | - return 0; /* ok */ | ||
84 | + ret = 0; /* ok */ | ||
85 | + break; | ||
86 | } | ||
87 | start = p->itree.last + 1; | ||
88 | } | ||
89 | + | ||
90 | + /* Release the lock if acquired locally. */ | ||
91 | + if (locked < 0) { | ||
92 | + mmap_unlock(); | ||
93 | + } | ||
94 | + return ret; | ||
95 | } | ||
96 | |||
97 | void page_protect(tb_page_addr_t address) | ||
98 | -- | ||
99 | 2.34.1 | ||
100 | |||
101 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Ilya Leoshkevich <iii@linux.ibm.com> | |
2 | |||
3 | Add a test that locklessly changes and exercises page protection bits | ||
4 | from various threads. This helps catch race conditions in the VMA | ||
5 | handling. | ||
6 | |||
7 | Acked-by: Alex Bennée <alex.bennee@linaro.org> | ||
8 | Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> | ||
9 | Message-Id: <20221223120252.513319-1-iii@linux.ibm.com> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | ||
12 | tests/tcg/multiarch/nop_func.h | 25 ++++ | ||
13 | tests/tcg/multiarch/munmap-pthread.c | 16 +-- | ||
14 | tests/tcg/multiarch/vma-pthread.c | 207 +++++++++++++++++++++++++++ | ||
15 | tests/tcg/multiarch/Makefile.target | 3 + | ||
16 | 4 files changed, 236 insertions(+), 15 deletions(-) | ||
17 | create mode 100644 tests/tcg/multiarch/nop_func.h | ||
18 | create mode 100644 tests/tcg/multiarch/vma-pthread.c | ||
19 | |||
20 | diff --git a/tests/tcg/multiarch/nop_func.h b/tests/tcg/multiarch/nop_func.h | ||
21 | new file mode 100644 | ||
22 | index XXXXXXX..XXXXXXX | ||
23 | --- /dev/null | ||
24 | +++ b/tests/tcg/multiarch/nop_func.h | ||
25 | @@ -XXX,XX +XXX,XX @@ | ||
26 | +/* | ||
27 | + * No-op functions that can be safely copied. | ||
28 | + * | ||
29 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
30 | + */ | ||
31 | +#ifndef NOP_FUNC_H | ||
32 | +#define NOP_FUNC_H | ||
33 | + | ||
34 | +static const char nop_func[] = { | ||
35 | +#if defined(__aarch64__) | ||
36 | + 0xc0, 0x03, 0x5f, 0xd6, /* ret */ | ||
37 | +#elif defined(__alpha__) | ||
38 | + 0x01, 0x80, 0xFA, 0x6B, /* ret */ | ||
39 | +#elif defined(__arm__) | ||
40 | + 0x1e, 0xff, 0x2f, 0xe1, /* bx lr */ | ||
41 | +#elif defined(__riscv) | ||
42 | + 0x67, 0x80, 0x00, 0x00, /* ret */ | ||
43 | +#elif defined(__s390__) | ||
44 | + 0x07, 0xfe, /* br %r14 */ | ||
45 | +#elif defined(__i386__) || defined(__x86_64__) | ||
46 | + 0xc3, /* ret */ | ||
47 | +#endif | ||
48 | +}; | ||
49 | + | ||
50 | +#endif | ||
51 | diff --git a/tests/tcg/multiarch/munmap-pthread.c b/tests/tcg/multiarch/munmap-pthread.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/tests/tcg/multiarch/munmap-pthread.c | ||
54 | +++ b/tests/tcg/multiarch/munmap-pthread.c | ||
55 | @@ -XXX,XX +XXX,XX @@ | ||
56 | #include <sys/mman.h> | ||
57 | #include <unistd.h> | ||
58 | |||
59 | -static const char nop_func[] = { | ||
60 | -#if defined(__aarch64__) | ||
61 | - 0xc0, 0x03, 0x5f, 0xd6, /* ret */ | ||
62 | -#elif defined(__alpha__) | ||
63 | - 0x01, 0x80, 0xFA, 0x6B, /* ret */ | ||
64 | -#elif defined(__arm__) | ||
65 | - 0x1e, 0xff, 0x2f, 0xe1, /* bx lr */ | ||
66 | -#elif defined(__riscv) | ||
67 | - 0x67, 0x80, 0x00, 0x00, /* ret */ | ||
68 | -#elif defined(__s390__) | ||
69 | - 0x07, 0xfe, /* br %r14 */ | ||
70 | -#elif defined(__i386__) || defined(__x86_64__) | ||
71 | - 0xc3, /* ret */ | ||
72 | -#endif | ||
73 | -}; | ||
74 | +#include "nop_func.h" | ||
75 | |||
76 | static void *thread_mmap_munmap(void *arg) | ||
77 | { | ||
78 | diff --git a/tests/tcg/multiarch/vma-pthread.c b/tests/tcg/multiarch/vma-pthread.c | ||
79 | new file mode 100644 | ||
80 | index XXXXXXX..XXXXXXX | ||
81 | --- /dev/null | ||
82 | +++ b/tests/tcg/multiarch/vma-pthread.c | ||
83 | @@ -XXX,XX +XXX,XX @@ | ||
84 | +/* | ||
85 | + * Test that VMA updates do not race. | ||
86 | + * | ||
87 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
88 | + * | ||
89 | + * Map a contiguous chunk of RWX memory. Split it into 8 equally sized | ||
90 | + * regions, each of which is guaranteed to have a certain combination of | ||
91 | + * protection bits set. | ||
92 | + * | ||
93 | + * Reader, writer and executor threads perform the respective operations on | ||
94 | + * pages, which are guaranteed to have the respective protection bit set. | ||
95 | + * Two mutator threads change the non-fixed protection bits randomly. | ||
96 | + */ | ||
97 | +#include <assert.h> | ||
98 | +#include <fcntl.h> | ||
99 | +#include <pthread.h> | ||
100 | +#include <stdbool.h> | ||
101 | +#include <stdlib.h> | ||
102 | +#include <string.h> | ||
103 | +#include <stdio.h> | ||
104 | +#include <sys/mman.h> | ||
105 | +#include <unistd.h> | ||
106 | + | ||
107 | +#include "nop_func.h" | ||
108 | + | ||
109 | +#define PAGE_IDX_BITS 10 | ||
110 | +#define PAGE_COUNT (1 << PAGE_IDX_BITS) | ||
111 | +#define PAGE_IDX_MASK (PAGE_COUNT - 1) | ||
112 | +#define REGION_IDX_BITS 3 | ||
113 | +#define PAGE_IDX_R_MASK (1 << 7) | ||
114 | +#define PAGE_IDX_W_MASK (1 << 8) | ||
115 | +#define PAGE_IDX_X_MASK (1 << 9) | ||
116 | +#define REGION_MASK (PAGE_IDX_R_MASK | PAGE_IDX_W_MASK | PAGE_IDX_X_MASK) | ||
117 | +#define PAGES_PER_REGION (1 << (PAGE_IDX_BITS - REGION_IDX_BITS)) | ||
118 | + | ||
119 | +struct context { | ||
120 | + int pagesize; | ||
121 | + char *ptr; | ||
122 | + int dev_null_fd; | ||
123 | + volatile int mutator_count; | ||
124 | +}; | ||
125 | + | ||
126 | +static void *thread_read(void *arg) | ||
127 | +{ | ||
128 | + struct context *ctx = arg; | ||
129 | + ssize_t sret; | ||
130 | + size_t i, j; | ||
131 | + int ret; | ||
132 | + | ||
133 | + for (i = 0; ctx->mutator_count; i++) { | ||
134 | + char *p; | ||
135 | + | ||
136 | + j = (i & PAGE_IDX_MASK) | PAGE_IDX_R_MASK; | ||
137 | + p = &ctx->ptr[j * ctx->pagesize]; | ||
138 | + | ||
139 | + /* Read directly. */ | ||
140 | + ret = memcmp(p, nop_func, sizeof(nop_func)); | ||
141 | + if (ret != 0) { | ||
142 | + fprintf(stderr, "fail direct read %p\n", p); | ||
143 | + abort(); | ||
144 | + } | ||
145 | + | ||
146 | + /* Read indirectly. */ | ||
147 | + sret = write(ctx->dev_null_fd, p, 1); | ||
148 | + if (sret != 1) { | ||
149 | + if (sret < 0) { | ||
150 | + fprintf(stderr, "fail indirect read %p (%m)\n", p); | ||
151 | + } else { | ||
152 | + fprintf(stderr, "fail indirect read %p (%zd)\n", p, sret); | ||
153 | + } | ||
154 | + abort(); | ||
155 | + } | ||
156 | + } | ||
157 | + | ||
158 | + return NULL; | ||
159 | +} | ||
160 | + | ||
161 | +static void *thread_write(void *arg) | ||
162 | +{ | ||
163 | + struct context *ctx = arg; | ||
164 | + struct timespec *ts; | ||
165 | + size_t i, j; | ||
166 | + int ret; | ||
167 | + | ||
168 | + for (i = 0; ctx->mutator_count; i++) { | ||
169 | + j = (i & PAGE_IDX_MASK) | PAGE_IDX_W_MASK; | ||
170 | + | ||
171 | + /* Write directly. */ | ||
172 | + memcpy(&ctx->ptr[j * ctx->pagesize], nop_func, sizeof(nop_func)); | ||
173 | + | ||
174 | + /* Write using a syscall. */ | ||
175 | + ts = (struct timespec *)(&ctx->ptr[(j + 1) * ctx->pagesize] - | ||
176 | + sizeof(struct timespec)); | ||
177 | + ret = clock_gettime(CLOCK_REALTIME, ts); | ||
178 | + if (ret != 0) { | ||
179 | + fprintf(stderr, "fail indirect write %p (%m)\n", ts); | ||
180 | + abort(); | ||
181 | + } | ||
182 | + } | ||
183 | + | ||
184 | + return NULL; | ||
185 | +} | ||
186 | + | ||
187 | +static void *thread_execute(void *arg) | ||
188 | +{ | ||
189 | + struct context *ctx = arg; | ||
190 | + size_t i, j; | ||
191 | + | ||
192 | + for (i = 0; ctx->mutator_count; i++) { | ||
193 | + j = (i & PAGE_IDX_MASK) | PAGE_IDX_X_MASK; | ||
194 | + ((void(*)(void))&ctx->ptr[j * ctx->pagesize])(); | ||
195 | + } | ||
196 | + | ||
197 | + return NULL; | ||
198 | +} | ||
199 | + | ||
200 | +static void *thread_mutate(void *arg) | ||
201 | +{ | ||
202 | + size_t i, start_idx, end_idx, page_idx, tmp; | ||
203 | + struct context *ctx = arg; | ||
204 | + unsigned int seed; | ||
205 | + int prot, ret; | ||
206 | + | ||
207 | + seed = (unsigned int)time(NULL); | ||
208 | + for (i = 0; i < 50000; i++) { | ||
209 | + start_idx = rand_r(&seed) & PAGE_IDX_MASK; | ||
210 | + end_idx = rand_r(&seed) & PAGE_IDX_MASK; | ||
211 | + if (start_idx > end_idx) { | ||
212 | + tmp = start_idx; | ||
213 | + start_idx = end_idx; | ||
214 | + end_idx = tmp; | ||
215 | + } | ||
216 | + prot = rand_r(&seed) & (PROT_READ | PROT_WRITE | PROT_EXEC); | ||
217 | + for (page_idx = start_idx & REGION_MASK; page_idx <= end_idx; | ||
218 | + page_idx += PAGES_PER_REGION) { | ||
219 | + if (page_idx & PAGE_IDX_R_MASK) { | ||
220 | + prot |= PROT_READ; | ||
221 | + } | ||
222 | + if (page_idx & PAGE_IDX_W_MASK) { | ||
223 | + /* FIXME: qemu syscalls check for both read+write. */ | ||
224 | + prot |= PROT_WRITE | PROT_READ; | ||
225 | + } | ||
226 | + if (page_idx & PAGE_IDX_X_MASK) { | ||
227 | + prot |= PROT_EXEC; | ||
228 | + } | ||
229 | + } | ||
230 | + ret = mprotect(&ctx->ptr[start_idx * ctx->pagesize], | ||
231 | + (end_idx - start_idx + 1) * ctx->pagesize, prot); | ||
232 | + assert(ret == 0); | ||
233 | + } | ||
234 | + | ||
235 | + __atomic_fetch_sub(&ctx->mutator_count, 1, __ATOMIC_SEQ_CST); | ||
236 | + | ||
237 | + return NULL; | ||
238 | +} | ||
239 | + | ||
240 | +int main(void) | ||
241 | +{ | ||
242 | + pthread_t threads[5]; | ||
243 | + struct context ctx; | ||
244 | + size_t i; | ||
245 | + int ret; | ||
246 | + | ||
247 | + /* Without a template, nothing to test. */ | ||
248 | + if (sizeof(nop_func) == 0) { | ||
249 | + return EXIT_SUCCESS; | ||
250 | + } | ||
251 | + | ||
252 | + /* Initialize memory chunk. */ | ||
253 | + ctx.pagesize = getpagesize(); | ||
254 | + ctx.ptr = mmap(NULL, PAGE_COUNT * ctx.pagesize, | ||
255 | + PROT_READ | PROT_WRITE | PROT_EXEC, | ||
256 | + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | ||
257 | + assert(ctx.ptr != MAP_FAILED); | ||
258 | + for (i = 0; i < PAGE_COUNT; i++) { | ||
259 | + memcpy(&ctx.ptr[i * ctx.pagesize], nop_func, sizeof(nop_func)); | ||
260 | + } | ||
261 | + ctx.dev_null_fd = open("/dev/null", O_WRONLY); | ||
262 | + assert(ctx.dev_null_fd >= 0); | ||
263 | + ctx.mutator_count = 2; | ||
264 | + | ||
265 | + /* Start threads. */ | ||
266 | + ret = pthread_create(&threads[0], NULL, thread_read, &ctx); | ||
267 | + assert(ret == 0); | ||
268 | + ret = pthread_create(&threads[1], NULL, thread_write, &ctx); | ||
269 | + assert(ret == 0); | ||
270 | + ret = pthread_create(&threads[2], NULL, thread_execute, &ctx); | ||
271 | + assert(ret == 0); | ||
272 | + for (i = 3; i <= 4; i++) { | ||
273 | + ret = pthread_create(&threads[i], NULL, thread_mutate, &ctx); | ||
274 | + assert(ret == 0); | ||
275 | + } | ||
276 | + | ||
277 | + /* Wait for threads to stop. */ | ||
278 | + for (i = 0; i < sizeof(threads) / sizeof(threads[0]); i++) { | ||
279 | + ret = pthread_join(threads[i], NULL); | ||
280 | + assert(ret == 0); | ||
281 | + } | ||
282 | + | ||
283 | + /* Destroy memory chunk. */ | ||
284 | + ret = close(ctx.dev_null_fd); | ||
285 | + assert(ret == 0); | ||
286 | + ret = munmap(ctx.ptr, PAGE_COUNT * ctx.pagesize); | ||
287 | + assert(ret == 0); | ||
288 | + | ||
289 | + return EXIT_SUCCESS; | ||
290 | +} | ||
291 | diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target | ||
292 | index XXXXXXX..XXXXXXX 100644 | ||
293 | --- a/tests/tcg/multiarch/Makefile.target | ||
294 | +++ b/tests/tcg/multiarch/Makefile.target | ||
295 | @@ -XXX,XX +XXX,XX @@ signals: LDFLAGS+=-lrt -lpthread | ||
296 | munmap-pthread: CFLAGS+=-pthread | ||
297 | munmap-pthread: LDFLAGS+=-pthread | ||
298 | |||
299 | +vma-pthread: CFLAGS+=-pthread | ||
300 | +vma-pthread: LDFLAGS+=-pthread | ||
301 | + | ||
302 | # We define the runner for test-mmap after the individual | ||
303 | # architectures have defined their supported pages sizes. If no | ||
304 | # additional page sizes are defined we only run the default test. | ||
305 | -- | ||
306 | 2.34.1 | ||
307 | |||
308 | diff view generated by jsdifflib |