The following changes since commit 3ccf6cd0e3e1dfd663814640b3b18b55715d7a75:

  Merge remote-tracking branch 'remotes/kraxel/tags/audio-20210617-pull-request' into staging (2021-06-18 09:54:42 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210619

for you to fetch changes up to 8169ec35eb766a12ad0ae898119060fde148ab61:

  util/oslib-win32: Fix fatal assertion in qemu_try_memalign (2021-06-19 11:09:11 -0700)

----------------------------------------------------------------
TCI cleanup and re-encoding
Fixes for #367 and #390.
Move TCGCond to tcg/tcg-cond.h.
Fix for win32 qemu_try_memalign.

----------------------------------------------------------------
Alessandro Di Federico (1):
      tcg: expose TCGCond manipulation routines

Richard Henderson (31):
      tcg: Combine dh_is_64bit and dh_is_signed to dh_typecode
      tcg: Add tcg_call_flags
      accel/tcg/plugin-gen: Drop inline markers
      plugins: Drop tcg_flags from struct qemu_plugin_dyn_cb
      accel/tcg: Add tcg call flags to plugins helpers
      tcg: Store the TCGHelperInfo in the TCGOp for call
      tcg: Add tcg_call_func
      tcg: Build ffi data structures for helpers
      tcg/tci: Improve tcg_target_call_clobber_regs
      tcg/tci: Move call-return regs to end of tcg_target_reg_alloc_order
      tcg/tci: Use ffi for calls
      tcg/tci: Reserve r13 for a temporary
      tcg/tci: Emit setcond before brcond
      tcg/tci: Remove tci_write_reg
      tcg/tci: Change encoding to uint32_t units
      tcg/tci: Implement goto_ptr
      tcg/tci: Implement movcond
      tcg/tci: Implement andc, orc, eqv, nand, nor
      tcg/tci: Implement extract, sextract
      tcg/tci: Implement clz, ctz, ctpop
      tcg/tci: Implement mulu2, muls2
      tcg/tci: Implement add2, sub2
      tcg/tci: Split out tci_qemu_ld, tci_qemu_st
      Revert "tcg/tci: Use exec/cpu_ldst.h interfaces"
      tcg/tci: Remove the qemu_ld/st_type macros
      tcg/tci: Use {set,clear}_helper_retaddr
      tests/tcg: Increase timeout for TCI
      accel/tcg: Probe the proper permissions for atomic ops
      tcg/sparc: Fix temp_allocate_frame vs sparc stack bias
      tcg: Allocate sufficient storage in temp_allocate_frame
      tcg: Restart when exhausting the stack frame

Stefan Weil (1):
      util/oslib-win32: Fix fatal assertion in qemu_try_memalign

 configure                                          |    3 +
 accel/tcg/atomic_template.h                        |   24 +-
 accel/tcg/plugin-helpers.h                         |    5 +-
 include/exec/helper-head.h                         |   37 +-
 include/exec/helper-tcg.h                          |   34 +-
 include/qemu/plugin.h                              |    1 -
 include/tcg/tcg-cond.h                             |  101 ++
 include/tcg/tcg-opc.h                              |    4 +-
 include/tcg/tcg.h                                  |   71 +-
 target/hppa/helper.h                               |    3 -
 target/i386/ops_sse_header.h                       |    3 -
 target/m68k/helper.h                               |    1 -
 target/ppc/helper.h                                |    3 -
 tcg/tcg-internal.h                                 |   22 +
 tcg/tci/tcg-target-con-set.h                       |    1 +
 tcg/tci/tcg-target.h                               |   68 +-
 accel/tcg/cputlb.c                                 |   95 +-
 accel/tcg/plugin-gen.c                             |   20 +-
 accel/tcg/user-exec.c                              |    8 +-
 plugins/core.c                                     |   30 +-
 tcg/optimize.c                                     |    3 +-
 tcg/tcg.c                                          |  300 +++--
 tcg/tci.c                                          | 1203 ++++++++++----------
 util/oslib-win32.c                                 |    6 +-
 tcg/sparc/tcg-target.c.inc                         |   16 +-
 tcg/tci/tcg-target.c.inc                           |  550 ++++-----
 tcg/meson.build                                    |    8 +-
 tcg/tci/README                                     |   20 +-
 tests/docker/dockerfiles/alpine.docker             |    1 +
 tests/docker/dockerfiles/centos8.docker            |    1 +
 tests/docker/dockerfiles/debian10.docker           |    1 +
 tests/docker/dockerfiles/fedora-i386-cross.docker  |    1 +
 tests/docker/dockerfiles/fedora-win32-cross.docker |    1 +
 tests/docker/dockerfiles/fedora-win64-cross.docker |    1 +
 tests/docker/dockerfiles/fedora.docker             |    1 +
 tests/docker/dockerfiles/ubuntu.docker             |    1 +
 tests/docker/dockerfiles/ubuntu1804.docker         |    1 +
 tests/docker/dockerfiles/ubuntu2004.docker         |    1 +
 tests/tcg/Makefile.target                          |    6 +-
 39 files changed, 1454 insertions(+), 1202 deletions(-)
 create mode 100644 include/tcg/tcg-cond.h
This operation is critical to staying within the interpretation
loop longer, which avoids the overhead of setup and teardown for
many TBs.

The check in tcg_prologue_init is disabled because TCI does
want to use NULL to indicate exit, as opposed to branching to
a real epilogue.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
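As background for review, the dispatch pattern that goto_ptr enables in an
interpreter can be sketched as follows. This is an illustrative,
self-contained fragment with invented names, not the TCI code itself (the
real implementation is the tci.c hunk below); the point is the NULL
convention: a NULL target means "no cached translation", so we return to
the caller instead of branching to an epilogue.

    #include <stdint.h>
    #include <stddef.h>

    /* Hypothetical bytecode loop: OP_GOTO_PTR reads a register holding
     * either the next TB's bytecode address or NULL. */
    static uintptr_t interp(const uint32_t *tb_ptr, uintptr_t *regs)
    {
        for (;;) {
            uint32_t insn = *tb_ptr++;
            switch (insn & 0xff) {
            case 1: /* OP_GOTO_PTR */ {
                void *next = (void *)regs[(insn >> 8) & 0xf];
                if (next == NULL) {
                    return 0;                    /* leave the interpreter */
                }
                tb_ptr = (const uint32_t *)next; /* chain: stay in the loop */
                break;
            }
            default:
                return 0;
            }
        }
    }

Each successful chain avoids one full exit to and re-entry from the outer
execution loop, which is where the setup/teardown cost mentioned above is
paid.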
 tcg/tci/tcg-target-con-set.h | 1 +
 tcg/tci/tcg-target.h | 2 +-
 tcg/tcg.c | 8 +++++++-
 tcg/tci.c | 19 +++++++++++++++++++
 tcg/tci/tcg-target.c.inc | 16 ++++++++++++++++
 5 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/tcg/tci/tcg-target-con-set.h b/tcg/tci/tcg-target-con-set.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target-con-set.h
+++ b/tcg/tci/tcg-target-con-set.h
@@ -XXX,XX +XXX,XX @@
 * Each operand should be a sequence of constraint letters as defined by
 * tcg-target-con-str.h; the constraint combination is inclusive or.
 */
+C_O0_I1(r)
C_O0_I2(r, r)
C_O0_I3(r, r, r)
C_O0_I4(r, r, r, r)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0
-#define TCG_TARGET_HAS_goto_ptr 0
+#define TCG_TARGET_HAS_goto_ptr 1
#define TCG_TARGET_HAS_direct_jump 0
#define TCG_TARGET_HAS_qemu_st8_i32 0

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ void tcg_prologue_init(TCGContext *s)
}
#endif

- /* Assert that goto_ptr is implemented completely. */
+#ifndef CONFIG_TCG_INTERPRETER
+ /*
+ * Assert that goto_ptr is implemented completely, setting an epilogue.
+ * For tci, we use NULL as the signal to return from the interpreter,
+ * so skip this check.
+ */
if (TCG_TARGET_HAS_goto_ptr) {
tcg_debug_assert(tcg_code_gen_epilogue != NULL);
}
+#endif
}

void tcg_func_start(TCGContext *s)
diff --git a/tcg/tci.c b/tcg/tci.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -XXX,XX +XXX,XX @@ static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0)
*l0 = diff ? (void *)tb_ptr + diff : NULL;
}

+static void tci_args_r(uint32_t insn, TCGReg *r0)
+{
+ *r0 = extract32(insn, 8, 4);
+}
+
static void tci_args_nl(uint32_t insn, const void *tb_ptr,
uint8_t *n0, void **l1)
{
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tb_ptr = *(void **)ptr;
break;

+ case INDEX_op_goto_ptr:
+ tci_args_r(insn, &r0);
+ ptr = (void *)regs[r0];
+ if (!ptr) {
+ return 0;
+ }
+ tb_ptr = ptr;
+ break;
+
case INDEX_op_qemu_ld_i32:
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
tci_args_rrm(insn, &r0, &r1, &oi);
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
info->fprintf_func(info->stream, "%-12s %p", op_name, ptr);
break;

+ case INDEX_op_goto_ptr:
+ tci_args_r(insn, &r0);
+ info->fprintf_func(info->stream, "%-12s %s", op_name, str_r(r0));
+ break;
+
case INDEX_op_call:
tci_args_nl(insn, tb_ptr, &len, &ptr);
info->fprintf_func(info->stream, "%-12s %d, %p", op_name, len, ptr);
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
switch (op) {
+ case INDEX_op_goto_ptr:
+ return C_O0_I1(r);
+
case INDEX_op_ld8u_i32:
case INDEX_op_ld8s_i32:
case INDEX_op_ld16u_i32:
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op_p(TCGContext *s, TCGOpcode op, void *p0)
tcg_out32(s, insn);
}

+static void tcg_out_op_r(TCGContext *s, TCGOpcode op, TCGReg r0)
+{
+ tcg_insn_unit insn = 0;
+
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ tcg_out32(s, insn);
+}
+
static void tcg_out_op_v(TCGContext *s, TCGOpcode op)
{
tcg_out32(s, (uint8_t)op);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
set_jmp_reset_offset(s, args[0]);
break;

+ case INDEX_op_goto_ptr:
+ tcg_out_op_r(s, opc, args[0]);
+ break;
+
case INDEX_op_br:
tcg_out_op_l(s, opc, arg_label(args[0]));
break;
--
2.25.1

This function should have been updated for vector types
when they were introduced.
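The new logic below is an align-then-bump allocator. A minimal standalone
sketch of the same shape, with simplified, assumed types (not the QEMU
code):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    struct frame { uintptr_t cur, end; };    /* hypothetical frame state */

    /* Round cur up to 'align' (a power of two), carve out 'size' bytes. */
    static uintptr_t alloc_slot(struct frame *f, size_t size, size_t align)
    {
        uintptr_t off = (f->cur + align - 1) & -(uintptr_t)align;
        assert(off + size <= f->end);
        f->cur = off + size;
        return off;
    }

For example, with cur == 4, an I64 or V64 temp (size = align = 8) lands at
offset 8, and a V128 temp at offset 16; a V256 temp consumes 32 bytes but,
per the comment in the patch, only requires 16-byte alignment.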

Fixes: d2fd745fe8b
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/367
Cc: qemu-stable@nongnu.org
Tested-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void check_regs(TCGContext *s)

static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
- if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
- s->frame_end) {
- tcg_abort();
+ size_t size, align;
+ intptr_t off;
+
+ switch (ts->type) {
+ case TCG_TYPE_I32:
+ size = align = 4;
+ break;
+ case TCG_TYPE_I64:
+ case TCG_TYPE_V64:
+ size = align = 8;
+ break;
+ case TCG_TYPE_V128:
+ size = align = 16;
+ break;
+ case TCG_TYPE_V256:
+ /* Note that we do not require aligned storage for V256. */
+ size = 32, align = 16;
+ break;
+ default:
+ g_assert_not_reached();
}
- ts->mem_offset = s->current_frame_offset;
+
+ assert(align <= TCG_TARGET_STACK_ALIGN);
+ off = ROUND_UP(s->current_frame_offset, align);
+ assert(off + size <= s->frame_end);
+ s->current_frame_offset = off + size;
+
+ ts->mem_offset = off;
#if defined(__sparc__)
ts->mem_offset += TCG_TARGET_STACK_BIAS;
#endif
ts->mem_base = s->frame_temp;
ts->mem_allocated = 1;
- s->current_frame_offset += sizeof(tcg_target_long);
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
--
2.25.1

Assume that we'll have fewer temps allocated after
restarting with a smaller number of instructions.
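The overflow path unwinds back to the translation loop so the TB can be
regenerated smaller. A generic sketch of that retry-with-smaller-budget
pattern -- purely illustrative, not the QEMU mechanism verbatim:

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf restart_tb;

    static void frame_overflow(void)
    {
        longjmp(restart_tb, 1);          /* abandon this translation */
    }

    static void translate(int max_insns)
    {
        if (max_insns > 64) {            /* pretend the frame overflows */
            frame_overflow();
        }
        printf("translated with budget %d\n", max_insns);
    }

    int main(void)
    {
        volatile int max_insns = 512;

        if (setjmp(restart_tb)) {
            max_insns /= 2;              /* fewer insns => fewer temps */
        }
        translate(max_insns);
        return 0;
    }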

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)

assert(align <= TCG_TARGET_STACK_ALIGN);
off = ROUND_UP(s->current_frame_offset, align);
- assert(off + size <= s->frame_end);
+
+ /* If we've exhausted the stack frame, restart with a smaller TB. */
+ if (off + size > s->frame_end) {
+ tcg_raise_tb_overflow(s);
+ }
s->current_frame_offset = off + size;

ts->mem_offset = off;
--
2.25.1

From: Stefan Weil <sw@weilnetz.de>

The function is called with alignment == 0, which caused an assertion
failure. Use the code from oslib-posix.c to fix that regression.
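The policy being adopted is: alignments smaller than a pointer (including
the offending 0) are silently rounded up, and only larger values must be a
power of two. A standalone sketch of just that check:

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    static bool is_power_of_2(size_t n)
    {
        return n != 0 && (n & (n - 1)) == 0;
    }

    /* Clamp instead of asserting: 0 (and 1, 2, 4 on a 64-bit host)
     * become sizeof(void *); anything larger must be a power of two. */
    static size_t fix_alignment(size_t alignment)
    {
        if (alignment < sizeof(void *)) {
            alignment = sizeof(void *);
        } else {
            assert(is_power_of_2(alignment));
        }
        return alignment;
    }

So qemu_try_memalign(0, n) now degenerates to an ordinary pointer-aligned
allocation instead of tripping the assertion.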

Fixes: ed6f53f9ca9
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20210611105846.347954-1-sw@weilnetz.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/oslib-win32.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -XXX,XX +XXX,XX @@ void *qemu_try_memalign(size_t alignment, size_t size)
void *ptr;

g_assert(size != 0);
- g_assert(is_power_of_2(alignment));
+ if (alignment < sizeof(void *)) {
+ alignment = sizeof(void *);
+ } else {
+ g_assert(is_power_of_2(alignment));
+ }
ptr = _aligned_malloc(size, alignment);
trace_qemu_memalign(alignment, size, ptr);
return ptr;
--
2.25.1

This requires adjusting where arguments are stored.
Place them on the stack at left-aligned positions.
Adjust the stack frame to be at entirely positive offsets.
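For reviewers who have not used libffi: an ffi_cif describes a function
signature once, and ffi_call then marshals the arguments from an array of
pointers. That is why TCI can keep every argument left-aligned in its own
64-bit stack slot and hand libffi an array of &stack[i] pointers. A
minimal, self-contained example with a hypothetical helper (standard
libffi API):

    #include <ffi.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t helper_add(uint64_t a, uint64_t b)
    {
        return a + b;
    }

    int main(void)
    {
        ffi_cif cif;
        ffi_type *args[2] = { &ffi_type_uint64, &ffi_type_uint64 };
        uint64_t stack[2] = { 40, 2 };                /* left-aligned slots */
        void *avalues[2] = { &stack[0], &stack[1] };  /* like call_slots[] */
        uint64_t ret;

        /* Built once per signature; the patch caches one cif per typemask. */
        if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2,
                         &ffi_type_uint64, args) != FFI_OK) {
            return 1;
        }
        ffi_call(&cif, FFI_FN(helper_add), &ret, avalues);
        printf("%llu\n", (unsigned long long)ret);    /* prints 42 */
        return 0;
    }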

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 1 +
 tcg/tci/tcg-target.h | 2 +-
 tcg/tcg.c | 64 +++++++++++-----
 tcg/tci.c | 142 ++++++++++++++++++++++-----------------
 tcg/tci/tcg-target.c.inc | 50 +++++++-------
 5 files changed, 153 insertions(+), 106 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@
#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))

#define CPU_TEMP_BUF_NLONGS 128
+#define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long))

/* Default target word size to pointer size. */
#ifndef TCG_TARGET_REG_BITS
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {

/* Used for function call generation. */
#define TCG_TARGET_CALL_STACK_OFFSET 0
-#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_STACK_ALIGN 8

#define HAVE_TCG_QEMU_TB_EXEC

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
TCGReg base, intptr_t ofs);
+#ifdef CONFIG_TCG_INTERPRETER
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
+ ffi_cif *cif);
+#else
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
+#endif
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
for (i = 0; i < nargs; i++) {
int argtype = extract32(typemask, (i + 1) * 3, 3);
bool is_64bit = (argtype & ~1) == dh_typecode_i64;
+ bool want_align = false;
+
+#if defined(CONFIG_TCG_INTERPRETER)
+ /*
+ * Align all arguments, so that they land in predictable places
+ * for passing off to ffi_call.
+ */
+ want_align = true;
+#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
+ /* Some targets want aligned 64 bit args */
+ want_align = is_64bit;
+#endif
+
+ if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
+ op->args[pi++] = TCG_CALL_DUMMY_ARG;
+ real_args++;
+ }

if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
-#ifdef TCG_TARGET_CALL_ALIGN_ARGS
- /* some targets want aligned 64 bit args */
- if (real_args & 1) {
- op->args[pi++] = TCG_CALL_DUMMY_ARG;
- real_args++;
- }
-#endif
- /* If stack grows up, then we will be placing successive
- arguments at lower addresses, which means we need to
- reverse the order compared to how we would normally
- treat either big or little-endian. For those arguments
- that will wind up in registers, this still works for
- HPPA (the only current STACK_GROWSUP target) since the
- argument registers are *also* allocated in decreasing
- order. If another such target is added, this logic may
- have to get more complicated to differentiate between
- stack arguments and register arguments. */
+ /*
+ * If stack grows up, then we will be placing successive
+ * arguments at lower addresses, which means we need to
+ * reverse the order compared to how we would normally
+ * treat either big or little-endian. For those arguments
+ * that will wind up in registers, this still works for
+ * HPPA (the only current STACK_GROWSUP target) since the
+ * argument registers are *also* allocated in decreasing
+ * order. If another such target is added, this logic may
+ * have to get more complicated to differentiate between
+ * stack arguments and register arguments.
+ */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
op->args[pi++] = temp_arg(args[i] + 1);
op->args[pi++] = temp_arg(args[i]);
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
const int nb_oargs = TCGOP_CALLO(op);
const int nb_iargs = TCGOP_CALLI(op);
const TCGLifeData arg_life = op->life;
+ const TCGHelperInfo *info;
int flags, nb_regs, i;
TCGReg reg;
TCGArg arg;
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
TCGRegSet allocated_regs;

func_addr = tcg_call_func(op);
- flags = tcg_call_flags(op);
+ info = tcg_call_info(op);
+ flags = info->flags;

nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
if (nb_regs > nb_iargs) {
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
save_globals(s, allocated_regs);
}

+#ifdef CONFIG_TCG_INTERPRETER
+ {
+ gpointer hash = (gpointer)(uintptr_t)info->typemask;
+ ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
+ assert(cif != NULL);
+ tcg_out_call(s, func_addr, cif);
+ }
+#else
tcg_out_call(s, func_addr);
+#endif

/* assign output registers and emit moves if needed */
for(i = 0; i < nb_oargs; i++) {
diff --git a/tcg/tci.c b/tcg/tci.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -XXX,XX +XXX,XX @@
 */

#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */
+#include "exec/cpu_ldst.h"
+#include "tcg/tcg-op.h"
+#include "qemu/compiler.h"
+#include <ffi.h>

-/* Enable TCI assertions only when debugging TCG (and without NDEBUG defined).
- * Without assertions, the interpreter runs much faster. */
+
+/*
+ * Enable TCI assertions only when debugging TCG (and without NDEBUG defined).
+ * Without assertions, the interpreter runs much faster.
+ */
#if defined(CONFIG_DEBUG_TCG)
# define tci_assert(cond) assert(cond)
#else
# define tci_assert(cond) ((void)(cond))
#endif

-#include "qemu-common.h"
-#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */
-#include "exec/cpu_ldst.h"
-#include "tcg/tcg-op.h"
-#include "qemu/compiler.h"
-
-#if MAX_OPC_PARAM_IARGS != 6
-# error Fix needed, number of supported input arguments changed!
-#endif
-#if TCG_TARGET_REG_BITS == 32
-typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong);
-#else
-typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong);
-#endif
-
__thread uintptr_t tci_tb_ptr;

-static tcg_target_ulong tci_read_reg(const tcg_target_ulong *regs, TCGReg index)
-{
- tci_assert(index < TCG_TARGET_NB_REGS);
- return regs[index];
-}
-
static void
tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value)
{
@@ -XXX,XX +XXX,XX @@ static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr)
 * I = immediate (tcg_target_ulong)
 * l = label or pointer
 * m = immediate (TCGMemOpIdx)
+ * n = immediate (call return length)
 * r = register
 * s = signed ldst offset
 */
@@ -XXX,XX +XXX,XX @@ static void tci_args_l(const uint8_t **tb_ptr, void **l0)
check_size(start, tb_ptr);
}

+static void tci_args_nll(const uint8_t **tb_ptr, uint8_t *n0,
+ void **l1, void **l2)
+{
+ const uint8_t *start = *tb_ptr;
+
+ *n0 = tci_read_b(tb_ptr);
+ *l1 = (void *)tci_read_label(tb_ptr);
+ *l2 = (void *)tci_read_label(tb_ptr);
+
+ check_size(start, tb_ptr);
+}
+
static void tci_args_rr(const uint8_t **tb_ptr,
TCGReg *r0, TCGReg *r1)
{
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
{
const uint8_t *tb_ptr = v_tb_ptr;
tcg_target_ulong regs[TCG_TARGET_NB_REGS];
- long tcg_temps[CPU_TEMP_BUF_NLONGS];
- uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
+ uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE)
+ / sizeof(uint64_t)];
+ void *call_slots[TCG_STATIC_CALL_ARGS_SIZE / sizeof(uint64_t)];

regs[TCG_AREG0] = (tcg_target_ulong)env;
- regs[TCG_REG_CALL_STACK] = sp_value;
+ regs[TCG_REG_CALL_STACK] = (uintptr_t)stack;
+ /* Other call_slots entries initialized at first use (see below). */
+ call_slots[0] = NULL;
tci_assert(tb_ptr);

for (;;) {
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
#endif
TCGMemOpIdx oi;
int32_t ofs;
- void *ptr;
+ void *ptr, *cif;

/* Skip opcode and size entry. */
tb_ptr += 2;

switch (opc) {
case INDEX_op_call:
- tci_args_l(&tb_ptr, &ptr);
+ /*
+ * Set up the ffi_avalue array once, delayed until now
+ * because many TB's do not make any calls. In tcg_gen_callN,
+ * we arranged for every real argument to be "left-aligned"
+ * in each 64-bit slot.
+ */
+ if (unlikely(call_slots[0] == NULL)) {
+ for (int i = 0; i < ARRAY_SIZE(call_slots); ++i) {
+ call_slots[i] = &stack[i];
+ }
+ }
+
+ tci_args_nll(&tb_ptr, &len, &ptr, &cif);
+
+ /* Helper functions may need to access the "return address" */
tci_tb_ptr = (uintptr_t)tb_ptr;
-#if TCG_TARGET_REG_BITS == 32
- tmp64 = ((helper_function)ptr)(tci_read_reg(regs, TCG_REG_R0),
- tci_read_reg(regs, TCG_REG_R1),
- tci_read_reg(regs, TCG_REG_R2),
- tci_read_reg(regs, TCG_REG_R3),
- tci_read_reg(regs, TCG_REG_R4),
- tci_read_reg(regs, TCG_REG_R5),
- tci_read_reg(regs, TCG_REG_R6),
- tci_read_reg(regs, TCG_REG_R7),
- tci_read_reg(regs, TCG_REG_R8),
- tci_read_reg(regs, TCG_REG_R9),
- tci_read_reg(regs, TCG_REG_R10),
- tci_read_reg(regs, TCG_REG_R11));
- tci_write_reg(regs, TCG_REG_R0, tmp64);
- tci_write_reg(regs, TCG_REG_R1, tmp64 >> 32);
-#else
- tmp64 = ((helper_function)ptr)(tci_read_reg(regs, TCG_REG_R0),
- tci_read_reg(regs, TCG_REG_R1),
- tci_read_reg(regs, TCG_REG_R2),
- tci_read_reg(regs, TCG_REG_R3),
- tci_read_reg(regs, TCG_REG_R4),
- tci_read_reg(regs, TCG_REG_R5));
- tci_write_reg(regs, TCG_REG_R0, tmp64);
-#endif
+
+ ffi_call(cif, ptr, stack, call_slots);
+
+ /* Any result winds up "left-aligned" in the stack[0] slot. */
+ switch (len) {
+ case 0: /* void */
+ break;
+ case 1: /* uint32_t */
+ /*
+ * Note that libffi has an odd special case in that it will
+ * always widen an integral result to ffi_arg.
+ */
+ if (sizeof(ffi_arg) == 4) {
+ regs[TCG_REG_R0] = *(uint32_t *)stack;
+ break;
+ }
+ /* fall through */
+ case 2: /* uint64_t */
+ if (TCG_TARGET_REG_BITS == 32) {
+ tci_write_reg64(regs, TCG_REG_R1, TCG_REG_R0, stack[0]);
+ } else {
+ regs[TCG_REG_R0] = stack[0];
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
break;
+
case INDEX_op_br:
tci_args_l(&tb_ptr, &ptr);
tb_ptr = ptr;
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
TCGCond c;
TCGMemOpIdx oi;
uint8_t pos, len;
- void *ptr;
+ void *ptr, *cif;
const uint8_t *tb_ptr;

status = info->read_memory_func(addr, buf, 2, info);
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)

switch (op) {
case INDEX_op_br:
- case INDEX_op_call:
case INDEX_op_exit_tb:
case INDEX_op_goto_tb:
tci_args_l(&tb_ptr, &ptr);
info->fprintf_func(info->stream, "%-12s %p", op_name, ptr);
break;

+ case INDEX_op_call:
+ tci_args_nll(&tb_ptr, &len, &ptr, &cif);
+ info->fprintf_func(info->stream, "%-12s %d, %p, %p",
+ op_name, len, ptr, cif);
+ break;
+
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
tci_args_rrcl(&tb_ptr, &r0, &r1, &c, &ptr);
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
# error Fix needed, number of supported input arguments changed!
#endif

-static const int tcg_target_call_iarg_regs[] = {
- TCG_REG_R0,
- TCG_REG_R1,
- TCG_REG_R2,
- TCG_REG_R3,
- TCG_REG_R4,
- TCG_REG_R5,
-#if TCG_TARGET_REG_BITS == 32
- /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
- TCG_REG_R6,
- TCG_REG_R7,
- TCG_REG_R8,
- TCG_REG_R9,
- TCG_REG_R10,
- TCG_REG_R11,
-#endif
-};
+/* No call arguments via registers. All will be stored on the "stack". */
+static const int tcg_target_call_iarg_regs[] = { };

static const int tcg_target_call_oarg_regs[] = {
TCG_REG_R0,
@@ -XXX,XX +XXX,XX @@ static void tci_out_label(TCGContext *s, TCGLabel *label)
static void stack_bounds_check(TCGReg base, target_long offset)
{
if (base == TCG_REG_CALL_STACK) {
- tcg_debug_assert(offset < 0);
- tcg_debug_assert(offset >= -(CPU_TEMP_BUF_NLONGS * sizeof(long)));
+ tcg_debug_assert(offset >= 0);
+ tcg_debug_assert(offset < (TCG_STATIC_CALL_ARGS_SIZE +
+ TCG_STATIC_FRAME_SIZE));
}
}

@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
}
}

-static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
+ ffi_cif *cif)
{
uint8_t *old_code_ptr = s->code_ptr;
+ uint8_t which;
+
+ if (cif->rtype == &ffi_type_void) {
+ which = 0;
+ } else if (cif->rtype->size == 4) {
+ which = 1;
+ } else {
+ tcg_debug_assert(cif->rtype->size == 8);
+ which = 2;
+ }
tcg_out_op_t(s, INDEX_op_call);
- tcg_out_i(s, (uintptr_t)arg);
+ tcg_out8(s, which);
+ tcg_out_i(s, (uintptr_t)func);
+ tcg_out_i(s, (uintptr_t)cif);
+
old_code_ptr[1] = s->code_ptr - old_code_ptr;
}

@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

- /* We use negative offsets from "sp" so that we can distinguish
- stores that might pretend to be call arguments. */
- tcg_set_frame(s, TCG_REG_CALL_STACK,
- -CPU_TEMP_BUF_NLONGS * sizeof(long),
- CPU_TEMP_BUF_NLONGS * sizeof(long));
+ /* The call arguments come first, followed by the temp storage. */
+ tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+ TCG_STATIC_FRAME_SIZE);
}

/* Generate global QEMU prologue and epilogue code. */
--
2.25.1

From: Alessandro Di Federico <ale@rev.ng>

This commit moves the routines used to manipulate TCGCond into a
separate file. These will be employed by the idef-parser.
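The payoff of the bit layout documented in the header is that the common
transformations are single XORs. A few worked cases, written as a quick
self-check against the definitions added below (the include path is the
new header from this patch):

    #include <assert.h>
    #include "tcg/tcg-cond.h"

    int main(void)
    {
        /* LT = 2; flipping bit 0 gives GE = 3. */
        assert(tcg_invert_cond(TCG_COND_LT) == TCG_COND_GE);
        /* Swapping operands: a < b  <=>  b > a, via c ^ 9. */
        assert(tcg_swap_cond(TCG_COND_LT) == TCG_COND_GT);
        /* Signed -> unsigned flips bits 1 and 2: LT -> LTU. */
        assert(tcg_unsigned_cond(TCG_COND_LT) == TCG_COND_LTU);
        assert(is_unsigned_cond(TCG_COND_GEU));
        return 0;
    }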
5 | has been realized. | ||
5 | 6 | ||
6 | Signed-off-by: Alessandro Di Federico <ale@rev.ng> | 7 | This change paves the way to the introduction of tlb_init, |
7 | Signed-off-by: Paolo Montesel <babush@rev.ng> | 8 | which will be called from cpu_exec_realizefn. |
8 | Message-Id: <20210619093713.1845446-2-ale.qemu@rev.ng> | 9 | |
10 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
13 | Message-Id: <20181009174557.16125-2-cota@braap.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 14 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 15 | --- |
11 | include/tcg/tcg-cond.h | 101 +++++++++++++++++++++++++++++++++++++++++ | 16 | target/alpha/cpu.c | 1 - |
12 | include/tcg/tcg.h | 70 +--------------------------- | 17 | 1 file changed, 1 deletion(-) |
13 | 2 files changed, 102 insertions(+), 69 deletions(-) | ||
14 | create mode 100644 include/tcg/tcg-cond.h | ||
15 | 18 | ||
16 | diff --git a/include/tcg/tcg-cond.h b/include/tcg/tcg-cond.h | 19 | diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c |
17 | new file mode 100644 | ||
18 | index XXXXXXX..XXXXXXX | ||
19 | --- /dev/null | ||
20 | +++ b/include/tcg/tcg-cond.h | ||
21 | @@ -XXX,XX +XXX,XX @@ | ||
22 | +/* | ||
23 | + * Tiny Code Generator for QEMU | ||
24 | + * | ||
25 | + * Copyright (c) 2008 Fabrice Bellard | ||
26 | + * | ||
27 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
28 | + * of this software and associated documentation files (the "Software"), to deal | ||
29 | + * in the Software without restriction, including without limitation the rights | ||
30 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
31 | + * copies of the Software, and to permit persons to whom the Software is | ||
32 | + * furnished to do so, subject to the following conditions: | ||
33 | + * | ||
34 | + * The above copyright notice and this permission notice shall be included in | ||
35 | + * all copies or substantial portions of the Software. | ||
36 | + * | ||
37 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
38 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
39 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
40 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
41 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
42 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
43 | + * THE SOFTWARE. | ||
44 | + */ | ||
45 | + | ||
46 | +#ifndef TCG_COND_H | ||
47 | +#define TCG_COND_H | ||
48 | + | ||
49 | +/* | ||
50 | + * Conditions. Note that these are laid out for easy manipulation by | ||
51 | + * the functions below: | ||
52 | + * bit 0 is used for inverting; | ||
53 | + * bit 1 is signed, | ||
54 | + * bit 2 is unsigned, | ||
55 | + * bit 3 is used with bit 0 for swapping signed/unsigned. | ||
56 | + */ | ||
57 | +typedef enum { | ||
58 | + /* non-signed */ | ||
59 | + TCG_COND_NEVER = 0 | 0 | 0 | 0, | ||
60 | + TCG_COND_ALWAYS = 0 | 0 | 0 | 1, | ||
61 | + TCG_COND_EQ = 8 | 0 | 0 | 0, | ||
62 | + TCG_COND_NE = 8 | 0 | 0 | 1, | ||
63 | + /* signed */ | ||
64 | + TCG_COND_LT = 0 | 0 | 2 | 0, | ||
65 | + TCG_COND_GE = 0 | 0 | 2 | 1, | ||
66 | + TCG_COND_LE = 8 | 0 | 2 | 0, | ||
67 | + TCG_COND_GT = 8 | 0 | 2 | 1, | ||
68 | + /* unsigned */ | ||
69 | + TCG_COND_LTU = 0 | 4 | 0 | 0, | ||
70 | + TCG_COND_GEU = 0 | 4 | 0 | 1, | ||
71 | + TCG_COND_LEU = 8 | 4 | 0 | 0, | ||
72 | + TCG_COND_GTU = 8 | 4 | 0 | 1, | ||
73 | +} TCGCond; | ||
74 | + | ||
75 | +/* Invert the sense of the comparison. */ | ||
76 | +static inline TCGCond tcg_invert_cond(TCGCond c) | ||
77 | +{ | ||
78 | + return (TCGCond)(c ^ 1); | ||
79 | +} | ||
80 | + | ||
81 | +/* Swap the operands in a comparison. */ | ||
82 | +static inline TCGCond tcg_swap_cond(TCGCond c) | ||
83 | +{ | ||
84 | + return c & 6 ? (TCGCond)(c ^ 9) : c; | ||
85 | +} | ||
86 | + | ||
87 | +/* Create an "unsigned" version of a "signed" comparison. */ | ||
88 | +static inline TCGCond tcg_unsigned_cond(TCGCond c) | ||
89 | +{ | ||
90 | + return c & 2 ? (TCGCond)(c ^ 6) : c; | ||
91 | +} | ||
92 | + | ||
93 | +/* Create a "signed" version of an "unsigned" comparison. */ | ||
94 | +static inline TCGCond tcg_signed_cond(TCGCond c) | ||
95 | +{ | ||
96 | + return c & 4 ? (TCGCond)(c ^ 6) : c; | ||
97 | +} | ||
98 | + | ||
99 | +/* Must a comparison be considered unsigned? */ | ||
100 | +static inline bool is_unsigned_cond(TCGCond c) | ||
101 | +{ | ||
102 | + return (c & 4) != 0; | ||
103 | +} | ||
104 | + | ||
105 | +/* | ||
106 | + * Create a "high" version of a double-word comparison. | ||
107 | + * This removes equality from a LTE or GTE comparison. | ||
108 | + */ | ||
109 | +static inline TCGCond tcg_high_cond(TCGCond c) | ||
110 | +{ | ||
111 | + switch (c) { | ||
112 | + case TCG_COND_GE: | ||
113 | + case TCG_COND_LE: | ||
114 | + case TCG_COND_GEU: | ||
115 | + case TCG_COND_LEU: | ||
116 | + return (TCGCond)(c ^ 8); | ||
117 | + default: | ||
118 | + return c; | ||
119 | + } | ||
120 | +} | ||
121 | + | ||
122 | +#endif /* TCG_COND_H */ | ||
123 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h | ||
124 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
125 | --- a/include/tcg/tcg.h | 21 | --- a/target/alpha/cpu.c |
126 | +++ b/include/tcg/tcg.h | 22 | +++ b/target/alpha/cpu.c |
127 | @@ -XXX,XX +XXX,XX @@ | 23 | @@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj) |
128 | #include "tcg/tcg-mo.h" | 24 | CPUAlphaState *env = &cpu->env; |
129 | #include "tcg-target.h" | 25 | |
130 | #include "qemu/int128.h" | 26 | cs->env_ptr = env; |
131 | +#include "tcg/tcg-cond.h" | 27 | - tlb_flush(cs); |
132 | 28 | ||
133 | /* XXX: make safe guess about sizes */ | 29 | env->lock_addr = -1; |
134 | #define MAX_OP_PER_INSTR 266 | 30 | #if defined(CONFIG_USER_ONLY) |
135 | @@ -XXX,XX +XXX,XX @@ typedef TCGv_ptr TCGv_env; | ||
136 | /* Used to align parameters. See the comment before tcgv_i32_temp. */ | ||
137 | #define TCG_CALL_DUMMY_ARG ((TCGArg)0) | ||
138 | |||
139 | -/* Conditions. Note that these are laid out for easy manipulation by | ||
140 | - the functions below: | ||
141 | - bit 0 is used for inverting; | ||
142 | - bit 1 is signed, | ||
143 | - bit 2 is unsigned, | ||
144 | - bit 3 is used with bit 0 for swapping signed/unsigned. */ | ||
145 | -typedef enum { | ||
146 | - /* non-signed */ | ||
147 | - TCG_COND_NEVER = 0 | 0 | 0 | 0, | ||
148 | - TCG_COND_ALWAYS = 0 | 0 | 0 | 1, | ||
149 | - TCG_COND_EQ = 8 | 0 | 0 | 0, | ||
150 | - TCG_COND_NE = 8 | 0 | 0 | 1, | ||
151 | - /* signed */ | ||
152 | - TCG_COND_LT = 0 | 0 | 2 | 0, | ||
153 | - TCG_COND_GE = 0 | 0 | 2 | 1, | ||
154 | - TCG_COND_LE = 8 | 0 | 2 | 0, | ||
155 | - TCG_COND_GT = 8 | 0 | 2 | 1, | ||
156 | - /* unsigned */ | ||
157 | - TCG_COND_LTU = 0 | 4 | 0 | 0, | ||
158 | - TCG_COND_GEU = 0 | 4 | 0 | 1, | ||
159 | - TCG_COND_LEU = 8 | 4 | 0 | 0, | ||
160 | - TCG_COND_GTU = 8 | 4 | 0 | 1, | ||
161 | -} TCGCond; | ||
162 | - | ||
163 | -/* Invert the sense of the comparison. */ | ||
164 | -static inline TCGCond tcg_invert_cond(TCGCond c) | ||
165 | -{ | ||
166 | - return (TCGCond)(c ^ 1); | ||
167 | -} | ||
168 | - | ||
169 | -/* Swap the operands in a comparison. */ | ||
170 | -static inline TCGCond tcg_swap_cond(TCGCond c) | ||
171 | -{ | ||
172 | - return c & 6 ? (TCGCond)(c ^ 9) : c; | ||
173 | -} | ||
174 | - | ||
175 | -/* Create an "unsigned" version of a "signed" comparison. */ | ||
176 | -static inline TCGCond tcg_unsigned_cond(TCGCond c) | ||
177 | -{ | ||
178 | - return c & 2 ? (TCGCond)(c ^ 6) : c; | ||
179 | -} | ||
180 | - | ||
181 | -/* Create a "signed" version of an "unsigned" comparison. */ | ||
182 | -static inline TCGCond tcg_signed_cond(TCGCond c) | ||
183 | -{ | ||
184 | - return c & 4 ? (TCGCond)(c ^ 6) : c; | ||
185 | -} | ||
186 | - | ||
187 | -/* Must a comparison be considered unsigned? */ | ||
188 | -static inline bool is_unsigned_cond(TCGCond c) | ||
189 | -{ | ||
190 | - return (c & 4) != 0; | ||
191 | -} | ||
192 | - | ||
193 | -/* Create a "high" version of a double-word comparison. | ||
194 | - This removes equality from a LTE or GTE comparison. */ | ||
195 | -static inline TCGCond tcg_high_cond(TCGCond c) | ||
196 | -{ | ||
197 | - switch (c) { | ||
198 | - case TCG_COND_GE: | ||
199 | - case TCG_COND_LE: | ||
200 | - case TCG_COND_GEU: | ||
201 | - case TCG_COND_LEU: | ||
202 | - return (TCGCond)(c ^ 8); | ||
203 | - default: | ||
204 | - return c; | ||
205 | - } | ||
206 | -} | ||
207 | - | ||
208 | typedef enum TCGTempVal { | ||
209 | TEMP_VAL_DEAD, | ||
210 | TEMP_VAL_REG, | ||
211 | -- | 31 | -- |
212 | 2.25.1 | 32 | 2.17.2 |
213 | 33 | ||
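
The encoding that the new tcg-cond.h header carries over is easy to verify mechanically: bit 0 inverts a comparison, flipping bits 0 and 3 together swaps the operands, and flipping bits 1 and 2 exchanges signed for unsigned. The following standalone C11 sketch is not part of the series; it copies the enum values shown above and replays the identities the inline helpers rely on:

```c
/* Standalone sanity check of the TCGCond bit encoding; the enum values
 * are copied from the patch above.  Compiles as C11 with no QEMU
 * headers required. */
typedef enum {
    TCG_COND_EQ  = 8 | 0 | 0 | 0,
    TCG_COND_NE  = 8 | 0 | 0 | 1,
    TCG_COND_LT  = 0 | 0 | 2 | 0,
    TCG_COND_GE  = 0 | 0 | 2 | 1,
    TCG_COND_LE  = 8 | 0 | 2 | 0,
    TCG_COND_GT  = 8 | 0 | 2 | 1,
    TCG_COND_LTU = 0 | 4 | 0 | 0,
} TCGCond;

/* Bit 0 inverts the sense of the comparison: LT <-> GE, EQ <-> NE. */
_Static_assert((TCG_COND_LT ^ 1) == TCG_COND_GE, "invert");
_Static_assert((TCG_COND_EQ ^ 1) == TCG_COND_NE, "invert-eq");

/* Bits 0 and 3 together swap the operands: a < b becomes b > a,
 * which is what tcg_swap_cond()'s "c ^ 9" computes. */
_Static_assert((TCG_COND_LT ^ 9) == TCG_COND_GT, "swap");

/* Bits 1 and 2 exchange signed for unsigned, hence "c ^ 6". */
_Static_assert((TCG_COND_LT ^ 6) == TCG_COND_LTU, "unsigned");

/* tcg_high_cond() clears bit 3 to drop equality: LE becomes LT. */
_Static_assert((TCG_COND_LE ^ 8) == TCG_COND_LT, "high");
```
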
1 | These macros are used in only one place. By expanding them, | 1 | From: "Emilio G. Cota" <cota@braap.org> |
---|---|---|---|
2 | we can apply some common-subexpression elimination | ||
3 | and introduce some local variables. | ||
4 | 2 | ||
5 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 3 | As far as I can tell tlb_flush does not need to be called |
6 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 4 | this early. tlb_flush is eventually called after the CPU |
5 | has been realized. | ||
6 | |||
7 | This change paves the way for the introduction of tlb_init, | ||
8 | which will be called from cpu_exec_realizefn. | ||
9 | |||
10 | Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> | ||
11 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
14 | Message-Id: <20181009174557.16125-3-cota@braap.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 15 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 16 | --- |
9 | tcg/tci.c | 165 +++++++++++++++++++++++++++++++++--------------------- | 17 | target/unicore32/cpu.c | 2 -- |
10 | 1 file changed, 100 insertions(+), 65 deletions(-) | 18 | 1 file changed, 2 deletions(-) |
11 | 19 | ||
12 | diff --git a/tcg/tci.c b/tcg/tci.c | 20 | diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c |
13 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/tci.c | 22 | --- a/target/unicore32/cpu.c |
15 | +++ b/tcg/tci.c | 23 | +++ b/target/unicore32/cpu.c |
16 | @@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) | 24 | @@ -XXX,XX +XXX,XX @@ static void uc32_cpu_initfn(Object *obj) |
17 | return result; | 25 | env->uncached_asr = ASR_MODE_PRIV; |
26 | env->regs[31] = 0x03000000; | ||
27 | #endif | ||
28 | - | ||
29 | - tlb_flush(cs); | ||
18 | } | 30 | } |
19 | 31 | ||
20 | -#ifdef CONFIG_SOFTMMU | 32 | static const VMStateDescription vmstate_uc32_cpu = { |
21 | -# define qemu_ld_ub \ | ||
22 | - helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
23 | -# define qemu_ld_leuw \ | ||
24 | - helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
25 | -# define qemu_ld_leul \ | ||
26 | - helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
27 | -# define qemu_ld_leq \ | ||
28 | - helper_le_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
29 | -# define qemu_ld_beuw \ | ||
30 | - helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
31 | -# define qemu_ld_beul \ | ||
32 | - helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
33 | -# define qemu_ld_beq \ | ||
34 | - helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
35 | -# define qemu_st_b(X) \ | ||
36 | - helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
37 | -# define qemu_st_lew(X) \ | ||
38 | - helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
39 | -# define qemu_st_lel(X) \ | ||
40 | - helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
41 | -# define qemu_st_leq(X) \ | ||
42 | - helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
43 | -# define qemu_st_bew(X) \ | ||
44 | - helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
45 | -# define qemu_st_bel(X) \ | ||
46 | - helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
47 | -# define qemu_st_beq(X) \ | ||
48 | - helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
49 | -#else | ||
50 | -# define qemu_ld_ub ldub_p(g2h(env_cpu(env), taddr)) | ||
51 | -# define qemu_ld_leuw lduw_le_p(g2h(env_cpu(env), taddr)) | ||
52 | -# define qemu_ld_leul (uint32_t)ldl_le_p(g2h(env_cpu(env), taddr)) | ||
53 | -# define qemu_ld_leq ldq_le_p(g2h(env_cpu(env), taddr)) | ||
54 | -# define qemu_ld_beuw lduw_be_p(g2h(env_cpu(env), taddr)) | ||
55 | -# define qemu_ld_beul (uint32_t)ldl_be_p(g2h(env_cpu(env), taddr)) | ||
56 | -# define qemu_ld_beq ldq_be_p(g2h(env_cpu(env), taddr)) | ||
57 | -# define qemu_st_b(X) stb_p(g2h(env_cpu(env), taddr), X) | ||
58 | -# define qemu_st_lew(X) stw_le_p(g2h(env_cpu(env), taddr), X) | ||
59 | -# define qemu_st_lel(X) stl_le_p(g2h(env_cpu(env), taddr), X) | ||
60 | -# define qemu_st_leq(X) stq_le_p(g2h(env_cpu(env), taddr), X) | ||
61 | -# define qemu_st_bew(X) stw_be_p(g2h(env_cpu(env), taddr), X) | ||
62 | -# define qemu_st_bel(X) stl_be_p(g2h(env_cpu(env), taddr), X) | ||
63 | -# define qemu_st_beq(X) stq_be_p(g2h(env_cpu(env), taddr), X) | ||
64 | -#endif | ||
65 | - | ||
66 | static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, | ||
67 | TCGMemOpIdx oi, const void *tb_ptr) | ||
68 | { | ||
69 | MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); | ||
70 | |||
71 | +#ifdef CONFIG_SOFTMMU | ||
72 | + uintptr_t ra = (uintptr_t)tb_ptr; | ||
73 | + | ||
74 | switch (mop) { | ||
75 | case MO_UB: | ||
76 | - return qemu_ld_ub; | ||
77 | + return helper_ret_ldub_mmu(env, taddr, oi, ra); | ||
78 | case MO_SB: | ||
79 | - return (int8_t)qemu_ld_ub; | ||
80 | + return helper_ret_ldsb_mmu(env, taddr, oi, ra); | ||
81 | case MO_LEUW: | ||
82 | - return qemu_ld_leuw; | ||
83 | + return helper_le_lduw_mmu(env, taddr, oi, ra); | ||
84 | case MO_LESW: | ||
85 | - return (int16_t)qemu_ld_leuw; | ||
86 | + return helper_le_ldsw_mmu(env, taddr, oi, ra); | ||
87 | case MO_LEUL: | ||
88 | - return qemu_ld_leul; | ||
89 | + return helper_le_ldul_mmu(env, taddr, oi, ra); | ||
90 | case MO_LESL: | ||
91 | - return (int32_t)qemu_ld_leul; | ||
92 | + return helper_le_ldsl_mmu(env, taddr, oi, ra); | ||
93 | case MO_LEQ: | ||
94 | - return qemu_ld_leq; | ||
95 | + return helper_le_ldq_mmu(env, taddr, oi, ra); | ||
96 | case MO_BEUW: | ||
97 | - return qemu_ld_beuw; | ||
98 | + return helper_be_lduw_mmu(env, taddr, oi, ra); | ||
99 | case MO_BESW: | ||
100 | - return (int16_t)qemu_ld_beuw; | ||
101 | + return helper_be_ldsw_mmu(env, taddr, oi, ra); | ||
102 | case MO_BEUL: | ||
103 | - return qemu_ld_beul; | ||
104 | + return helper_be_ldul_mmu(env, taddr, oi, ra); | ||
105 | case MO_BESL: | ||
106 | - return (int32_t)qemu_ld_beul; | ||
107 | + return helper_be_ldsl_mmu(env, taddr, oi, ra); | ||
108 | case MO_BEQ: | ||
109 | - return qemu_ld_beq; | ||
110 | + return helper_be_ldq_mmu(env, taddr, oi, ra); | ||
111 | default: | ||
112 | g_assert_not_reached(); | ||
113 | } | ||
114 | +#else | ||
115 | + void *haddr = g2h(env_cpu(env), taddr); | ||
116 | + uint64_t ret; | ||
117 | + | ||
118 | + switch (mop) { | ||
119 | + case MO_UB: | ||
120 | + ret = ldub_p(haddr); | ||
121 | + break; | ||
122 | + case MO_SB: | ||
123 | + ret = ldsb_p(haddr); | ||
124 | + break; | ||
125 | + case MO_LEUW: | ||
126 | + ret = lduw_le_p(haddr); | ||
127 | + break; | ||
128 | + case MO_LESW: | ||
129 | + ret = ldsw_le_p(haddr); | ||
130 | + break; | ||
131 | + case MO_LEUL: | ||
132 | + ret = (uint32_t)ldl_le_p(haddr); | ||
133 | + break; | ||
134 | + case MO_LESL: | ||
135 | + ret = (int32_t)ldl_le_p(haddr); | ||
136 | + break; | ||
137 | + case MO_LEQ: | ||
138 | + ret = ldq_le_p(haddr); | ||
139 | + break; | ||
140 | + case MO_BEUW: | ||
141 | + ret = lduw_be_p(haddr); | ||
142 | + break; | ||
143 | + case MO_BESW: | ||
144 | + ret = ldsw_be_p(haddr); | ||
145 | + break; | ||
146 | + case MO_BEUL: | ||
147 | + ret = (uint32_t)ldl_be_p(haddr); | ||
148 | + break; | ||
149 | + case MO_BESL: | ||
150 | + ret = (int32_t)ldl_be_p(haddr); | ||
151 | + break; | ||
152 | + case MO_BEQ: | ||
153 | + ret = ldq_be_p(haddr); | ||
154 | + break; | ||
155 | + default: | ||
156 | + g_assert_not_reached(); | ||
157 | + } | ||
158 | + return ret; | ||
159 | +#endif | ||
160 | } | ||
161 | |||
162 | static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, | ||
163 | @@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, | ||
164 | { | ||
165 | MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); | ||
166 | |||
167 | +#ifdef CONFIG_SOFTMMU | ||
168 | + uintptr_t ra = (uintptr_t)tb_ptr; | ||
169 | + | ||
170 | switch (mop) { | ||
171 | case MO_UB: | ||
172 | - qemu_st_b(val); | ||
173 | + helper_ret_stb_mmu(env, taddr, val, oi, ra); | ||
174 | break; | ||
175 | case MO_LEUW: | ||
176 | - qemu_st_lew(val); | ||
177 | + helper_le_stw_mmu(env, taddr, val, oi, ra); | ||
178 | break; | ||
179 | case MO_LEUL: | ||
180 | - qemu_st_lel(val); | ||
181 | + helper_le_stl_mmu(env, taddr, val, oi, ra); | ||
182 | break; | ||
183 | case MO_LEQ: | ||
184 | - qemu_st_leq(val); | ||
185 | + helper_le_stq_mmu(env, taddr, val, oi, ra); | ||
186 | break; | ||
187 | case MO_BEUW: | ||
188 | - qemu_st_bew(val); | ||
189 | + helper_be_stw_mmu(env, taddr, val, oi, ra); | ||
190 | break; | ||
191 | case MO_BEUL: | ||
192 | - qemu_st_bel(val); | ||
193 | + helper_be_stl_mmu(env, taddr, val, oi, ra); | ||
194 | break; | ||
195 | case MO_BEQ: | ||
196 | - qemu_st_beq(val); | ||
197 | + helper_be_stq_mmu(env, taddr, val, oi, ra); | ||
198 | break; | ||
199 | default: | ||
200 | g_assert_not_reached(); | ||
201 | } | ||
202 | +#else | ||
203 | + void *haddr = g2h(env_cpu(env), taddr); | ||
204 | + | ||
205 | + switch (mop) { | ||
206 | + case MO_UB: | ||
207 | + stb_p(haddr, val); | ||
208 | + break; | ||
209 | + case MO_LEUW: | ||
210 | + stw_le_p(haddr, val); | ||
211 | + break; | ||
212 | + case MO_LEUL: | ||
213 | + stl_le_p(haddr, val); | ||
214 | + break; | ||
215 | + case MO_LEQ: | ||
216 | + stq_le_p(haddr, val); | ||
217 | + break; | ||
218 | + case MO_BEUW: | ||
219 | + stw_be_p(haddr, val); | ||
220 | + break; | ||
221 | + case MO_BEUL: | ||
222 | + stl_be_p(haddr, val); | ||
223 | + break; | ||
224 | + case MO_BEQ: | ||
225 | + stq_be_p(haddr, val); | ||
226 | + break; | ||
227 | + default: | ||
228 | + g_assert_not_reached(); | ||
229 | + } | ||
230 | +#endif | ||
231 | } | ||
232 | |||
233 | #if TCG_TARGET_REG_BITS == 64 | ||
234 | -- | 33 | -- |
235 | 2.25.1 | 34 | 2.17.2 |
236 | 35 | ||
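
A note on the oi operand that tci_qemu_ld() and tci_qemu_st() decode at their tops: a TCGMemOpIdx packs the memory operation and the softmmu index into a single value. The sketch below mirrors my reading of make_memop_idx()/get_memop()/get_mmuidx() in tcg/tcg.h at the time of this series, and is illustrative rather than quoted from the tree:

```c
#include <stdint.h>

/* Illustrative only: a TCGMemOpIdx keeps the MemOp in the high bits
 * and the softmmu index in the low four bits. */
typedef uint32_t TCGMemOpIdx;

static inline TCGMemOpIdx make_memop_idx(unsigned op, unsigned mmu_idx)
{
    return (op << 4) | mmu_idx;        /* mmu_idx must fit in 4 bits */
}

static inline unsigned get_memop(TCGMemOpIdx oi)
{
    return oi >> 4;
}

static inline unsigned get_mmuidx(TCGMemOpIdx oi)
{
    return oi & 15;
}
```

The `get_memop(oi) & (MO_BSWAP | MO_SSIZE)` mask at the top of both functions then keeps only the byte-swap, sign and size bits, which is all that is needed to pick a helper; alignment checking is handled inside the softmmu helpers themselves.
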
1 | We can share this code between 32-bit and 64-bit loads and stores. | 1 | From: "Emilio G. Cota" <cota@braap.org> |
---|---|---|---|
2 | 2 | ||
3 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 3 | Paves the way for the addition of a per-TLB lock. |
4 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 4 | |
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
8 | Message-Id: <20181009174557.16125-4-cota@braap.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 10 | --- |
7 | tcg/tci.c | 183 +++++++++++++++++++++--------------------------------- | 11 | include/exec/exec-all.h | 8 ++++++++ |
8 | 1 file changed, 71 insertions(+), 112 deletions(-) | 12 | accel/tcg/cputlb.c | 4 ++++ |
13 | exec.c | 1 + | ||
14 | 3 files changed, 13 insertions(+) | ||
9 | 15 | ||
10 | diff --git a/tcg/tci.c b/tcg/tci.c | 16 | diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h |
11 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/tci.c | 18 | --- a/include/exec/exec-all.h |
13 | +++ b/tcg/tci.c | 19 | +++ b/include/exec/exec-all.h |
14 | @@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) | 20 | @@ -XXX,XX +XXX,XX @@ void cpu_address_space_init(CPUState *cpu, int asidx, |
15 | #define qemu_st_beq(X) \ | 21 | |
16 | cpu_stq_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) | 22 | #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) |
17 | 23 | /* cputlb.c */ | |
18 | +static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, | 24 | +/** |
19 | + TCGMemOpIdx oi, const void *tb_ptr) | 25 | + * tlb_init - initialize a CPU's TLB |
26 | + * @cpu: CPU whose TLB should be initialized | ||
27 | + */ | ||
28 | +void tlb_init(CPUState *cpu); | ||
29 | /** | ||
30 | * tlb_flush_page: | ||
31 | * @cpu: CPU whose TLB should be flushed | ||
32 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, | ||
33 | void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, | ||
34 | uintptr_t retaddr); | ||
35 | #else | ||
36 | +static inline void tlb_init(CPUState *cpu) | ||
20 | +{ | 37 | +{ |
21 | + MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); | 38 | +} |
22 | + | 39 | static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) |
23 | + switch (mop) { | 40 | { |
24 | + case MO_UB: | 41 | } |
25 | + return qemu_ld_ub; | 42 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c |
26 | + case MO_SB: | 43 | index XXXXXXX..XXXXXXX 100644 |
27 | + return (int8_t)qemu_ld_ub; | 44 | --- a/accel/tcg/cputlb.c |
28 | + case MO_LEUW: | 45 | +++ b/accel/tcg/cputlb.c |
29 | + return qemu_ld_leuw; | 46 | @@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); |
30 | + case MO_LESW: | 47 | QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); |
31 | + return (int16_t)qemu_ld_leuw; | 48 | #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) |
32 | + case MO_LEUL: | 49 | |
33 | + return qemu_ld_leul; | 50 | +void tlb_init(CPUState *cpu) |
34 | + case MO_LESL: | 51 | +{ |
35 | + return (int32_t)qemu_ld_leul; | ||
36 | + case MO_LEQ: | ||
37 | + return qemu_ld_leq; | ||
38 | + case MO_BEUW: | ||
39 | + return qemu_ld_beuw; | ||
40 | + case MO_BESW: | ||
41 | + return (int16_t)qemu_ld_beuw; | ||
42 | + case MO_BEUL: | ||
43 | + return qemu_ld_beul; | ||
44 | + case MO_BESL: | ||
45 | + return (int32_t)qemu_ld_beul; | ||
46 | + case MO_BEQ: | ||
47 | + return qemu_ld_beq; | ||
48 | + default: | ||
49 | + g_assert_not_reached(); | ||
50 | + } | ||
51 | +} | 52 | +} |
52 | + | 53 | + |
53 | +static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, | 54 | /* flush_all_helper: run fn across all cpus |
54 | + TCGMemOpIdx oi, const void *tb_ptr) | 55 | * |
55 | +{ | 56 | * If the wait flag is set then the src cpu's helper will be queued as |
56 | + MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); | 57 | diff --git a/exec.c b/exec.c |
57 | + | 58 | index XXXXXXX..XXXXXXX 100644 |
58 | + switch (mop) { | 59 | --- a/exec.c |
59 | + case MO_UB: | 60 | +++ b/exec.c |
60 | + qemu_st_b(val); | 61 | @@ -XXX,XX +XXX,XX @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) |
61 | + break; | 62 | tcg_target_initialized = true; |
62 | + case MO_LEUW: | 63 | cc->tcg_initialize(); |
63 | + qemu_st_lew(val); | 64 | } |
64 | + break; | 65 | + tlb_init(cpu); |
65 | + case MO_LEUL: | 66 | |
66 | + qemu_st_lel(val); | 67 | #ifndef CONFIG_USER_ONLY |
67 | + break; | 68 | if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { |
68 | + case MO_LEQ: | ||
69 | + qemu_st_leq(val); | ||
70 | + break; | ||
71 | + case MO_BEUW: | ||
72 | + qemu_st_bew(val); | ||
73 | + break; | ||
74 | + case MO_BEUL: | ||
75 | + qemu_st_bel(val); | ||
76 | + break; | ||
77 | + case MO_BEQ: | ||
78 | + qemu_st_beq(val); | ||
79 | + break; | ||
80 | + default: | ||
81 | + g_assert_not_reached(); | ||
82 | + } | ||
83 | +} | ||
84 | + | ||
85 | #if TCG_TARGET_REG_BITS == 64 | ||
86 | # define CASE_32_64(x) \ | ||
87 | case glue(glue(INDEX_op_, x), _i64): \ | ||
88 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
89 | tci_args_rrrm(insn, &r0, &r1, &r2, &oi); | ||
90 | taddr = tci_uint64(regs[r2], regs[r1]); | ||
91 | } | ||
92 | - switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { | ||
93 | - case MO_UB: | ||
94 | - tmp32 = qemu_ld_ub; | ||
95 | - break; | ||
96 | - case MO_SB: | ||
97 | - tmp32 = (int8_t)qemu_ld_ub; | ||
98 | - break; | ||
99 | - case MO_LEUW: | ||
100 | - tmp32 = qemu_ld_leuw; | ||
101 | - break; | ||
102 | - case MO_LESW: | ||
103 | - tmp32 = (int16_t)qemu_ld_leuw; | ||
104 | - break; | ||
105 | - case MO_LEUL: | ||
106 | - tmp32 = qemu_ld_leul; | ||
107 | - break; | ||
108 | - case MO_BEUW: | ||
109 | - tmp32 = qemu_ld_beuw; | ||
110 | - break; | ||
111 | - case MO_BESW: | ||
112 | - tmp32 = (int16_t)qemu_ld_beuw; | ||
113 | - break; | ||
114 | - case MO_BEUL: | ||
115 | - tmp32 = qemu_ld_beul; | ||
116 | - break; | ||
117 | - default: | ||
118 | - g_assert_not_reached(); | ||
119 | - } | ||
120 | + tmp32 = tci_qemu_ld(env, taddr, oi, tb_ptr); | ||
121 | regs[r0] = tmp32; | ||
122 | break; | ||
123 | |||
124 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
125 | taddr = tci_uint64(regs[r3], regs[r2]); | ||
126 | oi = regs[r4]; | ||
127 | } | ||
128 | - switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { | ||
129 | - case MO_UB: | ||
130 | - tmp64 = qemu_ld_ub; | ||
131 | - break; | ||
132 | - case MO_SB: | ||
133 | - tmp64 = (int8_t)qemu_ld_ub; | ||
134 | - break; | ||
135 | - case MO_LEUW: | ||
136 | - tmp64 = qemu_ld_leuw; | ||
137 | - break; | ||
138 | - case MO_LESW: | ||
139 | - tmp64 = (int16_t)qemu_ld_leuw; | ||
140 | - break; | ||
141 | - case MO_LEUL: | ||
142 | - tmp64 = qemu_ld_leul; | ||
143 | - break; | ||
144 | - case MO_LESL: | ||
145 | - tmp64 = (int32_t)qemu_ld_leul; | ||
146 | - break; | ||
147 | - case MO_LEQ: | ||
148 | - tmp64 = qemu_ld_leq; | ||
149 | - break; | ||
150 | - case MO_BEUW: | ||
151 | - tmp64 = qemu_ld_beuw; | ||
152 | - break; | ||
153 | - case MO_BESW: | ||
154 | - tmp64 = (int16_t)qemu_ld_beuw; | ||
155 | - break; | ||
156 | - case MO_BEUL: | ||
157 | - tmp64 = qemu_ld_beul; | ||
158 | - break; | ||
159 | - case MO_BESL: | ||
160 | - tmp64 = (int32_t)qemu_ld_beul; | ||
161 | - break; | ||
162 | - case MO_BEQ: | ||
163 | - tmp64 = qemu_ld_beq; | ||
164 | - break; | ||
165 | - default: | ||
166 | - g_assert_not_reached(); | ||
167 | - } | ||
168 | + tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr); | ||
169 | if (TCG_TARGET_REG_BITS == 32) { | ||
170 | tci_write_reg64(regs, r1, r0, tmp64); | ||
171 | } else { | ||
172 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
173 | taddr = tci_uint64(regs[r2], regs[r1]); | ||
174 | } | ||
175 | tmp32 = regs[r0]; | ||
176 | - switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { | ||
177 | - case MO_UB: | ||
178 | - qemu_st_b(tmp32); | ||
179 | - break; | ||
180 | - case MO_LEUW: | ||
181 | - qemu_st_lew(tmp32); | ||
182 | - break; | ||
183 | - case MO_LEUL: | ||
184 | - qemu_st_lel(tmp32); | ||
185 | - break; | ||
186 | - case MO_BEUW: | ||
187 | - qemu_st_bew(tmp32); | ||
188 | - break; | ||
189 | - case MO_BEUL: | ||
190 | - qemu_st_bel(tmp32); | ||
191 | - break; | ||
192 | - default: | ||
193 | - g_assert_not_reached(); | ||
194 | - } | ||
195 | + tci_qemu_st(env, taddr, tmp32, oi, tb_ptr); | ||
196 | break; | ||
197 | |||
198 | case INDEX_op_qemu_st_i64: | ||
199 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
200 | } | ||
201 | tmp64 = tci_uint64(regs[r1], regs[r0]); | ||
202 | } | ||
203 | - switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { | ||
204 | - case MO_UB: | ||
205 | - qemu_st_b(tmp64); | ||
206 | - break; | ||
207 | - case MO_LEUW: | ||
208 | - qemu_st_lew(tmp64); | ||
209 | - break; | ||
210 | - case MO_LEUL: | ||
211 | - qemu_st_lel(tmp64); | ||
212 | - break; | ||
213 | - case MO_LEQ: | ||
214 | - qemu_st_leq(tmp64); | ||
215 | - break; | ||
216 | - case MO_BEUW: | ||
217 | - qemu_st_bew(tmp64); | ||
218 | - break; | ||
219 | - case MO_BEUL: | ||
220 | - qemu_st_bel(tmp64); | ||
221 | - break; | ||
222 | - case MO_BEQ: | ||
223 | - qemu_st_beq(tmp64); | ||
224 | - break; | ||
225 | - default: | ||
226 | - g_assert_not_reached(); | ||
227 | - } | ||
228 | + tci_qemu_st(env, taddr, tmp64, oi, tb_ptr); | ||
229 | break; | ||
230 | |||
231 | case INDEX_op_mb: | ||
232 | -- | 69 | -- |
233 | 2.25.1 | 70 | 2.17.2 |
234 | 71 | ||
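
One detail worth spelling out in the interpreter changes above: when TCG_TARGET_REG_BITS == 32, a 64-bit guest address or value arrives split across two host registers, so the shared helpers are fed via tci_uint64() and results flow back out through tci_write_reg64(). A minimal sketch of the reassembly step, assumed to match the helper already defined elsewhere in tci.c:

```c
#include <stdint.h>

/* Recombine the (high, low) register pair used on 32-bit hosts, as in
 * the tci_uint64(regs[r2], regs[r1]) calls above.  Sketch only. */
static uint64_t tci_uint64(uint32_t high, uint32_t low)
{
    return ((uint64_t)high << 32) | low;
}
```

The store path is symmetric: the 64-bit value to store is rebuilt with tci_uint64(regs[r1], regs[r0]) before the single tci_qemu_st() call, so the same helper body serves both register widths.
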
1 | Wrap guest memory operations for tci like we do for cpu_ld*_data. | 1 | From: "Emilio G. Cota" <cota@braap.org> |
---|---|---|---|
2 | 2 | ||
3 | We cannot actually use the cpu_ldst.h interface without duplicating | 3 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
4 | the memory trace operations performed within it, which will already | 4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> |
5 | have been expanded into the tcg opcode stream. | 5 | Signed-off-by: Emilio G. Cota <cota@braap.org> |
6 | 6 | Message-Id: <20181009174557.16125-5-cota@braap.org> | |
7 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 8 | --- |
11 | tcg/tci.c | 10 ++++++---- | 9 | accel/tcg/cputlb.c | 4 ++-- |
12 | 1 file changed, 6 insertions(+), 4 deletions(-) | 10 | 1 file changed, 2 insertions(+), 2 deletions(-) |
13 | 11 | ||
14 | diff --git a/tcg/tci.c b/tcg/tci.c | 12 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c |
15 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/tcg/tci.c | 14 | --- a/accel/tcg/cputlb.c |
17 | +++ b/tcg/tci.c | 15 | +++ b/accel/tcg/cputlb.c |
18 | @@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, | 16 | @@ -XXX,XX +XXX,XX @@ |
19 | TCGMemOpIdx oi, const void *tb_ptr) | 17 | } \ |
20 | { | 18 | } while (0) |
21 | MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); | 19 | |
22 | - | 20 | -#define assert_cpu_is_self(this_cpu) do { \ |
23 | -#ifdef CONFIG_SOFTMMU | 21 | +#define assert_cpu_is_self(cpu) do { \ |
24 | uintptr_t ra = (uintptr_t)tb_ptr; | 22 | if (DEBUG_TLB_GATE) { \ |
25 | 23 | - g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \ | |
26 | +#ifdef CONFIG_SOFTMMU | 24 | + g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ |
27 | switch (mop) { | 25 | } \ |
28 | case MO_UB: | 26 | } while (0) |
29 | return helper_ret_ldub_mmu(env, taddr, oi, ra); | ||
30 | @@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, | ||
31 | void *haddr = g2h(env_cpu(env), taddr); | ||
32 | uint64_t ret; | ||
33 | |||
34 | + set_helper_retaddr(ra); | ||
35 | switch (mop) { | ||
36 | case MO_UB: | ||
37 | ret = ldub_p(haddr); | ||
38 | @@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, | ||
39 | default: | ||
40 | g_assert_not_reached(); | ||
41 | } | ||
42 | + clear_helper_retaddr(); | ||
43 | return ret; | ||
44 | #endif | ||
45 | } | ||
46 | @@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, | ||
47 | TCGMemOpIdx oi, const void *tb_ptr) | ||
48 | { | ||
49 | MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); | ||
50 | - | ||
51 | -#ifdef CONFIG_SOFTMMU | ||
52 | uintptr_t ra = (uintptr_t)tb_ptr; | ||
53 | |||
54 | +#ifdef CONFIG_SOFTMMU | ||
55 | switch (mop) { | ||
56 | case MO_UB: | ||
57 | helper_ret_stb_mmu(env, taddr, val, oi, ra); | ||
58 | @@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, | ||
59 | #else | ||
60 | void *haddr = g2h(env_cpu(env), taddr); | ||
61 | |||
62 | + set_helper_retaddr(ra); | ||
63 | switch (mop) { | ||
64 | case MO_UB: | ||
65 | stb_p(haddr, val); | ||
66 | @@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, | ||
67 | default: | ||
68 | g_assert_not_reached(); | ||
69 | } | ||
70 | + clear_helper_retaddr(); | ||
71 | #endif | ||
72 | } | ||
73 | 27 | ||
74 | -- | 28 | -- |
75 | 2.25.1 | 29 | 2.17.2 |
76 | 30 | ||
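
The set_helper_retaddr()/clear_helper_retaddr() bracketing added above is what keeps the user-only fast path unwindable: if the direct host access faults, the SIGSEGV handler reads a thread-local to learn which translation-block return address the fault belongs to. A rough sketch of the pair as declared in include/exec/cpu_ldst.h; the compiler-barrier placement reflects my understanding of the tree at this time rather than a verbatim quote:

```c
#include <stdint.h>

/* Thread-local consulted by the SIGSEGV handler under CONFIG_USER_ONLY.
 * QEMU declares this extern in the header; defined here so the sketch
 * stands alone. */
__thread uintptr_t helper_retaddr;

static inline void set_helper_retaddr(uintptr_t ra)
{
    helper_retaddr = ra;
    /* Compiler barrier: the store must be ordered before the guest
     * memory access that might fault. */
    __asm__ __volatile__("" ::: "memory");
}

static inline void clear_helper_retaddr(void)
{
    __asm__ __volatile__("" ::: "memory");
    helper_retaddr = 0;
}
```
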
1 | This will give us both flags and typemask for use later. | 1 | From: "Emilio G. Cota" <cota@braap.org> |
---|---|---|---|
2 | 2 | ||
3 | We also fix a dumping bug, wherein calls generated for plugins | 3 | Currently we rely on atomic operations for cross-CPU invalidations. |
4 | fail tcg_find_helper and print (null) instead of either a name | 4 | There are two cases that these atomics miss: cross-CPU invalidations |
5 | or the raw function pointer. | 5 | can race with either (1) vCPU threads flushing their TLB, which |
6 | 6 | happens via memset, or (2) vCPUs calling tlb_reset_dirty on their TLB, | |
7 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 7 | which updates .addr_write with a regular store. This results in |
8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 8 | undefined behaviour, since we're mixing regular and atomic ops |
9 | on concurrent accesses. | ||
10 | |||
11 | Fix it by using tlb_lock, a per-vCPU lock. All updaters of tlb_table | ||
12 | and the corresponding victim cache now hold the lock. | ||
13 | The readers that do not hold tlb_lock must use atomic reads when | ||
14 | reading .addr_write, since this field can be updated by other threads; | ||
15 | the conversion to atomic reads is done in the next patch. | ||
16 | |||
17 | Note that an alternative fix would be to expand the use of atomic ops. | ||
18 | However, in the case of TLB flushes this would have a huge performance | ||
19 | impact, since (1) TLB flushes can happen very frequently and (2) we | ||
20 | currently use a full memory barrier to flush each TLB entry, and a TLB | ||
21 | has many entries. Instead, acquiring the lock is barely slower than a | ||
22 | full memory barrier since it is uncontended, and with a single lock | ||
23 | acquisition we can flush the entire TLB. | ||
24 | |||
25 | Tested-by: Alex Bennée <alex.bennee@linaro.org> | ||
26 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
27 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
28 | Message-Id: <20181009174557.16125-6-cota@braap.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 29 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 30 | --- |
11 | tcg/tcg-internal.h | 14 ++++++++++++- | 31 | include/exec/cpu-defs.h | 3 + |
12 | tcg/tcg.c | 49 ++++++++++++++++++++-------------------------- | 32 | accel/tcg/cputlb.c | 155 ++++++++++++++++++++++------------------ |
13 | 2 files changed, 34 insertions(+), 29 deletions(-) | 33 | 2 files changed, 87 insertions(+), 71 deletions(-) |
14 | 34 | ||
15 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | 35 | diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h |
16 | index XXXXXXX..XXXXXXX 100644 | 36 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tcg/tcg-internal.h | 37 | --- a/include/exec/cpu-defs.h |
18 | +++ b/tcg/tcg-internal.h | 38 | +++ b/include/exec/cpu-defs.h |
19 | @@ -XXX,XX +XXX,XX @@ | 39 | @@ -XXX,XX +XXX,XX @@ |
20 | 40 | #endif | |
21 | #define TCG_HIGHWATER 1024 | 41 | |
22 | 42 | #include "qemu/host-utils.h" | |
23 | +typedef struct TCGHelperInfo { | 43 | +#include "qemu/thread.h" |
24 | + void *func; | 44 | #include "qemu/queue.h" |
25 | + const char *name; | 45 | #ifdef CONFIG_TCG |
26 | + unsigned flags; | 46 | #include "tcg-target.h" |
27 | + unsigned typemask; | 47 | @@ -XXX,XX +XXX,XX @@ typedef struct CPUIOTLBEntry { |
28 | +} TCGHelperInfo; | 48 | |
29 | + | 49 | #define CPU_COMMON_TLB \ |
30 | extern TCGContext tcg_init_ctx; | 50 | /* The meaning of the MMU modes is defined in the target code. */ \ |
31 | extern TCGContext **tcg_ctxs; | 51 | + /* tlb_lock serializes updates to tlb_table and tlb_v_table */ \ |
32 | extern unsigned int tcg_cur_ctxs; | 52 | + QemuSpin tlb_lock; \ |
33 | @@ -XXX,XX +XXX,XX @@ bool tcg_region_alloc(TCGContext *s); | 53 | CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \ |
34 | void tcg_region_initial_alloc(TCGContext *s); | 54 | CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \ |
35 | void tcg_region_prologue_set(TCGContext *s); | 55 | CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \ |
36 | 56 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | |
37 | +static inline const TCGHelperInfo *tcg_call_info(TCGOp *op) | ||
38 | +{ | ||
39 | + return (void *)(uintptr_t)op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; | ||
40 | +} | ||
41 | + | ||
42 | static inline unsigned tcg_call_flags(TCGOp *op) | ||
43 | { | ||
44 | - return op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; | ||
45 | + return tcg_call_info(op)->flags; | ||
46 | } | ||
47 | |||
48 | #endif /* TCG_INTERNAL_H */ | ||
49 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | 57 | index XXXXXXX..XXXXXXX 100644 |
51 | --- a/tcg/tcg.c | 58 | --- a/accel/tcg/cputlb.c |
52 | +++ b/tcg/tcg.c | 59 | +++ b/accel/tcg/cputlb.c |
53 | @@ -XXX,XX +XXX,XX @@ void tcg_pool_reset(TCGContext *s) | 60 | @@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); |
54 | s->pool_current = NULL; | 61 | |
55 | } | 62 | void tlb_init(CPUState *cpu) |
56 | 63 | { | |
57 | -typedef struct TCGHelperInfo { | 64 | + CPUArchState *env = cpu->env_ptr; |
58 | - void *func; | 65 | + |
59 | - const char *name; | 66 | + qemu_spin_init(&env->tlb_lock); |
60 | - unsigned flags; | 67 | } |
61 | - unsigned typemask; | 68 | |
62 | -} TCGHelperInfo; | 69 | /* flush_all_helper: run fn across all cpus |
70 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_nocheck(CPUState *cpu) | ||
71 | atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1); | ||
72 | tlb_debug("(count: %zu)\n", tlb_flush_count()); | ||
73 | |||
74 | + /* | ||
75 | + * tlb_table/tlb_v_table updates from any thread must hold tlb_lock. | ||
76 | + * However, updates from the owner thread (as is the case here; see the | ||
77 | + * above assert_cpu_is_self) do not need atomic_set because all reads | ||
78 | + * that do not hold the lock are performed by the same owner thread. | ||
79 | + */ | ||
80 | + qemu_spin_lock(&env->tlb_lock); | ||
81 | memset(env->tlb_table, -1, sizeof(env->tlb_table)); | ||
82 | memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); | ||
83 | + qemu_spin_unlock(&env->tlb_lock); | ||
84 | + | ||
85 | cpu_tb_jmp_cache_clear(cpu); | ||
86 | |||
87 | env->vtlb_index = 0; | ||
88 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) | ||
89 | |||
90 | tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask); | ||
91 | |||
92 | + qemu_spin_lock(&env->tlb_lock); | ||
93 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
94 | |||
95 | if (test_bit(mmu_idx, &mmu_idx_bitmask)) { | ||
96 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) | ||
97 | memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0])); | ||
98 | } | ||
99 | } | ||
100 | + qemu_spin_unlock(&env->tlb_lock); | ||
101 | |||
102 | cpu_tb_jmp_cache_clear(cpu); | ||
103 | |||
104 | @@ -XXX,XX +XXX,XX @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, | ||
105 | tlb_hit_page(tlb_entry->addr_code, page); | ||
106 | } | ||
107 | |||
108 | -static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong page) | ||
109 | +/* Called with tlb_lock held */ | ||
110 | +static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, | ||
111 | + target_ulong page) | ||
112 | { | ||
113 | if (tlb_hit_page_anyprot(tlb_entry, page)) { | ||
114 | memset(tlb_entry, -1, sizeof(*tlb_entry)); | ||
115 | } | ||
116 | } | ||
117 | |||
118 | -static inline void tlb_flush_vtlb_page(CPUArchState *env, int mmu_idx, | ||
119 | - target_ulong page) | ||
120 | +/* Called with tlb_lock held */ | ||
121 | +static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, | ||
122 | + target_ulong page) | ||
123 | { | ||
124 | int k; | ||
125 | + | ||
126 | + assert_cpu_is_self(ENV_GET_CPU(env)); | ||
127 | for (k = 0; k < CPU_VTLB_SIZE; k++) { | ||
128 | - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], page); | ||
129 | + tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) | ||
134 | |||
135 | addr &= TARGET_PAGE_MASK; | ||
136 | i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
137 | + qemu_spin_lock(&env->tlb_lock); | ||
138 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
139 | - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); | ||
140 | - tlb_flush_vtlb_page(env, mmu_idx, addr); | ||
141 | + tlb_flush_entry_locked(&env->tlb_table[mmu_idx][i], addr); | ||
142 | + tlb_flush_vtlb_page_locked(env, mmu_idx, addr); | ||
143 | } | ||
144 | + qemu_spin_unlock(&env->tlb_lock); | ||
145 | |||
146 | tb_flush_jmp_cache(cpu, addr); | ||
147 | } | ||
148 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, | ||
149 | tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", | ||
150 | page, addr, mmu_idx_bitmap); | ||
151 | |||
152 | + qemu_spin_lock(&env->tlb_lock); | ||
153 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
154 | if (test_bit(mmu_idx, &mmu_idx_bitmap)) { | ||
155 | - tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr); | ||
156 | - tlb_flush_vtlb_page(env, mmu_idx, addr); | ||
157 | + tlb_flush_entry_locked(&env->tlb_table[mmu_idx][page], addr); | ||
158 | + tlb_flush_vtlb_page_locked(env, mmu_idx, addr); | ||
159 | } | ||
160 | } | ||
161 | + qemu_spin_unlock(&env->tlb_lock); | ||
162 | |||
163 | tb_flush_jmp_cache(cpu, addr); | ||
164 | } | ||
165 | @@ -XXX,XX +XXX,XX @@ void tlb_unprotect_code(ram_addr_t ram_addr) | ||
166 | * most usual is detecting writes to code regions which may invalidate | ||
167 | * generated code. | ||
168 | * | ||
169 | - * Because we want other vCPUs to respond to changes straight away we | ||
170 | - * update the te->addr_write field atomically. If the TLB entry has | ||
171 | - * been changed by the vCPU in the mean time we skip the update. | ||
172 | + * Other vCPUs might be reading their TLBs during guest execution, so we update | ||
173 | + * te->addr_write with atomic_set. We don't need to worry about this for | ||
174 | + * oversized guests as MTTCG is disabled for them. | ||
175 | * | ||
176 | - * As this function uses atomic accesses we also need to ensure | ||
177 | - * updates to tlb_entries follow the same access rules. We don't need | ||
178 | - * to worry about this for oversized guests as MTTCG is disabled for | ||
179 | - * them. | ||
180 | + * Called with tlb_lock held. | ||
181 | */ | ||
63 | - | 182 | - |
64 | #include "exec/helper-proto.h" | 183 | -static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start, |
65 | 184 | - uintptr_t length) | |
66 | static const TCGHelperInfo all_helpers[] = { | 185 | +static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, |
67 | @@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op) | 186 | + uintptr_t start, uintptr_t length) |
68 | void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | 187 | { |
69 | { | 188 | -#if TCG_OVERSIZED_GUEST |
70 | int i, real_args, nb_rets, pi; | 189 | uintptr_t addr = tlb_entry->addr_write; |
71 | - unsigned typemask, flags; | 190 | |
72 | - TCGHelperInfo *info; | 191 | if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { |
73 | + unsigned typemask; | 192 | addr &= TARGET_PAGE_MASK; |
74 | + const TCGHelperInfo *info; | 193 | addr += tlb_entry->addend; |
75 | TCGOp *op; | 194 | if ((addr - start) < length) { |
76 | 195 | +#if TCG_OVERSIZED_GUEST | |
77 | info = g_hash_table_lookup(helper_table, (gpointer)func); | 196 | tlb_entry->addr_write |= TLB_NOTDIRTY; |
78 | - flags = info->flags; | ||
79 | typemask = info->typemask; | ||
80 | |||
81 | #ifdef CONFIG_PLUGIN | ||
82 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
83 | real_args++; | ||
84 | } | ||
85 | op->args[pi++] = (uintptr_t)func; | ||
86 | - op->args[pi++] = flags; | ||
87 | + op->args[pi++] = (uintptr_t)info; | ||
88 | TCGOP_CALLI(op) = real_args; | ||
89 | |||
90 | /* Make sure the fields didn't overflow. */ | ||
91 | @@ -XXX,XX +XXX,XX @@ static char *tcg_get_arg_str(TCGContext *s, char *buf, | ||
92 | return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); | ||
93 | } | ||
94 | |||
95 | -/* Find helper name. */ | ||
96 | -static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) | ||
97 | -{ | ||
98 | - const char *ret = NULL; | ||
99 | - if (helper_table) { | ||
100 | - TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val); | ||
101 | - if (info) { | ||
102 | - ret = info->name; | ||
103 | - } | 197 | - } |
104 | - } | 198 | - } |
105 | - return ret; | 199 | #else |
106 | -} | 200 | - /* paired with atomic_mb_set in tlb_set_page_with_attrs */ |
201 | - uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write); | ||
202 | - uintptr_t addr = orig_addr; | ||
107 | - | 203 | - |
108 | static const char * const cond_name[] = | 204 | - if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { |
109 | { | 205 | - addr &= TARGET_PAGE_MASK; |
110 | [TCG_COND_NEVER] = "never", | 206 | - addr += atomic_read(&tlb_entry->addend); |
111 | @@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs) | 207 | - if ((addr - start) < length) { |
112 | col += qemu_log(" " TARGET_FMT_lx, a); | 208 | - uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY; |
113 | } | 209 | - atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr); |
114 | } else if (c == INDEX_op_call) { | 210 | + atomic_set(&tlb_entry->addr_write, |
115 | + const TCGHelperInfo *info = tcg_call_info(op); | 211 | + tlb_entry->addr_write | TLB_NOTDIRTY); |
116 | + void *func; | 212 | +#endif |
117 | + | 213 | } |
118 | /* variable number of arguments */ | 214 | } |
119 | nb_oargs = TCGOP_CALLO(op); | 215 | -#endif |
120 | nb_iargs = TCGOP_CALLI(op); | 216 | } |
121 | nb_cargs = def->nb_cargs; | 217 | |
122 | 218 | -/* For atomic correctness when running MTTCG we need to use the right | |
123 | - /* function name, flags, out args */ | 219 | - * primitives when copying entries */ |
124 | - col += qemu_log(" %s %s,$0x%x,$%d", def->name, | 220 | -static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s, |
125 | - tcg_find_helper(s, op->args[nb_oargs + nb_iargs]), | 221 | - bool atomic_set) |
126 | - tcg_call_flags(op), nb_oargs); | 222 | +/* |
127 | + col += qemu_log(" %s ", def->name); | 223 | + * Called with tlb_lock held. |
128 | + | 224 | + * Called only from the vCPU context, i.e. the TLB's owner thread. |
129 | + /* | 225 | + */ |
130 | + * Print the function name from TCGHelperInfo, if available. | 226 | +static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) |
131 | + * Note that plugins have a template function for the info, | 227 | { |
132 | + * but the actual function pointer comes from the plugin. | 228 | -#if TCG_OVERSIZED_GUEST |
133 | + */ | 229 | *d = *s; |
134 | + func = (void *)(uintptr_t)op->args[nb_oargs + nb_iargs]; | 230 | -#else |
135 | + if (func == info->func) { | 231 | - if (atomic_set) { |
136 | + col += qemu_log("%s", info->name); | 232 | - d->addr_read = s->addr_read; |
137 | + } else { | 233 | - d->addr_code = s->addr_code; |
138 | + col += qemu_log("plugin(%p)", func); | 234 | - atomic_set(&d->addend, atomic_read(&s->addend)); |
139 | + } | 235 | - /* Pairs with flag setting in tlb_reset_dirty_range */ |
140 | + | 236 | - atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write)); |
141 | + col += qemu_log("$0x%x,$%d", info->flags, nb_oargs); | 237 | - } else { |
142 | for (i = 0; i < nb_oargs; i++) { | 238 | - d->addr_read = s->addr_read; |
143 | col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf), | 239 | - d->addr_write = atomic_read(&s->addr_write); |
144 | op->args[i])); | 240 | - d->addr_code = s->addr_code; |
241 | - d->addend = atomic_read(&s->addend); | ||
242 | - } | ||
243 | -#endif | ||
244 | } | ||
245 | |||
246 | /* This is a cross vCPU call (i.e. another vCPU resetting the flags of | ||
247 | - * the target vCPU). As such care needs to be taken that we don't | ||
248 | - * dangerously race with another vCPU update. The only thing actually | ||
249 | - * updated is the target TLB entry ->addr_write flags. | ||
250 | + * the target vCPU). | ||
251 | + * We must take tlb_lock to avoid racing with another vCPU update. The only | ||
252 | + * thing actually updated is the target TLB entry ->addr_write flags. | ||
253 | */ | ||
254 | void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) | ||
255 | { | ||
256 | @@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) | ||
257 | int mmu_idx; | ||
258 | |||
259 | env = cpu->env_ptr; | ||
260 | + qemu_spin_lock(&env->tlb_lock); | ||
261 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
262 | unsigned int i; | ||
263 | |||
264 | for (i = 0; i < CPU_TLB_SIZE; i++) { | ||
265 | - tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i], | ||
266 | - start1, length); | ||
267 | + tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1, | ||
268 | + length); | ||
269 | } | ||
270 | |||
271 | for (i = 0; i < CPU_VTLB_SIZE; i++) { | ||
272 | - tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i], | ||
273 | - start1, length); | ||
274 | + tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1, | ||
275 | + length); | ||
276 | } | ||
277 | } | ||
278 | + qemu_spin_unlock(&env->tlb_lock); | ||
279 | } | ||
280 | |||
281 | -static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) | ||
282 | +/* Called with tlb_lock held */ | ||
283 | +static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, | ||
284 | + target_ulong vaddr) | ||
285 | { | ||
286 | if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { | ||
287 | tlb_entry->addr_write = vaddr; | ||
288 | @@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) | ||
289 | |||
290 | vaddr &= TARGET_PAGE_MASK; | ||
291 | i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
292 | + qemu_spin_lock(&env->tlb_lock); | ||
293 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
294 | - tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr); | ||
295 | + tlb_set_dirty1_locked(&env->tlb_table[mmu_idx][i], vaddr); | ||
296 | } | ||
297 | |||
298 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
299 | int k; | ||
300 | for (k = 0; k < CPU_VTLB_SIZE; k++) { | ||
301 | - tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr); | ||
302 | + tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr); | ||
303 | } | ||
304 | } | ||
305 | + qemu_spin_unlock(&env->tlb_lock); | ||
306 | } | ||
307 | |||
308 | /* Our TLB does not support large pages, so remember the area covered by | ||
309 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, | ||
310 | addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; | ||
311 | } | ||
312 | |||
313 | - /* Make sure there's no cached translation for the new page. */ | ||
314 | - tlb_flush_vtlb_page(env, mmu_idx, vaddr_page); | ||
315 | - | ||
316 | code_address = address; | ||
317 | iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, | ||
318 | paddr_page, xlat, prot, &address); | ||
319 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, | ||
320 | index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
321 | te = &env->tlb_table[mmu_idx][index]; | ||
322 | |||
323 | + /* | ||
324 | + * Hold the TLB lock for the rest of the function. We could acquire/release | ||
325 | + * the lock several times in the function, but it is faster to amortize the | ||
326 | + * acquisition cost by acquiring it just once. Note that this leads to | ||
327 | + * a longer critical section, but this is not a concern since the TLB lock | ||
328 | + * is unlikely to be contended. | ||
329 | + */ | ||
330 | + qemu_spin_lock(&env->tlb_lock); | ||
331 | + | ||
332 | + /* Make sure there's no cached translation for the new page. */ | ||
333 | + tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); | ||
334 | + | ||
335 | /* | ||
336 | * Only evict the old entry to the victim tlb if it's for a | ||
337 | * different page; otherwise just overwrite the stale data. | ||
338 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, | ||
339 | CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx]; | ||
340 | |||
341 | /* Evict the old entry into the victim tlb. */ | ||
342 | - copy_tlb_helper(tv, te, true); | ||
343 | + copy_tlb_helper_locked(tv, te); | ||
344 | env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index]; | ||
345 | } | ||
346 | |||
347 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, | ||
348 | } | ||
349 | } | ||
350 | |||
351 | - /* Pairs with flag setting in tlb_reset_dirty_range */ | ||
352 | - copy_tlb_helper(te, &tn, true); | ||
353 | - /* atomic_mb_set(&te->addr_write, write_address); */ | ||
354 | + copy_tlb_helper_locked(te, &tn); | ||
355 | + qemu_spin_unlock(&env->tlb_lock); | ||
356 | } | ||
357 | |||
358 | /* Add a new TLB entry, but without specifying the memory | ||
359 | @@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, | ||
360 | size_t elt_ofs, target_ulong page) | ||
361 | { | ||
362 | size_t vidx; | ||
363 | + | ||
364 | + assert_cpu_is_self(ENV_GET_CPU(env)); | ||
365 | for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { | ||
366 | CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; | ||
367 | target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); | ||
368 | @@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, | ||
369 | /* Found entry in victim tlb, swap tlb and iotlb. */ | ||
370 | CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index]; | ||
371 | |||
372 | - copy_tlb_helper(&tmptlb, tlb, false); | ||
373 | - copy_tlb_helper(tlb, vtlb, true); | ||
374 | - copy_tlb_helper(vtlb, &tmptlb, true); | ||
375 | + qemu_spin_lock(&env->tlb_lock); | ||
376 | + copy_tlb_helper_locked(&tmptlb, tlb); | ||
377 | + copy_tlb_helper_locked(tlb, vtlb); | ||
378 | + copy_tlb_helper_locked(vtlb, &tmptlb); | ||
379 | + qemu_spin_unlock(&env->tlb_lock); | ||
380 | |||
381 | CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index]; | ||
382 | CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx]; | ||
145 | -- | 383 | -- |
146 | 2.25.1 | 384 | 2.17.2 |
147 | 385 | ||
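
The locking protocol that the cputlb patch above introduces can be summarized in a few lines: the per-vCPU spinlock is created at realize time, every writer of tlb_table/tlb_v_table takes it (whether running on the owning thread or remotely), the owner may still read without it, and remote readers of .addr_write become atomic in the follow-up patch. In the condensed sketch below, tlb_init() is lifted from the patch, while flush_one_page() is a hypothetical wrapper name for the shape the real flush paths follow:

```c
void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;

    /* Created once, from cpu_exec_realizefn(). */
    qemu_spin_init(&env->tlb_lock);
}

/* Hypothetical wrapper: every tlb_table/tlb_v_table update in the
 * patch follows this lock/modify/unlock shape. */
static void flush_one_page(CPUArchState *env, int mmu_idx,
                           size_t index, target_ulong addr)
{
    qemu_spin_lock(&env->tlb_lock);
    tlb_flush_entry_locked(&env->tlb_table[mmu_idx][index], addr);
    tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
    qemu_spin_unlock(&env->tlb_lock);
}
```
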
1 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 1 | Isolate the computation of an index from an address into a |
---|---|---|---|
2 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 2 | helper before we change that function. |
3 | |||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | [ cota: convert tlb_vaddr_to_host; use atomic_read on addr_write ] | ||
7 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
8 | Message-Id: <20181009175129.17888-2-cota@braap.org> | ||
4 | --- | 9 | --- |
5 | tcg/tci/tcg-target.h | 8 ++++---- | 10 | accel/tcg/softmmu_template.h | 64 +++++++++++++++++--------------- |
6 | tcg/tci.c | 42 ++++++++++++++++++++++++++++++++++++++++ | 11 | include/exec/cpu_ldst.h | 19 ++++++++-- |
7 | tcg/tci/tcg-target.c.inc | 32 ++++++++++++++++++++++++++++++ | 12 | include/exec/cpu_ldst_template.h | 25 +++++++------ |
8 | 3 files changed, 78 insertions(+), 4 deletions(-) | 13 | accel/tcg/cputlb.c | 60 ++++++++++++++---------------- |
14 | 4 files changed, 90 insertions(+), 78 deletions(-) | ||
9 | 15 | ||
10 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | 16 | diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h |
11 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/tci/tcg-target.h | 18 | --- a/accel/tcg/softmmu_template.h |
13 | +++ b/tcg/tci/tcg-target.h | 19 | +++ b/accel/tcg/softmmu_template.h |
14 | @@ -XXX,XX +XXX,XX @@ | 20 | @@ -XXX,XX +XXX,XX @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, |
15 | #define TCG_TARGET_HAS_ext16u_i32 1 | 21 | WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, |
16 | #define TCG_TARGET_HAS_andc_i32 1 | 22 | TCGMemOpIdx oi, uintptr_t retaddr) |
17 | #define TCG_TARGET_HAS_deposit_i32 1 | 23 | { |
18 | -#define TCG_TARGET_HAS_extract_i32 0 | 24 | - unsigned mmu_idx = get_mmuidx(oi); |
19 | -#define TCG_TARGET_HAS_sextract_i32 0 | 25 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); |
20 | +#define TCG_TARGET_HAS_extract_i32 1 | 26 | - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; |
21 | +#define TCG_TARGET_HAS_sextract_i32 1 | 27 | + uintptr_t mmu_idx = get_mmuidx(oi); |
22 | #define TCG_TARGET_HAS_extract2_i32 0 | 28 | + uintptr_t index = tlb_index(env, mmu_idx, addr); |
23 | #define TCG_TARGET_HAS_eqv_i32 1 | 29 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); |
24 | #define TCG_TARGET_HAS_nand_i32 1 | 30 | + target_ulong tlb_addr = entry->ADDR_READ; |
25 | @@ -XXX,XX +XXX,XX @@ | 31 | unsigned a_bits = get_alignment_bits(get_memop(oi)); |
26 | #define TCG_TARGET_HAS_bswap32_i64 1 | 32 | uintptr_t haddr; |
27 | #define TCG_TARGET_HAS_bswap64_i64 1 | 33 | DATA_TYPE res; |
28 | #define TCG_TARGET_HAS_deposit_i64 1 | 34 | @@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, |
29 | -#define TCG_TARGET_HAS_extract_i64 0 | 35 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, |
30 | -#define TCG_TARGET_HAS_sextract_i64 0 | 36 | mmu_idx, retaddr); |
31 | +#define TCG_TARGET_HAS_extract_i64 1 | 37 | } |
32 | +#define TCG_TARGET_HAS_sextract_i64 1 | 38 | - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; |
33 | #define TCG_TARGET_HAS_extract2_i64 0 | 39 | + tlb_addr = entry->ADDR_READ; |
34 | #define TCG_TARGET_HAS_div_i64 1 | 40 | } |
35 | #define TCG_TARGET_HAS_rem_i64 1 | 41 | |
36 | diff --git a/tcg/tci.c b/tcg/tci.c | 42 | /* Handle an IO access. */ |
43 | @@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, | ||
44 | return res; | ||
45 | } | ||
46 | |||
47 | - haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
48 | + haddr = addr + entry->addend; | ||
49 | #if DATA_SIZE == 1 | ||
50 | res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr); | ||
51 | #else | ||
52 | @@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, | ||
53 | WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, | ||
54 | TCGMemOpIdx oi, uintptr_t retaddr) | ||
55 | { | ||
56 | - unsigned mmu_idx = get_mmuidx(oi); | ||
57 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
58 | - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; | ||
59 | + uintptr_t mmu_idx = get_mmuidx(oi); | ||
60 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
61 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
62 | + target_ulong tlb_addr = entry->ADDR_READ; | ||
63 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | ||
64 | uintptr_t haddr; | ||
65 | DATA_TYPE res; | ||
66 | @@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, | ||
67 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, | ||
68 | mmu_idx, retaddr); | ||
69 | } | ||
70 | - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; | ||
71 | + tlb_addr = entry->ADDR_READ; | ||
72 | } | ||
73 | |||
74 | /* Handle an IO access. */ | ||
75 | @@ -XXX,XX +XXX,XX @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, | ||
76 | return res; | ||
77 | } | ||
78 | |||
79 | - haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
80 | + haddr = addr + entry->addend; | ||
81 | res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr); | ||
82 | return res; | ||
83 | } | ||
84 | @@ -XXX,XX +XXX,XX @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, | ||
85 | void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
86 | TCGMemOpIdx oi, uintptr_t retaddr) | ||
87 | { | ||
88 | - unsigned mmu_idx = get_mmuidx(oi); | ||
89 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
90 | - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; | ||
91 | + uintptr_t mmu_idx = get_mmuidx(oi); | ||
92 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
93 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
94 | + target_ulong tlb_addr = entry->addr_write; | ||
95 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | ||
96 | uintptr_t haddr; | ||
97 | |||
98 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
99 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, | ||
100 | mmu_idx, retaddr); | ||
101 | } | ||
102 | - tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK; | ||
103 | + tlb_addr = entry->addr_write & ~TLB_INVALID_MASK; | ||
104 | } | ||
105 | |||
106 | /* Handle an IO access. */ | ||
107 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
108 | if (DATA_SIZE > 1 | ||
109 | && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 | ||
110 | >= TARGET_PAGE_SIZE)) { | ||
111 | - int i, index2; | ||
112 | - target_ulong page2, tlb_addr2; | ||
113 | + int i; | ||
114 | + target_ulong page2; | ||
115 | + CPUTLBEntry *entry2; | ||
116 | do_unaligned_access: | ||
117 | /* Ensure the second page is in the TLB. Note that the first page | ||
118 | is already guaranteed to be filled, and that the second page | ||
119 | cannot evict the first. */ | ||
120 | page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; | ||
121 | - index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
122 | - tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write; | ||
123 | - if (!tlb_hit_page(tlb_addr2, page2) | ||
124 | + entry2 = tlb_entry(env, mmu_idx, page2); | ||
125 | + if (!tlb_hit_page(entry2->addr_write, page2) | ||
126 | && !VICTIM_TLB_HIT(addr_write, page2)) { | ||
127 | tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, | ||
128 | mmu_idx, retaddr); | ||
129 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
130 | return; | ||
131 | } | ||
132 | |||
133 | - haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
134 | + haddr = addr + entry->addend; | ||
135 | #if DATA_SIZE == 1 | ||
136 | glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val); | ||
137 | #else | ||
138 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
139 | void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
140 | TCGMemOpIdx oi, uintptr_t retaddr) | ||
141 | { | ||
142 | - unsigned mmu_idx = get_mmuidx(oi); | ||
143 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
144 | - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; | ||
145 | + uintptr_t mmu_idx = get_mmuidx(oi); | ||
146 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
147 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
148 | + target_ulong tlb_addr = entry->addr_write; | ||
149 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | ||
150 | uintptr_t haddr; | ||
151 | |||
152 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
153 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, | ||
154 | mmu_idx, retaddr); | ||
155 | } | ||
156 | - tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK; | ||
157 | + tlb_addr = entry->addr_write & ~TLB_INVALID_MASK; | ||
158 | } | ||
159 | |||
160 | /* Handle an IO access. */ | ||
161 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
162 | if (DATA_SIZE > 1 | ||
163 | && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 | ||
164 | >= TARGET_PAGE_SIZE)) { | ||
165 | - int i, index2; | ||
166 | - target_ulong page2, tlb_addr2; | ||
167 | + int i; | ||
168 | + target_ulong page2; | ||
169 | + CPUTLBEntry *entry2; | ||
170 | do_unaligned_access: | ||
171 | /* Ensure the second page is in the TLB. Note that the first page | ||
172 | is already guaranteed to be filled, and that the second page | ||
173 | cannot evict the first. */ | ||
174 | page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; | ||
175 | - index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
176 | - tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write; | ||
177 | - if (!tlb_hit_page(tlb_addr2, page2) | ||
178 | + entry2 = tlb_entry(env, mmu_idx, page2); | ||
179 | + if (!tlb_hit_page(entry2->addr_write, page2) | ||
180 | && !VICTIM_TLB_HIT(addr_write, page2)) { | ||
181 | tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, | ||
182 | mmu_idx, retaddr); | ||
183 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
184 | return; | ||
185 | } | ||
186 | |||
187 | - haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
188 | + haddr = addr + entry->addend; | ||
189 | glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); | ||
190 | } | ||
191 | #endif /* DATA_SIZE > 1 */ | ||
192 | diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h | ||
37 | index XXXXXXX..XXXXXXX 100644 | 193 | index XXXXXXX..XXXXXXX 100644 |
38 | --- a/tcg/tci.c | 194 | --- a/include/exec/cpu_ldst.h |
39 | +++ b/tcg/tci.c | 195 | +++ b/include/exec/cpu_ldst.h |
40 | @@ -XXX,XX +XXX,XX @@ static void tci_args_rrs(uint32_t insn, TCGReg *r0, TCGReg *r1, int32_t *i2) | 196 | @@ -XXX,XX +XXX,XX @@ extern __thread uintptr_t helper_retaddr; |
41 | *i2 = sextract32(insn, 16, 16); | 197 | /* The memory helpers for tcg-generated code need tcg_target_long etc. */ |
42 | } | 198 | #include "tcg.h" |
43 | 199 | ||
44 | +static void tci_args_rrbb(uint32_t insn, TCGReg *r0, TCGReg *r1, | 200 | +/* Find the TLB index corresponding to the mmu_idx + address pair. */ |
45 | + uint8_t *i2, uint8_t *i3) | 201 | +static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, |
202 | + target_ulong addr) | ||
46 | +{ | 203 | +{ |
47 | + *r0 = extract32(insn, 8, 4); | 204 | + return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); |
48 | + *r1 = extract32(insn, 12, 4); | ||
49 | + *i2 = extract32(insn, 16, 6); | ||
50 | + *i3 = extract32(insn, 22, 6); | ||
51 | +} | 205 | +} |
52 | + | 206 | + |
53 | static void tci_args_rrrc(uint32_t insn, | 207 | +/* Find the TLB entry corresponding to the mmu_idx + address pair. */ |
54 | TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3) | 208 | +static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, |
55 | { | 209 | + target_ulong addr) |
56 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
57 | tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); | ||
58 | regs[r0] = deposit32(regs[r1], pos, len, regs[r2]); | ||
59 | break; | ||
60 | +#endif | ||
61 | +#if TCG_TARGET_HAS_extract_i32 | ||
62 | + case INDEX_op_extract_i32: | ||
63 | + tci_args_rrbb(insn, &r0, &r1, &pos, &len); | ||
64 | + regs[r0] = extract32(regs[r1], pos, len); | ||
65 | + break; | ||
66 | +#endif | ||
67 | +#if TCG_TARGET_HAS_sextract_i32 | ||
68 | + case INDEX_op_sextract_i32: | ||
69 | + tci_args_rrbb(insn, &r0, &r1, &pos, &len); | ||
70 | + regs[r0] = sextract32(regs[r1], pos, len); | ||
71 | + break; | ||
72 | #endif | ||
73 | case INDEX_op_brcond_i32: | ||
74 | tci_args_rl(insn, tb_ptr, &r0, &ptr); | ||
75 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
76 | tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); | ||
77 | regs[r0] = deposit64(regs[r1], pos, len, regs[r2]); | ||
78 | break; | ||
79 | +#endif | ||
80 | +#if TCG_TARGET_HAS_extract_i64 | ||
81 | + case INDEX_op_extract_i64: | ||
82 | + tci_args_rrbb(insn, &r0, &r1, &pos, &len); | ||
83 | + regs[r0] = extract64(regs[r1], pos, len); | ||
84 | + break; | ||
85 | +#endif | ||
86 | +#if TCG_TARGET_HAS_sextract_i64 | ||
87 | + case INDEX_op_sextract_i64: | ||
88 | + tci_args_rrbb(insn, &r0, &r1, &pos, &len); | ||
89 | + regs[r0] = sextract64(regs[r1], pos, len); | ||
90 | + break; | ||
91 | #endif | ||
92 | case INDEX_op_brcond_i64: | ||
93 | tci_args_rl(insn, tb_ptr, &r0, &ptr); | ||
94 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
95 | op_name, str_r(r0), str_r(r1), str_r(r2), pos, len); | ||
96 | break; | ||
97 | |||
98 | + case INDEX_op_extract_i32: | ||
99 | + case INDEX_op_extract_i64: | ||
100 | + case INDEX_op_sextract_i32: | ||
101 | + case INDEX_op_sextract_i64: | ||
102 | + tci_args_rrbb(insn, &r0, &r1, &pos, &len); | ||
103 | + info->fprintf_func(info->stream, "%-12s %s,%s,%d,%d", | ||
104 | + op_name, str_r(r0), str_r(r1), pos, len); | ||
105 | + break; | ||
106 | + | ||
107 | case INDEX_op_movcond_i32: | ||
108 | case INDEX_op_movcond_i64: | ||
109 | case INDEX_op_setcond2_i32: | ||
110 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/tcg/tci/tcg-target.c.inc | ||
113 | +++ b/tcg/tci/tcg-target.c.inc | ||
114 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
115 | case INDEX_op_bswap32_i32: | ||
116 | case INDEX_op_bswap32_i64: | ||
117 | case INDEX_op_bswap64_i64: | ||
118 | + case INDEX_op_extract_i32: | ||
119 | + case INDEX_op_extract_i64: | ||
120 | + case INDEX_op_sextract_i32: | ||
121 | + case INDEX_op_sextract_i64: | ||
122 | return C_O1_I1(r, r); | ||
123 | |||
124 | case INDEX_op_st8_i32: | ||
125 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op, | ||
126 | tcg_out32(s, insn); | ||
127 | } | ||
128 | |||
129 | +static void tcg_out_op_rrbb(TCGContext *s, TCGOpcode op, TCGReg r0, | ||
130 | + TCGReg r1, uint8_t b2, uint8_t b3) | ||
131 | +{ | 210 | +{ |
132 | + tcg_insn_unit insn = 0; | 211 | + return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)]; |
133 | + | ||
134 | + tcg_debug_assert(b2 == extract32(b2, 0, 6)); | ||
135 | + tcg_debug_assert(b3 == extract32(b3, 0, 6)); | ||
136 | + insn = deposit32(insn, 0, 8, op); | ||
137 | + insn = deposit32(insn, 8, 4, r0); | ||
138 | + insn = deposit32(insn, 12, 4, r1); | ||
139 | + insn = deposit32(insn, 16, 6, b2); | ||
140 | + insn = deposit32(insn, 22, 6, b3); | ||
141 | + tcg_out32(s, insn); | ||
142 | +} | 212 | +} |
143 | + | 213 | + |
144 | static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op, | 214 | #ifdef MMU_MODE0_SUFFIX |
145 | TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3) | 215 | #define CPU_MMU_INDEX 0 |
146 | { | 216 | #define MEMSUFFIX MMU_MODE0_SUFFIX |
147 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | 217 | @@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, |
148 | } | 218 | #if defined(CONFIG_USER_ONLY) |
149 | break; | 219 | return g2h(addr); |
150 | 220 | #else | |
151 | + CASE_32_64(extract) /* Optional (TCG_TARGET_HAS_extract_*). */ | 221 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); |
152 | + CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */ | 222 | - CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index]; |
153 | + { | 223 | + CPUTLBEntry *tlbentry = tlb_entry(env, mmu_idx, addr); |
154 | + TCGArg pos = args[2], len = args[3]; | 224 | abi_ptr tlb_addr; |
155 | + TCGArg max = tcg_op_defs[opc].flags & TCG_OPF_64BIT ? 64 : 32; | 225 | uintptr_t haddr; |
156 | + | 226 | |
157 | + tcg_debug_assert(pos < max); | 227 | @@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, |
158 | + tcg_debug_assert(pos + len <= max); | 228 | return NULL; |
159 | + | 229 | } |
160 | + tcg_out_op_rrbb(s, opc, args[0], args[1], pos, len); | 230 | |
161 | + } | 231 | - haddr = addr + env->tlb_table[mmu_idx][index].addend; |
162 | + break; | 232 | + haddr = addr + tlbentry->addend; |
163 | + | 233 | return (void *)haddr; |
164 | CASE_32_64(brcond) | 234 | #endif /* defined(CONFIG_USER_ONLY) */ |
165 | tcg_out_op_rrrc(s, (opc == INDEX_op_brcond_i32 | 235 | } |
166 | ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64), | 236 | diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h |
237 | index XXXXXXX..XXXXXXX 100644 | ||
238 | --- a/include/exec/cpu_ldst_template.h | ||
239 | +++ b/include/exec/cpu_ldst_template.h | ||
240 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
241 | target_ulong ptr, | ||
242 | uintptr_t retaddr) | ||
243 | { | ||
244 | - int page_index; | ||
245 | + CPUTLBEntry *entry; | ||
246 | RES_TYPE res; | ||
247 | target_ulong addr; | ||
248 | int mmu_idx; | ||
249 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
250 | #endif | ||
251 | |||
252 | addr = ptr; | ||
253 | - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
254 | mmu_idx = CPU_MMU_INDEX; | ||
255 | - if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != | ||
256 | + entry = tlb_entry(env, mmu_idx, addr); | ||
257 | + if (unlikely(entry->ADDR_READ != | ||
258 | (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { | ||
259 | oi = make_memop_idx(SHIFT, mmu_idx); | ||
260 | res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr, | ||
261 | oi, retaddr); | ||
262 | } else { | ||
263 | - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; | ||
264 | + uintptr_t hostaddr = addr + entry->addend; | ||
265 | res = glue(glue(ld, USUFFIX), _p)((uint8_t *)hostaddr); | ||
266 | } | ||
267 | return res; | ||
268 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
269 | target_ulong ptr, | ||
270 | uintptr_t retaddr) | ||
271 | { | ||
272 | - int res, page_index; | ||
273 | + CPUTLBEntry *entry; | ||
274 | + int res; | ||
275 | target_ulong addr; | ||
276 | int mmu_idx; | ||
277 | TCGMemOpIdx oi; | ||
278 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
279 | #endif | ||
280 | |||
281 | addr = ptr; | ||
282 | - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
283 | mmu_idx = CPU_MMU_INDEX; | ||
284 | - if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != | ||
285 | + entry = tlb_entry(env, mmu_idx, addr); | ||
286 | + if (unlikely(entry->ADDR_READ != | ||
287 | (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { | ||
288 | oi = make_memop_idx(SHIFT, mmu_idx); | ||
289 | res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX), | ||
290 | MMUSUFFIX)(env, addr, oi, retaddr); | ||
291 | } else { | ||
292 | - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; | ||
293 | + uintptr_t hostaddr = addr + entry->addend; | ||
294 | res = glue(glue(lds, SUFFIX), _p)((uint8_t *)hostaddr); | ||
295 | } | ||
296 | return res; | ||
297 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
298 | target_ulong ptr, | ||
299 | RES_TYPE v, uintptr_t retaddr) | ||
300 | { | ||
301 | - int page_index; | ||
302 | + CPUTLBEntry *entry; | ||
303 | target_ulong addr; | ||
304 | int mmu_idx; | ||
305 | TCGMemOpIdx oi; | ||
306 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
307 | #endif | ||
308 | |||
309 | addr = ptr; | ||
310 | - page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
311 | mmu_idx = CPU_MMU_INDEX; | ||
312 | - if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write != | ||
313 | + entry = tlb_entry(env, mmu_idx, addr); | ||
314 | + if (unlikely(entry->addr_write != | ||
315 | (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { | ||
316 | oi = make_memop_idx(SHIFT, mmu_idx); | ||
317 | glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi, | ||
318 | retaddr); | ||
319 | } else { | ||
320 | - uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend; | ||
321 | + uintptr_t hostaddr = addr + entry->addend; | ||
322 | glue(glue(st, SUFFIX), _p)((uint8_t *)hostaddr, v); | ||
323 | } | ||
324 | } | ||
325 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
326 | index XXXXXXX..XXXXXXX 100644 | ||
327 | --- a/accel/tcg/cputlb.c | ||
328 | +++ b/accel/tcg/cputlb.c | ||
329 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) | ||
330 | { | ||
331 | CPUArchState *env = cpu->env_ptr; | ||
332 | target_ulong addr = (target_ulong) data.target_ptr; | ||
333 | - int i; | ||
334 | int mmu_idx; | ||
335 | |||
336 | assert_cpu_is_self(cpu); | ||
337 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) | ||
338 | } | ||
339 | |||
340 | addr &= TARGET_PAGE_MASK; | ||
341 | - i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
342 | qemu_spin_lock(&env->tlb_lock); | ||
343 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
344 | - tlb_flush_entry_locked(&env->tlb_table[mmu_idx][i], addr); | ||
345 | + tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr); | ||
346 | tlb_flush_vtlb_page_locked(env, mmu_idx, addr); | ||
347 | } | ||
348 | qemu_spin_unlock(&env->tlb_lock); | ||
349 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, | ||
350 | target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; | ||
351 | target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; | ||
352 | unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; | ||
353 | - int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
354 | int mmu_idx; | ||
355 | |||
356 | assert_cpu_is_self(cpu); | ||
357 | |||
358 | - tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", | ||
359 | - page, addr, mmu_idx_bitmap); | ||
360 | + tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", | ||
361 | + addr, mmu_idx_bitmap); | ||
362 | |||
363 | qemu_spin_lock(&env->tlb_lock); | ||
364 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
365 | if (test_bit(mmu_idx, &mmu_idx_bitmap)) { | ||
366 | - tlb_flush_entry_locked(&env->tlb_table[mmu_idx][page], addr); | ||
367 | + tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr); | ||
368 | tlb_flush_vtlb_page_locked(env, mmu_idx, addr); | ||
369 | } | ||
370 | } | ||
371 | @@ -XXX,XX +XXX,XX @@ static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, | ||
372 | void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) | ||
373 | { | ||
374 | CPUArchState *env = cpu->env_ptr; | ||
375 | - int i; | ||
376 | int mmu_idx; | ||
377 | |||
378 | assert_cpu_is_self(cpu); | ||
379 | |||
380 | vaddr &= TARGET_PAGE_MASK; | ||
381 | - i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
382 | qemu_spin_lock(&env->tlb_lock); | ||
383 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
384 | - tlb_set_dirty1_locked(&env->tlb_table[mmu_idx][i], vaddr); | ||
385 | + tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); | ||
386 | } | ||
387 | |||
388 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
389 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, | ||
390 | iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, | ||
391 | paddr_page, xlat, prot, &address); | ||
392 | |||
393 | - index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
394 | - te = &env->tlb_table[mmu_idx][index]; | ||
395 | + index = tlb_index(env, mmu_idx, vaddr_page); | ||
396 | + te = tlb_entry(env, mmu_idx, vaddr_page); | ||
397 | |||
398 | /* | ||
399 | * Hold the TLB lock for the rest of the function. We could acquire/release | ||
400 | @@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, | ||
401 | * repeat the MMU check here. This tlb_fill() call might | ||
402 | * longjump out if this access should cause a guest exception. | ||
403 | */ | ||
404 | - int index; | ||
405 | + CPUTLBEntry *entry; | ||
406 | target_ulong tlb_addr; | ||
407 | |||
408 | tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); | ||
409 | |||
410 | - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
411 | - tlb_addr = env->tlb_table[mmu_idx][index].addr_read; | ||
412 | + entry = tlb_entry(env, mmu_idx, addr); | ||
413 | + tlb_addr = entry->addr_read; | ||
414 | if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { | ||
415 | /* RAM access */ | ||
416 | - uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
417 | + uintptr_t haddr = addr + entry->addend; | ||
418 | |||
419 | return ldn_p((void *)haddr, size); | ||
420 | } | ||
421 | @@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, | ||
422 | * repeat the MMU check here. This tlb_fill() call might | ||
423 | * longjump out if this access should cause a guest exception. | ||
424 | */ | ||
425 | - int index; | ||
426 | + CPUTLBEntry *entry; | ||
427 | target_ulong tlb_addr; | ||
428 | |||
429 | tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); | ||
430 | |||
431 | - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
432 | - tlb_addr = env->tlb_table[mmu_idx][index].addr_write; | ||
433 | + entry = tlb_entry(env, mmu_idx, addr); | ||
434 | + tlb_addr = entry->addr_write; | ||
435 | if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { | ||
436 | /* RAM access */ | ||
437 | - uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend; | ||
438 | + uintptr_t haddr = addr + entry->addend; | ||
439 | |||
440 | stn_p((void *)haddr, size, val); | ||
441 | return; | ||
442 | @@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, | ||
443 | */ | ||
444 | tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) | ||
445 | { | ||
446 | - int mmu_idx, index; | ||
447 | + uintptr_t mmu_idx = cpu_mmu_index(env, true); | ||
448 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
449 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
450 | void *p; | ||
451 | |||
452 | - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
453 | - mmu_idx = cpu_mmu_index(env, true); | ||
454 | - if (unlikely(!tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr))) { | ||
455 | + if (unlikely(!tlb_hit(entry->addr_code, addr))) { | ||
456 | if (!VICTIM_TLB_HIT(addr_code, addr)) { | ||
457 | tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); | ||
458 | } | ||
459 | - assert(tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr)); | ||
460 | + assert(tlb_hit(entry->addr_code, addr)); | ||
461 | } | ||
462 | |||
463 | - if (unlikely(env->tlb_table[mmu_idx][index].addr_code & | ||
464 | - (TLB_RECHECK | TLB_MMIO))) { | ||
465 | + if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) { | ||
466 | /* | ||
467 | * Return -1 if we can't translate and execute from an entire | ||
468 | * page of RAM here, which will cause us to execute by loading | ||
469 | @@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) | ||
470 | return -1; | ||
471 | } | ||
472 | |||
473 | - p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend); | ||
474 | + p = (void *)((uintptr_t)addr + entry->addend); | ||
475 | return qemu_ram_addr_from_host_nofail(p); | ||
476 | } | ||
477 | |||
478 | @@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) | ||
479 | void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, | ||
480 | uintptr_t retaddr) | ||
481 | { | ||
482 | - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
483 | - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; | ||
484 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
485 | + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
486 | |||
487 | - if (!tlb_hit(tlb_addr, addr)) { | ||
488 | + if (!tlb_hit(entry->addr_write, addr)) { | ||
489 | /* TLB entry is for a different page */ | ||
490 | if (!VICTIM_TLB_HIT(addr_write, addr)) { | ||
491 | tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, | ||
492 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
493 | NotDirtyInfo *ndi) | ||
494 | { | ||
495 | size_t mmu_idx = get_mmuidx(oi); | ||
496 | - size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); | ||
497 | - CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index]; | ||
498 | + uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
499 | + CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); | ||
500 | target_ulong tlb_addr = tlbe->addr_write; | ||
501 | TCGMemOp mop = get_memop(oi); | ||
502 | int a_bits = get_alignment_bits(mop); | ||
167 | -- | 503 | -- |
168 | 2.25.1 | 504 | 2.17.2 |
169 | 505 | ||
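A note for readers following along: the tlb_index() and tlb_entry() helpers
introduced above simply give a name to the direct-mapped TLB lookup that was
previously open-coded at every call site. A minimal standalone sketch of the
same pattern, with simplified types and invented names (an illustration, not
QEMU code):

    #include <stdint.h>

    #define PAGE_BITS 12              /* stands in for TARGET_PAGE_BITS */
    #define TLB_SIZE  (1 << 8)        /* stands in for CPU_TLB_SIZE */

    typedef struct {
        uint64_t  addr_read;          /* tag: guest virtual page address */
        uintptr_t addend;             /* host address = guest address + addend */
    } TLBEntry;

    static TLBEntry tlb[TLB_SIZE];

    /* Hash the page number into the direct-mapped table. */
    static inline uintptr_t tlb_index(uint64_t addr)
    {
        return (addr >> PAGE_BITS) & (TLB_SIZE - 1);
    }

    static inline TLBEntry *tlb_entry(uint64_t addr)
    {
        return &tlb[tlb_index(addr)];
    }

Centralizing the index computation means a later change to the TLB
organization only has to touch these two helpers instead of every load,
store and flush path.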
1 | We had a single ATOMIC_MMU_LOOKUP macro that probed for | 1 | GCC7+ will no longer advertise support for 16-byte __atomic operations |
---|---|---|---|
2 | read+write on all atomic ops. This is incorrect for | 2 | if only cmpxchg is supported, as for x86_64. Fortunately, x86_64 still |
3 | plain atomic load and atomic store. | 3 | has support for __sync_val_compare_and_swap_16 and we can make use of that. |
4 | AArch64 does not have, nor has it ever had, such support, so open-code it. | ||
4 | 5 | ||
5 | For user-only, we rely on the host page permissions. | 6 | Reviewed-by: Emilio G. Cota <cota@braap.org> |
6 | |||
7 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/390 | ||
8 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 8 | --- |
11 | accel/tcg/atomic_template.h | 24 +++++----- | 9 | accel/tcg/atomic_template.h | 20 ++++- |
12 | accel/tcg/cputlb.c | 95 ++++++++++++++++++++++++++----------- | 10 | include/qemu/atomic128.h | 153 ++++++++++++++++++++++++++++++++++++ |
13 | accel/tcg/user-exec.c | 8 ++-- | 11 | include/qemu/compiler.h | 11 +++ |
14 | 3 files changed, 83 insertions(+), 44 deletions(-) | 12 | tcg/tcg.h | 16 ++-- |
13 | accel/tcg/cputlb.c | 3 +- | ||
14 | accel/tcg/user-exec.c | 5 +- | ||
15 | configure | 19 +++++ | ||
16 | 7 files changed, 213 insertions(+), 14 deletions(-) | ||
17 | create mode 100644 include/qemu/atomic128.h | ||
15 | 18 | ||
16 | diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h | 19 | diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h |
17 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/accel/tcg/atomic_template.h | 21 | --- a/accel/tcg/atomic_template.h |
19 | +++ b/accel/tcg/atomic_template.h | 22 | +++ b/accel/tcg/atomic_template.h |
20 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, | 23 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, |
21 | ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) | ||
22 | { | ||
23 | ATOMIC_MMU_DECLS; | ||
24 | - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; | ||
25 | + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; | ||
26 | DATA_TYPE ret; | 24 | DATA_TYPE ret; |
27 | uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, | 25 | |
28 | ATOMIC_MMU_IDX); | 26 | ATOMIC_TRACE_RMW; |
29 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, | 27 | +#if DATA_SIZE == 16 |
28 | + ret = atomic16_cmpxchg(haddr, cmpv, newv); | ||
29 | +#else | ||
30 | ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv); | ||
31 | +#endif | ||
32 | ATOMIC_MMU_CLEANUP; | ||
33 | return ret; | ||
34 | } | ||
35 | |||
36 | #if DATA_SIZE >= 16 | ||
37 | +#if HAVE_ATOMIC128 | ||
30 | ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) | 38 | ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) |
31 | { | 39 | { |
32 | ATOMIC_MMU_DECLS; | 40 | ATOMIC_MMU_DECLS; |
33 | - DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; | 41 | DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; |
34 | + DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP_R; | 42 | |
35 | uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, | 43 | ATOMIC_TRACE_LD; |
36 | ATOMIC_MMU_IDX); | 44 | - __atomic_load(haddr, &val, __ATOMIC_RELAXED); |
37 | 45 | + val = atomic16_read(haddr); | |
46 | ATOMIC_MMU_CLEANUP; | ||
47 | return val; | ||
48 | } | ||
38 | @@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, | 49 | @@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, |
39 | ABI_TYPE val EXTRA_ARGS) | 50 | DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; |
40 | { | 51 | |
41 | ATOMIC_MMU_DECLS; | 52 | ATOMIC_TRACE_ST; |
42 | - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; | 53 | - __atomic_store(haddr, &val, __ATOMIC_RELAXED); |
43 | + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_W; | 54 | + atomic16_set(haddr, val); |
44 | uint16_t info = trace_mem_build_info(SHIFT, false, 0, true, | 55 | ATOMIC_MMU_CLEANUP; |
45 | ATOMIC_MMU_IDX); | 56 | } |
46 | 57 | +#endif | |
47 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, | 58 | #else |
59 | ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, | ||
48 | ABI_TYPE val EXTRA_ARGS) | 60 | ABI_TYPE val EXTRA_ARGS) |
49 | { | 61 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, |
50 | ATOMIC_MMU_DECLS; | ||
51 | - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; | ||
52 | + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; | ||
53 | DATA_TYPE ret; | 62 | DATA_TYPE ret; |
54 | uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, | 63 | |
55 | ATOMIC_MMU_IDX); | 64 | ATOMIC_TRACE_RMW; |
56 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ | 65 | +#if DATA_SIZE == 16 |
57 | ABI_TYPE val EXTRA_ARGS) \ | 66 | + ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)); |
58 | { \ | 67 | +#else |
59 | ATOMIC_MMU_DECLS; \ | 68 | ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); |
60 | - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ | 69 | +#endif |
61 | + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \ | 70 | ATOMIC_MMU_CLEANUP; |
62 | DATA_TYPE ret; \ | 71 | return BSWAP(ret); |
63 | uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, \ | 72 | } |
64 | ATOMIC_MMU_IDX); \ | 73 | |
65 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ | 74 | #if DATA_SIZE >= 16 |
66 | ABI_TYPE xval EXTRA_ARGS) \ | 75 | +#if HAVE_ATOMIC128 |
67 | { \ | ||
68 | ATOMIC_MMU_DECLS; \ | ||
69 | - XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ | ||
70 | + XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \ | ||
71 | XDATA_TYPE cmp, old, new, val = xval; \ | ||
72 | uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, \ | ||
73 | ATOMIC_MMU_IDX); \ | ||
74 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, | ||
75 | ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) | ||
76 | { | ||
77 | ATOMIC_MMU_DECLS; | ||
78 | - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; | ||
79 | + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; | ||
80 | DATA_TYPE ret; | ||
81 | uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, | ||
82 | ATOMIC_MMU_IDX); | ||
83 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, | ||
84 | ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) | 76 | ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) |
85 | { | 77 | { |
86 | ATOMIC_MMU_DECLS; | 78 | ATOMIC_MMU_DECLS; |
87 | - DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; | 79 | DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; |
88 | + DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP_R; | 80 | |
89 | uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, | 81 | ATOMIC_TRACE_LD; |
90 | ATOMIC_MMU_IDX); | 82 | - __atomic_load(haddr, &val, __ATOMIC_RELAXED); |
91 | 83 | + val = atomic16_read(haddr); | |
84 | ATOMIC_MMU_CLEANUP; | ||
85 | return BSWAP(val); | ||
86 | } | ||
92 | @@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, | 87 | @@ -XXX,XX +XXX,XX @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, |
93 | ABI_TYPE val EXTRA_ARGS) | 88 | |
94 | { | 89 | ATOMIC_TRACE_ST; |
95 | ATOMIC_MMU_DECLS; | 90 | val = BSWAP(val); |
96 | - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; | 91 | - __atomic_store(haddr, &val, __ATOMIC_RELAXED); |
97 | + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_W; | 92 | + atomic16_set(haddr, val); |
98 | uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, true, | 93 | ATOMIC_MMU_CLEANUP; |
99 | ATOMIC_MMU_IDX); | 94 | } |
100 | 95 | +#endif | |
101 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, | 96 | #else |
97 | ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, | ||
102 | ABI_TYPE val EXTRA_ARGS) | 98 | ABI_TYPE val EXTRA_ARGS) |
103 | { | 99 | diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h |
104 | ATOMIC_MMU_DECLS; | 100 | new file mode 100644 |
105 | - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; | 101 | index XXXXXXX..XXXXXXX |
106 | + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; | 102 | --- /dev/null |
107 | ABI_TYPE ret; | 103 | +++ b/include/qemu/atomic128.h |
108 | uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, | 104 | @@ -XXX,XX +XXX,XX @@ |
109 | ATOMIC_MMU_IDX); | 105 | +/* |
110 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ | 106 | + * Simple interface for 128-bit atomic operations. |
111 | ABI_TYPE val EXTRA_ARGS) \ | 107 | + * |
112 | { \ | 108 | + * Copyright (C) 2018 Linaro, Ltd. |
113 | ATOMIC_MMU_DECLS; \ | 109 | + * |
114 | - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ | 110 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. |
115 | + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \ | 111 | + * See the COPYING file in the top-level directory. |
116 | DATA_TYPE ret; \ | 112 | + * |
117 | uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, \ | 113 | + * See docs/devel/atomics.txt for discussion about the guarantees each |
118 | false, ATOMIC_MMU_IDX); \ | 114 | + * atomic primitive is meant to provide. |
119 | @@ -XXX,XX +XXX,XX @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ | 115 | + */ |
120 | ABI_TYPE xval EXTRA_ARGS) \ | 116 | + |
121 | { \ | 117 | +#ifndef QEMU_ATOMIC128_H |
122 | ATOMIC_MMU_DECLS; \ | 118 | +#define QEMU_ATOMIC128_H |
123 | - XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ | 119 | + |
124 | + XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \ | 120 | +/* |
125 | XDATA_TYPE ldo, ldn, old, new, val = xval; \ | 121 | + * GCC is a house divided about supporting large atomic operations. |
126 | uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, \ | 122 | + * |
127 | false, ATOMIC_MMU_IDX); \ | 123 | + * For hosts that only have large compare-and-swap, a legalistic reading |
124 | + * of the C++ standard means that one cannot implement __atomic_read on | ||
125 | + * read-only memory, and thus all atomic operations must synchronize | ||
126 | + * through libatomic. | ||
127 | + * | ||
128 | + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878 | ||
129 | + * | ||
130 | + * This interpretation is not especially helpful for QEMU. | ||
131 | + * For softmmu, all RAM is always read/write from the hypervisor. | ||
132 | + * For user-only, if the guest doesn't implement such an __atomic_read | ||
133 | + * then the host need not worry about it either. | ||
134 | + * | ||
135 | + * Moreover, using libatomic is not an option, because its interface is | ||
136 | + * built for std::atomic<T>, and requires that *all* accesses to such an | ||
137 | + * object go through the library. In our case we do not have an object | ||
138 | + * in the C/C++ sense, but a view of memory as seen by the guest. | ||
139 | + * The guest may issue a large atomic operation and then access those | ||
140 | + * pieces using word-sized accesses. From the hypervisor, we have no | ||
141 | + * way to connect those two actions. | ||
142 | + * | ||
143 | + * Therefore, special case each platform. | ||
144 | + */ | ||
145 | + | ||
146 | +#if defined(CONFIG_ATOMIC128) | ||
147 | +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) | ||
148 | +{ | ||
149 | + return atomic_cmpxchg__nocheck(ptr, cmp, new); | ||
150 | +} | ||
151 | +# define HAVE_CMPXCHG128 1 | ||
152 | +#elif defined(CONFIG_CMPXCHG128) | ||
153 | +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) | ||
154 | +{ | ||
155 | + return __sync_val_compare_and_swap_16(ptr, cmp, new); | ||
156 | +} | ||
157 | +# define HAVE_CMPXCHG128 1 | ||
158 | +#elif defined(__aarch64__) | ||
159 | +/* Through gcc 8, aarch64 has no support for 128-bit at all. */ | ||
160 | +static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) | ||
161 | +{ | ||
162 | + uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp); | ||
163 | + uint64_t newl = int128_getlo(new), newh = int128_gethi(new); | ||
164 | + uint64_t oldl, oldh; | ||
165 | + uint32_t tmp; | ||
166 | + | ||
167 | + asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t" | ||
168 | + "cmp %[oldl], %[cmpl]\n\t" | ||
169 | + "ccmp %[oldh], %[cmph], #0, eq\n\t" | ||
170 | + "b.ne 1f\n\t" | ||
171 | + "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t" | ||
172 | + "cbnz %w[tmp], 0b\n" | ||
173 | + "1:" | ||
174 | + : [mem] "+m"(*ptr), [tmp] "=&r"(tmp), | ||
175 | + [oldl] "=&r"(oldl), [oldh] "=r"(oldh) | ||
176 | + : [cmpl] "r"(cmpl), [cmph] "r"(cmph), | ||
177 | + [newl] "r"(newl), [newh] "r"(newh) | ||
178 | + : "memory", "cc"); | ||
179 | + | ||
180 | + return int128_make128(oldl, oldh); | ||
181 | +} | ||
182 | +# define HAVE_CMPXCHG128 1 | ||
183 | +#else | ||
184 | +/* Fallback definition that must be optimized away, or error. */ | ||
185 | +Int128 QEMU_ERROR("unsupported atomic") | ||
186 | + atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new); | ||
187 | +# define HAVE_CMPXCHG128 0 | ||
188 | +#endif /* Some definition for HAVE_CMPXCHG128 */ | ||
189 | + | ||
190 | + | ||
191 | +#if defined(CONFIG_ATOMIC128) | ||
192 | +static inline Int128 atomic16_read(Int128 *ptr) | ||
193 | +{ | ||
194 | + return atomic_read__nocheck(ptr); | ||
195 | +} | ||
196 | + | ||
197 | +static inline void atomic16_set(Int128 *ptr, Int128 val) | ||
198 | +{ | ||
199 | + atomic_set__nocheck(ptr, val); | ||
200 | +} | ||
201 | + | ||
202 | +# define HAVE_ATOMIC128 1 | ||
203 | +#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__) | ||
204 | +/* We can do better than cmpxchg for AArch64. */ | ||
205 | +static inline Int128 atomic16_read(Int128 *ptr) | ||
206 | +{ | ||
207 | + uint64_t l, h; | ||
208 | + uint32_t tmp; | ||
209 | + | ||
210 | + /* The load must be paired with the store to guarantee not tearing. */ | ||
211 | + asm("0: ldxp %[l], %[h], %[mem]\n\t" | ||
212 | + "stxp %w[tmp], %[l], %[h], %[mem]\n\t" | ||
213 | + "cbnz %w[tmp], 0b" | ||
214 | + : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h)); | ||
215 | + | ||
216 | + return int128_make128(l, h); | ||
217 | +} | ||
218 | + | ||
219 | +static inline void atomic16_set(Int128 *ptr, Int128 val) | ||
220 | +{ | ||
221 | + uint64_t l = int128_getlo(val), h = int128_gethi(val); | ||
222 | + uint64_t t1, t2; | ||
223 | + | ||
224 | + /* Load into temporaries to acquire the exclusive access lock. */ | ||
225 | + asm("0: ldxp %[t1], %[t2], %[mem]\n\t" | ||
226 | + "stxp %w[t1], %[l], %[h], %[mem]\n\t" | ||
227 | + "cbnz %w[t1], 0b" | ||
228 | + : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2) | ||
229 | + : [l] "r"(l), [h] "r"(h)); | ||
230 | +} | ||
231 | + | ||
232 | +# define HAVE_ATOMIC128 1 | ||
233 | +#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128 | ||
234 | +static inline Int128 atomic16_read(Int128 *ptr) | ||
235 | +{ | ||
236 | + /* Maybe replace 0 with 0, returning the old value. */ | ||
237 | + return atomic16_cmpxchg(ptr, 0, 0); | ||
238 | +} | ||
239 | + | ||
240 | +static inline void atomic16_set(Int128 *ptr, Int128 val) | ||
241 | +{ | ||
242 | + Int128 old = *ptr, cmp; | ||
243 | + do { | ||
244 | + cmp = old; | ||
245 | + old = atomic16_cmpxchg(ptr, cmp, val); | ||
246 | + } while (old != cmp); | ||
247 | +} | ||
248 | + | ||
249 | +# define HAVE_ATOMIC128 1 | ||
250 | +#else | ||
251 | +/* Fallback definitions that must be optimized away, or error. */ | ||
252 | +Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr); | ||
253 | +void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val); | ||
254 | +# define HAVE_ATOMIC128 0 | ||
255 | +#endif /* Some definition for HAVE_ATOMIC128 */ | ||
256 | + | ||
257 | +#endif /* QEMU_ATOMIC128_H */ | ||
258 | diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h | ||
259 | index XXXXXXX..XXXXXXX 100644 | ||
260 | --- a/include/qemu/compiler.h | ||
261 | +++ b/include/qemu/compiler.h | ||
262 | @@ -XXX,XX +XXX,XX @@ | ||
263 | # define QEMU_FLATTEN | ||
264 | #endif | ||
265 | |||
266 | +/* | ||
267 | + * If __attribute__((error)) is present, use it to produce an error at | ||
268 | + * compile time. Otherwise, one must wait for the linker to diagnose | ||
269 | + * the missing symbol. | ||
270 | + */ | ||
271 | +#if __has_attribute(error) | ||
272 | +# define QEMU_ERROR(X) __attribute__((error(X))) | ||
273 | +#else | ||
274 | +# define QEMU_ERROR(X) | ||
275 | +#endif | ||
276 | + | ||
277 | /* Implement C11 _Generic via GCC builtins. Example: | ||
278 | * | ||
279 | * QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x) | ||
280 | diff --git a/tcg/tcg.h b/tcg/tcg.h | ||
281 | index XXXXXXX..XXXXXXX 100644 | ||
282 | --- a/tcg/tcg.h | ||
283 | +++ b/tcg/tcg.h | ||
284 | @@ -XXX,XX +XXX,XX @@ | ||
285 | #include "qemu/queue.h" | ||
286 | #include "tcg-mo.h" | ||
287 | #include "tcg-target.h" | ||
288 | +#include "qemu/int128.h" | ||
289 | |||
290 | /* XXX: make safe guess about sizes */ | ||
291 | #define MAX_OP_PER_INSTR 266 | ||
292 | @@ -XXX,XX +XXX,XX @@ GEN_ATOMIC_HELPER_ALL(xchg) | ||
293 | #undef GEN_ATOMIC_HELPER | ||
294 | #endif /* CONFIG_SOFTMMU */ | ||
295 | |||
296 | -#ifdef CONFIG_ATOMIC128 | ||
297 | -#include "qemu/int128.h" | ||
298 | - | ||
299 | -/* These aren't really a "proper" helpers because TCG cannot manage Int128. | ||
300 | - However, use the same format as the others, for use by the backends. */ | ||
301 | +/* | ||
302 | + * These aren't really a "proper" helpers because TCG cannot manage Int128. | ||
303 | + * However, use the same format as the others, for use by the backends. | ||
304 | + * | ||
305 | + * The cmpxchg functions are only defined if HAVE_CMPXCHG128; | ||
306 | + * the ld/st functions are only defined if HAVE_ATOMIC128, | ||
307 | + * as defined by <qemu/atomic128.h>. | ||
308 | + */ | ||
309 | Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, | ||
310 | Int128 cmpv, Int128 newv, | ||
311 | TCGMemOpIdx oi, uintptr_t retaddr); | ||
312 | @@ -XXX,XX +XXX,XX @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, | ||
313 | void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, | ||
314 | TCGMemOpIdx oi, uintptr_t retaddr); | ||
315 | |||
316 | -#endif /* CONFIG_ATOMIC128 */ | ||
317 | - | ||
318 | #endif /* TCG_H */ | ||
128 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | 319 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c |
129 | index XXXXXXX..XXXXXXX 100644 | 320 | index XXXXXXX..XXXXXXX 100644 |
130 | --- a/accel/tcg/cputlb.c | 321 | --- a/accel/tcg/cputlb.c |
131 | +++ b/accel/tcg/cputlb.c | 322 | +++ b/accel/tcg/cputlb.c |
132 | @@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, | 323 | @@ -XXX,XX +XXX,XX @@ |
133 | 324 | #include "exec/log.h" | |
325 | #include "exec/helper-proto.h" | ||
326 | #include "qemu/atomic.h" | ||
327 | +#include "qemu/atomic128.h" | ||
328 | |||
329 | /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ | ||
330 | /* #define DEBUG_TLB */ | ||
331 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
332 | #include "atomic_template.h" | ||
134 | #endif | 333 | #endif |
135 | 334 | ||
136 | -/* Probe for a read-modify-write atomic operation. Do not allow unaligned | 335 | -#ifdef CONFIG_ATOMIC128 |
137 | - * operations, or io operations to proceed. Return the host address. */ | 336 | +#if HAVE_CMPXCHG128 || HAVE_ATOMIC128 |
138 | +/* | 337 | #define DATA_SIZE 16 |
139 | + * Probe for an atomic operation. Do not allow unaligned operations, | ||
140 | + * or io operations to proceed. Return the host address. | ||
141 | + * | ||
142 | + * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE. | ||
143 | + */ | ||
144 | static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
145 | - TCGMemOpIdx oi, uintptr_t retaddr) | ||
146 | + TCGMemOpIdx oi, int size, int prot, | ||
147 | + uintptr_t retaddr) | ||
148 | { | ||
149 | size_t mmu_idx = get_mmuidx(oi); | ||
150 | - uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
151 | - CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); | ||
152 | - target_ulong tlb_addr = tlb_addr_write(tlbe); | ||
153 | MemOp mop = get_memop(oi); | ||
154 | int a_bits = get_alignment_bits(mop); | ||
155 | - int s_bits = mop & MO_SIZE; | ||
156 | + uintptr_t index; | ||
157 | + CPUTLBEntry *tlbe; | ||
158 | + target_ulong tlb_addr; | ||
159 | void *hostaddr; | ||
160 | |||
161 | /* Adjust the given return address. */ | ||
162 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
163 | } | ||
164 | |||
165 | /* Enforce qemu required alignment. */ | ||
166 | - if (unlikely(addr & ((1 << s_bits) - 1))) { | ||
167 | + if (unlikely(addr & (size - 1))) { | ||
168 | /* We get here if guest alignment was not requested, | ||
169 | or was not enforced by cpu_unaligned_access above. | ||
170 | We might widen the access and emulate, but for now | ||
171 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
172 | goto stop_the_world; | ||
173 | } | ||
174 | |||
175 | + index = tlb_index(env, mmu_idx, addr); | ||
176 | + tlbe = tlb_entry(env, mmu_idx, addr); | ||
177 | + | ||
178 | /* Check TLB entry and enforce page permissions. */ | ||
179 | - if (!tlb_hit(tlb_addr, addr)) { | ||
180 | - if (!VICTIM_TLB_HIT(addr_write, addr)) { | ||
181 | - tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, | ||
182 | - mmu_idx, retaddr); | ||
183 | - index = tlb_index(env, mmu_idx, addr); | ||
184 | - tlbe = tlb_entry(env, mmu_idx, addr); | ||
185 | + if (prot & PAGE_WRITE) { | ||
186 | + tlb_addr = tlb_addr_write(tlbe); | ||
187 | + if (!tlb_hit(tlb_addr, addr)) { | ||
188 | + if (!VICTIM_TLB_HIT(addr_write, addr)) { | ||
189 | + tlb_fill(env_cpu(env), addr, size, | ||
190 | + MMU_DATA_STORE, mmu_idx, retaddr); | ||
191 | + index = tlb_index(env, mmu_idx, addr); | ||
192 | + tlbe = tlb_entry(env, mmu_idx, addr); | ||
193 | + } | ||
194 | + tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; | ||
195 | + } | ||
196 | + | ||
197 | + /* Let the guest notice RMW on a write-only page. */ | ||
198 | + if ((prot & PAGE_READ) && | ||
199 | + unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { | ||
200 | + tlb_fill(env_cpu(env), addr, size, | ||
201 | + MMU_DATA_LOAD, mmu_idx, retaddr); | ||
202 | + /* | ||
203 | + * Since we don't support reads and writes to different addresses, | ||
204 | + * and we do have the proper page loaded for write, this shouldn't | ||
205 | + * ever return. But just in case, handle via stop-the-world. | ||
206 | + */ | ||
207 | + goto stop_the_world; | ||
208 | + } | ||
209 | + } else /* if (prot & PAGE_READ) */ { | ||
210 | + tlb_addr = tlbe->addr_read; | ||
211 | + if (!tlb_hit(tlb_addr, addr)) { | ||
212 | + if (!VICTIM_TLB_HIT(addr_write, addr)) { | ||
213 | + tlb_fill(env_cpu(env), addr, size, | ||
214 | + MMU_DATA_LOAD, mmu_idx, retaddr); | ||
215 | + index = tlb_index(env, mmu_idx, addr); | ||
216 | + tlbe = tlb_entry(env, mmu_idx, addr); | ||
217 | + } | ||
218 | + tlb_addr = tlbe->addr_read & ~TLB_INVALID_MASK; | ||
219 | } | ||
220 | - tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; | ||
221 | } | ||
222 | |||
223 | /* Notice an IO access or a needs-MMU-lookup access */ | ||
224 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
225 | goto stop_the_world; | ||
226 | } | ||
227 | |||
228 | - /* Let the guest notice RMW on a write-only page. */ | ||
229 | - if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { | ||
230 | - tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, | ||
231 | - mmu_idx, retaddr); | ||
232 | - /* Since we don't support reads and writes to different addresses, | ||
233 | - and we do have the proper page loaded for write, this shouldn't | ||
234 | - ever return. But just in case, handle via stop-the-world. */ | ||
235 | - goto stop_the_world; | ||
236 | - } | ||
237 | - | ||
238 | hostaddr = (void *)((uintptr_t)addr + tlbe->addend); | ||
239 | |||
240 | if (unlikely(tlb_addr & TLB_NOTDIRTY)) { | ||
241 | - notdirty_write(env_cpu(env), addr, 1 << s_bits, | ||
242 | + notdirty_write(env_cpu(env), addr, size, | ||
243 | &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); | ||
244 | } | ||
245 | |||
246 | @@ -XXX,XX +XXX,XX @@ void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) | ||
247 | #define ATOMIC_NAME(X) \ | ||
248 | HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) | ||
249 | #define ATOMIC_MMU_DECLS | ||
250 | -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) | ||
251 | +#define ATOMIC_MMU_LOOKUP_RW \ | ||
252 | + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ | PAGE_WRITE, retaddr) | ||
253 | +#define ATOMIC_MMU_LOOKUP_R \ | ||
254 | + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ, retaddr) | ||
255 | +#define ATOMIC_MMU_LOOKUP_W \ | ||
256 | + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_WRITE, retaddr) | ||
257 | #define ATOMIC_MMU_CLEANUP | ||
258 | #define ATOMIC_MMU_IDX get_mmuidx(oi) | ||
259 | |||
260 | @@ -XXX,XX +XXX,XX @@ void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) | ||
261 | |||
262 | #undef EXTRA_ARGS | ||
263 | #undef ATOMIC_NAME | ||
264 | -#undef ATOMIC_MMU_LOOKUP | ||
265 | +#undef ATOMIC_MMU_LOOKUP_RW | ||
266 | +#undef ATOMIC_MMU_LOOKUP_R | ||
267 | +#undef ATOMIC_MMU_LOOKUP_W | ||
268 | + | ||
269 | #define EXTRA_ARGS , TCGMemOpIdx oi | ||
270 | #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) | ||
271 | -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) | ||
272 | +#define ATOMIC_MMU_LOOKUP_RW \ | ||
273 | + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ | PAGE_WRITE, GETPC()) | ||
274 | +#define ATOMIC_MMU_LOOKUP_R \ | ||
275 | + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ, GETPC()) | ||
276 | +#define ATOMIC_MMU_LOOKUP_W \ | ||
277 | + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_WRITE, GETPC()) | ||
278 | |||
279 | #define DATA_SIZE 1 | ||
280 | #include "atomic_template.h" | 338 | #include "atomic_template.h" |
339 | #endif | ||
281 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c | 340 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c |
282 | index XXXXXXX..XXXXXXX 100644 | 341 | index XXXXXXX..XXXXXXX 100644 |
283 | --- a/accel/tcg/user-exec.c | 342 | --- a/accel/tcg/user-exec.c |
284 | +++ b/accel/tcg/user-exec.c | 343 | +++ b/accel/tcg/user-exec.c |
344 | @@ -XXX,XX +XXX,XX @@ | ||
345 | #include "exec/cpu_ldst.h" | ||
346 | #include "translate-all.h" | ||
347 | #include "exec/helper-proto.h" | ||
348 | +#include "qemu/atomic128.h" | ||
349 | |||
350 | #undef EAX | ||
351 | #undef ECX | ||
285 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | 352 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, |
286 | 353 | /* The following is only callable from other helpers, and matches up | |
287 | /* Macro to call the above, with local variables from the use context. */ | 354 | with the softmmu version. */ |
288 | #define ATOMIC_MMU_DECLS do {} while (0) | 355 | |
289 | -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC()) | 356 | -#ifdef CONFIG_ATOMIC128 |
290 | +#define ATOMIC_MMU_LOOKUP_RW atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC()) | 357 | +#if HAVE_ATOMIC128 || HAVE_CMPXCHG128 |
291 | +#define ATOMIC_MMU_LOOKUP_R ATOMIC_MMU_LOOKUP_RW | ||
292 | +#define ATOMIC_MMU_LOOKUP_W ATOMIC_MMU_LOOKUP_RW | ||
293 | #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) | ||
294 | #define ATOMIC_MMU_IDX MMU_USER_IDX | ||
295 | |||
296 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
297 | 358 | ||
298 | #undef EXTRA_ARGS | 359 | #undef EXTRA_ARGS |
299 | #undef ATOMIC_NAME | 360 | #undef ATOMIC_NAME |
300 | -#undef ATOMIC_MMU_LOOKUP | 361 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, |
301 | +#undef ATOMIC_MMU_LOOKUP_RW | ||
302 | |||
303 | #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr | ||
304 | #define ATOMIC_NAME(X) \ | ||
305 | HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) | ||
306 | -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr) | ||
307 | +#define ATOMIC_MMU_LOOKUP_RW atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr) | ||
308 | 362 | ||
309 | #define DATA_SIZE 16 | 363 | #define DATA_SIZE 16 |
310 | #include "atomic_template.h" | 364 | #include "atomic_template.h" |
365 | -#endif /* CONFIG_ATOMIC128 */ | ||
366 | +#endif | ||
367 | diff --git a/configure b/configure | ||
368 | index XXXXXXX..XXXXXXX 100755 | ||
369 | --- a/configure | ||
370 | +++ b/configure | ||
371 | @@ -XXX,XX +XXX,XX @@ EOF | ||
372 | fi | ||
373 | fi | ||
374 | |||
375 | +cmpxchg128=no | ||
376 | +if test "$int128" = yes -a "$atomic128" = no; then | ||
377 | + cat > $TMPC << EOF | ||
378 | +int main(void) | ||
379 | +{ | ||
380 | + unsigned __int128 x = 0, y = 0; | ||
381 | + __sync_val_compare_and_swap_16(&x, y, x); | ||
382 | + return 0; | ||
383 | +} | ||
384 | +EOF | ||
385 | + if compile_prog "" "" ; then | ||
386 | + cmpxchg128=yes | ||
387 | + fi | ||
388 | +fi | ||
389 | + | ||
390 | ######################################### | ||
391 | # See if 64-bit atomic operations are supported. | ||
392 | # Note that without __atomic builtins, we can only | ||
393 | @@ -XXX,XX +XXX,XX @@ if test "$atomic128" = "yes" ; then | ||
394 | echo "CONFIG_ATOMIC128=y" >> $config_host_mak | ||
395 | fi | ||
396 | |||
397 | +if test "$cmpxchg128" = "yes" ; then | ||
398 | + echo "CONFIG_CMPXCHG128=y" >> $config_host_mak | ||
399 | +fi | ||
400 | + | ||
401 | if test "$atomic64" = "yes" ; then | ||
402 | echo "CONFIG_ATOMIC64=y" >> $config_host_mak | ||
403 | fi | ||
311 | -- | 404 | -- |
312 | 2.25.1 | 405 | 2.17.2 |
313 | 406 | ||
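On the configure probe added above: where a compiler stops advertising
16-byte __atomic builtins, the older __sync builtin can still compile to an
inline compare-and-swap. A self-contained sketch of the probed operation,
assuming a 64-bit GCC-style host with __int128 support (on x86-64 the
compiler may additionally need -mcx16 to emit cmpxchg16b inline); the names
are invented for illustration, this is not the QEMU implementation:

    typedef unsigned __int128 u128;

    /* Returns the previous contents; the store happened iff old == cmp. */
    static inline u128 cmpxchg128(u128 *ptr, u128 cmp, u128 new)
    {
        return __sync_val_compare_and_swap(ptr, cmp, new);
    }

A 16-byte read can then be derived from it, as the patch's fallback
atomic16_read() does: swapping 0 for 0 changes nothing when the location
holds 0 and fails otherwise, but in either case it returns the current
contents atomically.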
1 | As noted by qemu-plugins.h, plugins can neither read nor write | 1 | Reviewed-by: Emilio G. Cota <cota@braap.org> |
---|---|---|---|
2 | guest registers. | 2 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
3 | |||
4 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 4 | --- |
8 | accel/tcg/plugin-helpers.h | 4 ++-- | 5 | target/i386/mem_helper.c | 9 ++++----- |
9 | 1 file changed, 2 insertions(+), 2 deletions(-) | 6 | 1 file changed, 4 insertions(+), 5 deletions(-) |
10 | 7 | ||
11 | diff --git a/accel/tcg/plugin-helpers.h b/accel/tcg/plugin-helpers.h | 8 | diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c |
12 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/accel/tcg/plugin-helpers.h | 10 | --- a/target/i386/mem_helper.c |
14 | +++ b/accel/tcg/plugin-helpers.h | 11 | +++ b/target/i386/mem_helper.c |
15 | @@ -XXX,XX +XXX,XX @@ | 12 | @@ -XXX,XX +XXX,XX @@ |
16 | #ifdef CONFIG_PLUGIN | 13 | #include "exec/exec-all.h" |
17 | -DEF_HELPER_2(plugin_vcpu_udata_cb, void, i32, ptr) | 14 | #include "exec/cpu_ldst.h" |
18 | -DEF_HELPER_4(plugin_vcpu_mem_cb, void, i32, i32, i64, ptr) | 15 | #include "qemu/int128.h" |
19 | +DEF_HELPER_FLAGS_2(plugin_vcpu_udata_cb, TCG_CALL_NO_RWG, void, i32, ptr) | 16 | +#include "qemu/atomic128.h" |
20 | +DEF_HELPER_FLAGS_4(plugin_vcpu_mem_cb, TCG_CALL_NO_RWG, void, i32, i32, i64, ptr) | 17 | #include "tcg.h" |
18 | |||
19 | void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) | ||
20 | @@ -XXX,XX +XXX,XX @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) | ||
21 | |||
22 | if ((a0 & 0xf) != 0) { | ||
23 | raise_exception_ra(env, EXCP0D_GPF, ra); | ||
24 | - } else { | ||
25 | -#ifndef CONFIG_ATOMIC128 | ||
26 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
27 | -#else | ||
28 | + } else if (HAVE_CMPXCHG128) { | ||
29 | int eflags = cpu_cc_compute_all(env, CC_OP); | ||
30 | |||
31 | Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); | ||
32 | @@ -XXX,XX +XXX,XX @@ void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) | ||
33 | eflags &= ~CC_Z; | ||
34 | } | ||
35 | CC_SRC = eflags; | ||
36 | -#endif | ||
37 | + } else { | ||
38 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
39 | } | ||
40 | } | ||
21 | #endif | 41 | #endif |
22 | -- | 42 | -- |
23 | 2.25.1 | 43 | 2.17.2 |
24 | 44 | ||
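One more aside on the i386 conversion above: replacing #ifdef
CONFIG_ATOMIC128 with a plain if (HAVE_CMPXCHG128) works because the macro
is a compile-time constant 0 or 1, so the optimizer discards the dead
branch while both branches still get parsed and type-checked on every
host; QEMU_ERROR then turns any call that survives into a build failure.
A minimal sketch of the idiom with invented names (not QEMU code; it
relies on the dead branch being eliminated, as it is in an optimized
build):

    #define HAVE_FAST_OP 0   /* would come from configure */

    /* Like QEMU_ERROR: a compile-time diagnostic if a call survives. */
    long do_fast(long *p) __attribute__((error("fast op unsupported")));

    static long do_slow(long *p)
    {
        return *p;            /* stand-in for the fallback path */
    }

    long dispatch(long *p)
    {
        if (HAVE_FAST_OP) {
            return do_fast(p);   /* folded away when HAVE_FAST_OP is 0 */
        }
        return do_slow(p);
    }

The practical win over #ifdef is that the branch for the other
configuration can no longer bit-rot silently, since it must always
compile.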
1 | When this opcode is not available in the backend, the tcg middle-end | 1 | Reviewed-by: Emilio G. Cota <cota@braap.org> |
2 | will expand it into a series of 5 opcodes, so implementing it directly | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
3 | saves bytecode space. | 3 | --- |
3 | saves bytecode space. | ||
4 | |||
5 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 3 | --- |
9 | tcg/tci/tcg-target.h | 4 ++-- | 4 | target/arm/helper-a64.c | 259 +++++++++++++++++++++------------------- |
10 | tcg/tci.c | 16 +++++++++++++++- | 5 | 1 file changed, 133 insertions(+), 126 deletions(-) |
11 | tcg/tci/tcg-target.c.inc | 10 +++++++--- | ||
12 | 3 files changed, 24 insertions(+), 6 deletions(-) | ||
13 | 6 | ||
14 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | 7 | diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c |
15 | index XXXXXXX..XXXXXXX 100644 | 8 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/tcg/tci/tcg-target.h | 9 | --- a/target/arm/helper-a64.c |
17 | +++ b/tcg/tci/tcg-target.h | 10 | +++ b/target/arm/helper-a64.c |
18 | @@ -XXX,XX +XXX,XX @@ | 11 | @@ -XXX,XX +XXX,XX @@ |
19 | #define TCG_TARGET_HAS_not_i32 1 | 12 | #include "exec/exec-all.h" |
20 | #define TCG_TARGET_HAS_orc_i32 0 | 13 | #include "exec/cpu_ldst.h" |
21 | #define TCG_TARGET_HAS_rot_i32 1 | 14 | #include "qemu/int128.h" |
22 | -#define TCG_TARGET_HAS_movcond_i32 0 | 15 | +#include "qemu/atomic128.h" |
23 | +#define TCG_TARGET_HAS_movcond_i32 1 | 16 | #include "tcg.h" |
24 | #define TCG_TARGET_HAS_muls2_i32 0 | 17 | #include "fpu/softfloat.h" |
25 | #define TCG_TARGET_HAS_muluh_i32 0 | 18 | #include <zlib.h> /* For crc32 */ |
26 | #define TCG_TARGET_HAS_mulsh_i32 0 | 19 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) |
27 | @@ -XXX,XX +XXX,XX @@ | 20 | return crc32c(acc, buf, bytes) ^ 0xffffffff; |
28 | #define TCG_TARGET_HAS_not_i64 1 | 21 | } |
29 | #define TCG_TARGET_HAS_orc_i64 0 | 22 | |
30 | #define TCG_TARGET_HAS_rot_i64 1 | 23 | -/* Returns 0 on success; 1 otherwise. */ |
31 | -#define TCG_TARGET_HAS_movcond_i64 0 | 24 | -static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr, |
32 | +#define TCG_TARGET_HAS_movcond_i64 1 | 25 | - uint64_t new_lo, uint64_t new_hi, |
33 | #define TCG_TARGET_HAS_muls2_i64 0 | 26 | - bool parallel, uintptr_t ra) |
34 | #define TCG_TARGET_HAS_add2_i32 0 | 27 | +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, |
35 | #define TCG_TARGET_HAS_sub2_i32 0 | 28 | + uint64_t new_lo, uint64_t new_hi) |
36 | diff --git a/tcg/tci.c b/tcg/tci.c | 29 | { |
37 | index XXXXXXX..XXXXXXX 100644 | 30 | - Int128 oldv, cmpv, newv; |
38 | --- a/tcg/tci.c | 31 | + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); |
39 | +++ b/tcg/tci.c | 32 | + Int128 newv = int128_make128(new_lo, new_hi); |
40 | @@ -XXX,XX +XXX,XX @@ static void tci_args_rrrr(uint32_t insn, | 33 | + Int128 oldv; |
41 | *r2 = extract32(insn, 16, 4); | 34 | + uintptr_t ra = GETPC(); |
42 | *r3 = extract32(insn, 20, 4); | 35 | + uint64_t o0, o1; |
43 | } | 36 | bool success; |
37 | |||
38 | - cmpv = int128_make128(env->exclusive_val, env->exclusive_high); | ||
39 | - newv = int128_make128(new_lo, new_hi); | ||
40 | - | ||
41 | - if (parallel) { | ||
42 | -#ifndef CONFIG_ATOMIC128 | ||
43 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
44 | -#else | ||
45 | - int mem_idx = cpu_mmu_index(env, false); | ||
46 | - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
47 | - oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); | ||
48 | - success = int128_eq(oldv, cmpv); | ||
49 | -#endif | ||
50 | - } else { | ||
51 | - uint64_t o0, o1; | ||
52 | - | ||
53 | #ifdef CONFIG_USER_ONLY | ||
54 | - /* ??? Enforce alignment. */ | ||
55 | - uint64_t *haddr = g2h(addr); | ||
56 | + /* ??? Enforce alignment. */ | ||
57 | + uint64_t *haddr = g2h(addr); | ||
58 | |||
59 | - helper_retaddr = ra; | ||
60 | - o0 = ldq_le_p(haddr + 0); | ||
61 | - o1 = ldq_le_p(haddr + 1); | ||
62 | - oldv = int128_make128(o0, o1); | ||
63 | + helper_retaddr = ra; | ||
64 | + o0 = ldq_le_p(haddr + 0); | ||
65 | + o1 = ldq_le_p(haddr + 1); | ||
66 | + oldv = int128_make128(o0, o1); | ||
67 | |||
68 | - success = int128_eq(oldv, cmpv); | ||
69 | - if (success) { | ||
70 | - stq_le_p(haddr + 0, int128_getlo(newv)); | ||
71 | - stq_le_p(haddr + 1, int128_gethi(newv)); | ||
72 | - } | ||
73 | - helper_retaddr = 0; | ||
74 | -#else | ||
75 | - int mem_idx = cpu_mmu_index(env, false); | ||
76 | - TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
77 | - TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); | ||
78 | - | ||
79 | - o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); | ||
80 | - o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); | ||
81 | - oldv = int128_make128(o0, o1); | ||
82 | - | ||
83 | - success = int128_eq(oldv, cmpv); | ||
84 | - if (success) { | ||
85 | - helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); | ||
86 | - helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); | ||
87 | - } | ||
88 | -#endif | ||
89 | + success = int128_eq(oldv, cmpv); | ||
90 | + if (success) { | ||
91 | + stq_le_p(haddr + 0, int128_getlo(newv)); | ||
92 | + stq_le_p(haddr + 1, int128_gethi(newv)); | ||
93 | } | ||
94 | + helper_retaddr = 0; | ||
95 | +#else | ||
96 | + int mem_idx = cpu_mmu_index(env, false); | ||
97 | + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
98 | + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); | ||
99 | + | ||
100 | + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); | ||
101 | + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); | ||
102 | + oldv = int128_make128(o0, o1); | ||
103 | + | ||
104 | + success = int128_eq(oldv, cmpv); | ||
105 | + if (success) { | ||
106 | + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); | ||
107 | + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); | ||
108 | + } | ||
44 | +#endif | 109 | +#endif |
45 | 110 | ||
46 | static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, | 111 | return !success; |
47 | TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5) | 112 | } |
48 | @@ -XXX,XX +XXX,XX @@ static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, | 113 | |
49 | *c5 = extract32(insn, 28, 4); | 114 | -uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, |
50 | } | 115 | - uint64_t new_lo, uint64_t new_hi) |
51 | 116 | -{ | |
52 | +#if TCG_TARGET_REG_BITS == 32 | 117 | - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC()); |
53 | static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, | 118 | -} |
54 | TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5) | 119 | - |
55 | { | 120 | uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, |
56 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | 121 | uint64_t new_lo, uint64_t new_hi) |
57 | tci_args_rrrc(insn, &r0, &r1, &r2, &condition); | 122 | -{ |
58 | regs[r0] = tci_compare32(regs[r1], regs[r2], condition); | 123 | - return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC()); |
59 | break; | 124 | -} |
60 | + case INDEX_op_movcond_i32: | 125 | - |
61 | + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); | 126 | -static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr, |
62 | + tmp32 = tci_compare32(regs[r1], regs[r2], condition); | 127 | - uint64_t new_lo, uint64_t new_hi, |
63 | + regs[r0] = regs[tmp32 ? r3 : r4]; | 128 | - bool parallel, uintptr_t ra) |
64 | + break; | 129 | { |
65 | #if TCG_TARGET_REG_BITS == 32 | 130 | Int128 oldv, cmpv, newv; |
66 | case INDEX_op_setcond2_i32: | 131 | + uintptr_t ra = GETPC(); |
67 | tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); | 132 | bool success; |
68 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | 133 | + int mem_idx; |
69 | tci_args_rrrc(insn, &r0, &r1, &r2, &condition); | 134 | + TCGMemOpIdx oi; |
70 | regs[r0] = tci_compare64(regs[r1], regs[r2], condition); | 135 | |
71 | break; | 136 | - /* high and low need to be switched here because this is not actually a |
72 | + case INDEX_op_movcond_i64: | 137 | - * 128bit store but two doublewords stored consecutively |
73 | + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); | 138 | - */ |
74 | + tmp32 = tci_compare64(regs[r1], regs[r2], condition); | 139 | - cmpv = int128_make128(env->exclusive_high, env->exclusive_val); |
75 | + regs[r0] = regs[tmp32 ? r3 : r4]; | 140 | - newv = int128_make128(new_hi, new_lo); |
76 | + break; | 141 | - |
77 | #endif | 142 | - if (parallel) { |
78 | CASE_32_64(mov) | 143 | -#ifndef CONFIG_ATOMIC128 |
79 | tci_args_rr(insn, &r0, &r1); | 144 | + if (!HAVE_CMPXCHG128) { |
80 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | 145 | cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); |
81 | op_name, str_r(r0), str_r(r1), str_r(r2), pos, len); | 146 | -#else |
82 | break; | 147 | - int mem_idx = cpu_mmu_index(env, false); |
83 | 148 | - TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); | |
84 | -#if TCG_TARGET_REG_BITS == 32 | 149 | - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); |
85 | + case INDEX_op_movcond_i32: | 150 | - success = int128_eq(oldv, cmpv); |
86 | + case INDEX_op_movcond_i64: | 151 | -#endif |
87 | case INDEX_op_setcond2_i32: | 152 | - } else { |
88 | tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &c); | 153 | - uint64_t o0, o1; |
89 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s", | 154 | - |
90 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | 155 | -#ifdef CONFIG_USER_ONLY |
91 | str_r(r3), str_r(r4), str_c(c)); | 156 | - /* ??? Enforce alignment. */ |
92 | break; | 157 | - uint64_t *haddr = g2h(addr); |
93 | 158 | - | |
94 | +#if TCG_TARGET_REG_BITS == 32 | 159 | - helper_retaddr = ra; |
95 | case INDEX_op_mulu2_i32: | 160 | - o1 = ldq_be_p(haddr + 0); |
96 | tci_args_rrrr(insn, &r0, &r1, &r2, &r3); | 161 | - o0 = ldq_be_p(haddr + 1); |
97 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", | 162 | - oldv = int128_make128(o0, o1); |
98 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | 163 | - |
99 | index XXXXXXX..XXXXXXX 100644 | 164 | - success = int128_eq(oldv, cmpv); |
100 | --- a/tcg/tci/tcg-target.c.inc | 165 | - if (success) { |
101 | +++ b/tcg/tci/tcg-target.c.inc | 166 | - stq_be_p(haddr + 0, int128_gethi(newv)); |
102 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | 167 | - stq_be_p(haddr + 1, int128_getlo(newv)); |
103 | return C_O0_I4(r, r, r, r); | 168 | - } |
104 | case INDEX_op_mulu2_i32: | 169 | - helper_retaddr = 0; |
105 | return C_O2_I2(r, r, r, r); | 170 | -#else |
171 | - int mem_idx = cpu_mmu_index(env, false); | ||
172 | - TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); | ||
173 | - TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); | ||
174 | - | ||
175 | - o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); | ||
176 | - o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); | ||
177 | - oldv = int128_make128(o0, o1); | ||
178 | - | ||
179 | - success = int128_eq(oldv, cmpv); | ||
180 | - if (success) { | ||
181 | - helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); | ||
182 | - helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); | ||
183 | - } | ||
184 | -#endif | ||
185 | } | ||
186 | |||
187 | + mem_idx = cpu_mmu_index(env, false); | ||
188 | + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
189 | + | ||
190 | + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); | ||
191 | + newv = int128_make128(new_lo, new_hi); | ||
192 | + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); | ||
193 | + | ||
194 | + success = int128_eq(oldv, cmpv); | ||
195 | return !success; | ||
196 | } | ||
197 | |||
198 | uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, | ||
199 | uint64_t new_lo, uint64_t new_hi) | ||
200 | { | ||
201 | - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC()); | ||
202 | + /* | ||
203 | + * High and low need to be switched here because this is not actually a | ||
204 | + * 128bit store but two doublewords stored consecutively | ||
205 | + */ | ||
206 | + Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); | ||
207 | + Int128 newv = int128_make128(new_lo, new_hi); | ||
208 | + Int128 oldv; | ||
209 | + uintptr_t ra = GETPC(); | ||
210 | + uint64_t o0, o1; | ||
211 | + bool success; | ||
212 | + | ||
213 | +#ifdef CONFIG_USER_ONLY | ||
214 | + /* ??? Enforce alignment. */ | ||
215 | + uint64_t *haddr = g2h(addr); | ||
216 | + | ||
217 | + helper_retaddr = ra; | ||
218 | + o1 = ldq_be_p(haddr + 0); | ||
219 | + o0 = ldq_be_p(haddr + 1); | ||
220 | + oldv = int128_make128(o0, o1); | ||
221 | + | ||
222 | + success = int128_eq(oldv, cmpv); | ||
223 | + if (success) { | ||
224 | + stq_be_p(haddr + 0, int128_gethi(newv)); | ||
225 | + stq_be_p(haddr + 1, int128_getlo(newv)); | ||
226 | + } | ||
227 | + helper_retaddr = 0; | ||
228 | +#else | ||
229 | + int mem_idx = cpu_mmu_index(env, false); | ||
230 | + TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); | ||
231 | + TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); | ||
232 | + | ||
233 | + o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); | ||
234 | + o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); | ||
235 | + oldv = int128_make128(o0, o1); | ||
236 | + | ||
237 | + success = int128_eq(oldv, cmpv); | ||
238 | + if (success) { | ||
239 | + helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); | ||
240 | + helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); | ||
241 | + } | ||
106 | +#endif | 242 | +#endif |
107 | + | 243 | + |
108 | + case INDEX_op_movcond_i32: | 244 | + return !success; |
109 | + case INDEX_op_movcond_i64: | 245 | } |
110 | case INDEX_op_setcond2_i32: | 246 | |
111 | return C_O1_I4(r, r, r, r, r); | 247 | uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, |
112 | -#endif | 248 | - uint64_t new_lo, uint64_t new_hi) |
113 | 249 | + uint64_t new_lo, uint64_t new_hi) | |
114 | case INDEX_op_qemu_ld_i32: | 250 | { |
115 | return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS | 251 | - return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC()); |
116 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op, | 252 | + Int128 oldv, cmpv, newv; |
117 | insn = deposit32(insn, 20, 4, r3); | 253 | + uintptr_t ra = GETPC(); |
118 | tcg_out32(s, insn); | 254 | + bool success; |
119 | } | 255 | + int mem_idx; |
120 | +#endif | 256 | + TCGMemOpIdx oi; |
121 | 257 | + | |
122 | static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, | 258 | + if (!HAVE_CMPXCHG128) { |
123 | TCGReg r0, TCGReg r1, TCGReg r2, | 259 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); |
124 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, | 260 | + } |
125 | tcg_out32(s, insn); | 261 | + |
126 | } | 262 | + mem_idx = cpu_mmu_index(env, false); |
127 | 263 | + oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); | |
128 | +#if TCG_TARGET_REG_BITS == 32 | 264 | + |
129 | static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op, | 265 | + /* |
130 | TCGReg r0, TCGReg r1, TCGReg r2, | 266 | + * High and low need to be switched here because this is not actually a |
131 | TCGReg r3, TCGReg r4, TCGReg r5) | 267 | + * 128bit store but two doublewords stored consecutively |
132 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | 268 | + */ |
133 | tcg_out_op_rrrc(s, opc, args[0], args[1], args[2], args[3]); | 269 | + cmpv = int128_make128(env->exclusive_high, env->exclusive_val); |
134 | break; | 270 | + newv = int128_make128(new_hi, new_lo); |
135 | 271 | + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); | |
136 | -#if TCG_TARGET_REG_BITS == 32 | 272 | + |
137 | + CASE_32_64(movcond) | 273 | + success = int128_eq(oldv, cmpv); |
138 | case INDEX_op_setcond2_i32: | 274 | + return !success; |
139 | tcg_out_op_rrrrrc(s, opc, args[0], args[1], args[2], | 275 | } |
140 | args[3], args[4], args[5]); | 276 | |
141 | break; | 277 | /* Writes back the old data into Rs. */ |
142 | -#endif | 278 | void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, |
143 | 279 | uint64_t new_lo, uint64_t new_hi) | |
144 | CASE_32_64(ld8u) | 280 | { |
145 | CASE_32_64(ld8s) | 281 | - uintptr_t ra = GETPC(); |
282 | -#ifndef CONFIG_ATOMIC128 | ||
283 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
284 | -#else | ||
285 | Int128 oldv, cmpv, newv; | ||
286 | + uintptr_t ra = GETPC(); | ||
287 | + int mem_idx; | ||
288 | + TCGMemOpIdx oi; | ||
289 | + | ||
290 | + if (!HAVE_CMPXCHG128) { | ||
291 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
292 | + } | ||
293 | + | ||
294 | + mem_idx = cpu_mmu_index(env, false); | ||
295 | + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
296 | |||
297 | cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]); | ||
298 | newv = int128_make128(new_lo, new_hi); | ||
299 | - | ||
300 | - int mem_idx = cpu_mmu_index(env, false); | ||
301 | - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
302 | oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); | ||
303 | |||
304 | env->xregs[rs] = int128_getlo(oldv); | ||
305 | env->xregs[rs + 1] = int128_gethi(oldv); | ||
306 | -#endif | ||
307 | } | ||
308 | |||
309 | void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, | ||
310 | uint64_t new_hi, uint64_t new_lo) | ||
311 | { | ||
312 | - uintptr_t ra = GETPC(); | ||
313 | -#ifndef CONFIG_ATOMIC128 | ||
314 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
315 | -#else | ||
316 | Int128 oldv, cmpv, newv; | ||
317 | + uintptr_t ra = GETPC(); | ||
318 | + int mem_idx; | ||
319 | + TCGMemOpIdx oi; | ||
320 | + | ||
321 | + if (!HAVE_CMPXCHG128) { | ||
322 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
323 | + } | ||
324 | + | ||
325 | + mem_idx = cpu_mmu_index(env, false); | ||
326 | + oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
327 | |||
328 | cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]); | ||
329 | newv = int128_make128(new_lo, new_hi); | ||
330 | - | ||
331 | - int mem_idx = cpu_mmu_index(env, false); | ||
332 | - TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
333 | oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); | ||
334 | |||
335 | env->xregs[rs + 1] = int128_getlo(oldv); | ||
336 | env->xregs[rs] = int128_gethi(oldv); | ||
337 | -#endif | ||
338 | } | ||
339 | |||
340 | /* | ||
146 | -- | 341 | -- |
147 | 2.25.1 | 342 | 2.17.2 |
148 | 343 | ||
1 | Inline tci_write_reg into its one caller, tci_write_reg64. | 1 | Reviewed-by: Emilio G. Cota <cota@braap.org> |
---|---|---|---|
2 | Drop the asserts that are redundant with tcg_read_r. | 2 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
3 | |||
4 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 4 | --- |
8 | tcg/tci.c | 13 ++----------- | 5 | target/arm/helper-a64.c | 16 ++++------------ |
9 | 1 file changed, 2 insertions(+), 11 deletions(-) | 6 | target/arm/translate-a64.c | 38 ++++++++++++++++++++++---------------- |
7 | 2 files changed, 26 insertions(+), 28 deletions(-) | ||
10 | 8 | ||
11 | diff --git a/tcg/tci.c b/tcg/tci.c | 9 | diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c |
12 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/tci.c | 11 | --- a/target/arm/helper-a64.c |
14 | +++ b/tcg/tci.c | 12 | +++ b/target/arm/helper-a64.c |
13 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, | ||
14 | int mem_idx; | ||
15 | TCGMemOpIdx oi; | ||
16 | |||
17 | - if (!HAVE_CMPXCHG128) { | ||
18 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
19 | - } | ||
20 | + assert(HAVE_CMPXCHG128); | ||
21 | |||
22 | mem_idx = cpu_mmu_index(env, false); | ||
23 | oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
24 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, | ||
25 | int mem_idx; | ||
26 | TCGMemOpIdx oi; | ||
27 | |||
28 | - if (!HAVE_CMPXCHG128) { | ||
29 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
30 | - } | ||
31 | + assert(HAVE_CMPXCHG128); | ||
32 | |||
33 | mem_idx = cpu_mmu_index(env, false); | ||
34 | oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); | ||
35 | @@ -XXX,XX +XXX,XX @@ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, | ||
36 | int mem_idx; | ||
37 | TCGMemOpIdx oi; | ||
38 | |||
39 | - if (!HAVE_CMPXCHG128) { | ||
40 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
41 | - } | ||
42 | + assert(HAVE_CMPXCHG128); | ||
43 | |||
44 | mem_idx = cpu_mmu_index(env, false); | ||
45 | oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, | ||
47 | int mem_idx; | ||
48 | TCGMemOpIdx oi; | ||
49 | |||
50 | - if (!HAVE_CMPXCHG128) { | ||
51 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
52 | - } | ||
53 | + assert(HAVE_CMPXCHG128); | ||
54 | |||
55 | mem_idx = cpu_mmu_index(env, false); | ||
56 | oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); | ||
57 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/target/arm/translate-a64.c | ||
60 | +++ b/target/arm/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ | 61 | @@ -XXX,XX +XXX,XX @@ |
16 | 62 | ||
17 | __thread uintptr_t tci_tb_ptr; | 63 | #include "trace-tcg.h" |
18 | 64 | #include "translate-a64.h" | |
19 | -static void | 65 | +#include "qemu/atomic128.h" |
20 | -tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value) | 66 | |
21 | -{ | 67 | static TCGv_i64 cpu_X[32]; |
22 | - tci_assert(index < TCG_TARGET_NB_REGS); | 68 | static TCGv_i64 cpu_pc; |
23 | - tci_assert(index != TCG_AREG0); | 69 | @@ -XXX,XX +XXX,XX @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, |
24 | - tci_assert(index != TCG_REG_CALL_STACK); | 70 | get_mem_index(s), |
25 | - regs[index] = value; | 71 | MO_64 | MO_ALIGN | s->be_data); |
26 | -} | 72 | tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); |
73 | - } else if (s->be_data == MO_LE) { | ||
74 | - if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
75 | + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
76 | + if (!HAVE_CMPXCHG128) { | ||
77 | + gen_helper_exit_atomic(cpu_env); | ||
78 | + s->base.is_jmp = DISAS_NORETURN; | ||
79 | + } else if (s->be_data == MO_LE) { | ||
80 | gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env, | ||
81 | cpu_exclusive_addr, | ||
82 | cpu_reg(s, rt), | ||
83 | cpu_reg(s, rt2)); | ||
84 | } else { | ||
85 | - gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, | ||
86 | - cpu_reg(s, rt), cpu_reg(s, rt2)); | ||
87 | - } | ||
88 | - } else { | ||
89 | - if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
90 | gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env, | ||
91 | cpu_exclusive_addr, | ||
92 | cpu_reg(s, rt), | ||
93 | cpu_reg(s, rt2)); | ||
94 | - } else { | ||
95 | - gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, | ||
96 | - cpu_reg(s, rt), cpu_reg(s, rt2)); | ||
97 | } | ||
98 | + } else if (s->be_data == MO_LE) { | ||
99 | + gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, | ||
100 | + cpu_reg(s, rt), cpu_reg(s, rt2)); | ||
101 | + } else { | ||
102 | + gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, | ||
103 | + cpu_reg(s, rt), cpu_reg(s, rt2)); | ||
104 | } | ||
105 | } else { | ||
106 | tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, | ||
107 | @@ -XXX,XX +XXX,XX @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, | ||
108 | } | ||
109 | tcg_temp_free_i64(cmp); | ||
110 | } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
111 | - TCGv_i32 tcg_rs = tcg_const_i32(rs); | ||
27 | - | 112 | - |
28 | static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index, | 113 | - if (s->be_data == MO_LE) { |
29 | uint32_t low_index, uint64_t value) | 114 | - gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); |
30 | { | 115 | + if (HAVE_CMPXCHG128) { |
31 | - tci_write_reg(regs, low_index, value); | 116 | + TCGv_i32 tcg_rs = tcg_const_i32(rs); |
32 | - tci_write_reg(regs, high_index, value >> 32); | 117 | + if (s->be_data == MO_LE) { |
33 | + regs[low_index] = value; | 118 | + gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); |
34 | + regs[high_index] = value >> 32; | 119 | + } else { |
35 | } | 120 | + gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); |
36 | 121 | + } | |
37 | /* Create a 64 bit value from two 32 bit values. */ | 122 | + tcg_temp_free_i32(tcg_rs); |
123 | } else { | ||
124 | - gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); | ||
125 | + gen_helper_exit_atomic(cpu_env); | ||
126 | + s->base.is_jmp = DISAS_NORETURN; | ||
127 | } | ||
128 | - tcg_temp_free_i32(tcg_rs); | ||
129 | } else { | ||
130 | TCGv_i64 d1 = tcg_temp_new_i64(); | ||
131 | TCGv_i64 d2 = tcg_temp_new_i64(); | ||
38 | -- | 132 | -- |
39 | 2.25.1 | 133 | 2.17.2 |
40 | 134 | ||
1 | We will shortly be interested in distinguishing pointers | 1 | Reviewed-by: Emilio G. Cota <cota@braap.org> |
---|---|---|---|
2 | from integers in the helper's declaration, as well as a | ||
3 | true void return. We currently have two parallel 1-bit | ||
4 | fields; merge them and expand to a 3-bit field. | ||
5 | |||
6 | Our current maximum is 7 helper arguments; with the return value, | ||
7 | that makes 8 * 3 = 24 bits used within the uint32_t typemask. | ||
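As a worked example of the encoding (hypothetical helper name, purely for illustration; the typecode values are those defined in helper-head.h below, and env aliases ptr there):

    /* DEF_HELPER_FLAGS_2(example, TCG_CALL_NO_RWG, i64, env, i32) expands to:
     *   .typemask = dh_typemask(i64, 0)     ret:  4 << 0 = 0x004
     *             | dh_typemask(env, 1)     env:  6 << 3 = 0x030
     *             | dh_typemask(i32, 2)     arg2: 2 << 6 = 0x080
     * i.e. 0x0b4.  The low bit of each 3-bit code is the "signed" flag,
     * which is why (argtype & ~1) == dh_typecode_i64 matches i64 and s64.
     */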
8 | |||
9 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
10 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
12 | --- | 3 | --- |
13 | include/exec/helper-head.h | 37 +++++-------------- | 4 | target/ppc/helper.h | 2 +- |
14 | include/exec/helper-tcg.h | 34 ++++++++--------- | 5 | target/ppc/mem_helper.c | 33 ++++++++++-- |
15 | target/hppa/helper.h | 3 -- | 6 | target/ppc/translate.c | 115 +++++++++++++++++++++------------------- |
16 | target/i386/ops_sse_header.h | 3 -- | 7 | 3 files changed, 88 insertions(+), 62 deletions(-) |
17 | target/m68k/helper.h | 1 - | ||
18 | target/ppc/helper.h | 3 -- | ||
19 | tcg/tcg.c | 71 +++++++++++++++++++++--------------- | ||
20 | 7 files changed, 67 insertions(+), 85 deletions(-) | ||
21 | 8 | ||
22 | diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/include/exec/helper-head.h | ||
25 | +++ b/include/exec/helper-head.h | ||
26 | @@ -XXX,XX +XXX,XX @@ | ||
27 | #define dh_retvar_ptr tcgv_ptr_temp(retval) | ||
28 | #define dh_retvar(t) glue(dh_retvar_, dh_alias(t)) | ||
29 | |||
30 | -#define dh_is_64bit_void 0 | ||
31 | -#define dh_is_64bit_noreturn 0 | ||
32 | -#define dh_is_64bit_i32 0 | ||
33 | -#define dh_is_64bit_i64 1 | ||
34 | -#define dh_is_64bit_ptr (sizeof(void *) == 8) | ||
35 | -#define dh_is_64bit_cptr dh_is_64bit_ptr | ||
36 | -#define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t)) | ||
37 | - | ||
38 | -#define dh_is_signed_void 0 | ||
39 | -#define dh_is_signed_noreturn 0 | ||
40 | -#define dh_is_signed_i32 0 | ||
41 | -#define dh_is_signed_s32 1 | ||
42 | -#define dh_is_signed_i64 0 | ||
43 | -#define dh_is_signed_s64 1 | ||
44 | -#define dh_is_signed_f16 0 | ||
45 | -#define dh_is_signed_f32 0 | ||
46 | -#define dh_is_signed_f64 0 | ||
47 | -#define dh_is_signed_tl 0 | ||
48 | -#define dh_is_signed_int 1 | ||
49 | -/* ??? This is highly specific to the host cpu. There are even special | ||
50 | - extension instructions that may be required, e.g. ia64's addp4. But | ||
51 | - for now we don't support any 64-bit targets with 32-bit pointers. */ | ||
52 | -#define dh_is_signed_ptr 0 | ||
53 | -#define dh_is_signed_cptr dh_is_signed_ptr | ||
54 | -#define dh_is_signed_env dh_is_signed_ptr | ||
55 | -#define dh_is_signed(t) dh_is_signed_##t | ||
56 | +#define dh_typecode_void 0 | ||
57 | +#define dh_typecode_noreturn 0 | ||
58 | +#define dh_typecode_i32 2 | ||
59 | +#define dh_typecode_s32 3 | ||
60 | +#define dh_typecode_i64 4 | ||
61 | +#define dh_typecode_s64 5 | ||
62 | +#define dh_typecode_ptr 6 | ||
63 | +#define dh_typecode(t) glue(dh_typecode_, dh_alias(t)) | ||
64 | |||
65 | #define dh_callflag_i32 0 | ||
66 | #define dh_callflag_s32 0 | ||
67 | @@ -XXX,XX +XXX,XX @@ | ||
68 | #define dh_callflag_noreturn TCG_CALL_NO_RETURN | ||
69 | #define dh_callflag(t) glue(dh_callflag_, dh_alias(t)) | ||
70 | |||
71 | -#define dh_sizemask(t, n) \ | ||
72 | - ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1))) | ||
73 | +#define dh_typemask(t, n) (dh_typecode(t) << (n * 3)) | ||
74 | |||
75 | #define dh_arg(t, n) \ | ||
76 | glue(glue(tcgv_, dh_alias(t)), _temp)(glue(arg, n)) | ||
77 | diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/include/exec/helper-tcg.h | ||
80 | +++ b/include/exec/helper-tcg.h | ||
81 | @@ -XXX,XX +XXX,XX @@ | ||
82 | #define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \ | ||
83 | { .func = HELPER(NAME), .name = str(NAME), \ | ||
84 | .flags = FLAGS | dh_callflag(ret), \ | ||
85 | - .sizemask = dh_sizemask(ret, 0) }, | ||
86 | + .typemask = dh_typemask(ret, 0) }, | ||
87 | |||
88 | #define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \ | ||
89 | { .func = HELPER(NAME), .name = str(NAME), \ | ||
90 | .flags = FLAGS | dh_callflag(ret), \ | ||
91 | - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) }, | ||
92 | + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) }, | ||
93 | |||
94 | #define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \ | ||
95 | { .func = HELPER(NAME), .name = str(NAME), \ | ||
96 | .flags = FLAGS | dh_callflag(ret), \ | ||
97 | - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | ||
98 | - | dh_sizemask(t2, 2) }, | ||
99 | + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ | ||
100 | + | dh_typemask(t2, 2) }, | ||
101 | |||
102 | #define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \ | ||
103 | { .func = HELPER(NAME), .name = str(NAME), \ | ||
104 | .flags = FLAGS | dh_callflag(ret), \ | ||
105 | - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | ||
106 | - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) }, | ||
107 | + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ | ||
108 | + | dh_typemask(t2, 2) | dh_typemask(t3, 3) }, | ||
109 | |||
110 | #define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \ | ||
111 | { .func = HELPER(NAME), .name = str(NAME), \ | ||
112 | .flags = FLAGS | dh_callflag(ret), \ | ||
113 | - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | ||
114 | - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) }, | ||
115 | + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ | ||
116 | + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) }, | ||
117 | |||
118 | #define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \ | ||
119 | { .func = HELPER(NAME), .name = str(NAME), \ | ||
120 | .flags = FLAGS | dh_callflag(ret), \ | ||
121 | - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | ||
122 | - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | ||
123 | - | dh_sizemask(t5, 5) }, | ||
124 | + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ | ||
125 | + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ | ||
126 | + | dh_typemask(t5, 5) }, | ||
127 | |||
128 | #define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \ | ||
129 | { .func = HELPER(NAME), .name = str(NAME), \ | ||
130 | .flags = FLAGS | dh_callflag(ret), \ | ||
131 | - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | ||
132 | - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | ||
133 | - | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) }, | ||
134 | + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ | ||
135 | + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ | ||
136 | + | dh_typemask(t5, 5) | dh_typemask(t6, 6) }, | ||
137 | |||
138 | #define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \ | ||
139 | { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ | ||
140 | - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | ||
141 | - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | ||
142 | - | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) }, | ||
143 | + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ | ||
144 | + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ | ||
145 | + | dh_typemask(t5, 5) | dh_typemask(t6, 6) | dh_typemask(t7, 7) }, | ||
146 | |||
147 | #include "helper.h" | ||
148 | #include "trace/generated-helpers.h" | ||
149 | diff --git a/target/hppa/helper.h b/target/hppa/helper.h | ||
150 | index XXXXXXX..XXXXXXX 100644 | ||
151 | --- a/target/hppa/helper.h | ||
152 | +++ b/target/hppa/helper.h | ||
153 | @@ -XXX,XX +XXX,XX @@ | ||
154 | #if TARGET_REGISTER_BITS == 64 | ||
155 | # define dh_alias_tr i64 | ||
156 | -# define dh_is_64bit_tr 1 | ||
157 | #else | ||
158 | # define dh_alias_tr i32 | ||
159 | -# define dh_is_64bit_tr 0 | ||
160 | #endif | ||
161 | #define dh_ctype_tr target_ureg | ||
162 | -#define dh_is_signed_tr 0 | ||
163 | |||
164 | DEF_HELPER_2(excp, noreturn, env, int) | ||
165 | DEF_HELPER_FLAGS_2(tsv, TCG_CALL_NO_WG, void, env, tr) | ||
166 | diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h | ||
167 | index XXXXXXX..XXXXXXX 100644 | ||
168 | --- a/target/i386/ops_sse_header.h | ||
169 | +++ b/target/i386/ops_sse_header.h | ||
170 | @@ -XXX,XX +XXX,XX @@ | ||
171 | #define dh_ctype_Reg Reg * | ||
172 | #define dh_ctype_ZMMReg ZMMReg * | ||
173 | #define dh_ctype_MMXReg MMXReg * | ||
174 | -#define dh_is_signed_Reg dh_is_signed_ptr | ||
175 | -#define dh_is_signed_ZMMReg dh_is_signed_ptr | ||
176 | -#define dh_is_signed_MMXReg dh_is_signed_ptr | ||
177 | |||
178 | DEF_HELPER_3(glue(psrlw, SUFFIX), void, env, Reg, Reg) | ||
179 | DEF_HELPER_3(glue(psraw, SUFFIX), void, env, Reg, Reg) | ||
180 | diff --git a/target/m68k/helper.h b/target/m68k/helper.h | ||
181 | index XXXXXXX..XXXXXXX 100644 | ||
182 | --- a/target/m68k/helper.h | ||
183 | +++ b/target/m68k/helper.h | ||
184 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(cas2l_parallel, void, env, i32, i32, i32) | ||
185 | |||
186 | #define dh_alias_fp ptr | ||
187 | #define dh_ctype_fp FPReg * | ||
188 | -#define dh_is_signed_fp dh_is_signed_ptr | ||
189 | |||
190 | DEF_HELPER_3(exts32, void, env, fp, s32) | ||
191 | DEF_HELPER_3(extf32, void, env, fp, f32) | ||
192 | diff --git a/target/ppc/helper.h b/target/ppc/helper.h | 9 | diff --git a/target/ppc/helper.h b/target/ppc/helper.h |
193 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
194 | --- a/target/ppc/helper.h | 11 | --- a/target/ppc/helper.h |
195 | +++ b/target/ppc/helper.h | 12 | +++ b/target/ppc/helper.h |
196 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) | 13 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32) |
197 | 14 | DEF_HELPER_1(tbegin, void, env) | |
198 | #define dh_alias_avr ptr | 15 | DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env) |
199 | #define dh_ctype_avr ppc_avr_t * | 16 | |
200 | -#define dh_is_signed_avr dh_is_signed_ptr | 17 | -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) |
201 | 18 | +#ifdef TARGET_PPC64 | |
202 | #define dh_alias_vsr ptr | 19 | DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) |
203 | #define dh_ctype_vsr ppc_vsr_t * | 20 | DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32) |
204 | -#define dh_is_signed_vsr dh_is_signed_ptr | 21 | DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG, |
205 | 22 | diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c | |
206 | DEF_HELPER_3(vavgub, void, avr, avr, avr) | ||
207 | DEF_HELPER_3(vavguh, void, avr, avr, avr) | ||
208 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(store_601_batu, void, env, i32, tl) | ||
209 | |||
210 | #define dh_alias_fprp ptr | ||
211 | #define dh_ctype_fprp ppc_fprp_t * | ||
212 | -#define dh_is_signed_fprp dh_is_signed_ptr | ||
213 | |||
214 | DEF_HELPER_4(dadd, void, env, fprp, fprp, fprp) | ||
215 | DEF_HELPER_4(daddq, void, env, fprp, fprp, fprp) | ||
216 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
217 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
218 | --- a/tcg/tcg.c | 24 | --- a/target/ppc/mem_helper.c |
219 | +++ b/tcg/tcg.c | 25 | +++ b/target/ppc/mem_helper.c |
220 | @@ -XXX,XX +XXX,XX @@ typedef struct TCGHelperInfo { | 26 | @@ -XXX,XX +XXX,XX @@ |
221 | void *func; | 27 | #include "exec/cpu_ldst.h" |
222 | const char *name; | 28 | #include "tcg.h" |
223 | unsigned flags; | 29 | #include "internal.h" |
224 | - unsigned sizemask; | 30 | +#include "qemu/atomic128.h" |
225 | + unsigned typemask; | 31 | |
226 | } TCGHelperInfo; | 32 | //#define DEBUG_OP |
227 | 33 | ||
228 | #include "exec/helper-proto.h" | 34 | @@ -XXX,XX +XXX,XX @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg, |
229 | @@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op) | 35 | return i; |
230 | void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | 36 | } |
231 | { | 37 | |
232 | int i, real_args, nb_rets, pi; | 38 | -#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128) |
233 | - unsigned sizemask, flags; | 39 | +#ifdef TARGET_PPC64 |
234 | + unsigned typemask, flags; | 40 | uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, |
235 | TCGHelperInfo *info; | 41 | uint32_t opidx) |
236 | TCGOp *op; | 42 | { |
237 | 43 | - Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); | |
238 | info = g_hash_table_lookup(helper_table, (gpointer)func); | 44 | + Int128 ret; |
239 | flags = info->flags; | 45 | + |
240 | - sizemask = info->sizemask; | 46 | + /* We will have raised EXCP_ATOMIC from the translator. */ |
241 | + typemask = info->typemask; | 47 | + assert(HAVE_ATOMIC128); |
242 | 48 | + ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC()); | |
243 | #ifdef CONFIG_PLUGIN | 49 | env->retxh = int128_gethi(ret); |
244 | /* detect non-plugin helpers */ | 50 | return int128_getlo(ret); |
245 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | 51 | } |
246 | && !defined(CONFIG_TCG_INTERPRETER) | 52 | @@ -XXX,XX +XXX,XX @@ uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr, |
247 | /* We have 64-bit values in one register, but need to pass as two | 53 | uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, |
248 | separate parameters. Split them. */ | 54 | uint32_t opidx) |
249 | - int orig_sizemask = sizemask; | 55 | { |
250 | + int orig_typemask = typemask; | 56 | - Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); |
251 | int orig_nargs = nargs; | 57 | + Int128 ret; |
252 | TCGv_i64 retl, reth; | 58 | + |
253 | TCGTemp *split_args[MAX_OPC_PARAM]; | 59 | + /* We will have raised EXCP_ATOMIC from the translator. */ |
254 | 60 | + assert(HAVE_ATOMIC128); | |
255 | retl = NULL; | 61 | + ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC()); |
256 | reth = NULL; | 62 | env->retxh = int128_gethi(ret); |
257 | - if (sizemask != 0) { | 63 | return int128_getlo(ret); |
258 | - for (i = real_args = 0; i < nargs; ++i) { | 64 | } |
259 | - int is_64bit = sizemask & (1 << (i+1)*2); | 65 | @@ -XXX,XX +XXX,XX @@ uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr, |
260 | - if (is_64bit) { | 66 | void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr, |
261 | - TCGv_i64 orig = temp_tcgv_i64(args[i]); | 67 | uint64_t lo, uint64_t hi, uint32_t opidx) |
262 | - TCGv_i32 h = tcg_temp_new_i32(); | 68 | { |
263 | - TCGv_i32 l = tcg_temp_new_i32(); | 69 | - Int128 val = int128_make128(lo, hi); |
264 | - tcg_gen_extr_i64_i32(l, h, orig); | 70 | + Int128 val; |
265 | - split_args[real_args++] = tcgv_i32_temp(h); | 71 | + |
266 | - split_args[real_args++] = tcgv_i32_temp(l); | 72 | + /* We will have raised EXCP_ATOMIC from the translator. */ |
267 | - } else { | 73 | + assert(HAVE_ATOMIC128); |
268 | - split_args[real_args++] = args[i]; | 74 | + val = int128_make128(lo, hi); |
269 | - } | 75 | helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC()); |
270 | + typemask = 0; | 76 | } |
271 | + for (i = real_args = 0; i < nargs; ++i) { | 77 | |
272 | + int argtype = extract32(orig_typemask, (i + 1) * 3, 3); | 78 | void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr, |
273 | + bool is_64bit = (argtype & ~1) == dh_typecode_i64; | 79 | uint64_t lo, uint64_t hi, uint32_t opidx) |
274 | + | 80 | { |
275 | + if (is_64bit) { | 81 | - Int128 val = int128_make128(lo, hi); |
276 | + TCGv_i64 orig = temp_tcgv_i64(args[i]); | 82 | + Int128 val; |
277 | + TCGv_i32 h = tcg_temp_new_i32(); | 83 | + |
278 | + TCGv_i32 l = tcg_temp_new_i32(); | 84 | + /* We will have raised EXCP_ATOMIC from the translator. */ |
279 | + tcg_gen_extr_i64_i32(l, h, orig); | 85 | + assert(HAVE_ATOMIC128); |
280 | + split_args[real_args++] = tcgv_i32_temp(h); | 86 | + val = int128_make128(lo, hi); |
281 | + typemask |= dh_typecode_i32 << (real_args * 3); | 87 | helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC()); |
282 | + split_args[real_args++] = tcgv_i32_temp(l); | 88 | } |
283 | + typemask |= dh_typecode_i32 << (real_args * 3); | 89 | |
284 | + } else { | 90 | @@ -XXX,XX +XXX,XX @@ uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr, |
285 | + split_args[real_args++] = args[i]; | 91 | { |
286 | + typemask |= argtype << (real_args * 3); | 92 | bool success = false; |
93 | |||
94 | + /* We will have raised EXCP_ATOMIC from the translator. */ | ||
95 | + assert(HAVE_CMPXCHG128); | ||
96 | + | ||
97 | if (likely(addr == env->reserve_addr)) { | ||
98 | Int128 oldv, cmpv, newv; | ||
99 | |||
100 | @@ -XXX,XX +XXX,XX @@ uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr, | ||
101 | { | ||
102 | bool success = false; | ||
103 | |||
104 | + /* We will have raised EXCP_ATOMIC from the translator. */ | ||
105 | + assert(HAVE_CMPXCHG128); | ||
106 | + | ||
107 | if (likely(addr == env->reserve_addr)) { | ||
108 | Int128 oldv, cmpv, newv; | ||
109 | |||
110 | diff --git a/target/ppc/translate.c b/target/ppc/translate.c | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/target/ppc/translate.c | ||
113 | +++ b/target/ppc/translate.c | ||
114 | @@ -XXX,XX +XXX,XX @@ | ||
115 | #include "trace-tcg.h" | ||
116 | #include "exec/translator.h" | ||
117 | #include "exec/log.h" | ||
118 | +#include "qemu/atomic128.h" | ||
119 | |||
120 | |||
121 | #define CPU_SINGLE_STEP 0x1 | ||
122 | @@ -XXX,XX +XXX,XX @@ static void gen_lq(DisasContext *ctx) | ||
123 | hi = cpu_gpr[rd]; | ||
124 | |||
125 | if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { | ||
126 | -#ifdef CONFIG_ATOMIC128 | ||
127 | - TCGv_i32 oi = tcg_temp_new_i32(); | ||
128 | - if (ctx->le_mode) { | ||
129 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); | ||
130 | - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); | ||
131 | + if (HAVE_ATOMIC128) { | ||
132 | + TCGv_i32 oi = tcg_temp_new_i32(); | ||
133 | + if (ctx->le_mode) { | ||
134 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); | ||
135 | + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); | ||
136 | + } else { | ||
137 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); | ||
138 | + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); | ||
139 | + } | ||
140 | + tcg_temp_free_i32(oi); | ||
141 | + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); | ||
142 | } else { | ||
143 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); | ||
144 | - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); | ||
145 | + /* Restart with exclusive lock. */ | ||
146 | + gen_helper_exit_atomic(cpu_env); | ||
147 | + ctx->base.is_jmp = DISAS_NORETURN; | ||
287 | } | 148 | } |
288 | - nargs = real_args; | 149 | - tcg_temp_free_i32(oi); |
289 | - args = split_args; | 150 | - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); |
290 | - sizemask = 0; | 151 | -#else |
291 | } | 152 | - /* Restart with exclusive lock. */ |
292 | + nargs = real_args; | 153 | - gen_helper_exit_atomic(cpu_env); |
293 | + args = split_args; | 154 | - ctx->base.is_jmp = DISAS_NORETURN; |
294 | #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 | 155 | -#endif |
295 | for (i = 0; i < nargs; ++i) { | 156 | } else if (ctx->le_mode) { |
296 | - int is_64bit = sizemask & (1 << (i+1)*2); | 157 | tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ); |
297 | - int is_signed = sizemask & (2 << (i+1)*2); | 158 | gen_addr_add(ctx, EA, EA, 8); |
298 | - if (!is_64bit) { | 159 | @@ -XXX,XX +XXX,XX @@ static void gen_std(DisasContext *ctx) |
299 | + int argtype = extract32(typemask, (i + 1) * 3, 3); | 160 | hi = cpu_gpr[rs]; |
300 | + bool is_32bit = (argtype & ~1) == dh_typecode_i32; | 161 | |
301 | + bool is_signed = argtype & 1; | 162 | if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { |
302 | + | 163 | -#ifdef CONFIG_ATOMIC128 |
303 | + if (is_32bit) { | 164 | - TCGv_i32 oi = tcg_temp_new_i32(); |
304 | TCGv_i64 temp = tcg_temp_new_i64(); | 165 | - if (ctx->le_mode) { |
305 | TCGv_i64 orig = temp_tcgv_i64(args[i]); | 166 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); |
306 | if (is_signed) { | 167 | - gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); |
307 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | 168 | + if (HAVE_ATOMIC128) { |
308 | if (ret != NULL) { | 169 | + TCGv_i32 oi = tcg_temp_new_i32(); |
309 | #if defined(__sparc__) && !defined(__arch64__) \ | 170 | + if (ctx->le_mode) { |
310 | && !defined(CONFIG_TCG_INTERPRETER) | 171 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx)); |
311 | - if (orig_sizemask & 1) { | 172 | + gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi); |
312 | + if ((typemask & 6) == dh_typecode_i64) { | 173 | + } else { |
313 | /* The 32-bit ABI is going to return the 64-bit value in | 174 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); |
314 | the %o0/%o1 register pair. Prepare for this by using | 175 | + gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); |
315 | two return temporaries, and reassemble below. */ | 176 | + } |
316 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | 177 | + tcg_temp_free_i32(oi); |
317 | nb_rets = 1; | 178 | } else { |
179 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx)); | ||
180 | - gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi); | ||
181 | + /* Restart with exclusive lock. */ | ||
182 | + gen_helper_exit_atomic(cpu_env); | ||
183 | + ctx->base.is_jmp = DISAS_NORETURN; | ||
184 | } | ||
185 | - tcg_temp_free_i32(oi); | ||
186 | -#else | ||
187 | - /* Restart with exclusive lock. */ | ||
188 | - gen_helper_exit_atomic(cpu_env); | ||
189 | - ctx->base.is_jmp = DISAS_NORETURN; | ||
190 | -#endif | ||
191 | } else if (ctx->le_mode) { | ||
192 | tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ); | ||
193 | gen_addr_add(ctx, EA, EA, 8); | ||
194 | @@ -XXX,XX +XXX,XX @@ static void gen_lqarx(DisasContext *ctx) | ||
195 | hi = cpu_gpr[rd]; | ||
196 | |||
197 | if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { | ||
198 | -#ifdef CONFIG_ATOMIC128 | ||
199 | - TCGv_i32 oi = tcg_temp_new_i32(); | ||
200 | - if (ctx->le_mode) { | ||
201 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, | ||
202 | - ctx->mem_idx)); | ||
203 | - gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); | ||
204 | + if (HAVE_ATOMIC128) { | ||
205 | + TCGv_i32 oi = tcg_temp_new_i32(); | ||
206 | + if (ctx->le_mode) { | ||
207 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16, | ||
208 | + ctx->mem_idx)); | ||
209 | + gen_helper_lq_le_parallel(lo, cpu_env, EA, oi); | ||
210 | + } else { | ||
211 | + tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, | ||
212 | + ctx->mem_idx)); | ||
213 | + gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); | ||
214 | + } | ||
215 | + tcg_temp_free_i32(oi); | ||
216 | + tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); | ||
217 | } else { | ||
218 | - tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16, | ||
219 | - ctx->mem_idx)); | ||
220 | - gen_helper_lq_be_parallel(lo, cpu_env, EA, oi); | ||
221 | + /* Restart with exclusive lock. */ | ||
222 | + gen_helper_exit_atomic(cpu_env); | ||
223 | + ctx->base.is_jmp = DISAS_NORETURN; | ||
224 | + tcg_temp_free(EA); | ||
225 | + return; | ||
318 | } | 226 | } |
319 | #else | 227 | - tcg_temp_free_i32(oi); |
320 | - if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { | 228 | - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh)); |
321 | + if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { | 229 | -#else |
322 | #ifdef HOST_WORDS_BIGENDIAN | 230 | - /* Restart with exclusive lock. */ |
323 | op->args[pi++] = temp_arg(ret + 1); | 231 | - gen_helper_exit_atomic(cpu_env); |
324 | op->args[pi++] = temp_arg(ret); | 232 | - ctx->base.is_jmp = DISAS_NORETURN; |
325 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | 233 | - tcg_temp_free(EA); |
326 | 234 | - return; | |
327 | real_args = 0; | 235 | -#endif |
328 | for (i = 0; i < nargs; i++) { | 236 | } else if (ctx->le_mode) { |
329 | - int is_64bit = sizemask & (1 << (i+1)*2); | 237 | tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16); |
330 | + int argtype = extract32(typemask, (i + 1) * 3, 3); | 238 | tcg_gen_mov_tl(cpu_reserve, EA); |
331 | + bool is_64bit = (argtype & ~1) == dh_typecode_i64; | 239 | @@ -XXX,XX +XXX,XX @@ static void gen_stqcx_(DisasContext *ctx) |
332 | + | 240 | hi = cpu_gpr[rs]; |
333 | if (TCG_TARGET_REG_BITS < 64 && is_64bit) { | 241 | |
334 | #ifdef TCG_TARGET_CALL_ALIGN_ARGS | 242 | if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { |
335 | /* some targets want aligned 64 bit args */ | 243 | - TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); |
336 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | 244 | -#ifdef CONFIG_ATOMIC128 |
337 | && !defined(CONFIG_TCG_INTERPRETER) | 245 | - if (ctx->le_mode) { |
338 | /* Free all of the parts we allocated above. */ | 246 | - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); |
339 | for (i = real_args = 0; i < orig_nargs; ++i) { | 247 | + if (HAVE_CMPXCHG128) { |
340 | - int is_64bit = orig_sizemask & (1 << (i+1)*2); | 248 | + TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16); |
341 | + int argtype = extract32(orig_typemask, (i + 1) * 3, 3); | 249 | + if (ctx->le_mode) { |
342 | + bool is_64bit = (argtype & ~1) == dh_typecode_i64; | 250 | + gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, |
343 | + | 251 | + EA, lo, hi, oi); |
344 | if (is_64bit) { | 252 | + } else { |
345 | tcg_temp_free_internal(args[real_args++]); | 253 | + gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env, |
346 | tcg_temp_free_internal(args[real_args++]); | 254 | + EA, lo, hi, oi); |
347 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | 255 | + } |
348 | real_args++; | 256 | + tcg_temp_free_i32(oi); |
257 | } else { | ||
258 | - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi); | ||
259 | + /* Restart with exclusive lock. */ | ||
260 | + gen_helper_exit_atomic(cpu_env); | ||
261 | + ctx->base.is_jmp = DISAS_NORETURN; | ||
349 | } | 262 | } |
350 | } | 263 | -#else |
351 | - if (orig_sizemask & 1) { | 264 | - /* Restart with exclusive lock. */ |
352 | + if ((orig_typemask & 6) == dh_typecode_i64) { | 265 | - gen_helper_exit_atomic(cpu_env); |
353 | /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. | 266 | - ctx->base.is_jmp = DISAS_NORETURN; |
354 | Note that describing these as TCGv_i64 eliminates an unnecessary | 267 | -#endif |
355 | zero-extension that tcg_gen_concat_i32_i64 would create. */ | 268 | tcg_temp_free(EA); |
356 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | 269 | - tcg_temp_free_i32(oi); |
357 | } | 270 | } else { |
358 | #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 | 271 | TCGLabel *lab_fail = gen_new_label(); |
359 | for (i = 0; i < nargs; ++i) { | 272 | TCGLabel *lab_over = gen_new_label(); |
360 | - int is_64bit = sizemask & (1 << (i+1)*2); | ||
361 | - if (!is_64bit) { | ||
362 | + int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
363 | + bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
364 | + | ||
365 | + if (is_32bit) { | ||
366 | tcg_temp_free_internal(args[i]); | ||
367 | } | ||
368 | } | ||
369 | -- | 273 | -- |
370 | 2.25.1 | 274 | 2.17.2 |
371 | 275 | ||
Deleted patch | |||
---|---|---|---|
1 | We're going to change how to look up the call flags from a TCGOp, | ||
2 | so extract it as a helper. | ||
3 | 1 | ||
4 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/tcg-internal.h | 5 +++++ | ||
9 | tcg/optimize.c | 3 ++- | ||
10 | tcg/tcg.c | 14 ++++++-------- | ||
11 | 3 files changed, 13 insertions(+), 9 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/tcg-internal.h | ||
16 | +++ b/tcg/tcg-internal.h | ||
17 | @@ -XXX,XX +XXX,XX @@ bool tcg_region_alloc(TCGContext *s); | ||
18 | void tcg_region_initial_alloc(TCGContext *s); | ||
19 | void tcg_region_prologue_set(TCGContext *s); | ||
20 | |||
21 | +static inline unsigned tcg_call_flags(TCGOp *op) | ||
22 | +{ | ||
23 | + return op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; | ||
24 | +} | ||
25 | + | ||
26 | #endif /* TCG_INTERNAL_H */ | ||
27 | diff --git a/tcg/optimize.c b/tcg/optimize.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/tcg/optimize.c | ||
30 | +++ b/tcg/optimize.c | ||
31 | @@ -XXX,XX +XXX,XX @@ | ||
32 | |||
33 | #include "qemu/osdep.h" | ||
34 | #include "tcg/tcg-op.h" | ||
35 | +#include "tcg-internal.h" | ||
36 | |||
37 | #define CASE_OP_32_64(x) \ | ||
38 | glue(glue(case INDEX_op_, x), _i32): \ | ||
39 | @@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s) | ||
40 | break; | ||
41 | |||
42 | case INDEX_op_call: | ||
43 | - if (!(op->args[nb_oargs + nb_iargs + 1] | ||
44 | + if (!(tcg_call_flags(op) | ||
45 | & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { | ||
46 | for (i = 0; i < nb_globals; i++) { | ||
47 | if (test_bit(i, temps_used.l)) { | ||
48 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/tcg/tcg.c | ||
51 | +++ b/tcg/tcg.c | ||
52 | @@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs) | ||
53 | nb_cargs = def->nb_cargs; | ||
54 | |||
55 | /* function name, flags, out args */ | ||
56 | - col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, | ||
57 | + col += qemu_log(" %s %s,$0x%x,$%d", def->name, | ||
58 | tcg_find_helper(s, op->args[nb_oargs + nb_iargs]), | ||
59 | - op->args[nb_oargs + nb_iargs + 1], nb_oargs); | ||
60 | + tcg_call_flags(op), nb_oargs); | ||
61 | for (i = 0; i < nb_oargs; i++) { | ||
62 | col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf), | ||
63 | op->args[i])); | ||
64 | @@ -XXX,XX +XXX,XX @@ static void reachable_code_pass(TCGContext *s) | ||
65 | QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { | ||
66 | bool remove = dead; | ||
67 | TCGLabel *label; | ||
68 | - int call_flags; | ||
69 | |||
70 | switch (op->opc) { | ||
71 | case INDEX_op_set_label: | ||
72 | @@ -XXX,XX +XXX,XX @@ static void reachable_code_pass(TCGContext *s) | ||
73 | |||
74 | case INDEX_op_call: | ||
75 | /* Notice noreturn helper calls, raising exceptions. */ | ||
76 | - call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; | ||
77 | - if (call_flags & TCG_CALL_NO_RETURN) { | ||
78 | + if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { | ||
79 | dead = true; | ||
80 | } | ||
81 | break; | ||
82 | @@ -XXX,XX +XXX,XX @@ static void liveness_pass_1(TCGContext *s) | ||
83 | |||
84 | nb_oargs = TCGOP_CALLO(op); | ||
85 | nb_iargs = TCGOP_CALLI(op); | ||
86 | - call_flags = op->args[nb_oargs + nb_iargs + 1]; | ||
87 | + call_flags = tcg_call_flags(op); | ||
88 | |||
89 | /* pure functions can be removed if their result is unused */ | ||
90 | if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | ||
92 | if (opc == INDEX_op_call) { | ||
93 | nb_oargs = TCGOP_CALLO(op); | ||
94 | nb_iargs = TCGOP_CALLI(op); | ||
95 | - call_flags = op->args[nb_oargs + nb_iargs + 1]; | ||
96 | + call_flags = tcg_call_flags(op); | ||
97 | } else { | ||
98 | nb_iargs = def->nb_iargs; | ||
99 | nb_oargs = def->nb_oargs; | ||
100 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
101 | TCGRegSet allocated_regs; | ||
102 | |||
103 | func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; | ||
104 | - flags = op->args[nb_oargs + nb_iargs + 1]; | ||
105 | + flags = tcg_call_flags(op); | ||
106 | |||
107 | nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); | ||
108 | if (nb_regs > nb_iargs) { | ||
109 | -- | ||
110 | 2.25.1 | ||
111 | |||
Deleted patch | |||
---|---|---|---|
1 | Let the compiler decide on inlining. | ||
2 | 1 | ||
3 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | accel/tcg/plugin-gen.c | 12 +++++------- | ||
8 | 1 file changed, 5 insertions(+), 7 deletions(-) | ||
9 | |||
10 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/accel/tcg/plugin-gen.c | ||
13 | +++ b/accel/tcg/plugin-gen.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static void gen_empty_mem_helper(void) | ||
15 | tcg_temp_free_ptr(ptr); | ||
16 | } | ||
17 | |||
18 | -static inline | ||
19 | -void gen_plugin_cb_start(enum plugin_gen_from from, | ||
20 | - enum plugin_gen_cb type, unsigned wr) | ||
21 | +static void gen_plugin_cb_start(enum plugin_gen_from from, | ||
22 | + enum plugin_gen_cb type, unsigned wr) | ||
23 | { | ||
24 | TCGOp *op; | ||
25 | |||
26 | @@ -XXX,XX +XXX,XX @@ static void gen_wrapped(enum plugin_gen_from from, | ||
27 | tcg_gen_plugin_cb_end(); | ||
28 | } | ||
29 | |||
30 | -static inline void plugin_gen_empty_callback(enum plugin_gen_from from) | ||
31 | +static void plugin_gen_empty_callback(enum plugin_gen_from from) | ||
32 | { | ||
33 | switch (from) { | ||
34 | case PLUGIN_GEN_AFTER_INSN: | ||
35 | @@ -XXX,XX +XXX,XX @@ static bool op_rw(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb) | ||
36 | return !!(cb->rw & (w + 1)); | ||
37 | } | ||
38 | |||
39 | -static inline | ||
40 | -void inject_cb_type(const GArray *cbs, TCGOp *begin_op, inject_fn inject, | ||
41 | - op_ok_fn ok) | ||
42 | +static void inject_cb_type(const GArray *cbs, TCGOp *begin_op, | ||
43 | + inject_fn inject, op_ok_fn ok) | ||
44 | { | ||
45 | TCGOp *end_op; | ||
46 | TCGOp *op; | ||
47 | -- | ||
48 | 2.25.1 | ||
49 | |||
1 | This removes all of the problems with unaligned accesses | 1 | Reviewed-by: David Hildenbrand <david@redhat.com> |
---|---|---|---|
2 | to the bytecode stream. | ||
3 | |||
4 | With an 8-bit opcode at the bottom, we have 24 bits remaining, | ||
5 | which are generally split into six 4-bit slots. This fits well | ||
6 | with the maximum-length opcodes, e.g. INDEX_op_add2_i32, which | ||
7 | have six register operands. | ||
8 | |||
9 | We have, in previous patches, rearranged things such that no | ||
10 | operation with a label has more than one other operand. This | ||
11 | leaves us with a 20-bit field in which to encode a label, | ||
12 | giving us a maximum TB size of 512k -- easily large enough. | ||
13 | |||
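For reference, a minimal self-contained sketch of the insn layout just described. The deposit32/extract32 functions below are local stand-ins for QEMU's real helpers in qemu/bitops.h, and encode_rrr/decode_rrr are illustrative names, not part of this patch; the field positions match the tci_args_rrr() helper in the diff below.

    #include <stdint.h>

    /* Local stand-ins for QEMU's deposit32/extract32 (qemu/bitops.h). */
    static uint32_t deposit32(uint32_t val, int pos, int len, uint32_t field)
    {
        uint32_t mask = (~0u >> (32 - len)) << pos;
        return (val & ~mask) | ((field << pos) & mask);
    }

    static uint32_t extract32(uint32_t val, int pos, int len)
    {
        return (val >> pos) & (~0u >> (32 - len));
    }

    /* An rrr-form insn: 8-bit opcode, then three 4-bit register slots. */
    static uint32_t encode_rrr(uint8_t op, unsigned r0, unsigned r1, unsigned r2)
    {
        uint32_t insn = 0;
        insn = deposit32(insn, 0, 8, op);
        insn = deposit32(insn, 8, 4, r0);
        insn = deposit32(insn, 12, 4, r1);
        insn = deposit32(insn, 16, 4, r2);
        return insn;
    }

    static void decode_rrr(uint32_t insn, unsigned *r0, unsigned *r1, unsigned *r2)
    {
        *r0 = extract32(insn, 8, 4);
        *r1 = extract32(insn, 12, 4);
        *r2 = extract32(insn, 16, 4);
    }

A six-operand op such as add2_i32 simply continues with 4-bit slots up through bit 28, while ops that take a label instead use bits 12..31 as the signed 20-bit displacement range-checked by patch_reloc() in the diff below.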
14 | Change the INDEX_op_tci_movi_{i32,i64} opcodes to tci_mov[il]. | ||
15 | The former puts the immediate in the upper 20 bits of the insn, | ||
16 | like we do for the label displacement. The latter uses a label | ||
17 | to reference an entry in the constant pool. Thus, in the worst | ||
18 | case we still have a single memory reference for any constant, | ||
19 | but now the constants are out of line from the bytecode and can | ||
20 | be shared between different moves, saving space. | ||
21 | |||
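A rough sketch of the constant-pool access this enables; pool_constant is a hypothetical name and uintptr_t stands in for tcg_target_ulong, with the in-tree equivalents being tci_args_rl() and the INDEX_op_tci_movl case in the diff below. The 20-bit field is a signed byte displacement from the word following the insn:

    #include <stdint.h>

    /* Signed field extraction, as in QEMU's sextract32(). */
    static int32_t sextract32(uint32_t val, int pos, int len)
    {
        return (int32_t)(val << (32 - pos - len)) >> (32 - len);
    }

    /* tb_ptr points just past the insn; bits [12,32) reach the pool
       entry, so loading any constant costs at most this one extra
       memory access. */
    static uintptr_t pool_constant(uint32_t insn, const uint32_t *tb_ptr)
    {
        const char *base = (const char *)tb_ptr;
        return *(const uintptr_t *)(base + sextract32(insn, 12, 20));
    }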
22 | Change INDEX_op_call to use a label to reference a pair of | ||
23 | pointers in the constant pool. This removes the only slightly | ||
24 | dodgy link with the layout of struct TCGHelperInfo. | ||
25 | |||
26 | The re-encoding cannot be done in pieces; it must land as one patch. | ||
27 | |||
28 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
29 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
30 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
31 | --- | 3 | --- |
32 | include/tcg/tcg-opc.h | 4 +- | 4 | target/s390x/mem_helper.c | 92 +++++++++++++++++---------------------- |
33 | tcg/tci/tcg-target.h | 3 +- | 5 | 1 file changed, 41 insertions(+), 51 deletions(-) |
34 | tcg/tci.c | 539 +++++++++++++++------------------------ | ||
35 | tcg/tci/tcg-target.c.inc | 379 ++++++++++++--------------- | ||
36 | tcg/tci/README | 20 +- | ||
37 | 5 files changed, 383 insertions(+), 562 deletions(-) | ||
38 | 6 | ||
39 | diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h | 7 | diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c |
40 | index XXXXXXX..XXXXXXX 100644 | 8 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/include/tcg/tcg-opc.h | 9 | --- a/target/s390x/mem_helper.c |
42 | +++ b/include/tcg/tcg-opc.h | 10 | +++ b/target/s390x/mem_helper.c |
43 | @@ -XXX,XX +XXX,XX @@ DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT) | 11 | @@ -XXX,XX +XXX,XX @@ |
44 | 12 | #include "exec/exec-all.h" | |
45 | #ifdef TCG_TARGET_INTERPRETER | 13 | #include "exec/cpu_ldst.h" |
46 | /* These opcodes are only for use between the tci generator and interpreter. */ | 14 | #include "qemu/int128.h" |
47 | -DEF(tci_movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT) | 15 | +#include "qemu/atomic128.h" |
48 | -DEF(tci_movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) | 16 | |
49 | +DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) | 17 | #if !defined(CONFIG_USER_ONLY) |
50 | +DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) | 18 | #include "hw/s390x/storage-keys.h" |
19 | @@ -XXX,XX +XXX,XX @@ static void do_cdsg(CPUS390XState *env, uint64_t addr, | ||
20 | bool fail; | ||
21 | |||
22 | if (parallel) { | ||
23 | -#ifndef CONFIG_ATOMIC128 | ||
24 | +#if !HAVE_CMPXCHG128 | ||
25 | cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
26 | #else | ||
27 | int mem_idx = cpu_mmu_index(env, false); | ||
28 | @@ -XXX,XX +XXX,XX @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, | ||
29 | static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, | ||
30 | uint64_t a2, bool parallel) | ||
31 | { | ||
32 | -#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128) | ||
33 | uint32_t mem_idx = cpu_mmu_index(env, false); | ||
34 | -#endif | ||
35 | uintptr_t ra = GETPC(); | ||
36 | uint32_t fc = extract32(env->regs[0], 0, 8); | ||
37 | uint32_t sc = extract32(env->regs[0], 8, 8); | ||
38 | @@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, | ||
39 | probe_write(env, a2, 0, mem_idx, ra); | ||
51 | #endif | 40 | #endif |
52 | 41 | ||
53 | #undef TLADDR_ARGS | 42 | - /* Note that the compare-and-swap is atomic, and the store is atomic, but |
54 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | 43 | - the complete operation is not. Therefore we do not need to assert serial |
55 | index XXXXXXX..XXXXXXX 100644 | 44 | - context in order to implement this. That said, restart early if we can't |
56 | --- a/tcg/tci/tcg-target.h | 45 | - support either operation that is supposed to be atomic. */ |
57 | +++ b/tcg/tci/tcg-target.h | 46 | + /* |
58 | @@ -XXX,XX +XXX,XX @@ | 47 | + * Note that the compare-and-swap is atomic, and the store is atomic, |
59 | #define TCG_TARGET_H | 48 | + * but the complete operation is not. Therefore we do not need to |
60 | 49 | + * assert serial context in order to implement this. That said, | |
61 | #define TCG_TARGET_INTERPRETER 1 | 50 | + * restart early if we can't support either operation that is supposed |
62 | -#define TCG_TARGET_INSN_UNIT_SIZE 1 | 51 | + * to be atomic. |
63 | +#define TCG_TARGET_INSN_UNIT_SIZE 4 | 52 | + */ |
64 | #define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 | 53 | if (parallel) { |
65 | #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) | 54 | - int mask = 0; |
66 | 55 | -#if !defined(CONFIG_ATOMIC64) | |
67 | @@ -XXX,XX +XXX,XX @@ typedef enum { | 56 | - mask = -8; |
68 | #define TCG_TARGET_STACK_ALIGN 8 | 57 | -#elif !defined(CONFIG_ATOMIC128) |
69 | 58 | - mask = -16; | |
70 | #define HAVE_TCG_QEMU_TB_EXEC | 59 | + uint32_t max = 2; |
71 | +#define TCG_TARGET_NEED_POOL_LABELS | 60 | +#ifdef CONFIG_ATOMIC64 |
72 | 61 | + max = 3; | |
73 | /* We could notice __i386__ or __s390x__ and reduce the barriers depending | 62 | #endif |
74 | on the host. But if you want performance, you use the normal backend. | 63 | - if (((4 << fc) | (1 << sc)) & mask) { |
75 | diff --git a/tcg/tci.c b/tcg/tci.c | 64 | + if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) || |
76 | index XXXXXXX..XXXXXXX 100644 | 65 | + (HAVE_ATOMIC128 ? 0 : sc > max)) { |
77 | --- a/tcg/tci.c | 66 | cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); |
78 | +++ b/tcg/tci.c | 67 | } |
79 | @@ -XXX,XX +XXX,XX @@ static uint64_t tci_uint64(uint32_t high, uint32_t low) | 68 | } |
80 | return ((uint64_t)high << 32) + low; | 69 | @@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, |
81 | } | 70 | Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]); |
82 | 71 | Int128 ov; | |
83 | -/* Read constant byte from bytecode. */ | 72 | |
84 | -static uint8_t tci_read_b(const uint8_t **tb_ptr) | 73 | - if (parallel) { |
85 | -{ | 74 | -#ifdef CONFIG_ATOMIC128 |
86 | - return *(tb_ptr[0]++); | 75 | - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); |
87 | -} | 76 | - ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); |
88 | - | 77 | - cc = !int128_eq(ov, cv); |
89 | -/* Read register number from bytecode. */ | 78 | -#else |
90 | -static TCGReg tci_read_r(const uint8_t **tb_ptr) | 79 | - /* Note that we asserted !parallel above. */ |
91 | -{ | 80 | - g_assert_not_reached(); |
92 | - uint8_t regno = tci_read_b(tb_ptr); | ||
93 | - tci_assert(regno < TCG_TARGET_NB_REGS); | ||
94 | - return regno; | ||
95 | -} | ||
96 | - | ||
97 | -/* Read constant (native size) from bytecode. */ | ||
98 | -static tcg_target_ulong tci_read_i(const uint8_t **tb_ptr) | ||
99 | -{ | ||
100 | - tcg_target_ulong value = *(const tcg_target_ulong *)(*tb_ptr); | ||
101 | - *tb_ptr += sizeof(value); | ||
102 | - return value; | ||
103 | -} | ||
104 | - | ||
105 | -/* Read unsigned constant (32 bit) from bytecode. */ | ||
106 | -static uint32_t tci_read_i32(const uint8_t **tb_ptr) | ||
107 | -{ | ||
108 | - uint32_t value = *(const uint32_t *)(*tb_ptr); | ||
109 | - *tb_ptr += sizeof(value); | ||
110 | - return value; | ||
111 | -} | ||
112 | - | ||
113 | -/* Read signed constant (32 bit) from bytecode. */ | ||
114 | -static int32_t tci_read_s32(const uint8_t **tb_ptr) | ||
115 | -{ | ||
116 | - int32_t value = *(const int32_t *)(*tb_ptr); | ||
117 | - *tb_ptr += sizeof(value); | ||
118 | - return value; | ||
119 | -} | ||
120 | - | ||
121 | -static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr) | ||
122 | -{ | ||
123 | - return tci_read_i(tb_ptr); | ||
124 | -} | ||
125 | - | ||
126 | /* | ||
127 | * Load sets of arguments all at once. The naming convention is: | ||
128 | * tci_args_<arguments> | ||
129 | @@ -XXX,XX +XXX,XX @@ static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr) | ||
130 | * s = signed ldst offset | ||
131 | */ | ||
132 | |||
133 | -static void check_size(const uint8_t *start, const uint8_t **tb_ptr) | ||
134 | +static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0) | ||
135 | { | ||
136 | - const uint8_t *old_code_ptr = start - 2; | ||
137 | - uint8_t op_size = old_code_ptr[1]; | ||
138 | - tci_assert(*tb_ptr == old_code_ptr + op_size); | ||
139 | + int diff = sextract32(insn, 12, 20); | ||
140 | + *l0 = diff ? (void *)tb_ptr + diff : NULL; | ||
141 | } | ||
142 | |||
143 | -static void tci_args_l(const uint8_t **tb_ptr, void **l0) | ||
144 | +static void tci_args_nl(uint32_t insn, const void *tb_ptr, | ||
145 | + uint8_t *n0, void **l1) | ||
146 | { | ||
147 | - const uint8_t *start = *tb_ptr; | ||
148 | - | ||
149 | - *l0 = (void *)tci_read_label(tb_ptr); | ||
150 | - | ||
151 | - check_size(start, tb_ptr); | ||
152 | + *n0 = extract32(insn, 8, 4); | ||
153 | + *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr; | ||
154 | } | ||
155 | |||
156 | -static void tci_args_nll(const uint8_t **tb_ptr, uint8_t *n0, | ||
157 | - void **l1, void **l2) | ||
158 | +static void tci_args_rl(uint32_t insn, const void *tb_ptr, | ||
159 | + TCGReg *r0, void **l1) | ||
160 | { | ||
161 | - const uint8_t *start = *tb_ptr; | ||
162 | - | ||
163 | - *n0 = tci_read_b(tb_ptr); | ||
164 | - *l1 = (void *)tci_read_label(tb_ptr); | ||
165 | - *l2 = (void *)tci_read_label(tb_ptr); | ||
166 | - | ||
167 | - check_size(start, tb_ptr); | ||
168 | + *r0 = extract32(insn, 8, 4); | ||
169 | + *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr; | ||
170 | } | ||
171 | |||
172 | -static void tci_args_rl(const uint8_t **tb_ptr, TCGReg *r0, void **l1) | ||
173 | +static void tci_args_rr(uint32_t insn, TCGReg *r0, TCGReg *r1) | ||
174 | { | ||
175 | - const uint8_t *start = *tb_ptr; | ||
176 | - | ||
177 | - *r0 = tci_read_r(tb_ptr); | ||
178 | - *l1 = (void *)tci_read_label(tb_ptr); | ||
179 | - | ||
180 | - check_size(start, tb_ptr); | ||
181 | + *r0 = extract32(insn, 8, 4); | ||
182 | + *r1 = extract32(insn, 12, 4); | ||
183 | } | ||
184 | |||
185 | -static void tci_args_rr(const uint8_t **tb_ptr, | ||
186 | - TCGReg *r0, TCGReg *r1) | ||
187 | +static void tci_args_ri(uint32_t insn, TCGReg *r0, tcg_target_ulong *i1) | ||
188 | { | ||
189 | - const uint8_t *start = *tb_ptr; | ||
190 | - | ||
191 | - *r0 = tci_read_r(tb_ptr); | ||
192 | - *r1 = tci_read_r(tb_ptr); | ||
193 | - | ||
194 | - check_size(start, tb_ptr); | ||
195 | + *r0 = extract32(insn, 8, 4); | ||
196 | + *i1 = sextract32(insn, 12, 20); | ||
197 | } | ||
198 | |||
199 | -static void tci_args_ri(const uint8_t **tb_ptr, | ||
200 | - TCGReg *r0, tcg_target_ulong *i1) | ||
201 | +static void tci_args_rrm(uint32_t insn, TCGReg *r0, | ||
202 | + TCGReg *r1, TCGMemOpIdx *m2) | ||
203 | { | ||
204 | - const uint8_t *start = *tb_ptr; | ||
205 | - | ||
206 | - *r0 = tci_read_r(tb_ptr); | ||
207 | - *i1 = tci_read_i32(tb_ptr); | ||
208 | - | ||
209 | - check_size(start, tb_ptr); | ||
210 | + *r0 = extract32(insn, 8, 4); | ||
211 | + *r1 = extract32(insn, 12, 4); | ||
212 | + *m2 = extract32(insn, 20, 12); | ||
213 | } | ||
214 | |||
215 | -#if TCG_TARGET_REG_BITS == 64 | ||
216 | -static void tci_args_rI(const uint8_t **tb_ptr, | ||
217 | - TCGReg *r0, tcg_target_ulong *i1) | ||
218 | +static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2) | ||
219 | { | ||
220 | - const uint8_t *start = *tb_ptr; | ||
221 | - | ||
222 | - *r0 = tci_read_r(tb_ptr); | ||
223 | - *i1 = tci_read_i(tb_ptr); | ||
224 | - | ||
225 | - check_size(start, tb_ptr); | ||
226 | -} | ||
227 | -#endif | 81 | -#endif |
228 | - | 82 | - } else { |
229 | -static void tci_args_rrm(const uint8_t **tb_ptr, | 83 | + if (!parallel) { |
230 | - TCGReg *r0, TCGReg *r1, TCGMemOpIdx *m2) | 84 | uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra); |
231 | -{ | 85 | uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra); |
232 | - const uint8_t *start = *tb_ptr; | 86 | |
233 | - | 87 | @@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, |
234 | - *r0 = tci_read_r(tb_ptr); | 88 | |
235 | - *r1 = tci_read_r(tb_ptr); | 89 | cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra); |
236 | - *m2 = tci_read_i32(tb_ptr); | 90 | cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra); |
237 | - | 91 | + } else if (HAVE_CMPXCHG128) { |
238 | - check_size(start, tb_ptr); | 92 | + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); |
239 | + *r0 = extract32(insn, 8, 4); | 93 | + ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); |
240 | + *r1 = extract32(insn, 12, 4); | 94 | + cc = !int128_eq(ov, cv); |
241 | + *r2 = extract32(insn, 16, 4); | 95 | + } else { |
242 | } | 96 | + /* Note that we asserted !parallel above. */ |
243 | 97 | + g_assert_not_reached(); | |
244 | -static void tci_args_rrr(const uint8_t **tb_ptr, | ||
245 | - TCGReg *r0, TCGReg *r1, TCGReg *r2) | ||
246 | +static void tci_args_rrs(uint32_t insn, TCGReg *r0, TCGReg *r1, int32_t *i2) | ||
247 | { | ||
248 | - const uint8_t *start = *tb_ptr; | ||
249 | - | ||
250 | - *r0 = tci_read_r(tb_ptr); | ||
251 | - *r1 = tci_read_r(tb_ptr); | ||
252 | - *r2 = tci_read_r(tb_ptr); | ||
253 | - | ||
254 | - check_size(start, tb_ptr); | ||
255 | + *r0 = extract32(insn, 8, 4); | ||
256 | + *r1 = extract32(insn, 12, 4); | ||
257 | + *i2 = sextract32(insn, 16, 16); | ||
258 | } | ||
259 | |||
260 | -static void tci_args_rrs(const uint8_t **tb_ptr, | ||
261 | - TCGReg *r0, TCGReg *r1, int32_t *i2) | ||
262 | -{ | ||
263 | - const uint8_t *start = *tb_ptr; | ||
264 | - | ||
265 | - *r0 = tci_read_r(tb_ptr); | ||
266 | - *r1 = tci_read_r(tb_ptr); | ||
267 | - *i2 = tci_read_s32(tb_ptr); | ||
268 | - | ||
269 | - check_size(start, tb_ptr); | ||
270 | -} | ||
271 | - | ||
272 | -static void tci_args_rrrc(const uint8_t **tb_ptr, | ||
273 | +static void tci_args_rrrc(uint32_t insn, | ||
274 | TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3) | ||
275 | { | ||
276 | - const uint8_t *start = *tb_ptr; | ||
277 | - | ||
278 | - *r0 = tci_read_r(tb_ptr); | ||
279 | - *r1 = tci_read_r(tb_ptr); | ||
280 | - *r2 = tci_read_r(tb_ptr); | ||
281 | - *c3 = tci_read_b(tb_ptr); | ||
282 | - | ||
283 | - check_size(start, tb_ptr); | ||
284 | + *r0 = extract32(insn, 8, 4); | ||
285 | + *r1 = extract32(insn, 12, 4); | ||
286 | + *r2 = extract32(insn, 16, 4); | ||
287 | + *c3 = extract32(insn, 20, 4); | ||
288 | } | ||
289 | |||
290 | -static void tci_args_rrrm(const uint8_t **tb_ptr, | ||
291 | +static void tci_args_rrrm(uint32_t insn, | ||
292 | TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGMemOpIdx *m3) | ||
293 | { | ||
294 | - const uint8_t *start = *tb_ptr; | ||
295 | - | ||
296 | - *r0 = tci_read_r(tb_ptr); | ||
297 | - *r1 = tci_read_r(tb_ptr); | ||
298 | - *r2 = tci_read_r(tb_ptr); | ||
299 | - *m3 = tci_read_i32(tb_ptr); | ||
300 | - | ||
301 | - check_size(start, tb_ptr); | ||
302 | + *r0 = extract32(insn, 8, 4); | ||
303 | + *r1 = extract32(insn, 12, 4); | ||
304 | + *r2 = extract32(insn, 16, 4); | ||
305 | + *m3 = extract32(insn, 20, 12); | ||
306 | } | ||
307 | |||
308 | -static void tci_args_rrrbb(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, | ||
309 | +static void tci_args_rrrbb(uint32_t insn, TCGReg *r0, TCGReg *r1, | ||
310 | TCGReg *r2, uint8_t *i3, uint8_t *i4) | ||
311 | { | ||
312 | - const uint8_t *start = *tb_ptr; | ||
313 | - | ||
314 | - *r0 = tci_read_r(tb_ptr); | ||
315 | - *r1 = tci_read_r(tb_ptr); | ||
316 | - *r2 = tci_read_r(tb_ptr); | ||
317 | - *i3 = tci_read_b(tb_ptr); | ||
318 | - *i4 = tci_read_b(tb_ptr); | ||
319 | - | ||
320 | - check_size(start, tb_ptr); | ||
321 | + *r0 = extract32(insn, 8, 4); | ||
322 | + *r1 = extract32(insn, 12, 4); | ||
323 | + *r2 = extract32(insn, 16, 4); | ||
324 | + *i3 = extract32(insn, 20, 6); | ||
325 | + *i4 = extract32(insn, 26, 6); | ||
326 | } | ||
327 | |||
328 | -static void tci_args_rrrrm(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, | ||
329 | - TCGReg *r2, TCGReg *r3, TCGMemOpIdx *m4) | ||
330 | +static void tci_args_rrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, | ||
331 | + TCGReg *r2, TCGReg *r3, TCGReg *r4) | ||
332 | { | ||
333 | - const uint8_t *start = *tb_ptr; | ||
334 | - | ||
335 | - *r0 = tci_read_r(tb_ptr); | ||
336 | - *r1 = tci_read_r(tb_ptr); | ||
337 | - *r2 = tci_read_r(tb_ptr); | ||
338 | - *r3 = tci_read_r(tb_ptr); | ||
339 | - *m4 = tci_read_i32(tb_ptr); | ||
340 | - | ||
341 | - check_size(start, tb_ptr); | ||
342 | + *r0 = extract32(insn, 8, 4); | ||
343 | + *r1 = extract32(insn, 12, 4); | ||
344 | + *r2 = extract32(insn, 16, 4); | ||
345 | + *r3 = extract32(insn, 20, 4); | ||
346 | + *r4 = extract32(insn, 24, 4); | ||
347 | } | ||
348 | |||
349 | #if TCG_TARGET_REG_BITS == 32 | ||
350 | -static void tci_args_rrrr(const uint8_t **tb_ptr, | ||
351 | +static void tci_args_rrrr(uint32_t insn, | ||
352 | TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3) | ||
353 | { | ||
354 | - const uint8_t *start = *tb_ptr; | ||
355 | - | ||
356 | - *r0 = tci_read_r(tb_ptr); | ||
357 | - *r1 = tci_read_r(tb_ptr); | ||
358 | - *r2 = tci_read_r(tb_ptr); | ||
359 | - *r3 = tci_read_r(tb_ptr); | ||
360 | - | ||
361 | - check_size(start, tb_ptr); | ||
362 | + *r0 = extract32(insn, 8, 4); | ||
363 | + *r1 = extract32(insn, 12, 4); | ||
364 | + *r2 = extract32(insn, 16, 4); | ||
365 | + *r3 = extract32(insn, 20, 4); | ||
366 | } | ||
367 | |||
368 | -static void tci_args_rrrrrc(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, | ||
369 | +static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, | ||
370 | TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5) | ||
371 | { | ||
372 | - const uint8_t *start = *tb_ptr; | ||
373 | - | ||
374 | - *r0 = tci_read_r(tb_ptr); | ||
375 | - *r1 = tci_read_r(tb_ptr); | ||
376 | - *r2 = tci_read_r(tb_ptr); | ||
377 | - *r3 = tci_read_r(tb_ptr); | ||
378 | - *r4 = tci_read_r(tb_ptr); | ||
379 | - *c5 = tci_read_b(tb_ptr); | ||
380 | - | ||
381 | - check_size(start, tb_ptr); | ||
382 | + *r0 = extract32(insn, 8, 4); | ||
383 | + *r1 = extract32(insn, 12, 4); | ||
384 | + *r2 = extract32(insn, 16, 4); | ||
385 | + *r3 = extract32(insn, 20, 4); | ||
386 | + *r4 = extract32(insn, 24, 4); | ||
387 | + *c5 = extract32(insn, 28, 4); | ||
388 | } | ||
389 | |||
390 | -static void tci_args_rrrrrr(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, | ||
391 | +static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, | ||
392 | TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5) | ||
393 | { | ||
394 | - const uint8_t *start = *tb_ptr; | ||
395 | - | ||
396 | - *r0 = tci_read_r(tb_ptr); | ||
397 | - *r1 = tci_read_r(tb_ptr); | ||
398 | - *r2 = tci_read_r(tb_ptr); | ||
399 | - *r3 = tci_read_r(tb_ptr); | ||
400 | - *r4 = tci_read_r(tb_ptr); | ||
401 | - *r5 = tci_read_r(tb_ptr); | ||
402 | - | ||
403 | - check_size(start, tb_ptr); | ||
404 | + *r0 = extract32(insn, 8, 4); | ||
405 | + *r1 = extract32(insn, 12, 4); | ||
406 | + *r2 = extract32(insn, 16, 4); | ||
407 | + *r3 = extract32(insn, 20, 4); | ||
408 | + *r4 = extract32(insn, 24, 4); | ||
409 | + *r5 = extract32(insn, 28, 4); | ||
410 | } | ||
411 | #endif | ||
412 | |||
413 | @@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) | ||
414 | uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
415 | const void *v_tb_ptr) | ||
416 | { | ||
417 | - const uint8_t *tb_ptr = v_tb_ptr; | ||
418 | + const uint32_t *tb_ptr = v_tb_ptr; | ||
419 | tcg_target_ulong regs[TCG_TARGET_NB_REGS]; | ||
420 | uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE) | ||
421 | / sizeof(uint64_t)]; | ||
422 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
423 | tci_assert(tb_ptr); | ||
424 | |||
425 | for (;;) { | ||
426 | - TCGOpcode opc = tb_ptr[0]; | ||
427 | - TCGReg r0, r1, r2, r3; | ||
428 | + uint32_t insn; | ||
429 | + TCGOpcode opc; | ||
430 | + TCGReg r0, r1, r2, r3, r4; | ||
431 | tcg_target_ulong t1; | ||
432 | TCGCond condition; | ||
433 | target_ulong taddr; | ||
434 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
435 | uint32_t tmp32; | ||
436 | uint64_t tmp64; | ||
437 | #if TCG_TARGET_REG_BITS == 32 | ||
438 | - TCGReg r4, r5; | ||
439 | + TCGReg r5; | ||
440 | uint64_t T1, T2; | ||
441 | #endif | ||
442 | TCGMemOpIdx oi; | ||
443 | int32_t ofs; | ||
444 | - void *ptr, *cif; | ||
445 | + void *ptr; | ||
446 | |||
447 | - /* Skip opcode and size entry. */ | ||
448 | - tb_ptr += 2; | ||
449 | + insn = *tb_ptr++; | ||
450 | + opc = extract32(insn, 0, 8); | ||
451 | |||
452 | switch (opc) { | ||
453 | case INDEX_op_call: | ||
454 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
455 | } | ||
456 | } | 98 | } |
457 | 99 | ||
458 | - tci_args_nll(&tb_ptr, &len, &ptr, &cif); | 100 | env->regs[r3 + 0] = int128_gethi(ov); |
459 | + tci_args_nl(insn, tb_ptr, &len, &ptr); | 101 | @@ -XXX,XX +XXX,XX @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, |
460 | 102 | cpu_stq_data_ra(env, a2, svh, ra); | |
461 | /* Helper functions may need to access the "return address" */ | ||
462 | tci_tb_ptr = (uintptr_t)tb_ptr; | ||
463 | |||
464 | - ffi_call(cif, ptr, stack, call_slots); | ||
465 | + { | ||
466 | + void **pptr = ptr; | ||
467 | + ffi_call(pptr[1], pptr[0], stack, call_slots); | ||
468 | + } | ||
469 | |||
470 | /* Any result winds up "left-aligned" in the stack[0] slot. */ | ||
471 | switch (len) { | ||
472 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
473 | break; | 103 | break; |
474 | 104 | case 4: | |
475 | case INDEX_op_br: | 105 | - if (parallel) { |
476 | - tci_args_l(&tb_ptr, &ptr); | 106 | -#ifdef CONFIG_ATOMIC128 |
477 | + tci_args_l(insn, tb_ptr, &ptr); | 107 | + if (!parallel) { |
478 | tb_ptr = ptr; | 108 | + cpu_stq_data_ra(env, a2 + 0, svh, ra); |
479 | continue; | 109 | + cpu_stq_data_ra(env, a2 + 8, svl, ra); |
480 | case INDEX_op_setcond_i32: | 110 | + } else if (HAVE_ATOMIC128) { |
481 | - tci_args_rrrc(&tb_ptr, &r0, &r1, &r2, &condition); | 111 | TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); |
482 | + tci_args_rrrc(insn, &r0, &r1, &r2, &condition); | 112 | Int128 sv = int128_make128(svl, svh); |
483 | regs[r0] = tci_compare32(regs[r1], regs[r2], condition); | 113 | helper_atomic_sto_be_mmu(env, a2, sv, oi, ra); |
484 | break; | 114 | -#else |
485 | #if TCG_TARGET_REG_BITS == 32 | 115 | + } else { |
486 | case INDEX_op_setcond2_i32: | 116 | /* Note that we asserted !parallel above. */ |
487 | - tci_args_rrrrrc(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &condition); | 117 | g_assert_not_reached(); |
488 | + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); | 118 | -#endif |
489 | T1 = tci_uint64(regs[r2], regs[r1]); | 119 | - } else { |
490 | T2 = tci_uint64(regs[r4], regs[r3]); | 120 | - cpu_stq_data_ra(env, a2 + 0, svh, ra); |
491 | regs[r0] = tci_compare64(T1, T2, condition); | 121 | - cpu_stq_data_ra(env, a2 + 8, svl, ra); |
492 | break; | ||
493 | #elif TCG_TARGET_REG_BITS == 64 | ||
494 | case INDEX_op_setcond_i64: | ||
495 | - tci_args_rrrc(&tb_ptr, &r0, &r1, &r2, &condition); | ||
496 | + tci_args_rrrc(insn, &r0, &r1, &r2, &condition); | ||
497 | regs[r0] = tci_compare64(regs[r1], regs[r2], condition); | ||
498 | break; | ||
499 | #endif | ||
500 | CASE_32_64(mov) | ||
501 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
502 | + tci_args_rr(insn, &r0, &r1); | ||
503 | regs[r0] = regs[r1]; | ||
504 | break; | ||
505 | - case INDEX_op_tci_movi_i32: | ||
506 | - tci_args_ri(&tb_ptr, &r0, &t1); | ||
507 | + case INDEX_op_tci_movi: | ||
508 | + tci_args_ri(insn, &r0, &t1); | ||
509 | regs[r0] = t1; | ||
510 | break; | ||
511 | + case INDEX_op_tci_movl: | ||
512 | + tci_args_rl(insn, tb_ptr, &r0, &ptr); | ||
513 | + regs[r0] = *(tcg_target_ulong *)ptr; | ||
514 | + break; | ||
515 | |||
516 | /* Load/store operations (32 bit). */ | ||
517 | |||
518 | CASE_32_64(ld8u) | ||
519 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | ||
520 | + tci_args_rrs(insn, &r0, &r1, &ofs); | ||
521 | ptr = (void *)(regs[r1] + ofs); | ||
522 | regs[r0] = *(uint8_t *)ptr; | ||
523 | break; | ||
524 | CASE_32_64(ld8s) | ||
525 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | ||
526 | + tci_args_rrs(insn, &r0, &r1, &ofs); | ||
527 | ptr = (void *)(regs[r1] + ofs); | ||
528 | regs[r0] = *(int8_t *)ptr; | ||
529 | break; | ||
530 | CASE_32_64(ld16u) | ||
531 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | ||
532 | + tci_args_rrs(insn, &r0, &r1, &ofs); | ||
533 | ptr = (void *)(regs[r1] + ofs); | ||
534 | regs[r0] = *(uint16_t *)ptr; | ||
535 | break; | ||
536 | CASE_32_64(ld16s) | ||
537 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | ||
538 | + tci_args_rrs(insn, &r0, &r1, &ofs); | ||
539 | ptr = (void *)(regs[r1] + ofs); | ||
540 | regs[r0] = *(int16_t *)ptr; | ||
541 | break; | ||
542 | case INDEX_op_ld_i32: | ||
543 | CASE_64(ld32u) | ||
544 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | ||
545 | + tci_args_rrs(insn, &r0, &r1, &ofs); | ||
546 | ptr = (void *)(regs[r1] + ofs); | ||
547 | regs[r0] = *(uint32_t *)ptr; | ||
548 | break; | ||
549 | CASE_32_64(st8) | ||
550 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | ||
551 | + tci_args_rrs(insn, &r0, &r1, &ofs); | ||
552 | ptr = (void *)(regs[r1] + ofs); | ||
553 | *(uint8_t *)ptr = regs[r0]; | ||
554 | break; | ||
555 | CASE_32_64(st16) | ||
556 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | ||
557 | + tci_args_rrs(insn, &r0, &r1, &ofs); | ||
558 | ptr = (void *)(regs[r1] + ofs); | ||
559 | *(uint16_t *)ptr = regs[r0]; | ||
560 | break; | ||
561 | case INDEX_op_st_i32: | ||
562 | CASE_64(st32) | ||
563 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | ||
564 | + tci_args_rrs(insn, &r0, &r1, &ofs); | ||
565 | ptr = (void *)(regs[r1] + ofs); | ||
566 | *(uint32_t *)ptr = regs[r0]; | ||
567 | break; | ||
568 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
569 | /* Arithmetic operations (mixed 32/64 bit). */ | ||
570 | |||
571 | CASE_32_64(add) | ||
572 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
573 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
574 | regs[r0] = regs[r1] + regs[r2]; | ||
575 | break; | ||
576 | CASE_32_64(sub) | ||
577 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
578 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
579 | regs[r0] = regs[r1] - regs[r2]; | ||
580 | break; | ||
581 | CASE_32_64(mul) | ||
582 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
583 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
584 | regs[r0] = regs[r1] * regs[r2]; | ||
585 | break; | ||
586 | CASE_32_64(and) | ||
587 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
588 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
589 | regs[r0] = regs[r1] & regs[r2]; | ||
590 | break; | ||
591 | CASE_32_64(or) | ||
592 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
593 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
594 | regs[r0] = regs[r1] | regs[r2]; | ||
595 | break; | ||
596 | CASE_32_64(xor) | ||
597 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
598 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
599 | regs[r0] = regs[r1] ^ regs[r2]; | ||
600 | break; | ||
601 | |||
602 | /* Arithmetic operations (32 bit). */ | ||
603 | |||
604 | case INDEX_op_div_i32: | ||
605 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
606 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
607 | regs[r0] = (int32_t)regs[r1] / (int32_t)regs[r2]; | ||
608 | break; | ||
609 | case INDEX_op_divu_i32: | ||
610 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
611 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
612 | regs[r0] = (uint32_t)regs[r1] / (uint32_t)regs[r2]; | ||
613 | break; | ||
614 | case INDEX_op_rem_i32: | ||
615 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
616 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
617 | regs[r0] = (int32_t)regs[r1] % (int32_t)regs[r2]; | ||
618 | break; | ||
619 | case INDEX_op_remu_i32: | ||
620 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
621 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
622 | regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2]; | ||
623 | break; | ||
624 | |||
625 | /* Shift/rotate operations (32 bit). */ | ||
626 | |||
627 | case INDEX_op_shl_i32: | ||
628 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
629 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
630 | regs[r0] = (uint32_t)regs[r1] << (regs[r2] & 31); | ||
631 | break; | ||
632 | case INDEX_op_shr_i32: | ||
633 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
634 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
635 | regs[r0] = (uint32_t)regs[r1] >> (regs[r2] & 31); | ||
636 | break; | ||
637 | case INDEX_op_sar_i32: | ||
638 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
639 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
640 | regs[r0] = (int32_t)regs[r1] >> (regs[r2] & 31); | ||
641 | break; | ||
642 | #if TCG_TARGET_HAS_rot_i32 | ||
643 | case INDEX_op_rotl_i32: | ||
644 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
645 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
646 | regs[r0] = rol32(regs[r1], regs[r2] & 31); | ||
647 | break; | ||
648 | case INDEX_op_rotr_i32: | ||
649 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
650 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
651 | regs[r0] = ror32(regs[r1], regs[r2] & 31); | ||
652 | break; | ||
653 | #endif | ||
654 | #if TCG_TARGET_HAS_deposit_i32 | ||
655 | case INDEX_op_deposit_i32: | ||
656 | - tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len); | ||
657 | + tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); | ||
658 | regs[r0] = deposit32(regs[r1], pos, len, regs[r2]); | ||
659 | break; | ||
660 | #endif | ||
661 | case INDEX_op_brcond_i32: | ||
662 | - tci_args_rl(&tb_ptr, &r0, &ptr); | ||
663 | + tci_args_rl(insn, tb_ptr, &r0, &ptr); | ||
664 | if ((uint32_t)regs[r0]) { | ||
665 | tb_ptr = ptr; | ||
666 | } | 122 | } |
667 | break; | 123 | break; |
668 | #if TCG_TARGET_REG_BITS == 32 | 124 | default: |
669 | case INDEX_op_add2_i32: | 125 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel) |
670 | - tci_args_rrrrrr(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &r5); | 126 | uintptr_t ra = GETPC(); |
671 | + tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); | 127 | uint64_t hi, lo; |
672 | T1 = tci_uint64(regs[r3], regs[r2]); | 128 | |
673 | T2 = tci_uint64(regs[r5], regs[r4]); | 129 | - if (parallel) { |
674 | tci_write_reg64(regs, r1, r0, T1 + T2); | 130 | -#ifndef CONFIG_ATOMIC128 |
675 | break; | 131 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); |
676 | case INDEX_op_sub2_i32: | 132 | -#else |
677 | - tci_args_rrrrrr(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &r5); | 133 | + if (!parallel) { |
678 | + tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); | 134 | + check_alignment(env, addr, 16, ra); |
679 | T1 = tci_uint64(regs[r3], regs[r2]); | 135 | + hi = cpu_ldq_data_ra(env, addr + 0, ra); |
680 | T2 = tci_uint64(regs[r5], regs[r4]); | 136 | + lo = cpu_ldq_data_ra(env, addr + 8, ra); |
681 | tci_write_reg64(regs, r1, r0, T1 - T2); | 137 | + } else if (HAVE_ATOMIC128) { |
682 | break; | 138 | int mem_idx = cpu_mmu_index(env, false); |
683 | case INDEX_op_mulu2_i32: | 139 | TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); |
684 | - tci_args_rrrr(&tb_ptr, &r0, &r1, &r2, &r3); | 140 | Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); |
685 | + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); | 141 | hi = int128_gethi(v); |
686 | tci_write_reg64(regs, r1, r0, (uint64_t)regs[r2] * regs[r3]); | 142 | lo = int128_getlo(v); |
687 | break; | 143 | -#endif |
688 | #endif /* TCG_TARGET_REG_BITS == 32 */ | 144 | } else { |
689 | #if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 | 145 | - check_alignment(env, addr, 16, ra); |
690 | CASE_32_64(ext8s) | ||
691 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
692 | + tci_args_rr(insn, &r0, &r1); | ||
693 | regs[r0] = (int8_t)regs[r1]; | ||
694 | break; | ||
695 | #endif | ||
696 | #if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 | ||
697 | CASE_32_64(ext16s) | ||
698 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
699 | + tci_args_rr(insn, &r0, &r1); | ||
700 | regs[r0] = (int16_t)regs[r1]; | ||
701 | break; | ||
702 | #endif | ||
703 | #if TCG_TARGET_HAS_ext8u_i32 || TCG_TARGET_HAS_ext8u_i64 | ||
704 | CASE_32_64(ext8u) | ||
705 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
706 | + tci_args_rr(insn, &r0, &r1); | ||
707 | regs[r0] = (uint8_t)regs[r1]; | ||
708 | break; | ||
709 | #endif | ||
710 | #if TCG_TARGET_HAS_ext16u_i32 || TCG_TARGET_HAS_ext16u_i64 | ||
711 | CASE_32_64(ext16u) | ||
712 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
713 | + tci_args_rr(insn, &r0, &r1); | ||
714 | regs[r0] = (uint16_t)regs[r1]; | ||
715 | break; | ||
716 | #endif | ||
717 | #if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 | ||
718 | CASE_32_64(bswap16) | ||
719 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
720 | + tci_args_rr(insn, &r0, &r1); | ||
721 | regs[r0] = bswap16(regs[r1]); | ||
722 | break; | ||
723 | #endif | ||
724 | #if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64 | ||
725 | CASE_32_64(bswap32) | ||
726 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
727 | + tci_args_rr(insn, &r0, &r1); | ||
728 | regs[r0] = bswap32(regs[r1]); | ||
729 | break; | ||
730 | #endif | ||
731 | #if TCG_TARGET_HAS_not_i32 || TCG_TARGET_HAS_not_i64 | ||
732 | CASE_32_64(not) | ||
733 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
734 | + tci_args_rr(insn, &r0, &r1); | ||
735 | regs[r0] = ~regs[r1]; | ||
736 | break; | ||
737 | #endif | ||
738 | #if TCG_TARGET_HAS_neg_i32 || TCG_TARGET_HAS_neg_i64 | ||
739 | CASE_32_64(neg) | ||
740 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
741 | + tci_args_rr(insn, &r0, &r1); | ||
742 | regs[r0] = -regs[r1]; | ||
743 | break; | ||
744 | #endif | ||
745 | #if TCG_TARGET_REG_BITS == 64 | ||
746 | - case INDEX_op_tci_movi_i64: | ||
747 | - tci_args_rI(&tb_ptr, &r0, &t1); | ||
748 | - regs[r0] = t1; | ||
749 | - break; | ||
750 | - | 146 | - |
751 | /* Load/store operations (64 bit). */ | 147 | - hi = cpu_ldq_data_ra(env, addr + 0, ra); |
752 | 148 | - lo = cpu_ldq_data_ra(env, addr + 8, ra); | |
753 | case INDEX_op_ld32s_i64: | 149 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); |
754 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | 150 | } |
755 | + tci_args_rrs(insn, &r0, &r1, &ofs); | 151 | |
756 | ptr = (void *)(regs[r1] + ofs); | 152 | env->retxl = lo; |
757 | regs[r0] = *(int32_t *)ptr; | 153 | @@ -XXX,XX +XXX,XX @@ static void do_stpq(CPUS390XState *env, uint64_t addr, |
758 | break; | ||
759 | case INDEX_op_ld_i64: | ||
760 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | ||
761 | + tci_args_rrs(insn, &r0, &r1, &ofs); | ||
762 | ptr = (void *)(regs[r1] + ofs); | ||
763 | regs[r0] = *(uint64_t *)ptr; | ||
764 | break; | ||
765 | case INDEX_op_st_i64: | ||
766 | - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); | ||
767 | + tci_args_rrs(insn, &r0, &r1, &ofs); | ||
768 | ptr = (void *)(regs[r1] + ofs); | ||
769 | *(uint64_t *)ptr = regs[r0]; | ||
770 | break; | ||
771 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
772 | /* Arithmetic operations (64 bit). */ | ||
773 | |||
774 | case INDEX_op_div_i64: | ||
775 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
776 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
777 | regs[r0] = (int64_t)regs[r1] / (int64_t)regs[r2]; | ||
778 | break; | ||
779 | case INDEX_op_divu_i64: | ||
780 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
781 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
782 | regs[r0] = (uint64_t)regs[r1] / (uint64_t)regs[r2]; | ||
783 | break; | ||
784 | case INDEX_op_rem_i64: | ||
785 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
786 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
787 | regs[r0] = (int64_t)regs[r1] % (int64_t)regs[r2]; | ||
788 | break; | ||
789 | case INDEX_op_remu_i64: | ||
790 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
791 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
792 | regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2]; | ||
793 | break; | ||
794 | |||
795 | /* Shift/rotate operations (64 bit). */ | ||
796 | |||
797 | case INDEX_op_shl_i64: | ||
798 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
799 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
800 | regs[r0] = regs[r1] << (regs[r2] & 63); | ||
801 | break; | ||
802 | case INDEX_op_shr_i64: | ||
803 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
804 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
805 | regs[r0] = regs[r1] >> (regs[r2] & 63); | ||
806 | break; | ||
807 | case INDEX_op_sar_i64: | ||
808 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
809 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
810 | regs[r0] = (int64_t)regs[r1] >> (regs[r2] & 63); | ||
811 | break; | ||
812 | #if TCG_TARGET_HAS_rot_i64 | ||
813 | case INDEX_op_rotl_i64: | ||
814 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
815 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
816 | regs[r0] = rol64(regs[r1], regs[r2] & 63); | ||
817 | break; | ||
818 | case INDEX_op_rotr_i64: | ||
819 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
820 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
821 | regs[r0] = ror64(regs[r1], regs[r2] & 63); | ||
822 | break; | ||
823 | #endif | ||
824 | #if TCG_TARGET_HAS_deposit_i64 | ||
825 | case INDEX_op_deposit_i64: | ||
826 | - tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len); | ||
827 | + tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); | ||
828 | regs[r0] = deposit64(regs[r1], pos, len, regs[r2]); | ||
829 | break; | ||
830 | #endif | ||
831 | case INDEX_op_brcond_i64: | ||
832 | - tci_args_rl(&tb_ptr, &r0, &ptr); | ||
833 | + tci_args_rl(insn, tb_ptr, &r0, &ptr); | ||
834 | if (regs[r0]) { | ||
835 | tb_ptr = ptr; | ||
836 | } | ||
837 | break; | ||
838 | case INDEX_op_ext32s_i64: | ||
839 | case INDEX_op_ext_i32_i64: | ||
840 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
841 | + tci_args_rr(insn, &r0, &r1); | ||
842 | regs[r0] = (int32_t)regs[r1]; | ||
843 | break; | ||
844 | case INDEX_op_ext32u_i64: | ||
845 | case INDEX_op_extu_i32_i64: | ||
846 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
847 | + tci_args_rr(insn, &r0, &r1); | ||
848 | regs[r0] = (uint32_t)regs[r1]; | ||
849 | break; | ||
850 | #if TCG_TARGET_HAS_bswap64_i64 | ||
851 | case INDEX_op_bswap64_i64: | ||
852 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
853 | + tci_args_rr(insn, &r0, &r1); | ||
854 | regs[r0] = bswap64(regs[r1]); | ||
855 | break; | ||
856 | #endif | ||
857 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
858 | /* QEMU specific operations. */ | ||
859 | |||
860 | case INDEX_op_exit_tb: | ||
861 | - tci_args_l(&tb_ptr, &ptr); | ||
862 | + tci_args_l(insn, tb_ptr, &ptr); | ||
863 | return (uintptr_t)ptr; | ||
864 | |||
865 | case INDEX_op_goto_tb: | ||
866 | - tci_args_l(&tb_ptr, &ptr); | ||
867 | + tci_args_l(insn, tb_ptr, &ptr); | ||
868 | tb_ptr = *(void **)ptr; | ||
869 | break; | ||
870 | |||
871 | case INDEX_op_qemu_ld_i32: | ||
872 | if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) { | ||
873 | - tci_args_rrm(&tb_ptr, &r0, &r1, &oi); | ||
874 | + tci_args_rrm(insn, &r0, &r1, &oi); | ||
875 | taddr = regs[r1]; | ||
876 | } else { | ||
877 | - tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi); | ||
878 | + tci_args_rrrm(insn, &r0, &r1, &r2, &oi); | ||
879 | taddr = tci_uint64(regs[r2], regs[r1]); | ||
880 | } | ||
881 | switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { | ||
882 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
883 | |||
884 | case INDEX_op_qemu_ld_i64: | ||
885 | if (TCG_TARGET_REG_BITS == 64) { | ||
886 | - tci_args_rrm(&tb_ptr, &r0, &r1, &oi); | ||
887 | + tci_args_rrm(insn, &r0, &r1, &oi); | ||
888 | taddr = regs[r1]; | ||
889 | } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) { | ||
890 | - tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi); | ||
891 | + tci_args_rrrm(insn, &r0, &r1, &r2, &oi); | ||
892 | taddr = regs[r2]; | ||
893 | } else { | ||
894 | - tci_args_rrrrm(&tb_ptr, &r0, &r1, &r2, &r3, &oi); | ||
895 | + tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4); | ||
896 | taddr = tci_uint64(regs[r3], regs[r2]); | ||
897 | + oi = regs[r4]; | ||
898 | } | ||
899 | switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { | ||
900 | case MO_UB: | ||
901 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
902 | |||
903 | case INDEX_op_qemu_st_i32: | ||
904 | if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) { | ||
905 | - tci_args_rrm(&tb_ptr, &r0, &r1, &oi); | ||
906 | + tci_args_rrm(insn, &r0, &r1, &oi); | ||
907 | taddr = regs[r1]; | ||
908 | } else { | ||
909 | - tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi); | ||
910 | + tci_args_rrrm(insn, &r0, &r1, &r2, &oi); | ||
911 | taddr = tci_uint64(regs[r2], regs[r1]); | ||
912 | } | ||
913 | tmp32 = regs[r0]; | ||
914 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
915 | |||
916 | case INDEX_op_qemu_st_i64: | ||
917 | if (TCG_TARGET_REG_BITS == 64) { | ||
918 | - tci_args_rrm(&tb_ptr, &r0, &r1, &oi); | ||
919 | + tci_args_rrm(insn, &r0, &r1, &oi); | ||
920 | taddr = regs[r1]; | ||
921 | tmp64 = regs[r0]; | ||
922 | } else { | ||
923 | if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) { | ||
924 | - tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi); | ||
925 | + tci_args_rrrm(insn, &r0, &r1, &r2, &oi); | ||
926 | taddr = regs[r2]; | ||
927 | } else { | ||
928 | - tci_args_rrrrm(&tb_ptr, &r0, &r1, &r2, &r3, &oi); | ||
929 | + tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4); | ||
930 | taddr = tci_uint64(regs[r3], regs[r2]); | ||
931 | + oi = regs[r4]; | ||
932 | } | ||
933 | tmp64 = tci_uint64(regs[r1], regs[r0]); | ||
934 | } | ||
935 | @@ -XXX,XX +XXX,XX @@ static const char *str_c(TCGCond c) | ||
936 | /* Disassemble TCI bytecode. */ | ||
937 | int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
938 | { | 154 | { |
939 | - uint8_t buf[256]; | 155 | uintptr_t ra = GETPC(); |
940 | - int length, status; | 156 | |
941 | + const uint32_t *tb_ptr = (const void *)(uintptr_t)addr; | 157 | - if (parallel) { |
942 | const TCGOpDef *def; | 158 | -#ifndef CONFIG_ATOMIC128 |
943 | const char *op_name; | 159 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); |
944 | + uint32_t insn; | 160 | -#else |
945 | TCGOpcode op; | 161 | - int mem_idx = cpu_mmu_index(env, false); |
946 | - TCGReg r0, r1, r2, r3; | 162 | - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); |
947 | + TCGReg r0, r1, r2, r3, r4; | ||
948 | #if TCG_TARGET_REG_BITS == 32 | ||
949 | - TCGReg r4, r5; | ||
950 | + TCGReg r5; | ||
951 | #endif | ||
952 | tcg_target_ulong i1; | ||
953 | int32_t s2; | ||
954 | TCGCond c; | ||
955 | TCGMemOpIdx oi; | ||
956 | uint8_t pos, len; | ||
957 | - void *ptr, *cif; | ||
958 | - const uint8_t *tb_ptr; | ||
959 | + void *ptr; | ||
960 | |||
961 | - status = info->read_memory_func(addr, buf, 2, info); | ||
962 | - if (status != 0) { | ||
963 | - info->memory_error_func(status, addr, info); | ||
964 | - return -1; | ||
965 | - } | ||
966 | - op = buf[0]; | ||
967 | - length = buf[1]; | ||
968 | + /* TCI is always the host, so we don't need to load indirect. */ | ||
969 | + insn = *tb_ptr++; | ||
970 | |||
971 | - if (length < 2) { | ||
972 | - info->fprintf_func(info->stream, "invalid length %d", length); | ||
973 | - return 1; | ||
974 | - } | ||
975 | - | 163 | - |
976 | - status = info->read_memory_func(addr + 2, buf + 2, length - 2, info); | 164 | - Int128 v = int128_make128(low, high); |
977 | - if (status != 0) { | 165 | - helper_atomic_sto_be_mmu(env, addr, v, oi, ra); |
978 | - info->memory_error_func(status, addr + 2, info); | ||
979 | - return -1; | ||
980 | - } | ||
981 | + info->fprintf_func(info->stream, "%08x ", insn); | ||
982 | |||
983 | + op = extract32(insn, 0, 8); | ||
984 | def = &tcg_op_defs[op]; | ||
985 | op_name = def->name; | ||
986 | - tb_ptr = buf + 2; | ||
987 | |||
988 | switch (op) { | ||
989 | case INDEX_op_br: | ||
990 | case INDEX_op_exit_tb: | ||
991 | case INDEX_op_goto_tb: | ||
992 | - tci_args_l(&tb_ptr, &ptr); | ||
993 | + tci_args_l(insn, tb_ptr, &ptr); | ||
994 | info->fprintf_func(info->stream, "%-12s %p", op_name, ptr); | ||
995 | break; | ||
996 | |||
997 | case INDEX_op_call: | ||
998 | - tci_args_nll(&tb_ptr, &len, &ptr, &cif); | ||
999 | - info->fprintf_func(info->stream, "%-12s %d, %p, %p", | ||
1000 | - op_name, len, ptr, cif); | ||
1001 | + tci_args_nl(insn, tb_ptr, &len, &ptr); | ||
1002 | + info->fprintf_func(info->stream, "%-12s %d, %p", op_name, len, ptr); | ||
1003 | break; | ||
1004 | |||
1005 | case INDEX_op_brcond_i32: | ||
1006 | case INDEX_op_brcond_i64: | ||
1007 | - tci_args_rl(&tb_ptr, &r0, &ptr); | ||
1008 | + tci_args_rl(insn, tb_ptr, &r0, &ptr); | ||
1009 | info->fprintf_func(info->stream, "%-12s %s, 0, ne, %p", | ||
1010 | op_name, str_r(r0), ptr); | ||
1011 | break; | ||
1012 | |||
1013 | case INDEX_op_setcond_i32: | ||
1014 | case INDEX_op_setcond_i64: | ||
1015 | - tci_args_rrrc(&tb_ptr, &r0, &r1, &r2, &c); | ||
1016 | + tci_args_rrrc(insn, &r0, &r1, &r2, &c); | ||
1017 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", | ||
1018 | op_name, str_r(r0), str_r(r1), str_r(r2), str_c(c)); | ||
1019 | break; | ||
1020 | |||
1021 | - case INDEX_op_tci_movi_i32: | ||
1022 | - tci_args_ri(&tb_ptr, &r0, &i1); | ||
1023 | + case INDEX_op_tci_movi: | ||
1024 | + tci_args_ri(insn, &r0, &i1); | ||
1025 | info->fprintf_func(info->stream, "%-12s %s, 0x%" TCG_PRIlx, | ||
1026 | op_name, str_r(r0), i1); | ||
1027 | break; | ||
1028 | |||
1029 | -#if TCG_TARGET_REG_BITS == 64 | ||
1030 | - case INDEX_op_tci_movi_i64: | ||
1031 | - tci_args_rI(&tb_ptr, &r0, &i1); | ||
1032 | - info->fprintf_func(info->stream, "%-12s %s, 0x%" TCG_PRIlx, | ||
1033 | - op_name, str_r(r0), i1); | ||
1034 | + case INDEX_op_tci_movl: | ||
1035 | + tci_args_rl(insn, tb_ptr, &r0, &ptr); | ||
1036 | + info->fprintf_func(info->stream, "%-12s %s, %p", | ||
1037 | + op_name, str_r(r0), ptr); | ||
1038 | break; | ||
1039 | -#endif | 166 | -#endif |
1040 | 167 | - } else { | |
1041 | case INDEX_op_ld8u_i32: | 168 | + if (!parallel) { |
1042 | case INDEX_op_ld8u_i64: | 169 | check_alignment(env, addr, 16, ra); |
1043 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | 170 | - |
1044 | case INDEX_op_st32_i64: | 171 | cpu_stq_data_ra(env, addr + 0, high, ra); |
1045 | case INDEX_op_st_i32: | 172 | cpu_stq_data_ra(env, addr + 8, low, ra); |
1046 | case INDEX_op_st_i64: | 173 | + } else if (HAVE_ATOMIC128) { |
1047 | - tci_args_rrs(&tb_ptr, &r0, &r1, &s2); | 174 | + int mem_idx = cpu_mmu_index(env, false); |
1048 | + tci_args_rrs(insn, &r0, &r1, &s2); | 175 | + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); |
1049 | info->fprintf_func(info->stream, "%-12s %s, %s, %d", | 176 | + Int128 v = int128_make128(low, high); |
1050 | op_name, str_r(r0), str_r(r1), s2); | 177 | + helper_atomic_sto_be_mmu(env, addr, v, oi, ra); |
1051 | break; | 178 | + } else { |
1052 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | 179 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); |
1053 | case INDEX_op_not_i64: | ||
1054 | case INDEX_op_neg_i32: | ||
1055 | case INDEX_op_neg_i64: | ||
1056 | - tci_args_rr(&tb_ptr, &r0, &r1); | ||
1057 | + tci_args_rr(insn, &r0, &r1); | ||
1058 | info->fprintf_func(info->stream, "%-12s %s, %s", | ||
1059 | op_name, str_r(r0), str_r(r1)); | ||
1060 | break; | ||
1061 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
1062 | case INDEX_op_rotl_i64: | ||
1063 | case INDEX_op_rotr_i32: | ||
1064 | case INDEX_op_rotr_i64: | ||
1065 | - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); | ||
1066 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
1067 | info->fprintf_func(info->stream, "%-12s %s, %s, %s", | ||
1068 | op_name, str_r(r0), str_r(r1), str_r(r2)); | ||
1069 | break; | ||
1070 | |||
1071 | case INDEX_op_deposit_i32: | ||
1072 | case INDEX_op_deposit_i64: | ||
1073 | - tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len); | ||
1074 | + tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); | ||
1075 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %d, %d", | ||
1076 | op_name, str_r(r0), str_r(r1), str_r(r2), pos, len); | ||
1077 | break; | ||
1078 | |||
1079 | #if TCG_TARGET_REG_BITS == 32 | ||
1080 | case INDEX_op_setcond2_i32: | ||
1081 | - tci_args_rrrrrc(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &c); | ||
1082 | + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &c); | ||
1083 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s", | ||
1084 | op_name, str_r(r0), str_r(r1), str_r(r2), | ||
1085 | str_r(r3), str_r(r4), str_c(c)); | ||
1086 | break; | ||
1087 | |||
1088 | case INDEX_op_mulu2_i32: | ||
1089 | - tci_args_rrrr(&tb_ptr, &r0, &r1, &r2, &r3); | ||
1090 | + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); | ||
1091 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", | ||
1092 | op_name, str_r(r0), str_r(r1), | ||
1093 | str_r(r2), str_r(r3)); | ||
1094 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
1095 | |||
1096 | case INDEX_op_add2_i32: | ||
1097 | case INDEX_op_sub2_i32: | ||
1098 | - tci_args_rrrrrr(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &r5); | ||
1099 | + tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); | ||
1100 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s", | ||
1101 | op_name, str_r(r0), str_r(r1), str_r(r2), | ||
1102 | str_r(r3), str_r(r4), str_r(r5)); | ||
1103 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
1104 | len += DIV_ROUND_UP(TARGET_LONG_BITS, TCG_TARGET_REG_BITS); | ||
1105 | switch (len) { | ||
1106 | case 2: | ||
1107 | - tci_args_rrm(&tb_ptr, &r0, &r1, &oi); | ||
1108 | + tci_args_rrm(insn, &r0, &r1, &oi); | ||
1109 | info->fprintf_func(info->stream, "%-12s %s, %s, %x", | ||
1110 | op_name, str_r(r0), str_r(r1), oi); | ||
1111 | break; | ||
1112 | case 3: | ||
1113 | - tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi); | ||
1114 | + tci_args_rrrm(insn, &r0, &r1, &r2, &oi); | ||
1115 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %x", | ||
1116 | op_name, str_r(r0), str_r(r1), str_r(r2), oi); | ||
1117 | break; | ||
1118 | case 4: | ||
1119 | - tci_args_rrrrm(&tb_ptr, &r0, &r1, &r2, &r3, &oi); | ||
1120 | - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %x", | ||
1121 | + tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4); | ||
1122 | + info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s", | ||
1123 | op_name, str_r(r0), str_r(r1), | ||
1124 | - str_r(r2), str_r(r3), oi); | ||
1125 | + str_r(r2), str_r(r3), str_r(r4)); | ||
1126 | break; | ||
1127 | default: | ||
1128 | g_assert_not_reached(); | ||
1129 | } | ||
1130 | break; | ||
1131 | |||
1132 | + case 0: | ||
1133 | + /* tcg_out_nop_fill uses zeros */ | ||
1134 | + if (insn == 0) { | ||
1135 | + info->fprintf_func(info->stream, "align"); | ||
1136 | + break; | ||
1137 | + } | ||
1138 | + /* fall through */ | ||
1139 | + | ||
1140 | default: | ||
1141 | info->fprintf_func(info->stream, "illegal opcode %d", op); | ||
1142 | break; | ||
1143 | } | 180 | } |
1144 | |||
1145 | - return length; | ||
1146 | + return sizeof(insn); | ||
1147 | } | 181 | } |
1148 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | 182 | |
1149 | index XXXXXXX..XXXXXXX 100644 | ||
1150 | --- a/tcg/tci/tcg-target.c.inc | ||
1151 | +++ b/tcg/tci/tcg-target.c.inc | ||
1152 | @@ -XXX,XX +XXX,XX @@ | ||
1153 | * THE SOFTWARE. | ||
1154 | */ | ||
1155 | |||
1156 | -/* TODO list: | ||
1157 | - * - See TODO comments in code. | ||
1158 | - */ | ||
1159 | - | ||
1160 | -/* Marker for missing code. */ | ||
1161 | -#define TODO() \ | ||
1162 | - do { \ | ||
1163 | - fprintf(stderr, "TODO %s:%u: %s()\n", \ | ||
1164 | - __FILE__, __LINE__, __func__); \ | ||
1165 | - tcg_abort(); \ | ||
1166 | - } while (0) | ||
1167 | - | ||
1168 | -/* Bitfield n...m (in 32 bit value). */ | ||
1169 | -#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) | ||
1170 | +#include "../tcg-pool.c.inc" | ||
1171 | |||
1172 | static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
1173 | { | ||
1174 | @@ -XXX,XX +XXX,XX @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { | ||
1175 | static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
1176 | intptr_t value, intptr_t addend) | ||
1177 | { | ||
1178 | - /* tcg_out_reloc always uses the same type, addend. */ | ||
1179 | - tcg_debug_assert(type == sizeof(tcg_target_long)); | ||
1180 | + intptr_t diff = value - (intptr_t)(code_ptr + 1); | ||
1181 | + | ||
1182 | tcg_debug_assert(addend == 0); | ||
1183 | - tcg_debug_assert(value != 0); | ||
1184 | - if (TCG_TARGET_REG_BITS == 32) { | ||
1185 | - tcg_patch32(code_ptr, value); | ||
1186 | - } else { | ||
1187 | - tcg_patch64(code_ptr, value); | ||
1188 | - } | ||
1189 | - return true; | ||
1190 | -} | ||
1191 | - | ||
1192 | -/* Write value (native size). */ | ||
1193 | -static void tcg_out_i(TCGContext *s, tcg_target_ulong v) | ||
1194 | -{ | ||
1195 | - if (TCG_TARGET_REG_BITS == 32) { | ||
1196 | - tcg_out32(s, v); | ||
1197 | - } else { | ||
1198 | - tcg_out64(s, v); | ||
1199 | - } | ||
1200 | -} | ||
1201 | - | ||
1202 | -/* Write opcode. */ | ||
1203 | -static void tcg_out_op_t(TCGContext *s, TCGOpcode op) | ||
1204 | -{ | ||
1205 | - tcg_out8(s, op); | ||
1206 | - tcg_out8(s, 0); | ||
1207 | -} | ||
1208 | - | ||
1209 | -/* Write register. */ | ||
1210 | -static void tcg_out_r(TCGContext *s, TCGArg t0) | ||
1211 | -{ | ||
1212 | - tcg_debug_assert(t0 < TCG_TARGET_NB_REGS); | ||
1213 | - tcg_out8(s, t0); | ||
1214 | -} | ||
1215 | - | ||
1216 | -/* Write label. */ | ||
1217 | -static void tci_out_label(TCGContext *s, TCGLabel *label) | ||
1218 | -{ | ||
1219 | - if (label->has_value) { | ||
1220 | - tcg_out_i(s, label->u.value); | ||
1221 | - tcg_debug_assert(label->u.value); | ||
1222 | - } else { | ||
1223 | - tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0); | ||
1224 | - s->code_ptr += sizeof(tcg_target_ulong); | ||
1225 | + tcg_debug_assert(type == 20); | ||
1226 | + | ||
1227 | + if (diff == sextract32(diff, 0, type)) { | ||
1228 | + tcg_patch32(code_ptr, deposit32(*code_ptr, 32 - type, type, diff)); | ||
1229 | + return true; | ||
1230 | } | ||
1231 | + return false; | ||
1232 | } | ||
1233 | |||
1234 | static void stack_bounds_check(TCGReg base, target_long offset) | ||
1235 | @@ -XXX,XX +XXX,XX @@ static void stack_bounds_check(TCGReg base, target_long offset) | ||
1236 | |||
1237 | static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) | ||
1238 | { | ||
1239 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1240 | + tcg_insn_unit insn = 0; | ||
1241 | |||
1242 | - tcg_out_op_t(s, op); | ||
1243 | - tci_out_label(s, l0); | ||
1244 | - | ||
1245 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1246 | + tcg_out_reloc(s, s->code_ptr, 20, l0, 0); | ||
1247 | + insn = deposit32(insn, 0, 8, op); | ||
1248 | + tcg_out32(s, insn); | ||
1249 | } | ||
1250 | |||
1251 | static void tcg_out_op_p(TCGContext *s, TCGOpcode op, void *p0) | ||
1252 | { | ||
1253 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1254 | + tcg_insn_unit insn = 0; | ||
1255 | + intptr_t diff; | ||
1256 | |||
1257 | - tcg_out_op_t(s, op); | ||
1258 | - tcg_out_i(s, (uintptr_t)p0); | ||
1259 | - | ||
1260 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1261 | + /* Special case for exit_tb: map null -> 0. */ | ||
1262 | + if (p0 == NULL) { | ||
1263 | + diff = 0; | ||
1264 | + } else { | ||
1265 | + diff = p0 - (void *)(s->code_ptr + 1); | ||
1266 | + tcg_debug_assert(diff != 0); | ||
1267 | + if (diff != sextract32(diff, 0, 20)) { | ||
1268 | + tcg_raise_tb_overflow(s); | ||
1269 | + } | ||
1270 | + } | ||
1271 | + insn = deposit32(insn, 0, 8, op); | ||
1272 | + insn = deposit32(insn, 12, 20, diff); | ||
1273 | + tcg_out32(s, insn); | ||
1274 | } | ||
1275 | |||
1276 | static void tcg_out_op_v(TCGContext *s, TCGOpcode op) | ||
1277 | { | ||
1278 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1279 | - | ||
1280 | - tcg_out_op_t(s, op); | ||
1281 | - | ||
1282 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1283 | + tcg_out32(s, (uint8_t)op); | ||
1284 | } | ||
1285 | |||
1286 | static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) | ||
1287 | { | ||
1288 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1289 | + tcg_insn_unit insn = 0; | ||
1290 | |||
1291 | - tcg_out_op_t(s, op); | ||
1292 | - tcg_out_r(s, r0); | ||
1293 | - tcg_out32(s, i1); | ||
1294 | - | ||
1295 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1296 | + tcg_debug_assert(i1 == sextract32(i1, 0, 20)); | ||
1297 | + insn = deposit32(insn, 0, 8, op); | ||
1298 | + insn = deposit32(insn, 8, 4, r0); | ||
1299 | + insn = deposit32(insn, 12, 20, i1); | ||
1300 | + tcg_out32(s, insn); | ||
1301 | } | ||
1302 | |||
1303 | -#if TCG_TARGET_REG_BITS == 64 | ||
1304 | -static void tcg_out_op_rI(TCGContext *s, TCGOpcode op, | ||
1305 | - TCGReg r0, uint64_t i1) | ||
1306 | -{ | ||
1307 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1308 | - | ||
1309 | - tcg_out_op_t(s, op); | ||
1310 | - tcg_out_r(s, r0); | ||
1311 | - tcg_out64(s, i1); | ||
1312 | - | ||
1313 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1314 | -} | ||
1315 | -#endif | ||
1316 | - | ||
1317 | static void tcg_out_op_rl(TCGContext *s, TCGOpcode op, TCGReg r0, TCGLabel *l1) | ||
1318 | { | ||
1319 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1320 | + tcg_insn_unit insn = 0; | ||
1321 | |||
1322 | - tcg_out_op_t(s, op); | ||
1323 | - tcg_out_r(s, r0); | ||
1324 | - tci_out_label(s, l1); | ||
1325 | - | ||
1326 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1327 | + tcg_out_reloc(s, s->code_ptr, 20, l1, 0); | ||
1328 | + insn = deposit32(insn, 0, 8, op); | ||
1329 | + insn = deposit32(insn, 8, 4, r0); | ||
1330 | + tcg_out32(s, insn); | ||
1331 | } | ||
1332 | |||
1333 | static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1) | ||
1334 | { | ||
1335 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1336 | + tcg_insn_unit insn = 0; | ||
1337 | |||
1338 | - tcg_out_op_t(s, op); | ||
1339 | - tcg_out_r(s, r0); | ||
1340 | - tcg_out_r(s, r1); | ||
1341 | - | ||
1342 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1343 | + insn = deposit32(insn, 0, 8, op); | ||
1344 | + insn = deposit32(insn, 8, 4, r0); | ||
1345 | + insn = deposit32(insn, 12, 4, r1); | ||
1346 | + tcg_out32(s, insn); | ||
1347 | } | ||
1348 | |||
1349 | static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op, | ||
1350 | TCGReg r0, TCGReg r1, TCGArg m2) | ||
1351 | { | ||
1352 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1353 | + tcg_insn_unit insn = 0; | ||
1354 | |||
1355 | - tcg_out_op_t(s, op); | ||
1356 | - tcg_out_r(s, r0); | ||
1357 | - tcg_out_r(s, r1); | ||
1358 | - tcg_out32(s, m2); | ||
1359 | - | ||
1360 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1361 | + tcg_debug_assert(m2 == extract32(m2, 0, 12)); | ||
1362 | + insn = deposit32(insn, 0, 8, op); | ||
1363 | + insn = deposit32(insn, 8, 4, r0); | ||
1364 | + insn = deposit32(insn, 12, 4, r1); | ||
1365 | + insn = deposit32(insn, 20, 12, m2); | ||
1366 | + tcg_out32(s, insn); | ||
1367 | } | ||
1368 | |||
1369 | static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op, | ||
1370 | TCGReg r0, TCGReg r1, TCGReg r2) | ||
1371 | { | ||
1372 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1373 | + tcg_insn_unit insn = 0; | ||
1374 | |||
1375 | - tcg_out_op_t(s, op); | ||
1376 | - tcg_out_r(s, r0); | ||
1377 | - tcg_out_r(s, r1); | ||
1378 | - tcg_out_r(s, r2); | ||
1379 | - | ||
1380 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1381 | + insn = deposit32(insn, 0, 8, op); | ||
1382 | + insn = deposit32(insn, 8, 4, r0); | ||
1383 | + insn = deposit32(insn, 12, 4, r1); | ||
1384 | + insn = deposit32(insn, 16, 4, r2); | ||
1385 | + tcg_out32(s, insn); | ||
1386 | } | ||
1387 | |||
1388 | static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op, | ||
1389 | TCGReg r0, TCGReg r1, intptr_t i2) | ||
1390 | { | ||
1391 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1392 | + tcg_insn_unit insn = 0; | ||
1393 | |||
1394 | - tcg_out_op_t(s, op); | ||
1395 | - tcg_out_r(s, r0); | ||
1396 | - tcg_out_r(s, r1); | ||
1397 | - tcg_debug_assert(i2 == (int32_t)i2); | ||
1398 | - tcg_out32(s, i2); | ||
1399 | - | ||
1400 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1401 | + tcg_debug_assert(i2 == sextract32(i2, 0, 16)); | ||
1402 | + insn = deposit32(insn, 0, 8, op); | ||
1403 | + insn = deposit32(insn, 8, 4, r0); | ||
1404 | + insn = deposit32(insn, 12, 4, r1); | ||
1405 | + insn = deposit32(insn, 16, 16, i2); | ||
1406 | + tcg_out32(s, insn); | ||
1407 | } | ||
1408 | |||
1409 | static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op, | ||
1410 | TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3) | ||
1411 | { | ||
1412 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1413 | + tcg_insn_unit insn = 0; | ||
1414 | |||
1415 | - tcg_out_op_t(s, op); | ||
1416 | - tcg_out_r(s, r0); | ||
1417 | - tcg_out_r(s, r1); | ||
1418 | - tcg_out_r(s, r2); | ||
1419 | - tcg_out8(s, c3); | ||
1420 | - | ||
1421 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1422 | + insn = deposit32(insn, 0, 8, op); | ||
1423 | + insn = deposit32(insn, 8, 4, r0); | ||
1424 | + insn = deposit32(insn, 12, 4, r1); | ||
1425 | + insn = deposit32(insn, 16, 4, r2); | ||
1426 | + insn = deposit32(insn, 20, 4, c3); | ||
1427 | + tcg_out32(s, insn); | ||
1428 | } | ||
1429 | |||
1430 | static void tcg_out_op_rrrm(TCGContext *s, TCGOpcode op, | ||
1431 | TCGReg r0, TCGReg r1, TCGReg r2, TCGArg m3) | ||
1432 | { | ||
1433 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1434 | + tcg_insn_unit insn = 0; | ||
1435 | |||
1436 | - tcg_out_op_t(s, op); | ||
1437 | - tcg_out_r(s, r0); | ||
1438 | - tcg_out_r(s, r1); | ||
1439 | - tcg_out_r(s, r2); | ||
1440 | - tcg_out32(s, m3); | ||
1441 | - | ||
1442 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1443 | + tcg_debug_assert(m3 == extract32(m3, 0, 12)); | ||
1444 | + insn = deposit32(insn, 0, 8, op); | ||
1445 | + insn = deposit32(insn, 8, 4, r0); | ||
1446 | + insn = deposit32(insn, 12, 4, r1); | ||
1447 | + insn = deposit32(insn, 16, 4, r2); | ||
1448 | + insn = deposit32(insn, 20, 12, m3); | ||
1449 | + tcg_out32(s, insn); | ||
1450 | } | ||
1451 | |||
1452 | static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0, | ||
1453 | TCGReg r1, TCGReg r2, uint8_t b3, uint8_t b4) | ||
1454 | { | ||
1455 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1456 | + tcg_insn_unit insn = 0; | ||
1457 | |||
1458 | - tcg_out_op_t(s, op); | ||
1459 | - tcg_out_r(s, r0); | ||
1460 | - tcg_out_r(s, r1); | ||
1461 | - tcg_out_r(s, r2); | ||
1462 | - tcg_out8(s, b3); | ||
1463 | - tcg_out8(s, b4); | ||
1464 | - | ||
1465 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1466 | + tcg_debug_assert(b3 == extract32(b3, 0, 6)); | ||
1467 | + tcg_debug_assert(b4 == extract32(b4, 0, 6)); | ||
1468 | + insn = deposit32(insn, 0, 8, op); | ||
1469 | + insn = deposit32(insn, 8, 4, r0); | ||
1470 | + insn = deposit32(insn, 12, 4, r1); | ||
1471 | + insn = deposit32(insn, 16, 4, r2); | ||
1472 | + insn = deposit32(insn, 20, 6, b3); | ||
1473 | + insn = deposit32(insn, 26, 6, b4); | ||
1474 | + tcg_out32(s, insn); | ||
1475 | } | ||
1476 | |||
1477 | -static void tcg_out_op_rrrrm(TCGContext *s, TCGOpcode op, TCGReg r0, | ||
1478 | - TCGReg r1, TCGReg r2, TCGReg r3, TCGArg m4) | ||
1479 | +static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op, TCGReg r0, | ||
1480 | + TCGReg r1, TCGReg r2, TCGReg r3, TCGReg r4) | ||
1481 | { | ||
1482 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1483 | + tcg_insn_unit insn = 0; | ||
1484 | |||
1485 | - tcg_out_op_t(s, op); | ||
1486 | - tcg_out_r(s, r0); | ||
1487 | - tcg_out_r(s, r1); | ||
1488 | - tcg_out_r(s, r2); | ||
1489 | - tcg_out_r(s, r3); | ||
1490 | - tcg_out32(s, m4); | ||
1491 | - | ||
1492 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1493 | + insn = deposit32(insn, 0, 8, op); | ||
1494 | + insn = deposit32(insn, 8, 4, r0); | ||
1495 | + insn = deposit32(insn, 12, 4, r1); | ||
1496 | + insn = deposit32(insn, 16, 4, r2); | ||
1497 | + insn = deposit32(insn, 20, 4, r3); | ||
1498 | + insn = deposit32(insn, 24, 4, r4); | ||
1499 | + tcg_out32(s, insn); | ||
1500 | } | ||
1501 | |||
1502 | #if TCG_TARGET_REG_BITS == 32 | ||
1503 | static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op, | ||
1504 | TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3) | ||
1505 | { | ||
1506 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1507 | + tcg_insn_unit insn = 0; | ||
1508 | |||
1509 | - tcg_out_op_t(s, op); | ||
1510 | - tcg_out_r(s, r0); | ||
1511 | - tcg_out_r(s, r1); | ||
1512 | - tcg_out_r(s, r2); | ||
1513 | - tcg_out_r(s, r3); | ||
1514 | - | ||
1515 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1516 | + insn = deposit32(insn, 0, 8, op); | ||
1517 | + insn = deposit32(insn, 8, 4, r0); | ||
1518 | + insn = deposit32(insn, 12, 4, r1); | ||
1519 | + insn = deposit32(insn, 16, 4, r2); | ||
1520 | + insn = deposit32(insn, 20, 4, r3); | ||
1521 | + tcg_out32(s, insn); | ||
1522 | } | ||
1523 | |||
1524 | static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, | ||
1525 | TCGReg r0, TCGReg r1, TCGReg r2, | ||
1526 | TCGReg r3, TCGReg r4, TCGCond c5) | ||
1527 | { | ||
1528 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1529 | + tcg_insn_unit insn = 0; | ||
1530 | |||
1531 | - tcg_out_op_t(s, op); | ||
1532 | - tcg_out_r(s, r0); | ||
1533 | - tcg_out_r(s, r1); | ||
1534 | - tcg_out_r(s, r2); | ||
1535 | - tcg_out_r(s, r3); | ||
1536 | - tcg_out_r(s, r4); | ||
1537 | - tcg_out8(s, c5); | ||
1538 | - | ||
1539 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1540 | + insn = deposit32(insn, 0, 8, op); | ||
1541 | + insn = deposit32(insn, 8, 4, r0); | ||
1542 | + insn = deposit32(insn, 12, 4, r1); | ||
1543 | + insn = deposit32(insn, 16, 4, r2); | ||
1544 | + insn = deposit32(insn, 20, 4, r3); | ||
1545 | + insn = deposit32(insn, 24, 4, r4); | ||
1546 | + insn = deposit32(insn, 28, 4, c5); | ||
1547 | + tcg_out32(s, insn); | ||
1548 | } | ||
1549 | |||
1550 | static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op, | ||
1551 | TCGReg r0, TCGReg r1, TCGReg r2, | ||
1552 | TCGReg r3, TCGReg r4, TCGReg r5) | ||
1553 | { | ||
1554 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1555 | + tcg_insn_unit insn = 0; | ||
1556 | |||
1557 | - tcg_out_op_t(s, op); | ||
1558 | - tcg_out_r(s, r0); | ||
1559 | - tcg_out_r(s, r1); | ||
1560 | - tcg_out_r(s, r2); | ||
1561 | - tcg_out_r(s, r3); | ||
1562 | - tcg_out_r(s, r4); | ||
1563 | - tcg_out_r(s, r5); | ||
1564 | - | ||
1565 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1566 | + insn = deposit32(insn, 0, 8, op); | ||
1567 | + insn = deposit32(insn, 8, 4, r0); | ||
1568 | + insn = deposit32(insn, 12, 4, r1); | ||
1569 | + insn = deposit32(insn, 16, 4, r2); | ||
1570 | + insn = deposit32(insn, 20, 4, r3); | ||
1571 | + insn = deposit32(insn, 24, 4, r4); | ||
1572 | + insn = deposit32(insn, 28, 4, r5); | ||
1573 | + tcg_out32(s, insn); | ||
1574 | } | ||
1575 | #endif | ||
1576 | |||
1577 | +static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val, | ||
1578 | + TCGReg base, intptr_t offset) | ||
1579 | +{ | ||
1580 | + stack_bounds_check(base, offset); | ||
1581 | + if (offset != sextract32(offset, 0, 16)) { | ||
1582 | + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset); | ||
1583 | + tcg_out_op_rrr(s, (TCG_TARGET_REG_BITS == 32 | ||
1584 | + ? INDEX_op_add_i32 : INDEX_op_add_i64), | ||
1585 | + TCG_REG_TMP, TCG_REG_TMP, base); | ||
1586 | + base = TCG_REG_TMP; | ||
1587 | + offset = 0; | ||
1588 | + } | ||
1589 | + tcg_out_op_rrs(s, op, val, base, offset); | ||
1590 | +} | ||
1591 | + | ||
1592 | static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base, | ||
1593 | intptr_t offset) | ||
1594 | { | ||
1595 | - stack_bounds_check(base, offset); | ||
1596 | switch (type) { | ||
1597 | case TCG_TYPE_I32: | ||
1598 | - tcg_out_op_rrs(s, INDEX_op_ld_i32, val, base, offset); | ||
1599 | + tcg_out_ldst(s, INDEX_op_ld_i32, val, base, offset); | ||
1600 | break; | ||
1601 | #if TCG_TARGET_REG_BITS == 64 | ||
1602 | case TCG_TYPE_I64: | ||
1603 | - tcg_out_op_rrs(s, INDEX_op_ld_i64, val, base, offset); | ||
1604 | + tcg_out_ldst(s, INDEX_op_ld_i64, val, base, offset); | ||
1605 | break; | ||
1606 | #endif | ||
1607 | default: | ||
1608 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, | ||
1609 | { | ||
1610 | switch (type) { | ||
1611 | case TCG_TYPE_I32: | ||
1612 | - tcg_out_op_ri(s, INDEX_op_tci_movi_i32, ret, arg); | ||
1613 | - break; | ||
1614 | #if TCG_TARGET_REG_BITS == 64 | ||
1615 | + arg = (int32_t)arg; | ||
1616 | + /* fall through */ | ||
1617 | case TCG_TYPE_I64: | ||
1618 | - tcg_out_op_rI(s, INDEX_op_tci_movi_i64, ret, arg); | ||
1619 | - break; | ||
1620 | #endif | ||
1621 | + break; | ||
1622 | default: | ||
1623 | g_assert_not_reached(); | ||
1624 | } | ||
1625 | + | ||
1626 | + if (arg == sextract32(arg, 0, 20)) { | ||
1627 | + tcg_out_op_ri(s, INDEX_op_tci_movi, ret, arg); | ||
1628 | + } else { | ||
1629 | + tcg_insn_unit insn = 0; | ||
1630 | + | ||
1631 | + new_pool_label(s, arg, 20, s->code_ptr, 0); | ||
1632 | + insn = deposit32(insn, 0, 8, INDEX_op_tci_movl); | ||
1633 | + insn = deposit32(insn, 8, 4, ret); | ||
1634 | + tcg_out32(s, insn); | ||
1635 | + } | ||
1636 | } | ||
1637 | |||
1638 | static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, | ||
1639 | ffi_cif *cif) | ||
1640 | { | ||
1641 | - uint8_t *old_code_ptr = s->code_ptr; | ||
1642 | + tcg_insn_unit insn = 0; | ||
1643 | uint8_t which; | ||
1644 | |||
1645 | if (cif->rtype == &ffi_type_void) { | ||
1646 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, | ||
1647 | tcg_debug_assert(cif->rtype->size == 8); | ||
1648 | which = 2; | ||
1649 | } | ||
1650 | - tcg_out_op_t(s, INDEX_op_call); | ||
1651 | - tcg_out8(s, which); | ||
1652 | - tcg_out_i(s, (uintptr_t)func); | ||
1653 | - tcg_out_i(s, (uintptr_t)cif); | ||
1654 | - | ||
1655 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
1656 | + new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif); | ||
1657 | + insn = deposit32(insn, 0, 8, INDEX_op_call); | ||
1658 | + insn = deposit32(insn, 8, 4, which); | ||
1659 | + tcg_out32(s, insn); | ||
1660 | } | ||
1661 | |||
1662 | #if TCG_TARGET_REG_BITS == 64 | ||
1663 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
1664 | case INDEX_op_st_i32: | ||
1665 | CASE_64(st32) | ||
1666 | CASE_64(st) | ||
1667 | - stack_bounds_check(args[1], args[2]); | ||
1668 | - tcg_out_op_rrs(s, opc, args[0], args[1], args[2]); | ||
1669 | + tcg_out_ldst(s, opc, args[0], args[1], args[2]); | ||
1670 | break; | ||
1671 | |||
1672 | CASE_32_64(add) | ||
1673 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
1674 | } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) { | ||
1675 | tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]); | ||
1676 | } else { | ||
1677 | - tcg_out_op_rrrrm(s, opc, args[0], args[1], | ||
1678 | - args[2], args[3], args[4]); | ||
1679 | + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]); | ||
1680 | + tcg_out_op_rrrrr(s, opc, args[0], args[1], | ||
1681 | + args[2], args[3], TCG_REG_TMP); | ||
1682 | } | ||
1683 | break; | ||
1684 | |||
1685 | @@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) | ||
1686 | return ct & TCG_CT_CONST; | ||
1687 | } | ||
1688 | |||
1689 | +static void tcg_out_nop_fill(tcg_insn_unit *p, int count) | ||
1690 | +{ | ||
1691 | + memset(p, 0, sizeof(*p) * count); | ||
1692 | +} | ||
1693 | + | ||
1694 | static void tcg_target_init(TCGContext *s) | ||
1695 | { | ||
1696 | #if defined(CONFIG_DEBUG_TCG_INTERPRETER) | ||
1697 | diff --git a/tcg/tci/README b/tcg/tci/README | ||
1698 | index XXXXXXX..XXXXXXX 100644 | ||
1699 | --- a/tcg/tci/README | ||
1700 | +++ b/tcg/tci/README | ||
1701 | @@ -XXX,XX +XXX,XX @@ This is what TCI (Tiny Code Interpreter) does. | ||
1702 | Like each TCG host frontend, TCI implements the code generator in | ||
1703 | tcg-target.c.inc, tcg-target.h. Both files are in directory tcg/tci. | ||
1704 | |||
1705 | -The additional file tcg/tci.c adds the interpreter. | ||
1706 | +The additional file tcg/tci.c adds the interpreter and disassembler. | ||
1707 | |||
1708 | -The bytecode consists of opcodes (same numeric values as those used by | ||
1709 | -TCG), command length and arguments of variable size and number. | ||
1710 | +The bytecode consists of opcodes (with only a few exceptions, having | ||
1711 | +the same numeric values and semantics as used by TCG), and up | ||
1712 | +to six arguments packed into a 32-bit integer. See comments in tci.c | ||
1713 | +for details on the encoding. | ||
1714 | |||
1715 | 3) Usage | ||
1716 | |||
1717 | @@ -XXX,XX +XXX,XX @@ suggest using this option. Setting it automatically would need | ||
1718 | additional code in configure which must be fixed when new native TCG | ||
1719 | implementations are added. | ||
1720 | |||
1721 | -System emulation should work on any 32 or 64 bit host. | ||
1722 | -User mode emulation might work. Maybe a new linker script (*.ld) | ||
1723 | -is needed. Byte order might be wrong (on big endian hosts) | ||
1724 | -and need fixes in configure. | ||
1725 | - | ||
1726 | For hosts with native TCG, the interpreter TCI can be enabled by | ||
1727 | |||
1728 | configure --enable-tcg-interpreter | ||
1729 | @@ -XXX,XX +XXX,XX @@ u1 = linux-user-test works | ||
1730 | in the interpreter. These opcodes raise a runtime exception, so it is | ||
1731 | possible to see where code must be added. | ||
1732 | |||
1733 | -* The pseudo code is not optimized and still ugly. For hosts with special | ||
1734 | - alignment requirements, it needs some fixes (maybe aligned bytecode | ||
1735 | - would also improve speed for hosts which support byte alignment). | ||
1736 | - | ||
1737 | -* A better disassembler for the pseudo code would be nice (a very primitive | ||
1738 | - disassembler is included in tcg-target.c.inc). | ||
1739 | - | ||
1740 | * It might be useful to have a runtime option which selects the native TCG | ||
1741 | or TCI, so QEMU would have to include two TCGs. Today, selecting TCI | ||
1742 | is a configure option, so you need two compilations of QEMU. | ||
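
As a rough standalone illustration of the fixed-width encoding introduced above -- assuming only the field layout visible in the hunks (8-bit opcode at bit 0, 4-bit register fields, a signed 20-bit displacement at bit 12) -- the following sketch packs and unpacks one insn. deposit32() and sextract32() are local reimplementations approximating the semantics of QEMU's bitops.h, and the operand values are made up for the example:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t deposit32(uint32_t val, int start, int len, uint32_t field)
{
    uint32_t mask = (~0U >> (32 - len)) << start;
    return (val & ~mask) | ((field << start) & mask);
}

static int32_t sextract32(uint32_t val, int start, int len)
{
    /* Shift the field's sign bit up to bit 31, then sign-extend back down. */
    return (int32_t)(val << (32 - len - start)) >> (32 - len);
}

int main(void)
{
    uint32_t insn = 0;
    int op = 0x12, r0 = 3, r1 = 7, r2 = 15;   /* hypothetical operands */
    int32_t diff = -2000;                     /* branch displacement */

    /* Pack an rrr-format insn: opcode in bits 0..7, regs in 4-bit fields. */
    insn = deposit32(insn, 0, 8, op);
    insn = deposit32(insn, 8, 4, r0);
    insn = deposit32(insn, 12, 4, r1);
    insn = deposit32(insn, 16, 4, r2);
    printf("rrr insn = 0x%08x\n", insn);

    /* The 20-bit range check used by patch_reloc and tcg_out_op_p. */
    assert(diff == sextract32(diff, 0, 20));  /* fits in 20 signed bits */
    insn = deposit32(0, 0, 8, op);
    insn = deposit32(insn, 12, 20, diff);
    /* Unpacking recovers the signed displacement. */
    assert(sextract32(insn, 12, 20) == diff);
    return 0;
}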
1743 | -- | 183 | -- |
1744 | 2.25.1 | 184 | 2.17.2 |
1745 | 185 | ||
1 | The encoding planned for tci does not have enough room for | 1 | Reviewed-by: David Hildenbrand <david@redhat.com> |
---|---|---|---|
2 | brcond2, with 4 registers and a condition as input as well | ||
3 | as the label. Resolve the condition into TCG_REG_TMP, and | ||
4 | relax brcond to one register plus a label, considering the | ||
5 | condition to always be reg != 0. | ||
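
As a rough model of this lowering -- with setcond2_ltu() as a hypothetical stand-in for what INDEX_op_setcond2_i32 computes for TCG_COND_LTU, not the actual TCG implementation -- the full double-word condition is materialized into a temporary first, and the relaxed branch then tests only that temporary against zero:

#include <assert.h>
#include <stdint.h>

/* Stand-in for setcond2: compare two 64-bit values held in 32-bit pairs. */
static uint32_t setcond2_ltu(uint32_t al, uint32_t ah, uint32_t bl, uint32_t bh)
{
    uint64_t a = ((uint64_t)ah << 32) | al;
    uint64_t b = ((uint64_t)bh << 32) | bl;
    return a < b;   /* 0 or 1, landing in TCG_REG_TMP */
}

int main(void)
{
    /* Step 1: resolve the condition into the temporary. */
    uint32_t tmp = setcond2_ltu(1, 0, 0, 1);   /* 0x1 < 0x100000000 */

    /* Step 2: the relaxed brcond only tests tmp != 0. */
    if (tmp != 0) {
        /* branch taken */
    }
    assert(tmp == 1);

    assert(setcond2_ltu(0, 1, 1, 0) == 0);     /* branch not taken */
    return 0;
}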
6 | |||
7 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 3 | --- |
11 | tcg/tci.c | 68 ++++++++++------------------------------ | 4 | target/s390x/mem_helper.c | 128 ++++++++++++++++++-------------------- |
12 | tcg/tci/tcg-target.c.inc | 52 +++++++++++------------------- | 5 | 1 file changed, 61 insertions(+), 67 deletions(-) |
13 | 2 files changed, 35 insertions(+), 85 deletions(-) | ||
14 | 6 | ||
15 | diff --git a/tcg/tci.c b/tcg/tci.c | 7 | diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c |
16 | index XXXXXXX..XXXXXXX 100644 | 8 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tcg/tci.c | 9 | --- a/target/s390x/mem_helper.c |
18 | +++ b/tcg/tci.c | 10 | +++ b/target/s390x/mem_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ static void tci_args_nll(const uint8_t **tb_ptr, uint8_t *n0, | 11 | @@ -XXX,XX +XXX,XX @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, |
20 | check_size(start, tb_ptr); | 12 | return cc; |
21 | } | 13 | } |
22 | 14 | ||
23 | +static void tci_args_rl(const uint8_t **tb_ptr, TCGReg *r0, void **l1) | 15 | -static void do_cdsg(CPUS390XState *env, uint64_t addr, |
24 | +{ | 16 | - uint32_t r1, uint32_t r3, bool parallel) |
25 | + const uint8_t *start = *tb_ptr; | 17 | +void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, |
18 | + uint32_t r1, uint32_t r3) | ||
19 | { | ||
20 | uintptr_t ra = GETPC(); | ||
21 | Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); | ||
22 | Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); | ||
23 | Int128 oldv; | ||
24 | + uint64_t oldh, oldl; | ||
25 | bool fail; | ||
26 | |||
27 | - if (parallel) { | ||
28 | -#if !HAVE_CMPXCHG128 | ||
29 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
30 | -#else | ||
31 | - int mem_idx = cpu_mmu_index(env, false); | ||
32 | - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
33 | - oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); | ||
34 | - fail = !int128_eq(oldv, cmpv); | ||
35 | -#endif | ||
36 | - } else { | ||
37 | - uint64_t oldh, oldl; | ||
38 | + check_alignment(env, addr, 16, ra); | ||
39 | |||
40 | - check_alignment(env, addr, 16, ra); | ||
41 | + oldh = cpu_ldq_data_ra(env, addr + 0, ra); | ||
42 | + oldl = cpu_ldq_data_ra(env, addr + 8, ra); | ||
43 | |||
44 | - oldh = cpu_ldq_data_ra(env, addr + 0, ra); | ||
45 | - oldl = cpu_ldq_data_ra(env, addr + 8, ra); | ||
46 | - | ||
47 | - oldv = int128_make128(oldl, oldh); | ||
48 | - fail = !int128_eq(oldv, cmpv); | ||
49 | - if (fail) { | ||
50 | - newv = oldv; | ||
51 | - } | ||
52 | - | ||
53 | - cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); | ||
54 | - cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); | ||
55 | + oldv = int128_make128(oldl, oldh); | ||
56 | + fail = !int128_eq(oldv, cmpv); | ||
57 | + if (fail) { | ||
58 | + newv = oldv; | ||
59 | } | ||
60 | |||
61 | + cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); | ||
62 | + cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); | ||
26 | + | 63 | + |
27 | + *r0 = tci_read_r(tb_ptr); | 64 | env->cc_op = fail; |
28 | + *l1 = (void *)tci_read_label(tb_ptr); | 65 | env->regs[r1] = int128_gethi(oldv); |
66 | env->regs[r1 + 1] = int128_getlo(oldv); | ||
67 | } | ||
68 | |||
69 | -void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, | ||
70 | - uint32_t r1, uint32_t r3) | ||
71 | -{ | ||
72 | - do_cdsg(env, addr, r1, r3, false); | ||
73 | -} | ||
74 | - | ||
75 | void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, | ||
76 | uint32_t r1, uint32_t r3) | ||
77 | { | ||
78 | - do_cdsg(env, addr, r1, r3, true); | ||
79 | + uintptr_t ra = GETPC(); | ||
80 | + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); | ||
81 | + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); | ||
82 | + int mem_idx; | ||
83 | + TCGMemOpIdx oi; | ||
84 | + Int128 oldv; | ||
85 | + bool fail; | ||
29 | + | 86 | + |
30 | + check_size(start, tb_ptr); | 87 | + if (!HAVE_CMPXCHG128) { |
88 | + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
89 | + } | ||
90 | + | ||
91 | + mem_idx = cpu_mmu_index(env, false); | ||
92 | + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
93 | + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); | ||
94 | + fail = !int128_eq(oldv, cmpv); | ||
95 | + | ||
96 | + env->cc_op = fail; | ||
97 | + env->regs[r1] = int128_gethi(oldv); | ||
98 | + env->regs[r1 + 1] = int128_getlo(oldv); | ||
99 | } | ||
100 | |||
101 | static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, | ||
102 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr) | ||
103 | #endif | ||
104 | |||
105 | /* load pair from quadword */ | ||
106 | -static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel) | ||
107 | +uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) | ||
108 | { | ||
109 | uintptr_t ra = GETPC(); | ||
110 | uint64_t hi, lo; | ||
111 | |||
112 | - if (!parallel) { | ||
113 | - check_alignment(env, addr, 16, ra); | ||
114 | - hi = cpu_ldq_data_ra(env, addr + 0, ra); | ||
115 | - lo = cpu_ldq_data_ra(env, addr + 8, ra); | ||
116 | - } else if (HAVE_ATOMIC128) { | ||
117 | + check_alignment(env, addr, 16, ra); | ||
118 | + hi = cpu_ldq_data_ra(env, addr + 0, ra); | ||
119 | + lo = cpu_ldq_data_ra(env, addr + 8, ra); | ||
120 | + | ||
121 | + env->retxl = lo; | ||
122 | + return hi; | ||
31 | +} | 123 | +} |
32 | + | 124 | + |
33 | static void tci_args_rr(const uint8_t **tb_ptr, | 125 | +uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) |
34 | TCGReg *r0, TCGReg *r1) | 126 | +{ |
35 | { | 127 | + uintptr_t ra = GETPC(); |
36 | @@ -XXX,XX +XXX,XX @@ static void tci_args_rrs(const uint8_t **tb_ptr, | 128 | + uint64_t hi, lo; |
37 | check_size(start, tb_ptr); | 129 | + |
130 | + if (HAVE_ATOMIC128) { | ||
131 | int mem_idx = cpu_mmu_index(env, false); | ||
132 | TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
133 | Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); | ||
134 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel) | ||
135 | return hi; | ||
38 | } | 136 | } |
39 | 137 | ||
40 | -static void tci_args_rrcl(const uint8_t **tb_ptr, | 138 | -uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) |
41 | - TCGReg *r0, TCGReg *r1, TCGCond *c2, void **l3) | ||
42 | -{ | 139 | -{ |
43 | - const uint8_t *start = *tb_ptr; | 140 | - return do_lpq(env, addr, false); |
44 | - | ||
45 | - *r0 = tci_read_r(tb_ptr); | ||
46 | - *r1 = tci_read_r(tb_ptr); | ||
47 | - *c2 = tci_read_b(tb_ptr); | ||
48 | - *l3 = (void *)tci_read_label(tb_ptr); | ||
49 | - | ||
50 | - check_size(start, tb_ptr); | ||
51 | -} | 141 | -} |
52 | - | 142 | - |
53 | static void tci_args_rrrc(const uint8_t **tb_ptr, | 143 | -uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) |
54 | TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3) | ||
55 | { | ||
56 | @@ -XXX,XX +XXX,XX @@ static void tci_args_rrrr(const uint8_t **tb_ptr, | ||
57 | check_size(start, tb_ptr); | ||
58 | } | ||
59 | |||
60 | -static void tci_args_rrrrcl(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, | ||
61 | - TCGReg *r2, TCGReg *r3, TCGCond *c4, void **l5) | ||
62 | -{ | 144 | -{ |
63 | - const uint8_t *start = *tb_ptr; | 145 | - return do_lpq(env, addr, true); |
64 | - | ||
65 | - *r0 = tci_read_r(tb_ptr); | ||
66 | - *r1 = tci_read_r(tb_ptr); | ||
67 | - *r2 = tci_read_r(tb_ptr); | ||
68 | - *r3 = tci_read_r(tb_ptr); | ||
69 | - *c4 = tci_read_b(tb_ptr); | ||
70 | - *l5 = (void *)tci_read_label(tb_ptr); | ||
71 | - | ||
72 | - check_size(start, tb_ptr); | ||
73 | -} | 146 | -} |
74 | - | 147 | - |
75 | static void tci_args_rrrrrc(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, | 148 | /* store pair to quadword */ |
76 | TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5) | 149 | -static void do_stpq(CPUS390XState *env, uint64_t addr, |
150 | - uint64_t low, uint64_t high, bool parallel) | ||
151 | +void HELPER(stpq)(CPUS390XState *env, uint64_t addr, | ||
152 | + uint64_t low, uint64_t high) | ||
77 | { | 153 | { |
78 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | 154 | uintptr_t ra = GETPC(); |
79 | break; | 155 | |
80 | #endif | 156 | - if (!parallel) { |
81 | case INDEX_op_brcond_i32: | 157 | - check_alignment(env, addr, 16, ra); |
82 | - tci_args_rrcl(&tb_ptr, &r0, &r1, &condition, &ptr); | 158 | - cpu_stq_data_ra(env, addr + 0, high, ra); |
83 | - if (tci_compare32(regs[r0], regs[r1], condition)) { | 159 | - cpu_stq_data_ra(env, addr + 8, low, ra); |
84 | + tci_args_rl(&tb_ptr, &r0, &ptr); | 160 | - } else if (HAVE_ATOMIC128) { |
85 | + if ((uint32_t)regs[r0]) { | 161 | + check_alignment(env, addr, 16, ra); |
86 | tb_ptr = ptr; | 162 | + cpu_stq_data_ra(env, addr + 0, high, ra); |
87 | } | 163 | + cpu_stq_data_ra(env, addr + 8, low, ra); |
88 | break; | ||
89 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
90 | T2 = tci_uint64(regs[r5], regs[r4]); | ||
91 | tci_write_reg64(regs, r1, r0, T1 - T2); | ||
92 | break; | ||
93 | - case INDEX_op_brcond2_i32: | ||
94 | - tci_args_rrrrcl(&tb_ptr, &r0, &r1, &r2, &r3, &condition, &ptr); | ||
95 | - T1 = tci_uint64(regs[r1], regs[r0]); | ||
96 | - T2 = tci_uint64(regs[r3], regs[r2]); | ||
97 | - if (tci_compare64(T1, T2, condition)) { | ||
98 | - tb_ptr = ptr; | ||
99 | - continue; | ||
100 | - } | ||
101 | - break; | ||
102 | case INDEX_op_mulu2_i32: | ||
103 | tci_args_rrrr(&tb_ptr, &r0, &r1, &r2, &r3); | ||
104 | tci_write_reg64(regs, r1, r0, (uint64_t)regs[r2] * regs[r3]); | ||
105 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
106 | break; | ||
107 | #endif | ||
108 | case INDEX_op_brcond_i64: | ||
109 | - tci_args_rrcl(&tb_ptr, &r0, &r1, &condition, &ptr); | ||
110 | - if (tci_compare64(regs[r0], regs[r1], condition)) { | ||
111 | + tci_args_rl(&tb_ptr, &r0, &ptr); | ||
112 | + if (regs[r0]) { | ||
113 | tb_ptr = ptr; | ||
114 | } | ||
115 | break; | ||
116 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
117 | |||
118 | case INDEX_op_brcond_i32: | ||
119 | case INDEX_op_brcond_i64: | ||
120 | - tci_args_rrcl(&tb_ptr, &r0, &r1, &c, &ptr); | ||
121 | - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %p", | ||
122 | - op_name, str_r(r0), str_r(r1), str_c(c), ptr); | ||
123 | + tci_args_rl(&tb_ptr, &r0, &ptr); | ||
124 | + info->fprintf_func(info->stream, "%-12s %s, 0, ne, %p", | ||
125 | + op_name, str_r(r0), ptr); | ||
126 | break; | ||
127 | |||
128 | case INDEX_op_setcond_i32: | ||
129 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
130 | str_r(r3), str_r(r4), str_c(c)); | ||
131 | break; | ||
132 | |||
133 | - case INDEX_op_brcond2_i32: | ||
134 | - tci_args_rrrrcl(&tb_ptr, &r0, &r1, &r2, &r3, &c, &ptr); | ||
135 | - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %p", | ||
136 | - op_name, str_r(r0), str_r(r1), | ||
137 | - str_r(r2), str_r(r3), str_c(c), ptr); | ||
138 | - break; | ||
139 | - | ||
140 | case INDEX_op_mulu2_i32: | ||
141 | tci_args_rrrr(&tb_ptr, &r0, &r1, &r2, &r3); | ||
142 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", | ||
143 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
144 | index XXXXXXX..XXXXXXX 100644 | ||
145 | --- a/tcg/tci/tcg-target.c.inc | ||
146 | +++ b/tcg/tci/tcg-target.c.inc | ||
147 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rI(TCGContext *s, TCGOpcode op, | ||
148 | } | ||
149 | #endif | ||
150 | |||
151 | +static void tcg_out_op_rl(TCGContext *s, TCGOpcode op, TCGReg r0, TCGLabel *l1) | ||
152 | +{ | ||
153 | + uint8_t *old_code_ptr = s->code_ptr; | ||
154 | + | ||
155 | + tcg_out_op_t(s, op); | ||
156 | + tcg_out_r(s, r0); | ||
157 | + tci_out_label(s, l1); | ||
158 | + | ||
159 | + old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
160 | +} | 164 | +} |
161 | + | 165 | + |
162 | static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1) | 166 | +void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, |
163 | { | 167 | + uint64_t low, uint64_t high) |
164 | uint8_t *old_code_ptr = s->code_ptr; | 168 | +{ |
165 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op, | 169 | + uintptr_t ra = GETPC(); |
166 | old_code_ptr[1] = s->code_ptr - old_code_ptr; | 170 | + |
171 | + if (HAVE_ATOMIC128) { | ||
172 | int mem_idx = cpu_mmu_index(env, false); | ||
173 | TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
174 | Int128 v = int128_make128(low, high); | ||
175 | @@ -XXX,XX +XXX,XX @@ static void do_stpq(CPUS390XState *env, uint64_t addr, | ||
176 | } | ||
167 | } | 177 | } |
168 | 178 | ||
169 | -static void tcg_out_op_rrcl(TCGContext *s, TCGOpcode op, | 179 | -void HELPER(stpq)(CPUS390XState *env, uint64_t addr, |
170 | - TCGReg r0, TCGReg r1, TCGCond c2, TCGLabel *l3) | 180 | - uint64_t low, uint64_t high) |
171 | -{ | 181 | -{ |
172 | - uint8_t *old_code_ptr = s->code_ptr; | 182 | - do_stpq(env, addr, low, high, false); |
173 | - | ||
174 | - tcg_out_op_t(s, op); | ||
175 | - tcg_out_r(s, r0); | ||
176 | - tcg_out_r(s, r1); | ||
177 | - tcg_out8(s, c2); | ||
178 | - tci_out_label(s, l3); | ||
179 | - | ||
180 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
181 | -} | 183 | -} |
182 | - | 184 | - |
183 | static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op, | 185 | -void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, |
184 | TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3) | 186 | - uint64_t low, uint64_t high) |
185 | { | ||
186 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op, | ||
187 | old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
188 | } | ||
189 | |||
190 | -static void tcg_out_op_rrrrcl(TCGContext *s, TCGOpcode op, | ||
191 | - TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3, | ||
192 | - TCGCond c4, TCGLabel *l5) | ||
193 | -{ | 187 | -{ |
194 | - uint8_t *old_code_ptr = s->code_ptr; | 188 | - do_stpq(env, addr, low, high, true); |
195 | - | ||
196 | - tcg_out_op_t(s, op); | ||
197 | - tcg_out_r(s, r0); | ||
198 | - tcg_out_r(s, r1); | ||
199 | - tcg_out_r(s, r2); | ||
200 | - tcg_out_r(s, r3); | ||
201 | - tcg_out8(s, c4); | ||
202 | - tci_out_label(s, l5); | ||
203 | - | ||
204 | - old_code_ptr[1] = s->code_ptr - old_code_ptr; | ||
205 | -} | 189 | -} |
206 | - | 190 | - |
207 | static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, | 191 | /* Execute instruction. This instruction executes an insn modified with |
208 | TCGReg r0, TCGReg r1, TCGReg r2, | 192 | the contents of r1. It does not change the executed instruction in memory; |
209 | TCGReg r3, TCGReg r4, TCGCond c5) | 193 | it does not change the program counter. |
210 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
211 | break; | ||
212 | |||
213 | CASE_32_64(brcond) | ||
214 | - tcg_out_op_rrcl(s, opc, args[0], args[1], args[2], arg_label(args[3])); | ||
215 | + tcg_out_op_rrrc(s, (opc == INDEX_op_brcond_i32 | ||
216 | + ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64), | ||
217 | + TCG_REG_TMP, args[0], args[1], args[2]); | ||
218 | + tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3])); | ||
219 | break; | ||
220 | |||
221 | CASE_32_64(neg) /* Optional (TCG_TARGET_HAS_neg_*). */ | ||
222 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
223 | args[3], args[4], args[5]); | ||
224 | break; | ||
225 | case INDEX_op_brcond2_i32: | ||
226 | - tcg_out_op_rrrrcl(s, opc, args[0], args[1], args[2], | ||
227 | - args[3], args[4], arg_label(args[5])); | ||
228 | + tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP, | ||
229 | + args[0], args[1], args[2], args[3], args[4]); | ||
230 | + tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5])); | ||
231 | break; | ||
232 | case INDEX_op_mulu2_i32: | ||
233 | tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]); | ||
234 | -- | 194 | -- |
235 | 2.25.1 | 195 | 2.17.2 |
236 | 196 | ||
1 | As noted by qemu-plugins.h, enum qemu_plugin_cb_flags is | 1 | When op raises an exception, it may not have initialized the output |
---|---|---|---|
2 | currently unused -- plugins can neither read nor write | 2 | temps that would be written back by wout or cout. |
3 | guest registers. | ||
4 | 3 | ||
5 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 4 | Reviewed-by: David Hildenbrand <david@redhat.com> |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 6 | --- |
8 | accel/tcg/plugin-helpers.h | 1 - | 7 | target/s390x/translate.c | 20 +++++++++++++++----- |
9 | include/qemu/plugin.h | 1 - | 8 | 1 file changed, 15 insertions(+), 5 deletions(-) |
10 | accel/tcg/plugin-gen.c | 8 ++++---- | ||
11 | plugins/core.c | 30 ++++++------------------------ | ||
12 | 4 files changed, 10 insertions(+), 30 deletions(-) | ||
13 | 9 | ||
14 | diff --git a/accel/tcg/plugin-helpers.h b/accel/tcg/plugin-helpers.h | 10 | diff --git a/target/s390x/translate.c b/target/s390x/translate.c |
15 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/accel/tcg/plugin-helpers.h | 12 | --- a/target/s390x/translate.c |
17 | +++ b/accel/tcg/plugin-helpers.h | 13 | +++ b/target/s390x/translate.c |
18 | @@ -XXX,XX +XXX,XX @@ | 14 | @@ -XXX,XX +XXX,XX @@ struct DisasInsn { |
19 | #ifdef CONFIG_PLUGIN | 15 | |
20 | -/* Note: no TCG flags because those are overwritten later */ | 16 | const char *name; |
21 | DEF_HELPER_2(plugin_vcpu_udata_cb, void, i32, ptr) | 17 | |
22 | DEF_HELPER_4(plugin_vcpu_mem_cb, void, i32, i32, i64, ptr) | 18 | + /* Pre-process arguments before HELP_OP. */ |
23 | #endif | 19 | void (*help_in1)(DisasContext *, DisasFields *, DisasOps *); |
24 | diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h | 20 | void (*help_in2)(DisasContext *, DisasFields *, DisasOps *); |
25 | index XXXXXXX..XXXXXXX 100644 | 21 | void (*help_prep)(DisasContext *, DisasFields *, DisasOps *); |
26 | --- a/include/qemu/plugin.h | 22 | + |
27 | +++ b/include/qemu/plugin.h | 23 | + /* |
28 | @@ -XXX,XX +XXX,XX @@ enum plugin_dyn_cb_subtype { | 24 | + * Post-process output after HELP_OP. |
29 | struct qemu_plugin_dyn_cb { | 25 | + * Note that these are not called if HELP_OP returns DISAS_NORETURN. |
30 | union qemu_plugin_cb_sig f; | 26 | + */ |
31 | void *userp; | 27 | void (*help_wout)(DisasContext *, DisasFields *, DisasOps *); |
32 | - unsigned tcg_flags; | 28 | void (*help_cout)(DisasContext *, DisasOps *); |
33 | enum plugin_dyn_cb_subtype type; | 29 | + |
34 | /* @rw applies to mem callbacks only (both regular and inline) */ | 30 | + /* Implement the operation itself. */ |
35 | enum qemu_plugin_mem_rw rw; | 31 | DisasJumpType (*help_op)(DisasContext *, DisasOps *); |
36 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | 32 | |
37 | index XXXXXXX..XXXXXXX 100644 | 33 | uint64_t data; |
38 | --- a/accel/tcg/plugin-gen.c | 34 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) |
39 | +++ b/accel/tcg/plugin-gen.c | 35 | if (insn->help_op) { |
40 | @@ -XXX,XX +XXX,XX @@ static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op) | 36 | ret = insn->help_op(s, &o); |
41 | } | ||
42 | |||
43 | static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, | ||
44 | - void *func, unsigned tcg_flags, int *cb_idx) | ||
45 | + void *func, int *cb_idx) | ||
46 | { | ||
47 | /* copy all ops until the call */ | ||
48 | do { | ||
49 | @@ -XXX,XX +XXX,XX @@ static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, | ||
50 | tcg_debug_assert(i < MAX_OPC_PARAM_ARGS); | ||
51 | } | 37 | } |
52 | op->args[*cb_idx] = (uintptr_t)func; | 38 | - if (insn->help_wout) { |
53 | - op->args[*cb_idx + 1] = tcg_flags; | 39 | - insn->help_wout(s, &f, &o); |
54 | + op->args[*cb_idx + 1] = (*begin_op)->args[*cb_idx + 1]; | 40 | - } |
55 | 41 | - if (insn->help_cout) { | |
56 | return op; | 42 | - insn->help_cout(s, &o); |
57 | } | 43 | + if (ret != DISAS_NORETURN) { |
58 | @@ -XXX,XX +XXX,XX @@ static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb, | 44 | + if (insn->help_wout) { |
59 | 45 | + insn->help_wout(s, &f, &o); | |
60 | /* call */ | 46 | + } |
61 | op = copy_call(&begin_op, op, HELPER(plugin_vcpu_udata_cb), | 47 | + if (insn->help_cout) { |
62 | - cb->f.vcpu_udata, cb->tcg_flags, cb_idx); | 48 | + insn->help_cout(s, &o); |
63 | + cb->f.vcpu_udata, cb_idx); | 49 | + } |
64 | |||
65 | return op; | ||
66 | } | ||
67 | @@ -XXX,XX +XXX,XX @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb, | ||
68 | if (type == PLUGIN_GEN_CB_MEM) { | ||
69 | /* call */ | ||
70 | op = copy_call(&begin_op, op, HELPER(plugin_vcpu_mem_cb), | ||
71 | - cb->f.vcpu_udata, cb->tcg_flags, cb_idx); | ||
72 | + cb->f.vcpu_udata, cb_idx); | ||
73 | } | 50 | } |
74 | 51 | ||
75 | return op; | 52 | /* Free any temporaries created by the helpers. */ |
76 | diff --git a/plugins/core.c b/plugins/core.c | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/plugins/core.c | ||
79 | +++ b/plugins/core.c | ||
80 | @@ -XXX,XX +XXX,XX @@ void plugin_register_inline_op(GArray **arr, | ||
81 | dyn_cb->inline_insn.imm = imm; | ||
82 | } | ||
83 | |||
84 | -static inline uint32_t cb_to_tcg_flags(enum qemu_plugin_cb_flags flags) | ||
85 | -{ | ||
86 | - uint32_t ret; | ||
87 | - | ||
88 | - switch (flags) { | ||
89 | - case QEMU_PLUGIN_CB_RW_REGS: | ||
90 | - ret = 0; | ||
91 | - break; | ||
92 | - case QEMU_PLUGIN_CB_R_REGS: | ||
93 | - ret = TCG_CALL_NO_WG; | ||
94 | - break; | ||
95 | - case QEMU_PLUGIN_CB_NO_REGS: | ||
96 | - default: | ||
97 | - ret = TCG_CALL_NO_RWG; | ||
98 | - } | ||
99 | - return ret; | ||
100 | -} | ||
101 | - | ||
102 | -inline void | ||
103 | -plugin_register_dyn_cb__udata(GArray **arr, | ||
104 | - qemu_plugin_vcpu_udata_cb_t cb, | ||
105 | - enum qemu_plugin_cb_flags flags, void *udata) | ||
106 | +void plugin_register_dyn_cb__udata(GArray **arr, | ||
107 | + qemu_plugin_vcpu_udata_cb_t cb, | ||
108 | + enum qemu_plugin_cb_flags flags, | ||
109 | + void *udata) | ||
110 | { | ||
111 | struct qemu_plugin_dyn_cb *dyn_cb = plugin_get_dyn_cb(arr); | ||
112 | |||
113 | dyn_cb->userp = udata; | ||
114 | - dyn_cb->tcg_flags = cb_to_tcg_flags(flags); | ||
115 | + /* Note flags are discarded as unused. */ | ||
116 | dyn_cb->f.vcpu_udata = cb; | ||
117 | dyn_cb->type = PLUGIN_CB_REGULAR; | ||
118 | } | ||
119 | @@ -XXX,XX +XXX,XX @@ void plugin_register_vcpu_mem_cb(GArray **arr, | ||
120 | |||
121 | dyn_cb = plugin_get_dyn_cb(arr); | ||
122 | dyn_cb->userp = udata; | ||
123 | - dyn_cb->tcg_flags = cb_to_tcg_flags(flags); | ||
124 | + /* Note flags are discarded as unused. */ | ||
125 | dyn_cb->type = PLUGIN_CB_REGULAR; | ||
126 | dyn_cb->rw = rw; | ||
127 | dyn_cb->f.generic = cb; | ||
128 | -- | 53 | -- |
129 | 2.25.1 | 54 | 2.17.2 |
130 | 55 | ||
Deleted patch | |||
---|---|---|---|
1 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
2 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | --- | ||
5 | tcg/tcg-internal.h | 5 +++++ | ||
6 | tcg/tcg.c | 5 ++--- | ||
7 | 2 files changed, 7 insertions(+), 3 deletions(-) | ||
8 | 1 | ||
9 | diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/tcg/tcg-internal.h | ||
12 | +++ b/tcg/tcg-internal.h | ||
13 | @@ -XXX,XX +XXX,XX @@ bool tcg_region_alloc(TCGContext *s); | ||
14 | void tcg_region_initial_alloc(TCGContext *s); | ||
15 | void tcg_region_prologue_set(TCGContext *s); | ||
16 | |||
17 | +static inline void *tcg_call_func(TCGOp *op) | ||
18 | +{ | ||
19 | + return (void *)(uintptr_t)op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op)]; | ||
20 | +} | ||
21 | + | ||
22 | static inline const TCGHelperInfo *tcg_call_info(TCGOp *op) | ||
23 | { | ||
24 | return (void *)(uintptr_t)op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; | ||
25 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/tcg/tcg.c | ||
28 | +++ b/tcg/tcg.c | ||
29 | @@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs) | ||
30 | } | ||
31 | } else if (c == INDEX_op_call) { | ||
32 | const TCGHelperInfo *info = tcg_call_info(op); | ||
33 | - void *func; | ||
34 | + void *func = tcg_call_func(op); | ||
35 | |||
36 | /* variable number of arguments */ | ||
37 | nb_oargs = TCGOP_CALLO(op); | ||
38 | @@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs) | ||
39 | * Note that plugins have a template function for the info, | ||
40 | * but the actual function pointer comes from the plugin. | ||
41 | */ | ||
42 | - func = (void *)(uintptr_t)op->args[nb_oargs + nb_iargs]; | ||
43 | if (func == info->func) { | ||
44 | col += qemu_log("%s", info->name); | ||
45 | } else { | ||
46 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) | ||
47 | int allocate_args; | ||
48 | TCGRegSet allocated_regs; | ||
49 | |||
50 | - func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; | ||
51 | + func_addr = tcg_call_func(op); | ||
52 | flags = tcg_call_flags(op); | ||
53 | |||
54 | nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); | ||
55 | -- | ||
56 | 2.25.1 | ||
57 | |||
Deleted patch | |||
---|---|---|---|
1 | Add libffi as a build requirement for TCI. | ||
2 | Add libffi to the dockerfiles to satisfy that requirement. | ||
3 | 1 | ||
4 | Construct an ffi_cif structure for each unique typemask. | ||
5 | Record the result in a separate hash table for later lookup; | ||
6 | this allows helper_table to stay const. | ||
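
For reference, a minimal sketch of what a prepared ffi_cif buys the interpreter: a call through ffi_call() needs no compile-time knowledge of the callee's prototype. add_i32() here is a made-up stand-in for a TCG helper; ffi_prep_cif(), ffi_call() and FFI_FN() are the standard libffi API:

#include <ffi.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t add_i32(uint32_t a, uint32_t b)
{
    return a + b;
}

int main(void)
{
    ffi_cif cif;
    ffi_type *args[2] = { &ffi_type_uint32, &ffi_type_uint32 };

    /* Describe the signature once: uint32_t (*)(uint32_t, uint32_t). */
    if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2,
                     &ffi_type_uint32, args) != FFI_OK) {
        return 1;
    }

    uint32_t a = 2, b = 3;
    void *values[2] = { &a, &b };
    ffi_arg result;   /* libffi widens small integer returns to ffi_arg */

    ffi_call(&cif, FFI_FN(add_i32), &result, values);
    printf("%u\n", (unsigned)result);   /* prints 5 */
    return 0;
}

One such cif is built per unique typemask and shared by all helpers with the same signature, which is what lets helper_table itself stay const.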
7 | |||
8 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | ||
12 | tcg/tcg.c | 58 +++++++++++++++++++ | ||
13 | tcg/meson.build | 8 ++- | ||
14 | tests/docker/dockerfiles/alpine.docker | 1 + | ||
15 | tests/docker/dockerfiles/centos8.docker | 1 + | ||
16 | tests/docker/dockerfiles/debian10.docker | 1 + | ||
17 | .../dockerfiles/fedora-i386-cross.docker | 1 + | ||
18 | .../dockerfiles/fedora-win32-cross.docker | 1 + | ||
19 | .../dockerfiles/fedora-win64-cross.docker | 1 + | ||
20 | tests/docker/dockerfiles/fedora.docker | 1 + | ||
21 | tests/docker/dockerfiles/ubuntu.docker | 1 + | ||
22 | tests/docker/dockerfiles/ubuntu1804.docker | 1 + | ||
23 | tests/docker/dockerfiles/ubuntu2004.docker | 1 + | ||
24 | 12 files changed, 75 insertions(+), 1 deletion(-) | ||
25 | |||
26 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/tcg/tcg.c | ||
29 | +++ b/tcg/tcg.c | ||
30 | @@ -XXX,XX +XXX,XX @@ | ||
31 | #include "exec/log.h" | ||
32 | #include "tcg-internal.h" | ||
33 | |||
34 | +#ifdef CONFIG_TCG_INTERPRETER | ||
35 | +#include <ffi.h> | ||
36 | +#endif | ||
37 | + | ||
38 | /* Forward declarations for functions declared in tcg-target.c.inc and | ||
39 | used here. */ | ||
40 | static void tcg_target_init(TCGContext *s); | ||
41 | @@ -XXX,XX +XXX,XX @@ static const TCGHelperInfo all_helpers[] = { | ||
42 | }; | ||
43 | static GHashTable *helper_table; | ||
44 | |||
45 | +#ifdef CONFIG_TCG_INTERPRETER | ||
46 | +static GHashTable *ffi_table; | ||
47 | + | ||
48 | +static ffi_type * const typecode_to_ffi[8] = { | ||
49 | + [dh_typecode_void] = &ffi_type_void, | ||
50 | + [dh_typecode_i32] = &ffi_type_uint32, | ||
51 | + [dh_typecode_s32] = &ffi_type_sint32, | ||
52 | + [dh_typecode_i64] = &ffi_type_uint64, | ||
53 | + [dh_typecode_s64] = &ffi_type_sint64, | ||
54 | + [dh_typecode_ptr] = &ffi_type_pointer, | ||
55 | +}; | ||
56 | +#endif | ||
57 | + | ||
58 | static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; | ||
59 | static void process_op_defs(TCGContext *s); | ||
60 | static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, | ||
61 | @@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus) | ||
62 | (gpointer)&all_helpers[i]); | ||
63 | } | ||
64 | |||
65 | +#ifdef CONFIG_TCG_INTERPRETER | ||
66 | + /* g_direct_hash/equal for direct comparisons on uint32_t. */ | ||
67 | + ffi_table = g_hash_table_new(NULL, NULL); | ||
68 | + for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { | ||
69 | + struct { | ||
70 | + ffi_cif cif; | ||
71 | + ffi_type *args[]; | ||
72 | + } *ca; | ||
73 | + uint32_t typemask = all_helpers[i].typemask; | ||
74 | + gpointer hash = (gpointer)(uintptr_t)typemask; | ||
75 | + ffi_status status; | ||
76 | + int nargs; | ||
77 | + | ||
78 | + if (g_hash_table_lookup(ffi_table, hash)) { | ||
79 | + continue; | ||
80 | + } | ||
81 | + | ||
82 | + /* Ignoring the return type, find the last non-zero field. */ | ||
83 | + nargs = 32 - clz32(typemask >> 3); | ||
84 | + nargs = DIV_ROUND_UP(nargs, 3); | ||
85 | + | ||
86 | + ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); | ||
87 | + ca->cif.rtype = typecode_to_ffi[typemask & 7]; | ||
88 | + ca->cif.nargs = nargs; | ||
89 | + | ||
90 | + if (nargs != 0) { | ||
91 | + ca->cif.arg_types = ca->args; | ||
92 | + for (int j = 0; j < nargs; ++j) { | ||
93 | + int typecode = extract32(typemask, (j + 1) * 3, 3); | ||
94 | + ca->args[j] = typecode_to_ffi[typecode]; | ||
95 | + } | ||
96 | + } | ||
97 | + | ||
98 | + status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, | ||
99 | + ca->cif.rtype, ca->cif.arg_types); | ||
100 | + assert(status == FFI_OK); | ||
101 | + | ||
102 | + g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif); | ||
103 | + } | ||
104 | +#endif | ||
105 | + | ||
106 | tcg_target_init(s); | ||
107 | process_op_defs(s); | ||
108 | |||
109 | diff --git a/tcg/meson.build b/tcg/meson.build | ||
110 | index XXXXXXX..XXXXXXX 100644 | ||
111 | --- a/tcg/meson.build | ||
112 | +++ b/tcg/meson.build | ||
113 | @@ -XXX,XX +XXX,XX @@ tcg_ss.add(files( | ||
114 | 'tcg-op-gvec.c', | ||
115 | 'tcg-op-vec.c', | ||
116 | )) | ||
117 | -tcg_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('tci.c')) | ||
118 | + | ||
119 | +if get_option('tcg_interpreter') | ||
120 | + libffi = dependency('libffi', version: '>=3.0', required: true, | ||
121 | + method: 'pkg-config', kwargs: static_kwargs) | ||
122 | + specific_ss.add(libffi) | ||
123 | + specific_ss.add(files('tci.c')) | ||
124 | +endif | ||
125 | |||
126 | specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss) | ||
127 | diff --git a/tests/docker/dockerfiles/alpine.docker b/tests/docker/dockerfiles/alpine.docker | ||
128 | index XXXXXXX..XXXXXXX 100644 | ||
129 | --- a/tests/docker/dockerfiles/alpine.docker | ||
130 | +++ b/tests/docker/dockerfiles/alpine.docker | ||
131 | @@ -XXX,XX +XXX,XX @@ ENV PACKAGES \ | ||
132 | libaio-dev \ | ||
133 | libbpf-dev \ | ||
134 | libcap-ng-dev \ | ||
135 | + libffi-dev \ | ||
136 | libjpeg-turbo-dev \ | ||
137 | libnfs-dev \ | ||
138 | libpng-dev \ | ||
139 | diff --git a/tests/docker/dockerfiles/centos8.docker b/tests/docker/dockerfiles/centos8.docker | ||
140 | index XXXXXXX..XXXXXXX 100644 | ||
141 | --- a/tests/docker/dockerfiles/centos8.docker | ||
142 | +++ b/tests/docker/dockerfiles/centos8.docker | ||
143 | @@ -XXX,XX +XXX,XX @@ ENV PACKAGES \ | ||
144 | libbpf-devel \ | ||
145 | libepoxy-devel \ | ||
146 | libfdt-devel \ | ||
147 | + libffi-devel \ | ||
148 | libgcrypt-devel \ | ||
149 | lzo-devel \ | ||
150 | make \ | ||
151 | diff --git a/tests/docker/dockerfiles/debian10.docker b/tests/docker/dockerfiles/debian10.docker | ||
152 | index XXXXXXX..XXXXXXX 100644 | ||
153 | --- a/tests/docker/dockerfiles/debian10.docker | ||
154 | +++ b/tests/docker/dockerfiles/debian10.docker | ||
155 | @@ -XXX,XX +XXX,XX @@ RUN apt update && \ | ||
156 | gdb-multiarch \ | ||
157 | gettext \ | ||
158 | git \ | ||
159 | + libffi-dev \ | ||
160 | libncurses5-dev \ | ||
161 | ninja-build \ | ||
162 | pkg-config \ | ||
163 | diff --git a/tests/docker/dockerfiles/fedora-i386-cross.docker b/tests/docker/dockerfiles/fedora-i386-cross.docker | ||
164 | index XXXXXXX..XXXXXXX 100644 | ||
165 | --- a/tests/docker/dockerfiles/fedora-i386-cross.docker | ||
166 | +++ b/tests/docker/dockerfiles/fedora-i386-cross.docker | ||
167 | @@ -XXX,XX +XXX,XX @@ ENV PACKAGES \ | ||
168 | findutils \ | ||
169 | gcc \ | ||
170 | git \ | ||
171 | + libffi-devel.i686 \ | ||
172 | libtasn1-devel.i686 \ | ||
173 | libzstd-devel.i686 \ | ||
174 | make \ | ||
175 | diff --git a/tests/docker/dockerfiles/fedora-win32-cross.docker b/tests/docker/dockerfiles/fedora-win32-cross.docker | ||
176 | index XXXXXXX..XXXXXXX 100644 | ||
177 | --- a/tests/docker/dockerfiles/fedora-win32-cross.docker | ||
178 | +++ b/tests/docker/dockerfiles/fedora-win32-cross.docker | ||
179 | @@ -XXX,XX +XXX,XX @@ ENV PACKAGES \ | ||
180 | mingw32-gmp \ | ||
181 | mingw32-gnutls \ | ||
182 | mingw32-gtk3 \ | ||
183 | + mingw32-libffi \ | ||
184 | mingw32-libjpeg-turbo \ | ||
185 | mingw32-libpng \ | ||
186 | mingw32-libtasn1 \ | ||
187 | diff --git a/tests/docker/dockerfiles/fedora-win64-cross.docker b/tests/docker/dockerfiles/fedora-win64-cross.docker | ||
188 | index XXXXXXX..XXXXXXX 100644 | ||
189 | --- a/tests/docker/dockerfiles/fedora-win64-cross.docker | ||
190 | +++ b/tests/docker/dockerfiles/fedora-win64-cross.docker | ||
191 | @@ -XXX,XX +XXX,XX @@ ENV PACKAGES \ | ||
192 | mingw64-glib2 \ | ||
193 | mingw64-gmp \ | ||
194 | mingw64-gtk3 \ | ||
195 | + mingw64-libffi \ | ||
196 | mingw64-libjpeg-turbo \ | ||
197 | mingw64-libpng \ | ||
198 | mingw64-libtasn1 \ | ||
199 | diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker | ||
200 | index XXXXXXX..XXXXXXX 100644 | ||
201 | --- a/tests/docker/dockerfiles/fedora.docker | ||
202 | +++ b/tests/docker/dockerfiles/fedora.docker | ||
203 | @@ -XXX,XX +XXX,XX @@ ENV PACKAGES \ | ||
204 | libepoxy-devel \ | ||
205 | libfdt-devel \ | ||
206 | libbpf-devel \ | ||
207 | + libffi-devel \ | ||
208 | libiscsi-devel \ | ||
209 | libjpeg-devel \ | ||
210 | libpmem-devel \ | ||
211 | diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker | ||
212 | index XXXXXXX..XXXXXXX 100644 | ||
213 | --- a/tests/docker/dockerfiles/ubuntu.docker | ||
214 | +++ b/tests/docker/dockerfiles/ubuntu.docker | ||
215 | @@ -XXX,XX +XXX,XX @@ ENV PACKAGES \ | ||
216 | libdrm-dev \ | ||
217 | libepoxy-dev \ | ||
218 | libfdt-dev \ | ||
219 | + libffi-dev \ | ||
220 | libgbm-dev \ | ||
221 | libgnutls28-dev \ | ||
222 | libgtk-3-dev \ | ||
223 | diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker | ||
224 | index XXXXXXX..XXXXXXX 100644 | ||
225 | --- a/tests/docker/dockerfiles/ubuntu1804.docker | ||
226 | +++ b/tests/docker/dockerfiles/ubuntu1804.docker | ||
227 | @@ -XXX,XX +XXX,XX @@ ENV PACKAGES \ | ||
228 | libdrm-dev \ | ||
229 | libepoxy-dev \ | ||
230 | libfdt-dev \ | ||
231 | + libffi-dev \ | ||
232 | libgbm-dev \ | ||
233 | libgtk-3-dev \ | ||
234 | libibverbs-dev \ | ||
235 | diff --git a/tests/docker/dockerfiles/ubuntu2004.docker b/tests/docker/dockerfiles/ubuntu2004.docker | ||
236 | index XXXXXXX..XXXXXXX 100644 | ||
237 | --- a/tests/docker/dockerfiles/ubuntu2004.docker | ||
238 | +++ b/tests/docker/dockerfiles/ubuntu2004.docker | ||
239 | @@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \ | ||
240 | libdrm-dev \ | ||
241 | libepoxy-dev \ | ||
242 | libfdt-dev \ | ||
243 | + libffi-dev \ | ||
244 | libgbm-dev \ | ||
245 | libgtk-3-dev \ | ||
246 | libibverbs-dev \ | ||
247 | -- | ||
248 | 2.25.1 | ||
249 | |||
Deleted patch | |||
---|---|---|---|
1 | The current setting is much too pessimistic. Indicating only | ||
2 | the one or two registers that are actually assigned after a | ||
3 | call should avoid unnecessary movement between the register | ||
4 | array and the stack array. | ||
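
A small standalone sketch of the mask computed below -- MAKE_64BIT_MASK() reimplemented locally with the same semantics as QEMU's bitops, and reg_bits set to a hypothetical 32-bit host, where the 64-bit return value occupies the R0:R1 pair:

#include <stdint.h>
#include <stdio.h>

#define MAKE_64BIT_MASK(shift, length) \
    (((~0ULL) >> (64 - (length))) << (shift))

int main(void)
{
    int reg_bits = 32;   /* hypothetical TCG_TARGET_REG_BITS */
    uint64_t clobbers = MAKE_64BIT_MASK(0, 64 / reg_bits);

    for (int r = 0; r < 16; r++) {
        if (clobbers & (1ull << r)) {
            printf("R%d is call-clobbered\n", r);  /* prints R0 and R1 */
        }
    }
    return 0;
}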
5 | 1 | ||
6 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | tcg/tci/tcg-target.c.inc | 10 ++++++++-- | ||
11 | 1 file changed, 8 insertions(+), 2 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/tci/tcg-target.c.inc | ||
16 | +++ b/tcg/tci/tcg-target.c.inc | ||
17 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
18 | tcg_target_available_regs[TCG_TYPE_I32] = BIT(TCG_TARGET_NB_REGS) - 1; | ||
19 | /* Registers available for 64 bit operations. */ | ||
20 | tcg_target_available_regs[TCG_TYPE_I64] = BIT(TCG_TARGET_NB_REGS) - 1; | ||
21 | - /* TODO: Which registers should be set here? */ | ||
22 | - tcg_target_call_clobber_regs = BIT(TCG_TARGET_NB_REGS) - 1; | ||
23 | + /* | ||
24 | + * The interpreter "registers" are in the local stack frame and | ||
25 | + * cannot be clobbered by the called helper functions. However, | ||
26 | + * the interpreter assumes a 64-bit return value and assigns to | ||
27 | + * the return value registers. | ||
28 | + */ | ||
29 | + tcg_target_call_clobber_regs = | ||
30 | + MAKE_64BIT_MASK(TCG_REG_R0, 64 / TCG_TARGET_REG_BITS); | ||
31 | |||
32 | s->reserved_regs = 0; | ||
33 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); | ||
34 | -- | ||
35 | 2.25.1 | ||
36 | |||
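For reference, the new clobber set works out as follows. QEMU's MAKE_64BIT_MASK(shift, length), reproduced here as a sketch from qemu/bitops.h, expands roughly to:

    #define MAKE_64BIT_MASK(shift, length) \
        (((~0ULL) >> (64 - (length))) << (shift))

    /* 64-bit host: 64 / TCG_TARGET_REG_BITS == 1 -> mask 0x1, i.e. {R0}    */
    /* 32-bit host: 64 / TCG_TARGET_REG_BITS == 2 -> mask 0x3, i.e. {R0,R1} */

so only the register (or register pair) holding the 64-bit helper return value is marked call-clobbered.
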
Deleted patch | |||
---|---|---|---|
1 | As the only call-clobbered regs for TCI, these should | ||
2 | receive the lowest allocation priority. | ||
3 | 1 | ||
4 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/tci/tcg-target.c.inc | 4 ++-- | ||
9 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/tci/tcg-target.c.inc | ||
14 | +++ b/tcg/tci/tcg-target.c.inc | ||
15 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
16 | } | ||
17 | |||
18 | static const int tcg_target_reg_alloc_order[] = { | ||
19 | - TCG_REG_R0, | ||
20 | - TCG_REG_R1, | ||
21 | TCG_REG_R2, | ||
22 | TCG_REG_R3, | ||
23 | TCG_REG_R4, | ||
24 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = { | ||
25 | TCG_REG_R13, | ||
26 | TCG_REG_R14, | ||
27 | TCG_REG_R15, | ||
28 | + TCG_REG_R1, | ||
29 | + TCG_REG_R0, | ||
30 | }; | ||
31 | |||
32 | #if MAX_OPC_PARAM_IARGS != 6 | ||
33 | -- | ||
34 | 2.25.1 | ||
35 | |||
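The priority comes from the allocator's scan order: tcg_reg_alloc() walks tcg_target_reg_alloc_order[] front to back and takes the first free register, so entries moved to the end are chosen only under register pressure. Roughly (a sketch of the selection loop, not the actual allocator code):

    for (i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
        TCGReg r = tcg_target_reg_alloc_order[i];
        if (tcg_regset_test_reg(free_regs, r)) {
            return r;    /* R0/R1 now sit last, so they are picked last */
        }
    }
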
Deleted patch | |||
---|---|---|---|
1 | We're about to adjust the offset range on host memory ops, | ||
2 | and the format of branches. Both will require a temporary. | ||
3 | 1 | ||
4 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/tci/tcg-target.h | 1 + | ||
9 | tcg/tci/tcg-target.c.inc | 1 + | ||
10 | 2 files changed, 2 insertions(+) | ||
11 | |||
12 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/tcg/tci/tcg-target.h | ||
15 | +++ b/tcg/tci/tcg-target.h | ||
16 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
17 | TCG_REG_R14, | ||
18 | TCG_REG_R15, | ||
19 | |||
20 | + TCG_REG_TMP = TCG_REG_R13, | ||
21 | TCG_AREG0 = TCG_REG_R14, | ||
22 | TCG_REG_CALL_STACK = TCG_REG_R15, | ||
23 | } TCGReg; | ||
24 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/tcg/tci/tcg-target.c.inc | ||
27 | +++ b/tcg/tci/tcg-target.c.inc | ||
28 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
29 | MAKE_64BIT_MASK(TCG_REG_R0, 64 / TCG_TARGET_REG_BITS); | ||
30 | |||
31 | s->reserved_regs = 0; | ||
32 | + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); | ||
33 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); | ||
34 | |||
35 | /* The call arguments come first, followed by the temp storage. */ | ||
36 | -- | ||
37 | 2.25.1 | ||
38 | |||
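Reserving TMP takes it out of the allocator entirely, so the backend can scratch it at any point without coordinating with register allocation. tcg_regset_set_reg() is just a bit-set on the TCGRegSet mask; conceptually (sketch):

    s->reserved_regs |= (TCGRegSet)1 << TCG_REG_TMP;   /* never allocated */
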
Deleted patch | |||
---|---|---|---|
1 | These were already present in tcg-target.c.inc, | ||
2 | but not in the interpreter. | ||
3 | 1 | ||
4 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/tci/tcg-target.h | 20 ++++++++++---------- | ||
9 | tcg/tci.c | 40 ++++++++++++++++++++++++++++++++++++++++ | ||
10 | 2 files changed, 50 insertions(+), 10 deletions(-) | ||
11 | |||
12 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/tcg/tci/tcg-target.h | ||
15 | +++ b/tcg/tci/tcg-target.h | ||
16 | @@ -XXX,XX +XXX,XX @@ | ||
17 | #define TCG_TARGET_HAS_ext16s_i32 1 | ||
18 | #define TCG_TARGET_HAS_ext8u_i32 1 | ||
19 | #define TCG_TARGET_HAS_ext16u_i32 1 | ||
20 | -#define TCG_TARGET_HAS_andc_i32 0 | ||
21 | +#define TCG_TARGET_HAS_andc_i32 1 | ||
22 | #define TCG_TARGET_HAS_deposit_i32 1 | ||
23 | #define TCG_TARGET_HAS_extract_i32 0 | ||
24 | #define TCG_TARGET_HAS_sextract_i32 0 | ||
25 | #define TCG_TARGET_HAS_extract2_i32 0 | ||
26 | -#define TCG_TARGET_HAS_eqv_i32 0 | ||
27 | -#define TCG_TARGET_HAS_nand_i32 0 | ||
28 | -#define TCG_TARGET_HAS_nor_i32 0 | ||
29 | +#define TCG_TARGET_HAS_eqv_i32 1 | ||
30 | +#define TCG_TARGET_HAS_nand_i32 1 | ||
31 | +#define TCG_TARGET_HAS_nor_i32 1 | ||
32 | #define TCG_TARGET_HAS_clz_i32 0 | ||
33 | #define TCG_TARGET_HAS_ctz_i32 0 | ||
34 | #define TCG_TARGET_HAS_ctpop_i32 0 | ||
35 | #define TCG_TARGET_HAS_neg_i32 1 | ||
36 | #define TCG_TARGET_HAS_not_i32 1 | ||
37 | -#define TCG_TARGET_HAS_orc_i32 0 | ||
38 | +#define TCG_TARGET_HAS_orc_i32 1 | ||
39 | #define TCG_TARGET_HAS_rot_i32 1 | ||
40 | #define TCG_TARGET_HAS_movcond_i32 1 | ||
41 | #define TCG_TARGET_HAS_muls2_i32 0 | ||
42 | @@ -XXX,XX +XXX,XX @@ | ||
43 | #define TCG_TARGET_HAS_ext8u_i64 1 | ||
44 | #define TCG_TARGET_HAS_ext16u_i64 1 | ||
45 | #define TCG_TARGET_HAS_ext32u_i64 1 | ||
46 | -#define TCG_TARGET_HAS_andc_i64 0 | ||
47 | -#define TCG_TARGET_HAS_eqv_i64 0 | ||
48 | -#define TCG_TARGET_HAS_nand_i64 0 | ||
49 | -#define TCG_TARGET_HAS_nor_i64 0 | ||
50 | +#define TCG_TARGET_HAS_andc_i64 1 | ||
51 | +#define TCG_TARGET_HAS_eqv_i64 1 | ||
52 | +#define TCG_TARGET_HAS_nand_i64 1 | ||
53 | +#define TCG_TARGET_HAS_nor_i64 1 | ||
54 | #define TCG_TARGET_HAS_clz_i64 0 | ||
55 | #define TCG_TARGET_HAS_ctz_i64 0 | ||
56 | #define TCG_TARGET_HAS_ctpop_i64 0 | ||
57 | #define TCG_TARGET_HAS_neg_i64 1 | ||
58 | #define TCG_TARGET_HAS_not_i64 1 | ||
59 | -#define TCG_TARGET_HAS_orc_i64 0 | ||
60 | +#define TCG_TARGET_HAS_orc_i64 1 | ||
61 | #define TCG_TARGET_HAS_rot_i64 1 | ||
62 | #define TCG_TARGET_HAS_movcond_i64 1 | ||
63 | #define TCG_TARGET_HAS_muls2_i64 0 | ||
64 | diff --git a/tcg/tci.c b/tcg/tci.c | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/tcg/tci.c | ||
67 | +++ b/tcg/tci.c | ||
68 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
69 | tci_args_rrr(insn, &r0, &r1, &r2); | ||
70 | regs[r0] = regs[r1] ^ regs[r2]; | ||
71 | break; | ||
72 | +#if TCG_TARGET_HAS_andc_i32 || TCG_TARGET_HAS_andc_i64 | ||
73 | + CASE_32_64(andc) | ||
74 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
75 | + regs[r0] = regs[r1] & ~regs[r2]; | ||
76 | + break; | ||
77 | +#endif | ||
78 | +#if TCG_TARGET_HAS_orc_i32 || TCG_TARGET_HAS_orc_i64 | ||
79 | + CASE_32_64(orc) | ||
80 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
81 | + regs[r0] = regs[r1] | ~regs[r2]; | ||
82 | + break; | ||
83 | +#endif | ||
84 | +#if TCG_TARGET_HAS_eqv_i32 || TCG_TARGET_HAS_eqv_i64 | ||
85 | + CASE_32_64(eqv) | ||
86 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
87 | + regs[r0] = ~(regs[r1] ^ regs[r2]); | ||
88 | + break; | ||
89 | +#endif | ||
90 | +#if TCG_TARGET_HAS_nand_i32 || TCG_TARGET_HAS_nand_i64 | ||
91 | + CASE_32_64(nand) | ||
92 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
93 | + regs[r0] = ~(regs[r1] & regs[r2]); | ||
94 | + break; | ||
95 | +#endif | ||
96 | +#if TCG_TARGET_HAS_nor_i32 || TCG_TARGET_HAS_nor_i64 | ||
97 | + CASE_32_64(nor) | ||
98 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
99 | + regs[r0] = ~(regs[r1] | regs[r2]); | ||
100 | + break; | ||
101 | +#endif | ||
102 | |||
103 | /* Arithmetic operations (32 bit). */ | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
106 | case INDEX_op_or_i64: | ||
107 | case INDEX_op_xor_i32: | ||
108 | case INDEX_op_xor_i64: | ||
109 | + case INDEX_op_andc_i32: | ||
110 | + case INDEX_op_andc_i64: | ||
111 | + case INDEX_op_orc_i32: | ||
112 | + case INDEX_op_orc_i64: | ||
113 | + case INDEX_op_eqv_i32: | ||
114 | + case INDEX_op_eqv_i64: | ||
115 | + case INDEX_op_nand_i32: | ||
116 | + case INDEX_op_nand_i64: | ||
117 | + case INDEX_op_nor_i32: | ||
118 | + case INDEX_op_nor_i64: | ||
119 | case INDEX_op_div_i32: | ||
120 | case INDEX_op_div_i64: | ||
121 | case INDEX_op_rem_i32: | ||
122 | -- | ||
123 | 2.25.1 | ||
124 | |||
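For reference, the semantics of the five new interpreter cases, written as plain C expressions (these mirror the regs[] assignments in the diff above):

    uint64_t andc(uint64_t a, uint64_t b) { return a & ~b; }   /* and-complement */
    uint64_t orc (uint64_t a, uint64_t b) { return a | ~b; }   /* or-complement  */
    uint64_t eqv (uint64_t a, uint64_t b) { return ~(a ^ b); } /* xnor           */
    uint64_t nand(uint64_t a, uint64_t b) { return ~(a & b); }
    uint64_t nor (uint64_t a, uint64_t b) { return ~(a | b); }
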
Deleted patch | |||
---|---|---|---|
1 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
2 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | --- | ||
5 | tcg/tci/tcg-target.h | 12 +++++------ | ||
6 | tcg/tci.c | 44 ++++++++++++++++++++++++++++++++++++++++ | ||
7 | tcg/tci/tcg-target.c.inc | 9 ++++++++ | ||
8 | 3 files changed, 59 insertions(+), 6 deletions(-) | ||
9 | 1 | ||
10 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/tci/tcg-target.h | ||
13 | +++ b/tcg/tci/tcg-target.h | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | #define TCG_TARGET_HAS_eqv_i32 1 | ||
16 | #define TCG_TARGET_HAS_nand_i32 1 | ||
17 | #define TCG_TARGET_HAS_nor_i32 1 | ||
18 | -#define TCG_TARGET_HAS_clz_i32 0 | ||
19 | -#define TCG_TARGET_HAS_ctz_i32 0 | ||
20 | -#define TCG_TARGET_HAS_ctpop_i32 0 | ||
21 | +#define TCG_TARGET_HAS_clz_i32 1 | ||
22 | +#define TCG_TARGET_HAS_ctz_i32 1 | ||
23 | +#define TCG_TARGET_HAS_ctpop_i32 1 | ||
24 | #define TCG_TARGET_HAS_neg_i32 1 | ||
25 | #define TCG_TARGET_HAS_not_i32 1 | ||
26 | #define TCG_TARGET_HAS_orc_i32 1 | ||
27 | @@ -XXX,XX +XXX,XX @@ | ||
28 | #define TCG_TARGET_HAS_eqv_i64 1 | ||
29 | #define TCG_TARGET_HAS_nand_i64 1 | ||
30 | #define TCG_TARGET_HAS_nor_i64 1 | ||
31 | -#define TCG_TARGET_HAS_clz_i64 0 | ||
32 | -#define TCG_TARGET_HAS_ctz_i64 0 | ||
33 | -#define TCG_TARGET_HAS_ctpop_i64 0 | ||
34 | +#define TCG_TARGET_HAS_clz_i64 1 | ||
35 | +#define TCG_TARGET_HAS_ctz_i64 1 | ||
36 | +#define TCG_TARGET_HAS_ctpop_i64 1 | ||
37 | #define TCG_TARGET_HAS_neg_i64 1 | ||
38 | #define TCG_TARGET_HAS_not_i64 1 | ||
39 | #define TCG_TARGET_HAS_orc_i64 1 | ||
40 | diff --git a/tcg/tci.c b/tcg/tci.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/tcg/tci.c | ||
43 | +++ b/tcg/tci.c | ||
44 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
45 | tci_args_rrr(insn, &r0, &r1, &r2); | ||
46 | regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2]; | ||
47 | break; | ||
48 | +#if TCG_TARGET_HAS_clz_i32 | ||
49 | + case INDEX_op_clz_i32: | ||
50 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
51 | + tmp32 = regs[r1]; | ||
52 | + regs[r0] = tmp32 ? clz32(tmp32) : regs[r2]; | ||
53 | + break; | ||
54 | +#endif | ||
55 | +#if TCG_TARGET_HAS_ctz_i32 | ||
56 | + case INDEX_op_ctz_i32: | ||
57 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
58 | + tmp32 = regs[r1]; | ||
59 | + regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2]; | ||
60 | + break; | ||
61 | +#endif | ||
62 | +#if TCG_TARGET_HAS_ctpop_i32 | ||
63 | + case INDEX_op_ctpop_i32: | ||
64 | + tci_args_rr(insn, &r0, &r1); | ||
65 | + regs[r0] = ctpop32(regs[r1]); | ||
66 | + break; | ||
67 | +#endif | ||
68 | |||
69 | /* Shift/rotate operations (32 bit). */ | ||
70 | |||
71 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
72 | tci_args_rrr(insn, &r0, &r1, &r2); | ||
73 | regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2]; | ||
74 | break; | ||
75 | +#if TCG_TARGET_HAS_clz_i64 | ||
76 | + case INDEX_op_clz_i64: | ||
77 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
78 | + regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2]; | ||
79 | + break; | ||
80 | +#endif | ||
81 | +#if TCG_TARGET_HAS_ctz_i64 | ||
82 | + case INDEX_op_ctz_i64: | ||
83 | + tci_args_rrr(insn, &r0, &r1, &r2); | ||
84 | + regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; | ||
85 | + break; | ||
86 | +#endif | ||
87 | +#if TCG_TARGET_HAS_ctpop_i64 | ||
88 | + case INDEX_op_ctpop_i64: | ||
89 | + tci_args_rr(insn, &r0, &r1); | ||
90 | + regs[r0] = ctpop64(regs[r1]); | ||
91 | + break; | ||
92 | +#endif | ||
93 | |||
94 | /* Shift/rotate operations (64 bit). */ | ||
95 | |||
96 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
97 | case INDEX_op_not_i64: | ||
98 | case INDEX_op_neg_i32: | ||
99 | case INDEX_op_neg_i64: | ||
100 | + case INDEX_op_ctpop_i32: | ||
101 | + case INDEX_op_ctpop_i64: | ||
102 | tci_args_rr(insn, &r0, &r1); | ||
103 | info->fprintf_func(info->stream, "%-12s %s, %s", | ||
104 | op_name, str_r(r0), str_r(r1)); | ||
105 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
106 | case INDEX_op_rotl_i64: | ||
107 | case INDEX_op_rotr_i32: | ||
108 | case INDEX_op_rotr_i64: | ||
109 | + case INDEX_op_clz_i32: | ||
110 | + case INDEX_op_clz_i64: | ||
111 | + case INDEX_op_ctz_i32: | ||
112 | + case INDEX_op_ctz_i64: | ||
113 | tci_args_rrr(insn, &r0, &r1, &r2); | ||
114 | info->fprintf_func(info->stream, "%-12s %s, %s, %s", | ||
115 | op_name, str_r(r0), str_r(r1), str_r(r2)); | ||
116 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
117 | index XXXXXXX..XXXXXXX 100644 | ||
118 | --- a/tcg/tci/tcg-target.c.inc | ||
119 | +++ b/tcg/tci/tcg-target.c.inc | ||
120 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
121 | case INDEX_op_extract_i64: | ||
122 | case INDEX_op_sextract_i32: | ||
123 | case INDEX_op_sextract_i64: | ||
124 | + case INDEX_op_ctpop_i32: | ||
125 | + case INDEX_op_ctpop_i64: | ||
126 | return C_O1_I1(r, r); | ||
127 | |||
128 | case INDEX_op_st8_i32: | ||
129 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
130 | case INDEX_op_setcond_i64: | ||
131 | case INDEX_op_deposit_i32: | ||
132 | case INDEX_op_deposit_i64: | ||
133 | + case INDEX_op_clz_i32: | ||
134 | + case INDEX_op_clz_i64: | ||
135 | + case INDEX_op_ctz_i32: | ||
136 | + case INDEX_op_ctz_i64: | ||
137 | return C_O1_I2(r, r, r); | ||
138 | |||
139 | case INDEX_op_brcond_i32: | ||
140 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
141 | CASE_32_64(divu) /* Optional (TCG_TARGET_HAS_div_*). */ | ||
142 | CASE_32_64(rem) /* Optional (TCG_TARGET_HAS_div_*). */ | ||
143 | CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */ | ||
144 | + CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */ | ||
145 | + CASE_32_64(ctz) /* Optional (TCG_TARGET_HAS_ctz_*). */ | ||
146 | tcg_out_op_rrr(s, opc, args[0], args[1], args[2]); | ||
147 | break; | ||
148 | |||
149 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
150 | CASE_32_64(bswap16) /* Optional (TCG_TARGET_HAS_bswap16_*). */ | ||
151 | CASE_32_64(bswap32) /* Optional (TCG_TARGET_HAS_bswap32_*). */ | ||
152 | CASE_64(bswap64) /* Optional (TCG_TARGET_HAS_bswap64_i64). */ | ||
153 | + CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */ | ||
154 | tcg_out_op_rr(s, opc, args[0], args[1]); | ||
155 | break; | ||
156 | |||
157 | -- | ||
158 | 2.25.1 | ||
159 | |||
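Note the TCG semantics implemented here: clz and ctz take a third operand that supplies the result when the input is zero, hence the tmp32/regs[r1] guards before calling clz32()/ctz32(). A standalone equivalent (sketch, using GCC builtins instead of QEMU's host-utils helpers):

    static uint32_t ref_clz32(uint32_t x, uint32_t zero_val)
    {
        /* __builtin_clz(0) is undefined, so the guard is required */
        return x ? (uint32_t)__builtin_clz(x) : zero_val;
    }

    static uint32_t ref_ctz32(uint32_t x, uint32_t zero_val)
    {
        return x ? (uint32_t)__builtin_ctz(x) : zero_val;
    }
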
Deleted patch | |||
---|---|---|---|
1 | We already had mulu2_i32 for a 32-bit host; expand this to 64-bit | ||
2 | hosts as well. The muls2_i32 and the 64-bit opcodes are new. | ||
3 | 1 | ||
4 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | tcg/tci/tcg-target.h | 8 ++++---- | ||
9 | tcg/tci.c | 35 +++++++++++++++++++++++++++++------ | ||
10 | tcg/tci/tcg-target.c.inc | 16 ++++++++++------ | ||
11 | 3 files changed, 43 insertions(+), 16 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/tci/tcg-target.h | ||
16 | +++ b/tcg/tci/tcg-target.h | ||
17 | @@ -XXX,XX +XXX,XX @@ | ||
18 | #define TCG_TARGET_HAS_orc_i32 1 | ||
19 | #define TCG_TARGET_HAS_rot_i32 1 | ||
20 | #define TCG_TARGET_HAS_movcond_i32 1 | ||
21 | -#define TCG_TARGET_HAS_muls2_i32 0 | ||
22 | +#define TCG_TARGET_HAS_muls2_i32 1 | ||
23 | #define TCG_TARGET_HAS_muluh_i32 0 | ||
24 | #define TCG_TARGET_HAS_mulsh_i32 0 | ||
25 | #define TCG_TARGET_HAS_goto_ptr 1 | ||
26 | @@ -XXX,XX +XXX,XX @@ | ||
27 | #define TCG_TARGET_HAS_orc_i64 1 | ||
28 | #define TCG_TARGET_HAS_rot_i64 1 | ||
29 | #define TCG_TARGET_HAS_movcond_i64 1 | ||
30 | -#define TCG_TARGET_HAS_muls2_i64 0 | ||
31 | +#define TCG_TARGET_HAS_muls2_i64 1 | ||
32 | #define TCG_TARGET_HAS_add2_i32 0 | ||
33 | #define TCG_TARGET_HAS_sub2_i32 0 | ||
34 | -#define TCG_TARGET_HAS_mulu2_i32 0 | ||
35 | +#define TCG_TARGET_HAS_mulu2_i32 1 | ||
36 | #define TCG_TARGET_HAS_add2_i64 0 | ||
37 | #define TCG_TARGET_HAS_sub2_i64 0 | ||
38 | -#define TCG_TARGET_HAS_mulu2_i64 0 | ||
39 | +#define TCG_TARGET_HAS_mulu2_i64 1 | ||
40 | #define TCG_TARGET_HAS_muluh_i64 0 | ||
41 | #define TCG_TARGET_HAS_mulsh_i64 0 | ||
42 | #else | ||
43 | diff --git a/tcg/tci.c b/tcg/tci.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/tcg/tci.c | ||
46 | +++ b/tcg/tci.c | ||
47 | @@ -XXX,XX +XXX,XX @@ __thread uintptr_t tci_tb_ptr; | ||
48 | static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index, | ||
49 | uint32_t low_index, uint64_t value) | ||
50 | { | ||
51 | - regs[low_index] = value; | ||
52 | + regs[low_index] = (uint32_t)value; | ||
53 | regs[high_index] = value >> 32; | ||
54 | } | ||
55 | |||
56 | @@ -XXX,XX +XXX,XX @@ static void tci_args_rrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, | ||
57 | *r4 = extract32(insn, 24, 4); | ||
58 | } | ||
59 | |||
60 | -#if TCG_TARGET_REG_BITS == 32 | ||
61 | static void tci_args_rrrr(uint32_t insn, | ||
62 | TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3) | ||
63 | { | ||
64 | @@ -XXX,XX +XXX,XX @@ static void tci_args_rrrr(uint32_t insn, | ||
65 | *r2 = extract32(insn, 16, 4); | ||
66 | *r3 = extract32(insn, 20, 4); | ||
67 | } | ||
68 | -#endif | ||
69 | |||
70 | static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, | ||
71 | TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5) | ||
72 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
73 | T2 = tci_uint64(regs[r5], regs[r4]); | ||
74 | tci_write_reg64(regs, r1, r0, T1 - T2); | ||
75 | break; | ||
76 | +#endif /* TCG_TARGET_REG_BITS == 32 */ | ||
77 | +#if TCG_TARGET_HAS_mulu2_i32 | ||
78 | case INDEX_op_mulu2_i32: | ||
79 | tci_args_rrrr(insn, &r0, &r1, &r2, &r3); | ||
80 | - tci_write_reg64(regs, r1, r0, (uint64_t)regs[r2] * regs[r3]); | ||
81 | + tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3]; | ||
82 | + tci_write_reg64(regs, r1, r0, tmp64); | ||
83 | break; | ||
84 | -#endif /* TCG_TARGET_REG_BITS == 32 */ | ||
85 | +#endif | ||
86 | +#if TCG_TARGET_HAS_muls2_i32 | ||
87 | + case INDEX_op_muls2_i32: | ||
88 | + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); | ||
89 | + tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3]; | ||
90 | + tci_write_reg64(regs, r1, r0, tmp64); | ||
91 | + break; | ||
92 | +#endif | ||
93 | #if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 | ||
94 | CASE_32_64(ext8s) | ||
95 | tci_args_rr(insn, &r0, &r1); | ||
96 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
97 | regs[r0] = ctpop64(regs[r1]); | ||
98 | break; | ||
99 | #endif | ||
100 | +#if TCG_TARGET_HAS_mulu2_i64 | ||
101 | + case INDEX_op_mulu2_i64: | ||
102 | + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); | ||
103 | + mulu64(®s[r0], ®s[r1], regs[r2], regs[r3]); | ||
104 | + break; | ||
105 | +#endif | ||
106 | +#if TCG_TARGET_HAS_muls2_i64 | ||
107 | + case INDEX_op_muls2_i64: | ||
108 | + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); | ||
109 | + muls64(®s[r0], ®s[r1], regs[r2], regs[r3]); | ||
110 | + break; | ||
111 | +#endif | ||
112 | |||
113 | /* Shift/rotate operations (64 bit). */ | ||
114 | |||
115 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
116 | str_r(r3), str_r(r4), str_c(c)); | ||
117 | break; | ||
118 | |||
119 | -#if TCG_TARGET_REG_BITS == 32 | ||
120 | case INDEX_op_mulu2_i32: | ||
121 | + case INDEX_op_mulu2_i64: | ||
122 | + case INDEX_op_muls2_i32: | ||
123 | + case INDEX_op_muls2_i64: | ||
124 | tci_args_rrrr(insn, &r0, &r1, &r2, &r3); | ||
125 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", | ||
126 | op_name, str_r(r0), str_r(r1), | ||
127 | str_r(r2), str_r(r3)); | ||
128 | break; | ||
129 | |||
130 | +#if TCG_TARGET_REG_BITS == 32 | ||
131 | case INDEX_op_add2_i32: | ||
132 | case INDEX_op_sub2_i32: | ||
133 | tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); | ||
134 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
135 | index XXXXXXX..XXXXXXX 100644 | ||
136 | --- a/tcg/tci/tcg-target.c.inc | ||
137 | +++ b/tcg/tci/tcg-target.c.inc | ||
138 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
139 | return C_O2_I4(r, r, r, r, r, r); | ||
140 | case INDEX_op_brcond2_i32: | ||
141 | return C_O0_I4(r, r, r, r); | ||
142 | - case INDEX_op_mulu2_i32: | ||
143 | - return C_O2_I2(r, r, r, r); | ||
144 | #endif | ||
145 | |||
146 | + case INDEX_op_mulu2_i32: | ||
147 | + case INDEX_op_mulu2_i64: | ||
148 | + case INDEX_op_muls2_i32: | ||
149 | + case INDEX_op_muls2_i64: | ||
150 | + return C_O2_I2(r, r, r, r); | ||
151 | + | ||
152 | case INDEX_op_movcond_i32: | ||
153 | case INDEX_op_movcond_i64: | ||
154 | case INDEX_op_setcond2_i32: | ||
155 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op, TCGReg r0, | ||
156 | tcg_out32(s, insn); | ||
157 | } | ||
158 | |||
159 | -#if TCG_TARGET_REG_BITS == 32 | ||
160 | static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op, | ||
161 | TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3) | ||
162 | { | ||
163 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op, | ||
164 | insn = deposit32(insn, 20, 4, r3); | ||
165 | tcg_out32(s, insn); | ||
166 | } | ||
167 | -#endif | ||
168 | |||
169 | static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, | ||
170 | TCGReg r0, TCGReg r1, TCGReg r2, | ||
171 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
172 | args[0], args[1], args[2], args[3], args[4]); | ||
173 | tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5])); | ||
174 | break; | ||
175 | - case INDEX_op_mulu2_i32: | ||
176 | +#endif | ||
177 | + | ||
178 | + CASE_32_64(mulu2) | ||
179 | + CASE_32_64(muls2) | ||
180 | tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]); | ||
181 | break; | ||
182 | -#endif | ||
183 | |||
184 | case INDEX_op_qemu_ld_i32: | ||
185 | case INDEX_op_qemu_st_i32: | ||
186 | -- | ||
187 | 2.25.1 | ||
188 | |||
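The casts in the new 32-bit cases are what make the multiply widening: each operand is truncated to 32 bits, extended to 64, and only then multiplied. As standalone C (sketch):

    static uint64_t widen_mulu32(uint32_t a, uint32_t b)
    {
        return (uint64_t)a * b;   /* zero-extend, then 32x32 -> 64 */
    }

    static int64_t widen_muls32(int32_t a, int32_t b)
    {
        return (int64_t)a * b;    /* sign-extend, then 32x32 -> 64 */
    }

The 64-bit cases instead need a full 64x64 -> 128-bit product, which is why they call the mulu64()/muls64() helpers.
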
1 | We already had the 32-bit versions for a 32-bit host; expand this | 1 | Reviewed-by: David Hildenbrand <david@redhat.com> |
---|---|---|---|
2 | to 64-bit hosts as well. The 64-bit opcodes are new. | ||
3 | |||
4 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 3 | --- |
8 | tcg/tci/tcg-target.h | 8 ++++---- | 4 | target/s390x/mem_helper.c | 40 +++++++++++++++++++-------------------- |
9 | tcg/tci.c | 40 ++++++++++++++++++++++++++-------------- | 5 | target/s390x/translate.c | 25 +++++++++++++++++------- |
10 | tcg/tci/tcg-target.c.inc | 15 ++++++++------- | 6 | 2 files changed, 38 insertions(+), 27 deletions(-) |
11 | 3 files changed, 38 insertions(+), 25 deletions(-) | ||
12 | 7 | ||
13 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | 8 | diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c |
14 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tcg/tci/tcg-target.h | 10 | --- a/target/s390x/mem_helper.c |
16 | +++ b/tcg/tci/tcg-target.h | 11 | +++ b/target/s390x/mem_helper.c |
12 | @@ -XXX,XX +XXX,XX @@ void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, | ||
13 | Int128 oldv; | ||
14 | bool fail; | ||
15 | |||
16 | - if (!HAVE_CMPXCHG128) { | ||
17 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
18 | - } | ||
19 | + assert(HAVE_CMPXCHG128); | ||
20 | |||
21 | mem_idx = cpu_mmu_index(env, false); | ||
22 | oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
23 | @@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) | ||
24 | { | ||
25 | uintptr_t ra = GETPC(); | ||
26 | uint64_t hi, lo; | ||
27 | + int mem_idx; | ||
28 | + TCGMemOpIdx oi; | ||
29 | + Int128 v; | ||
30 | |||
31 | - if (HAVE_ATOMIC128) { | ||
32 | - int mem_idx = cpu_mmu_index(env, false); | ||
33 | - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
34 | - Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); | ||
35 | - hi = int128_gethi(v); | ||
36 | - lo = int128_getlo(v); | ||
37 | - } else { | ||
38 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
39 | - } | ||
40 | + assert(HAVE_ATOMIC128); | ||
41 | + | ||
42 | + mem_idx = cpu_mmu_index(env, false); | ||
43 | + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
44 | + v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); | ||
45 | + hi = int128_gethi(v); | ||
46 | + lo = int128_getlo(v); | ||
47 | |||
48 | env->retxl = lo; | ||
49 | return hi; | ||
50 | @@ -XXX,XX +XXX,XX @@ void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, | ||
51 | uint64_t low, uint64_t high) | ||
52 | { | ||
53 | uintptr_t ra = GETPC(); | ||
54 | + int mem_idx; | ||
55 | + TCGMemOpIdx oi; | ||
56 | + Int128 v; | ||
57 | |||
58 | - if (HAVE_ATOMIC128) { | ||
59 | - int mem_idx = cpu_mmu_index(env, false); | ||
60 | - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
61 | - Int128 v = int128_make128(low, high); | ||
62 | - helper_atomic_sto_be_mmu(env, addr, v, oi, ra); | ||
63 | - } else { | ||
64 | - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); | ||
65 | - } | ||
66 | + assert(HAVE_ATOMIC128); | ||
67 | + | ||
68 | + mem_idx = cpu_mmu_index(env, false); | ||
69 | + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); | ||
70 | + v = int128_make128(low, high); | ||
71 | + helper_atomic_sto_be_mmu(env, addr, v, oi, ra); | ||
72 | } | ||
73 | |||
74 | /* Execute instruction. This instruction executes an insn modified with | ||
75 | diff --git a/target/s390x/translate.c b/target/s390x/translate.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/target/s390x/translate.c | ||
78 | +++ b/target/s390x/translate.c | ||
17 | @@ -XXX,XX +XXX,XX @@ | 79 | @@ -XXX,XX +XXX,XX @@ |
18 | #define TCG_TARGET_HAS_rot_i64 1 | 80 | #include "trace-tcg.h" |
19 | #define TCG_TARGET_HAS_movcond_i64 1 | 81 | #include "exec/translator.h" |
20 | #define TCG_TARGET_HAS_muls2_i64 1 | 82 | #include "exec/log.h" |
21 | -#define TCG_TARGET_HAS_add2_i32 0 | 83 | +#include "qemu/atomic128.h" |
22 | -#define TCG_TARGET_HAS_sub2_i32 0 | 84 | |
23 | +#define TCG_TARGET_HAS_add2_i32 1 | 85 | |
24 | +#define TCG_TARGET_HAS_sub2_i32 1 | 86 | /* Information that (most) every instruction needs to manipulate. */ |
25 | #define TCG_TARGET_HAS_mulu2_i32 1 | 87 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) |
26 | -#define TCG_TARGET_HAS_add2_i64 0 | 88 | int r3 = get_field(s->fields, r3); |
27 | -#define TCG_TARGET_HAS_sub2_i64 0 | 89 | int d2 = get_field(s->fields, d2); |
28 | +#define TCG_TARGET_HAS_add2_i64 1 | 90 | int b2 = get_field(s->fields, b2); |
29 | +#define TCG_TARGET_HAS_sub2_i64 1 | 91 | + DisasJumpType ret = DISAS_NEXT; |
30 | #define TCG_TARGET_HAS_mulu2_i64 1 | 92 | TCGv_i64 addr; |
31 | #define TCG_TARGET_HAS_muluh_i64 0 | 93 | TCGv_i32 t_r1, t_r3; |
32 | #define TCG_TARGET_HAS_mulsh_i64 0 | 94 | |
33 | diff --git a/tcg/tci.c b/tcg/tci.c | 95 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) |
34 | index XXXXXXX..XXXXXXX 100644 | 96 | addr = get_address(s, 0, b2, d2); |
35 | --- a/tcg/tci.c | 97 | t_r1 = tcg_const_i32(r1); |
36 | +++ b/tcg/tci.c | 98 | t_r3 = tcg_const_i32(r3); |
37 | @@ -XXX,XX +XXX,XX @@ static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, | 99 | - if (tb_cflags(s->base.tb) & CF_PARALLEL) { |
38 | *c5 = extract32(insn, 28, 4); | 100 | + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { |
101 | + gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); | ||
102 | + } else if (HAVE_CMPXCHG128) { | ||
103 | gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3); | ||
104 | } else { | ||
105 | - gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); | ||
106 | + gen_helper_exit_atomic(cpu_env); | ||
107 | + ret = DISAS_NORETURN; | ||
108 | } | ||
109 | tcg_temp_free_i64(addr); | ||
110 | tcg_temp_free_i32(t_r1); | ||
111 | tcg_temp_free_i32(t_r3); | ||
112 | |||
113 | set_cc_static(s); | ||
114 | - return DISAS_NEXT; | ||
115 | + return ret; | ||
39 | } | 116 | } |
40 | 117 | ||
41 | -#if TCG_TARGET_REG_BITS == 32 | 118 | static DisasJumpType op_csst(DisasContext *s, DisasOps *o) |
42 | static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, | 119 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lpd(DisasContext *s, DisasOps *o) |
43 | TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5) | 120 | |
121 | static DisasJumpType op_lpq(DisasContext *s, DisasOps *o) | ||
44 | { | 122 | { |
45 | @@ -XXX,XX +XXX,XX @@ static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, | 123 | - if (tb_cflags(s->base.tb) & CF_PARALLEL) { |
46 | *r4 = extract32(insn, 24, 4); | 124 | + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { |
47 | *r5 = extract32(insn, 28, 4); | 125 | + gen_helper_lpq(o->out, cpu_env, o->in2); |
126 | + } else if (HAVE_ATOMIC128) { | ||
127 | gen_helper_lpq_parallel(o->out, cpu_env, o->in2); | ||
128 | } else { | ||
129 | - gen_helper_lpq(o->out, cpu_env, o->in2); | ||
130 | + gen_helper_exit_atomic(cpu_env); | ||
131 | + return DISAS_NORETURN; | ||
132 | } | ||
133 | return_low128(o->out2); | ||
134 | return DISAS_NEXT; | ||
135 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_stmh(DisasContext *s, DisasOps *o) | ||
136 | |||
137 | static DisasJumpType op_stpq(DisasContext *s, DisasOps *o) | ||
138 | { | ||
139 | - if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
140 | + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { | ||
141 | + gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); | ||
142 | + } else if (HAVE_ATOMIC128) { | ||
143 | gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out); | ||
144 | } else { | ||
145 | - gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); | ||
146 | + gen_helper_exit_atomic(cpu_env); | ||
147 | + return DISAS_NORETURN; | ||
148 | } | ||
149 | return DISAS_NEXT; | ||
48 | } | 150 | } |
49 | -#endif | ||
50 | |||
51 | static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) | ||
52 | { | ||
53 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
54 | for (;;) { | ||
55 | uint32_t insn; | ||
56 | TCGOpcode opc; | ||
57 | - TCGReg r0, r1, r2, r3, r4; | ||
58 | + TCGReg r0, r1, r2, r3, r4, r5; | ||
59 | tcg_target_ulong t1; | ||
60 | TCGCond condition; | ||
61 | target_ulong taddr; | ||
62 | uint8_t pos, len; | ||
63 | uint32_t tmp32; | ||
64 | uint64_t tmp64; | ||
65 | -#if TCG_TARGET_REG_BITS == 32 | ||
66 | - TCGReg r5; | ||
67 | uint64_t T1, T2; | ||
68 | -#endif | ||
69 | TCGMemOpIdx oi; | ||
70 | int32_t ofs; | ||
71 | void *ptr; | ||
72 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
73 | tb_ptr = ptr; | ||
74 | } | ||
75 | break; | ||
76 | -#if TCG_TARGET_REG_BITS == 32 | ||
77 | +#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_add2_i32 | ||
78 | case INDEX_op_add2_i32: | ||
79 | tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); | ||
80 | T1 = tci_uint64(regs[r3], regs[r2]); | ||
81 | T2 = tci_uint64(regs[r5], regs[r4]); | ||
82 | tci_write_reg64(regs, r1, r0, T1 + T2); | ||
83 | break; | ||
84 | +#endif | ||
85 | +#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_sub2_i32 | ||
86 | case INDEX_op_sub2_i32: | ||
87 | tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); | ||
88 | T1 = tci_uint64(regs[r3], regs[r2]); | ||
89 | T2 = tci_uint64(regs[r5], regs[r4]); | ||
90 | tci_write_reg64(regs, r1, r0, T1 - T2); | ||
91 | break; | ||
92 | -#endif /* TCG_TARGET_REG_BITS == 32 */ | ||
93 | +#endif | ||
94 | #if TCG_TARGET_HAS_mulu2_i32 | ||
95 | case INDEX_op_mulu2_i32: | ||
96 | tci_args_rrrr(insn, &r0, &r1, &r2, &r3); | ||
97 | @@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, | ||
98 | muls64(®s[r0], ®s[r1], regs[r2], regs[r3]); | ||
99 | break; | ||
100 | #endif | ||
101 | +#if TCG_TARGET_HAS_add2_i64 | ||
102 | + case INDEX_op_add2_i64: | ||
103 | + tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); | ||
104 | + T1 = regs[r2] + regs[r4]; | ||
105 | + T2 = regs[r3] + regs[r5] + (T1 < regs[r2]); | ||
106 | + regs[r0] = T1; | ||
107 | + regs[r1] = T2; | ||
108 | + break; | ||
109 | +#endif | ||
111 | +#if TCG_TARGET_HAS_sub2_i64 | ||
111 | + case INDEX_op_sub2_i64: | ||
112 | + tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); | ||
113 | + T1 = regs[r2] - regs[r4]; | ||
114 | + T2 = regs[r3] - regs[r5] - (regs[r2] < regs[r4]); | ||
115 | + regs[r0] = T1; | ||
116 | + regs[r1] = T2; | ||
117 | + break; | ||
118 | +#endif | ||
119 | |||
120 | /* Shift/rotate operations (64 bit). */ | ||
121 | |||
122 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
123 | const char *op_name; | ||
124 | uint32_t insn; | ||
125 | TCGOpcode op; | ||
126 | - TCGReg r0, r1, r2, r3, r4; | ||
127 | -#if TCG_TARGET_REG_BITS == 32 | ||
128 | - TCGReg r5; | ||
129 | -#endif | ||
130 | + TCGReg r0, r1, r2, r3, r4, r5; | ||
131 | tcg_target_ulong i1; | ||
132 | int32_t s2; | ||
133 | TCGCond c; | ||
134 | @@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) | ||
135 | str_r(r2), str_r(r3)); | ||
136 | break; | ||
137 | |||
138 | -#if TCG_TARGET_REG_BITS == 32 | ||
139 | case INDEX_op_add2_i32: | ||
140 | + case INDEX_op_add2_i64: | ||
141 | case INDEX_op_sub2_i32: | ||
142 | + case INDEX_op_sub2_i64: | ||
143 | tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); | ||
144 | info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s", | ||
145 | op_name, str_r(r0), str_r(r1), str_r(r2), | ||
146 | str_r(r3), str_r(r4), str_r(r5)); | ||
147 | break; | ||
148 | -#endif | ||
149 | |||
150 | case INDEX_op_qemu_ld_i64: | ||
151 | case INDEX_op_qemu_st_i64: | ||
152 | diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc | ||
153 | index XXXXXXX..XXXXXXX 100644 | ||
154 | --- a/tcg/tci/tcg-target.c.inc | ||
155 | +++ b/tcg/tci/tcg-target.c.inc | ||
156 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
157 | case INDEX_op_brcond_i64: | ||
158 | return C_O0_I2(r, r); | ||
159 | |||
160 | -#if TCG_TARGET_REG_BITS == 32 | ||
161 | - /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */ | ||
162 | case INDEX_op_add2_i32: | ||
163 | + case INDEX_op_add2_i64: | ||
164 | case INDEX_op_sub2_i32: | ||
165 | + case INDEX_op_sub2_i64: | ||
166 | return C_O2_I4(r, r, r, r, r, r); | ||
167 | + | ||
168 | +#if TCG_TARGET_REG_BITS == 32 | ||
169 | case INDEX_op_brcond2_i32: | ||
170 | return C_O0_I4(r, r, r, r); | ||
171 | #endif | ||
172 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, | ||
173 | tcg_out32(s, insn); | ||
174 | } | ||
175 | |||
176 | -#if TCG_TARGET_REG_BITS == 32 | ||
177 | static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op, | ||
178 | TCGReg r0, TCGReg r1, TCGReg r2, | ||
179 | TCGReg r3, TCGReg r4, TCGReg r5) | ||
180 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op, | ||
181 | insn = deposit32(insn, 28, 4, r5); | ||
182 | tcg_out32(s, insn); | ||
183 | } | ||
184 | -#endif | ||
185 | |||
186 | static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val, | ||
187 | TCGReg base, intptr_t offset) | ||
188 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
189 | tcg_out_op_rr(s, opc, args[0], args[1]); | ||
190 | break; | ||
191 | |||
192 | -#if TCG_TARGET_REG_BITS == 32 | ||
193 | - case INDEX_op_add2_i32: | ||
194 | - case INDEX_op_sub2_i32: | ||
195 | + CASE_32_64(add2) | ||
196 | + CASE_32_64(sub2) | ||
197 | tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2], | ||
198 | args[3], args[4], args[5]); | ||
199 | break; | ||
200 | + | ||
201 | +#if TCG_TARGET_REG_BITS == 32 | ||
202 | case INDEX_op_brcond2_i32: | ||
203 | tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP, | ||
204 | args[0], args[1], args[2], args[3], args[4]); | ||
205 | -- | 151 | -- |
206 | 2.25.1 | 152 | 2.17.2 |
207 | 153 | ||
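The carry and borrow computations in the new i64 cases use the usual unsigned-overflow test: an add carried iff the low-part sum is smaller than an addend, and a subtract borrowed iff the minuend's low part is smaller than the subtrahend's. As a standalone sketch:

    static void add2_64(uint64_t *lo, uint64_t *hi,
                        uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
    {
        uint64_t l = al + bl;
        *hi = ah + bh + (l < al);     /* carry out of the low word */
        *lo = l;
    }

    static void sub2_64(uint64_t *lo, uint64_t *hi,
                        uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
    {
        *hi = ah - bh - (al < bl);    /* borrow from the low word */
        *lo = al - bl;
    }
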
1 | This reverts commit dc09f047eddec8f4a1991c4f5f4a428d7aa3f2c0. | 1 | From: "Emilio G. Cota" <cota@braap.org> |
---|---|---|---|
2 | 2 | ||
3 | For tcg, tracepoints are expanded inline in tcg opcodes. | 3 | Updates can come from other threads, so readers that do not |
4 | Using a helper which generates a second tracepoint is incorrect. | 4 | take tlb_lock must use atomic_read to avoid undefined |
5 | 5 | behaviour (UB). | |
6 | For system mode, the extraction and re-packing of MemOp and mmu_idx | 6 | |
7 | lost the alignment information from MemOp. So we were no longer | 7 | This completes the conversion to tlb_lock. This conversion results |
8 | raising alignment exceptions for !TARGET_ALIGNED_ONLY guests. | 8 | on average in no performance loss, as the following experiments |
9 | This can be seen in tests/tcg/xtensa/test_load_store.S. | 9 | (run on an Intel i7-6700K CPU @ 4.00GHz) show. |
10 | 10 | ||
11 | For user mode, we must update to the new signature of g2h() so that | 11 | 1. aarch64 bootup+shutdown test: |
12 | the revert compiles. We can leave set_helper_retaddr for later. | 12 | |
13 | 13 | - Before: | |
14 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 14 | Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): |
15 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 15 | |
16 | 7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% ) | ||
17 | 31,574,905,303 cycles # 4.217 GHz ( +- 0.12% ) | ||
18 | 57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% ) | ||
19 | 10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% ) | ||
20 | 173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% ) | ||
21 | |||
22 | 7.504481349 seconds time elapsed ( +- 0.14% ) | ||
23 | |||
24 | - After: | ||
25 | Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): | ||
26 | |||
27 | 7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% ) | ||
28 | 31,478,476,520 cycles # 4.218 GHz ( +- 0.07% ) | ||
29 | 57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% ) | ||
30 | 10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% ) | ||
31 | 173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% ) | ||
32 | |||
33 | 7.474970463 seconds time elapsed ( +- 0.07% ) | ||
34 | |||
35 | 2. SPEC06int: | ||
36 | SPEC06int (test set) | ||
37 | [Y axis: Speedup over master] | ||
38 | 1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+ | ||
39 | | | | ||
40 | 1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+ | ||
41 | | +++ | +++ tlb-lock-v3 (spinl|ck) | | ||
42 | | +++ | | +++ +++ | | | | ||
43 | 1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+ | ||
44 | | ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### | | ||
45 | 1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+ | ||
46 | | *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # | | ||
47 | 0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+ | ||
48 | | * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # | | ||
49 | | * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # | | ||
50 | 0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+ | ||
51 | | * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # | | ||
52 | 0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+ | ||
53 | | * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | | ||
54 | | * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | | ||
55 | 0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+ | ||
56 | | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | | ||
57 | 0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+ | ||
58 | 400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean | ||
59 | |||
60 | png: https://imgur.com/a/BHzpPTW | ||
61 | |||
62 | Notes: | ||
63 | - tlb-lock-v2 corresponds to an implementation with a mutex. | ||
64 | - tlb-lock-v3 corresponds to the current implementation, i.e. | ||
65 | a spinlock and a single lock acquisition in tlb_set_page_with_attrs. | ||
66 | |||
67 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
68 | Message-Id: <20181016153840.25877-1-cota@braap.org> | ||
16 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 69 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
17 | --- | 70 | --- |
18 | tcg/tci.c | 73 ++++++++++++++++++++++++++++++++++--------------------- | 71 | accel/tcg/softmmu_template.h | 12 ++++++------ |
19 | 1 file changed, 45 insertions(+), 28 deletions(-) | 72 | include/exec/cpu_ldst.h | 11 ++++++++++- |
20 | 73 | include/exec/cpu_ldst_template.h | 2 +- | |
21 | diff --git a/tcg/tci.c b/tcg/tci.c | 74 | accel/tcg/cputlb.c | 19 +++++++++++++------ |
22 | index XXXXXXX..XXXXXXX 100644 | 75 | 4 files changed, 30 insertions(+), 14 deletions(-) |
23 | --- a/tcg/tci.c | 76 | |
24 | +++ b/tcg/tci.c | 77 | diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h |
25 | @@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) | 78 | index XXXXXXX..XXXXXXX 100644 |
26 | return result; | 79 | --- a/accel/tcg/softmmu_template.h |
80 | +++ b/accel/tcg/softmmu_template.h | ||
81 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
82 | uintptr_t mmu_idx = get_mmuidx(oi); | ||
83 | uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
84 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
85 | - target_ulong tlb_addr = entry->addr_write; | ||
86 | + target_ulong tlb_addr = tlb_addr_write(entry); | ||
87 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | ||
88 | uintptr_t haddr; | ||
89 | |||
90 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
91 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, | ||
92 | mmu_idx, retaddr); | ||
93 | } | ||
94 | - tlb_addr = entry->addr_write & ~TLB_INVALID_MASK; | ||
95 | + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; | ||
96 | } | ||
97 | |||
98 | /* Handle an IO access. */ | ||
99 | @@ -XXX,XX +XXX,XX @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
100 | cannot evict the first. */ | ||
101 | page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; | ||
102 | entry2 = tlb_entry(env, mmu_idx, page2); | ||
103 | - if (!tlb_hit_page(entry2->addr_write, page2) | ||
104 | + if (!tlb_hit_page(tlb_addr_write(entry2), page2) | ||
105 | && !VICTIM_TLB_HIT(addr_write, page2)) { | ||
106 | tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, | ||
107 | mmu_idx, retaddr); | ||
108 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
109 | uintptr_t mmu_idx = get_mmuidx(oi); | ||
110 | uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
111 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
112 | - target_ulong tlb_addr = entry->addr_write; | ||
113 | + target_ulong tlb_addr = tlb_addr_write(entry); | ||
114 | unsigned a_bits = get_alignment_bits(get_memop(oi)); | ||
115 | uintptr_t haddr; | ||
116 | |||
117 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
118 | tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, | ||
119 | mmu_idx, retaddr); | ||
120 | } | ||
121 | - tlb_addr = entry->addr_write & ~TLB_INVALID_MASK; | ||
122 | + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; | ||
123 | } | ||
124 | |||
125 | /* Handle an IO access. */ | ||
126 | @@ -XXX,XX +XXX,XX @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, | ||
127 | cannot evict the first. */ | ||
128 | page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; | ||
129 | entry2 = tlb_entry(env, mmu_idx, page2); | ||
130 | - if (!tlb_hit_page(entry2->addr_write, page2) | ||
131 | + if (!tlb_hit_page(tlb_addr_write(entry2), page2) | ||
132 | && !VICTIM_TLB_HIT(addr_write, page2)) { | ||
133 | tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, | ||
134 | mmu_idx, retaddr); | ||
135 | diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h | ||
136 | index XXXXXXX..XXXXXXX 100644 | ||
137 | --- a/include/exec/cpu_ldst.h | ||
138 | +++ b/include/exec/cpu_ldst.h | ||
139 | @@ -XXX,XX +XXX,XX @@ extern __thread uintptr_t helper_retaddr; | ||
140 | /* The memory helpers for tcg-generated code need tcg_target_long etc. */ | ||
141 | #include "tcg.h" | ||
142 | |||
143 | +static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) | ||
144 | +{ | ||
145 | +#if TCG_OVERSIZED_GUEST | ||
146 | + return entry->addr_write; | ||
147 | +#else | ||
148 | + return atomic_read(&entry->addr_write); | ||
149 | +#endif | ||
150 | +} | ||
151 | + | ||
152 | /* Find the TLB index corresponding to the mmu_idx + address pair. */ | ||
153 | static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, | ||
154 | target_ulong addr) | ||
155 | @@ -XXX,XX +XXX,XX @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, | ||
156 | tlb_addr = tlbentry->addr_read; | ||
157 | break; | ||
158 | case 1: | ||
159 | - tlb_addr = tlbentry->addr_write; | ||
160 | + tlb_addr = tlb_addr_write(tlbentry); | ||
161 | break; | ||
162 | case 2: | ||
163 | tlb_addr = tlbentry->addr_code; | ||
164 | diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h | ||
165 | index XXXXXXX..XXXXXXX 100644 | ||
166 | --- a/include/exec/cpu_ldst_template.h | ||
167 | +++ b/include/exec/cpu_ldst_template.h | ||
168 | @@ -XXX,XX +XXX,XX @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, | ||
169 | addr = ptr; | ||
170 | mmu_idx = CPU_MMU_INDEX; | ||
171 | entry = tlb_entry(env, mmu_idx, addr); | ||
172 | - if (unlikely(entry->addr_write != | ||
173 | + if (unlikely(tlb_addr_write(entry) != | ||
174 | (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { | ||
175 | oi = make_memop_idx(SHIFT, mmu_idx); | ||
176 | glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi, | ||
177 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
178 | index XXXXXXX..XXXXXXX 100644 | ||
179 | --- a/accel/tcg/cputlb.c | ||
180 | +++ b/accel/tcg/cputlb.c | ||
181 | @@ -XXX,XX +XXX,XX @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, | ||
182 | target_ulong page) | ||
183 | { | ||
184 | return tlb_hit_page(tlb_entry->addr_read, page) || | ||
185 | - tlb_hit_page(tlb_entry->addr_write, page) || | ||
186 | + tlb_hit_page(tlb_addr_write(tlb_entry), page) || | ||
187 | tlb_hit_page(tlb_entry->addr_code, page); | ||
27 | } | 188 | } |
28 | 189 | ||
29 | -#define qemu_ld_ub \ | 190 | @@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, |
30 | - cpu_ldub_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) | 191 | tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); |
31 | -#define qemu_ld_leuw \ | 192 | |
32 | - cpu_lduw_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) | 193 | entry = tlb_entry(env, mmu_idx, addr); |
33 | -#define qemu_ld_leul \ | 194 | - tlb_addr = entry->addr_write; |
34 | - cpu_ldl_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) | 195 | + tlb_addr = tlb_addr_write(entry); |
35 | -#define qemu_ld_leq \ | 196 | if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { |
36 | - cpu_ldq_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) | 197 | /* RAM access */ |
37 | -#define qemu_ld_beuw \ | 198 | uintptr_t haddr = addr + entry->addend; |
38 | - cpu_lduw_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) | 199 | @@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, |
39 | -#define qemu_ld_beul \ | 200 | assert_cpu_is_self(ENV_GET_CPU(env)); |
40 | - cpu_ldl_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) | 201 | for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { |
41 | -#define qemu_ld_beq \ | 202 | CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; |
42 | - cpu_ldq_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) | 203 | - target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); |
43 | -#define qemu_st_b(X) \ | 204 | + target_ulong cmp; |
44 | - cpu_stb_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) | 205 | + |
45 | -#define qemu_st_lew(X) \ | 206 | + /* elt_ofs might correspond to .addr_write, so use atomic_read */ |
46 | - cpu_stw_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) | 207 | +#if TCG_OVERSIZED_GUEST |
47 | -#define qemu_st_lel(X) \ | 208 | + cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); |
48 | - cpu_stl_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) | ||
49 | -#define qemu_st_leq(X) \ | ||
50 | - cpu_stq_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) | ||
51 | -#define qemu_st_bew(X) \ | ||
52 | - cpu_stw_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) | ||
53 | -#define qemu_st_bel(X) \ | ||
54 | - cpu_stl_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) | ||
55 | -#define qemu_st_beq(X) \ | ||
56 | - cpu_stq_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) | ||
57 | +#ifdef CONFIG_SOFTMMU | ||
58 | +# define qemu_ld_ub \ | ||
59 | + helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
60 | +# define qemu_ld_leuw \ | ||
61 | + helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
62 | +# define qemu_ld_leul \ | ||
63 | + helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
64 | +# define qemu_ld_leq \ | ||
65 | + helper_le_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
66 | +# define qemu_ld_beuw \ | ||
67 | + helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
68 | +# define qemu_ld_beul \ | ||
69 | + helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
70 | +# define qemu_ld_beq \ | ||
71 | + helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr) | ||
72 | +# define qemu_st_b(X) \ | ||
73 | + helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
74 | +# define qemu_st_lew(X) \ | ||
75 | + helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
76 | +# define qemu_st_lel(X) \ | ||
77 | + helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
78 | +# define qemu_st_leq(X) \ | ||
79 | + helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
80 | +# define qemu_st_bew(X) \ | ||
81 | + helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
82 | +# define qemu_st_bel(X) \ | ||
83 | + helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
84 | +# define qemu_st_beq(X) \ | ||
85 | + helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) | ||
86 | +#else | 209 | +#else |
87 | +# define qemu_ld_ub ldub_p(g2h(env_cpu(env), taddr)) | 210 | + cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); |
88 | +# define qemu_ld_leuw lduw_le_p(g2h(env_cpu(env), taddr)) | ||
89 | +# define qemu_ld_leul (uint32_t)ldl_le_p(g2h(env_cpu(env), taddr)) | ||
90 | +# define qemu_ld_leq ldq_le_p(g2h(env_cpu(env), taddr)) | ||
91 | +# define qemu_ld_beuw lduw_be_p(g2h(env_cpu(env), taddr)) | ||
92 | +# define qemu_ld_beul (uint32_t)ldl_be_p(g2h(env_cpu(env), taddr)) | ||
93 | +# define qemu_ld_beq ldq_be_p(g2h(env_cpu(env), taddr)) | ||
94 | +# define qemu_st_b(X) stb_p(g2h(env_cpu(env), taddr), X) | ||
95 | +# define qemu_st_lew(X) stw_le_p(g2h(env_cpu(env), taddr), X) | ||
96 | +# define qemu_st_lel(X) stl_le_p(g2h(env_cpu(env), taddr), X) | ||
97 | +# define qemu_st_leq(X) stq_le_p(g2h(env_cpu(env), taddr), X) | ||
98 | +# define qemu_st_bew(X) stw_be_p(g2h(env_cpu(env), taddr), X) | ||
99 | +# define qemu_st_bel(X) stl_be_p(g2h(env_cpu(env), taddr), X) | ||
100 | +# define qemu_st_beq(X) stq_be_p(g2h(env_cpu(env), taddr), X) | ||
101 | +#endif | 211 | +#endif |
102 | 212 | ||
103 | static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, | 213 | if (cmp == page) { |
104 | TCGMemOpIdx oi, const void *tb_ptr) | 214 | /* Found entry in victim tlb, swap tlb and iotlb. */ |
215 | @@ -XXX,XX +XXX,XX @@ void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, | ||
216 | uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
217 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); | ||
218 | |||
219 | - if (!tlb_hit(entry->addr_write, addr)) { | ||
220 | + if (!tlb_hit(tlb_addr_write(entry), addr)) { | ||
221 | /* TLB entry is for a different page */ | ||
222 | if (!VICTIM_TLB_HIT(addr_write, addr)) { | ||
223 | tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, | ||
224 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
225 | size_t mmu_idx = get_mmuidx(oi); | ||
226 | uintptr_t index = tlb_index(env, mmu_idx, addr); | ||
227 | CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); | ||
228 | - target_ulong tlb_addr = tlbe->addr_write; | ||
229 | + target_ulong tlb_addr = tlb_addr_write(tlbe); | ||
230 | TCGMemOp mop = get_memop(oi); | ||
231 | int a_bits = get_alignment_bits(mop); | ||
232 | int s_bits = mop & MO_SIZE; | ||
233 | @@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, | ||
234 | tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE, | ||
235 | mmu_idx, retaddr); | ||
236 | } | ||
237 | - tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK; | ||
238 | + tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; | ||
239 | } | ||
240 | |||
241 | /* Notice an IO access or a needs-MMU-lookup access */ | ||
105 | -- | 242 | -- |
106 | 2.25.1 | 243 | 2.17.2 |
107 | 244 | ||
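The alignment loss described in the revert comes from round-tripping through get_mmuidx(): a TCGMemOpIdx packs the whole MemOp, including any MO_ALIGN bits, together with the mmu index, and passing only get_mmuidx(oi) into the cpu_ld*/cpu_st* wrappers throws the alignment bits away. A simplified sketch of the packing (see the actual definitions in tcg/tcg.h):

    static inline unsigned make_memop_idx(unsigned op, unsigned idx)
    {
        return (op << 4) | idx;   /* MemOp (incl. MO_ALIGN) | mmu index */
    }
    static inline unsigned get_memop(unsigned oi)  { return oi >> 4;  }
    static inline unsigned get_mmuidx(unsigned oi) { return oi & 15; }

The helper_*_mmu entry points take the full oi and therefore preserve alignment checking, which is what the revert restores.
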
Deleted patch | |||
---|---|---|---|
1 | The longest test run at the moment seems to be on a (slower) | ||
2 | aarch64 host, for which test-mmap takes 64 seconds. | ||
3 | 1 | ||
4 | Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
5 | Acked-by: Alex Bennée <alex.bennee@linaro.org> | ||
6 | Reviewed-by: Thomas Huth <thuth@redhat.com> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | configure | 3 +++ | ||
11 | tests/tcg/Makefile.target | 6 ++++-- | ||
12 | 2 files changed, 7 insertions(+), 2 deletions(-) | ||
13 | |||
14 | diff --git a/configure b/configure | ||
15 | index XXXXXXX..XXXXXXX 100755 | ||
16 | --- a/configure | ||
17 | +++ b/configure | ||
18 | @@ -XXX,XX +XXX,XX @@ fi | ||
19 | if test "$optreset" = "yes" ; then | ||
20 | echo "HAVE_OPTRESET=y" >> $config_host_mak | ||
21 | fi | ||
22 | +if test "$tcg" = "enabled" -a "$tcg_interpreter" = "true" ; then | ||
23 | + echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak | ||
24 | +fi | ||
25 | if test "$fdatasync" = "yes" ; then | ||
26 | echo "CONFIG_FDATASYNC=y" >> $config_host_mak | ||
27 | fi | ||
28 | diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/tests/tcg/Makefile.target | ||
31 | +++ b/tests/tcg/Makefile.target | ||
32 | @@ -XXX,XX +XXX,XX @@ LDFLAGS= | ||
33 | QEMU_OPTS= | ||
34 | |||
35 | |||
36 | -# If TCG debugging is enabled things are a lot slower | ||
37 | -ifeq ($(CONFIG_DEBUG_TCG),y) | ||
38 | +# If TCG debugging or TCI is enabled, things are a lot slower | ||
39 | +ifneq ($(CONFIG_TCG_INTERPRETER),) | ||
40 | +TIMEOUT=90 | ||
41 | +else ifneq ($(CONFIG_DEBUG_TCG),) | ||
42 | TIMEOUT=60 | ||
43 | else | ||
44 | TIMEOUT=15 | ||
45 | -- | ||
46 | 2.25.1 | ||
47 | |||
Deleted patch | |||
---|---|---|---|
1 | We should not be aligning the offset in temp_allocate_frame, | ||
2 | because the odd, stack-biased offset produces an aligned address in the end. | ||
3 | Instead, pass the logical offset into tcg_set_frame and add | ||
4 | the stack bias last. | ||
5 | 1 | ||
6 | Cc: qemu-stable@nongnu.org | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | tcg/tcg.c | 9 +++------ | ||
11 | tcg/sparc/tcg-target.c.inc | 16 ++++++++++------ | ||
12 | 2 files changed, 13 insertions(+), 12 deletions(-) | ||
13 | |||
14 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/tcg/tcg.c | ||
17 | +++ b/tcg/tcg.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static void check_regs(TCGContext *s) | ||
19 | |||
20 | static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) | ||
21 | { | ||
22 | -#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) | ||
23 | - /* Sparc64 stack is accessed with offset of 2047 */ | ||
24 | - s->current_frame_offset = (s->current_frame_offset + | ||
25 | - (tcg_target_long)sizeof(tcg_target_long) - 1) & | ||
26 | - ~(sizeof(tcg_target_long) - 1); | ||
27 | -#endif | ||
28 | if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > | ||
29 | s->frame_end) { | ||
30 | tcg_abort(); | ||
31 | } | ||
32 | ts->mem_offset = s->current_frame_offset; | ||
33 | +#if defined(__sparc__) | ||
34 | + ts->mem_offset += TCG_TARGET_STACK_BIAS; | ||
35 | +#endif | ||
36 | ts->mem_base = s->frame_temp; | ||
37 | ts->mem_allocated = 1; | ||
38 | s->current_frame_offset += sizeof(tcg_target_long); | ||
39 | diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/tcg/sparc/tcg-target.c.inc | ||
42 | +++ b/tcg/sparc/tcg-target.c.inc | ||
43 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s) | ||
44 | { | ||
45 | int tmp_buf_size, frame_size; | ||
46 | |||
47 | - /* The TCG temp buffer is at the top of the frame, immediately | ||
48 | - below the frame pointer. */ | ||
49 | + /* | ||
50 | + * The TCG temp buffer is at the top of the frame, immediately | ||
51 | + * below the frame pointer. Use the logical (aligned) offset here; | ||
52 | + * the stack bias is applied in temp_allocate_frame(). | ||
53 | + */ | ||
54 | tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long); | ||
55 | - tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size, | ||
56 | - tmp_buf_size); | ||
57 | + tcg_set_frame(s, TCG_REG_I6, -tmp_buf_size, tmp_buf_size); | ||
58 | |||
59 | - /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is | ||
60 | - otherwise the minimal frame usable by callees. */ | ||
61 | + /* | ||
62 | + * TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is | ||
63 | + * otherwise the minimal frame usable by callees. | ||
64 | + */ | ||
65 | frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS; | ||
66 | frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size; | ||
67 | frame_size += TCG_TARGET_STACK_ALIGN - 1; | ||
68 | -- | ||
69 | 2.25.1 | ||
70 | |||
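To see why the unrounded, odd offset is correct: the sparc64 ABI biases the stack and frame pointers by 2047, so the register value is odd while the real frame is 16-byte aligned. A standalone demonstration of the arithmetic (the constant matches the patch; the addresses are invented):

    #include <assert.h>
    #include <stdint.h>

    #define TCG_TARGET_STACK_BIAS 2047   /* sparc64 ABI stack bias */

    int main(void)
    {
        uintptr_t real_fp   = 0x100000u;                       /* aligned frame top */
        uintptr_t biased_fp = real_fp - TCG_TARGET_STACK_BIAS; /* odd register value */

        intptr_t logical_off = -16;                            /* word-aligned temp slot */
        intptr_t mem_offset  = logical_off + TCG_TARGET_STACK_BIAS; /* odd, as stored */

        /* The final address is aligned again, which is why
         * temp_allocate_frame() must not round the odd offset up. */
        assert((biased_fp + mem_offset) % sizeof(long) == 0);
        return 0;
    }

This mirrors the patch: tcg_set_frame() now receives the logical -tmp_buf_size, and temp_allocate_frame() folds TCG_TARGET_STACK_BIAS into mem_offset as the last step.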